Update app.py
Browse files
app.py
CHANGED
|
@@ -6,15 +6,18 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
| 6 |
import os
|
| 7 |
import logging
|
| 8 |
|
|
|
|
|
|
|
| 9 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]')
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
app = Flask(__name__)
|
| 13 |
|
|
|
|
| 14 |
DF = None
|
| 15 |
ALL_TOPPINGS = []
|
| 16 |
FEATURE_DF = None
|
| 17 |
-
SCALER = None
|
| 18 |
NUMERICAL_COLS = ['Price', 'Slices', 'Rating', 'Spice_Level', 'Preparation_Time', 'Calories']
|
| 19 |
CATEGORICAL_FEATURES = [
|
| 20 |
'Serving_Size', 'Popular_Group', 'Dietary_Category',
|
|
@@ -27,58 +30,86 @@ DEFAULT_IMAGE_URL = 'https://images.dominos.co.in/new_margherita_2502.jpg'
|
|
| 27 |
|
| 28 |
def preprocess_data(df_path='pizza.csv'):
|
| 29 |
global DF, ALL_TOPPINGS, FEATURE_DF, SCALER, CATEGORICAL_FEATURES, CRUST_TYPE_COL
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
logger.info(f"Original DataFrame columns: {DF.columns.tolist()}")
|
| 37 |
|
|
|
|
| 38 |
potential_crust_cols = ['Crust_Type', 'Cr_Type']
|
| 39 |
valid_crust_cols = [col for col in potential_crust_cols if col in DF.columns]
|
| 40 |
if valid_crust_cols:
|
| 41 |
-
valid_crust_cols.sort(key=lambda col: DF[col].isnull().sum())
|
| 42 |
CRUST_TYPE_COL = valid_crust_cols[0]
|
| 43 |
logger.info(f"Using '{CRUST_TYPE_COL}' for crust type.")
|
| 44 |
if CRUST_TYPE_COL not in CATEGORICAL_FEATURES:
|
| 45 |
CATEGORICAL_FEATURES.append(CRUST_TYPE_COL)
|
|
|
|
| 46 |
for col in potential_crust_cols:
|
| 47 |
if col != CRUST_TYPE_COL and col in CATEGORICAL_FEATURES:
|
| 48 |
CATEGORICAL_FEATURES.remove(col)
|
| 49 |
else:
|
| 50 |
-
logger.warning("Crust type column not found. Crust type will not be used.")
|
| 51 |
CRUST_TYPE_COL = None
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
for col in
|
| 56 |
-
if col in DF.columns:
|
| 57 |
DF[col] = DF[col].fillna('')
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
for col in numerical_cols_in_df:
|
| 62 |
if col in DF.columns:
|
| 63 |
if pd.api.types.is_numeric_dtype(DF[col]):
|
| 64 |
-
|
|
|
|
|
|
|
| 65 |
else:
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
current_all_toppings = set()
|
| 77 |
-
for toppings_list in DF['Toppings_list_internal'].dropna():
|
| 78 |
-
current_all_toppings.update(t for t in toppings_list if t)
|
| 79 |
-
ALL_TOPPINGS = sorted(list(current_all_toppings))
|
| 80 |
-
logger.info(f"Found {len(ALL_TOPPINGS)} unique toppings. Example: {ALL_TOPPINGS[:5]}")
|
| 81 |
|
|
|
|
| 82 |
feature_data = {}
|
| 83 |
num_feature_map = {
|
| 84 |
'Price': 'Price_Rs', 'Slices': 'Slices', 'Rating': 'Rating',
|
|
@@ -88,107 +119,235 @@ def preprocess_data(df_path='pizza.csv'):
|
|
| 88 |
if df_col in DF.columns:
|
| 89 |
feature_data[feature_col] = DF[df_col].copy()
|
| 90 |
else:
|
| 91 |
-
|
|
|
|
| 92 |
|
|
|
|
| 93 |
if 'Spice_Level' in DF.columns:
|
| 94 |
-
DF['Spice_Level'] = DF['Spice_Level'].fillna('Mild')
|
| 95 |
spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
|
| 96 |
-
feature_data['Spice_Level'] = DF['Spice_Level'].map(spice_map).fillna(1.0)
|
| 97 |
else:
|
| 98 |
-
|
|
|
|
| 99 |
|
|
|
|
| 100 |
for feature_cat_col in CATEGORICAL_FEATURES:
|
| 101 |
-
if feature_cat_col in DF.columns:
|
|
|
|
|
|
|
| 102 |
for value in DF[feature_cat_col].unique():
|
| 103 |
-
if pd.notnull(value) and value != '':
|
| 104 |
feature_data[f"{feature_cat_col}_{value}"] = (DF[feature_cat_col] == value).astype(int)
|
|
|
|
|
|
|
| 105 |
|
|
|
|
| 106 |
for topping in ALL_TOPPINGS:
|
| 107 |
-
if topping:
|
| 108 |
feature_data[f"Topping_{topping}"] = DF['Toppings_list_internal'].apply(
|
| 109 |
lambda x: 1 if topping in x else 0
|
| 110 |
)
|
| 111 |
|
| 112 |
FEATURE_DF = pd.DataFrame(feature_data)
|
|
|
|
|
|
|
|
|
|
| 113 |
for col in NUMERICAL_COLS:
|
| 114 |
-
if col not in FEATURE_DF.columns:
|
|
|
|
|
|
|
| 115 |
if FEATURE_DF[col].isnull().any():
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
def get_recommendations(preferences):
|
| 125 |
global DF, FEATURE_DF, SCALER, CRUST_TYPE_COL, DEFAULT_IMAGE_URL
|
| 126 |
|
| 127 |
-
if
|
| 128 |
-
current_app.logger.error("Data not fully initialized for get_recommendations.")
|
| 129 |
return []
|
| 130 |
|
| 131 |
current_indices = DF.index.to_list()
|
| 132 |
current_app.logger.info(f"Starting with {len(current_indices)} pizzas before filtering. Preferences: {preferences}")
|
| 133 |
|
| 134 |
-
#
|
| 135 |
-
|
|
|
|
| 136 |
selected_toppings = set(preferences['toppings'])
|
| 137 |
-
if selected_toppings:
|
| 138 |
topping_mask = DF.loc[current_indices, 'Toppings_list_internal'].apply(
|
| 139 |
-
lambda
|
|
|
|
| 140 |
current_indices = DF.loc[current_indices][topping_mask].index.to_list()
|
| 141 |
current_app.logger.info(f"After toppings filter: {len(current_indices)} pizzas remaining")
|
| 142 |
if not current_indices: return []
|
| 143 |
|
| 144 |
# 2. Max Price
|
| 145 |
if 'price_range' in preferences and preferences['price_range'] and 'Price_Rs' in DF.columns:
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
f"After price filter ({min_price}-{max_price}): {len(current_indices)} pizzas
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
| 156 |
if 'slices' in preferences and preferences['slices'] is not None and 'Slices' in DF.columns:
|
| 157 |
try:
|
| 158 |
min_slices = int(preferences['slices'])
|
| 159 |
slices_mask = DF.loc[current_indices, 'Slices'] >= min_slices
|
| 160 |
current_indices = DF.loc[current_indices][slices_mask].index.to_list()
|
| 161 |
-
current_app.logger.info(f"After slices filter (>= {min_slices}): {len(current_indices)} pizzas
|
| 162 |
if not current_indices: return []
|
| 163 |
except ValueError:
|
| 164 |
current_app.logger.warning(f"Invalid value for slices: {preferences['slices']}")
|
| 165 |
|
| 166 |
-
# 4. Minimum Rating
|
| 167 |
if 'rating' in preferences and preferences['rating'] is not None and 'Rating' in DF.columns:
|
| 168 |
try:
|
| 169 |
min_rating = float(preferences['rating'])
|
| 170 |
rating_mask = DF.loc[current_indices, 'Rating'] >= min_rating
|
| 171 |
current_indices = DF.loc[current_indices][rating_mask].index.to_list()
|
| 172 |
-
current_app.logger.info(f"After rating filter (>= {min_rating}): {len(current_indices)} pizzas
|
| 173 |
if not current_indices: return []
|
| 174 |
except ValueError:
|
| 175 |
current_app.logger.warning(f"Invalid value for rating: {preferences['rating']}")
|
| 176 |
|
| 177 |
-
# 5. Max Preparation Time
|
| 178 |
-
if 'prep_time' in preferences and preferences[
|
| 179 |
-
'prep_time'] is not None and 'Preparation_Time_min' in DF.columns: # Changed 'preptime' to 'prep_time' to match JS
|
| 180 |
try:
|
| 181 |
-
|
| 182 |
-
max_prep_time = int(prep_time_str)
|
| 183 |
prep_mask = DF.loc[current_indices, 'Preparation_Time_min'] <= max_prep_time
|
| 184 |
current_indices = DF.loc[current_indices][prep_mask].index.to_list()
|
| 185 |
-
current_app.logger.info(
|
| 186 |
-
f"After prep time filter (<= {max_prep_time}): {len(current_indices)} pizzas remaining")
|
| 187 |
if not current_indices: return []
|
| 188 |
except ValueError:
|
| 189 |
-
current_app.logger.warning(f"Could not parse
|
| 190 |
|
| 191 |
-
# 6. Categorical Filters (
|
|
|
|
| 192 |
categorical_pref_map = {
|
| 193 |
"servingsize": "Serving_Size", "populargroup": "Popular_Group",
|
| 194 |
"dietarycategory": "Dietary_Category", "spicelevel": "Spice_Level",
|
|
@@ -196,158 +355,154 @@ def get_recommendations(preferences):
|
|
| 196 |
"restaurantchain": "Restaurant_Chain", "seasonalavailability": "Seasonal_Availability",
|
| 197 |
"breadtype": "Bread_Type", "crusttype": CRUST_TYPE_COL
|
| 198 |
}
|
| 199 |
-
|
| 200 |
for pref_key, df_col_name in categorical_pref_map.items():
|
| 201 |
-
if df_col_name and pref_key in preferences and preferences[pref_key]
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
f"
|
| 211 |
-
#
|
| 212 |
-
elif isinstance(pref_value, str) and pref_value and pref_value.lower() != "any":
|
| 213 |
-
cat_mask = DF.loc[current_indices, df_col_name] == pref_value
|
| 214 |
-
filtered_indices_count_before = len(current_indices)
|
| 215 |
-
current_indices = DF.loc[current_indices][cat_mask].index.to_list()
|
| 216 |
-
current_app.logger.info(
|
| 217 |
-
f"After {pref_key} filter ('{pref_value}'): {len(current_indices)} from {filtered_indices_count_before} pizzas remaining")
|
| 218 |
-
elif not pref_value: # Empty list or empty string means no filter for this category
|
| 219 |
-
current_app.logger.info(
|
| 220 |
-
f"Skipping filter for {pref_key} as no specific options were selected (value: {pref_value}).")
|
| 221 |
-
continue
|
| 222 |
-
|
| 223 |
-
if not current_indices: return []
|
| 224 |
|
| 225 |
if not current_indices:
|
| 226 |
-
current_app.logger.
|
| 227 |
return []
|
| 228 |
|
| 229 |
# --- Similarity Scoring Part ---
|
|
|
|
| 230 |
valid_indices_for_feature_df = FEATURE_DF.index.intersection(current_indices)
|
| 231 |
if valid_indices_for_feature_df.empty:
|
| 232 |
-
current_app.logger.
|
| 233 |
return []
|
| 234 |
|
| 235 |
filtered_feature_df = FEATURE_DF.loc[valid_indices_for_feature_df]
|
| 236 |
-
if filtered_feature_df.empty:
|
| 237 |
-
current_app.logger.warning("Filtered
|
| 238 |
return []
|
| 239 |
|
| 240 |
-
|
|
|
|
| 241 |
|
| 242 |
-
# Toppings
|
| 243 |
if 'toppings' in preferences and preferences['toppings']:
|
| 244 |
for topping in preferences['toppings']:
|
| 245 |
col_name = f"Topping_{topping}"
|
| 246 |
if col_name in user_vector.index:
|
| 247 |
user_vector[col_name] = 1.0
|
| 248 |
|
| 249 |
-
# Categorical
|
| 250 |
-
js_to_df_key_map_for_vector
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
pref_values_for_vector = preferences[pref_key]
|
| 262 |
-
# Ensure it's a list, even if frontend sent a single string (should be list)
|
| 263 |
-
if not isinstance(pref_values_for_vector, list):
|
| 264 |
-
pref_values_for_vector = [pref_values_for_vector]
|
| 265 |
-
|
| 266 |
-
for val_item in pref_values_for_vector:
|
| 267 |
-
if isinstance(val_item, str) and val_item.lower() == "any": # Should not happen with new UI
|
| 268 |
-
continue
|
| 269 |
-
col_name = f"{df_col_name}_{val_item}"
|
| 270 |
-
if col_name in user_vector.index:
|
| 271 |
-
user_vector[col_name] = 1.0
|
| 272 |
-
|
| 273 |
-
# Numerical for similarity
|
| 274 |
raw_user_num_prefs_dict = {}
|
| 275 |
-
|
| 276 |
|
| 277 |
if 'price_range' in preferences and preferences['price_range']:
|
| 278 |
-
|
| 279 |
-
preferences['price_range'][1])) / 2
|
|
|
|
| 280 |
if 'slices' in preferences and preferences['slices'] is not None:
|
| 281 |
-
raw_user_num_prefs_dict['Slices'] = float(preferences['slices'])
|
|
|
|
| 282 |
if 'rating' in preferences and preferences['rating'] is not None:
|
| 283 |
-
raw_user_num_prefs_dict['Rating'] = float(preferences['rating'])
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
if
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
if 'prep_time' in preferences and preferences['prep_time'] is not None: # Changed 'preptime'
|
| 299 |
-
try:
|
| 300 |
-
prep_time_str = str(preferences['prep_time']).lower().replace("min", "").strip()
|
| 301 |
-
raw_user_num_prefs_dict['Preparation_Time'] = float(prep_time_str)
|
| 302 |
-
except ValueError:
|
| 303 |
-
pass
|
| 304 |
-
|
| 305 |
-
# Scaling numerical preferences for user_vector
|
| 306 |
temp_scaling_df = pd.DataFrame(columns=NUMERICAL_COLS, index=[0])
|
| 307 |
for col in NUMERICAL_COLS:
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
if
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
scaled_user_num_values = SCALER.transform(temp_scaling_df[NUMERICAL_COLS])[0]
|
| 316 |
-
for i, col_name in enumerate(NUMERICAL_COLS):
|
| 317 |
-
if col_name in raw_user_num_prefs_dict: # Only set if user specified this numerical pref
|
| 318 |
-
user_vector[col_name] = scaled_user_num_values[i]
|
| 319 |
|
| 320 |
-
#
|
| 321 |
feature_matrix_filtered = filtered_feature_df.values
|
| 322 |
user_array = user_vector.values.reshape(1, -1)
|
| 323 |
|
|
|
|
| 324 |
if user_array.shape[1] != feature_matrix_filtered.shape[1]:
|
| 325 |
current_app.logger.error(
|
| 326 |
-
f"Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}"
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
user_array = aligned_user_vector.values.reshape(1, -1)
|
|
|
|
| 334 |
if user_array.shape[1] != feature_matrix_filtered.shape[1]:
|
| 335 |
-
current_app.logger.
|
| 336 |
-
f"Persistent Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}")
|
| 337 |
return []
|
| 338 |
|
|
|
|
| 339 |
similarities = cosine_similarity(user_array, feature_matrix_filtered)[0]
|
|
|
|
| 340 |
sorted_indices_in_filtered_df = similarities.argsort()[::-1]
|
|
|
|
| 341 |
final_recommendation_indices = valid_indices_for_feature_df[sorted_indices_in_filtered_df]
|
| 342 |
|
|
|
|
| 343 |
recommendations_list = []
|
| 344 |
-
frontend_keys
|
|
|
|
|
|
|
| 345 |
'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
|
| 346 |
'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
|
| 347 |
'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
|
| 348 |
'bread_type', 'image_url', 'crust_type'
|
| 349 |
]
|
| 350 |
-
|
| 351 |
'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
|
| 352 |
'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
|
| 353 |
'description': 'Description', 'popular_group': 'Popular_Group',
|
|
@@ -362,184 +517,112 @@ def get_recommendations(preferences):
|
|
| 362 |
for original_idx in final_recommendation_indices:
|
| 363 |
pizza_series = DF.iloc[original_idx]
|
| 364 |
rec_item = {}
|
| 365 |
-
for key in
|
| 366 |
-
df_col =
|
| 367 |
if key == 'id':
|
| 368 |
rec_item[key] = int(original_idx)
|
| 369 |
elif df_col and df_col in pizza_series:
|
| 370 |
value = pizza_series[df_col]
|
| 371 |
-
if isinstance(value, np.integer):
|
| 372 |
-
|
| 373 |
-
elif isinstance(value, np.
|
| 374 |
-
value = float(value)
|
| 375 |
-
elif isinstance(value, np.ndarray):
|
| 376 |
-
value = value.tolist()
|
| 377 |
rec_item[key] = "" if pd.isna(value) else value
|
| 378 |
-
elif key == 'crust_type' and not CRUST_TYPE_COL:
|
| 379 |
-
|
| 380 |
else:
|
| 381 |
rec_item[key] = ""
|
| 382 |
|
| 383 |
rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
|
| 384 |
rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
|
| 385 |
-
|
| 386 |
-
for k_final, v_final in rec_item.items():
|
| 387 |
if isinstance(v_final, np.generic): rec_item[k_final] = v_final.item()
|
| 388 |
-
|
| 389 |
recommendations_list.append(rec_item)
|
| 390 |
|
| 391 |
-
current_app.logger.info(f"Final recommendations: {len(recommendations_list)}
|
| 392 |
return recommendations_list
|
| 393 |
|
| 394 |
|
| 395 |
-
@app.route('/')
|
| 396 |
-
def index_route():
|
| 397 |
-
global DF, ALL_TOPPINGS, CATEGORICAL_FEATURES, CRUST_TYPE_COL
|
| 398 |
-
if DF is None:
|
| 399 |
-
current_app.logger.error("Data not loaded attempting to serve / route.")
|
| 400 |
-
return "Error: Pizza data not loaded. Please check server logs.", 500
|
| 401 |
-
|
| 402 |
-
filter_options = {}
|
| 403 |
-
cols_for_filters = list(
|
| 404 |
-
set(CATEGORICAL_FEATURES + ['Spice_Level'])) # Spice_Level might be in CATEGORICAL_FEATURES or separate
|
| 405 |
-
|
| 406 |
-
if CRUST_TYPE_COL and CRUST_TYPE_COL not in cols_for_filters: # Ensure crust type is included if available
|
| 407 |
-
cols_for_filters.append(CRUST_TYPE_COL)
|
| 408 |
-
|
| 409 |
-
for col_name in cols_for_filters:
|
| 410 |
-
if col_name in DF.columns:
|
| 411 |
-
# Use a consistent key naming convention for JS
|
| 412 |
-
key_name = col_name.lower().replace('_', '')
|
| 413 |
-
# Special cases for consistency if needed, e.g. "spicelevel"
|
| 414 |
-
if col_name == "Spice_Level": key_name = "spicelevel"
|
| 415 |
-
if col_name == CRUST_TYPE_COL: key_name = "crusttype"
|
| 416 |
-
# if col_name == "Serving_Size": key_name = "servingsize" # Example
|
| 417 |
-
|
| 418 |
-
unique_values = sorted([v for v in DF[col_name].dropna().unique() if v != ''])
|
| 419 |
-
filter_options[key_name] = unique_values # e.g. filter_options['spicelevel'] = ['Mild', 'Medium', 'Hot']
|
| 420 |
-
|
| 421 |
-
default_recommendations_df = DF.sort_values('Rating', ascending=False).copy()
|
| 422 |
-
default_recs_list = []
|
| 423 |
-
|
| 424 |
-
frontend_keys = [
|
| 425 |
-
'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
|
| 426 |
-
'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
|
| 427 |
-
'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
|
| 428 |
-
'bread_type', 'image_url', 'crust_type'
|
| 429 |
-
]
|
| 430 |
-
df_to_frontend_map = {
|
| 431 |
-
'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
|
| 432 |
-
'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
|
| 433 |
-
'description': 'Description', 'popular_group': 'Popular_Group',
|
| 434 |
-
'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
|
| 435 |
-
'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
|
| 436 |
-
'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
|
| 437 |
-
'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
|
| 438 |
-
'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
|
| 439 |
-
'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL
|
| 440 |
-
}
|
| 441 |
-
|
| 442 |
-
for original_idx, pizza_row in default_recommendations_df.iterrows():
|
| 443 |
-
rec_item = {}
|
| 444 |
-
for key in frontend_keys:
|
| 445 |
-
df_col = df_to_frontend_map.get(key)
|
| 446 |
-
if key == 'id':
|
| 447 |
-
rec_item[key] = int(original_idx)
|
| 448 |
-
elif df_col and df_col in pizza_row:
|
| 449 |
-
value = pizza_row[df_col]
|
| 450 |
-
if isinstance(value, np.integer):
|
| 451 |
-
value = int(value)
|
| 452 |
-
elif isinstance(value, np.floating):
|
| 453 |
-
value = float(value)
|
| 454 |
-
elif isinstance(value, np.ndarray):
|
| 455 |
-
value = value.tolist()
|
| 456 |
-
rec_item[key] = "" if pd.isna(value) else value
|
| 457 |
-
elif key == 'crust_type' and not CRUST_TYPE_COL:
|
| 458 |
-
rec_item[key] = "N/A"
|
| 459 |
-
else:
|
| 460 |
-
rec_item[key] = ""
|
| 461 |
-
|
| 462 |
-
rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
|
| 463 |
-
rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
|
| 464 |
-
|
| 465 |
-
for k, v in rec_item.items():
|
| 466 |
-
if isinstance(v, np.generic):
|
| 467 |
-
rec_item[k] = v.item()
|
| 468 |
-
|
| 469 |
-
default_recs_list.append(rec_item)
|
| 470 |
-
|
| 471 |
-
current_app.logger.info(f"Serving {len(default_recs_list)} pizzas for initial display.")
|
| 472 |
-
current_app.logger.info(f"Filter options for template: {filter_options}")
|
| 473 |
-
|
| 474 |
-
return render_template('index.html',
|
| 475 |
-
toppings=ALL_TOPPINGS,
|
| 476 |
-
# Pass filter_options directly, JS will use these
|
| 477 |
-
filter_options=filter_options,
|
| 478 |
-
default_recommendations=default_recs_list,
|
| 479 |
-
default_image_url=DEFAULT_IMAGE_URL)
|
| 480 |
-
|
| 481 |
-
|
| 482 |
@app.route('/recommend', methods=['POST'])
|
| 483 |
def recommend():
|
| 484 |
try:
|
| 485 |
data = request.json
|
| 486 |
-
preferences = {}
|
| 487 |
current_app.logger.info(f"Received recommendation request with data: {data}")
|
| 488 |
|
| 489 |
-
#
|
| 490 |
-
# Keys should match
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
if key in data and data[key] is not None: # Allow 0 for rating
|
| 495 |
-
# For range sliders, value might be a string that needs parsing, ensure it's correct type
|
| 496 |
try:
|
| 497 |
-
if
|
| 498 |
-
|
| 499 |
-
else:
|
| 500 |
-
preferences[key] = int(data[key]) # slices, prep_time
|
| 501 |
except ValueError:
|
| 502 |
-
current_app.logger.warning(f"Could not parse numerical preference {
|
| 503 |
-
|
| 504 |
if 'price_range' in data and data['price_range']:
|
| 505 |
try:
|
| 506 |
preferences['price_range'] = [float(p) for p in data['price_range']]
|
| 507 |
except (ValueError, TypeError):
|
| 508 |
-
|
| 509 |
|
| 510 |
-
# Multi-select categorical preferences
|
| 511 |
-
# Keys
|
| 512 |
-
|
| 513 |
'toppings', 'servingsize', 'populargroup', 'dietarycategory',
|
| 514 |
'spicelevel', 'saucetype', 'cheeseamount', 'restaurantchain',
|
| 515 |
'seasonalavailability', 'breadtype', 'crusttype'
|
| 516 |
]
|
| 517 |
-
for
|
| 518 |
-
if
|
| 519 |
-
preferences[
|
| 520 |
-
elif
|
| 521 |
-
current_app.logger.warning(
|
| 522 |
-
|
| 523 |
-
if isinstance(data[key], str) and data[key]:
|
| 524 |
-
preferences[key] = [data[key]] # Wrap single string in a list for consistency
|
| 525 |
-
else: # If not string or empty, treat as no preference for this key
|
| 526 |
-
preferences[key] = []
|
| 527 |
|
| 528 |
current_app.logger.info(f"Processed preferences for filtering: {preferences}")
|
| 529 |
recommendations = get_recommendations(preferences)
|
| 530 |
current_app.logger.info(f"Returning {len(recommendations)} recommendations after filtering and scoring.")
|
| 531 |
return jsonify(recommendations)
|
|
|
|
| 532 |
except Exception as e:
|
| 533 |
-
current_app.logger.error(f"Error in /recommend: {e}", exc_info=True)
|
| 534 |
return jsonify({"error": "Failed to get recommendations due to a server issue.", "details": str(e)}), 500
|
| 535 |
|
| 536 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
if __name__ == '__main__':
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import os
|
| 7 |
import logging
|
| 8 |
|
| 9 |
+
# --- Logging Configuration ---
|
| 10 |
+
# Ensure logging is configured before any loggers are potentially used by imported modules
|
| 11 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]')
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
app = Flask(__name__)
|
| 15 |
|
| 16 |
+
# --- Global Variables ---
|
| 17 |
DF = None
|
| 18 |
ALL_TOPPINGS = []
|
| 19 |
FEATURE_DF = None
|
| 20 |
+
SCALER = None # Will be initialized in preprocess_data
|
| 21 |
NUMERICAL_COLS = ['Price', 'Slices', 'Rating', 'Spice_Level', 'Preparation_Time', 'Calories']
|
| 22 |
CATEGORICAL_FEATURES = [
|
| 23 |
'Serving_Size', 'Popular_Group', 'Dietary_Category',
|
|
|
|
| 30 |
|
| 31 |
def preprocess_data(df_path='pizza.csv'):
|
| 32 |
global DF, ALL_TOPPINGS, FEATURE_DF, SCALER, CATEGORICAL_FEATURES, CRUST_TYPE_COL
|
| 33 |
+
logger.info(f"Attempting to preprocess data from relative path: {df_path}")
|
| 34 |
|
| 35 |
+
# Construct absolute path for the CSV file
|
| 36 |
+
# This is crucial for environments like Docker where working directory might differ
|
| 37 |
+
base_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the current script (app.py)
|
| 38 |
+
absolute_df_path = os.path.join(base_dir, df_path)
|
| 39 |
+
logger.info(f"Absolute path for CSV: {absolute_df_path}")
|
| 40 |
|
| 41 |
+
if not os.path.exists(absolute_df_path):
|
| 42 |
+
logger.error(f"Dataset file '{absolute_df_path}' not found.")
|
| 43 |
+
raise FileNotFoundError(f"Dataset file '{absolute_df_path}' not found. Ensure it's in the same directory as app.py.")
|
| 44 |
+
|
| 45 |
+
DF = pd.read_csv(absolute_df_path)
|
| 46 |
+
logger.info(f"Successfully loaded '{absolute_df_path}'. Original DataFrame shape: {DF.shape}")
|
| 47 |
logger.info(f"Original DataFrame columns: {DF.columns.tolist()}")
|
| 48 |
|
| 49 |
+
# Determine Crust Type Column
|
| 50 |
potential_crust_cols = ['Crust_Type', 'Cr_Type']
|
| 51 |
valid_crust_cols = [col for col in potential_crust_cols if col in DF.columns]
|
| 52 |
if valid_crust_cols:
|
| 53 |
+
valid_crust_cols.sort(key=lambda col: DF[col].isnull().sum()) # Prefer column with fewer NaNs
|
| 54 |
CRUST_TYPE_COL = valid_crust_cols[0]
|
| 55 |
logger.info(f"Using '{CRUST_TYPE_COL}' for crust type.")
|
| 56 |
if CRUST_TYPE_COL not in CATEGORICAL_FEATURES:
|
| 57 |
CATEGORICAL_FEATURES.append(CRUST_TYPE_COL)
|
| 58 |
+
# Remove other potential crust columns if they were in CATEGORICAL_FEATURES
|
| 59 |
for col in potential_crust_cols:
|
| 60 |
if col != CRUST_TYPE_COL and col in CATEGORICAL_FEATURES:
|
| 61 |
CATEGORICAL_FEATURES.remove(col)
|
| 62 |
else:
|
| 63 |
+
logger.warning("Crust type column (Crust_Type or Cr_Type) not found. Crust type will not be used.")
|
| 64 |
CRUST_TYPE_COL = None
|
| 65 |
|
| 66 |
+
# Fill NaN for text-based categorical columns and other text fields
|
| 67 |
+
text_cols_to_fill = list(set(CATEGORICAL_FEATURES + ['Toppings', 'Description', 'Allergens', 'Image_Url', 'Pizza_Name']))
|
| 68 |
+
for col in text_cols_to_fill:
|
| 69 |
+
if col and col in DF.columns: # Ensure col is not None (e.g. if CRUST_TYPE_COL is None)
|
| 70 |
DF[col] = DF[col].fillna('')
|
| 71 |
+
logger.info("Filled NaNs in text-based categorical columns with empty strings.")
|
| 72 |
|
| 73 |
+
# Fill NaN for numerical columns from the CSV
|
| 74 |
+
numerical_cols_in_df = ['Price_Rs', 'Slices', 'Rating', 'Rating_Count', 'Preparation_Time_min', 'Calories_per_Slice']
|
| 75 |
for col in numerical_cols_in_df:
|
| 76 |
if col in DF.columns:
|
| 77 |
if pd.api.types.is_numeric_dtype(DF[col]):
|
| 78 |
+
median_val = DF[col].median()
|
| 79 |
+
DF[col] = DF[col].fillna(median_val)
|
| 80 |
+
logger.info(f"Filled NaNs in numerical column '{col}' with its median ({median_val}).")
|
| 81 |
else:
|
| 82 |
+
# Attempt to convert to numeric, then fill with median or 0
|
| 83 |
+
numeric_series = pd.to_numeric(DF[col], errors='coerce')
|
| 84 |
+
median_val = 0
|
| 85 |
+
if not numeric_series.isnull().all():
|
| 86 |
+
median_val = numeric_series.median()
|
| 87 |
+
DF[col] = numeric_series.fillna(median_val)
|
| 88 |
+
logger.warning(f"Column '{col}' was not purely numeric. Converted to numeric, filled NaNs with median/0 ({median_val}).")
|
| 89 |
+
else:
|
| 90 |
+
logger.warning(f"Expected numerical column '{col}' not found in DataFrame. It will be missing from features if not handled.")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if 'Rating_Count' in DF.columns:
|
| 94 |
+
DF['Rating_Count'] = DF['Rating_Count'].fillna(0).astype(int)
|
| 95 |
+
|
| 96 |
+
# Process Toppings
|
| 97 |
+
if 'Toppings' in DF.columns:
|
| 98 |
+
DF['Toppings_list_internal'] = DF['Toppings'].astype(str).str.split(r';\s*') # Use raw string for regex
|
| 99 |
+
DF['Toppings_list_internal'] = DF['Toppings_list_internal'].apply(
|
| 100 |
+
lambda x: [t.strip() for t in x if isinstance(t, str) and t.strip()]) # Filter out empty strings after split
|
| 101 |
+
current_all_toppings = set()
|
| 102 |
+
for toppings_list in DF['Toppings_list_internal'].dropna():
|
| 103 |
+
current_all_toppings.update(t for t in toppings_list if t) # Ensure t is not empty
|
| 104 |
+
ALL_TOPPINGS = sorted(list(current_all_toppings))
|
| 105 |
+
logger.info(f"Found {len(ALL_TOPPINGS)} unique toppings. Example: {ALL_TOPPINGS[:5] if ALL_TOPPINGS else 'None'}")
|
| 106 |
+
else:
|
| 107 |
+
logger.warning("'Toppings' column not found. Topping features will be empty.")
|
| 108 |
+
DF['Toppings_list_internal'] = pd.Series([[] for _ in range(len(DF))]) # Empty list for all rows
|
| 109 |
+
ALL_TOPPINGS = []
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
# --- Feature Engineering ---
|
| 113 |
feature_data = {}
|
| 114 |
num_feature_map = {
|
| 115 |
'Price': 'Price_Rs', 'Slices': 'Slices', 'Rating': 'Rating',
|
|
|
|
| 119 |
if df_col in DF.columns:
|
| 120 |
feature_data[feature_col] = DF[df_col].copy()
|
| 121 |
else:
|
| 122 |
+
logger.warning(f"Numerical source column '{df_col}' for feature '{feature_col}' not found. Filling with zeros.")
|
| 123 |
+
feature_data[feature_col] = pd.Series([0.0] * len(DF)) # Ensure float for consistency
|
| 124 |
|
| 125 |
+
# Spice Level Feature (Numerical)
|
| 126 |
if 'Spice_Level' in DF.columns:
|
| 127 |
+
DF['Spice_Level'] = DF['Spice_Level'].fillna('Mild') # Default for NaNs
|
| 128 |
spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
|
| 129 |
+
feature_data['Spice_Level'] = DF['Spice_Level'].map(spice_map).fillna(1.0) # Ensure float
|
| 130 |
else:
|
| 131 |
+
logger.warning("'Spice_Level' column not found. Filling 'Spice_Level' feature with default (1.0).")
|
| 132 |
+
feature_data['Spice_Level'] = pd.Series([1.0] * len(DF)) # Default if column is missing
|
| 133 |
|
| 134 |
+
# One-Hot Encode Categorical Features
|
| 135 |
for feature_cat_col in CATEGORICAL_FEATURES:
|
| 136 |
+
if feature_cat_col and feature_cat_col in DF.columns: # Check if col_name is not None and exists
|
| 137 |
+
# Ensure the column is treated as string to avoid issues with mixed types in unique()
|
| 138 |
+
DF[feature_cat_col] = DF[feature_cat_col].astype(str)
|
| 139 |
for value in DF[feature_cat_col].unique():
|
| 140 |
+
if pd.notnull(value) and value.strip() != '': # Check for non-null and non-empty string values
|
| 141 |
feature_data[f"{feature_cat_col}_{value}"] = (DF[feature_cat_col] == value).astype(int)
|
| 142 |
+
elif feature_cat_col: # Log warning only if feature_cat_col was defined
|
| 143 |
+
logger.warning(f"Categorical source column '{feature_cat_col}' for one-hot encoding not found in DataFrame.")
|
| 144 |
|
| 145 |
+
# Topping Features (One-Hot Encoded)
|
| 146 |
for topping in ALL_TOPPINGS:
|
| 147 |
+
if topping: # Ensure topping string is not empty
|
| 148 |
feature_data[f"Topping_{topping}"] = DF['Toppings_list_internal'].apply(
|
| 149 |
lambda x: 1 if topping in x else 0
|
| 150 |
)
|
| 151 |
|
| 152 |
FEATURE_DF = pd.DataFrame(feature_data)
|
| 153 |
+
logger.info(f"FEATURE_DF created. Shape: {FEATURE_DF.shape}. Columns: {FEATURE_DF.columns.tolist()[:10]}...") # Log first 10 cols
|
| 154 |
+
|
| 155 |
+
# Ensure all NUMERICAL_COLS exist in FEATURE_DF and fill NaNs
|
| 156 |
for col in NUMERICAL_COLS:
|
| 157 |
+
if col not in FEATURE_DF.columns:
|
| 158 |
+
logger.warning(f"Numerical column '{col}' is missing from FEATURE_DF after construction. Adding as zeros.")
|
| 159 |
+
FEATURE_DF[col] = 0.0 # Ensure float
|
| 160 |
if FEATURE_DF[col].isnull().any():
|
| 161 |
+
mean_val = FEATURE_DF[col].mean()
|
| 162 |
+
fill_val = mean_val if pd.notna(mean_val) else 0.0
|
| 163 |
+
logger.info(f"Filling NaNs in numerical feature column '{col}' with {fill_val}.")
|
| 164 |
+
FEATURE_DF[col] = FEATURE_DF[col].fillna(fill_val)
|
| 165 |
+
|
| 166 |
+
# Scale Numerical Features
|
| 167 |
+
SCALER = MinMaxScaler() # Initialize scaler
|
| 168 |
+
if not FEATURE_DF.empty and all(col in FEATURE_DF.columns for col in NUMERICAL_COLS):
|
| 169 |
+
try:
|
| 170 |
+
FEATURE_DF[NUMERICAL_COLS] = SCALER.fit_transform(FEATURE_DF[NUMERICAL_COLS])
|
| 171 |
+
logger.info(f"Numerical columns ({NUMERICAL_COLS}) scaled. FEATURE_DF shape: {FEATURE_DF.shape}")
|
| 172 |
+
except Exception as e:
|
| 173 |
+
logger.error(f"Error during scaling of numerical columns: {e}. FEATURE_DF might be problematic.")
|
| 174 |
+
# Fallback: Keep numerical columns unscaled if scaling fails, or handle as needed
|
| 175 |
+
elif FEATURE_DF.empty:
|
| 176 |
+
logger.error("FEATURE_DF is empty before scaling. Scaling skipped. This will likely cause issues.")
|
| 177 |
+
else:
|
| 178 |
+
missing_cols = [col for col in NUMERICAL_COLS if col not in FEATURE_DF.columns]
|
| 179 |
+
logger.error(f"Not all numerical columns ({NUMERICAL_COLS}) found in FEATURE_DF for scaling. Missing: {missing_cols}. Scaling skipped.")
|
| 180 |
+
|
| 181 |
+
logger.info(f"Preprocessing done. DF is None: {DF is None}, FEATURE_DF is None: {FEATURE_DF is None}, SCALER is None: {SCALER is None}")
|
| 182 |
+
if FEATURE_DF is not None:
|
| 183 |
+
logger.info(f"Final FEATURE_DF shape: {FEATURE_DF.shape}")
|
| 184 |
+
if DF is not None:
|
| 185 |
+
logger.info(f"Final DF shape: {DF.shape}")
|
| 186 |
|
| 187 |
+
|
| 188 |
+
@app.route('/')
def index_route():
    """Render the main page: filter options, topping list, and a default
    (top-rated) recommendation list serialized for the front end.

    Returns a rendered ``index.html`` on success, or a plain-text 500
    response if the module-level preprocessing failed to populate the
    global DataFrames.
    """
    global DF, ALL_TOPPINGS, CATEGORICAL_FEATURES, CRUST_TYPE_COL, FEATURE_DF, DEFAULT_IMAGE_URL
    # Critical check at the beginning of the route: both globals are filled
    # by preprocess_data() at module load; either being None means startup failed.
    if DF is None:
        current_app.logger.error("DF is None when trying to serve '/'. Data preprocessing might have failed or not run.")
        return "Error: Pizza data (DF) not loaded. Please check server logs.", 500
    if FEATURE_DF is None:  # Also check FEATURE_DF as it's derived
        current_app.logger.error("FEATURE_DF is None when trying to serve '/'. Data preprocessing might have failed.")
        return "Error: Pizza feature data (FEATURE_DF) not loaded. Please check server logs.", 500

    filter_options = {}
    # Ensure 'Spice_Level' is included for filter options if it exists in DF.
    # CRUST_TYPE_COL is already in CATEGORICAL_FEATURES if it was found.
    cols_for_filters_set = set(cat_col for cat_col in CATEGORICAL_FEATURES if cat_col and cat_col in DF.columns)  # Filter out None or non-existent
    if 'Spice_Level' in DF.columns:
        cols_for_filters_set.add('Spice_Level')

    for col_name in list(cols_for_filters_set):
        # key_name for JS should be consistent (lowercase, no underscores),
        # e.g. 'Spice_Level' -> 'spicelevel'; matches the keys the JS posts back.
        key_name = col_name.lower().replace('_', '')
        unique_values = sorted([v for v in DF[col_name].astype(str).dropna().unique() if v.strip() != ''])
        if unique_values:  # Only add if there are actual values
            filter_options[key_name] = unique_values

    # Prepare default recommendations (top-rated first when 'Rating' exists).
    if 'Rating' in DF.columns:
        default_recommendations_df = DF.sort_values('Rating', ascending=False).copy()
    else:
        logger.warning("'Rating' column not found in DF. Cannot sort for default recommendations. Using unsorted DF.")
        default_recommendations_df = DF.copy()  # Fallback to unsorted

    default_recs_list = []
    # Keys the front end expects on each recommendation item.
    frontend_keys = [
        'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
        'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
        'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
        'bread_type', 'image_url', 'crust_type'
    ]
    # Front-end key -> DF column name. 'id' maps to None because it is the
    # DataFrame index, not a column; 'crust_type' uses whichever crust column
    # preprocess_data() detected (may be None if no crust column exists).
    df_to_frontend_map = {
        'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
        'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
        'description': 'Description', 'popular_group': 'Popular_Group',
        'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
        'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
        'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
        'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
        'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
        'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL  # Uses the determined CRUST_TYPE_COL
    }

    for original_idx, pizza_row in default_recommendations_df.iterrows():
        rec_item = {}
        for key in frontend_keys:
            df_col = df_to_frontend_map.get(key)
            if key == 'id':
                rec_item[key] = int(original_idx)  # Pizza ID is its original index in DF
            elif df_col and df_col in pizza_row:  # df_col can be None for 'id' or if CRUST_TYPE_COL is None
                value = pizza_row[df_col]
                # Type conversions for JSON serializability.
                # NOTE(review): if value is an ndarray it becomes a list here,
                # and pd.isna(list) below returns an array whose truth value is
                # ambiguous — presumably no cell is array-valued; confirm.
                if isinstance(value, np.integer): value = int(value)
                elif isinstance(value, np.floating): value = float(value)
                elif isinstance(value, np.ndarray): value = value.tolist()
                rec_item[key] = "" if pd.isna(value) else value
            elif key == 'crust_type' and not CRUST_TYPE_COL:  # If CRUST_TYPE_COL was not found
                rec_item[key] = "N/A"
            else:
                rec_item[key] = ""  # Default for missing fields

        # 'or 0' also covers empty-string placeholders from the branch above.
        rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)  # Ensure int
        rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL

        # Final pass to convert any remaining numpy generic types.
        for k_final, v_final in rec_item.items():
            if isinstance(v_final, np.generic): rec_item[k_final] = v_final.item()
        default_recs_list.append(rec_item)

    current_app.logger.info(f"Serving {len(default_recs_list)} pizzas for initial display.")
    current_app.logger.info(f"Filter options for template: {filter_options}")
    current_app.logger.info(f"ALL_TOPPINGS for template: {ALL_TOPPINGS[:5] if ALL_TOPPINGS else 'None'}")

    return render_template('index.html',
                           toppings=ALL_TOPPINGS,
                           filter_options=filter_options,
                           default_recommendations=default_recs_list,
                           default_image_url=DEFAULT_IMAGE_URL)
|
| 278 |
|
| 279 |
|
| 280 |
def get_recommendations(preferences):
|
| 281 |
global DF, FEATURE_DF, SCALER, CRUST_TYPE_COL, DEFAULT_IMAGE_URL
|
| 282 |
|
| 283 |
+
if DF is None or FEATURE_DF is None or SCALER is None:
|
| 284 |
+
current_app.logger.error("Data not fully initialized (DF, FEATURE_DF, or SCALER is None) for get_recommendations.")
|
| 285 |
return []
|
| 286 |
|
| 287 |
current_indices = DF.index.to_list()
|
| 288 |
current_app.logger.info(f"Starting with {len(current_indices)} pizzas before filtering. Preferences: {preferences}")
|
| 289 |
|
| 290 |
+
# --- Hard Filters ---
|
| 291 |
+
# 1. Toppings
|
| 292 |
+
if 'toppings' in preferences and preferences['toppings'] and 'Toppings_list_internal' in DF.columns:
|
| 293 |
selected_toppings = set(preferences['toppings'])
|
| 294 |
+
if selected_toppings: # Ensure not an empty list that would select nothing
|
| 295 |
topping_mask = DF.loc[current_indices, 'Toppings_list_internal'].apply(
|
| 296 |
+
lambda x_toppings: isinstance(x_toppings, list) and any(t in selected_toppings for t in x_toppings)
|
| 297 |
+
)
|
| 298 |
current_indices = DF.loc[current_indices][topping_mask].index.to_list()
|
| 299 |
current_app.logger.info(f"After toppings filter: {len(current_indices)} pizzas remaining")
|
| 300 |
if not current_indices: return []
|
| 301 |
|
| 302 |
# 2. Max Price
|
| 303 |
if 'price_range' in preferences and preferences['price_range'] and 'Price_Rs' in DF.columns:
|
| 304 |
+
try:
|
| 305 |
+
min_price = float(preferences['price_range'][0])
|
| 306 |
+
max_price = float(preferences['price_range'][1])
|
| 307 |
+
price_mask = (DF.loc[current_indices, 'Price_Rs'] >= min_price) & \
|
| 308 |
+
(DF.loc[current_indices, 'Price_Rs'] <= max_price)
|
| 309 |
+
current_indices = DF.loc[current_indices][price_mask].index.to_list()
|
| 310 |
+
current_app.logger.info(f"After price filter ({min_price}-{max_price}): {len(current_indices)} pizzas")
|
| 311 |
+
if not current_indices: return []
|
| 312 |
+
except (TypeError, ValueError, IndexError) as e:
|
| 313 |
+
current_app.logger.warning(f"Invalid price_range preference: {preferences['price_range']}. Error: {e}")
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# 3. Number of Slices (Min Slices)
|
| 317 |
if 'slices' in preferences and preferences['slices'] is not None and 'Slices' in DF.columns:
|
| 318 |
try:
|
| 319 |
min_slices = int(preferences['slices'])
|
| 320 |
slices_mask = DF.loc[current_indices, 'Slices'] >= min_slices
|
| 321 |
current_indices = DF.loc[current_indices][slices_mask].index.to_list()
|
| 322 |
+
current_app.logger.info(f"After slices filter (>= {min_slices}): {len(current_indices)} pizzas")
|
| 323 |
if not current_indices: return []
|
| 324 |
except ValueError:
|
| 325 |
current_app.logger.warning(f"Invalid value for slices: {preferences['slices']}")
|
| 326 |
|
| 327 |
+
# 4. Minimum Rating
|
| 328 |
if 'rating' in preferences and preferences['rating'] is not None and 'Rating' in DF.columns:
|
| 329 |
try:
|
| 330 |
min_rating = float(preferences['rating'])
|
| 331 |
rating_mask = DF.loc[current_indices, 'Rating'] >= min_rating
|
| 332 |
current_indices = DF.loc[current_indices][rating_mask].index.to_list()
|
| 333 |
+
current_app.logger.info(f"After rating filter (>= {min_rating}): {len(current_indices)} pizzas")
|
| 334 |
if not current_indices: return []
|
| 335 |
except ValueError:
|
| 336 |
current_app.logger.warning(f"Invalid value for rating: {preferences['rating']}")
|
| 337 |
|
| 338 |
+
# 5. Max Preparation Time
|
| 339 |
+
if 'prep_time' in preferences and preferences['prep_time'] is not None and 'Preparation_Time_min' in DF.columns:
|
|
|
|
| 340 |
try:
|
| 341 |
+
max_prep_time = int(str(preferences['prep_time']).lower().replace("min", "").strip())
|
|
|
|
| 342 |
prep_mask = DF.loc[current_indices, 'Preparation_Time_min'] <= max_prep_time
|
| 343 |
current_indices = DF.loc[current_indices][prep_mask].index.to_list()
|
| 344 |
+
current_app.logger.info(f"After prep time filter (<= {max_prep_time}): {len(current_indices)} pizzas")
|
|
|
|
| 345 |
if not current_indices: return []
|
| 346 |
except ValueError:
|
| 347 |
+
current_app.logger.warning(f"Could not parse prep_time value: {preferences['prep_time']}")
|
| 348 |
|
| 349 |
+
# 6. Categorical Filters (Multi-select OR logic)
|
| 350 |
+
# JS keys: servingsize, populargroup, dietarycategory, spicelevel, saucetype, etc.
|
| 351 |
categorical_pref_map = {
|
| 352 |
"servingsize": "Serving_Size", "populargroup": "Popular_Group",
|
| 353 |
"dietarycategory": "Dietary_Category", "spicelevel": "Spice_Level",
|
|
|
|
| 355 |
"restaurantchain": "Restaurant_Chain", "seasonalavailability": "Seasonal_Availability",
|
| 356 |
"breadtype": "Bread_Type", "crusttype": CRUST_TYPE_COL
|
| 357 |
}
|
|
|
|
| 358 |
for pref_key, df_col_name in categorical_pref_map.items():
|
| 359 |
+
if df_col_name and pref_key in preferences and preferences[pref_key]: # Ensure df_col_name is not None
|
| 360 |
+
pref_value_list = preferences[pref_key] # Expected to be a list from JS
|
| 361 |
+
if isinstance(pref_value_list, list) and pref_value_list: # If list is not empty
|
| 362 |
+
if df_col_name in DF.columns:
|
| 363 |
+
cat_mask = DF.loc[current_indices, df_col_name].isin(pref_value_list)
|
| 364 |
+
current_indices = DF.loc[current_indices][cat_mask].index.to_list()
|
| 365 |
+
current_app.logger.info(f"After {pref_key} filter (isin {pref_value_list}): {len(current_indices)} pizzas")
|
| 366 |
+
if not current_indices: return []
|
| 367 |
+
else:
|
| 368 |
+
current_app.logger.warning(f"Column '{df_col_name}' for preference '{pref_key}' not found in DF. Filter skipped.")
|
| 369 |
+
# If pref_value_list is empty, it means "Any" for this category, so no filtering.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
if not current_indices:
|
| 372 |
+
current_app.logger.info("No pizzas match all hard filter criteria.")
|
| 373 |
return []
|
| 374 |
|
| 375 |
# --- Similarity Scoring Part ---
|
| 376 |
+
# Filter FEATURE_DF to only include pizzas remaining after hard filters
|
| 377 |
valid_indices_for_feature_df = FEATURE_DF.index.intersection(current_indices)
|
| 378 |
if valid_indices_for_feature_df.empty:
|
| 379 |
+
current_app.logger.info("No valid indices remain for FEATURE_DF after hard filters.")
|
| 380 |
return []
|
| 381 |
|
| 382 |
filtered_feature_df = FEATURE_DF.loc[valid_indices_for_feature_df]
|
| 383 |
+
if filtered_feature_df.empty: # Should not happen if valid_indices_for_feature_df is not empty
|
| 384 |
+
current_app.logger.warning("Filtered FEATURE_DF is empty. This is unexpected.")
|
| 385 |
return []
|
| 386 |
|
| 387 |
+
# Create User Preference Vector (aligned with FEATURE_DF columns)
|
| 388 |
+
user_vector = pd.Series(0.0, index=FEATURE_DF.columns) # Initialize with 0.0 for float consistency
|
| 389 |
|
| 390 |
+
# 1. Toppings in User Vector
|
| 391 |
if 'toppings' in preferences and preferences['toppings']:
|
| 392 |
for topping in preferences['toppings']:
|
| 393 |
col_name = f"Topping_{topping}"
|
| 394 |
if col_name in user_vector.index:
|
| 395 |
user_vector[col_name] = 1.0
|
| 396 |
|
| 397 |
+
# 2. Categorical Preferences (One-Hot) in User Vector
|
| 398 |
+
# js_to_df_key_map_for_vector is same as categorical_pref_map but df_col_name is for one-hot prefix
|
| 399 |
+
for pref_key, df_col_prefix in categorical_pref_map.items():
|
| 400 |
+
if df_col_prefix and pref_key in preferences and preferences[pref_key]: # df_col_prefix can be None for CRUST_TYPE_COL
|
| 401 |
+
selected_values = preferences[pref_key] # This is a list
|
| 402 |
+
for val_item in selected_values:
|
| 403 |
+
# Construct the one-hot encoded column name (e.g., "Spice_Level_Mild")
|
| 404 |
+
one_hot_col_name = f"{df_col_prefix}_{val_item}"
|
| 405 |
+
if one_hot_col_name in user_vector.index:
|
| 406 |
+
user_vector[one_hot_col_name] = 1.0
|
| 407 |
+
|
| 408 |
+
# 3. Numerical Preferences in User Vector
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
raw_user_num_prefs_dict = {}
|
| 410 |
+
spice_map_for_num_pref = {'Mild': 1.0, 'Medium': 2.0, 'Hot': 3.0} # Use floats
|
| 411 |
|
| 412 |
if 'price_range' in preferences and preferences['price_range']:
|
| 413 |
+
try: # Average of min/max price for preference
|
| 414 |
+
raw_user_num_prefs_dict['Price'] = (float(preferences['price_range'][0]) + float(preferences['price_range'][1])) / 2
|
| 415 |
+
except: pass # Ignore if parsing fails
|
| 416 |
if 'slices' in preferences and preferences['slices'] is not None:
|
| 417 |
+
try: raw_user_num_prefs_dict['Slices'] = float(preferences['slices'])
|
| 418 |
+
except: pass
|
| 419 |
if 'rating' in preferences and preferences['rating'] is not None:
|
| 420 |
+
try: raw_user_num_prefs_dict['Rating'] = float(preferences['rating'])
|
| 421 |
+
except: pass
|
| 422 |
+
if 'prep_time' in preferences and preferences['prep_time'] is not None:
|
| 423 |
+
try: raw_user_num_prefs_dict['Preparation_Time'] = float(str(preferences['prep_time']).lower().replace("min","").strip())
|
| 424 |
+
except: pass
|
| 425 |
+
# Numerical Spice_Level: Only if *one* spice level is selected, use its mapped value.
|
| 426 |
+
# Otherwise, rely on the one-hot encoded spice level features.
|
| 427 |
+
if 'spicelevel' in preferences and isinstance(preferences['spicelevel'], list) and len(preferences['spicelevel']) == 1:
|
| 428 |
+
selected_spice = preferences['spicelevel'][0]
|
| 429 |
+
if selected_spice in spice_map_for_num_pref:
|
| 430 |
+
raw_user_num_prefs_dict['Spice_Level'] = spice_map_for_num_pref[selected_spice]
|
| 431 |
+
|
| 432 |
+
# Scale these raw numerical preferences using the SCALER
|
| 433 |
+
# Create a temporary DataFrame for scaling, ensuring all NUMERICAL_COLS are present
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
temp_scaling_df = pd.DataFrame(columns=NUMERICAL_COLS, index=[0])
|
| 435 |
for col in NUMERICAL_COLS:
|
| 436 |
+
# Default to the column's mean from FEATURE_DF if user didn't specify,
|
| 437 |
+
# or 0 if that's also not available (shouldn't happen if SCALER is fit)
|
| 438 |
+
# SCALER.data_min_ / SCALER.data_max_ or SCALER.mean_ could be used if available
|
| 439 |
+
default_val = 0.0
|
| 440 |
+
if hasattr(SCALER, 'data_min_') and col in FEATURE_DF.columns: # Check if scaler is fit and col exists
|
| 441 |
+
# Use the minimum of the scaled range as a neutral default if user didn't specify
|
| 442 |
+
col_idx_in_scaler = -1
|
| 443 |
+
try: col_idx_in_scaler = NUMERICAL_COLS.index(col)
|
| 444 |
+
except ValueError: pass
|
| 445 |
+
|
| 446 |
+
if col_idx_in_scaler != -1 and col_idx_in_scaler < len(SCALER.data_min_):
|
| 447 |
+
default_val = SCALER.data_min_[col_idx_in_scaler] # This is the original min, not scaled min (0)
|
| 448 |
+
else: # Fallback if col not in NUMERICAL_COLS used for SCALER fitting
|
| 449 |
+
logger.warning(f"Column {col} not found in SCALER's fitted columns during user vector creation. Defaulting to 0.")
|
| 450 |
+
|
| 451 |
+
temp_scaling_df.loc[0, col] = raw_user_num_prefs_dict.get(col, default_val)
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
if hasattr(SCALER, 'n_features_in_') : # Check if scaler has been fit
|
| 455 |
+
scaled_user_num_values = SCALER.transform(temp_scaling_df[NUMERICAL_COLS])[0]
|
| 456 |
+
for i, col_name in enumerate(NUMERICAL_COLS):
|
| 457 |
+
if col_name in raw_user_num_prefs_dict: # Only update user_vector if user specified this preference
|
| 458 |
+
user_vector[col_name] = scaled_user_num_values[i]
|
| 459 |
+
else:
|
| 460 |
+
logger.warning("SCALER is not fit. Cannot scale user's numerical preferences. Using raw values (0-1 range assumed).")
|
| 461 |
+
for col_name in NUMERICAL_COLS:
|
| 462 |
+
if col_name in raw_user_num_prefs_dict:
|
| 463 |
+
# Attempt a rough normalization if scaler is not fit, assuming values are in a reasonable range
|
| 464 |
+
# This is a fallback and might not be accurate.
|
| 465 |
+
user_vector[col_name] = raw_user_num_prefs_dict[col_name] / 100.0 # Example, needs domain knowledge
|
| 466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
+
# Calculate Cosine Similarities
|
| 469 |
feature_matrix_filtered = filtered_feature_df.values
|
| 470 |
user_array = user_vector.values.reshape(1, -1)
|
| 471 |
|
| 472 |
+
# Ensure shapes match if FEATURE_DF columns changed dynamically (should not happen with current setup)
|
| 473 |
if user_array.shape[1] != feature_matrix_filtered.shape[1]:
|
| 474 |
current_app.logger.error(
|
| 475 |
+
f"Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}. "
|
| 476 |
+
f"User cols: {user_vector.index.tolist()[:5]}, Feature cols: {filtered_feature_df.columns.tolist()[:5]}"
|
| 477 |
+
)
|
| 478 |
+
# Attempt to align columns as a robust measure, though this indicates a deeper issue if it occurs.
|
| 479 |
+
common_cols = filtered_feature_df.columns.intersection(user_vector.index)
|
| 480 |
+
aligned_user_vector = pd.Series(0.0, index=filtered_feature_df.columns)
|
| 481 |
+
aligned_user_vector[common_cols] = user_vector[common_cols]
|
| 482 |
user_array = aligned_user_vector.values.reshape(1, -1)
|
| 483 |
+
|
| 484 |
if user_array.shape[1] != feature_matrix_filtered.shape[1]:
|
| 485 |
+
current_app.logger.critical(f"Persistent shape mismatch even after alignment. Cannot compute similarity.")
|
|
|
|
| 486 |
return []
|
| 487 |
|
| 488 |
+
|
| 489 |
similarities = cosine_similarity(user_array, feature_matrix_filtered)[0]
|
| 490 |
+
# Get indices sorted by similarity (descending) from the filtered_feature_df
|
| 491 |
sorted_indices_in_filtered_df = similarities.argsort()[::-1]
|
| 492 |
+
# Map these sorted indices back to original DF indices
|
| 493 |
final_recommendation_indices = valid_indices_for_feature_df[sorted_indices_in_filtered_df]
|
| 494 |
|
| 495 |
+
# Prepare list of recommendations
|
| 496 |
recommendations_list = []
|
| 497 |
+
# frontend_keys and df_to_frontend_map are defined in index_route, can be reused or redefined here
|
| 498 |
+
# For safety, redefine here or pass as argument if refactoring
|
| 499 |
+
frontend_keys_rec = [
|
| 500 |
'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
|
| 501 |
'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
|
| 502 |
'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
|
| 503 |
'bread_type', 'image_url', 'crust_type'
|
| 504 |
]
|
| 505 |
+
df_to_frontend_map_rec = {
|
| 506 |
'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
|
| 507 |
'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
|
| 508 |
'description': 'Description', 'popular_group': 'Popular_Group',
|
|
|
|
| 517 |
for original_idx in final_recommendation_indices:
|
| 518 |
pizza_series = DF.iloc[original_idx]
|
| 519 |
rec_item = {}
|
| 520 |
+
for key in frontend_keys_rec:
|
| 521 |
+
df_col = df_to_frontend_map_rec.get(key)
|
| 522 |
if key == 'id':
|
| 523 |
rec_item[key] = int(original_idx)
|
| 524 |
elif df_col and df_col in pizza_series:
|
| 525 |
value = pizza_series[df_col]
|
| 526 |
+
if isinstance(value, np.integer): value = int(value)
|
| 527 |
+
elif isinstance(value, np.floating): value = float(value)
|
| 528 |
+
elif isinstance(value, np.ndarray): value = value.tolist()
|
|
|
|
|
|
|
|
|
|
| 529 |
rec_item[key] = "" if pd.isna(value) else value
|
| 530 |
+
elif key == 'crust_type' and not CRUST_TYPE_COL :
|
| 531 |
+
rec_item[key] = "N/A"
|
| 532 |
else:
|
| 533 |
rec_item[key] = ""
|
| 534 |
|
| 535 |
rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
|
| 536 |
rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
|
| 537 |
+
for k_final, v_final in rec_item.items(): # Final numpy type check
|
|
|
|
| 538 |
if isinstance(v_final, np.generic): rec_item[k_final] = v_final.item()
|
|
|
|
| 539 |
recommendations_list.append(rec_item)
|
| 540 |
|
| 541 |
+
current_app.logger.info(f"Final recommendations count: {len(recommendations_list)}")
|
| 542 |
return recommendations_list
|
| 543 |
|
| 544 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
@app.route('/recommend', methods=['POST'])
def recommend():
    """Return pizza recommendations as JSON for the posted preference payload.

    Expects a JSON body whose keys mirror the front-end controls:
    numeric preferences ('slices', 'rating', 'prep_time'), a two-element
    'price_range' list, and multi-select lists ('toppings', 'servingsize',
    'spicelevel', ...). Individually invalid preferences are logged and
    skipped rather than failing the whole request; unexpected errors
    produce a 500 JSON error payload.
    """
    try:
        # get_json(silent=True) returns None instead of raising on a missing
        # or malformed JSON body; the 'or {}' fallback keeps the membership
        # tests below from blowing up with a TypeError on None.
        data = request.get_json(silent=True) or {}
        preferences = {}  # Store processed preferences
        current_app.logger.info(f"Received recommendation request with data: {data}")

        # Numerical/Range preferences from JS.
        # Keys in `data` should match JS: 'slices', 'rating', 'prep_time', 'price_range'
        simple_numerical_prefs_js = ['slices', 'rating', 'prep_time']
        for key_js in simple_numerical_prefs_js:
            if key_js in data and data[key_js] is not None:
                try:
                    if key_js == 'rating':
                        preferences[key_js] = float(data[key_js])
                    elif key_js == 'prep_time':
                        # Tolerate values like "30 min" the same way the
                        # downstream filter in get_recommendations() does,
                        # instead of dropping the preference on int() failure.
                        preferences[key_js] = int(str(data[key_js]).lower().replace("min", "").strip())
                    else:  # slices
                        preferences[key_js] = int(data[key_js])
                except ValueError:
                    current_app.logger.warning(f"Could not parse numerical preference '{key_js}': {data[key_js]}")

        # Price range arrives as a [min, max] pair.
        if 'price_range' in data and data['price_range']:
            try:
                preferences['price_range'] = [float(p) for p in data['price_range']]
            except (ValueError, TypeError):
                current_app.logger.warning(f"Could not parse price_range: {data['price_range']}")

        # Multi-select categorical preferences from JS.
        # Keys in `data` should match JS: 'toppings', 'servingsize', 'dietarycategory', etc.
        # An empty list means "Any" (no filtering on that category).
        multi_select_prefs_js = [
            'toppings', 'servingsize', 'populargroup', 'dietarycategory',
            'spicelevel', 'saucetype', 'cheeseamount', 'restaurantchain',
            'seasonalavailability', 'breadtype', 'crusttype'
        ]
        for key_js in multi_select_prefs_js:
            if key_js in data and isinstance(data[key_js], list):
                preferences[key_js] = data[key_js]  # Expecting a list (can be empty for "Any")
            elif key_js in data:  # Present but not a list: log and treat as "Any"
                current_app.logger.warning(f"Preference for '{key_js}' was not a list: {data[key_js]}. Treating as empty (Any).")
                preferences[key_js] = []  # Default to empty list if not a list

        current_app.logger.info(f"Processed preferences for filtering: {preferences}")
        recommendations = get_recommendations(preferences)
        current_app.logger.info(f"Returning {len(recommendations)} recommendations after filtering and scoring.")
        return jsonify(recommendations)

    except Exception as e:
        current_app.logger.error(f"Error in /recommend endpoint: {e}", exc_info=True)
        return jsonify({"error": "Failed to get recommendations due to a server issue.", "details": str(e)}), 500
|
| 591 |
|
| 592 |
|
| 593 |
+
# --- Main Application Execution ---
# preprocess_data() is invoked here, at import time, so the dataset is loaded
# exactly once per process — this covers both `python app.py` and each
# Gunicorn worker importing the module.
try:
    logger.info("----- Starting data preprocessing at module load... -----")
    preprocess_data()  # Use default 'pizza.csv'
    logger.info("----- Data preprocessing completed successfully at module load. -----")
    # Sanity-check the globals preprocess_data() is expected to populate
    # (SCALER should be initialized even if fitting fails).
    for _g_name, _g_val in (("DF", DF), ("FEATURE_DF", FEATURE_DF), ("SCALER", SCALER)):
        if _g_val is None:
            logger.critical(f"CRITICAL AT STARTUP: Global {_g_name} is None after preprocess_data(). App will likely fail.")
except FileNotFoundError as e:
    logger.critical(f"CRITICAL ERROR AT MODULE LOAD (FileNotFoundError): {e}. Ensure 'pizza.csv' is in the /app directory (or same dir as app.py).")
    # Deliberately not re-raising: on Hugging Face / Gunicorn it is better to
    # let the app attempt to start and surface the failure in route logs than
    # to die here and obscure them.
except Exception as e:
    logger.critical(f"Unexpected critical startup error during preprocessing at module load: {e}", exc_info=True)
|
| 613 |
+
|
| 614 |
+
|
| 615 |
if __name__ == '__main__':
    # Local-development entry point (`python app.py`). The data globals were
    # already initialized by the module-level preprocess_data() call above.
    logger.info("----- Running Flask app directly (e.g., python app.py) -----")
    # Sanity check only — by this point the module-level load should have
    # populated every data global.
    _all_ready = all(obj is not None for obj in (DF, FEATURE_DF, SCALER))
    if not _all_ready:
        logger.warning("One or more global data variables (DF, FEATURE_DF, SCALER) are None before local app.run(). This is unexpected if module-level preprocessing ran.")
        # If needed for local debugging, preprocessing could be re-run here:
        # preprocess_data()
    # use_reloader=False: the reloader would re-import the module and repeat
    # the slow global data initialization on every code change.
    app.run(debug=True, host='0.0.0.0', port=7860, use_reloader=False)
|