rajkhanke commited on
Commit
662c070
·
verified ·
1 Parent(s): 98c9259

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +394 -311
app.py CHANGED
@@ -6,15 +6,18 @@ from sklearn.metrics.pairwise import cosine_similarity
6
  import os
7
  import logging
8
 
 
 
9
  logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]')
10
  logger = logging.getLogger(__name__)
11
 
12
  app = Flask(__name__)
13
 
 
14
  DF = None
15
  ALL_TOPPINGS = []
16
  FEATURE_DF = None
17
- SCALER = None
18
  NUMERICAL_COLS = ['Price', 'Slices', 'Rating', 'Spice_Level', 'Preparation_Time', 'Calories']
19
  CATEGORICAL_FEATURES = [
20
  'Serving_Size', 'Popular_Group', 'Dietary_Category',
@@ -27,58 +30,86 @@ DEFAULT_IMAGE_URL = 'https://images.dominos.co.in/new_margherita_2502.jpg'
27
 
28
  def preprocess_data(df_path='pizza.csv'):
29
  global DF, ALL_TOPPINGS, FEATURE_DF, SCALER, CATEGORICAL_FEATURES, CRUST_TYPE_COL
 
30
 
31
- if not os.path.exists(df_path):
32
- logger.error(f"Dataset file '{df_path}' not found.")
33
- raise FileNotFoundError(f"Dataset file '{df_path}' not found.")
 
 
34
 
35
- DF = pd.read_csv(df_path)
 
 
 
 
 
36
  logger.info(f"Original DataFrame columns: {DF.columns.tolist()}")
37
 
 
38
  potential_crust_cols = ['Crust_Type', 'Cr_Type']
39
  valid_crust_cols = [col for col in potential_crust_cols if col in DF.columns]
40
  if valid_crust_cols:
41
- valid_crust_cols.sort(key=lambda col: DF[col].isnull().sum())
42
  CRUST_TYPE_COL = valid_crust_cols[0]
43
  logger.info(f"Using '{CRUST_TYPE_COL}' for crust type.")
44
  if CRUST_TYPE_COL not in CATEGORICAL_FEATURES:
45
  CATEGORICAL_FEATURES.append(CRUST_TYPE_COL)
 
46
  for col in potential_crust_cols:
47
  if col != CRUST_TYPE_COL and col in CATEGORICAL_FEATURES:
48
  CATEGORICAL_FEATURES.remove(col)
49
  else:
50
- logger.warning("Crust type column not found. Crust type will not be used.")
51
  CRUST_TYPE_COL = None
52
 
53
- text_categorical_cols = list(
54
- set(CATEGORICAL_FEATURES + ['Toppings', 'Description', 'Allergens', 'Image_Url', 'Pizza_Name']))
55
- for col in text_categorical_cols:
56
- if col in DF.columns:
57
  DF[col] = DF[col].fillna('')
 
58
 
59
- numerical_cols_in_df = ['Price_Rs', 'Slices', 'Rating', 'Rating_Count', 'Preparation_Time_min',
60
- 'Calories_per_Slice']
61
  for col in numerical_cols_in_df:
62
  if col in DF.columns:
63
  if pd.api.types.is_numeric_dtype(DF[col]):
64
- DF[col] = DF[col].fillna(DF[col].median())
 
 
65
  else:
66
- DF[col] = pd.to_numeric(DF[col], errors='coerce').fillna(
67
- DF[col].median() if pd.api.types.is_numeric_dtype(DF[col]) else 0)
68
-
69
- if 'Rating_Count' in DF.columns: DF['Rating_Count'] = DF['Rating_Count'].fillna(0).astype(int)
70
-
71
- DF['Toppings_list_internal'] = DF['Toppings'].astype(str).str.split(
72
- ';\\s*')
73
- DF['Toppings_list_internal'] = DF['Toppings_list_internal'].apply(
74
- lambda x: [t.strip() for t in x if isinstance(t, str) and t.strip()])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- current_all_toppings = set()
77
- for toppings_list in DF['Toppings_list_internal'].dropna():
78
- current_all_toppings.update(t for t in toppings_list if t)
79
- ALL_TOPPINGS = sorted(list(current_all_toppings))
80
- logger.info(f"Found {len(ALL_TOPPINGS)} unique toppings. Example: {ALL_TOPPINGS[:5]}")
81
 
 
82
  feature_data = {}
83
  num_feature_map = {
84
  'Price': 'Price_Rs', 'Slices': 'Slices', 'Rating': 'Rating',
@@ -88,107 +119,235 @@ def preprocess_data(df_path='pizza.csv'):
88
  if df_col in DF.columns:
89
  feature_data[feature_col] = DF[df_col].copy()
90
  else:
91
- feature_data[feature_col] = pd.Series([0.0] * len(DF))
 
92
 
 
93
  if 'Spice_Level' in DF.columns:
94
- DF['Spice_Level'] = DF['Spice_Level'].fillna('Mild')
95
  spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
96
- feature_data['Spice_Level'] = DF['Spice_Level'].map(spice_map).fillna(1.0)
97
  else:
98
- feature_data['Spice_Level'] = pd.Series([1.0] * len(DF))
 
99
 
 
100
  for feature_cat_col in CATEGORICAL_FEATURES:
101
- if feature_cat_col in DF.columns:
 
 
102
  for value in DF[feature_cat_col].unique():
103
- if pd.notnull(value) and value != '':
104
  feature_data[f"{feature_cat_col}_{value}"] = (DF[feature_cat_col] == value).astype(int)
 
 
105
 
 
106
  for topping in ALL_TOPPINGS:
107
- if topping:
108
  feature_data[f"Topping_{topping}"] = DF['Toppings_list_internal'].apply(
109
  lambda x: 1 if topping in x else 0
110
  )
111
 
112
  FEATURE_DF = pd.DataFrame(feature_data)
 
 
 
113
  for col in NUMERICAL_COLS:
114
- if col not in FEATURE_DF.columns: FEATURE_DF[col] = 0.0
 
 
115
  if FEATURE_DF[col].isnull().any():
116
- FEATURE_DF[col] = FEATURE_DF[col].fillna(
117
- FEATURE_DF[col].mean() if pd.notna(FEATURE_DF[col].mean()) else 0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- SCALER = MinMaxScaler()
120
- FEATURE_DF[NUMERICAL_COLS] = SCALER.fit_transform(FEATURE_DF[NUMERICAL_COLS])
121
- logger.info(f"Preproc done. FEATURE_DF shape: {FEATURE_DF.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  def get_recommendations(preferences):
125
  global DF, FEATURE_DF, SCALER, CRUST_TYPE_COL, DEFAULT_IMAGE_URL
126
 
127
- if FEATURE_DF is None or SCALER is None or DF is None:
128
- current_app.logger.error("Data not fully initialized for get_recommendations.")
129
  return []
130
 
131
  current_indices = DF.index.to_list()
132
  current_app.logger.info(f"Starting with {len(current_indices)} pizzas before filtering. Preferences: {preferences}")
133
 
134
- # 1. Toppings (OR logic if multiple selected)
135
- if 'toppings' in preferences and preferences['toppings']:
 
136
  selected_toppings = set(preferences['toppings'])
137
- if selected_toppings: # Ensure it's not an empty list
138
  topping_mask = DF.loc[current_indices, 'Toppings_list_internal'].apply(
139
- lambda x: any(t in selected_toppings for t in x))
 
140
  current_indices = DF.loc[current_indices][topping_mask].index.to_list()
141
  current_app.logger.info(f"After toppings filter: {len(current_indices)} pizzas remaining")
142
  if not current_indices: return []
143
 
144
  # 2. Max Price
145
  if 'price_range' in preferences and preferences['price_range'] and 'Price_Rs' in DF.columns:
146
- min_price = float(preferences['price_range'][0])
147
- max_price = float(preferences['price_range'][1])
148
- price_mask = (DF.loc[current_indices, 'Price_Rs'] >= min_price) & \
149
- (DF.loc[current_indices, 'Price_Rs'] <= max_price)
150
- current_indices = DF.loc[current_indices][price_mask].index.to_list()
151
- current_app.logger.info(
152
- f"After price filter ({min_price}-{max_price}): {len(current_indices)} pizzas remaining")
153
- if not current_indices: return []
154
-
155
- # 3. Number of Slices (>= selected)
 
 
 
156
  if 'slices' in preferences and preferences['slices'] is not None and 'Slices' in DF.columns:
157
  try:
158
  min_slices = int(preferences['slices'])
159
  slices_mask = DF.loc[current_indices, 'Slices'] >= min_slices
160
  current_indices = DF.loc[current_indices][slices_mask].index.to_list()
161
- current_app.logger.info(f"After slices filter (>= {min_slices}): {len(current_indices)} pizzas remaining")
162
  if not current_indices: return []
163
  except ValueError:
164
  current_app.logger.warning(f"Invalid value for slices: {preferences['slices']}")
165
 
166
- # 4. Minimum Rating (>= selected)
167
  if 'rating' in preferences and preferences['rating'] is not None and 'Rating' in DF.columns:
168
  try:
169
  min_rating = float(preferences['rating'])
170
  rating_mask = DF.loc[current_indices, 'Rating'] >= min_rating
171
  current_indices = DF.loc[current_indices][rating_mask].index.to_list()
172
- current_app.logger.info(f"After rating filter (>= {min_rating}): {len(current_indices)} pizzas remaining")
173
  if not current_indices: return []
174
  except ValueError:
175
  current_app.logger.warning(f"Invalid value for rating: {preferences['rating']}")
176
 
177
- # 5. Max Preparation Time (<= selected)
178
- if 'prep_time' in preferences and preferences[
179
- 'prep_time'] is not None and 'Preparation_Time_min' in DF.columns: # Changed 'preptime' to 'prep_time' to match JS
180
  try:
181
- prep_time_str = str(preferences['prep_time']).lower().replace("min", "").strip()
182
- max_prep_time = int(prep_time_str)
183
  prep_mask = DF.loc[current_indices, 'Preparation_Time_min'] <= max_prep_time
184
  current_indices = DF.loc[current_indices][prep_mask].index.to_list()
185
- current_app.logger.info(
186
- f"After prep time filter (<= {max_prep_time}): {len(current_indices)} pizzas remaining")
187
  if not current_indices: return []
188
  except ValueError:
189
- current_app.logger.warning(f"Could not parse preptime value: {preferences['prep_time']}")
190
 
191
- # 6. Categorical Filters (Exact Match or Multi-select with OR logic)
 
192
  categorical_pref_map = {
193
  "servingsize": "Serving_Size", "populargroup": "Popular_Group",
194
  "dietarycategory": "Dietary_Category", "spicelevel": "Spice_Level",
@@ -196,158 +355,154 @@ def get_recommendations(preferences):
196
  "restaurantchain": "Restaurant_Chain", "seasonalavailability": "Seasonal_Availability",
197
  "breadtype": "Bread_Type", "crusttype": CRUST_TYPE_COL
198
  }
199
-
200
  for pref_key, df_col_name in categorical_pref_map.items():
201
- if df_col_name and pref_key in preferences and preferences[pref_key] and df_col_name in DF.columns:
202
- pref_value = preferences[pref_key]
203
-
204
- # If pref_value is a list (from multi-select) and not empty
205
- if isinstance(pref_value, list) and pref_value:
206
- cat_mask = DF.loc[current_indices, df_col_name].isin(pref_value)
207
- filtered_indices_count_before = len(current_indices)
208
- current_indices = DF.loc[current_indices][cat_mask].index.to_list()
209
- current_app.logger.info(
210
- f"After {pref_key} filter (isin {pref_value}): {len(current_indices)} from {filtered_indices_count_before} pizzas remaining")
211
- # Legacy: if it's a single string (though frontend should send list now)
212
- elif isinstance(pref_value, str) and pref_value and pref_value.lower() != "any":
213
- cat_mask = DF.loc[current_indices, df_col_name] == pref_value
214
- filtered_indices_count_before = len(current_indices)
215
- current_indices = DF.loc[current_indices][cat_mask].index.to_list()
216
- current_app.logger.info(
217
- f"After {pref_key} filter ('{pref_value}'): {len(current_indices)} from {filtered_indices_count_before} pizzas remaining")
218
- elif not pref_value: # Empty list or empty string means no filter for this category
219
- current_app.logger.info(
220
- f"Skipping filter for {pref_key} as no specific options were selected (value: {pref_value}).")
221
- continue
222
-
223
- if not current_indices: return []
224
 
225
  if not current_indices:
226
- current_app.logger.warning("No pizzas match all filter criteria after hard filters.")
227
  return []
228
 
229
  # --- Similarity Scoring Part ---
 
230
  valid_indices_for_feature_df = FEATURE_DF.index.intersection(current_indices)
231
  if valid_indices_for_feature_df.empty:
232
- current_app.logger.warning("No valid indices remain for feature DF after hard filters.")
233
  return []
234
 
235
  filtered_feature_df = FEATURE_DF.loc[valid_indices_for_feature_df]
236
- if filtered_feature_df.empty:
237
- current_app.logger.warning("Filtered feature DF is empty after hard filters.")
238
  return []
239
 
240
- user_vector = pd.Series(0.0, index=FEATURE_DF.columns)
 
241
 
242
- # Toppings for similarity
243
  if 'toppings' in preferences and preferences['toppings']:
244
  for topping in preferences['toppings']:
245
  col_name = f"Topping_{topping}"
246
  if col_name in user_vector.index:
247
  user_vector[col_name] = 1.0
248
 
249
- # Categorical for similarity
250
- js_to_df_key_map_for_vector = {
251
- "servingsize": "Serving_Size", "populargroup": "Popular_Group",
252
- "dietarycategory": "Dietary_Category", "saucetype": "Sauce_Type",
253
- "cheeseamount": "Cheese_Amount", "restaurantchain": "Restaurant_Chain",
254
- "seasonalavailability": "Seasonal_Availability", "breadtype": "Bread_Type",
255
- "spicelevel": "Spice_Level" # Add spicelevel here for one-hot encoding
256
- }
257
- if CRUST_TYPE_COL: js_to_df_key_map_for_vector["crusttype"] = CRUST_TYPE_COL
258
-
259
- for pref_key, df_col_name in js_to_df_key_map_for_vector.items():
260
- if pref_key in preferences and preferences[pref_key]:
261
- pref_values_for_vector = preferences[pref_key]
262
- # Ensure it's a list, even if frontend sent a single string (should be list)
263
- if not isinstance(pref_values_for_vector, list):
264
- pref_values_for_vector = [pref_values_for_vector]
265
-
266
- for val_item in pref_values_for_vector:
267
- if isinstance(val_item, str) and val_item.lower() == "any": # Should not happen with new UI
268
- continue
269
- col_name = f"{df_col_name}_{val_item}"
270
- if col_name in user_vector.index:
271
- user_vector[col_name] = 1.0
272
-
273
- # Numerical for similarity
274
  raw_user_num_prefs_dict = {}
275
- spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
276
 
277
  if 'price_range' in preferences and preferences['price_range']:
278
- raw_user_num_prefs_dict['Price'] = (float(preferences['price_range'][0]) + float(
279
- preferences['price_range'][1])) / 2
 
280
  if 'slices' in preferences and preferences['slices'] is not None:
281
- raw_user_num_prefs_dict['Slices'] = float(preferences['slices'])
 
282
  if 'rating' in preferences and preferences['rating'] is not None:
283
- raw_user_num_prefs_dict['Rating'] = float(preferences['rating'])
284
-
285
- # Handle numerical Spice_Level for user_vector
286
- # Only set if exactly one spice level is chosen in the multi-select.
287
- # The one-hot encoded versions are handled above.
288
- if 'spicelevel' in preferences and preferences['spicelevel']:
289
- selected_spice_levels = preferences['spicelevel']
290
- if isinstance(selected_spice_levels, list) and len(selected_spice_levels) == 1:
291
- # If only one specific spice level selected from multi-select
292
- spice_val_str = selected_spice_levels[0]
293
- if spice_val_str and spice_val_str.lower() != "any":
294
- raw_user_num_prefs_dict['Spice_Level'] = float(spice_map.get(spice_val_str, 1))
295
- # If multiple spice levels or "Any" (empty list), don't set numerical Spice_Level for user_vector.
296
- # The one-hot encoded versions will cover the preference.
297
-
298
- if 'prep_time' in preferences and preferences['prep_time'] is not None: # Changed 'preptime'
299
- try:
300
- prep_time_str = str(preferences['prep_time']).lower().replace("min", "").strip()
301
- raw_user_num_prefs_dict['Preparation_Time'] = float(prep_time_str)
302
- except ValueError:
303
- pass
304
-
305
- # Scaling numerical preferences for user_vector
306
  temp_scaling_df = pd.DataFrame(columns=NUMERICAL_COLS, index=[0])
307
  for col in NUMERICAL_COLS:
308
- temp_scaling_df.loc[0, col] = raw_user_num_prefs_dict.get(col, 0.0) # Use default if not in dict
309
-
310
- # Ensure all NUMERICAL_COLS exist in temp_scaling_df before transform
311
- for col in NUMERICAL_COLS:
312
- if col not in temp_scaling_df.columns:
313
- temp_scaling_df[col] = 0.0 # Default to 0 or mean if appropriate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
- scaled_user_num_values = SCALER.transform(temp_scaling_df[NUMERICAL_COLS])[0]
316
- for i, col_name in enumerate(NUMERICAL_COLS):
317
- if col_name in raw_user_num_prefs_dict: # Only set if user specified this numerical pref
318
- user_vector[col_name] = scaled_user_num_values[i]
319
 
320
- # Similarity calculation
321
  feature_matrix_filtered = filtered_feature_df.values
322
  user_array = user_vector.values.reshape(1, -1)
323
 
 
324
  if user_array.shape[1] != feature_matrix_filtered.shape[1]:
325
  current_app.logger.error(
326
- f"Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}")
327
- # This can happen if new columns were added to FEATURE_DF after user_vector was initialized
328
- # Re-align user_vector to FEATURE_DF.columns
329
- aligned_user_vector = pd.Series(0.0, index=FEATURE_DF.columns)
330
- for col in user_vector.index:
331
- if col in aligned_user_vector.index:
332
- aligned_user_vector[col] = user_vector[col]
333
  user_array = aligned_user_vector.values.reshape(1, -1)
 
334
  if user_array.shape[1] != feature_matrix_filtered.shape[1]:
335
- current_app.logger.error(
336
- f"Persistent Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}")
337
  return []
338
 
 
339
  similarities = cosine_similarity(user_array, feature_matrix_filtered)[0]
 
340
  sorted_indices_in_filtered_df = similarities.argsort()[::-1]
 
341
  final_recommendation_indices = valid_indices_for_feature_df[sorted_indices_in_filtered_df]
342
 
 
343
  recommendations_list = []
344
- frontend_keys = [
 
 
345
  'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
346
  'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
347
  'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
348
  'bread_type', 'image_url', 'crust_type'
349
  ]
350
- df_to_frontend_map = {
351
  'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
352
  'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
353
  'description': 'Description', 'popular_group': 'Popular_Group',
@@ -362,184 +517,112 @@ def get_recommendations(preferences):
362
  for original_idx in final_recommendation_indices:
363
  pizza_series = DF.iloc[original_idx]
364
  rec_item = {}
365
- for key in frontend_keys:
366
- df_col = df_to_frontend_map.get(key)
367
  if key == 'id':
368
  rec_item[key] = int(original_idx)
369
  elif df_col and df_col in pizza_series:
370
  value = pizza_series[df_col]
371
- if isinstance(value, np.integer):
372
- value = int(value)
373
- elif isinstance(value, np.floating):
374
- value = float(value)
375
- elif isinstance(value, np.ndarray):
376
- value = value.tolist()
377
  rec_item[key] = "" if pd.isna(value) else value
378
- elif key == 'crust_type' and not CRUST_TYPE_COL:
379
- rec_item[key] = "N/A"
380
  else:
381
  rec_item[key] = ""
382
 
383
  rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
384
  rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
385
-
386
- for k_final, v_final in rec_item.items():
387
  if isinstance(v_final, np.generic): rec_item[k_final] = v_final.item()
388
-
389
  recommendations_list.append(rec_item)
390
 
391
- current_app.logger.info(f"Final recommendations: {len(recommendations_list)} pizzas")
392
  return recommendations_list
393
 
394
 
395
- @app.route('/')
396
- def index_route():
397
- global DF, ALL_TOPPINGS, CATEGORICAL_FEATURES, CRUST_TYPE_COL
398
- if DF is None:
399
- current_app.logger.error("Data not loaded attempting to serve / route.")
400
- return "Error: Pizza data not loaded. Please check server logs.", 500
401
-
402
- filter_options = {}
403
- cols_for_filters = list(
404
- set(CATEGORICAL_FEATURES + ['Spice_Level'])) # Spice_Level might be in CATEGORICAL_FEATURES or separate
405
-
406
- if CRUST_TYPE_COL and CRUST_TYPE_COL not in cols_for_filters: # Ensure crust type is included if available
407
- cols_for_filters.append(CRUST_TYPE_COL)
408
-
409
- for col_name in cols_for_filters:
410
- if col_name in DF.columns:
411
- # Use a consistent key naming convention for JS
412
- key_name = col_name.lower().replace('_', '')
413
- # Special cases for consistency if needed, e.g. "spicelevel"
414
- if col_name == "Spice_Level": key_name = "spicelevel"
415
- if col_name == CRUST_TYPE_COL: key_name = "crusttype"
416
- # if col_name == "Serving_Size": key_name = "servingsize" # Example
417
-
418
- unique_values = sorted([v for v in DF[col_name].dropna().unique() if v != ''])
419
- filter_options[key_name] = unique_values # e.g. filter_options['spicelevel'] = ['Mild', 'Medium', 'Hot']
420
-
421
- default_recommendations_df = DF.sort_values('Rating', ascending=False).copy()
422
- default_recs_list = []
423
-
424
- frontend_keys = [
425
- 'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
426
- 'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
427
- 'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
428
- 'bread_type', 'image_url', 'crust_type'
429
- ]
430
- df_to_frontend_map = {
431
- 'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
432
- 'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
433
- 'description': 'Description', 'popular_group': 'Popular_Group',
434
- 'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
435
- 'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
436
- 'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
437
- 'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
438
- 'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
439
- 'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL
440
- }
441
-
442
- for original_idx, pizza_row in default_recommendations_df.iterrows():
443
- rec_item = {}
444
- for key in frontend_keys:
445
- df_col = df_to_frontend_map.get(key)
446
- if key == 'id':
447
- rec_item[key] = int(original_idx)
448
- elif df_col and df_col in pizza_row:
449
- value = pizza_row[df_col]
450
- if isinstance(value, np.integer):
451
- value = int(value)
452
- elif isinstance(value, np.floating):
453
- value = float(value)
454
- elif isinstance(value, np.ndarray):
455
- value = value.tolist()
456
- rec_item[key] = "" if pd.isna(value) else value
457
- elif key == 'crust_type' and not CRUST_TYPE_COL:
458
- rec_item[key] = "N/A"
459
- else:
460
- rec_item[key] = ""
461
-
462
- rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
463
- rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
464
-
465
- for k, v in rec_item.items():
466
- if isinstance(v, np.generic):
467
- rec_item[k] = v.item()
468
-
469
- default_recs_list.append(rec_item)
470
-
471
- current_app.logger.info(f"Serving {len(default_recs_list)} pizzas for initial display.")
472
- current_app.logger.info(f"Filter options for template: {filter_options}")
473
-
474
- return render_template('index.html',
475
- toppings=ALL_TOPPINGS,
476
- # Pass filter_options directly, JS will use these
477
- filter_options=filter_options,
478
- default_recommendations=default_recs_list,
479
- default_image_url=DEFAULT_IMAGE_URL)
480
-
481
-
482
  @app.route('/recommend', methods=['POST'])
483
  def recommend():
484
  try:
485
  data = request.json
486
- preferences = {}
487
  current_app.logger.info(f"Received recommendation request with data: {data}")
488
 
489
- # Process all possible preferences
490
- # Keys should match what JS sends (e.g., 'servingsize', 'spicelevel')
491
- # Numerical/range preferences
492
- simple_numerical_prefs = ['slices', 'rating', 'prep_time'] # 'prep_time' not 'preptime'
493
- for key in simple_numerical_prefs:
494
- if key in data and data[key] is not None: # Allow 0 for rating
495
- # For range sliders, value might be a string that needs parsing, ensure it's correct type
496
  try:
497
- if key == 'rating':
498
- preferences[key] = float(data[key])
499
- else:
500
- preferences[key] = int(data[key]) # slices, prep_time
501
  except ValueError:
502
- current_app.logger.warning(f"Could not parse numerical preference {key}: {data[key]}")
503
-
504
  if 'price_range' in data and data['price_range']:
505
  try:
506
  preferences['price_range'] = [float(p) for p in data['price_range']]
507
  except (ValueError, TypeError):
508
- current_app.logger.warning(f"Could not parse price_range: {data['price_range']}")
509
 
510
- # Multi-select categorical preferences (including toppings)
511
- # Keys like 'toppings', 'servingsize', 'dietarycategory', 'spicelevel', etc.
512
- multi_select_prefs = [
513
  'toppings', 'servingsize', 'populargroup', 'dietarycategory',
514
  'spicelevel', 'saucetype', 'cheeseamount', 'restaurantchain',
515
  'seasonalavailability', 'breadtype', 'crusttype'
516
  ]
517
- for key in multi_select_prefs:
518
- if key in data and isinstance(data[key], list): # Expecting a list
519
- preferences[key] = data[key] # Store the list (can be empty)
520
- elif key in data: # If not a list, log warning but try to process if it's a single string
521
- current_app.logger.warning(
522
- f"Preference for {key} was not a list: {data[key]}. Processing as single if string.")
523
- if isinstance(data[key], str) and data[key]:
524
- preferences[key] = [data[key]] # Wrap single string in a list for consistency
525
- else: # If not string or empty, treat as no preference for this key
526
- preferences[key] = []
527
 
528
  current_app.logger.info(f"Processed preferences for filtering: {preferences}")
529
  recommendations = get_recommendations(preferences)
530
  current_app.logger.info(f"Returning {len(recommendations)} recommendations after filtering and scoring.")
531
  return jsonify(recommendations)
 
532
  except Exception as e:
533
- current_app.logger.error(f"Error in /recommend: {e}", exc_info=True)
534
  return jsonify({"error": "Failed to get recommendations due to a server issue.", "details": str(e)}), 500
535
 
536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
  if __name__ == '__main__':
538
- try:
539
- preprocess_data()
540
- app.run(debug=True, use_reloader=False) # use_reloader=False is good for dev with global vars
541
- except FileNotFoundError as e:
542
- logger.critical(f"CRITICAL ERROR: {e}. Ensure 'pizza.csv' is present.")
543
- except Exception as e:
544
- logger.critical(f"Unexpected critical startup error: {e}", exc_info=True)
545
- # ... rest of the function
 
 
 
 
 
 
6
  import os
7
  import logging
8
 
9
+ # --- Logging Configuration ---
10
+ # Ensure logging is configured before any loggers are potentially used by imported modules
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]')
12
  logger = logging.getLogger(__name__)
13
 
14
  app = Flask(__name__)
15
 
16
+ # --- Global Variables ---
17
  DF = None
18
  ALL_TOPPINGS = []
19
  FEATURE_DF = None
20
+ SCALER = None # Will be initialized in preprocess_data
21
  NUMERICAL_COLS = ['Price', 'Slices', 'Rating', 'Spice_Level', 'Preparation_Time', 'Calories']
22
  CATEGORICAL_FEATURES = [
23
  'Serving_Size', 'Popular_Group', 'Dietary_Category',
 
30
 
31
  def preprocess_data(df_path='pizza.csv'):
32
  global DF, ALL_TOPPINGS, FEATURE_DF, SCALER, CATEGORICAL_FEATURES, CRUST_TYPE_COL
33
+ logger.info(f"Attempting to preprocess data from relative path: {df_path}")
34
 
35
+ # Construct absolute path for the CSV file
36
+ # This is crucial for environments like Docker where working directory might differ
37
+ base_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the current script (app.py)
38
+ absolute_df_path = os.path.join(base_dir, df_path)
39
+ logger.info(f"Absolute path for CSV: {absolute_df_path}")
40
 
41
+ if not os.path.exists(absolute_df_path):
42
+ logger.error(f"Dataset file '{absolute_df_path}' not found.")
43
+ raise FileNotFoundError(f"Dataset file '{absolute_df_path}' not found. Ensure it's in the same directory as app.py.")
44
+
45
+ DF = pd.read_csv(absolute_df_path)
46
+ logger.info(f"Successfully loaded '{absolute_df_path}'. Original DataFrame shape: {DF.shape}")
47
  logger.info(f"Original DataFrame columns: {DF.columns.tolist()}")
48
 
49
+ # Determine Crust Type Column
50
  potential_crust_cols = ['Crust_Type', 'Cr_Type']
51
  valid_crust_cols = [col for col in potential_crust_cols if col in DF.columns]
52
  if valid_crust_cols:
53
+ valid_crust_cols.sort(key=lambda col: DF[col].isnull().sum()) # Prefer column with fewer NaNs
54
  CRUST_TYPE_COL = valid_crust_cols[0]
55
  logger.info(f"Using '{CRUST_TYPE_COL}' for crust type.")
56
  if CRUST_TYPE_COL not in CATEGORICAL_FEATURES:
57
  CATEGORICAL_FEATURES.append(CRUST_TYPE_COL)
58
+ # Remove other potential crust columns if they were in CATEGORICAL_FEATURES
59
  for col in potential_crust_cols:
60
  if col != CRUST_TYPE_COL and col in CATEGORICAL_FEATURES:
61
  CATEGORICAL_FEATURES.remove(col)
62
  else:
63
+ logger.warning("Crust type column (Crust_Type or Cr_Type) not found. Crust type will not be used.")
64
  CRUST_TYPE_COL = None
65
 
66
+ # Fill NaN for text-based categorical columns and other text fields
67
+ text_cols_to_fill = list(set(CATEGORICAL_FEATURES + ['Toppings', 'Description', 'Allergens', 'Image_Url', 'Pizza_Name']))
68
+ for col in text_cols_to_fill:
69
+ if col and col in DF.columns: # Ensure col is not None (e.g. if CRUST_TYPE_COL is None)
70
  DF[col] = DF[col].fillna('')
71
+ logger.info("Filled NaNs in text-based categorical columns with empty strings.")
72
 
73
+ # Fill NaN for numerical columns from the CSV
74
+ numerical_cols_in_df = ['Price_Rs', 'Slices', 'Rating', 'Rating_Count', 'Preparation_Time_min', 'Calories_per_Slice']
75
  for col in numerical_cols_in_df:
76
  if col in DF.columns:
77
  if pd.api.types.is_numeric_dtype(DF[col]):
78
+ median_val = DF[col].median()
79
+ DF[col] = DF[col].fillna(median_val)
80
+ logger.info(f"Filled NaNs in numerical column '{col}' with its median ({median_val}).")
81
  else:
82
+ # Attempt to convert to numeric, then fill with median or 0
83
+ numeric_series = pd.to_numeric(DF[col], errors='coerce')
84
+ median_val = 0
85
+ if not numeric_series.isnull().all():
86
+ median_val = numeric_series.median()
87
+ DF[col] = numeric_series.fillna(median_val)
88
+ logger.warning(f"Column '{col}' was not purely numeric. Converted to numeric, filled NaNs with median/0 ({median_val}).")
89
+ else:
90
+ logger.warning(f"Expected numerical column '{col}' not found in DataFrame. It will be missing from features if not handled.")
91
+
92
+
93
+ if 'Rating_Count' in DF.columns:
94
+ DF['Rating_Count'] = DF['Rating_Count'].fillna(0).astype(int)
95
+
96
+ # Process Toppings
97
+ if 'Toppings' in DF.columns:
98
+ DF['Toppings_list_internal'] = DF['Toppings'].astype(str).str.split(r';\s*') # Use raw string for regex
99
+ DF['Toppings_list_internal'] = DF['Toppings_list_internal'].apply(
100
+ lambda x: [t.strip() for t in x if isinstance(t, str) and t.strip()]) # Filter out empty strings after split
101
+ current_all_toppings = set()
102
+ for toppings_list in DF['Toppings_list_internal'].dropna():
103
+ current_all_toppings.update(t for t in toppings_list if t) # Ensure t is not empty
104
+ ALL_TOPPINGS = sorted(list(current_all_toppings))
105
+ logger.info(f"Found {len(ALL_TOPPINGS)} unique toppings. Example: {ALL_TOPPINGS[:5] if ALL_TOPPINGS else 'None'}")
106
+ else:
107
+ logger.warning("'Toppings' column not found. Topping features will be empty.")
108
+ DF['Toppings_list_internal'] = pd.Series([[] for _ in range(len(DF))]) # Empty list for all rows
109
+ ALL_TOPPINGS = []
110
 
 
 
 
 
 
111
 
112
+ # --- Feature Engineering ---
113
  feature_data = {}
114
  num_feature_map = {
115
  'Price': 'Price_Rs', 'Slices': 'Slices', 'Rating': 'Rating',
 
119
  if df_col in DF.columns:
120
  feature_data[feature_col] = DF[df_col].copy()
121
  else:
122
+ logger.warning(f"Numerical source column '{df_col}' for feature '{feature_col}' not found. Filling with zeros.")
123
+ feature_data[feature_col] = pd.Series([0.0] * len(DF)) # Ensure float for consistency
124
 
125
+ # Spice Level Feature (Numerical)
126
  if 'Spice_Level' in DF.columns:
127
+ DF['Spice_Level'] = DF['Spice_Level'].fillna('Mild') # Default for NaNs
128
  spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
129
+ feature_data['Spice_Level'] = DF['Spice_Level'].map(spice_map).fillna(1.0) # Ensure float
130
  else:
131
+ logger.warning("'Spice_Level' column not found. Filling 'Spice_Level' feature with default (1.0).")
132
+ feature_data['Spice_Level'] = pd.Series([1.0] * len(DF)) # Default if column is missing
133
 
134
+ # One-Hot Encode Categorical Features
135
  for feature_cat_col in CATEGORICAL_FEATURES:
136
+ if feature_cat_col and feature_cat_col in DF.columns: # Check if col_name is not None and exists
137
+ # Ensure the column is treated as string to avoid issues with mixed types in unique()
138
+ DF[feature_cat_col] = DF[feature_cat_col].astype(str)
139
  for value in DF[feature_cat_col].unique():
140
+ if pd.notnull(value) and value.strip() != '': # Check for non-null and non-empty string values
141
  feature_data[f"{feature_cat_col}_{value}"] = (DF[feature_cat_col] == value).astype(int)
142
+ elif feature_cat_col: # Log warning only if feature_cat_col was defined
143
+ logger.warning(f"Categorical source column '{feature_cat_col}' for one-hot encoding not found in DataFrame.")
144
 
145
+ # Topping Features (One-Hot Encoded)
146
  for topping in ALL_TOPPINGS:
147
+ if topping: # Ensure topping string is not empty
148
  feature_data[f"Topping_{topping}"] = DF['Toppings_list_internal'].apply(
149
  lambda x: 1 if topping in x else 0
150
  )
151
 
152
  FEATURE_DF = pd.DataFrame(feature_data)
153
+ logger.info(f"FEATURE_DF created. Shape: {FEATURE_DF.shape}. Columns: {FEATURE_DF.columns.tolist()[:10]}...") # Log first 10 cols
154
+
155
+ # Ensure all NUMERICAL_COLS exist in FEATURE_DF and fill NaNs
156
  for col in NUMERICAL_COLS:
157
+ if col not in FEATURE_DF.columns:
158
+ logger.warning(f"Numerical column '{col}' is missing from FEATURE_DF after construction. Adding as zeros.")
159
+ FEATURE_DF[col] = 0.0 # Ensure float
160
  if FEATURE_DF[col].isnull().any():
161
+ mean_val = FEATURE_DF[col].mean()
162
+ fill_val = mean_val if pd.notna(mean_val) else 0.0
163
+ logger.info(f"Filling NaNs in numerical feature column '{col}' with {fill_val}.")
164
+ FEATURE_DF[col] = FEATURE_DF[col].fillna(fill_val)
165
+
166
+ # Scale Numerical Features
167
+ SCALER = MinMaxScaler() # Initialize scaler
168
+ if not FEATURE_DF.empty and all(col in FEATURE_DF.columns for col in NUMERICAL_COLS):
169
+ try:
170
+ FEATURE_DF[NUMERICAL_COLS] = SCALER.fit_transform(FEATURE_DF[NUMERICAL_COLS])
171
+ logger.info(f"Numerical columns ({NUMERICAL_COLS}) scaled. FEATURE_DF shape: {FEATURE_DF.shape}")
172
+ except Exception as e:
173
+ logger.error(f"Error during scaling of numerical columns: {e}. FEATURE_DF might be problematic.")
174
+ # Fallback: Keep numerical columns unscaled if scaling fails, or handle as needed
175
+ elif FEATURE_DF.empty:
176
+ logger.error("FEATURE_DF is empty before scaling. Scaling skipped. This will likely cause issues.")
177
+ else:
178
+ missing_cols = [col for col in NUMERICAL_COLS if col not in FEATURE_DF.columns]
179
+ logger.error(f"Not all numerical columns ({NUMERICAL_COLS}) found in FEATURE_DF for scaling. Missing: {missing_cols}. Scaling skipped.")
180
+
181
+ logger.info(f"Preprocessing done. DF is None: {DF is None}, FEATURE_DF is None: {FEATURE_DF is None}, SCALER is None: {SCALER is None}")
182
+ if FEATURE_DF is not None:
183
+ logger.info(f"Final FEATURE_DF shape: {FEATURE_DF.shape}")
184
+ if DF is not None:
185
+ logger.info(f"Final DF shape: {DF.shape}")
186
 
187
+
188
@app.route('/')
def index_route():
    """Render the landing page: filter options, the topping list and a
    default (top-rated) set of pizza recommendations.

    Returns a rendered ``index.html`` on success, or a plain-text error
    with HTTP 500 when the preprocessed globals are unavailable.
    """
    global DF, ALL_TOPPINGS, CATEGORICAL_FEATURES, CRUST_TYPE_COL, FEATURE_DF, DEFAULT_IMAGE_URL
    # Critical check at the beginning of the route: module-level preprocessing
    # may have failed or not run at all.
    if DF is None:
        current_app.logger.error("DF is None when trying to serve '/'. Data preprocessing might have failed or not run.")
        return "Error: Pizza data (DF) not loaded. Please check server logs.", 500
    if FEATURE_DF is None:  # Also check FEATURE_DF as it's derived from DF
        current_app.logger.error("FEATURE_DF is None when trying to serve '/'. Data preprocessing might have failed.")
        return "Error: Pizza feature data (FEATURE_DF) not loaded. Please check server logs.", 500

    filter_options = {}
    # Columns that drive the filter UI: the known categorical features plus
    # 'Spice_Level' (stored as text in DF even though it is also numeric-mapped).
    cols_for_filters_set = set(cat_col for cat_col in CATEGORICAL_FEATURES if cat_col and cat_col in DF.columns)
    if 'Spice_Level' in DF.columns:
        cols_for_filters_set.add('Spice_Level')
    # CRUST_TYPE_COL is already in CATEGORICAL_FEATURES if it was found.

    for col_name in list(cols_for_filters_set):
        # key_name for JS should be consistent (lowercase, no underscores).
        key_name = col_name.lower().replace('_', '')
        # BUGFIX: drop NaNs *before* casting to str. astype(str) converts NaN
        # into the literal string 'nan', which a subsequent dropna() can never
        # remove, so 'nan' used to leak into the filter dropdowns.
        unique_values = sorted([v for v in DF[col_name].dropna().astype(str).unique() if v.strip() != ''])
        if unique_values:  # Only add if there are actual values
            filter_options[key_name] = unique_values

    # Prepare default recommendations (top-rated first when possible).
    if 'Rating' in DF.columns:
        default_recommendations_df = DF.sort_values('Rating', ascending=False).copy()
    else:
        logger.warning("'Rating' column not found in DF. Cannot sort for default recommendations. Using unsorted DF.")
        default_recommendations_df = DF.copy()  # Fallback to unsorted

    default_recs_list = []
    # Keys the frontend template/JS expects on every pizza object.
    frontend_keys = [
        'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
        'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
        'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
        'bread_type', 'image_url', 'crust_type'
    ]
    # Maps frontend keys to DF column names ('id' is derived from the index).
    df_to_frontend_map = {
        'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
        'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
        'description': 'Description', 'popular_group': 'Popular_Group',
        'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
        'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
        'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
        'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
        'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
        'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL  # Uses the determined CRUST_TYPE_COL
    }

    for original_idx, pizza_row in default_recommendations_df.iterrows():
        rec_item = {}
        for key in frontend_keys:
            df_col = df_to_frontend_map.get(key)
            if key == 'id':
                rec_item[key] = int(original_idx)  # Pizza ID is its original index in DF
            elif df_col and df_col in pizza_row:  # df_col can be None ('id', or CRUST_TYPE_COL is None)
                value = pizza_row[df_col]
                # Type conversions for JSON serializability.
                if isinstance(value, np.integer):
                    value = int(value)
                elif isinstance(value, np.floating):
                    value = float(value)
                elif isinstance(value, np.ndarray):
                    value = value.tolist()
                rec_item[key] = "" if pd.isna(value) else value
            elif key == 'crust_type' and not CRUST_TYPE_COL:  # If CRUST_TYPE_COL was not found
                rec_item[key] = "N/A"
            else:
                rec_item[key] = ""  # Default for missing fields

        rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)  # Ensure int
        rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL

        # Final pass to convert any remaining numpy generic types.
        for k_final, v_final in rec_item.items():
            if isinstance(v_final, np.generic):
                rec_item[k_final] = v_final.item()
        default_recs_list.append(rec_item)

    current_app.logger.info(f"Serving {len(default_recs_list)} pizzas for initial display.")
    current_app.logger.info(f"Filter options for template: {filter_options}")
    current_app.logger.info(f"ALL_TOPPINGS for template: {ALL_TOPPINGS[:5] if ALL_TOPPINGS else 'None'}")

    return render_template('index.html',
                           toppings=ALL_TOPPINGS,
                           filter_options=filter_options,
                           default_recommendations=default_recs_list,
                           default_image_url=DEFAULT_IMAGE_URL)
278
 
279
 
280
def get_recommendations(preferences):
    """Return ranked pizza recommendations for the given user preferences.

    Applies hard filters (toppings, price range, slices, rating, prep time,
    multi-select categorical values) to DF, then ranks the survivors by
    cosine similarity between a user-preference vector and each pizza's
    row in FEATURE_DF.

    Args:
        preferences: dict produced by the /recommend route. May contain
            'toppings', 'price_range' ([min, max]), 'slices', 'rating',
            'prep_time', and lowercase multi-select keys such as
            'spicelevel' or 'crusttype' (lists; empty list means "Any").

    Returns:
        List of frontend-shaped dicts ordered by similarity (possibly empty).
    """
    global DF, FEATURE_DF, SCALER, CRUST_TYPE_COL, DEFAULT_IMAGE_URL

    if DF is None or FEATURE_DF is None or SCALER is None:
        current_app.logger.error("Data not fully initialized (DF, FEATURE_DF, or SCALER is None) for get_recommendations.")
        return []

    current_indices = DF.index.to_list()
    current_app.logger.info(f"Starting with {len(current_indices)} pizzas before filtering. Preferences: {preferences}")

    # --- Hard Filters ---
    # 1. Toppings: keep pizzas containing at least one selected topping.
    if 'toppings' in preferences and preferences['toppings'] and 'Toppings_list_internal' in DF.columns:
        selected_toppings = set(preferences['toppings'])
        if selected_toppings:  # Ensure not an empty list that would select nothing
            topping_mask = DF.loc[current_indices, 'Toppings_list_internal'].apply(
                lambda x_toppings: isinstance(x_toppings, list) and any(t in selected_toppings for t in x_toppings)
            )
            current_indices = DF.loc[current_indices][topping_mask].index.to_list()
            current_app.logger.info(f"After toppings filter: {len(current_indices)} pizzas remaining")
            if not current_indices: return []

    # 2. Price range (inclusive on both ends).
    if 'price_range' in preferences and preferences['price_range'] and 'Price_Rs' in DF.columns:
        try:
            min_price = float(preferences['price_range'][0])
            max_price = float(preferences['price_range'][1])
            price_mask = (DF.loc[current_indices, 'Price_Rs'] >= min_price) & \
                         (DF.loc[current_indices, 'Price_Rs'] <= max_price)
            current_indices = DF.loc[current_indices][price_mask].index.to_list()
            current_app.logger.info(f"After price filter ({min_price}-{max_price}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except (TypeError, ValueError, IndexError) as e:
            current_app.logger.warning(f"Invalid price_range preference: {preferences['price_range']}. Error: {e}")

    # 3. Number of Slices (minimum).
    if 'slices' in preferences and preferences['slices'] is not None and 'Slices' in DF.columns:
        try:
            min_slices = int(preferences['slices'])
            slices_mask = DF.loc[current_indices, 'Slices'] >= min_slices
            current_indices = DF.loc[current_indices][slices_mask].index.to_list()
            current_app.logger.info(f"After slices filter (>= {min_slices}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except ValueError:
            current_app.logger.warning(f"Invalid value for slices: {preferences['slices']}")

    # 4. Minimum Rating.
    if 'rating' in preferences and preferences['rating'] is not None and 'Rating' in DF.columns:
        try:
            min_rating = float(preferences['rating'])
            rating_mask = DF.loc[current_indices, 'Rating'] >= min_rating
            current_indices = DF.loc[current_indices][rating_mask].index.to_list()
            current_app.logger.info(f"After rating filter (>= {min_rating}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except ValueError:
            current_app.logger.warning(f"Invalid value for rating: {preferences['rating']}")

    # 5. Maximum Preparation Time (accepts "30", "30 min", etc.).
    if 'prep_time' in preferences and preferences['prep_time'] is not None and 'Preparation_Time_min' in DF.columns:
        try:
            max_prep_time = int(str(preferences['prep_time']).lower().replace("min", "").strip())
            prep_mask = DF.loc[current_indices, 'Preparation_Time_min'] <= max_prep_time
            current_indices = DF.loc[current_indices][prep_mask].index.to_list()
            current_app.logger.info(f"After prep time filter (<= {max_prep_time}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except ValueError:
            current_app.logger.warning(f"Could not parse prep_time value: {preferences['prep_time']}")

    # 6. Categorical Filters (multi-select, OR logic within a category).
    # Keys match the JS payload; values are DF column names.
    categorical_pref_map = {
        "servingsize": "Serving_Size", "populargroup": "Popular_Group",
        "dietarycategory": "Dietary_Category", "spicelevel": "Spice_Level",
        "saucetype": "Sauce_Type", "cheeseamount": "Cheese_Amount",
        "restaurantchain": "Restaurant_Chain", "seasonalavailability": "Seasonal_Availability",
        "breadtype": "Bread_Type", "crusttype": CRUST_TYPE_COL
    }
    for pref_key, df_col_name in categorical_pref_map.items():
        if df_col_name and pref_key in preferences and preferences[pref_key]:  # df_col_name can be None (no crust col)
            pref_value_list = preferences[pref_key]  # Expected to be a list from JS
            if isinstance(pref_value_list, list) and pref_value_list:  # Empty list means "Any": no filtering
                if df_col_name in DF.columns:
                    cat_mask = DF.loc[current_indices, df_col_name].isin(pref_value_list)
                    current_indices = DF.loc[current_indices][cat_mask].index.to_list()
                    current_app.logger.info(f"After {pref_key} filter (isin {pref_value_list}): {len(current_indices)} pizzas")
                    if not current_indices: return []
                else:
                    current_app.logger.warning(f"Column '{df_col_name}' for preference '{pref_key}' not found in DF. Filter skipped.")

    if not current_indices:
        current_app.logger.info("No pizzas match all hard filter criteria.")
        return []

    # --- Similarity Scoring Part ---
    # Restrict FEATURE_DF to the pizzas that survived the hard filters.
    valid_indices_for_feature_df = FEATURE_DF.index.intersection(current_indices)
    if valid_indices_for_feature_df.empty:
        current_app.logger.info("No valid indices remain for FEATURE_DF after hard filters.")
        return []

    filtered_feature_df = FEATURE_DF.loc[valid_indices_for_feature_df]
    if filtered_feature_df.empty:  # Should not happen if valid_indices_for_feature_df is not empty
        current_app.logger.warning("Filtered FEATURE_DF is empty. This is unexpected.")
        return []

    # Build the user preference vector, aligned with FEATURE_DF columns.
    user_vector = pd.Series(0.0, index=FEATURE_DF.columns)  # 0.0 for float consistency

    # 1. Toppings in user vector.
    if 'toppings' in preferences and preferences['toppings']:
        for topping in preferences['toppings']:
            col_name = f"Topping_{topping}"
            if col_name in user_vector.index:
                user_vector[col_name] = 1.0

    # 2. Categorical preferences (one-hot columns, e.g. "Spice_Level_Mild").
    for pref_key, df_col_prefix in categorical_pref_map.items():
        if df_col_prefix and pref_key in preferences and preferences[pref_key]:
            selected_values = preferences[pref_key]  # This is a list
            for val_item in selected_values:
                one_hot_col_name = f"{df_col_prefix}_{val_item}"
                if one_hot_col_name in user_vector.index:
                    user_vector[one_hot_col_name] = 1.0

    # 3. Numerical preferences in user vector (scaled with the fitted SCALER).
    raw_user_num_prefs_dict = {}
    spice_map_for_num_pref = {'Mild': 1.0, 'Medium': 2.0, 'Hot': 3.0}  # Use floats

    if 'price_range' in preferences and preferences['price_range']:
        # Average of min/max price as the point preference.
        # BUGFIX: narrowed the former bare `except:` to the parse errors that
        # can actually occur here (bare except also swallows KeyboardInterrupt).
        try:
            raw_user_num_prefs_dict['Price'] = (float(preferences['price_range'][0]) + float(preferences['price_range'][1])) / 2
        except (TypeError, ValueError, IndexError):
            pass  # Ignore if parsing fails
    if 'slices' in preferences and preferences['slices'] is not None:
        try:
            raw_user_num_prefs_dict['Slices'] = float(preferences['slices'])
        except (TypeError, ValueError):
            pass
    if 'rating' in preferences and preferences['rating'] is not None:
        try:
            raw_user_num_prefs_dict['Rating'] = float(preferences['rating'])
        except (TypeError, ValueError):
            pass
    if 'prep_time' in preferences and preferences['prep_time'] is not None:
        try:
            raw_user_num_prefs_dict['Preparation_Time'] = float(str(preferences['prep_time']).lower().replace("min", "").strip())
        except (TypeError, ValueError):
            pass
    # Numerical Spice_Level: only when exactly *one* spice level is selected;
    # otherwise rely solely on the one-hot spice features.
    if 'spicelevel' in preferences and isinstance(preferences['spicelevel'], list) and len(preferences['spicelevel']) == 1:
        selected_spice = preferences['spicelevel'][0]
        if selected_spice in spice_map_for_num_pref:
            raw_user_num_prefs_dict['Spice_Level'] = spice_map_for_num_pref[selected_spice]

    # Scale the raw numerical preferences with the SCALER fitted during
    # preprocessing. Unspecified columns default to the scaler's original
    # minimum (which scales to 0, i.e. neutral).
    temp_scaling_df = pd.DataFrame(columns=NUMERICAL_COLS, index=[0])
    for col in NUMERICAL_COLS:
        default_val = 0.0
        if hasattr(SCALER, 'data_min_') and col in FEATURE_DF.columns:  # Scaler is fit and column exists
            col_idx_in_scaler = -1
            try:
                col_idx_in_scaler = NUMERICAL_COLS.index(col)
            except ValueError:
                pass
            if col_idx_in_scaler != -1 and col_idx_in_scaler < len(SCALER.data_min_):
                default_val = SCALER.data_min_[col_idx_in_scaler]  # Original (unscaled) minimum
            else:  # Fallback if col not among the SCALER's fitted columns
                logger.warning(f"Column {col} not found in SCALER's fitted columns during user vector creation. Defaulting to 0.")
        temp_scaling_df.loc[0, col] = raw_user_num_prefs_dict.get(col, default_val)

    if hasattr(SCALER, 'n_features_in_'):  # Scaler has been fit
        scaled_user_num_values = SCALER.transform(temp_scaling_df[NUMERICAL_COLS])[0]
        for i, col_name in enumerate(NUMERICAL_COLS):
            if col_name in raw_user_num_prefs_dict:  # Only set columns the user actually specified
                user_vector[col_name] = scaled_user_num_values[i]
    else:
        logger.warning("SCALER is not fit. Cannot scale user's numerical preferences. Using raw values (0-1 range assumed).")
        for col_name in NUMERICAL_COLS:
            if col_name in raw_user_num_prefs_dict:
                # Rough fallback normalization; not accurate, only keeps magnitudes sane.
                user_vector[col_name] = raw_user_num_prefs_dict[col_name] / 100.0

    # Cosine similarity between the user vector and each candidate pizza.
    feature_matrix_filtered = filtered_feature_df.values
    user_array = user_vector.values.reshape(1, -1)

    # Defensive: shapes should always match since user_vector is built on
    # FEATURE_DF.columns, but align on common columns if they ever diverge.
    if user_array.shape[1] != feature_matrix_filtered.shape[1]:
        current_app.logger.error(
            f"Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}. "
            f"User cols: {user_vector.index.tolist()[:5]}, Feature cols: {filtered_feature_df.columns.tolist()[:5]}"
        )
        common_cols = filtered_feature_df.columns.intersection(user_vector.index)
        aligned_user_vector = pd.Series(0.0, index=filtered_feature_df.columns)
        aligned_user_vector[common_cols] = user_vector[common_cols]
        user_array = aligned_user_vector.values.reshape(1, -1)

        if user_array.shape[1] != feature_matrix_filtered.shape[1]:
            current_app.logger.critical(f"Persistent shape mismatch even after alignment. Cannot compute similarity.")
            return []

    similarities = cosine_similarity(user_array, feature_matrix_filtered)[0]
    # Positions sorted by similarity (descending) within filtered_feature_df...
    sorted_indices_in_filtered_df = similarities.argsort()[::-1]
    # ...mapped back to original DF index labels.
    final_recommendation_indices = valid_indices_for_feature_df[sorted_indices_in_filtered_df]

    # Shape the results for the frontend (mirrors index_route's mapping).
    recommendations_list = []
    frontend_keys_rec = [
        'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
        'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
        'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
        'bread_type', 'image_url', 'crust_type'
    ]
    df_to_frontend_map_rec = {
        'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
        'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
        'description': 'Description', 'popular_group': 'Popular_Group',
        'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
        'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
        'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
        'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
        'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
        'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL
    }

    for original_idx in final_recommendation_indices:
        # BUGFIX: original_idx is an index *label* (from FEATURE_DF.index
        # intersected with DF.index), so label-based .loc is required; the
        # previous positional .iloc silently returned the wrong row whenever
        # DF's index is not a contiguous 0..n-1 range.
        pizza_series = DF.loc[original_idx]
        rec_item = {}
        for key in frontend_keys_rec:
            df_col = df_to_frontend_map_rec.get(key)
            if key == 'id':
                rec_item[key] = int(original_idx)
            elif df_col and df_col in pizza_series:
                value = pizza_series[df_col]
                # Type conversions for JSON serializability.
                if isinstance(value, np.integer):
                    value = int(value)
                elif isinstance(value, np.floating):
                    value = float(value)
                elif isinstance(value, np.ndarray):
                    value = value.tolist()
                rec_item[key] = "" if pd.isna(value) else value
            elif key == 'crust_type' and not CRUST_TYPE_COL:
                rec_item[key] = "N/A"
            else:
                rec_item[key] = ""

        rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
        rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
        for k_final, v_final in rec_item.items():  # Final numpy type check
            if isinstance(v_final, np.generic):
                rec_item[k_final] = v_final.item()
        recommendations_list.append(rec_item)

    current_app.logger.info(f"Final recommendations count: {len(recommendations_list)}")
    return recommendations_list
543
 
544
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
@app.route('/recommend', methods=['POST'])
def recommend():
    """POST endpoint: parse the JSON preference payload sent by the frontend,
    normalize it, and return ranked pizza recommendations as JSON.

    Returns a JSON list on success, or a JSON error object with HTTP 500.
    """
    try:
        # BUGFIX: request.json returns None (or raises 415 in newer Flask) for
        # missing/non-JSON bodies, which made `key_js in data` below blow up
        # into a 500. Fall back to an empty dict so an empty/invalid body
        # simply yields "no preferences".
        data = request.get_json(silent=True) or {}
        preferences = {}  # Store processed preferences
        current_app.logger.info(f"Received recommendation request with data: {data}")

        # Numerical/range preferences; keys match the JS payload:
        # 'slices', 'rating', 'prep_time', 'price_range'.
        simple_numerical_prefs_js = ['slices', 'rating', 'prep_time']
        for key_js in simple_numerical_prefs_js:
            if key_js in data and data[key_js] is not None:
                try:
                    if key_js == 'rating':
                        preferences[key_js] = float(data[key_js])
                    elif key_js == 'prep_time':
                        # BUGFIX: tolerate values like "30 min" — the same
                        # format get_recommendations itself parses — instead
                        # of failing int() and dropping the preference.
                        preferences[key_js] = int(str(data[key_js]).lower().replace("min", "").strip())
                    else:  # slices
                        preferences[key_js] = int(data[key_js])
                except (TypeError, ValueError):
                    current_app.logger.warning(f"Could not parse numerical preference '{key_js}': {data[key_js]}")

        if 'price_range' in data and data['price_range']:
            try:
                preferences['price_range'] = [float(p) for p in data['price_range']]
            except (ValueError, TypeError):
                current_app.logger.warning(f"Could not parse price_range: {data['price_range']}")

        # Multi-select categorical preferences; an empty list means "Any".
        multi_select_prefs_js = [
            'toppings', 'servingsize', 'populargroup', 'dietarycategory',
            'spicelevel', 'saucetype', 'cheeseamount', 'restaurantchain',
            'seasonalavailability', 'breadtype', 'crusttype'
        ]
        for key_js in multi_select_prefs_js:
            if key_js in data and isinstance(data[key_js], list):
                preferences[key_js] = data[key_js]  # Expecting a list (can be empty for "Any")
            elif key_js in data:  # Present but not a list: treat as "Any"
                current_app.logger.warning(f"Preference for '{key_js}' was not a list: {data[key_js]}. Treating as empty (Any).")
                preferences[key_js] = []

        current_app.logger.info(f"Processed preferences for filtering: {preferences}")
        recommendations = get_recommendations(preferences)
        current_app.logger.info(f"Returning {len(recommendations)} recommendations after filtering and scoring.")
        return jsonify(recommendations)

    except Exception as e:
        current_app.logger.error(f"Error in /recommend endpoint: {e}", exc_info=True)
        return jsonify({"error": "Failed to get recommendations due to a server issue.", "details": str(e)}), 500
591
 
592
 
593
# --- Main Application Execution ---
# preprocess_data() is called at module level so it runs exactly once when the
# application (or each Gunicorn worker) imports this module; the routes above
# depend on the globals (DF, FEATURE_DF, SCALER) it populates.
try:
    logger.info("----- Starting data preprocessing at module load... -----")
    preprocess_data()  # Uses the default 'pizza.csv' path
    logger.info("----- Data preprocessing completed successfully at module load. -----")
    # Sanity-check the globals the routes rely on; log (don't raise) so the
    # failure is visible in the server logs rather than aborting the worker.
    if DF is None:
        logger.critical("CRITICAL AT STARTUP: Global DF is None after preprocess_data(). App will likely fail.")
    if FEATURE_DF is None:
        logger.critical("CRITICAL AT STARTUP: Global FEATURE_DF is None after preprocess_data(). App will likely fail.")
    if SCALER is None:  # SCALER should be initialized even if fitting fails
        logger.critical("CRITICAL AT STARTUP: Global SCALER is None after preprocess_data(). App will likely fail.")

except FileNotFoundError as e:
    logger.critical(f"CRITICAL ERROR AT MODULE LOAD (FileNotFoundError): {e}. Ensure 'pizza.csv' is in the /app directory (or same dir as app.py).")
    # In a production Gunicorn setup, the app might still try to start, leading to errors in routes.
    # For Hugging Face, it's better to log and let it attempt to run, as exiting might obscure logs.
except Exception as e:
    logger.critical(f"Unexpected critical startup error during preprocessing at module load: {e}", exc_info=True)
613
+
614
+
615
if __name__ == '__main__':
    # This block is only for local development (`python app.py`).
    # preprocess_data() already ran above when the module was imported.
    logger.info("----- Running Flask app directly (e.g., python app.py) -----")
    # Sanity check for local runs; the globals should already be set by the
    # module-level preprocessing call.
    if DF is None or FEATURE_DF is None or SCALER is None:
        logger.warning("One or more global data variables (DF, FEATURE_DF, SCALER) are None before local app.run(). This is unexpected if module-level preprocessing ran.")
        # Optionally, re-run preprocessing if critical for local dev and something went wrong with module-level load
        # logger.info("Attempting to re-run preprocess_data() for local development.")
        # preprocess_data()

    # use_reloader=False: the reloader re-imports the module on every code
    # change, which would re-run the (slow) global preprocessing each time.
    app.run(debug=True, host='0.0.0.0', port=7860, use_reloader=False)