Spaces:

BeeBasic
/

FoodForAll3

Sleeping

App Files Community

BeeBasic committed on Oct 31, 2025

Commit

9c8ed8b

verified ·

1 Parent(s): b621f51

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -30

app.py CHANGED Viewed

@@ -3,49 +3,45 @@ import pickle
 import pandas as pd
 from datetime import datetime
 import joblib # Import joblib
 # Load the trained model and necessary data structures
 try:
     # Load the trained model
     best_model = joblib.load('best_model.joblib')
-    # Recreate unique_canteen_info and training_columns based on the original data structure
-    # In a real deployment, these should be saved during training and loaded here.
-    # For this example, we will create dummy data structures based on the assumption of 10 canteens.
-    canteen_ids = [f'C00{i+1}' for i in range(10)]
-    canteen_names = [
-        'VIT University Main Canteen', 'SRM Campus Canteen', 'Anna University Mess',
-        'IIT Madras Hostel Mess', 'Sangeetha Veg Restaurant', 'Murugan Idli Shop',
-        'Adyar Ananda Bhavan (A2B)', 'The Marina Café', 'Buhari Hotel Canteen',
-        'Crescent College Cafeteria'
-    ]
-    unique_canteen_info = pd.DataFrame({'canteen_id': canteen_ids, 'canteen_name': canteen_names})
-    # Create a dummy DataFrame with all possible categories to get the column structure for one-hot encoding
-    dummy_data_for_cols = pd.DataFrame(columns=['canteen_id', 'canteen_name'])
-    for cid in canteen_ids:
-        for cname in canteen_names:
-             dummy_data_for_cols = pd.concat([dummy_data_for_cols, pd.DataFrame({'canteen_id': [cid], 'canteen_name': [cname]})], ignore_index=True)
-    dummy_encoded_for_cols = pd.get_dummies(dummy_data_for_cols, columns=['canteen_id', 'canteen_name'], drop_first=True)
-    training_columns = dummy_encoded_for_cols.columns.tolist()
 except FileNotFoundError:
     best_model = None
     unique_canteen_info = None
-    training_columns = None
-    print("Error: best_model.joblib not found. Model loading failed.")
 except Exception as e:
     best_model = None
     unique_canteen_info = None
-    training_columns = None
-    print(f"An error occurred during model loading: {e}")
 def predict_surplus(day, month, year):
     """Predicts surplus units for all canteens for a given date."""
-    if best_model is None or unique_canteen_info is None or training_columns is None:
         return "Model or necessary data not loaded. Cannot make predictions."
     try:
@@ -61,18 +57,22 @@ def predict_surplus(day, month, year):
     prediction_df['day_of_week'] = prediction_date.weekday() + 1 # Monday is 0, so add 1 to match the original data
     prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday
-    # One-hot encode and align columns with training data
     prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
-    for col in training_columns:
         if col not in prediction_encoded.columns:
-            prediction_encoded[col] = False
-    # Ensure the order of columns matches the training data features (excluding the target)
-    # Recreate the feature columns list based on the training_columns plus the time features
-    feature_columns = ['day', 'month', 'year', 'day_of_week', 'day_of_year'] + training_columns
     prediction_encoded = prediction_encoded[feature_columns]
     # Make predictions
     predicted_surplus_values = best_model.predict(prediction_encoded)
@@ -112,4 +112,4 @@ iface = gr.Interface(
     description="Enter a date (day, month, year) to predict the surplus units for each canteen."
 )
-iface.launch()

 import pandas as pd
 from datetime import datetime
 import joblib # Import joblib
+import numpy as np # Import numpy for handling boolean columns
 # Load the trained model and necessary data structures
 try:
     # Load the trained model
     best_model = joblib.load('best_model.joblib')
+    # Load the original training data to get the correct column order
+    # In a real deployment, you would save the list of training columns during training
+    # and load it here instead of the full dataset.
+    # Assuming the original data file is available at '/content/chennai_food_surplus_2024.csv'
+    original_df = pd.read_csv('/content/chennai_food_surplus_2024.csv')
+    original_df.drop_duplicates(inplace=True)
+    original_df_encoded = pd.get_dummies(original_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
+    original_df_encoded['day_of_year'] = pd.to_datetime(original_df_encoded[['year', 'month', 'day']]).dt.dayofyear
+    # Define the feature columns used during training
+    feature_columns = [col for col in original_df_encoded.columns if col != 'surplus_units']
+    # Recreate unique_canteen_info based on the original data structure
+    unique_canteen_info = original_df[['canteen_id', 'canteen_name']].drop_duplicates().reset_index(drop=True)
 except FileNotFoundError:
     best_model = None
     unique_canteen_info = None
+    feature_columns = None
+    print("Error: best_model.joblib or original data file not found. Model loading failed.")
 except Exception as e:
     best_model = None
     unique_canteen_info = None
+    feature_columns = None
+    print(f"An error occurred during model or data loading: {e}")
 def predict_surplus(day, month, year):
     """Predicts surplus units for all canteens for a given date."""
+    if best_model is None or unique_canteen_info is None or feature_columns is None:
         return "Model or necessary data not loaded. Cannot make predictions."
     try:
     prediction_df['day_of_week'] = prediction_date.weekday() + 1 # Monday is 0, so add 1 to match the original data
     prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday
+    # One-hot encode the categorical features
     prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
+    # Align columns with the training data - add missing columns and reindex
+    for col in feature_columns:
         if col not in prediction_encoded.columns:
+            prediction_encoded[col] = False # Add missing columns with False (or 0 for numerical)
+    # Ensure the order of columns matches the training data features
     prediction_encoded = prediction_encoded[feature_columns]
+    # Convert boolean columns to int (CatBoost expects numerical features)
+    for col in prediction_encoded.select_dtypes(include='bool').columns:
+        prediction_encoded[col] = prediction_encoded[col].astype(int)
     # Make predictions
     predicted_surplus_values = best_model.predict(prediction_encoded)
     description="Enter a date (day, month, year) to predict the surplus units for each canteen."
 )
+iface.launch()