Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,49 +3,45 @@ import pickle
|
|
| 3 |
import pandas as pd
|
| 4 |
from datetime import datetime
|
| 5 |
import joblib # Import joblib
|
|
|
|
| 6 |
|
| 7 |
# Load the trained model and necessary data structures
|
| 8 |
try:
|
| 9 |
# Load the trained model
|
| 10 |
best_model = joblib.load('best_model.joblib')
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
# In a real deployment,
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
'Crescent College Cafeteria'
|
| 21 |
-
]
|
| 22 |
-
unique_canteen_info = pd.DataFrame({'canteen_id': canteen_ids, 'canteen_name': canteen_names})
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
for cid in canteen_ids:
|
| 27 |
-
for cname in canteen_names:
|
| 28 |
-
dummy_data_for_cols = pd.concat([dummy_data_for_cols, pd.DataFrame({'canteen_id': [cid], 'canteen_name': [cname]})], ignore_index=True)
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
except FileNotFoundError:
|
| 35 |
best_model = None
|
| 36 |
unique_canteen_info = None
|
| 37 |
-
|
| 38 |
-
print("Error: best_model.joblib not found. Model loading failed.")
|
| 39 |
except Exception as e:
|
| 40 |
best_model = None
|
| 41 |
unique_canteen_info = None
|
| 42 |
-
|
| 43 |
-
print(f"An error occurred during model loading: {e}")
|
| 44 |
|
| 45 |
|
| 46 |
def predict_surplus(day, month, year):
|
| 47 |
"""Predicts surplus units for all canteens for a given date."""
|
| 48 |
-
if best_model is None or unique_canteen_info is None or
|
| 49 |
return "Model or necessary data not loaded. Cannot make predictions."
|
| 50 |
|
| 51 |
try:
|
|
@@ -61,18 +57,22 @@ def predict_surplus(day, month, year):
|
|
| 61 |
prediction_df['day_of_week'] = prediction_date.weekday() + 1 # Monday is 0, so add 1 to match the original data
|
| 62 |
prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday
|
| 63 |
|
| 64 |
-
# One-hot encode
|
| 65 |
prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
|
| 66 |
|
| 67 |
-
|
|
|
|
| 68 |
if col not in prediction_encoded.columns:
|
| 69 |
-
prediction_encoded[col] = False
|
| 70 |
|
| 71 |
-
# Ensure the order of columns matches the training data features
|
| 72 |
-
# Recreate the feature columns list based on the training_columns plus the time features
|
| 73 |
-
feature_columns = ['day', 'month', 'year', 'day_of_week', 'day_of_year'] + training_columns
|
| 74 |
prediction_encoded = prediction_encoded[feature_columns]
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# Make predictions
|
| 77 |
predicted_surplus_values = best_model.predict(prediction_encoded)
|
| 78 |
|
|
@@ -112,4 +112,4 @@ iface = gr.Interface(
|
|
| 112 |
description="Enter a date (day, month, year) to predict the surplus units for each canteen."
|
| 113 |
)
|
| 114 |
|
| 115 |
-
iface.launch()
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
from datetime import datetime
|
| 5 |
import joblib # Import joblib
|
| 6 |
+
import numpy as np # Import numpy for handling boolean columns
|
| 7 |
|
| 8 |
# Load the trained model and necessary data structures
|
| 9 |
try:
|
| 10 |
# Load the trained model
|
| 11 |
best_model = joblib.load('best_model.joblib')
|
| 12 |
|
| 13 |
+
# Load the original training data to get the correct column order
|
| 14 |
+
# In a real deployment, you would save the list of training columns during training
|
| 15 |
+
# and load it here instead of the full dataset.
|
| 16 |
+
# Assumes the original data file is available at '/content/chennai_food_surplus_2024.csv'; if it is missing, the FileNotFoundError handler below disables predictions.
|
| 17 |
+
original_df = pd.read_csv('/content/chennai_food_surplus_2024.csv')
|
| 18 |
+
original_df.drop_duplicates(inplace=True)
|
| 19 |
+
original_df_encoded = pd.get_dummies(original_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
|
| 20 |
+
original_df_encoded['day_of_year'] = pd.to_datetime(original_df_encoded[['year', 'month', 'day']]).dt.dayofyear
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
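# Define the feature columns: every encoded column except 'surplus_units' (presumably the prediction target).
# NOTE(review): assumed to match the columns and order the model was trained on — confirm.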
|
| 23 |
+
feature_columns = [col for col in original_df_encoded.columns if col != 'surplus_units']
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
|
| 26 |
+
# Recreate unique_canteen_info based on the original data structure
|
| 27 |
+
unique_canteen_info = original_df[['canteen_id', 'canteen_name']].drop_duplicates().reset_index(drop=True)
|
| 28 |
|
| 29 |
|
| 30 |
except FileNotFoundError:
|
| 31 |
best_model = None
|
| 32 |
unique_canteen_info = None
|
| 33 |
+
feature_columns = None
|
| 34 |
+
print("Error: best_model.joblib or original data file not found. Model loading failed.")
|
| 35 |
except Exception as e:
|
| 36 |
best_model = None
|
| 37 |
unique_canteen_info = None
|
| 38 |
+
feature_columns = None
|
| 39 |
+
print(f"An error occurred during model or data loading: {e}")
|
| 40 |
|
| 41 |
|
| 42 |
def predict_surplus(day, month, year):
|
| 43 |
"""Predicts surplus units for all canteens for a given date."""
|
| 44 |
+
if best_model is None or unique_canteen_info is None or feature_columns is None:
|
| 45 |
return "Model or necessary data not loaded. Cannot make predictions."
|
| 46 |
|
| 47 |
try:
|
|
|
|
| 57 |
prediction_df['day_of_week'] = prediction_date.weekday() + 1 # Monday is 0, so add 1 to match the original data
|
| 58 |
prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday
|
| 59 |
|
| 60 |
+
# One-hot encode the categorical features
|
| 61 |
prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
|
| 62 |
|
| 63 |
+
# Align columns with the training data - add missing columns and reindex
|
| 64 |
+
for col in feature_columns:
|
| 65 |
if col not in prediction_encoded.columns:
|
| 66 |
+
prediction_encoded[col] = False # Add missing columns with False (or 0 for numerical)
|
| 67 |
|
| 68 |
+
# Ensure the order of columns matches the training data features
|
|
|
|
|
|
|
| 69 |
prediction_encoded = prediction_encoded[feature_columns]
|
| 70 |
|
| 71 |
+
# Convert boolean columns to int (CatBoost expects numerical features)
|
| 72 |
+
for col in prediction_encoded.select_dtypes(include='bool').columns:
|
| 73 |
+
prediction_encoded[col] = prediction_encoded[col].astype(int)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
# Make predictions
|
| 77 |
predicted_surplus_values = best_model.predict(prediction_encoded)
|
| 78 |
|
|
|
|
| 112 |
description="Enter a date (day, month, year) to predict the surplus units for each canteen."
|
| 113 |
)
|
| 114 |
|
| 115 |
+
iface.launch()
|