# Gradio app: predict per-canteen food surplus for a given date.
# (Hugging Face Spaces file-viewer residue — page labels, file size,
# commit hashes, and the line-number gutter — removed; it was not code.)
import gradio as gr
import pickle
import pandas as pd
from datetime import datetime
import joblib # Import joblib
import numpy as np # Import numpy for handling boolean columns
# Load the trained model and the data structures needed at prediction time.
# On any failure all three globals stay None, and predict_surplus reports
# the problem instead of crashing.
best_model = None
unique_canteen_info = None
feature_columns = None
try:
    # Load the trained model.
    best_model = joblib.load('best_model.joblib')
    # Load the original training data to recover the training column order.
    # In a real deployment, you would save the list of training columns during
    # training and load it here instead of re-deriving it from the full dataset.
    # NOTE(review): the '/content/...' path is Colab-specific — confirm it is
    # valid in the deployment environment.
    original_df = pd.read_csv('/content/chennai_food_surplus_2024.csv')
    original_df.drop_duplicates(inplace=True)
    # Re-apply the same preprocessing used during training.
    original_df_encoded = pd.get_dummies(
        original_df, columns=['canteen_id', 'canteen_name'], drop_first=True
    )
    original_df_encoded['day_of_year'] = pd.to_datetime(
        original_df_encoded[['year', 'month', 'day']]
    ).dt.dayofyear
    # Feature columns are everything except the prediction target.
    feature_columns = [col for col in original_df_encoded.columns if col != 'surplus_units']
    # One row per distinct canteen, used to build the prediction frame.
    unique_canteen_info = (
        original_df[['canteen_id', 'canteen_name']].drop_duplicates().reset_index(drop=True)
    )
except FileNotFoundError:
    print("Error: best_model.joblib or original data file not found. Model loading failed.")
except Exception as e:
    print(f"An error occurred during model or data loading: {e}")
def predict_surplus(day, month, year):
    """Predict surplus units for every known canteen on a given date.

    Parameters
    ----------
    day, month, year : int-like
        Calendar date components. Gradio ``Number`` inputs may deliver
        floats (or ``None``), so the values are coerced to ``int`` first.

    Returns
    -------
    str
        Human-readable report with one section per canteen, or an error
        message when the model is unavailable or the date is invalid.
    """
    if best_model is None or unique_canteen_info is None or feature_columns is None:
        return "Model or necessary data not loaded. Cannot make predictions."

    try:
        # gr.Number can emit floats/None; datetime requires ints. int() may
        # raise TypeError (None) and datetime ValueError (e.g. month 13).
        prediction_date = datetime(int(year), int(month), int(day))
    except (TypeError, ValueError):
        return "Invalid date provided. Please enter valid day, month, and year."

    # One prediction row per canteen, all for the requested date.
    prediction_df = unique_canteen_info.copy()
    prediction_df['year'] = prediction_date.year
    prediction_df['month'] = prediction_date.month
    prediction_df['day'] = prediction_date.day
    # Training data uses 1-based weekdays (Monday == 1); weekday() is 0-based.
    prediction_df['day_of_week'] = prediction_date.weekday() + 1
    prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday

    # One-hot encode the categorical features exactly as done at training time.
    prediction_encoded = pd.get_dummies(
        prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True
    )

    # Align with the training feature set: add any dummy columns missing from
    # this frame (False == "not this canteen"), then match training order.
    for col in feature_columns:
        if col not in prediction_encoded.columns:
            prediction_encoded[col] = False
    prediction_encoded = prediction_encoded[feature_columns]

    # The model expects numerical features, so cast boolean dummies to int.
    bool_cols = prediction_encoded.select_dtypes(include='bool').columns
    prediction_encoded[bool_cols] = prediction_encoded[bool_cols].astype(int)

    predicted_surplus_values = best_model.predict(prediction_encoded)

    # Build the report with join() instead of repeated += concatenation.
    lines = ["Predicted Surplus Units:", ""]
    for i, row in unique_canteen_info.iterrows():
        # Clamp to a non-negative integer; raw regression output may be
        # fractional or slightly negative.
        predicted = max(0, int(round(predicted_surplus_values[i])))
        lines.append(f"Canteen ID: {row['canteen_id']}")
        lines.append(f"  Canteen Name: {row['canteen_name']}")
        lines.append(f"  Predicted Surplus: {predicted}")
        lines.append("-" * 20)
    return "\n".join(lines) + "\n"
# Gradio UI: three integer fields for the date, free-text output for the
# per-canteen surplus report. (A stray trailing '|' scrape artifact after
# launch() was removed — it was a syntax error.)
iface = gr.Interface(
    fn=predict_surplus,
    inputs=[
        gr.Number(label="Day", precision=0),
        gr.Number(label="Month", precision=0),
        gr.Number(label="Year", precision=0),
    ],
    outputs="text",
    title="Predict Canteen Surplus Units",
    description="Enter a date (day, month, year) to predict the surplus units for each canteen.",
)

iface.launch()