"""Gradio app that predicts surplus units for every canteen on a given date.

On import the module loads a trained model and the original training CSV,
rebuilds the training-time feature column order from that CSV, and then
builds and launches a Gradio interface around ``predict_surplus``.
"""

import pickle
from datetime import datetime

import gradio as gr
import joblib
import numpy as np
import pandas as pd

MODEL_PATH = 'best_model.joblib'
# Assumes the original data file is available at this (Colab-style) path.
TRAINING_DATA_PATH = '/content/chennai_food_surplus_2024.csv'
CATEGORICAL_COLUMNS = ['canteen_id', 'canteen_name']
TARGET_COLUMN = 'surplus_units'

# These stay None unless *every* loading step below succeeds, so
# predict_surplus can detect a failed start-up with a single check.
best_model = None
unique_canteen_info = None
feature_columns = None

try:
    # NOTE: joblib.load unpickles arbitrary objects; only point MODEL_PATH at
    # a model file you trust.
    _loaded_model = joblib.load(MODEL_PATH)

    # Re-derive the training column order from the original data.
    # In a real deployment, save the list of training columns during training
    # and load it here instead of the full dataset.
    original_df = pd.read_csv(TRAINING_DATA_PATH)
    original_df.drop_duplicates(inplace=True)
    original_df_encoded = pd.get_dummies(
        original_df, columns=CATEGORICAL_COLUMNS, drop_first=True
    )
    original_df_encoded['day_of_year'] = pd.to_datetime(
        original_df_encoded[['year', 'month', 'day']]
    ).dt.dayofyear

    # Every column except the target is treated as a model feature.
    # NOTE(review): this assumes the CSV holds no extra non-feature columns;
    # any column missing at prediction time is filled with 0 below.
    _loaded_feature_columns = [
        col for col in original_df_encoded.columns if col != TARGET_COLUMN
    ]

    # One row per distinct (canteen_id, canteen_name) pair, indexed 0..n-1.
    _loaded_canteen_info = (
        original_df[CATEGORICAL_COLUMNS].drop_duplicates().reset_index(drop=True)
    )
except FileNotFoundError:
    print("Error: best_model.joblib or original data file not found. Model loading failed.")
except Exception as e:
    # Top-level boundary: report the problem and keep the app importable; the
    # UI will answer with a "not loaded" message instead of crashing.
    print(f"An error occurred during model or data loading: {e}")
else:
    best_model = _loaded_model
    feature_columns = _loaded_feature_columns
    unique_canteen_info = _loaded_canteen_info


def predict_surplus(day, month, year):
    """Predict surplus units for all canteens for a given date.

    Args:
        day: Day of the month, as delivered by the Gradio number field.
        month: Month number (1-12).
        year: Four-digit year.

    Returns:
        A human-readable, multi-line string with one entry per canteen, or an
        explanatory message when the model/data failed to load or the date is
        not valid (including empty input fields).
    """
    if best_model is None or unique_canteen_info is None or feature_columns is None:
        return "Model or necessary data not loaded. Cannot make predictions."

    try:
        # An empty number field arrives as None (TypeError on int()); NaN,
        # out-of-range and oversized values raise ValueError/OverflowError.
        prediction_date = datetime(int(year), int(month), int(day))
    except (TypeError, ValueError, OverflowError):
        return "Invalid date provided. Please enter valid day, month, and year."

    # Build one feature row per canteen for the requested date.
    prediction_df = unique_canteen_info.copy()
    prediction_df['year'] = prediction_date.year
    prediction_df['month'] = prediction_date.month
    prediction_df['day'] = prediction_date.day
    # isoweekday() is weekday() + 1 (Monday == 1), which the original author
    # notes is the convention used in the training data.
    prediction_df['day_of_week'] = prediction_date.isoweekday()
    prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday

    # One-hot encode the categorical features the same way as at load time.
    prediction_encoded = pd.get_dummies(
        prediction_df, columns=CATEGORICAL_COLUMNS, drop_first=True
    )

    # Align with the training features in one step: add any missing column as
    # 0, drop extras, and enforce the training column order.
    prediction_encoded = prediction_encoded.reindex(
        columns=feature_columns, fill_value=0
    )

    # Cast boolean dummy columns to int; per the original author's note the
    # model (CatBoost) expects numerical features.
    bool_columns = prediction_encoded.select_dtypes(include='bool').columns
    prediction_encoded = prediction_encoded.astype(
        {col: int for col in bool_columns}
    )

    predicted_surplus_values = best_model.predict(prediction_encoded)

    # Rows of prediction_encoded are in unique_canteen_info order, so pair
    # canteens with predictions positionally rather than by index label.
    canteen_rows = unique_canteen_info[CATEGORICAL_COLUMNS].itertuples(
        index=False, name=None
    )
    parts = ["Predicted Surplus Units:\n\n"]
    for (canteen_id, canteen_name), raw_value in zip(
        canteen_rows, predicted_surplus_values
    ):
        # Surplus is reported as a non-negative whole number of units.
        predicted_surplus = max(0, int(round(raw_value)))
        parts.append(f"Canteen ID: {canteen_id}\n")
        parts.append(f" Canteen Name: {canteen_name}\n")
        parts.append(f" Predicted Surplus: {predicted_surplus}\n")
        parts.append("-" * 20 + "\n")

    return "".join(parts)


# Create a Gradio interface with inputs for day, month, and year.
iface = gr.Interface(
    fn=predict_surplus,
    inputs=[
        gr.Number(label="Day", precision=0),
        gr.Number(label="Month", precision=0),
        gr.Number(label="Year", precision=0),
    ],
    outputs="text",
    title="Predict Canteen Surplus Units",
    description="Enter a date (day, month, year) to predict the surplus units for each canteen.",
)

iface.launch()