Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pickle | |
| import pandas as pd | |
| from datetime import datetime | |
| import joblib # Import joblib | |
| import numpy as np # Import numpy for handling boolean columns | |
# Load the trained model together with the metadata needed to rebuild its
# feature matrix at prediction time. On any failure the three globals
# (best_model, unique_canteen_info, feature_columns) are all reset to None.
try:
    best_model = joblib.load('best_model.joblib')

    # The training CSV is re-read only to recover the encoded column order and
    # the list of canteens. In a real deployment the list of training columns
    # would be saved during training and loaded here instead of the full
    # dataset. The path below assumes the file is available at that location.
    original_df = pd.read_csv('/content/chennai_food_surplus_2024.csv')
    original_df = original_df.drop_duplicates()

    # Repeat the preprocessing steps: one-hot encode the canteen identifiers,
    # then derive day_of_year from the year/month/day columns.
    original_df_encoded = pd.get_dummies(
        original_df,
        columns=['canteen_id', 'canteen_name'],
        drop_first=True,
    )
    original_df_encoded['day_of_year'] = pd.to_datetime(
        original_df_encoded[['year', 'month', 'day']]
    ).dt.dayofyear

    # Every encoded column except the target is treated as a model feature.
    feature_columns = [
        name for name in original_df_encoded.columns if name != 'surplus_units'
    ]

    # One row per distinct (canteen_id, canteen_name) pair, indexed 0..n-1.
    unique_canteen_info = (
        original_df[['canteen_id', 'canteen_name']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
except Exception as e:
    # Leave the app in a well-defined "not loaded" state.
    best_model = None
    unique_canteen_info = None
    feature_columns = None
    if isinstance(e, FileNotFoundError):
        print("Error: best_model.joblib or original data file not found. Model loading failed.")
    else:
        print(f"An error occurred during model or data loading: {e}")
def _to_whole_number(value):
    """Return *value* as an ``int``, rejecting missing or fractional input.

    Raises:
        TypeError: If *value* is ``None`` or otherwise not number-like.
        ValueError: If *value* is NaN, infinite, or has a fractional part.
    """
    number = float(value)
    if not number.is_integer():
        raise ValueError(f"not a whole number: {value!r}")
    return int(number)


def predict_surplus(day, month, year):
    """Predict surplus units for every known canteen on the given date.

    Args:
        day: Day of the month (whole number; integral floats are accepted).
        month: Month of the year, 1-12 (whole number).
        year: Calendar year (whole number).

    Returns:
        A human-readable report with one section per row of
        ``unique_canteen_info``, or an error message when the model/data
        failed to load or the inputs do not form a valid calendar date.
    """
    if best_model is None or unique_canteen_info is None or feature_columns is None:
        return "Model or necessary data not loaded. Cannot make predictions."

    # Inputs may arrive as None (e.g. an empty form field) or as floats;
    # datetime() raises TypeError for those, so coerce first and treat every
    # conversion failure as an invalid date instead of crashing the handler.
    try:
        prediction_date = datetime(
            _to_whole_number(year),
            _to_whole_number(month),
            _to_whole_number(day),
        )
    except (TypeError, ValueError, OverflowError):
        return "Invalid date provided. Please enter valid day, month, and year."

    # One feature row per canteen, all sharing the requested date.
    # reset_index guarantees row position i lines up with prediction i.
    canteens = unique_canteen_info.reset_index(drop=True)
    features = canteens.assign(
        year=prediction_date.year,
        month=prediction_date.month,
        day=prediction_date.day,
        # isoweekday() is weekday() + 1 (Monday == 1), matching the original data.
        day_of_week=prediction_date.isoweekday(),
        day_of_year=prediction_date.timetuple().tm_yday,
    )

    # One-hot encode the categorical features the same way as at load time.
    encoded = pd.get_dummies(
        features, columns=['canteen_id', 'canteen_name'], drop_first=True
    )

    # Align with the training features: enforce column order and fill any
    # column absent from this frame with 0.
    encoded = encoded.reindex(columns=feature_columns, fill_value=0)

    # Convert boolean dummy columns to int (the model expects numerical features).
    bool_columns = encoded.select_dtypes(include='bool').columns
    encoded = encoded.astype({name: int for name in bool_columns})

    predictions = best_model.predict(encoded)

    # Pair canteens with predictions by position and report every row.
    sections = ["Predicted Surplus Units:\n\n"]
    rows = zip(canteens['canteen_id'], canteens['canteen_name'], predictions)
    for canteen_id, canteen_name, raw_value in rows:
        surplus = max(0, int(round(raw_value)))  # Ensure non-negative integer
        sections.append(
            f"Canteen ID: {canteen_id}\n"
            f" Canteen Name: {canteen_name}\n"
            f" Predicted Surplus: {surplus}\n"
            + "-" * 20 + "\n"
        )
    return "".join(sections)
# Build the Gradio UI: three whole-number fields (day, month, year) feed
# predict_surplus, and its report is displayed as plain text.
iface = gr.Interface(
    fn=predict_surplus,
    inputs=[
        gr.Number(label=field_label, precision=0)
        for field_label in ("Day", "Month", "Year")
    ],
    outputs="text",
    title="Predict Canteen Surplus Units",
    description="Enter a date (day, month, year) to predict the surplus units for each canteen.",
)

# Start serving the interface.
iface.launch()