# NOTE: removed non-code residue from a code-viewer export (file-size header,
# git blob hashes, and a line-number gutter). The Python source begins below.
import gradio as gr
import pickle
import pandas as pd
from datetime import datetime
import joblib # Import joblib
import numpy as np # Import numpy for handling boolean columns

# Load the trained model and necessary data structures
# NOTE: this runs at import time; on any failure the three globals below are
# set to None and predict_surplus() refuses to run instead of crashing.
try:
    # Load the trained model
    # (joblib-serialized estimator; exact model type is not visible here)
    best_model = joblib.load('best_model.joblib')

    # Load the original training data to get the correct column order
    # In a real deployment, you would save the list of training columns during training
    # and load it here instead of the full dataset.
    # Assuming the original data file is available at '/content/chennai_food_surplus_2024.csv'
    original_df = pd.read_csv('/content/chennai_food_surplus_2024.csv')
    original_df.drop_duplicates(inplace=True)
    # One-hot encode the canteen identifiers the same way as during training;
    # drop_first=True drops one dummy per category (avoids collinearity).
    original_df_encoded = pd.get_dummies(original_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
    # Derive day_of_year from the date parts.
    # NOTE(review): assumes the CSV provides 'year', 'month' and 'day' columns,
    # and (since feature_columns below includes everything) presumably a
    # 'day_of_week' column too -- confirm against the actual CSV schema.
    original_df_encoded['day_of_year'] = pd.to_datetime(original_df_encoded[['year', 'month', 'day']]).dt.dayofyear

    # Define the feature columns used during training
    # (every encoded column except the regression target 'surplus_units')
    feature_columns = [col for col in original_df_encoded.columns if col != 'surplus_units']


    # Recreate unique_canteen_info based on the original data structure:
    # one row per distinct (canteen_id, canteen_name) pair, reindexed 0..n-1
    # so positional lookups line up with the model's prediction array.
    unique_canteen_info = original_df[['canteen_id', 'canteen_name']].drop_duplicates().reset_index(drop=True)


except FileNotFoundError:
    # Model artifact or CSV missing: disable predictions but keep the app up.
    best_model = None
    unique_canteen_info = None
    feature_columns = None
    print("Error: best_model.joblib or original data file not found. Model loading failed.")
except Exception as e:
    # Broad catch so a corrupt artifact or schema change doesn't crash the
    # import; the UI still launches and reports the failure per request.
    best_model = None
    unique_canteen_info = None
    feature_columns = None
    print(f"An error occurred during model or data loading: {e}")


def predict_surplus(day, month, year):
    """Predict surplus units for every canteen on a given date.

    Parameters
    ----------
    day, month, year : int or float
        Date components. Gradio ``Number`` inputs can deliver floats even
        with ``precision=0``, so values are coerced to ``int`` before use.

    Returns
    -------
    str
        A human-readable report of the predicted surplus per canteen, or an
        error message when the model/data failed to load or the date is
        invalid.
    """
    if best_model is None or unique_canteen_info is None or feature_columns is None:
        return "Model or necessary data not loaded. Cannot make predictions."

    # Validate the date. Coerce to int first: datetime() raises TypeError
    # (not ValueError) on float arguments, which would otherwise escape the
    # handler and crash the request.
    try:
        prediction_date = datetime(int(year), int(month), int(day))
    except (TypeError, ValueError):
        return "Invalid date provided. Please enter valid day, month, and year."

    # Build one prediction row per canteen for the requested date.
    prediction_df = unique_canteen_info.copy()
    prediction_df['year'] = prediction_date.year
    prediction_df['month'] = prediction_date.month
    prediction_df['day'] = prediction_date.day
    # weekday() is 0-based (Monday=0); +1 to match the training data's
    # 1-based day_of_week encoding (assumed -- confirm against the CSV).
    prediction_df['day_of_week'] = prediction_date.weekday() + 1
    prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday

    # One-hot encode the categorical features exactly as during training.
    prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)

    # Align with the training feature set: reindex() both adds any dummy
    # columns missing from this batch (filled with 0) and enforces the
    # training column order in a single vectorized call.
    prediction_encoded = prediction_encoded.reindex(columns=feature_columns, fill_value=0)

    # The model expects numeric features; get_dummies emits bool columns.
    bool_cols = prediction_encoded.select_dtypes(include='bool').columns
    if len(bool_cols):
        prediction_encoded[bool_cols] = prediction_encoded[bool_cols].astype(int)

    # Make predictions (one value per canteen row, in positional order).
    predicted_surplus_values = best_model.predict(prediction_encoded)

    # Format the report; clamp each prediction to a non-negative integer
    # since a negative surplus count is meaningless.
    formatted_output = "Predicted Surplus Units:\n\n"
    for i, row in unique_canteen_info.iterrows():
        predicted_surplus = max(0, int(round(predicted_surplus_values[i])))
        formatted_output += f"Canteen ID: {row['canteen_id']}\n"
        formatted_output += f"  Canteen Name: {row['canteen_name']}\n"
        formatted_output += f"  Predicted Surplus: {predicted_surplus}\n"
        formatted_output += "-" * 20 + "\n"

    return formatted_output


# Build the Gradio UI: three integer-precision numeric fields (day, month,
# year) wired to predict_surplus, with a plain-text prediction report.
date_fields = [
    gr.Number(label="Day", precision=0),
    gr.Number(label="Month", precision=0),
    gr.Number(label="Year", precision=0),
]

iface = gr.Interface(
    fn=predict_surplus,
    inputs=date_fields,
    outputs="text",
    title="Predict Canteen Surplus Units",
    description="Enter a date (day, month, year) to predict the surplus units for each canteen.",
)

iface.launch()