BeeBasic committed on
Commit
9c8ed8b
·
verified ·
1 Parent(s): b621f51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -30
app.py CHANGED
@@ -3,49 +3,45 @@ import pickle
3
  import pandas as pd
4
  from datetime import datetime
5
  import joblib # Import joblib
 
6
 
7
  # Load the trained model and necessary data structures
8
  try:
9
  # Load the trained model
10
  best_model = joblib.load('best_model.joblib')
11
 
12
- # Recreate unique_canteen_info and training_columns based on the original data structure
13
- # In a real deployment, these should be saved during training and loaded here.
14
- # For this example, we will create dummy data structures based on the assumption of 10 canteens.
15
- canteen_ids = [f'C00{i+1}' for i in range(10)]
16
- canteen_names = [
17
- 'VIT University Main Canteen', 'SRM Campus Canteen', 'Anna University Mess',
18
- 'IIT Madras Hostel Mess', 'Sangeetha Veg Restaurant', 'Murugan Idli Shop',
19
- 'Adyar Ananda Bhavan (A2B)', 'The Marina Café', 'Buhari Hotel Canteen',
20
- 'Crescent College Cafeteria'
21
- ]
22
- unique_canteen_info = pd.DataFrame({'canteen_id': canteen_ids, 'canteen_name': canteen_names})
23
 
24
- # Create a dummy DataFrame with all possible categories to get the column structure for one-hot encoding
25
- dummy_data_for_cols = pd.DataFrame(columns=['canteen_id', 'canteen_name'])
26
- for cid in canteen_ids:
27
- for cname in canteen_names:
28
- dummy_data_for_cols = pd.concat([dummy_data_for_cols, pd.DataFrame({'canteen_id': [cid], 'canteen_name': [cname]})], ignore_index=True)
29
 
30
- dummy_encoded_for_cols = pd.get_dummies(dummy_data_for_cols, columns=['canteen_id', 'canteen_name'], drop_first=True)
31
- training_columns = dummy_encoded_for_cols.columns.tolist()
 
32
 
33
 
34
  except FileNotFoundError:
35
  best_model = None
36
  unique_canteen_info = None
37
- training_columns = None
38
- print("Error: best_model.joblib not found. Model loading failed.")
39
  except Exception as e:
40
  best_model = None
41
  unique_canteen_info = None
42
- training_columns = None
43
- print(f"An error occurred during model loading: {e}")
44
 
45
 
46
  def predict_surplus(day, month, year):
47
  """Predicts surplus units for all canteens for a given date."""
48
- if best_model is None or unique_canteen_info is None or training_columns is None:
49
  return "Model or necessary data not loaded. Cannot make predictions."
50
 
51
  try:
@@ -61,18 +57,22 @@ def predict_surplus(day, month, year):
61
  prediction_df['day_of_week'] = prediction_date.weekday() + 1 # Monday is 0, so add 1 to match the original data
62
  prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday
63
 
64
- # One-hot encode and align columns with training data
65
  prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
66
 
67
- for col in training_columns:
 
68
  if col not in prediction_encoded.columns:
69
- prediction_encoded[col] = False
70
 
71
- # Ensure the order of columns matches the training data features (excluding the target)
72
- # Recreate the feature columns list based on the training_columns plus the time features
73
- feature_columns = ['day', 'month', 'year', 'day_of_week', 'day_of_year'] + training_columns
74
  prediction_encoded = prediction_encoded[feature_columns]
75
 
 
 
 
 
 
76
  # Make predictions
77
  predicted_surplus_values = best_model.predict(prediction_encoded)
78
 
@@ -112,4 +112,4 @@ iface = gr.Interface(
112
  description="Enter a date (day, month, year) to predict the surplus units for each canteen."
113
  )
114
 
115
- iface.launch()
 
3
  import pandas as pd
4
  from datetime import datetime
5
  import joblib # Import joblib
6
+ import numpy as np # Import numpy for handling boolean columns
7
 
8
  # Load the trained model and necessary data structures
9
  try:
10
  # Load the trained model
11
  best_model = joblib.load('best_model.joblib')
12
 
13
+ # Load the original training data to get the correct column order
14
+ # In a real deployment, you would save the list of training columns during training
15
+ # and load it here instead of the full dataset.
16
+ # Assuming the original data file is available at '/content/chennai_food_surplus_2024.csv'
17
+ original_df = pd.read_csv('/content/chennai_food_surplus_2024.csv')
18
+ original_df.drop_duplicates(inplace=True)
19
+ original_df_encoded = pd.get_dummies(original_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
20
+ original_df_encoded['day_of_year'] = pd.to_datetime(original_df_encoded[['year', 'month', 'day']]).dt.dayofyear
 
 
 
21
 
22
+ # Define the feature columns used during training
23
+ feature_columns = [col for col in original_df_encoded.columns if col != 'surplus_units']
 
 
 
24
 
25
+
26
+ # Recreate unique_canteen_info based on the original data structure
27
+ unique_canteen_info = original_df[['canteen_id', 'canteen_name']].drop_duplicates().reset_index(drop=True)
28
 
29
 
30
  except FileNotFoundError:
31
  best_model = None
32
  unique_canteen_info = None
33
+ feature_columns = None
34
+ print("Error: best_model.joblib or original data file not found. Model loading failed.")
35
  except Exception as e:
36
  best_model = None
37
  unique_canteen_info = None
38
+ feature_columns = None
39
+ print(f"An error occurred during model or data loading: {e}")
40
 
41
 
42
  def predict_surplus(day, month, year):
43
  """Predicts surplus units for all canteens for a given date."""
44
+ if best_model is None or unique_canteen_info is None or feature_columns is None:
45
  return "Model or necessary data not loaded. Cannot make predictions."
46
 
47
  try:
 
57
  prediction_df['day_of_week'] = prediction_date.weekday() + 1 # Monday is 0, so add 1 to match the original data
58
  prediction_df['day_of_year'] = prediction_date.timetuple().tm_yday
59
 
60
+ # One-hot encode the categorical features
61
  prediction_encoded = pd.get_dummies(prediction_df, columns=['canteen_id', 'canteen_name'], drop_first=True)
62
 
63
+ # Align columns with the training data - add missing columns and reindex
64
+ for col in feature_columns:
65
  if col not in prediction_encoded.columns:
66
+ prediction_encoded[col] = False # Add missing columns with False (or 0 for numerical)
67
 
68
+ # Ensure the order of columns matches the training data features
 
 
69
  prediction_encoded = prediction_encoded[feature_columns]
70
 
71
+ # Convert boolean columns to int (CatBoost expects numerical features)
72
+ for col in prediction_encoded.select_dtypes(include='bool').columns:
73
+ prediction_encoded[col] = prediction_encoded[col].astype(int)
74
+
75
+
76
  # Make predictions
77
  predicted_surplus_values = best_model.predict(prediction_encoded)
78
 
 
112
  description="Enter a date (day, month, year) to predict the surplus units for each canteen."
113
  )
114
 
115
+ iface.launch()