VishnuCodes committed on
Commit 3c7d03a · verified · 1 parent: 7e48847

Create main.py

Files changed (1)
main.py  +613  -0
main.py ADDED
@@ -0,0 +1,613 @@
+ from fastapi import FastAPI, UploadFile, File
+ from fastapi.responses import StreamingResponse, JSONResponse
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.compose import ColumnTransformer, make_column_transformer
+ from sklearn.ensemble import RandomForestRegressor
+ from sklearn.pipeline import Pipeline
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from xgboost import XGBRegressor
+ from statsmodels.tsa.arima.model import ARIMA
+ from statsmodels.tsa.statespace.sarimax import SARIMAX
+ import math
+ import io
+ import matplotlib
+ matplotlib.use("Agg")  # non-interactive backend so plotting works in a headless server
+ import matplotlib.pyplot as plt
+ from typing import List
+
+ app = FastAPI()
+
+ # XGBoost-only endpoint
+ @app.post("/train-xgboost")
+ async def train_xgboost(files: List[UploadFile] = File(...)):
+     try:
+         # Read the uploaded CSV files into DataFrames keyed by filename
+         data_frames = {}
+         for file in files:
+             content = await file.read()
+             data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+         # Extract the relevant DataFrames (uploads must use these exact filenames)
+         sales_data = data_frames['restaurant_sales_linked.csv']
+         menu_data = data_frames['restaurant_menu_final.csv']
+
+         # Parse 'Date' in the sales data
+         sales_data['Date'] = pd.to_datetime(sales_data['Date'])
+
+         # Aggregate weekly sales data for each menu item
+         sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
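+         # Note: isocalendar().week gives the ISO week-of-year (1-53), so data
+         # spanning multiple years is pooled by week number rather than kept in
+         # true chronological order.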
+         weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()
+
+         # Merge menu data for menu item details
+         merged_data = pd.merge(weekly_sales, menu_data, on='Menu_ID', how='left')
+
+         # Feature preparation
+         features = merged_data[['Week', 'Menu_ID', 'Price', 'Revenue']]
+         target = merged_data['Quantity Sold']
+
+         # Split data into training and testing sets
+         X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
+
+         # Preprocessing: scale numerical features and one-hot encode categorical features
+         numerical_features = ['Week', 'Price', 'Revenue']
+         categorical_features = ['Menu_ID']
+
+         column_transformer = make_column_transformer(
+             (StandardScaler(), numerical_features),
+             (OneHotEncoder(handle_unknown="ignore"), categorical_features),
+             remainder="drop",
+         )
+
+         # Fit on the training set only, then apply the same transform to the test set
+         X_train_transformed = column_transformer.fit_transform(X_train)
+         X_test_transformed = column_transformer.transform(X_test)
+
+         # XGBoost model
+         xgb_model = XGBRegressor(
+             n_estimators=25,
+             learning_rate=0.1,
+             max_depth=5,
+             random_state=42,
+             tree_method="hist",
+             eval_metric="rmse",
+         )
+
+         # Train the XGBoost model, tracking RMSE on the held-out set
+         xgb_model.fit(
+             X_train_transformed,
+             y_train,
+             eval_set=[(X_test_transformed, y_test)],
+             verbose=False,
+         )
+
+         # Predictions on the test set
+         xgb_y_pred = xgb_model.predict(X_test_transformed)
+
+         # Evaluation metrics
+         xgb_mse = mean_squared_error(y_test, xgb_y_pred)
+         xgb_rmse = math.sqrt(xgb_mse)
+         xgb_mae = mean_absolute_error(y_test, xgb_y_pred)
+         xgb_r2 = r2_score(y_test, xgb_y_pred)
+
+         # Plot actual vs. predicted values
+         plt.figure(figsize=(10, 6))
+         plt.plot(y_test.values, label="Actual", alpha=0.7)
+         plt.plot(xgb_y_pred, label="Predicted", alpha=0.7)
+         plt.legend()
+         plt.title("Actual vs. Predicted (XGBoost)")
+         plt.xlabel("Index")
+         plt.ylabel("Quantity Sold")
+
+         # Save the plot to a BytesIO buffer
+         buf = io.BytesIO()
+         plt.savefig(buf, format="png")
+         buf.seek(0)
+         plt.close()
+
+         # Return response with metrics and graph
+         headers = {
+             "XGBoost_MSE": str(xgb_mse),
+             "XGBoost_RMSE": str(xgb_rmse),
+             "XGBoost_MAE": str(xgb_mae),
+             "XGBoost_R2": str(xgb_r2),
+         }
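+         # The PNG image is the response body, so the evaluation metrics are
+         # returned in custom HTTP response headers rather than a JSON payload.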
+
+         return StreamingResponse(buf, media_type="image/png", headers=headers)
+
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=400)
+
+ # Hybrid SARIMAX + XGBoost endpoint: XGBoost models the SARIMAX residuals
+ @app.post("/train-sarimax-xgboost")
+ async def train_sarimax_xgboost(files: List[UploadFile] = File(...)):
+     try:
+         # Read the uploaded CSV files into DataFrames
+         data_frames = {}
+         for file in files:
+             content = await file.read()
+             data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+         # Extract the relevant DataFrame
+         sales_data = data_frames['restaurant_sales_linked.csv']
+
+         # Parse 'Date' and reduce each date to the start of its week
+         sales_data['Date'] = pd.to_datetime(sales_data['Date'])
+         sales_data['Week'] = sales_data['Date'].dt.to_period('W').astype(str)
+         sales_data['Week'] = sales_data['Week'].str.split('/').str[0]
+         sales_data['Week'] = pd.to_datetime(sales_data['Week'])
+
+         # Select a single menu item for demonstration
+         menu_id = 1
+         menu_sales = sales_data[sales_data['Menu_ID'] == menu_id].set_index('Week')
+
+         # Guard: make sure the selected menu item has data
+         if menu_sales.empty:
+             raise ValueError(f"No data available for Menu_ID {menu_id}")
+
+         # Train-test split for SARIMAX (80/20, in time order)
+         train_size = int(len(menu_sales) * 0.8)
+         train_data, test_data = menu_sales[:train_size], menu_sales[train_size:]
+
+         # SARIMAX model
+         sarimax_model = SARIMAX(train_data['Quantity Sold'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
+         sarimax_result = sarimax_model.fit(disp=False)
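+         # Note: seasonal_order=(1, 1, 1, 12) assumes a 12-period seasonal
+         # cycle; for weekly buckets an annual cycle would be closer to 52, so
+         # this periodicity is an assumption worth revisiting.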
+
+         # Forecast over the test horizon
+         sarimax_pred = sarimax_result.get_forecast(steps=len(test_data)).predicted_mean
+
+         # Sanity check: forecast and test lengths must match
+         if len(sarimax_pred) != len(test_data):
+             raise ValueError(f"Length mismatch: SARIMAX predictions ({len(sarimax_pred)}) vs Test data ({len(test_data)})")
+
+         # Calculate residuals positionally; comparing raw values avoids
+         # index-alignment NaNs when the forecast index differs from the test index
+         residuals = pd.Series(test_data['Quantity Sold'].values - sarimax_pred.values)
+
+         # Sanity check on the residuals
+         if len(residuals) != len(test_data):
+             raise ValueError("Residuals length mismatch with test data")
+
+         # Prepare data for XGBoost: Revenue as the feature, residuals as the target
+         xgboost_features = test_data[['Revenue']].iloc[:len(sarimax_pred)]
+         xgboost_target = residuals.reset_index(drop=True)
+
+         # Sanity check: feature and target lengths must match
+         if len(xgboost_features) != len(xgboost_target):
+             raise ValueError("XGBoost features and target lengths do not match")
+
+         # Preprocessing for XGBoost
+         scaler = StandardScaler()
+         X_transformed = scaler.fit_transform(xgboost_features)
+
+         # XGBoost model for the residuals
+         xgb_model = XGBRegressor(
+             n_estimators=25,
+             learning_rate=0.1,
+             max_depth=5,
+             random_state=42,
+             tree_method="hist",
+             eval_metric="rmse",
+         )
+         xgb_model.fit(X_transformed, xgboost_target)
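+         # Caveat: the residual model is fit on the same test window it is
+         # evaluated on below, so the combined metrics are partly in-sample
+         # and will look optimistic versus a true out-of-sample evaluation.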
+
+         # Combine SARIMAX and XGBoost predictions
+         xgb_residual_pred = xgb_model.predict(X_transformed)
+         combined_pred = sarimax_pred.values + xgb_residual_pred
+
+         # Evaluation metrics
+         combined_mse = mean_squared_error(test_data['Quantity Sold'], combined_pred)
+         combined_rmse = math.sqrt(combined_mse)
+         combined_mae = mean_absolute_error(test_data['Quantity Sold'], combined_pred)
+         combined_r2 = r2_score(test_data['Quantity Sold'], combined_pred)
+
+         # Plot actual vs. predicted values (positionally, so both series share an x-axis)
+         plt.figure(figsize=(10, 6))
+         plt.plot(test_data['Quantity Sold'].values, label="Actual", alpha=0.7)
+         plt.plot(combined_pred, label="SARIMAX + XGBoost Predicted", alpha=0.7)
+         plt.legend()
+         plt.title("Actual vs. Predicted (SARIMAX + XGBoost)")
+         plt.xlabel("Index")
+         plt.ylabel("Quantity Sold")
+
+         # Save the plot to a BytesIO buffer
+         buf = io.BytesIO()
+         plt.savefig(buf, format="png")
+         buf.seek(0)
+         plt.close()
+
+         # Return response with metrics in headers and the graph as the body
+         headers = {
+             "SARIMAX_XGBoost_MSE": str(combined_mse),
+             "SARIMAX_XGBoost_RMSE": str(combined_rmse),
+             "SARIMAX_XGBoost_MAE": str(combined_mae),
+             "SARIMAX_XGBoost_R2": str(combined_r2),
+         }
+
+         return StreamingResponse(buf, media_type="image/png", headers=headers)
+
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=400)
+
+ # Hybrid Random Forest + XGBoost endpoint: XGBoost models the forest's residuals
+ @app.post("/train-randomforest-xgboost")
+ async def train_randomforest_xgboost(files: List[UploadFile] = File(...)):
+     try:
+         # Read the uploaded CSV files into DataFrames
+         data_frames = {}
+         for file in files:
+             content = await file.read()
+             data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+         # Extract the relevant DataFrame
+         sales_data = data_frames['restaurant_sales_linked.csv']
+
+         # Parse 'Date' in the sales data
+         sales_data['Date'] = pd.to_datetime(sales_data['Date'])
+
+         # Aggregate weekly sales data for each menu item
+         sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
+         weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()
+
+         # Select features and target
+         features = weekly_sales[['Week', 'Menu_ID', 'Revenue']]
+         target = weekly_sales['Quantity Sold']
+
+         # Train-test split
+         X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
+
+         # Preprocessing
+         numerical_features = ['Week', 'Revenue']
+         categorical_features = ['Menu_ID']
+
+         column_transformer = make_column_transformer(
+             (StandardScaler(), numerical_features),
+             (OneHotEncoder(handle_unknown="ignore"), categorical_features),
+             remainder="passthrough",
+         )
+
+         # Transform features
+         X_train_transformed = column_transformer.fit_transform(X_train)
+         X_test_transformed = column_transformer.transform(X_test)
+
+         # Random Forest regressor
+         rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
+         rf_model.fit(X_train_transformed, y_train)
+
+         # Random Forest predictions on the test set
+         rf_pred = rf_model.predict(X_test_transformed)
+
+         # Residuals of the forest on the test set
+         residuals = y_test - rf_pred
+
+         # XGBoost model for the residuals
+         xgb_model = XGBRegressor(
+             n_estimators=50,
+             learning_rate=0.1,
+             max_depth=5,
+             random_state=42,
+             tree_method="hist",
+             eval_metric="rmse",
+         )
+
+         # Train XGBoost on the residuals
+         xgb_model.fit(X_test_transformed, residuals)
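+         # Caveat: training the residual corrector on the held-out rows it is
+         # then evaluated on leaks test information; fitting on training-set
+         # residuals instead would give an honest error estimate.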
+
+         # XGBoost predictions for the residuals
+         xgb_residual_pred = xgb_model.predict(X_test_transformed)
+
+         # Combine predictions
+         combined_pred = rf_pred + xgb_residual_pred
+
+         # Evaluation metrics
+         mse = mean_squared_error(y_test, combined_pred)
+         rmse = math.sqrt(mse)
+         mae = mean_absolute_error(y_test, combined_pred)
+         r2 = r2_score(y_test, combined_pred)
+
+         # Plot actual vs. predicted values
+         plt.figure(figsize=(10, 6))
+         plt.plot(y_test.values, label="Actual", alpha=0.7)
+         plt.plot(combined_pred, label="Random Forest + XGBoost Predicted", alpha=0.7)
+         plt.legend()
+         plt.title("Actual vs. Predicted (Random Forest + XGBoost)")
+         plt.xlabel("Index")
+         plt.ylabel("Quantity Sold")
+
+         # Save the plot to a BytesIO buffer
+         buf = io.BytesIO()
+         plt.savefig(buf, format="png")
+         buf.seek(0)
+         plt.close()
+
+         # Return response with metrics in headers and the graph as the body
+         headers = {
+             "RF_XGBoost_MSE": str(mse),
+             "RF_XGBoost_RMSE": str(rmse),
+             "RF_XGBoost_MAE": str(mae),
+             "RF_XGBoost_R2": str(r2),
+         }
+
+         return StreamingResponse(buf, media_type="image/png", headers=headers)
+
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=400)
+
+ # Plain Random Forest endpoint
+ @app.post("/train-randomforest")
+ async def train_randomforest(files: List[UploadFile] = File(...)):
+     try:
+         # Read the uploaded CSV files into DataFrames
+         data_frames = {}
+         for file in files:
+             content = await file.read()
+             data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+         # Extract the relevant DataFrames
+         sales_data = data_frames['restaurant_sales_linked.csv']
+         menu_data = data_frames['restaurant_menu_final.csv']
+         recipe_data = data_frames['restaurant_recipe_final.csv']
+         inventory_data = data_frames['restaurant_inventory_linked.csv']
+
+         # Preprocessing
+         sales_data['Date'] = pd.to_datetime(sales_data['Date'])
+         inventory_data['Date'] = pd.to_datetime(inventory_data['Date'])
+
+         # Aggregate weekly sales data for each menu item
+         sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
+         weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()
+
+         # Merge menu data for menu item details
+         merged_data = pd.merge(weekly_sales, menu_data, on='Menu_ID', how='left')
+
+         # Calculate ingredient quantities needed for the weekly sales
+         ingredient_requirements = pd.merge(merged_data, recipe_data, on='Menu_ID', how='left')
+         ingredient_requirements['Total_Ingredient_Quantity'] = (
+             ingredient_requirements['Quantity Sold'] * ingredient_requirements['Quantity_Per_Unit']
+         )
+
+         # Aggregate ingredient requirements per week
+         ingredient_needs = ingredient_requirements.groupby(['Week', 'Ingredient_ID']).agg(
+             {'Total_Ingredient_Quantity': 'sum'}
+         ).reset_index()
+
+         # Attach ingredient needs to the weekly data
+         merged_data = pd.merge(merged_data, ingredient_needs, on='Week', how='left', suffixes=('', '_Ingredient'))
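+         # Note: this merge fans each (Week, Menu_ID) row out once per
+         # ingredient, and the ingredient columns are not used as features
+         # below, so the training set carries duplicated menu-item rows.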
+
+         # Select features and target
+         features = merged_data[['Week', 'Menu_ID', 'Price', 'Revenue']]
+         target = merged_data['Quantity Sold']
+
+         # Split data into training and testing sets
+         X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
+
+         # Preprocessing: scale numerical features and one-hot encode categorical features
+         numerical_features = ['Week', 'Price', 'Revenue']
+         categorical_features = ['Menu_ID']
+
+         preprocessor = ColumnTransformer(
+             transformers=[
+                 ('num', StandardScaler(), numerical_features),
+                 ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
+             ]
+         )
+
+         # Random Forest regressor pipeline
+         pipeline = Pipeline(steps=[
+             ('preprocessor', preprocessor),
+             ('model', RandomForestRegressor(n_estimators=100, random_state=42))
+         ])
+
+         # Train the model
+         pipeline.fit(X_train, y_train)
+
+         # Predictions
+         y_pred = pipeline.predict(X_test)
+
+         # Evaluation metrics
+         mse = mean_squared_error(y_test, y_pred)
+         rmse = math.sqrt(mse)
+         mae = mean_absolute_error(y_test, y_pred)
+         r2 = r2_score(y_test, y_pred)
+
+         # Plot actual vs. predicted values
+         plt.figure(figsize=(10, 6))
+         plt.plot(y_test.values, label="Actual", alpha=0.7)
+         plt.plot(y_pred, label="Random Forest", alpha=0.7)
+         plt.legend()
+         plt.title("Actual vs. Predicted (Random Forest)")
+         plt.xlabel("Index")
+         plt.ylabel("Quantity Sold")
+
+         # Save the plot to a BytesIO buffer
+         buf = io.BytesIO()
+         plt.savefig(buf, format="png")
+         buf.seek(0)
+         plt.close()
+
+         # Return response with metrics in headers and the graph as the body
+         # (header names say RF only, since this endpoint trains just the Random Forest)
+         headers = {
+             "RF_MSE": str(mse),
+             "RF_RMSE": str(rmse),
+             "RF_MAE": str(mae),
+             "RF_R2": str(r2),
+         }
+
+         return StreamingResponse(buf, media_type="image/png", headers=headers)
+
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=400)
+
+
+ # Per-item ARIMA endpoint: one model per menu item
+ @app.post("/train-arima")
+ async def train_arima(file: UploadFile = File(...)):
+     try:
+         # Load the uploaded CSV file into a DataFrame
+         content = await file.read()
+         sales_data = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+         # Prepare data for ARIMA: total quantity sold per date, one column per menu item
+         arima_data = sales_data.groupby(['Date', 'Menu_ID'])['Quantity Sold'].sum().unstack(fill_value=0)
+
+         # Ensure the index is datetime
+         arima_data.index = pd.to_datetime(arima_data.index)
+
+         # Split ARIMA data into training and testing sets (80/20, in time order)
+         arima_train = arima_data.iloc[:int(len(arima_data) * 0.8), :]
+         arima_test = arima_data.iloc[int(len(arima_data) * 0.8):, :]
+
+         # Store ARIMA models and predictions
+         arima_models = {}
+         arima_predictions = {}
+
+         # Fit an ARIMA model for each menu item
+         for menu_id in arima_data.columns:
+             # Train the ARIMA model
+             model = ARIMA(arima_train[menu_id], order=(5, 1, 0))
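+             # The (5, 1, 0) order is fixed for every menu item; selecting the
+             # order per series (e.g. by AIC) would likely fit some items better.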
+             arima_fitted = model.fit()
+             arima_models[menu_id] = arima_fitted  # Save the fitted model
+
+             # Forecast over the test horizon; store raw values so the combined
+             # DataFrame below aligns positionally with the test index
+             forecast = arima_fitted.forecast(steps=len(arima_test))
+             arima_predictions[menu_id] = pd.Series(forecast).to_numpy()
+
+         # Combine predictions into a single DataFrame
+         arima_predictions_df = pd.DataFrame(arima_predictions, index=arima_test.index)
+
+         # Calculate metrics for ARIMA, pooled across all menu items
+         arima_metrics = {
+             "Mean Squared Error (MSE)": mean_squared_error(arima_test.values.flatten(), arima_predictions_df.values.flatten()),
+             "Root Mean Squared Error (RMSE)": math.sqrt(mean_squared_error(arima_test.values.flatten(), arima_predictions_df.values.flatten())),
+             "Mean Absolute Error (MAE)": mean_absolute_error(arima_test.values.flatten(), arima_predictions_df.values.flatten()),
+             "R-squared Score (R²)": r2_score(arima_test.values.flatten(), arima_predictions_df.values.flatten())
+         }
+
+         # Plot actual vs. predicted values
+         plt.figure(figsize=(12, 6))
+         plt.plot(arima_test.values.flatten(), label="Actual", alpha=0.7)
+         plt.plot(arima_predictions_df.values.flatten(), label="Predicted", alpha=0.7)
+         plt.legend()
+         plt.title("Actual vs. Predicted (ARIMA)")
+         plt.xlabel("Index")
+         plt.ylabel("Quantity Sold")
+
+         # Save the plot to a BytesIO buffer
+         buf = io.BytesIO()
+         plt.savefig(buf, format="png")
+         buf.seek(0)
+         plt.close()
+
+         # Return response with metrics in headers and the graph as the body
+         return StreamingResponse(
+             buf,
+             media_type="image/png",
+             headers={
+                 "ARIMA_MSE": str(arima_metrics["Mean Squared Error (MSE)"]),
+                 "ARIMA_RMSE": str(arima_metrics["Root Mean Squared Error (RMSE)"]),
+                 "ARIMA_MAE": str(arima_metrics["Mean Absolute Error (MAE)"]),
+                 "ARIMA_R2": str(arima_metrics["R-squared Score (R²)"]),
+             }
+         )
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=400)
+
+ # Prediction endpoint: forecast next-week sales and ingredient needs
+ @app.post("/predict-sales")
+ async def predict_sales(files: List[UploadFile] = File(...)):
+     try:
+         # Read the uploaded CSV files into DataFrames
+         data_frames = {}
+         for file in files:
+             content = await file.read()
+             data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))
+
+         # Extract the relevant DataFrames
+         sales_data = data_frames['restaurant_sales_linked.csv']
+         menu_data = data_frames['restaurant_menu_final.csv']
+         recipe_data = data_frames['restaurant_recipe_final.csv']
+         ingredients_data = data_frames['restaurant_ingredients_final.csv']
+
+         # Parse 'Date' and aggregate weekly sales
+         sales_data['Date'] = pd.to_datetime(sales_data['Date'])
+         sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
+         weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()
+
+         # Select features and target
+         features = weekly_sales[['Week', 'Menu_ID', 'Revenue']]
+         target = weekly_sales['Quantity Sold']
+
+         # Preprocessing
+         numerical_features = ['Week', 'Revenue']
+         categorical_features = ['Menu_ID']
+
+         column_transformer = make_column_transformer(
+             (StandardScaler(), numerical_features),
+             (OneHotEncoder(handle_unknown="ignore"), categorical_features),
+             remainder="passthrough",
+         )
+
+         # Transform features
+         X_transformed = column_transformer.fit_transform(features)
+
+         # Hold back the last batch of rows (one per menu item) as the "future" week
+         n_items = features['Menu_ID'].nunique()
+         X_train = X_transformed[:-n_items]
+         y_train = target[:-n_items]
+         X_future = X_transformed[-n_items:]
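+         # Assumption: the last n_items rows form exactly one row per menu
+         # item, in the same order as features['Menu_ID'].unique(); otherwise
+         # the Menu_ID labels attached to the predictions below will not match.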
+
+         # Train a Random Forest on the historical data
+         rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
+         rf_model.fit(X_train, y_train)
+
+         # Predict future sales with the Random Forest
+         rf_pred = rf_model.predict(X_future)
+
+         # Calculate training residuals for the XGBoost corrector
+         rf_train_pred = rf_model.predict(X_train)
+         residuals = y_train - rf_train_pred
+
+         # Train XGBoost on the residuals
+         xgb_model = XGBRegressor(
+             n_estimators=50,
+             learning_rate=0.1,
+             max_depth=5,
+             random_state=42,
+             tree_method="hist",
+             eval_metric="rmse",
+         )
+         xgb_model.fit(X_train, residuals)
+
+         # Predict residual corrections for the future batch
+         xgb_residual_pred = xgb_model.predict(X_future)
+
+         # Combine predictions from both models
+         combined_pred = rf_pred + xgb_residual_pred
+
+         # Rank menu items by predicted quantity and keep the top 8 dishes
+         predicted_sales = pd.DataFrame({
+             'Menu_ID': features['Menu_ID'].unique(),
+             'Predicted Quantity': combined_pred
+         }).sort_values(by='Predicted Quantity', ascending=False).head(8)
+
+         # Merge with menu, recipe, and ingredient data for detailed information
+         predicted_sales_details = predicted_sales.merge(menu_data, on='Menu_ID', how='inner')
+         predicted_sales_details = predicted_sales_details.merge(recipe_data, on='Menu_ID', how='inner')
+         predicted_sales_details = predicted_sales_details.merge(ingredients_data, on='Ingredient_ID', how='inner')
+
+         # Calculate ingredient requirements for the future week
+         predicted_sales_details['Total Ingredient Quantity'] = (
+             predicted_sales_details['Quantity_Per_Unit'] * predicted_sales_details['Predicted Quantity']
+         )
+
+         # Select and organize the final output
+         final_result = predicted_sales_details[[
+             'Menu_Item', 'Predicted Quantity', 'Ingredient_Name', 'Total Ingredient Quantity'
+         ]]
+
+         # Write the final result to an in-memory CSV
+         buffer = io.StringIO()
+         final_result.to_csv(buffer, index=False)
+         buffer.seek(0)
+
+         # Return the CSV as a downloadable attachment
+         return StreamingResponse(
+             io.BytesIO(buffer.getvalue().encode("utf-8")),
+             media_type="text/csv",
+             headers={"Content-Disposition": "attachment; filename=predicted_sales_ingredients.csv"}
+         )
+
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=400)