File size: 23,928 Bytes
3c7d03a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import StreamingResponse, JSONResponse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
from sklearn.pipeline import Pipeline
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import math
import io
import matplotlib.pyplot as plt
from typing import List

# ASGI application object; each endpoint in this module registers itself on it
# through an @app.post(...) decorator.
app = FastAPI()

# XGBoost-Only Endpoint
@app.post("/train-xgboost")
async def train_xgboost(files: List[UploadFile] = File(...)):
    """Train an XGBoost regressor on weekly sales and return an evaluation plot.

    Args:
        files: Uploaded CSV files. Two are read, looked up by filename:
            ``restaurant_sales_linked.csv`` (columns used: ``Date``,
            ``Menu_ID``, ``Quantity Sold``, ``Revenue``) and
            ``restaurant_menu_final.csv`` (columns used: ``Menu_ID``,
            ``Price``).

    Returns:
        A PNG ``StreamingResponse`` plotting actual vs. predicted quantity on
        the 20% hold-out split, with the metrics in the ``XGBoost_MSE``,
        ``XGBoost_RMSE``, ``XGBoost_MAE`` and ``XGBoost_R2`` headers. Any
        failure is reported as a 400 ``JSONResponse`` of the form
        ``{"error": "<message>"}``.
    """
    try:
        # Read the uploaded CSV files into DataFrames, keyed by filename
        data_frames = {}
        for file in files:
            content = await file.read()
            data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))

        # Fail with a readable message instead of a bare KeyError repr
        required = ('restaurant_sales_linked.csv', 'restaurant_menu_final.csv')
        missing = [name for name in required if name not in data_frames]
        if missing:
            raise ValueError(f"Missing required file(s): {', '.join(missing)}")

        sales_data = data_frames['restaurant_sales_linked.csv']
        menu_data = data_frames['restaurant_menu_final.csv']

        # Parse 'Date' in sales data
        sales_data['Date'] = pd.to_datetime(sales_data['Date'])

        # Aggregate weekly sales data for each menu item.
        # NOTE(review): this is the ISO week number only, so the same week of
        # different years is pooled together - confirm the data spans one year.
        sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
        weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()

        # Merge menu data for menu item details
        merged_data = pd.merge(weekly_sales, menu_data, on='Menu_ID', how='left')

        # Feature preparation.
        # NOTE(review): 'Revenue' is the same-week revenue total; if it is
        # derived from the quantity sold it leaks the target - confirm.
        features = merged_data[['Week', 'Menu_ID', 'Price', 'Revenue']]
        target = merged_data['Quantity Sold']

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

        # Preprocessing: Scaling numerical features and encoding categorical features
        numerical_features = ['Week', 'Price', 'Revenue']
        categorical_features = ['Menu_ID']

        column_transformer = make_column_transformer(
            (StandardScaler(), numerical_features),
            (OneHotEncoder(handle_unknown="ignore"), categorical_features),
            remainder="drop",
        )

        # Fit the preprocessing on the training split only, then apply to both
        X_train_transformed = column_transformer.fit_transform(X_train)
        X_test_transformed = column_transformer.transform(X_test)

        # XGBoost model
        xgb_model = XGBRegressor(
            n_estimators=25,
            learning_rate=0.1,
            max_depth=5,
            random_state=42,
            tree_method="hist",
            eval_metric="rmse",
        )

        # Train the XGBoost model
        xgb_model.fit(
            X_train_transformed,
            y_train,
            eval_set=[(X_test_transformed, y_test)],
            verbose=False,
        )

        # Predictions and evaluation
        xgb_y_pred = xgb_model.predict(X_test_transformed)

        # Evaluation Metrics
        xgb_mse = mean_squared_error(y_test, xgb_y_pred)
        xgb_rmse = math.sqrt(xgb_mse)
        xgb_mae = mean_absolute_error(y_test, xgb_y_pred)
        xgb_r2 = r2_score(y_test, xgb_y_pred)

        # Generate the graph on an explicit Figure and always close it, so a
        # failure while plotting/saving does not leak the figure in a
        # long-running server process.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.plot(y_test.values, label="Actual", alpha=0.7)
            ax.plot(xgb_y_pred, label="Predicted", alpha=0.7)
            ax.legend()
            ax.set_title("Actual vs. Predicted (XGBoost)")
            ax.set_xlabel("Index")
            ax.set_ylabel("Quantity Sold")

            # Save the plot to a BytesIO buffer
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
        finally:
            plt.close(fig)
        buf.seek(0)

        # Return response with metrics and graph
        headers = {
            "XGBoost_MSE": str(xgb_mse),
            "XGBoost_RMSE": str(xgb_rmse),
            "XGBoost_MAE": str(xgb_mae),
            "XGBoost_R2": str(xgb_r2),
        }

        return StreamingResponse(buf, media_type="image/png", headers=headers)

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)

@app.post("/train-sarimax-xgboost")
async def train_sarimax_xgboost(files: List[UploadFile] = File(...)):
    """Fit SARIMAX plus an XGBoost residual corrector for one menu item.

    Args:
        files: Uploaded CSV files. Only ``restaurant_sales_linked.csv`` is
            read (columns used: ``Date``, ``Menu_ID``, ``Quantity Sold``,
            ``Revenue``). The model is built for ``Menu_ID == 1`` only.

    Returns:
        A PNG ``StreamingResponse`` plotting actual vs. combined prediction
        over the last 20% of rows, with the metrics in the
        ``SARIMAX_XGBoost_MSE`` / ``_RMSE`` / ``_MAE`` / ``_R2`` headers. Any
        failure is reported as a 400 ``JSONResponse`` of the form
        ``{"error": "<message>"}``.
    """
    try:
        # Read the uploaded CSV files into DataFrames, keyed by filename
        data_frames = {}
        for file in files:
            content = await file.read()
            data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))

        # Fail with a readable message instead of a bare KeyError repr
        if 'restaurant_sales_linked.csv' not in data_frames:
            raise ValueError("Missing required file(s): restaurant_sales_linked.csv")
        sales_data = data_frames['restaurant_sales_linked.csv']

        # Parse 'Date' and map every row to the start date of its week
        # (a weekly period prints as "start/end"; keep the start part).
        sales_data['Date'] = pd.to_datetime(sales_data['Date'])
        sales_data['Week'] = sales_data['Date'].dt.to_period('W').astype(str)
        sales_data['Week'] = sales_data['Week'].str.split('/').str[0]
        sales_data['Week'] = pd.to_datetime(sales_data['Week'])

        # Select a single menu item for demonstration
        menu_id = 1
        menu_sales = sales_data[sales_data['Menu_ID'] == menu_id].set_index('Week')

        if menu_sales.empty:
            raise ValueError(f"No data available for Menu_ID {menu_id}")

        # The split below is positional, so the rows must be in chronological
        # order. A stable sort keeps the original order within a week.
        menu_sales = menu_sales.sort_index(kind='mergesort')

        # Chronological 80/20 train-test split for SARIMAX
        train_size = int(len(menu_sales) * 0.8)
        train_data = menu_sales.iloc[:train_size]
        test_data = menu_sales.iloc[train_size:]

        # SARIMAX Model
        sarimax_model = SARIMAX(train_data['Quantity Sold'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
        sarimax_result = sarimax_model.fit(disp=False)

        # Predictions with SARIMAX
        sarimax_pred = sarimax_result.get_forecast(steps=len(test_data)).predicted_mean

        if len(sarimax_pred) != len(test_data):
            raise ValueError(f"Length mismatch: SARIMAX predictions ({len(sarimax_pred)}) vs Test data ({len(test_data)})")

        # Work positionally from here on. The forecast index need not match
        # the 'Week' index of the test rows (it can be integer based when no
        # date frequency is attached), and label-aligned Series arithmetic
        # would then yield NaNs or a result of the wrong length.
        actual = test_data['Quantity Sold'].to_numpy()
        sarimax_values = np.asarray(sarimax_pred)
        residuals = actual - sarimax_values

        # Prepare data for XGBoost
        xgboost_features = test_data[['Revenue']]

        # Preprocessing for XGBoost
        scaler = StandardScaler()
        X_transformed = scaler.fit_transform(xgboost_features)

        # XGBoost Model
        # NOTE(review): the residual model is fitted and then evaluated on the
        # same test rows, so the metrics below are in-sample for XGBoost and
        # optimistic. Training it on SARIMAX in-sample residuals instead needs
        # a decision on how to treat the differencing burn-in period.
        xgb_model = XGBRegressor(
            n_estimators=25,
            learning_rate=0.1,
            max_depth=5,
            random_state=42,
            tree_method="hist",
            eval_metric="rmse",
        )
        xgb_model.fit(X_transformed, residuals)

        # Combine SARIMAX and XGBoost Predictions
        xgb_residual_pred = xgb_model.predict(X_transformed)
        combined_pred = sarimax_values + xgb_residual_pred

        # Evaluation Metrics
        combined_mse = mean_squared_error(actual, combined_pred)
        combined_rmse = math.sqrt(combined_mse)
        combined_mae = mean_absolute_error(actual, combined_pred)
        combined_r2 = r2_score(actual, combined_pred)

        # Generate the graph. Both series are plotted against their position
        # so they share one x-axis; plotting the date-indexed Series directly
        # would put "Actual" on a date axis and "Predicted" on 0..n-1.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.plot(actual, label="Actual", alpha=0.7)
            ax.plot(combined_pred, label="SARIMAX + XGBoost Predicted", alpha=0.7)
            ax.legend()
            ax.set_title("Actual vs. Predicted (SARIMAX + XGBoost)")
            ax.set_xlabel("Index")
            ax.set_ylabel("Quantity Sold")

            # Save the plot to a BytesIO buffer
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
        finally:
            # Always release the figure, even if plotting/saving failed
            plt.close(fig)
        buf.seek(0)

        # Return combined response
        headers = {
            "SARIMAX_XGBoost_MSE": str(combined_mse),
            "SARIMAX_XGBoost_RMSE": str(combined_rmse),
            "SARIMAX_XGBoost_MAE": str(combined_mae),
            "SARIMAX_XGBoost_R2": str(combined_r2),
        }

        return StreamingResponse(buf, media_type="image/png", headers=headers)

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)

@app.post("/train-randomforest-xgboost")
async def train_randomforest_xgboost(files: List[UploadFile] = File(...)):
    """Train a Random Forest with an XGBoost residual corrector on weekly sales.

    The Random Forest is fitted on the training split. XGBoost is then fitted
    on the Random Forest's *training* residuals, and the sum of both
    predictions is evaluated on the untouched 20% hold-out split.

    Args:
        files: Uploaded CSV files. Only ``restaurant_sales_linked.csv`` is
            read (columns used: ``Date``, ``Menu_ID``, ``Quantity Sold``,
            ``Revenue``).

    Returns:
        A PNG ``StreamingResponse`` plotting actual vs. combined prediction,
        with the metrics in the ``RF_XGBoost_MSE`` / ``_RMSE`` / ``_MAE`` /
        ``_R2`` headers. Any failure is reported as a 400 ``JSONResponse`` of
        the form ``{"error": "<message>"}``.
    """
    try:
        # Read the uploaded CSV files into DataFrames, keyed by filename
        data_frames = {}
        for file in files:
            content = await file.read()
            data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))

        # Fail with a readable message instead of a bare KeyError repr
        if 'restaurant_sales_linked.csv' not in data_frames:
            raise ValueError("Missing required file(s): restaurant_sales_linked.csv")
        sales_data = data_frames['restaurant_sales_linked.csv']

        # Parse 'Date' in sales data
        sales_data['Date'] = pd.to_datetime(sales_data['Date'])

        # Aggregate weekly sales data for each menu item.
        # NOTE(review): ISO week number only - weeks of different years are
        # pooled together; confirm the data spans a single year.
        sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
        weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()

        # Select features and target
        features = weekly_sales[['Week', 'Menu_ID', 'Revenue']]
        target = weekly_sales['Quantity Sold']

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

        # Preprocessing
        numerical_features = ['Week', 'Revenue']
        categorical_features = ['Menu_ID']

        column_transformer = make_column_transformer(
            (StandardScaler(), numerical_features),
            (OneHotEncoder(handle_unknown="ignore"), categorical_features),
            remainder="passthrough",
        )

        # Fit the preprocessing on the training split only, then apply to both
        X_train_transformed = column_transformer.fit_transform(X_train)
        X_test_transformed = column_transformer.transform(X_test)

        # Random Forest Regressor
        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        rf_model.fit(X_train_transformed, y_train)

        # Residuals the Random Forest leaves on its own training data. The
        # corrector must learn from these, not from test residuals: fitting it
        # on the test targets and scoring on the same rows would leak labels.
        rf_train_pred = rf_model.predict(X_train_transformed)
        train_residuals = y_train - rf_train_pred

        # XGBoost Model for Residuals
        xgb_model = XGBRegressor(
            n_estimators=50,
            learning_rate=0.1,
            max_depth=5,
            random_state=42,
            tree_method="hist",
            eval_metric="rmse",
        )
        xgb_model.fit(X_train_transformed, train_residuals)

        # Hold-out predictions: base forecast plus predicted residual
        rf_pred = rf_model.predict(X_test_transformed)
        xgb_residual_pred = xgb_model.predict(X_test_transformed)
        combined_pred = rf_pred + xgb_residual_pred

        # Evaluation Metrics
        mse = mean_squared_error(y_test, combined_pred)
        rmse = math.sqrt(mse)
        mae = mean_absolute_error(y_test, combined_pred)
        r2 = r2_score(y_test, combined_pred)

        # Generate the graph on an explicit Figure and always close it, so a
        # failure while plotting/saving does not leak the figure.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.plot(y_test.values, label="Actual", alpha=0.7)
            ax.plot(combined_pred, label="Random Forest + XGBoost Predicted", alpha=0.7)
            ax.legend()
            ax.set_title("Actual vs. Predicted (Random Forest + XGBoost)")
            ax.set_xlabel("Index")
            ax.set_ylabel("Quantity Sold")

            # Save the plot to a BytesIO buffer
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
        finally:
            plt.close(fig)
        buf.seek(0)

        # Return response with metrics and graph
        headers = {
            "RF_XGBoost_MSE": str(mse),
            "RF_XGBoost_RMSE": str(rmse),
            "RF_XGBoost_MAE": str(mae),
            "RF_XGBoost_R2": str(r2),
        }

        return StreamingResponse(buf, media_type="image/png", headers=headers)

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)
    
@app.post("/train-randomforest")
async def train_randomforest(files: List[UploadFile] = File(...)):
    """Train a Random Forest regressor on weekly sales and return a plot.

    Args:
        files: Uploaded CSV files. Two are read, looked up by filename:
            ``restaurant_sales_linked.csv`` (columns used: ``Date``,
            ``Menu_ID``, ``Quantity Sold``, ``Revenue``) and
            ``restaurant_menu_final.csv`` (columns used: ``Menu_ID``,
            ``Price``). Other uploads (e.g. recipe or inventory files) are
            accepted but not used by the model.

    Returns:
        A PNG ``StreamingResponse`` plotting actual vs. predicted quantity on
        the 20% hold-out split. Metrics are sent in the ``RF_MSE`` /
        ``RF_RMSE`` / ``RF_MAE`` / ``RF_R2`` headers and, for existing
        clients, duplicated under the legacy ``RF_XGBoost_*`` names. Any
        failure is reported as a 400 ``JSONResponse`` of the form
        ``{"error": "<message>"}``.
    """
    try:
        # Read the uploaded CSV files into DataFrames, keyed by filename
        data_frames = {}
        for file in files:
            content = await file.read()
            data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))

        # Fail with a readable message instead of a bare KeyError repr
        required = ('restaurant_sales_linked.csv', 'restaurant_menu_final.csv')
        missing = [name for name in required if name not in data_frames]
        if missing:
            raise ValueError(f"Missing required file(s): {', '.join(missing)}")

        sales_data = data_frames['restaurant_sales_linked.csv']
        menu_data = data_frames['restaurant_menu_final.csv']

        # Preprocessing
        sales_data['Date'] = pd.to_datetime(sales_data['Date'])

        # Aggregate weekly sales data for each menu item.
        # NOTE(review): ISO week number only - weeks of different years are
        # pooled together; confirm the data spans a single year.
        sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
        weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()

        # Merge menu data for menu item details: one row per (Week, Menu_ID).
        # No per-week ingredient totals are joined onto this frame. A join on
        # 'Week' alone repeats every sales row once per ingredient, and since
        # no ingredient column is a feature, that only puts identical rows in
        # both the train and the test split.
        merged_data = pd.merge(weekly_sales, menu_data, on='Menu_ID', how='left')

        # Select features and target
        features = merged_data[['Week', 'Menu_ID', 'Price', 'Revenue']]
        target = merged_data['Quantity Sold']

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

        # Preprocessing: Scaling numerical features and encoding categorical features
        numerical_features = ['Week', 'Price', 'Revenue']
        categorical_features = ['Menu_ID']

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numerical_features),
                ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
            ]
        )

        # Random Forest Regressor pipeline
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('model', RandomForestRegressor(n_estimators=100, random_state=42))
        ])

        # Train the model
        pipeline.fit(X_train, y_train)

        # Predictions
        y_pred = pipeline.predict(X_test)

        # Evaluation Metrics
        mse = mean_squared_error(y_test, y_pred)
        rmse = math.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Generate the graph on an explicit Figure and always close it, so a
        # failure while plotting/saving does not leak the figure.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.plot(y_test.values, label="Actual", alpha=0.7)
            ax.plot(y_pred, label="Random Forest", alpha=0.7)
            ax.legend()
            ax.set_title("Actual vs. Predicted (Random Forest)")
            ax.set_xlabel("Index")
            ax.set_ylabel("Quantity Sold")

            # Save the plot to a BytesIO buffer
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
        finally:
            plt.close(fig)
        buf.seek(0)

        # Return response with metrics and graph. This endpoint has no
        # XGBoost stage, so the accurate names are RF_*; the RF_XGBoost_*
        # names are kept only so existing clients keep working.
        headers = {
            "RF_MSE": str(mse),
            "RF_RMSE": str(rmse),
            "RF_MAE": str(mae),
            "RF_R2": str(r2),
            "RF_XGBoost_MSE": str(mse),
            "RF_XGBoost_RMSE": str(rmse),
            "RF_XGBoost_MAE": str(mae),
            "RF_XGBoost_R2": str(r2),
        }

        return StreamingResponse(buf, media_type="image/png", headers=headers)

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)
    

@app.post("/train-arima")
async def train_arima(file: UploadFile = File(...)):
    """Fit one ARIMA(5, 1, 0) model per menu item and return a plot.

    Args:
        file: Uploaded sales CSV (columns used: ``Date``, ``Menu_ID``,
            ``Quantity Sold``).

    Returns:
        A PNG ``StreamingResponse`` plotting actual vs. forecast quantity for
        the last 20% of dates (all menu items flattened into one series),
        with the pooled metrics in the ``ARIMA_MSE`` / ``ARIMA_RMSE`` /
        ``ARIMA_MAE`` / ``ARIMA_R2`` headers. Any failure is reported as a
        400 ``JSONResponse`` of the form ``{"error": "<message>"}``.
    """
    try:
        # Load the uploaded CSV file into a DataFrame
        content = await file.read()
        sales_data = pd.read_csv(io.StringIO(content.decode("utf-8")))

        # Parse dates BEFORE grouping: groupby sorts its keys, and sorting the
        # raw strings is only chronological for ISO-formatted dates.
        sales_data['Date'] = pd.to_datetime(sales_data['Date'])

        # One row per date, one column per menu item, total quantity sold
        arima_data = sales_data.groupby(['Date', 'Menu_ID'])['Quantity Sold'].sum().unstack(fill_value=0)

        # Split ARIMA data into training and testing sets (80-20 split)
        split_at = int(len(arima_data) * 0.8)
        arima_train = arima_data.iloc[:split_at, :]
        arima_test = arima_data.iloc[split_at:, :]
        horizon = len(arima_test)

        # Fit ARIMA for each menu item and forecast over the test horizon
        arima_predictions = {}
        for menu_id in arima_data.columns:
            arima_fitted = ARIMA(arima_train[menu_id], order=(5, 1, 0)).fit()
            # Keep only the values. The forecast Series carries its own index
            # (integer based when no date frequency is attached), and the
            # DataFrame constructor below would reindex it to the test dates,
            # turning every prediction into NaN on a mismatch.
            arima_predictions[menu_id] = np.asarray(arima_fitted.forecast(steps=horizon))

        # Combine predictions into a single DataFrame aligned by position
        arima_predictions_df = pd.DataFrame(arima_predictions, index=arima_test.index)

        # Calculate metrics for ARIMA, pooled over all menu items
        actual = arima_test.values.flatten()
        predicted = arima_predictions_df.values.flatten()
        mse = mean_squared_error(actual, predicted)
        rmse = math.sqrt(mse)
        mae = mean_absolute_error(actual, predicted)
        r2 = r2_score(actual, predicted)

        # Generate the graph on an explicit Figure and always close it, so a
        # failure while plotting/saving does not leak the figure.
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            ax.plot(actual, label="Actual", alpha=0.7)
            ax.plot(predicted, label="Predicted", alpha=0.7)
            ax.legend()
            ax.set_title("Actual vs. Predicted (ARIMA)")
            ax.set_xlabel("Index")
            ax.set_ylabel("Quantity Sold")

            # Save the plot to a BytesIO buffer
            buf = io.BytesIO()
            fig.savefig(buf, format="png")
        finally:
            plt.close(fig)
        buf.seek(0)

        # Return response with metrics and graph
        return StreamingResponse(
            buf,
            media_type="image/png",
            headers={
                "ARIMA_MSE": str(mse),
                "ARIMA_RMSE": str(rmse),
                "ARIMA_MAE": str(mae),
                "ARIMA_R2": str(r2),
            }
        )
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)
    
@app.post("/predict-sales")
async def predict_sales(files: List[UploadFile] = File(...)):
    """Predict per-item quantities and the ingredient amounts they require.

    A Random Forest plus an XGBoost residual corrector is trained on every
    week except the highest-numbered one; the rows of that last week are then
    scored. The eight items with the largest predicted quantity are expanded
    into their ingredients.

    Args:
        files: Uploaded CSV files. Four are read, looked up by filename:
            ``restaurant_sales_linked.csv`` (``Date``, ``Menu_ID``,
            ``Quantity Sold``, ``Revenue``), ``restaurant_menu_final.csv``,
            ``restaurant_recipe_final.csv`` and
            ``restaurant_ingredients_final.csv``. After joining on
            ``Menu_ID`` and ``Ingredient_ID`` the result must provide
            ``Menu_Item``, ``Quantity_Per_Unit`` and ``Ingredient_Name``.

    Returns:
        A CSV attachment (``predicted_sales_ingredients.csv``) with the
        columns ``Menu_Item``, ``Predicted Quantity``, ``Ingredient_Name``
        and ``Total Ingredient Quantity``. Any failure is reported as a 400
        ``JSONResponse`` of the form ``{"error": "<message>"}``.
    """
    try:
        # Read the uploaded CSV files into DataFrames, keyed by filename
        data_frames = {}
        for file in files:
            content = await file.read()
            data_frames[file.filename] = pd.read_csv(io.StringIO(content.decode("utf-8")))

        # Fail with a readable message instead of a bare KeyError repr
        required = (
            'restaurant_sales_linked.csv',
            'restaurant_menu_final.csv',
            'restaurant_recipe_final.csv',
            'restaurant_ingredients_final.csv',
        )
        missing = [name for name in required if name not in data_frames]
        if missing:
            raise ValueError(f"Missing required file(s): {', '.join(missing)}")

        sales_data = data_frames['restaurant_sales_linked.csv']
        menu_data = data_frames['restaurant_menu_final.csv']
        recipe_data = data_frames['restaurant_recipe_final.csv']
        ingredients_data = data_frames['restaurant_ingredients_final.csv']

        # Parse 'Date' and aggregate to one row per (Week, Menu_ID).
        # NOTE(review): ISO week number only - with multi-year data the
        # "last" week below is the highest week number, not the latest date.
        sales_data['Date'] = pd.to_datetime(sales_data['Date'])
        sales_data['Week'] = sales_data['Date'].dt.isocalendar().week
        weekly_sales = sales_data.groupby(['Week', 'Menu_ID']).agg({'Quantity Sold': 'sum', 'Revenue': 'sum'}).reset_index()

        # Select features and target.
        # NOTE(review): the scored rows carry that week's observed 'Revenue',
        # so this reconstructs the last week rather than forecasting an
        # unseen one - confirm this is the intended use.
        features = weekly_sales[['Week', 'Menu_ID', 'Revenue']]
        target = weekly_sales['Quantity Sold']

        # Preprocessing
        numerical_features = ['Week', 'Revenue']
        categorical_features = ['Menu_ID']

        column_transformer = make_column_transformer(
            (StandardScaler(), numerical_features),
            (OneHotEncoder(handle_unknown="ignore"), categorical_features),
            remainder="passthrough",
        )

        # Transform features
        X_transformed = column_transformer.fit_transform(features)

        # Hold out the last week explicitly. Taking "the last N rows, N =
        # number of distinct menu items" is only right when every item sold
        # in that week; otherwise rows of earlier weeks get scored and the
        # predictions are attached to the wrong Menu_IDs.
        last_week = weekly_sales['Week'].max()
        is_future = (weekly_sales['Week'] == last_week).to_numpy(dtype=bool)
        # Integer positions index both dense arrays and sparse matrices
        train_idx = np.flatnonzero(~is_future)
        future_idx = np.flatnonzero(is_future)
        if train_idx.size == 0:
            raise ValueError("At least two distinct weeks of sales data are required")

        X_train = X_transformed[train_idx]
        y_train = target.iloc[train_idx]
        X_future = X_transformed[future_idx]
        future_menu_ids = weekly_sales['Menu_ID'].iloc[future_idx].to_numpy()

        # Train Random Forest on historical data
        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        rf_model.fit(X_train, y_train)

        # Base prediction for the held-out week
        rf_pred = rf_model.predict(X_future)

        # Calculate residuals for training XGBoost
        rf_train_pred = rf_model.predict(X_train)
        residuals = y_train - rf_train_pred

        # Train XGBoost on residuals
        xgb_model = XGBRegressor(
            n_estimators=50,
            learning_rate=0.1,
            max_depth=5,
            random_state=42,
            tree_method="hist",
            eval_metric="rmse",
        )
        xgb_model.fit(X_train, residuals)

        # Predicted residual correction for the held-out week
        xgb_residual_pred = xgb_model.predict(X_future)

        # Combine predictions from both models
        combined_pred = rf_pred + xgb_residual_pred

        # Label each prediction with the Menu_ID of the row it was computed
        # from, then keep the eight largest.
        predicted_sales = pd.DataFrame({
            'Menu_ID': future_menu_ids,
            'Predicted Quantity': combined_pred
        }).sort_values(by='Predicted Quantity', ascending=False).head(8)  # Top 8 dishes

        # Merge with menu and recipe data for detailed information
        predicted_sales_details = predicted_sales.merge(menu_data, on='Menu_ID', how='inner')
        predicted_sales_details = predicted_sales_details.merge(recipe_data, on='Menu_ID', how='inner')
        predicted_sales_details = predicted_sales_details.merge(ingredients_data, on='Ingredient_ID', how='inner')

        # Ingredient requirement = per-unit amount x predicted units
        predicted_sales_details['Total Ingredient Quantity'] = (
            predicted_sales_details['Quantity_Per_Unit'] * predicted_sales_details['Predicted Quantity']
        )

        # Select and organize the final output
        final_result = predicted_sales_details[[
            'Menu_Item', 'Predicted Quantity', 'Ingredient_Name', 'Total Ingredient Quantity'
        ]]

        # Serialize to CSV in memory (to_csv returns a string when no path is given)
        csv_bytes = final_result.to_csv(index=False).encode("utf-8")

        # Create a StreamingResponse to return the CSV file
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": "attachment; filename=predicted_sales_ingredients.csv"}
        )

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)