"""FastAPI endpoints that train forecasting models on uploaded restaurant CSVs.

Every training endpoint accepts CSV uploads, fits one or more models to
predict ``Quantity Sold``, and responds with a PNG plot of actual vs.
predicted values; the evaluation metrics travel in the response headers.
``/predict-sales`` returns a CSV of predicted quantities and the ingredient
amounts they imply.

Any failure inside an endpoint is reported as ``{"error": <message>}`` with
HTTP status 400.

NOTE(review): model fitting runs synchronously inside ``async def`` handlers,
so a long fit presumably blocks the event loop for other requests -- confirm
whether that is acceptable for the expected load.
"""

import io
import math
from typing import Dict, List

import matplotlib

# Select the non-interactive backend before pyplot is imported: the service
# only renders figures into in-memory PNG buffers and may run without a display.
matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse, StreamingResponse
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_transformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from xgboost import XGBRegressor

app = FastAPI()

# Upload filenames the endpoints look up among the submitted files.
_SALES_FILE = "restaurant_sales_linked.csv"
_MENU_FILE = "restaurant_menu_final.csv"
_RECIPE_FILE = "restaurant_recipe_final.csv"
_INGREDIENTS_FILE = "restaurant_ingredients_final.csv"


async def _read_csv_uploads(files: List[UploadFile]) -> Dict[str, pd.DataFrame]:
    """Parse every uploaded file as CSV and return the frames keyed by filename."""
    frames: Dict[str, pd.DataFrame] = {}
    for upload in files:
        raw = await upload.read()
        # "utf-8-sig" decodes plain UTF-8 unchanged and strips a leading BOM,
        # which would otherwise end up inside the first column name.
        frames[upload.filename] = pd.read_csv(io.StringIO(raw.decode("utf-8-sig")))
    return frames


def _require_frame(frames: Dict[str, pd.DataFrame], filename: str) -> pd.DataFrame:
    """Return the frame uploaded as ``filename`` or raise a descriptive ValueError."""
    try:
        return frames[filename]
    except KeyError:
        raise ValueError(f"Missing required upload: {filename}") from None


def _weekly_sales(sales_data: pd.DataFrame) -> pd.DataFrame:
    """Aggregate sales to one row per ISO week and menu item.

    Parses ``Date`` in place, adds ``ISO_Year`` and ``Week`` columns to
    ``sales_data`` and returns the summed ``Quantity Sold`` and ``Revenue``
    per (ISO_Year, Week, Menu_ID), sorted by those keys.
    """
    sales_data["Date"] = pd.to_datetime(sales_data["Date"])
    iso = sales_data["Date"].dt.isocalendar()
    # Group on the ISO year as well as the week number: the week number alone
    # repeats every year, which would merge unrelated weeks and break the
    # chronological ordering of the result.
    sales_data["ISO_Year"] = iso["year"]
    sales_data["Week"] = iso["week"]
    return (
        sales_data.groupby(["ISO_Year", "Week", "Menu_ID"])
        .agg({"Quantity Sold": "sum", "Revenue": "sum"})
        .reset_index()
    )


def _make_preprocessor(numerical_features, categorical_features, remainder="drop"):
    """Build a transformer that scales numeric columns and one-hot encodes categoricals."""
    return make_column_transformer(
        (StandardScaler(), numerical_features),
        (OneHotEncoder(handle_unknown="ignore"), categorical_features),
        remainder=remainder,
    )


def _make_xgb_regressor(n_estimators: int) -> XGBRegressor:
    """Create an XGBRegressor with the hyper-parameters shared by all endpoints."""
    return XGBRegressor(
        n_estimators=n_estimators,
        learning_rate=0.1,
        max_depth=5,
        random_state=42,
        tree_method="hist",
        eval_metric="rmse",
    )


def _regression_metrics(y_true, y_pred) -> Dict[str, float]:
    """Return MSE, RMSE, MAE and R2 (in that order) for the given predictions."""
    mse = mean_squared_error(y_true, y_pred)
    return {
        "MSE": mse,
        "RMSE": math.sqrt(mse),
        "MAE": mean_absolute_error(y_true, y_pred),
        "R2": r2_score(y_true, y_pred),
    }


def _metric_headers(prefix: str, metrics: Dict[str, float]) -> Dict[str, str]:
    """Format metrics as ``{prefix}_{name}`` response headers with string values."""
    return {f"{prefix}_{name}": str(value) for name, value in metrics.items()}


def _plot_png(actual, predicted, predicted_label: str, title: str, figsize=(10, 6)) -> io.BytesIO:
    """Render actual vs. predicted values and return the PNG in a rewound buffer.

    Both series are plotted against their position so they share one x-axis.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.plot(np.asarray(actual), label="Actual", alpha=0.7)
        ax.plot(np.asarray(predicted), label=predicted_label, alpha=0.7)
        ax.legend()
        ax.set_title(title)
        ax.set_xlabel("Index")
        ax.set_ylabel("Quantity Sold")
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even when rendering fails, so repeated
        # requests do not accumulate open figures.
        plt.close(fig)
    buf.seek(0)
    return buf


def _error_response(exc: Exception) -> JSONResponse:
    """Turn an exception into the JSON error payload used by every endpoint."""
    return JSONResponse(content={"error": str(exc)}, status_code=400)


# XGBoost-Only Endpoint
@app.post("/train-xgboost")
async def train_xgboost(files: List[UploadFile] = File(...)):
    """Train XGBoost on weekly sales and return the evaluation plot.

    Requires ``restaurant_sales_linked.csv`` and ``restaurant_menu_final.csv``
    among ``files``. Returns a PNG stream with ``XGBoost_MSE``,
    ``XGBoost_RMSE``, ``XGBoost_MAE`` and ``XGBoost_R2`` headers, or a JSON
    error with status 400.
    """
    try:
        frames = await _read_csv_uploads(files)
        sales_data = _require_frame(frames, _SALES_FILE)
        menu_data = _require_frame(frames, _MENU_FILE)

        # Weekly totals per menu item, enriched with the menu details (Price).
        weekly_sales = _weekly_sales(sales_data)
        merged_data = pd.merge(weekly_sales, menu_data, on="Menu_ID", how="left")

        features = merged_data[["Week", "Menu_ID", "Price", "Revenue"]]
        target = merged_data["Quantity Sold"]
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        preprocessor = _make_preprocessor(["Week", "Price", "Revenue"], ["Menu_ID"])
        X_train_transformed = preprocessor.fit_transform(X_train)
        X_test_transformed = preprocessor.transform(X_test)

        xgb_model = _make_xgb_regressor(25)
        # The eval_set is only monitored; no early stopping is configured.
        xgb_model.fit(
            X_train_transformed,
            y_train,
            eval_set=[(X_test_transformed, y_test)],
            verbose=False,
        )
        xgb_y_pred = xgb_model.predict(X_test_transformed)

        metrics = _regression_metrics(y_test, xgb_y_pred)
        buf = _plot_png(y_test, xgb_y_pred, "Predicted", "Actual vs. Predicted (XGBoost)")
        return StreamingResponse(
            buf, media_type="image/png", headers=_metric_headers("XGBoost", metrics)
        )
    except Exception as e:  # endpoint boundary: report every failure as JSON
        return _error_response(e)


@app.post("/train-sarimax-xgboost")
async def train_sarimax_xgboost(files: List[UploadFile] = File(...)):
    """Train SARIMAX plus an XGBoost residual model for menu item 1.

    Requires ``restaurant_sales_linked.csv`` among ``files``. The weekly
    series is split 80/20 in time order. SARIMAX is fitted on the first part,
    XGBoost learns SARIMAX's in-sample residuals from ``Revenue``, and the sum
    of both forecasts is scored on the held-out part. Returns a PNG stream
    with ``SARIMAX_XGBoost_*`` metric headers, or a JSON error with status 400.
    """
    try:
        frames = await _read_csv_uploads(files)
        sales_data = _require_frame(frames, _SALES_FILE)

        sales_data["Date"] = pd.to_datetime(sales_data["Date"])
        # Label every sale with the first day of its week.
        sales_data["Week"] = sales_data["Date"].dt.to_period("W").dt.start_time

        # Select a single menu item for demonstration
        menu_id = 1
        # Sum to one observation per week so the series really is weekly and
        # its index has no duplicates (groupby returns it sorted by week).
        menu_sales = (
            sales_data.loc[sales_data["Menu_ID"] == menu_id]
            .groupby("Week")[["Quantity Sold", "Revenue"]]
            .sum()
        )
        if menu_sales.empty:
            raise ValueError(f"No data available for Menu_ID {menu_id}")

        train_size = int(len(menu_sales) * 0.8)
        train_data, test_data = menu_sales.iloc[:train_size], menu_sales.iloc[train_size:]
        if train_data.empty:
            raise ValueError(
                f"Not enough weekly observations for Menu_ID {menu_id} to train a model"
            )

        # Work on plain arrays so that every later subtraction/addition is
        # positional instead of being aligned on (possibly different) indexes.
        y_train = train_data["Quantity Sold"].to_numpy(dtype=float)
        y_test = test_data["Quantity Sold"].to_numpy(dtype=float)

        order = (1, 1, 1)
        seasonal_order = (1, 1, 1, 12)
        sarimax_result = SARIMAX(y_train, order=order, seasonal_order=seasonal_order).fit(
            disp=False
        )
        sarimax_pred = np.asarray(sarimax_result.get_forecast(steps=len(y_test)).predicted_mean)
        if len(sarimax_pred) != len(y_test):
            raise ValueError(
                f"Length mismatch: SARIMAX predictions ({len(sarimax_pred)}) vs Test data ({len(y_test)})"
            )

        # Fit the residual model on TRAINING residuals only; fitting it on the
        # test residuals and scoring on the same rows would leak the target.
        # The first d + D*s residuals are skipped when enough data remains,
        # because they are dominated by the model's initialisation.
        burn_in = order[1] + seasonal_order[1] * seasonal_order[3]
        start = burn_in if train_size > burn_in else 0
        train_residuals = np.asarray(sarimax_result.resid)[start:]

        scaler = StandardScaler()
        X_train = scaler.fit_transform(train_data[["Revenue"]].iloc[start:])
        X_test = scaler.transform(test_data[["Revenue"]])

        xgb_model = _make_xgb_regressor(25)
        xgb_model.fit(X_train, train_residuals)

        # Combine SARIMAX and XGBoost predictions
        combined_pred = sarimax_pred + xgb_model.predict(X_test)

        metrics = _regression_metrics(y_test, combined_pred)
        buf = _plot_png(
            y_test,
            combined_pred,
            "SARIMAX + XGBoost Predicted",
            "Actual vs. Predicted (SARIMAX + XGBoost)",
        )
        return StreamingResponse(
            buf, media_type="image/png", headers=_metric_headers("SARIMAX_XGBoost", metrics)
        )
    except Exception as e:  # endpoint boundary: report every failure as JSON
        return _error_response(e)


@app.post("/train-randomforest-xgboost")
async def train_randomforest_xgboost(files: List[UploadFile] = File(...)):
    """Train a Random Forest plus an XGBoost residual model on weekly sales.

    Requires ``restaurant_sales_linked.csv`` among ``files``. Returns a PNG
    stream with ``RF_XGBoost_*`` metric headers, or a JSON error with
    status 400.
    """
    try:
        frames = await _read_csv_uploads(files)
        sales_data = _require_frame(frames, _SALES_FILE)

        weekly_sales = _weekly_sales(sales_data)
        features = weekly_sales[["Week", "Menu_ID", "Revenue"]]
        target = weekly_sales["Quantity Sold"]
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        preprocessor = _make_preprocessor(
            ["Week", "Revenue"], ["Menu_ID"], remainder="passthrough"
        )
        X_train_transformed = preprocessor.fit_transform(X_train)
        X_test_transformed = preprocessor.transform(X_test)

        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        rf_model.fit(X_train_transformed, y_train)
        rf_pred = rf_model.predict(X_test_transformed)

        # Learn the forest's errors from the TRAINING split only; the test
        # split stays untouched until evaluation so the metrics are not leaked.
        train_residuals = y_train - rf_model.predict(X_train_transformed)
        xgb_model = _make_xgb_regressor(50)
        xgb_model.fit(X_train_transformed, train_residuals)

        # Combine predictions
        combined_pred = rf_pred + xgb_model.predict(X_test_transformed)

        metrics = _regression_metrics(y_test, combined_pred)
        buf = _plot_png(
            y_test,
            combined_pred,
            "Random Forest + XGBoost Predicted",
            "Actual vs. Predicted (Random Forest + XGBoost)",
        )
        return StreamingResponse(
            buf, media_type="image/png", headers=_metric_headers("RF_XGBoost", metrics)
        )
    except Exception as e:  # endpoint boundary: report every failure as JSON
        return _error_response(e)


@app.post("/train-randomforest")
async def train_randomforest(files: List[UploadFile] = File(...)):
    """Train a Random Forest on weekly sales and return the evaluation plot.

    Requires ``restaurant_sales_linked.csv`` and ``restaurant_menu_final.csv``
    among ``files``; any other uploaded file is parsed but not used, because
    the model's features are Week, Menu_ID, Price and Revenue only.

    Returns a PNG stream with ``RF_*`` metric headers. The same values are
    also sent under the historical ``RF_XGBoost_*`` names so existing clients
    keep working. On failure returns a JSON error with status 400.
    """
    try:
        frames = await _read_csv_uploads(files)
        sales_data = _require_frame(frames, _SALES_FILE)
        menu_data = _require_frame(frames, _MENU_FILE)

        weekly_sales = _weekly_sales(sales_data)
        # One row per (week, menu item). Joining per-week ingredient totals
        # onto this frame would repeat each row once per ingredient, placing
        # identical rows on both sides of the random split.
        merged_data = pd.merge(weekly_sales, menu_data, on="Menu_ID", how="left")

        features = merged_data[["Week", "Menu_ID", "Price", "Revenue"]]
        target = merged_data["Quantity Sold"]
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        numerical_features = ["Week", "Price", "Revenue"]
        categorical_features = ["Menu_ID"]
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", StandardScaler(), numerical_features),
                ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
            ]
        )
        pipeline = Pipeline(
            steps=[
                ("preprocessor", preprocessor),
                ("model", RandomForestRegressor(n_estimators=100, random_state=42)),
            ]
        )
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        metrics = _regression_metrics(y_test, y_pred)
        buf = _plot_png(y_test, y_pred, "Random Forest", "Actual vs. Predicted (Random Forest)")

        # Legacy (mislabelled) header names first, correctly named ones added.
        headers = _metric_headers("RF_XGBoost", metrics)
        headers.update(_metric_headers("RF", metrics))
        return StreamingResponse(buf, media_type="image/png", headers=headers)
    except Exception as e:  # endpoint boundary: report every failure as JSON
        return _error_response(e)


@app.post("/train-arima")
async def train_arima(file: UploadFile = File(...)):
    """Fit one ARIMA(5, 1, 0) model per menu item and return the evaluation plot.

    ``file`` is the sales CSV with ``Date``, ``Menu_ID`` and ``Quantity Sold``
    columns. Quantities are summed per date and menu item, the dates are split
    80/20 in chronological order, and all items' forecasts are scored
    together. Returns a PNG stream with ``ARIMA_*`` metric headers, or a JSON
    error with status 400.
    """
    try:
        content = await file.read()
        sales_data = pd.read_csv(io.StringIO(content.decode("utf-8-sig")))

        # Parse dates BEFORE grouping so the rows are ordered chronologically
        # rather than by the lexicographic order of the date strings.
        sales_data["Date"] = pd.to_datetime(sales_data["Date"])

        # One row per date, one column per menu item; dates without a sale get 0.
        arima_data = (
            sales_data.groupby(["Date", "Menu_ID"])["Quantity Sold"].sum().unstack(fill_value=0)
        )

        # 80-20 split in time order
        split_at = int(len(arima_data) * 0.8)
        arima_train = arima_data.iloc[:split_at, :]
        arima_test = arima_data.iloc[split_at:, :]

        arima_predictions = {}
        for menu_id in arima_data.columns:
            fitted = ARIMA(arima_train[menu_id].to_numpy(dtype=float), order=(5, 1, 0)).fit()
            # Keep the forecast as a plain array: it is matched to the test
            # rows by position, so no index alignment can introduce NaNs.
            arima_predictions[menu_id] = np.asarray(fitted.forecast(steps=len(arima_test)))

        arima_predictions_df = pd.DataFrame(arima_predictions, index=arima_test.index)

        actual = arima_test.to_numpy().ravel()
        predicted = arima_predictions_df.to_numpy().ravel()
        metrics = _regression_metrics(actual, predicted)

        buf = _plot_png(
            actual, predicted, "Predicted", "Actual vs. Predicted (ARIMA)", figsize=(12, 6)
        )
        return StreamingResponse(
            buf, media_type="image/png", headers=_metric_headers("ARIMA", metrics)
        )
    except Exception as e:  # endpoint boundary: report every failure as JSON
        return _error_response(e)


@app.post("/predict-sales")
async def predict_sales(files: List[UploadFile] = File(...)):
    """Predict quantities for the latest batch of menu items and list ingredient needs.

    Requires ``restaurant_sales_linked.csv``, ``restaurant_menu_final.csv``,
    ``restaurant_recipe_final.csv`` and ``restaurant_ingredients_final.csv``
    among ``files``. The last N weekly rows (N = number of distinct menu
    items) are held out as the batch to predict; a Random Forest plus an
    XGBoost residual model are trained on the rows before them. Returns a CSV
    attachment with the top 8 predicted items and their ingredient
    quantities, or a JSON error with status 400.
    """
    try:
        frames = await _read_csv_uploads(files)
        sales_data = _require_frame(frames, _SALES_FILE)
        menu_data = _require_frame(frames, _MENU_FILE)
        recipe_data = _require_frame(frames, _RECIPE_FILE)
        ingredients_data = _require_frame(frames, _INGREDIENTS_FILE)

        weekly_sales = _weekly_sales(sales_data)
        features = weekly_sales[["Week", "Menu_ID", "Revenue"]]
        target = weekly_sales["Quantity Sold"]

        preprocessor = _make_preprocessor(
            ["Week", "Revenue"], ["Menu_ID"], remainder="passthrough"
        )
        X_transformed = preprocessor.fit_transform(features)

        n_items = features["Menu_ID"].nunique()
        if n_items == 0 or n_items >= len(features):
            raise ValueError("Not enough weekly sales history to train a prediction model")

        # Hold out the last batch for prediction; train on everything before it.
        X_train = X_transformed[:-n_items]
        y_train = target.iloc[:-n_items]
        X_future = X_transformed[-n_items:]
        # Take the labels from the held-out rows themselves so each prediction
        # is paired with the menu item it was actually computed for.
        future_menu_ids = features["Menu_ID"].iloc[-n_items:].to_numpy()

        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
        rf_model.fit(X_train, y_train)
        rf_pred = rf_model.predict(X_future)

        # XGBoost learns what the forest gets wrong on the training rows.
        residuals = y_train - rf_model.predict(X_train)
        xgb_model = _make_xgb_regressor(50)
        xgb_model.fit(X_train, residuals)

        # Combine predictions from both models
        combined_pred = rf_pred + xgb_model.predict(X_future)

        # Top 8 dishes by predicted quantity
        predicted_sales = (
            pd.DataFrame({"Menu_ID": future_menu_ids, "Predicted Quantity": combined_pred})
            .sort_values(by="Predicted Quantity", ascending=False)
            .head(8)
        )

        # Attach menu, recipe and ingredient details
        predicted_sales_details = (
            predicted_sales.merge(menu_data, on="Menu_ID", how="inner")
            .merge(recipe_data, on="Menu_ID", how="inner")
            .merge(ingredients_data, on="Ingredient_ID", how="inner")
        )
        predicted_sales_details["Total Ingredient Quantity"] = (
            predicted_sales_details["Quantity_Per_Unit"]
            * predicted_sales_details["Predicted Quantity"]
        )

        final_result = predicted_sales_details[
            ["Menu_Item", "Predicted Quantity", "Ingredient_Name", "Total Ingredient Quantity"]
        ]

        csv_bytes = final_result.to_csv(index=False).encode("utf-8")
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={
                "Content-Disposition": "attachment; filename=predicted_sales_ingredients.csv"
            },
        )
    except Exception as e:  # endpoint boundary: report every failure as JSON
        return _error_response(e)