| import os |
| import logging |
| import pandas as pd |
| import numpy as np |
| import joblib |
| from pathlib import Path |
| from scipy.stats import norm |
|
|
# Root-logger setup for the script: INFO level, "timestamp - level - message" lines.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
)
|
|
def compute_inventory_policy(df, lead_time_days=7, lead_time_sd=2):
    """Derive safety stock and order-up-to levels from median and q95 forecasts.

    This is a pure function: it performs no I/O and does not mutate ``df``,
    so it can be reused and checked independently of model loading.

    The math, with ``z = norm.ppf(0.95)`` and ``L = lead_time_days``::

        dynamic_daily_buffer = max(forecast_q95 - forecast_sales, 0)
        sigma_1              = dynamic_daily_buffer / z
        mu_d                 = mean(forecast_sales) per (store_nbr, family)
        sigma_L              = sqrt(L * sigma_1**2 + mu_d**2 * lead_time_sd**2)
        safety_stock_95      = z * sigma_L
        order_up_to_level    = forecast_sales * L + safety_stock_95

    Args:
        df: Frame containing at least ``store_nbr``, ``family``,
            ``forecast_sales`` and ``forecast_q95``.
        lead_time_days: Mean replenishment lead time, in days.
        lead_time_sd: Standard deviation of the lead time, in days.

    Returns:
        A new DataFrame with the columns of ``df`` plus
        ``dynamic_daily_buffer``, ``sigma_1``, ``mu_d``, ``sigma_L``,
        ``safety_stock_95`` and ``order_up_to_level``. Row order follows
        ``df``; the index is reset by the internal merge.

    Raises:
        ValueError: If ``lead_time_days`` or ``lead_time_sd`` is negative.
        KeyError: If a required column is missing from ``df``.
    """
    if lead_time_days < 0 or lead_time_sd < 0:
        raise ValueError("lead_time_days and lead_time_sd must be non-negative")

    group_keys = ['store_nbr', 'family']
    z_95 = norm.ppf(0.95)

    # The two quantile models are fitted independently, so q95 can fall below
    # the median ("quantile crossing"). Because sigma is squared further down,
    # a negative gap would otherwise *increase* safety stock; clip it at zero.
    buffer = (df['forecast_q95'] - df['forecast_sales']).clip(lower=0)

    # Under a normal approximation the q95-to-median gap equals z_95 * sigma.
    # Use the same z here as for the safety stock so the two steps agree.
    sigma_1 = (buffer / z_95).fillna(0)

    # Average forecast demand per store/family, used for the lead-time
    # variability term.
    mu_d = (
        df.groupby(group_keys, observed=True)['forecast_sales']
        .mean()
        .rename('mu_d')
        .reset_index()
    )

    result = df.assign(dynamic_daily_buffer=buffer, sigma_1=sigma_1).merge(
        mu_d, on=group_keys, how='left'
    )

    # Demand uncertainty accumulated over the lead time, plus the effect of
    # lead-time uncertainty on mean demand.
    result['sigma_L'] = np.sqrt(
        lead_time_days * result['sigma_1'] ** 2
        + result['mu_d'] ** 2 * lead_time_sd ** 2
    )
    result['safety_stock_95'] = z_95 * result['sigma_L']
    result['order_up_to_level'] = (
        result['forecast_sales'] * lead_time_days + result['safety_stock_95']
    )
    return result


def _log_forecast_value_add(model_error, naive_error):
    """Log MAE- and RMSE-based Forecast Value Add of the model over the naive forecast."""
    mae_model = model_error.abs().mean()
    rmse_model = np.sqrt((model_error ** 2).mean())
    mae_naive = naive_error.abs().mean()
    rmse_naive = np.sqrt((naive_error ** 2).mean())

    # FVA is the relative error reduction versus the naive baseline; report 0
    # when the baseline error is not positive, to avoid dividing by zero.
    fva = (mae_naive - mae_model) / mae_naive if mae_naive > 0 else 0.0
    fva_rmse = (rmse_naive - rmse_model) / rmse_naive if rmse_naive > 0 else 0.0
    logging.info(f"Forecast Value Add (FVA) against naive lag_1: {fva:.1%}")
    logging.info(f"Global Model RMSE: {rmse_model:.2f} (Naive: {rmse_naive:.2f}, FVA RMSE: {fva_rmse:.1%})")


def generate_forecasts_and_inventory(
    parquet_path: str,
    model_path: str,
    output_path: str,
    *,
    lead_time_days: float = 7,
    lead_time_sd: float = 2,
) -> None:
    """Score the feature set with the trained models and derive inventory levels.

    Loads the median model from ``model_path`` and the 95th-percentile model
    from a sibling file with ``_q95`` appended to the file stem (for example
    ``lgb_model.pkl`` -> ``lgb_model_q95.pkl``). It predicts on the features
    in ``parquet_path``, logs Forecast Value Add against the ``sales_lag_1``
    naive forecast, and computes safety stock (Safety Stock = Z_alpha *
    sigma_L) and order-up-to levels via ``compute_inventory_policy``. The
    result columns are written to ``output_path`` as parquet.

    Args:
        parquet_path: Path to the engineered-features parquet file.
        model_path: Path to the serialized median model.
        output_path: Destination parquet path for the analytical results.
        lead_time_days: Mean replenishment lead time, in days.
        lead_time_sd: Standard deviation of the lead time, in days.

    Raises:
        KeyError: If the input data lacks a required feature, ``sales`` or
            ``date`` column.
        ValueError: If ``lead_time_days`` or ``lead_time_sd`` is negative.
    """
    logging.info("Loading ensemble models and validation data...")

    # Build the q95 path from the file name only. A plain str.replace would
    # also rewrite '.pkl' inside directory names, and would silently reload
    # the median model when the suffix is absent.
    median_path = Path(model_path)
    q95_path = median_path.with_name(f"{median_path.stem}_q95{median_path.suffix}")

    # NOTE: joblib.load unpickles its input and can execute arbitrary code;
    # only point this at model files from a trusted source.
    model_median = joblib.load(model_path)
    model_q95 = joblib.load(str(q95_path))
    df = pd.read_parquet(parquet_path)

    categorical_cols = ['store_nbr', 'family', 'city', 'state', 'store_type', 'cluster', 'is_holiday']
    features = [
        'store_nbr', 'family', 'city', 'state', 'store_type', 'cluster',
        'onpromotion', 'month', 'day_of_week', 'day_of_year', 'is_weekend', 'is_holiday',
        'dcoilwtico',
        'sales_lag_1', 'sales_lag_7', 'sales_lag_28',
        'transactions_lag_1', 'transactions_lag_7',
        'rolling_mean_7', 'rolling_std_7', 'rolling_mean_28',
    ]

    # Fail before any prediction work if a column needed later is absent.
    missing = sorted((set(features) | {'date', 'sales'}).difference(df.columns))
    if missing:
        raise KeyError(f"Input data at {parquet_path} is missing required columns: {missing}")

    for col in categorical_cols:
        if col in df.columns:
            df[col] = df[col].astype('category')

    # Floor both predictions at zero.
    df['forecast_sales'] = np.maximum(0, model_median.predict(df[features]))
    df['forecast_q95'] = np.maximum(0, model_q95.predict(df[features]))

    df['error'] = df['sales'] - df['forecast_sales']
    _log_forecast_value_add(df['error'], df['sales'] - df['sales_lag_1'])

    logging.info("Calculating Safety Stock via Service Level math...")
    df = compute_inventory_policy(
        df, lead_time_days=lead_time_days, lead_time_sd=lead_time_sd
    )

    logging.info(f"Saving final analytical dataset to {output_path}...")
    output_cols = [
        'date', 'store_nbr', 'family', 'sales', 'forecast_sales',
        'error', 'safety_stock_95', 'order_up_to_level',
    ]
    df[output_cols].to_parquet(output_path)
    logging.info("Optimization complete.")
|
|
if __name__ == "__main__":
    # parents[2] of this file's resolved path is treated as the project root.
    root = Path(__file__).resolve().parents[2]
    processed_dir = root / "data" / "processed"

    # Paths are converted to str so the callee receives the same argument
    # types as before.
    generate_forecasts_and_inventory(
        str(processed_dir / "features.parquet"),
        str(root / "src" / "models" / "lgb_model.pkl"),
        str(processed_dir / "analytical_results.parquet"),
    )
|
|