# File size: 2,644 Bytes
# 0914e96
# FILE: ai-service/training/train_revenue_forecaster.py (NEW FILE)
import pandas as pd
import joblib
import os
import sys
# Holt's Exponential Smoothing is a powerful forecasting model
from statsmodels.tsa.api import Holt
from pathlib import Path
def train_revenue_forecaster(data_path=None, models_dir=None):
    """
    Train a Holt's Exponential Smoothing model on monthly revenue and save it.

    Loads the monthly revenue CSV, indexes it by the parsed 'month' column,
    fits a Holt model on the 'total_revenue' column and dumps the fitted
    model with joblib as 'revenue_forecaster_v1.joblib'.

    Args:
        data_path: Optional path to the training CSV. Defaults to
            <root>/data/revenue_training_data.csv, where <root> is the parent
            of the directory containing this file.
        models_dir: Optional directory the model is written to. Defaults to
            <root>/models. It is created (with parents) if it does not exist.

    Returns:
        The path of the saved model on success, or None when training was
        aborted. Failures are reported on stdout and are not raised.
    """
    print("--- Starting AI Revenue Forecaster Model Training ---")

    try:
        # --- Setup to find files from the root directory ---
        root_dir = Path(__file__).resolve().parents[1]
        # Avoid piling up duplicate entries when the function is called repeatedly.
        if str(root_dir) not in sys.path:
            sys.path.append(str(root_dir))

        if data_path is None:
            data_path = root_dir / 'data' / 'revenue_training_data.csv'
        else:
            data_path = Path(data_path)

        if models_dir is None:
            models_dir = root_dir / 'models'
        else:
            models_dir = Path(models_dir)

        # --- Load and prepare the data ---
        df = pd.read_csv(data_path)
        print(f"Loaded {len(df)} monthly records from {data_path}")

        # The model needs a clean time-series index to learn properly
        df['month'] = pd.to_datetime(df['month'])
        df.set_index('month', inplace=True)

        # Ensure the data is sorted by date
        df.sort_index(inplace=True)

        # We are training on the 'total_revenue' column. Blank or non-numeric
        # cells become NaN here so they can be removed; otherwise they would be
        # handed to Holt unchecked and still count toward the minimum-length
        # test below.
        time_series = pd.to_numeric(df['total_revenue'], errors='coerce')
        usable = time_series.notna().to_numpy() & time_series.index.notna()
        skipped = int((~usable).sum())
        if skipped:
            print(f"Skipping {skipped} record(s) with a missing month or a missing/non-numeric revenue value.")
            time_series = time_series[usable]

        if len(time_series) < 4:
            print("π΄ ERROR: Not enough historical data (at least 4 months required). Aborting training.")
            return None

    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"π΄ ERROR: Data file not found or is empty at {data_path}")
        return None
    except Exception as e:
        print(f"π΄ ERROR during data loading/preparation: {e}")
        return None

    # --- Train the Holt's Forecasting Model ---
    try:
        print("Training the Holt's Exponential Smoothing model...")
        # 'initialization_method="estimated"' helps the model find the best starting parameters
        # 'fit(optimized=True)' tells it to find the best possible alpha and beta values
        model = Holt(time_series, initialization_method="estimated").fit(optimized=True)
        print("--- Model training complete! ---")

        # --- Save the trained model ---
        # parents=True so a caller-supplied nested directory is created too.
        models_dir.mkdir(parents=True, exist_ok=True)
        model_path = models_dir / 'revenue_forecaster_v1.joblib'
        joblib.dump(model, model_path)
        print(f"π Success! Revenue Forecaster model saved to: {model_path}")
        return model_path

    except Exception as e:
        print(f"π΄ ERROR during model training or saving: {e}")
        return None
if __name__ == '__main__':
    # Run the training pipeline only when this file is executed as a script,
    # not when it is imported.
    train_revenue_forecaster()