File size: 2,644 Bytes
0914e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# FILE: ai-service/training/train_revenue_forecaster.py (NEW FILE)

import pandas as pd
import joblib
import os
import sys
# Holt's method: exponential smoothing with a level and a trend component (no seasonality)
from statsmodels.tsa.api import Holt 
from pathlib import Path

def _load_revenue_series(data_path):
    """Read the revenue CSV and return 'total_revenue' indexed by month.

    Args:
        data_path: Path of a CSV file with a 'month' column (parseable as
            dates) and a 'total_revenue' column.

    Returns:
        A pandas Series of revenue values with a DatetimeIndex, sorted by date.

    Raises:
        FileNotFoundError: If the file does not exist.
        pandas.errors.EmptyDataError: If the file has no content at all.
        ValueError: If a required column is absent, or 'total_revenue'
            holds missing or non-numeric values.
    """
    df = pd.read_csv(data_path)
    print(f"Loaded {len(df)} monthly records from {data_path}")

    # Fail with a readable message instead of a bare KeyError later on.
    missing_cols = [c for c in ('month', 'total_revenue') if c not in df.columns]
    if missing_cols:
        raise ValueError(f"missing required column(s): {', '.join(missing_cols)}")

    # The model needs a clean, chronologically ordered time-series index.
    df['month'] = pd.to_datetime(df['month'])
    df = df.set_index('month').sort_index()

    # Reject gaps / unparseable values up front rather than fitting on NaNs.
    revenue = pd.to_numeric(df['total_revenue'], errors='coerce')
    bad_count = int(revenue.isna().sum())
    if bad_count:
        raise ValueError(
            f"'total_revenue' contains {bad_count} missing or non-numeric value(s)"
        )
    return revenue


def train_revenue_forecaster(data_path=None, models_dir=None):
    """Train a Holt's Exponential Smoothing model on monthly revenue and save it.

    Loads the monthly revenue data, fits the model on the 'total_revenue'
    column and writes the fitted model to 'revenue_forecaster_v1.joblib'
    inside the models directory. Progress and errors are reported via print();
    no exception is propagated to the caller.

    Args:
        data_path: Optional path to the training CSV. Defaults to
            '<root>/data/revenue_training_data.csv', where <root> is the
            parent of the directory containing this file.
        models_dir: Optional directory for the saved model. Defaults to
            '<root>/models'. It is created if it does not exist.

    Returns:
        True if the model was trained and saved, False if training was
        aborted or failed for any reason.
    """
    print("--- Starting AI Revenue Forecaster Model Training ---")

    try:
        # --- Setup to find files from the root directory ---
        root_dir = Path(__file__).resolve().parents[1]
        root_str = str(root_dir)
        # Only add the root once so repeated calls do not grow sys.path.
        if root_str not in sys.path:
            sys.path.append(root_str)

        if data_path is None:
            data_path = root_dir / 'data' / 'revenue_training_data.csv'
        else:
            data_path = Path(data_path)

        if models_dir is None:
            models_dir = root_dir / 'models'
        else:
            models_dir = Path(models_dir)

        # --- Load and prepare the data ---
        time_series = _load_revenue_series(data_path)

        if len(time_series) < 4:
            print("🔴 ERROR: Not enough historical data (at least 4 months required). Aborting training.")
            return False

    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return False
    except Exception as e:
        # Top-level boundary of a CLI job: report and signal failure.
        print(f"🔴 ERROR during data loading/preparation: {e}")
        return False

    # --- Train the Holt's Forecasting Model ---
    try:
        print("Training the Holt's Exponential Smoothing model...")
        # initialization_method="estimated" lets the fit estimate the initial
        # level/trend; fit(optimized=True) optimizes the smoothing parameters.
        # NOTE(review): the index carries no explicit frequency here; confirm
        # whether downstream forecasting needs a dated forecast index.
        model = Holt(time_series, initialization_method="estimated").fit(optimized=True)

        print("--- Model training complete! ---")

        # --- Save the trained model ---
        # parents=True so a caller-supplied nested directory also works.
        models_dir.mkdir(parents=True, exist_ok=True)
        model_path = models_dir / 'revenue_forecaster_v1.joblib'

        joblib.dump(model, model_path)
        print(f"🎉 Success! Revenue Forecaster model saved to: {model_path}")

    except Exception as e:
        print(f"🔴 ERROR during model training or saving: {e}")
        return False

    return True


# Script entry point: run the training job only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    train_revenue_forecaster()