""" ML-3m-trader Model =================== LightGBM multi-class classifier for trade signal prediction. Handles training, validation, prediction, and persistence. """ import os import sys import numpy as np import pandas as pd import joblib import config as cfg from features import get_feature_columns try: import lightgbm as lgb except ImportError: print("[ERROR] lightgbm not installed. Run: pip install lightgbm") sys.exit(1) def _ensure_dirs(): os.makedirs(cfg.MODEL_DIR, exist_ok=True) def train(df: pd.DataFrame, labels: np.ndarray) -> lgb.LGBMClassifier: """ Train a LightGBM classifier on the provided feature DataFrame. Uses chronological train/validation split (no shuffle). Parameters ---------- df : pd.DataFrame DataFrame with feature columns present. labels : np.ndarray Integer labels aligned with df index. Returns ------- lgb.LGBMClassifier Trained model. """ feature_cols = get_feature_columns() X = df[feature_cols].values.astype(np.float32) y = labels.astype(np.int32) split_idx = int(len(X) * cfg.TRAIN_SPLIT_RATIO) X_train, X_val = X[:split_idx], X[split_idx:] y_train, y_val = y[:split_idx], y[split_idx:] print(f"[INFO] Training set : {X_train.shape[0]:,} samples") print(f"[INFO] Validation set: {X_val.shape[0]:,} samples") params = dict(cfg.LGBM_PARAMS) n_est = params.pop("n_estimators", 500) model = lgb.LGBMClassifier(n_estimators=n_est, **params) model.fit( X_train, y_train, eval_set=[(X_val, y_val)], eval_metric="multi_logloss", callbacks=[ lgb.early_stopping(cfg.EARLY_STOPPING_ROUNDS, verbose=True), lgb.log_evaluation(period=50), ], ) # Feature importance imp = pd.DataFrame({ "feature": feature_cols, "importance": model.feature_importances_, }).sort_values("importance", ascending=False) print("\n[INFO] Feature importance (top 10):") print(imp.head(10).to_string(index=False)) return model def predict(model: lgb.LGBMClassifier, df: pd.DataFrame) -> np.ndarray: """ Generate predictions for the given DataFrame. Returns ------- np.ndarray of int Predicted labels. """ feature_cols = get_feature_columns() X = df[feature_cols].values.astype(np.float32) return model.predict(X) def predict_proba(model: lgb.LGBMClassifier, df: pd.DataFrame) -> np.ndarray: """Return class probabilities.""" feature_cols = get_feature_columns() X = df[feature_cols].values.astype(np.float32) return model.predict_proba(X) def save_model(model: lgb.LGBMClassifier, filename: str = "lgbm_model.pkl"): """Persist model to disk via joblib.""" _ensure_dirs() path = os.path.join(cfg.MODEL_DIR, filename) joblib.dump(model, path) print(f"[INFO] Model saved to {path}") def load_model(filename: str = "lgbm_model.pkl") -> lgb.LGBMClassifier: """Load a previously saved model.""" path = os.path.join(cfg.MODEL_DIR, filename) if not os.path.exists(path): print(f"[ERROR] Model file not found: {path}") print(" Run 'python main.py train' first.") sys.exit(1) model = joblib.load(path) print(f"[INFO] Model loaded from {path}") return model