"""
Feature engineering for LightGBM-based time series forecasting.

Key features:
  - Calendar: day-of-week, month, year, week-of-year, day-of-year, is_weekend,
    quarter, is_month_start, is_month_end
  - Lag features: sales at t-7, t-14, t-21, t-28, t-35, t-42, t-56, t-364
    (t-364 = same weekday one year earlier)
  - Rolling statistics: 7-day and 28-day rolling mean/std
  - Trend: linear trend index (not currently added by build_features)
  - External: sell_price, snap flag, event indicators

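The feature steps work on copies rather than modifying the input dataframe.
Rolling statistics are computed on sales shifted by HORIZON days. Note that the
lag list also contains offsets of 7, 14 and 21 days, so the raw lag features are
≥ HORIZON days back only if HORIZON ≤ 7.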
"""
from __future__ import annotations

import numpy as np
import pandas as pd

from src.config import TARGET_COL, DATE_COL, ID_COL, HORIZON


# ── Calendar features ──────────────────────────────────────────────────────

def add_calendar_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` extended with columns derived from DATE_COL.

    Columns written: dayofweek, month, year, weekofyear, dayofyear,
    is_weekend, quarter, is_month_start, is_month_end. The input frame is
    left untouched.
    """
    stamp = df[DATE_COL].dt
    weekday = stamp.dayofweek  # 0=Mon … 6=Sun

    return df.assign(
        dayofweek=weekday,
        month=stamp.month,
        year=stamp.year,
        # isocalendar() yields a nullable UInt32 column; cast to plain int
        weekofyear=stamp.isocalendar().week.astype(int),
        dayofyear=stamp.dayofyear,
        # Saturday (5) and Sunday (6)
        is_weekend=(weekday >= 5).astype(int),
        quarter=stamp.quarter,
        # 0/1 flags for the first and last day of a month
        is_month_start=stamp.is_month_start.astype(int),
        is_month_end=stamp.is_month_end.astype(int),
    )


# ── Lag features ───────────────────────────────────────────────────────────

# Offsets used by add_lag_features; each one is applied as a per-series row
# shift, so it equals a number of days only when a series has one row per day.
# NOTE(review): 7, 14 and 21 are shorter than a 28-day horizon, so not every
# entry is ≥ HORIZON if HORIZON is 28 — confirm the short lags are intentional.
LAG_DAYS = [7, 14, 21, 28, 35, 42, 56, 364]

def add_lag_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Attach one ``lag_<k>`` column for every offset k in LAG_DAYS.

    The frame is first ordered by (ID_COL, DATE_COL); the target is then
    shifted by k rows inside each series. A new, sorted frame is returned and
    the input is not modified.
    """
    ordered = df.sort_values([ID_COL, DATE_COL]).copy()
    target_by_series = ordered.groupby(ID_COL)[TARGET_COL]
    for offset in LAG_DAYS:
        ordered[f"lag_{offset}"] = target_by_series.shift(offset)
    return ordered


# ── Rolling statistics ─────────────────────────────────────────────────────

# Window lengths (in rows) for which rolling mean/std columns are produced.
ROLL_WINDOWS  = [7, 28]
# Number of rows the target is shifted by before rolling, so that every
# window ends HORIZON rows before the current row (no leakage).
ROLL_LAG      = HORIZON

def add_rolling_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``rolling_mean_<w>`` and ``rolling_std_<w>`` for each w in ROLL_WINDOWS.

    The target is shifted by ROLL_LAG rows within each series before the
    windows are applied, so a window of w rows ends ROLL_LAG rows before the
    current one. Returns a new frame sorted by (ID_COL, DATE_COL).
    """
    out = df.sort_values([ID_COL, DATE_COL]).copy()
    shifted = out.groupby(ID_COL)[TARGET_COL].shift(ROLL_LAG)
    # Group the shifted values by series once; reused for every window/stat.
    by_series = shifted.groupby(out[ID_COL])

    for window in ROLL_WINDOWS:
        out[f"rolling_mean_{window}"] = by_series.transform(
            lambda s: s.rolling(window, min_periods=1).mean()
        )

        window_std = by_series.transform(
            lambda s: s.rolling(window, min_periods=1).std()
        )
        # NaN std values (e.g. fewer than two observations) are reported as 0.
        out[f"rolling_std_{window}"] = window_std.fillna(0)

    return out


# ── Price features ─────────────────────────────────────────────────────────

def add_price_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add per-series price features derived from ``sell_price``.

    Columns written:
      - price_norm   : sell_price divided by the mean sell_price of its series.
      - price_change : relative change versus the previous row of the same
                       series; missing changes (e.g. the first row of a
                       series) are set to 0.

    Args:
        df: Long-format frame with ID_COL and, optionally, ``sell_price``.

    Returns:
        ``df`` itself, unchanged, when it has no ``sell_price`` column.
        Otherwise a new frame with the two columns added; when DATE_COL is
        present the rows are ordered by (ID_COL, DATE_COL), matching the lag
        and rolling helpers.
    """
    if "sell_price" not in df.columns:
        return df

    if DATE_COL in df.columns:
        # pct_change compares consecutive rows, so rows must be in date order
        # within each series for "previous row" to mean "previous date".
        df = df.sort_values([ID_COL, DATE_COL]).copy()
    else:
        df = df.copy()

    prices = df.groupby(ID_COL)["sell_price"]
    df["price_norm"] = df["sell_price"] / prices.transform("mean")
    df["price_change"] = prices.pct_change().fillna(0)
    return df


# ── Event / snap features ──────────────────────────────────────────────────

def add_event_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add binary event and SNAP indicator columns.

    Columns written (each only when its source columns exist):
      - has_event : 1 where ``event_name_1`` is not null, else 0.
      - is_snap   : row-wise maximum over every ``snap_*`` column, as int.

    Args:
        df: Input frame; it is copied, not modified.

    Returns:
        A copy of ``df`` with the indicator columns added.
    """
    df = df.copy()
    if "event_name_1" in df.columns:
        df["has_event"] = df["event_name_1"].notna().astype(int)

    # Use whichever snap_* columns are present rather than requiring snap_CA,
    # so frames restricted to other states still get the flag. Non-string
    # column labels are skipped.
    snap_cols = [
        c for c in df.columns
        if isinstance(c, str) and c.startswith("snap_")
    ]
    if snap_cols:
        df["is_snap"] = df[snap_cols].max(axis=1).astype(int)
    return df


# ── Master feature builder ─────────────────────────────────────────────────

# Feature column names chosen by the most recent build_features(fit=True)
# call; empty until then. build_features rebinds this name, so read it through
# get_feature_cols() rather than importing the list object directly.
FEATURE_COLS: list[str] = []

def build_features(df: pd.DataFrame, fit: bool = True) -> pd.DataFrame:
    """
    Apply all feature engineering steps.

    Runs, in order: calendar, lag, rolling, price and event features, then
    drops the rows whose ``lag_<LAG_DAYS[0]>`` value is missing.

    Args:
        df  : DataFrame in long format with (unique_id, ds, y, optional exog).
        fit : If True, also cache the final feature column list in the
              module-level FEATURE_COLS (read it with get_feature_cols()).

    Returns:
        DataFrame with all features added.
    """
    global FEATURE_COLS

    df = add_calendar_features(df)
    df = add_lag_features(df)
    df = add_rolling_features(df)
    df = add_price_features(df)
    df = add_event_features(df)

    # Drop rows where the first lag in LAG_DAYS is undefined (at least the
    # first LAG_DAYS[0] rows of every series). Longer lags can still be NaN in
    # the rows that remain.
    df = df.dropna(subset=[f"lag_{LAG_DAYS[0]}"])

    if fit:
        # Collect all numeric feature columns (exclude id/date/target and the
        # raw event text columns).
        exclude = {ID_COL, DATE_COL, TARGET_COL,
                   "event_name_1", "event_name_2",
                   "event_type_1", "event_type_2"}
        # Accept any NumPy signed/unsigned integer or float dtype, whatever
        # its width, so downcast columns (int8, int16, float16, ...) are kept.
        # Booleans, datetimes, objects and extension dtypes are not features.
        numeric_kinds = {"i", "u", "f"}
        FEATURE_COLS = [
            col for col, dtype in df.dtypes.items()
            if col not in exclude
            and isinstance(dtype, np.dtype)
            and dtype.kind in numeric_kinds
        ]

    return df


def get_feature_cols() -> list[str]:
    """Give back a fresh copy of the columns cached by build_features(fit=True)."""
    return [*FEATURE_COLS]