"""
models/base.py
Shared utilities for all CNN-LSTM variants.
Optimised for CPU training on HF Spaces.
"""

import numpy as np
import hashlib
import pickle
from pathlib import Path
from sklearn.preprocessing import RobustScaler
from sklearn.utils.class_weight import compute_class_weight

SEED      = 42
CACHE_DIR = Path("/tmp/p2_model_cache")
CACHE_DIR.mkdir(exist_ok=True)

# Clear any v1 cache files (missing max_daily_date field)
for _f in CACHE_DIR.glob("*.pkl"):
    try:
        import pickle as _pkl
        with open(_f, "rb") as _fh:
            _d = _pkl.load(_fh)
        # If any result dict lacks max_daily_date, bust the whole cache
        if isinstance(_d, dict) and "results" in _d:
            _needs_bust = any(
                isinstance(r, dict) and "max_daily_date" not in r
                for r in _d["results"].values() if r is not None
            )
            if _needs_bust:
                _f.unlink(missing_ok=True)
    except Exception:
        _f.unlink(missing_ok=True)
np.random.seed(SEED)


# ── Cache helpers ─────────────────────────────────────────────────────────────

def make_cache_key(last_date, start_yr, fee_bps, epochs, split, include_cash, lookback):
    raw = f"v2_{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}"
    return hashlib.md5(raw.encode()).hexdigest()


def save_cache(key, payload):
    with open(CACHE_DIR / f"{key}.pkl", "wb") as f:
        pickle.dump(payload, f)


def load_cache(key):
    path = CACHE_DIR / f"{key}.pkl"
    if path.exists():
        try:
            with open(path, "rb") as f:
                return pickle.load(f)
        except Exception:
            path.unlink(missing_ok=True)
    return None


# ── Sequence builder ──────────────────────────────────────────────────────────

def build_sequences(features, targets, lookback):
    X, y = [], []
    for i in range(lookback, len(features)):
        X.append(features[i - lookback: i])
        y.append(targets[i])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)


# ── Train / val / test split ──────────────────────────────────────────────────

def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
    n  = len(X)
    t1 = int(n * train_pct)
    t2 = int(n * (train_pct + val_pct))
    return X[:t1], y[:t1], X[t1:t2], y[t1:t2], X[t2:], y[t2:]


# ── Feature scaling ───────────────────────────────────────────────────────────

def scale_features(X_train, X_val, X_test):
    n_feat = X_train.shape[2]
    scaler = RobustScaler()
    scaler.fit(X_train.reshape(-1, n_feat))
    def _t(X):
        s = X.shape
        return scaler.transform(X.reshape(-1, n_feat)).reshape(s)
    return _t(X_train), _t(X_val), _t(X_test), scaler


# ── Label builder (no CASH class — CASH is a risk overlay) ───────────────────

def returns_to_labels(y_raw):
    """Simple argmax — model always predicts one of the ETFs."""
    return np.argmax(y_raw, axis=1).astype(np.int32)


# ── Class weights ─────────────────────────────────────────────────────────────

def compute_class_weights(y_labels, n_classes):
    present = np.unique(y_labels)
    try:
        weights = compute_class_weight("balanced", classes=present, y=y_labels)
        weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
    except Exception:
        weight_dict = {}
    for c in range(n_classes):
        if c not in weight_dict:
            weight_dict[c] = 1.0
    return weight_dict


# ── Callbacks ─────────────────────────────────────────────────────────────────

def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
    from tensorflow import keras
    return [
        keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=patience_es,
            restore_best_weights=True, verbose=0,
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.5,
            patience=patience_lr, min_lr=min_lr, verbose=0,
        ),
    ]


# ── Output head ───────────────────────────────────────────────────────────────

def classification_head(x, n_classes, dropout=0.3):
    from tensorflow import keras
    x = keras.layers.Dense(32, activation="relu")(x)
    x = keras.layers.Dropout(dropout)(x)
    x = keras.layers.Dense(n_classes, activation="softmax")(x)
    return x


# ── Auto lookback selection (Approach 1 proxy, fast) ─────────────────────────

def find_best_lookback(X_raw, y_raw, train_pct, val_pct, n_classes,
                        include_cash=False, candidates=None):
    from tensorflow import keras

    if candidates is None:
        candidates = [30, 45, 60]

    best_lb, best_loss = candidates[0], np.inf

    for lb in candidates:
        try:
            X_seq, y_seq = build_sequences(X_raw, y_raw, lb)
            y_lab        = returns_to_labels(y_seq)
            X_tr, y_tr, X_v, y_v, _, _ = train_val_test_split(X_seq, y_lab, train_pct, val_pct)
            X_tr_s, X_v_s, _, _        = scale_features(X_tr, X_v, X_v)
            cw = compute_class_weights(y_tr, n_classes)

            inp = keras.Input(shape=X_tr_s.shape[1:])
            x   = keras.layers.Conv1D(16, min(3, lb), padding="causal", activation="relu")(inp)
            x   = keras.layers.GlobalAveragePooling1D()(x)
            out = keras.layers.Dense(n_classes, activation="softmax")(x)
            m   = keras.Model(inp, out)
            m.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

            hist = m.fit(
                X_tr_s, y_tr,
                validation_data=(X_v_s, y_v),
                epochs=15, batch_size=64, class_weight=cw,
                callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
                verbose=0,
            )
            val_loss = min(hist.history.get("val_loss", [np.inf]))
            if val_loss < best_loss:
                best_loss, best_lb = val_loss, lb
            del m
        except Exception:
            continue

    return best_lb