"""Biologically realistic synthetic fMRI data generation.

Generates data with hemodynamic response convolution, modality-specific
activation patterns, spatial autocorrelation, temporal noise structure,
and scanner drift - mimicking real fMRI recordings.
"""

import numpy as np

# --- Hemodynamic Response Function ---

def generate_hrf(tr_seconds=1.0, duration=30.0):
    """Canonical double-gamma hemodynamic response function.

    Models the BOLD signal as the difference of two gamma densities: a
    positive lobe (shape 6) minus a smaller undershoot lobe (shape 16,
    scaled by 1/6), sampled every ``tr_seconds`` on ``[0, duration)``.

    Parameters
    ----------
    tr_seconds : float
        Sampling interval (repetition time) in seconds. Must be positive.
    duration : float
        Length of the modelled response in seconds. Must be positive.

    Returns
    -------
    np.ndarray
        1-D array of HRF samples, scaled so the largest absolute value is 1.
        If every sample is zero (only ``t = 0`` is sampled because
        ``tr_seconds >= duration``) the zeros are returned unscaled instead
        of dividing by zero.

    Raises
    ------
    ValueError
        If ``tr_seconds`` or ``duration`` is not positive.
    """
    # Function-local import: SciPy is only needed when an HRF is built.
    from scipy.stats import gamma as gamma_dist

    if tr_seconds <= 0 or duration <= 0:
        raise ValueError(
            "tr_seconds and duration must be positive, got "
            f"tr_seconds={tr_seconds!r}, duration={duration!r}"
        )

    t = np.arange(0, duration, tr_seconds)

    # Double gamma parameters (SPM canonical)
    a1, b1 = 6.0, 1.0    # positive peak
    a2, b2 = 16.0, 1.0   # undershoot
    c = 1.0 / 6.0        # undershoot ratio

    h = gamma_dist.pdf(t, a1, scale=b1) - c * gamma_dist.pdf(t, a2, scale=b2)

    # Normalize to [-1, 1]. Both gamma densities are 0 at t = 0, so a grid
    # containing only that sample has a zero peak; skip the division then,
    # otherwise the result would be NaN.
    peak = np.max(np.abs(h))
    if peak > 0:
        h = h / peak
    return h


def generate_stimulus_events(n_timepoints, tr_seconds=1.0, n_events=5, seed=42):
    """Generate random stimulus onset times as a binary event train.

    Onsets are drawn uniformly from the window ``[2, total - 10]`` seconds
    (``total = n_timepoints * tr_seconds``) by rejection sampling: a
    candidate is kept only if it lies more than 8 seconds from every onset
    already accepted.

    Parameters
    ----------
    n_timepoints : int
        Length of the returned event train, in TRs.
    tr_seconds : float
        Repetition time in seconds.
    n_events : int
        Number of events requested.
    seed : int
        Seed for the random generator.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_timepoints,)`` with 1.0 at stimulus onsets and
        0.0 elsewhere. Fewer than ``n_events`` onsets are present when they
        cannot all be placed within 1000 attempts, and none at all when the
        run is too short to contain the onset window (``total < 12`` s).
    """
    rng = np.random.default_rng(seed)
    total_seconds = n_timepoints * tr_seconds
    min_gap = 8.0  # minimum inter-stimulus interval
    earliest_onset = 2.0
    latest_onset = total_seconds - 10.0
    max_attempts = 1000

    events = np.zeros(n_timepoints)

    # A run shorter than 12 s has an empty onset window. Sampling from it
    # would call uniform() with high < low, which is invalid, so return the
    # empty event train instead.
    if latest_onset < earliest_onset:
        return events

    onsets = []
    for _ in range(max_attempts):
        if len(onsets) >= n_events:
            break
        candidate = rng.uniform(earliest_onset, latest_onset)
        if all(abs(candidate - placed) > min_gap for placed in onsets):
            onsets.append(candidate)

    for onset in onsets:
        idx = int(onset / tr_seconds)
        if 0 <= idx < n_timepoints:
            events[idx] = 1.0

    return events


# --- Modality-Specific Activation Weights ---

# Weight for each ROI given a stimulus modality (0 = no response, 1 = maximum).
# The table is stored column-wise: _ROI_NAMES fixes the ROI order and each row
# of _WEIGHT_ROWS lists one modality's weights in that same order.
_ROI_NAMES = (
    # visual cortex
    "V1", "V2", "V3", "V4", "MT", "MST", "FFC", "VVC",
    # auditory
    "A1", "LBelt", "MBelt", "PBelt", "A4", "A5",
    # language
    "44", "45", "IFJa", "IFJp", "TPOJ1", "TPOJ2", "STV", "PSL",
    # executive (attention)
    "46", "9-46d", "8Av", "8Ad", "FEF", "p32pr", "a32pr",
)

_WEIGHT_ROWS = {
    "visual": (
        1.0, 0.95, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6,      # strong visual cortex
        0.05, 0.04, 0.03, 0.03, 0.02, 0.02,              # weak cross-modal
        0.08, 0.07, 0.06, 0.05, 0.1, 0.08, 0.07, 0.06,   # minimal language
        0.3, 0.25, 0.35, 0.3, 0.4, 0.15, 0.12,           # moderate executive
    ),
    "auditory": (
        0.03, 0.03, 0.02, 0.02, 0.02, 0.01, 0.01, 0.01,
        1.0, 0.95, 0.9, 0.85, 0.75, 0.7,
        0.15, 0.12, 0.1, 0.08, 0.25, 0.2, 0.3, 0.2,
        0.2, 0.15, 0.12, 0.1, 0.08, 0.1, 0.08,
    ),
    "language": (
        0.05, 0.04, 0.03, 0.03, 0.02, 0.02, 0.1, 0.08,
        0.3, 0.25, 0.2, 0.15, 0.2, 0.15,
        1.0, 0.95, 0.85, 0.8, 0.9, 0.85, 0.75, 0.7,
        0.5, 0.45, 0.3, 0.25, 0.15, 0.35, 0.3,
    ),
    "multimodal": (
        0.7, 0.65, 0.55, 0.5, 0.5, 0.45, 0.4, 0.35,
        0.7, 0.65, 0.55, 0.5, 0.45, 0.4,
        0.65, 0.6, 0.5, 0.45, 0.6, 0.55, 0.5, 0.45,
        0.4, 0.35, 0.3, 0.25, 0.3, 0.25, 0.2,
    ),
}

# modality -> {ROI name -> weight}
MODALITY_WEIGHTS = {
    modality: dict(zip(_ROI_NAMES, row))
    for modality, row in _WEIGHT_ROWS.items()
}


def _delay_with_zero_fill(signal, n_samples):
    """Delay a 1-D signal by ``n_samples`` samples, zero-filling the start."""
    if n_samples <= 0:
        return signal.copy()
    delayed = np.zeros(signal.shape, dtype=signal.dtype)
    if n_samples < len(signal):
        delayed[n_samples:] = signal[:-n_samples]
    return delayed


def generate_realistic_predictions(
    n_timepoints,
    roi_indices,
    stimulus_type="visual",
    tr_seconds=1.0,
    n_events=5,
    snr=2.0,
    seed=42,
):
    """Generate biologically realistic fMRI-like predictions.

    The pipeline is: build a stimulus event train, convolve it with the HRF,
    scale and delay that response per ROI, then add AR(1) noise and a slow
    sinusoidal drift, and finally smooth along the vertex order of each ROI.

    Parameters
    ----------
    n_timepoints : int
        Number of TRs.
    roi_indices : dict[str, np.ndarray]
        ROI name -> vertex indices mapping. ROIs with no vertices are
        ignored. If a vertex appears in several ROIs, the ROI listed last
        determines its signal.
    stimulus_type : str
        One of "visual", "auditory", "language", "multimodal". Any other
        value falls back to the "multimodal" weights.
    tr_seconds : float
        Repetition time in seconds.
    n_events : int
        Number of stimulus events.
    snr : float
        Nominal signal-to-noise ratio (higher = cleaner signal); values
        below 0.1 are treated as 0.1. The white noise is scaled to
        ``signal_std / snr`` before AR filtering, and the AR filter and the
        final smoothing both change the noise level afterwards.
    seed : int
        Random seed, used both for the event train and for the per-vertex
        gains and noise.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_timepoints, n_vertices)`` where ``n_vertices``
        is the largest vertex index in ``roi_indices`` plus one. Vertices
        not covered by any ROI carry noise and drift only.

    Raises
    ------
    ValueError
        If ``roi_indices`` contains no vertex index at all.
    """
    rng = np.random.default_rng(seed)

    # Empty ROIs are skipped here so that one missing region does not make
    # max() fail for the whole call.
    highest_vertex = max(
        (max(v) for v in roi_indices.values() if len(v) > 0),
        default=None,
    )
    if highest_vertex is None:
        raise ValueError("roi_indices must contain at least one vertex index")
    n_vertices = highest_vertex + 1
    predictions = np.zeros((n_timepoints, n_vertices))

    # 1. Generate stimulus-evoked signal
    events = generate_stimulus_events(n_timepoints, tr_seconds, n_events, seed)
    hrf = generate_hrf(tr_seconds)

    # Convolve events with HRF; truncate the tail that runs past the scan.
    bold_signal = np.convolve(events, hrf)[:n_timepoints]

    # 2. Apply modality-specific weights per ROI
    weights = MODALITY_WEIGHTS.get(stimulus_type, MODALITY_WEIGHTS["multimodal"])
    late_rois = {"44", "45", "IFJa", "IFJp", "46", "9-46d"}   # ~2 s later
    mid_rois = {"TPOJ1", "TPOJ2", "STV", "PSL"}               # ~1.5 s later
    for roi_name, vertices in roi_indices.items():
        if len(vertices) == 0:
            continue
        w = weights.get(roi_name, 0.1)  # ROIs absent from the table respond weakly

        # Per-ROI latency (higher-order areas respond later)
        if roi_name in late_rois:
            latency_shift = int(2.0 / tr_seconds)
        elif roi_name in mid_rois:
            latency_shift = int(1.5 / tr_seconds)
        else:
            latency_shift = 0

        # Delay with zero fill rather than a circular shift, which would
        # wrap the end of the scan around into its first samples.
        shifted = _delay_with_zero_fill(bold_signal, latency_shift) * w

        # Per-vertex gain in [0.8, 1.2) so vertices of one ROI are not identical.
        vertex_scales = 0.8 + 0.4 * rng.random(len(vertices))
        predictions[:, vertices] = shifted[:, np.newaxis] * vertex_scales

    # 3. Add temporal autocorrelation (AR(1) noise): each sample adds half
    #    of the already-filtered previous sample.
    ar_coeff = 0.5
    noise = rng.standard_normal(predictions.shape)
    for t in range(1, n_timepoints):
        noise[t] += ar_coeff * noise[t - 1]

    # 4. Add scanner drift (low-frequency sinusoid, period 0.8x the scan
    #    length, shared by all vertices)
    t_axis = np.arange(n_timepoints) * tr_seconds
    drift = 0.1 * np.sin(2 * np.pi * t_axis / (n_timepoints * tr_seconds * 0.8))
    drift = drift[:, np.newaxis]

    # 5. Combine signal + noise + drift. The signal level is the standard
    #    deviation of the non-zero samples; 1.0 is used when there are none.
    signal_power = np.std(predictions[predictions != 0]) if np.any(predictions != 0) else 1.0
    noise_power = signal_power / max(snr, 0.1)
    predictions = predictions + noise * noise_power + drift

    # 6. Spatial smoothing (moving average over neighbours in the ROI's
    #    vertex order). mode="same" zero-pads, so vertices at either end of
    #    an ROI are averaged with zeros.
    for vertices in roi_indices.values():
        if len(vertices) > 1:
            roi_data = predictions[:, vertices].copy()
            width = min(3, len(vertices))
            kernel = np.ones(width) / width
            for t in range(n_timepoints):
                predictions[t, vertices] = np.convolve(roi_data[t], kernel, mode="same")

    return predictions


def generate_correlated_features(
    brain_predictions,
    alignment_strength=0.5,
    feature_dim=512,
    seed=42,
):
    """Generate model features with controllable correlation to brain data.

    The brain data is projected to ``feature_dim`` columns with a random
    Gaussian matrix, each projected column is z-scored, and the result is
    blended with independent unit-variance Gaussian features. Z-scoring
    before blending makes the effect of ``alignment_strength`` independent
    of the amplitude of ``brain_predictions``.

    Parameters
    ----------
    brain_predictions : np.ndarray
        Brain data of shape (n_stimuli, n_vertices).
    alignment_strength : float
        0.0 = random features, 1.0 = perfectly correlated with brain.
        Values outside [0, 1] are clipped.
    feature_dim : int
        Output feature dimensionality.
    seed : int
        Random seed.

    Returns
    -------
    np.ndarray
        Features of shape (n_stimuli, feature_dim), standardized per column.
    """
    rng = np.random.default_rng(seed)
    n_stimuli = brain_predictions.shape[0]

    # Project brain data to feature_dim via random projection
    n_vertices = brain_predictions.shape[1]
    projection = rng.standard_normal((n_vertices, feature_dim)) / np.sqrt(n_vertices)
    brain_projected = brain_predictions @ projection

    # Put the brain component on the same unit-variance scale as the random
    # component, so the blend below is governed by `strength` alone.
    # Constant columns (zero spread) are only centred.
    spread = brain_projected.std(axis=0)
    spread[spread == 0] = 1.0
    brain_projected = (brain_projected - brain_projected.mean(axis=0)) / spread

    # Generate random features
    random_features = rng.standard_normal((n_stimuli, feature_dim))

    # Mix: strength controls brain-alignment vs randomness
    strength = np.clip(alignment_strength, 0.0, 1.0)
    features = strength * brain_projected + (1 - strength) * random_features

    # Standardize (epsilon guards against constant output columns)
    features = (features - features.mean(axis=0)) / (features.std(axis=0) + 1e-8)
    return features