"""Biologically realistic synthetic fMRI data generation. Generates data with hemodynamic response convolution, modality-specific activation patterns, spatial autocorrelation, temporal noise structure, and scanner drift - mimicking real fMRI recordings. """ import numpy as np # --- Hemodynamic Response Function --- def generate_hrf(tr_seconds=1.0, duration=30.0): """Canonical double-gamma hemodynamic response function. Models the BOLD signal: a positive peak at ~5-6s followed by a smaller negative undershoot at ~15s. """ t = np.arange(0, duration, tr_seconds) # Double gamma parameters (SPM canonical) a1, b1 = 6.0, 1.0 # positive peak a2, b2 = 16.0, 1.0 # undershoot c = 1.0 / 6.0 # undershoot ratio from scipy.stats import gamma as gamma_dist h = gamma_dist.pdf(t, a1, scale=b1) - c * gamma_dist.pdf(t, a2, scale=b2) h = h / np.max(np.abs(h)) # normalize to [-1, 1] return h def generate_stimulus_events(n_timepoints, tr_seconds=1.0, n_events=5, seed=42): """Generate random stimulus onset times as a binary event train. Returns a (n_timepoints,) array with 1s at stimulus onsets. Events are spaced at least 8 seconds apart. """ rng = np.random.default_rng(seed) total_seconds = n_timepoints * tr_seconds min_gap = 8.0 # minimum inter-stimulus interval events = np.zeros(n_timepoints) onsets = [] attempts = 0 while len(onsets) < n_events and attempts < 1000: t = rng.uniform(2.0, total_seconds - 10.0) if all(abs(t - o) > min_gap for o in onsets): onsets.append(t) attempts += 1 for onset in onsets: idx = int(onset / tr_seconds) if 0 <= idx < n_timepoints: events[idx] = 1.0 return events # --- Modality-Specific Activation Weights --- # Weight for each ROI given a stimulus modality (0 = no response, 1 = maximum) MODALITY_WEIGHTS = { "visual": { # Strong visual cortex activation "V1": 1.0, "V2": 0.95, "V3": 0.85, "V4": 0.8, "MT": 0.75, "MST": 0.7, "FFC": 0.65, "VVC": 0.6, # Weak cross-modal "A1": 0.05, "LBelt": 0.04, "MBelt": 0.03, "PBelt": 0.03, "A4": 0.02, "A5": 0.02, # Minimal language "44": 0.08, "45": 0.07, "IFJa": 0.06, "IFJp": 0.05, "TPOJ1": 0.1, "TPOJ2": 0.08, "STV": 0.07, "PSL": 0.06, # Moderate executive (attention) "46": 0.3, "9-46d": 0.25, "8Av": 0.35, "8Ad": 0.3, "FEF": 0.4, "p32pr": 0.15, "a32pr": 0.12, }, "auditory": { "V1": 0.03, "V2": 0.03, "V3": 0.02, "V4": 0.02, "MT": 0.02, "MST": 0.01, "FFC": 0.01, "VVC": 0.01, "A1": 1.0, "LBelt": 0.95, "MBelt": 0.9, "PBelt": 0.85, "A4": 0.75, "A5": 0.7, "44": 0.15, "45": 0.12, "IFJa": 0.1, "IFJp": 0.08, "TPOJ1": 0.25, "TPOJ2": 0.2, "STV": 0.3, "PSL": 0.2, "46": 0.2, "9-46d": 0.15, "8Av": 0.12, "8Ad": 0.1, "FEF": 0.08, "p32pr": 0.1, "a32pr": 0.08, }, "language": { "V1": 0.05, "V2": 0.04, "V3": 0.03, "V4": 0.03, "MT": 0.02, "MST": 0.02, "FFC": 0.1, "VVC": 0.08, "A1": 0.3, "LBelt": 0.25, "MBelt": 0.2, "PBelt": 0.15, "A4": 0.2, "A5": 0.15, "44": 1.0, "45": 0.95, "IFJa": 0.85, "IFJp": 0.8, "TPOJ1": 0.9, "TPOJ2": 0.85, "STV": 0.75, "PSL": 0.7, "46": 0.5, "9-46d": 0.45, "8Av": 0.3, "8Ad": 0.25, "FEF": 0.15, "p32pr": 0.35, "a32pr": 0.3, }, "multimodal": { "V1": 0.7, "V2": 0.65, "V3": 0.55, "V4": 0.5, "MT": 0.5, "MST": 0.45, "FFC": 0.4, "VVC": 0.35, "A1": 0.7, "LBelt": 0.65, "MBelt": 0.55, "PBelt": 0.5, "A4": 0.45, "A5": 0.4, "44": 0.65, "45": 0.6, "IFJa": 0.5, "IFJp": 0.45, "TPOJ1": 0.6, "TPOJ2": 0.55, "STV": 0.5, "PSL": 0.45, "46": 0.4, "9-46d": 0.35, "8Av": 0.3, "8Ad": 0.25, "FEF": 0.3, "p32pr": 0.25, "a32pr": 0.2, }, } def generate_realistic_predictions( n_timepoints, roi_indices, stimulus_type="visual", tr_seconds=1.0, n_events=5, snr=2.0, seed=42, ): """Generate biologically realistic fMRI-like predictions. Parameters ---------- n_timepoints : int Number of TRs. roi_indices : dict[str, np.ndarray] ROI name -> vertex indices mapping. stimulus_type : str One of "visual", "auditory", "language", "multimodal". tr_seconds : float Repetition time in seconds. n_events : int Number of stimulus events. snr : float Signal-to-noise ratio (higher = cleaner signal). seed : int Random seed. """ rng = np.random.default_rng(seed) n_vertices = max(max(v) for v in roi_indices.values()) + 1 predictions = np.zeros((n_timepoints, n_vertices)) # 1. Generate stimulus-evoked signal events = generate_stimulus_events(n_timepoints, tr_seconds, n_events, seed) hrf = generate_hrf(tr_seconds) # Convolve events with HRF bold_signal = np.convolve(events, hrf)[:n_timepoints] # 2. Apply modality-specific weights per ROI weights = MODALITY_WEIGHTS.get(stimulus_type, MODALITY_WEIGHTS["multimodal"]) for roi_name, vertices in roi_indices.items(): w = weights.get(roi_name, 0.1) # Add per-ROI latency jitter (higher-order areas respond later) latency_shift = 0 if roi_name in ["44", "45", "IFJa", "IFJp", "46", "9-46d"]: latency_shift = int(2.0 / tr_seconds) # ~2s later for association cortex elif roi_name in ["TPOJ1", "TPOJ2", "STV", "PSL"]: latency_shift = int(1.5 / tr_seconds) shifted = np.roll(bold_signal, latency_shift) * w # Add per-vertex variation within ROI for v in vertices: if v < n_vertices: vertex_scale = 0.8 + 0.4 * rng.random() predictions[:, v] = shifted * vertex_scale # 3. Add temporal autocorrelation (AR(1) noise) ar_coeff = 0.5 noise = rng.standard_normal(predictions.shape) for t in range(1, n_timepoints): noise[t] += ar_coeff * noise[t - 1] # 4. Add scanner drift (low-frequency sinusoidal) t_axis = np.arange(n_timepoints) * tr_seconds drift = 0.1 * np.sin(2 * np.pi * t_axis / (n_timepoints * tr_seconds * 0.8)) drift = drift[:, np.newaxis] # 5. Combine signal + noise + drift signal_power = np.std(predictions[predictions != 0]) if np.any(predictions != 0) else 1.0 noise_power = signal_power / max(snr, 0.1) predictions = predictions + noise * noise_power + drift # 6. Spatial smoothing (average with neighbors within same ROI) for roi_name, vertices in roi_indices.items(): valid = vertices[vertices < n_vertices] if len(valid) > 1: roi_data = predictions[:, valid].copy() kernel = np.ones(min(3, len(valid))) / min(3, len(valid)) for t in range(n_timepoints): predictions[t, valid] = np.convolve(roi_data[t], kernel, mode="same") return predictions def generate_correlated_features( brain_predictions, alignment_strength=0.5, feature_dim=512, seed=42, ): """Generate model features with controllable correlation to brain data. Parameters ---------- brain_predictions : np.ndarray Brain data of shape (n_stimuli, n_vertices). alignment_strength : float 0.0 = random features, 1.0 = perfectly correlated with brain. feature_dim : int Output feature dimensionality. seed : int Random seed. Returns ------- np.ndarray Features of shape (n_stimuli, feature_dim). """ rng = np.random.default_rng(seed) n_stimuli = brain_predictions.shape[0] # Project brain data to feature_dim via random projection n_vertices = brain_predictions.shape[1] projection = rng.standard_normal((n_vertices, feature_dim)) / np.sqrt(n_vertices) brain_projected = brain_predictions @ projection # Generate random features random_features = rng.standard_normal((n_stimuli, feature_dim)) # Mix: strength controls brain-alignment vs randomness strength = np.clip(alignment_strength, 0.0, 1.0) features = strength * brain_projected + (1 - strength) * random_features # Standardize features = (features - features.mean(axis=0)) / (features.std(axis=0) + 1e-8) return features