# Web file-viewer residue - Spaces: Running; file size: 8,356 bytes; revision 9b23ae9.
"""Biologically realistic synthetic fMRI data generation.
Generates data with hemodynamic response convolution, modality-specific
activation patterns, spatial autocorrelation, temporal noise structure,
and scanner drift - mimicking real fMRI recordings.
"""
import numpy as np
# --- Hemodynamic Response Function ---
def generate_hrf(tr_seconds=1.0, duration=30.0):
    """Canonical double-gamma hemodynamic response function.

    Models the BOLD signal: a positive peak at ~5-6s followed by a
    smaller negative undershoot at ~15s.

    Parameters
    ----------
    tr_seconds : float
        Sampling interval of the returned kernel, in seconds. Must be > 0.
    duration : float
        Length of the modelled response, in seconds. Must be > 0.

    Returns
    -------
    np.ndarray
        The HRF sampled at ``0, tr_seconds, 2 * tr_seconds, ...`` (strictly
        below ``duration``), scaled so that its largest absolute value is 1.
        If every sample is zero (e.g. ``tr_seconds >= duration`` leaves only
        the sample at t = 0) the zeros are returned unscaled instead of NaN.

    Raises
    ------
    ValueError
        If ``tr_seconds`` or ``duration`` is not strictly positive.
    """
    # Imported locally so scipy is only loaded when an HRF is requested.
    from scipy.stats import gamma as gamma_dist

    if tr_seconds <= 0:
        raise ValueError(f"tr_seconds must be positive, got {tr_seconds!r}")
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration!r}")

    t = np.arange(0, duration, tr_seconds)

    # Double gamma parameters (SPM canonical)
    a1, b1 = 6.0, 1.0   # positive peak
    a2, b2 = 16.0, 1.0  # undershoot
    c = 1.0 / 6.0       # undershoot ratio

    h = gamma_dist.pdf(t, a1, scale=b1) - c * gamma_dist.pdf(t, a2, scale=b2)

    # Peak-normalise so max(|h|) == 1. Skip the division when the kernel is
    # identically zero, which would otherwise produce 0 / 0 = NaN.
    peak = np.max(np.abs(h))
    if peak > 0:
        h = h / peak
    return h
def generate_stimulus_events(n_timepoints, tr_seconds=1.0, n_events=5, seed=42):
    """Generate random stimulus onset times as a binary event train.

    Onsets are drawn by rejection sampling: a candidate time is accepted only
    if it lies more than 8 seconds away from every onset accepted so far.
    Sampling stops after ``n_events`` onsets or 1000 attempts, whichever comes
    first, so fewer than ``n_events`` events may be returned.

    Parameters
    ----------
    n_timepoints : int
        Number of TRs in the event train.
    tr_seconds : float
        Repetition time in seconds. Must be > 0.
    n_events : int
        Target number of stimulus events.
    seed : int
        Seed for the random number generator.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_timepoints,)`` with 1.0 at stimulus onsets and
        0.0 elsewhere.

    Raises
    ------
    ValueError
        If ``tr_seconds`` is not strictly positive.
    """
    if tr_seconds <= 0:
        raise ValueError(f"tr_seconds must be positive, got {tr_seconds!r}")

    rng = np.random.default_rng(seed)
    events = np.zeros(n_timepoints)
    if n_timepoints == 0:
        return events

    total_seconds = n_timepoints * tr_seconds
    min_gap = 8.0  # minimum inter-stimulus interval
    max_attempts = 1000

    # Normally keep a 2 s lead-in and a 10 s tail free of onsets. When the
    # scan is too short for that window to exist, sample over the whole scan
    # rather than calling rng.uniform with high < low (undefined in NumPy).
    window_start, window_end = 2.0, total_seconds - 10.0
    if window_end < window_start:
        window_start, window_end = 0.0, total_seconds

    onsets = []
    attempts = 0
    while len(onsets) < n_events and attempts < max_attempts:
        candidate = rng.uniform(window_start, window_end)
        if all(abs(candidate - accepted) > min_gap for accepted in onsets):
            onsets.append(candidate)
        attempts += 1

    for onset in onsets:
        idx = int(onset / tr_seconds)
        if 0 <= idx < n_timepoints:
            events[idx] = 1.0
    return events
# --- Modality-Specific Activation Weights ---
# Weight for each ROI given a stimulus modality (0 = no response, 1 = maximum)
MODALITY_WEIGHTS = {
    # Each modality lists the same ROIs in the same order:
    # visual areas, auditory areas, language areas, executive/attention areas.
    "visual": {
        # Visual cortex: strong response
        "V1": 1.0, "V2": 0.95, "V3": 0.85, "V4": 0.8,
        "MT": 0.75, "MST": 0.7, "FFC": 0.65, "VVC": 0.6,
        # Auditory areas: weak cross-modal response
        "A1": 0.05, "LBelt": 0.04, "MBelt": 0.03, "PBelt": 0.03,
        "A4": 0.02, "A5": 0.02,
        # Language areas: minimal response
        "44": 0.08, "45": 0.07, "IFJa": 0.06, "IFJp": 0.05,
        "TPOJ1": 0.1, "TPOJ2": 0.08, "STV": 0.07, "PSL": 0.06,
        # Executive areas: moderate response (attention)
        "46": 0.3, "9-46d": 0.25, "8Av": 0.35, "8Ad": 0.3,
        "FEF": 0.4, "p32pr": 0.15, "a32pr": 0.12,
    },
    "auditory": {
        # Visual areas
        "V1": 0.03, "V2": 0.03, "V3": 0.02, "V4": 0.02,
        "MT": 0.02, "MST": 0.01, "FFC": 0.01, "VVC": 0.01,
        # Auditory areas
        "A1": 1.0, "LBelt": 0.95, "MBelt": 0.9, "PBelt": 0.85,
        "A4": 0.75, "A5": 0.7,
        # Language areas
        "44": 0.15, "45": 0.12, "IFJa": 0.1, "IFJp": 0.08,
        "TPOJ1": 0.25, "TPOJ2": 0.2, "STV": 0.3, "PSL": 0.2,
        # Executive areas
        "46": 0.2, "9-46d": 0.15, "8Av": 0.12, "8Ad": 0.1,
        "FEF": 0.08, "p32pr": 0.1, "a32pr": 0.08,
    },
    "language": {
        # Visual areas
        "V1": 0.05, "V2": 0.04, "V3": 0.03, "V4": 0.03,
        "MT": 0.02, "MST": 0.02, "FFC": 0.1, "VVC": 0.08,
        # Auditory areas
        "A1": 0.3, "LBelt": 0.25, "MBelt": 0.2, "PBelt": 0.15,
        "A4": 0.2, "A5": 0.15,
        # Language areas
        "44": 1.0, "45": 0.95, "IFJa": 0.85, "IFJp": 0.8,
        "TPOJ1": 0.9, "TPOJ2": 0.85, "STV": 0.75, "PSL": 0.7,
        # Executive areas
        "46": 0.5, "9-46d": 0.45, "8Av": 0.3, "8Ad": 0.25,
        "FEF": 0.15, "p32pr": 0.35, "a32pr": 0.3,
    },
    "multimodal": {
        # Visual areas
        "V1": 0.7, "V2": 0.65, "V3": 0.55, "V4": 0.5,
        "MT": 0.5, "MST": 0.45, "FFC": 0.4, "VVC": 0.35,
        # Auditory areas
        "A1": 0.7, "LBelt": 0.65, "MBelt": 0.55, "PBelt": 0.5,
        "A4": 0.45, "A5": 0.4,
        # Language areas
        "44": 0.65, "45": 0.6, "IFJa": 0.5, "IFJp": 0.45,
        "TPOJ1": 0.6, "TPOJ2": 0.55, "STV": 0.5, "PSL": 0.45,
        # Executive areas
        "46": 0.4, "9-46d": 0.35, "8Av": 0.3, "8Ad": 0.25,
        "FEF": 0.3, "p32pr": 0.25, "a32pr": 0.2,
    },
}
def _delay_signal(signal, shift):
    """Return *signal* delayed by *shift* samples, zero-filling the vacated start."""
    if shift <= 0:
        return signal.copy()
    delayed = np.zeros_like(signal)
    if shift < signal.shape[0]:
        delayed[shift:] = signal[:-shift]
    return delayed


def generate_realistic_predictions(
    n_timepoints,
    roi_indices,
    stimulus_type="visual",
    tr_seconds=1.0,
    n_events=5,
    snr=2.0,
    seed=42,
):
    """Generate biologically realistic fMRI-like predictions.

    Pipeline: stimulus events are convolved with the HRF, scaled per ROI by
    the modality weights (with a latency delay for some ROIs), then AR(1)
    noise and a sinusoidal drift are added and each ROI is smoothed with a
    short moving average.

    Parameters
    ----------
    n_timepoints : int
        Number of TRs.
    roi_indices : dict[str, np.ndarray]
        ROI name -> vertex indices mapping. Any 1-D sequence of integer
        indices is accepted; empty ROIs are skipped.
    stimulus_type : str
        One of "visual", "auditory", "language", "multimodal". Unknown
        values fall back to the "multimodal" weights.
    tr_seconds : float
        Repetition time in seconds.
    n_events : int
        Number of stimulus events.
    snr : float
        Signal-to-noise ratio (higher = cleaner signal). Values below 0.1
        are treated as 0.1.
    seed : int
        Random seed; used both for the event train and for the noise.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_timepoints, n_vertices)`` where ``n_vertices`` is
        the largest vertex index in ``roi_indices`` plus one.

    Raises
    ------
    ValueError
        If ``roi_indices`` contains no vertex index at all.
    """
    rng = np.random.default_rng(seed)

    # Normalise index containers once so lists and arrays behave the same.
    roi_vertices = {name: np.asarray(idx) for name, idx in roi_indices.items()}
    populated = [idx for idx in roi_vertices.values() if idx.size]
    if not populated:
        raise ValueError("roi_indices must contain at least one vertex index")
    n_vertices = max(int(idx.max()) for idx in populated) + 1
    predictions = np.zeros((n_timepoints, n_vertices))

    # 1. Generate stimulus-evoked signal: events convolved with the HRF,
    #    truncated to the length of the run.
    events = generate_stimulus_events(n_timepoints, tr_seconds, n_events, seed)
    hrf = generate_hrf(tr_seconds)
    bold_signal = np.convolve(events, hrf)[:n_timepoints]

    # 2. Apply modality-specific weights per ROI
    weights = MODALITY_WEIGHTS.get(stimulus_type, MODALITY_WEIGHTS["multimodal"])
    late_rois = ("44", "45", "IFJa", "IFJp", "46", "9-46d")
    mid_rois = ("TPOJ1", "TPOJ2", "STV", "PSL")
    for roi_name, vertices in roi_vertices.items():
        w = weights.get(roi_name, 0.1)  # ROIs without an entry get a weak default
        # Per-ROI latency (higher-order areas respond later)
        latency_shift = 0
        if roi_name in late_rois:
            latency_shift = int(2.0 / tr_seconds)  # ~2s later for association cortex
        elif roi_name in mid_rois:
            latency_shift = int(1.5 / tr_seconds)
        # Delay with zero fill: a circular shift would wrap the end of the
        # run around to its beginning as spurious early activation.
        shifted = _delay_signal(bold_signal, latency_shift) * w
        # Per-vertex gain variation within the ROI, uniform in [0.8, 1.2)
        for v in vertices:
            vertex_scale = 0.8 + 0.4 * rng.random()
            predictions[:, v] = shifted * vertex_scale

    # 3. Temporally autocorrelated noise: each row adds 0.5 x the previous row
    ar_coeff = 0.5
    noise = rng.standard_normal(predictions.shape)
    for t in range(1, n_timepoints):
        noise[t] += ar_coeff * noise[t - 1]

    # 4. Scanner drift (low-frequency sinusoid with period 0.8 x run length)
    t_axis = np.arange(n_timepoints) * tr_seconds
    drift = 0.1 * np.sin(2 * np.pi * t_axis / (n_timepoints * tr_seconds * 0.8))
    drift = drift[:, np.newaxis]  # broadcast the same drift to every vertex

    # 5. Combine signal + noise + drift; noise is scaled relative to the
    #    standard deviation of the non-zero signal samples.
    signal_power = np.std(predictions[predictions != 0]) if np.any(predictions != 0) else 1.0
    noise_power = signal_power / max(snr, 0.1)
    predictions = predictions + noise * noise_power + drift

    # 6. Spatial smoothing (moving average over neighbours in ROI index order).
    #    mode="same" zero-pads, so the first and last vertex of each ROI are
    #    averaged with zeros.
    for vertices in roi_vertices.values():
        if vertices.size > 1:
            roi_data = predictions[:, vertices].copy()
            width = min(3, vertices.size)
            kernel = np.ones(width) / width
            for t in range(n_timepoints):
                predictions[t, vertices] = np.convolve(roi_data[t], kernel, mode="same")
    return predictions
def generate_correlated_features(
    brain_predictions,
    alignment_strength=0.5,
    feature_dim=512,
    seed=42,
):
    """Build model features that blend a projection of brain data with noise.

    The brain data is mapped to ``feature_dim`` columns by a random Gaussian
    projection, linearly mixed with Gaussian noise, and each output column is
    then standardized to zero mean and (approximately) unit variance.

    Parameters
    ----------
    brain_predictions : np.ndarray
        Brain data of shape (n_stimuli, n_vertices).
    alignment_strength : float
        Mixing weight, clipped to [0, 1]. 0.0 = purely random features,
        1.0 = features derived only from the projected brain data.
    feature_dim : int
        Output feature dimensionality.
    seed : int
        Random seed.

    Returns
    -------
    np.ndarray
        Features of shape (n_stimuli, feature_dim).
    """
    generator = np.random.default_rng(seed)
    n_stimuli, n_vertices = brain_predictions.shape[0], brain_predictions.shape[1]

    # Draw order is part of the contract for a given seed:
    # projection matrix first, then the noise features.
    random_projection = generator.standard_normal((n_vertices, feature_dim))
    random_projection = random_projection / np.sqrt(n_vertices)
    noise_part = generator.standard_normal((n_stimuli, feature_dim))

    brain_part = brain_predictions @ random_projection

    # Convex combination of the brain-derived and the random component
    mix = np.clip(alignment_strength, 0.0, 1.0)
    blended = mix * brain_part + (1 - mix) * noise_part

    # Column-wise z-score; the epsilon keeps constant columns finite
    column_mean = blended.mean(axis=0)
    column_std = blended.std(axis=0)
    return (blended - column_mean) / (column_std + 1e-8)
|