File size: 8,356 Bytes
9b23ae9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
"""Biologically realistic synthetic fMRI data generation.

Generates data with hemodynamic response convolution, modality-specific
activation patterns, spatial autocorrelation, temporal noise structure,
and scanner drift - mimicking real fMRI recordings.
"""

import numpy as np

# --- Hemodynamic Response Function ---

def generate_hrf(tr_seconds=1.0, duration=30.0):
    """Canonical double-gamma hemodynamic response function.

    Models the BOLD signal: a positive peak at ~5-6s followed by a
    smaller negative undershoot at ~15s.

    Parameters
    ----------
    tr_seconds : float
        Sampling interval of the returned kernel, in seconds. Must be > 0.
    duration : float
        Length of the kernel in seconds. Must be > 0.

    Returns
    -------
    np.ndarray
        Kernel sampled at ``0, tr_seconds, 2 * tr_seconds, ...`` up to (but
        excluding) ``duration``, scaled so that its largest absolute value
        is 1. If every sample is zero (e.g. ``tr_seconds >= duration``, so
        only t=0 is sampled) the all-zero kernel is returned unscaled.

    Raises
    ------
    ValueError
        If ``tr_seconds`` or ``duration`` is not strictly positive.
    """
    if tr_seconds <= 0 or duration <= 0:
        raise ValueError("tr_seconds and duration must be positive")

    # Imported lazily so that importing this module does not require scipy.
    from scipy.stats import gamma as gamma_dist

    t = np.arange(0, duration, tr_seconds)
    # Double gamma parameters (SPM canonical)
    a1, b1 = 6.0, 1.0    # positive peak
    a2, b2 = 16.0, 1.0   # undershoot
    c = 1.0 / 6.0        # undershoot ratio

    h = gamma_dist.pdf(t, a1, scale=b1) - c * gamma_dist.pdf(t, a2, scale=b2)

    # Normalize to [-1, 1]; skip the division when the kernel is identically
    # zero, which would otherwise produce NaNs (0 / 0).
    peak = np.max(np.abs(h))
    if peak > 0:
        h = h / peak
    return h


def generate_stimulus_events(n_timepoints, tr_seconds=1.0, n_events=5, seed=42):
    """Generate random stimulus onset times as a binary event train.

    Onsets are drawn uniformly from the window ``[2, total - 10]`` seconds
    (``total = n_timepoints * tr_seconds``) by rejection sampling: a
    candidate is kept only if it lies more than 8 seconds from every onset
    accepted so far. Sampling stops after ``n_events`` acceptances or 1000
    attempts, so fewer than ``n_events`` events may be returned.

    Parameters
    ----------
    n_timepoints : int
        Number of TRs in the returned train.
    tr_seconds : float
        Repetition time in seconds.
    n_events : int
        Maximum number of stimulus events to place.
    seed : int
        Random seed.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_timepoints,)`` with 1.0 at stimulus onsets and
        0.0 elsewhere. All zeros when the series is too short to contain
        the onset window (``total < 12`` seconds).
    """
    rng = np.random.default_rng(seed)
    total_seconds = n_timepoints * tr_seconds
    min_gap = 8.0  # minimum inter-stimulus interval
    earliest, latest = 2.0, total_seconds - 10.0
    max_attempts = 1000

    events = np.zeros(n_timepoints)
    if latest < earliest:
        # No valid onset window: sampling from an inverted interval is
        # meaningless, so report "no events" instead.
        return events

    onsets = []
    for _ in range(max_attempts):
        if len(onsets) >= n_events:
            break
        candidate = rng.uniform(earliest, latest)
        if all(abs(candidate - onset) > min_gap for onset in onsets):
            onsets.append(candidate)

    # Every onset lies in [2, total - 10], so the TR index is always in range.
    for onset in onsets:
        events[int(onset / tr_seconds)] = 1.0

    return events


# --- Modality-Specific Activation Weights ---

# Weight for each ROI given a stimulus modality (0 = no response, 1 = maximum).
# Outer keys are stimulus modalities, inner keys are ROI labels. Every modality
# lists the same ROIs in the same order: visual, auditory, language, executive.
# generate_realistic_predictions() uses 0.1 for ROIs missing from a table and
# falls back to the "multimodal" table for an unknown modality.
MODALITY_WEIGHTS = {
    "visual": {
        # Strong visual cortex activation
        "V1": 1.0, "V2": 0.95, "V3": 0.85, "V4": 0.8,
        "MT": 0.75, "MST": 0.7, "FFC": 0.65, "VVC": 0.6,
        # Weak cross-modal
        "A1": 0.05, "LBelt": 0.04, "MBelt": 0.03, "PBelt": 0.03, "A4": 0.02, "A5": 0.02,
        # Minimal language
        "44": 0.08, "45": 0.07, "IFJa": 0.06, "IFJp": 0.05,
        "TPOJ1": 0.1, "TPOJ2": 0.08, "STV": 0.07, "PSL": 0.06,
        # Moderate executive (attention)
        "46": 0.3, "9-46d": 0.25, "8Av": 0.35, "8Ad": 0.3,
        "FEF": 0.4, "p32pr": 0.15, "a32pr": 0.12,
    },
    "auditory": {
        # Near-zero visual ROIs
        "V1": 0.03, "V2": 0.03, "V3": 0.02, "V4": 0.02,
        "MT": 0.02, "MST": 0.01, "FFC": 0.01, "VVC": 0.01,
        # Strong auditory ROIs
        "A1": 1.0, "LBelt": 0.95, "MBelt": 0.9, "PBelt": 0.85, "A4": 0.75, "A5": 0.7,
        # Weak-to-moderate language ROIs
        "44": 0.15, "45": 0.12, "IFJa": 0.1, "IFJp": 0.08,
        "TPOJ1": 0.25, "TPOJ2": 0.2, "STV": 0.3, "PSL": 0.2,
        # Weak executive ROIs
        "46": 0.2, "9-46d": 0.15, "8Av": 0.12, "8Ad": 0.1,
        "FEF": 0.08, "p32pr": 0.1, "a32pr": 0.08,
    },
    "language": {
        # Near-zero visual ROIs (slightly higher for FFC / VVC)
        "V1": 0.05, "V2": 0.04, "V3": 0.03, "V4": 0.03,
        "MT": 0.02, "MST": 0.02, "FFC": 0.1, "VVC": 0.08,
        # Moderate auditory ROIs
        "A1": 0.3, "LBelt": 0.25, "MBelt": 0.2, "PBelt": 0.15, "A4": 0.2, "A5": 0.15,
        # Strong language ROIs
        "44": 1.0, "45": 0.95, "IFJa": 0.85, "IFJp": 0.8,
        "TPOJ1": 0.9, "TPOJ2": 0.85, "STV": 0.75, "PSL": 0.7,
        # Moderate executive ROIs
        "46": 0.5, "9-46d": 0.45, "8Av": 0.3, "8Ad": 0.25,
        "FEF": 0.15, "p32pr": 0.35, "a32pr": 0.3,
    },
    "multimodal": {
        # Mid-range weights across all four ROI groups
        "V1": 0.7, "V2": 0.65, "V3": 0.55, "V4": 0.5,
        "MT": 0.5, "MST": 0.45, "FFC": 0.4, "VVC": 0.35,
        "A1": 0.7, "LBelt": 0.65, "MBelt": 0.55, "PBelt": 0.5, "A4": 0.45, "A5": 0.4,
        "44": 0.65, "45": 0.6, "IFJa": 0.5, "IFJp": 0.45,
        "TPOJ1": 0.6, "TPOJ2": 0.55, "STV": 0.5, "PSL": 0.45,
        "46": 0.4, "9-46d": 0.35, "8Av": 0.3, "8Ad": 0.25,
        "FEF": 0.3, "p32pr": 0.25, "a32pr": 0.2,
    },
}


def _delay_time_course(signal, n_shift):
    """Delay a 1-D time course by ``n_shift`` samples, zero-filling the start."""
    if n_shift <= 0:
        return signal.copy()
    delayed = np.zeros_like(signal)
    if n_shift < signal.shape[0]:
        delayed[n_shift:] = signal[:-n_shift]
    return delayed


def generate_realistic_predictions(
    n_timepoints,
    roi_indices,
    stimulus_type="visual",
    tr_seconds=1.0,
    n_events=5,
    snr=2.0,
    seed=42,
):
    """Generate biologically realistic fMRI-like predictions.

    Pipeline: a random event train is convolved with the HRF, scaled per ROI
    by the modality weight table (with a latency delay for some ROIs) and
    per vertex by a random factor in [0.8, 1.2); AR(1) noise and a slow
    sinusoidal drift are added; finally each ROI is smoothed with a moving
    average over adjacent entries of its index array.

    Parameters
    ----------
    n_timepoints : int
        Number of TRs.
    roi_indices : dict[str, np.ndarray]
        ROI name -> vertex indices mapping. Any integer sequence is
        accepted; ROIs with no vertices are ignored.
    stimulus_type : str
        One of "visual", "auditory", "language", "multimodal". Any other
        value uses the "multimodal" weights.
    tr_seconds : float
        Repetition time in seconds.
    n_events : int
        Number of stimulus events.
    snr : float
        Signal-to-noise ratio (higher = cleaner signal). Values below 0.1
        are treated as 0.1.
    seed : int
        Random seed. Used both for the event train and for the noise /
        per-vertex scaling generator.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_timepoints, n_vertices)`` where ``n_vertices``
        is the largest vertex index in ``roi_indices`` plus one. Vertices
        not covered by any ROI contain only noise and drift.

    Raises
    ------
    ValueError
        If ``roi_indices`` contains no vertex index at all.
    """
    rng = np.random.default_rng(seed)

    # Normalise ROI definitions to flat arrays and drop empty ROIs, which
    # would otherwise break the max() below.
    roi_vertices = {}
    for roi_name, vertices in roi_indices.items():
        verts = np.asarray(vertices).ravel()
        if verts.size:
            roi_vertices[roi_name] = verts
    if not roi_vertices:
        raise ValueError("roi_indices must contain at least one vertex index")

    n_vertices = max(int(verts.max()) for verts in roi_vertices.values()) + 1
    predictions = np.zeros((n_timepoints, n_vertices))

    # 1. Generate stimulus-evoked signal
    events = generate_stimulus_events(n_timepoints, tr_seconds, n_events, seed)
    hrf = generate_hrf(tr_seconds)

    # Convolve events with HRF, truncated to the length of the run
    bold_signal = np.convolve(events, hrf)[:n_timepoints]

    # 2. Apply modality-specific weights per ROI
    weights = MODALITY_WEIGHTS.get(stimulus_type, MODALITY_WEIGHTS["multimodal"])
    late_rois = ("44", "45", "IFJa", "IFJp", "46", "9-46d")
    mid_latency_rois = ("TPOJ1", "TPOJ2", "STV", "PSL")
    for roi_name, verts in roi_vertices.items():
        w = weights.get(roi_name, 0.1)
        # Per-ROI latency (higher-order areas respond later)
        latency_shift = 0
        if roi_name in late_rois:
            latency_shift = int(2.0 / tr_seconds)  # ~2s later for association cortex
        elif roi_name in mid_latency_rois:
            latency_shift = int(1.5 / tr_seconds)

        # Zero-filled delay: a circular shift (np.roll) would move the tail
        # of the response to the first TRs, before any stimulus occurred.
        shifted = _delay_time_course(bold_signal, latency_shift) * w

        # Per-vertex amplitude variation within the ROI, one draw per vertex
        vertex_scale = 0.8 + 0.4 * rng.random(verts.size)
        predictions[:, verts] = shifted[:, np.newaxis] * vertex_scale[np.newaxis, :]

    # 3. Temporally autocorrelated noise (AR(1), unit-variance innovations)
    ar_coeff = 0.5
    noise = rng.standard_normal(predictions.shape)
    for t in range(1, n_timepoints):
        noise[t] += ar_coeff * noise[t - 1]

    # 4. Scanner drift: low-frequency sinusoid with period 0.8 * run length
    t_axis = np.arange(n_timepoints) * tr_seconds
    drift = 0.1 * np.sin(2 * np.pi * t_axis / (n_timepoints * tr_seconds * 0.8))
    drift = drift[:, np.newaxis]

    # 5. Combine signal + noise + drift; the noise scale is derived from the
    #    spread of the non-zero signal samples (1.0 if there is no signal).
    nonzero = predictions != 0
    signal_power = np.std(predictions[nonzero]) if np.any(nonzero) else 1.0
    noise_power = signal_power / max(snr, 0.1)
    predictions = predictions + noise * noise_power + drift

    # 6. Spatial smoothing: moving average over neighbouring entries of each
    #    ROI's index array (np.convolve "same" mode, so edges are attenuated).
    for verts in roi_vertices.values():
        if verts.size > 1:
            width = min(3, verts.size)
            kernel = np.ones(width) / width
            roi_data = predictions[:, verts].copy()
            for t in range(n_timepoints):
                predictions[t, verts] = np.convolve(roi_data[t], kernel, mode="same")

    return predictions


def generate_correlated_features(
    brain_predictions,
    alignment_strength=0.5,
    feature_dim=512,
    seed=42,
):
    """Build model features that track brain data to a chosen degree.

    The brain data is randomly projected to ``feature_dim`` dimensions,
    blended with independent Gaussian noise, and z-scored per feature.

    Parameters
    ----------
    brain_predictions : np.ndarray
        Brain data of shape (n_stimuli, n_vertices).
    alignment_strength : float
        Blend weight, clipped to [0, 1]. 0.0 yields pure random features,
        1.0 yields the projected brain data only.
    feature_dim : int
        Output feature dimensionality.
    seed : int
        Random seed.

    Returns
    -------
    np.ndarray
        Features of shape (n_stimuli, feature_dim), each column with zero
        mean and (up to a 1e-8 stabiliser) unit standard deviation.
    """
    generator = np.random.default_rng(seed)
    n_stimuli, n_vertices = brain_predictions.shape[0], brain_predictions.shape[1]

    # Draw order matters for reproducibility: projection first, noise second.
    random_map = generator.standard_normal((n_vertices, feature_dim)) / np.sqrt(n_vertices)
    noise_part = generator.standard_normal((n_stimuli, feature_dim))

    # Convex blend of the brain-driven and purely random components.
    mix = np.clip(alignment_strength, 0.0, 1.0)
    blended = mix * (brain_predictions @ random_map) + (1 - mix) * noise_part

    # Column-wise z-score; the epsilon guards against constant columns.
    centred = blended - blended.mean(axis=0)
    return centred / (blended.std(axis=0) + 1e-8)