beta3 committed on
Commit
8d0cde8
·
verified ·
1 Parent(s): b8d709a

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. app.py +800 -0
  3. packages.txt +4 -0
  4. requirements.txt +23 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: TRIBE V2 Neural Activity Predictor
3
- emoji: πŸ‘
4
- colorFrom: purple
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 6.10.0
8
  python_version: '3.12'
 
1
  ---
2
  title: TRIBE V2 Neural Activity Predictor
3
+ emoji: 🔥
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: gradio
7
  sdk_version: 6.10.0
8
  python_version: '3.12'
app.py ADDED
@@ -0,0 +1,800 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TRIBE v2 — Brain Encoding Demo
3
+ HuggingFace Spaces · ZeroGPU
4
+ """
5
+
6
+ import os
7
+
8
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
9
+ os.environ["PYVISTA_OFF_SCREEN"] = "true"
10
+ os.environ["DISPLAY"] = ""
11
+ os.environ["VTK_DEFAULT_RENDER_WINDOW_OFFSCREEN"] = "true"
12
+
13
+ import tempfile
14
+ from pathlib import Path
15
+
16
+ import numpy as np
17
+ import matplotlib
18
+ matplotlib.use("Agg")
19
+
20
+ import gradio as gr
21
+ import spaces
22
+
23
+ # ── Constants ──────────────────────────────────────────────────────────────────
24
# Local directory for downloaded weights and sample media; created at import
# time so later writes never have to check for it.
CACHE_FOLDER = Path("./cache")
CACHE_FOLDER.mkdir(parents=True, exist_ok=True)

# Remote clip offered through the "Load sample" button.
SAMPLE_VIDEO_URL = "https://download.blender.org/durian/trailer/sintel_trailer-480p.mp4"

# Black-to-pale-yellow "fire" colour ramp as [stop, "rgb(r,g,b)"] pairs,
# built from (stop, (r, g, b)) tuples.
FIRE_COLORSCALE = [
    [stop, f"rgb({red},{green},{blue})"]
    for stop, (red, green, blue) in (
        (0.00, (0, 0, 0)),
        (0.15, (30, 0, 20)),
        (0.30, (120, 10, 5)),
        (0.50, (200, 50, 0)),
        (0.65, (240, 120, 0)),
        (0.80, (255, 200, 20)),
        (1.00, (255, 255, 220)),
    )
]
38
+
39
+ # ── HTML blocks ────────────────────────────────────────────────────────────────
40
# Static HTML fragments rendered through gr.HTML. The separators and dashes
# are real UTF-8 characters (·, –, —); the mis-decoded byte sequences that
# previously appeared in their place are removed.

# Page title, subtitle and external links.
HEADER = """
<div id="tribe-header">
  <div class="tribe-wordmark">TRIBE v2</div>
  <p class="tribe-subtitle">
    A Foundation Model of Vision, Audition &amp; Language for In-Silico Neuroscience
  </p>
  <div class="tribe-links">
    <a href="https://huggingface.co/facebook/tribev2" target="_blank">Weights</a>
    <span class="sep">·</span>
    <a href="https://ai.meta.com/research/publications/a-foundation-model-of-vision-audition-and-language-for-in-silico-neuroscience/" target="_blank">Paper</a>
    <span class="sep">·</span>
    <a href="https://github.com/facebookresearch/tribev2" target="_blank">Code</a>
    <span class="sep">·</span>
    <a href="https://aidemos.atmeta.com/tribev2/" target="_blank">Official Demo</a>
  </div>
</div>
"""

# Banner warning users about first-run latency.
NOTICE = """
<div class="tribe-notice">
  <span class="notice-label">Note</span>
  This demo runs on ZeroGPU (shared H200). Processing video and audio inputs
  involves downloading WhisperX on first run and may take 2–4 minutes.
  Subsequent runs within the same session are significantly faster.
</div>
"""

# Key/value summary shown inside the "About the model" accordion.
MODEL_INFO = """
<div class="info-grid">
  <div class="info-item">
    <div class="info-key">Architecture</div>
    <div class="info-val">Transformer encoder mapping multimodal features to cortical surface activity</div>
  </div>
  <div class="info-item">
    <div class="info-key">Encoders</div>
    <div class="info-val">V-JEPA2 (video) · Wav2Vec-BERT 2.0 (audio) · LLaMA 3.2-3B (text)</div>
  </div>
  <div class="info-item">
    <div class="info-key">Preprocessing</div>
    <div class="info-val">WhisperX extracts word-level timestamps from audio/video, enabling the text encoder to process speech with precise timing</div>
  </div>
  <div class="info-item">
    <div class="info-key">Output</div>
    <div class="info-val">Predicted fMRI BOLD responses on the fsaverage5 cortical mesh — 20,484 vertices, 1 TR = 1 s</div>
  </div>
  <div class="info-item">
    <div class="info-key">Training data</div>
    <div class="info-val">700+ healthy subjects exposed to images, podcasts, videos, and text (naturalistic paradigm)</div>
  </div>
  <div class="info-item">
    <div class="info-key">License</div>
    <div class="info-val">CC BY-NC 4.0 — research and non-commercial use only</div>
  </div>
</div>
"""

# Footer with usage caveats.
NOTES_HTML = """
<div class="tribe-footer">
  <span class="footer-label">Usage notes</span>
  <ul>
    <li>The 3D brain view is interactive: drag to rotate, scroll to zoom, use the slider to navigate timesteps.</li>
    <li>The text encoder requires access to the gated <strong>LLaMA 3.2-3B</strong> model on Hugging Face. Text input may fail if access is not granted.</li>
    <li>ZeroGPU sessions are ephemeral. If the Space goes idle, the next request re-initialises the model (~30 s).</li>
    <li>This is an unofficial community demo. For the official interactive visualisation, see <a href="https://aidemos.atmeta.com/tribev2/" target="_blank">aidemos.atmeta.com/tribev2</a>.</li>
  </ul>
</div>
"""
107
+
108
+ # ── Singletons ─────────────────────────────────────────────────────────────────
109
+ _model = None
110
+ _plotter = None
111
+ _mesh_cache = None
112
+
113
+
114
def _load_model():
    """Return the lazily created ``(model, plotter)`` pair.

    On the first call (or after a failed earlier attempt) this logs in to the
    Hugging Face Hub when ``HF_TOKEN`` is set, loads ``facebook/tribev2`` into
    ``CACHE_FOLDER`` and creates an fsaverage5 ``PlotBrain``. Later calls
    return the cached objects.

    Returns:
        Tuple ``(model, plotter)``.
    """
    global _model, _plotter
    if _model is None or _plotter is None:
        from tribev2.demo_utils import TribeModel
        from tribev2.plotting import PlotBrain

        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            from huggingface_hub import login
            login(token=hf_token, add_to_git_credential=False)

        # Build both objects in locals and publish them together: if the
        # plotter constructor raises, the globals stay untouched and the next
        # call retries instead of returning a cached (model, None) pair.
        model = TribeModel.from_pretrained("facebook/tribev2", cache_folder=CACHE_FOLDER)
        plotter = PlotBrain(mesh="fsaverage5")
        _model, _plotter = model, plotter
    return _model, _plotter
128
+
129
+
130
def _load_mesh():
    """Return the cached fsaverage5 pial meshes as NumPy arrays.

    The first call fetches the fsaverage5 surfaces through nilearn and loads
    the left and right pial meshes; the result is memoised in ``_mesh_cache``.

    Returns:
        Tuple ``(coords_L, faces_L, coords_R, faces_R)`` of ``np.ndarray``.
    """
    global _mesh_cache
    if _mesh_cache is not None:
        return _mesh_cache

    from nilearn import datasets, surface

    fsavg = datasets.fetch_surf_fsaverage("fsaverage5")
    left_coords, left_faces = surface.load_surf_mesh(fsavg.pial_left)
    right_coords, right_faces = surface.load_surf_mesh(fsavg.pial_right)
    _mesh_cache = tuple(
        np.array(part)
        for part in (left_coords, left_faces, right_coords, right_faces)
    )
    return _mesh_cache
142
+
143
+
144
+ # ── 3-D brain builder ──────────────────────────────────────────────────────────
145
def build_3d_figure(preds: np.ndarray, vmin_val: float = 0.5) -> str:
    """Render predictions as an interactive 3-D brain inside an ``<iframe>``.

    Both hemispheres from ``_load_mesh()`` are drawn as Plotly ``Mesh3d``
    traces on a white base with a fire-coloured activation overlay. One
    animation frame is created per timestep and a centred slider switches
    between them.

    Args:
        preds: Array indexed as ``preds[t]`` giving one value per vertex for
            timestep ``t``. The first ``coords_L.shape[0]`` values colour the
            left hemisphere, the remainder the right.
        vmin_val: Lower bound of the colour normalisation. Values at or below
            it map to the white base colour; the upper bound is the 99th
            percentile of ``preds``.

    Returns:
        An ``<iframe>`` element whose ``srcdoc`` holds the complete,
        self-contained Plotly HTML document (plotly.js included).

    Raises:
        ValueError: If ``preds`` contains no timesteps.
    """
    import html as _html

    import plotly.graph_objects as go

    n_t = preds.shape[0]
    if n_t == 0:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError("preds must contain at least one timestep")

    coords_L, faces_L, coords_R, faces_R = _load_mesh()
    n_verts_L = coords_L.shape[0]

    # Normalisation: same threshold as the timeline plot. It is computed once
    # for every timestep instead of twice per frame; the 1e-8 floor avoids a
    # division by zero when vmax <= vmin.
    vmax = np.percentile(preds, 99)
    vmin = vmin_val
    normed = np.clip((preds - vmin) / max(vmax - vmin, 1e-8), 0, 1)

    bg = "#1a1a2e"
    mono = "ui-monospace, 'Cascadia Code', 'Source Code Pro', monospace"

    # White base colourscale: 0 → white, fire only above threshold.
    white_fire = [
        [0.00, "rgb(245,245,245)"],
        [0.25, "rgb(220,180,160)"],
        [0.45, "rgb(200,60,10)"],
        [0.65, "rgb(240,120,0)"],
        [0.80, "rgb(255,200,20)"],
        [1.00, "rgb(255,255,220)"],
    ]

    mesh_kw = dict(
        colorscale=white_fire, cmin=0, cmax=1, showscale=False,
        flatshading=False, hoverinfo="skip",
        lighting=dict(ambient=0.60, diffuse=0.85, specular=0.25, roughness=0.45),
        lightposition=dict(x=80, y=180, z=200),
    )

    # Full traces (geometry + colours) for the initial view. The hemispheres
    # are pushed apart along x by `offset` so they do not touch.
    offset = 8.0
    first = normed[0]
    trace_left = go.Mesh3d(
        x=coords_L[:, 0] - offset, y=coords_L[:, 1], z=coords_L[:, 2],
        i=faces_L[:, 0], j=faces_L[:, 1], k=faces_L[:, 2],
        intensity=first[:n_verts_L], name="Left", **mesh_kw)
    trace_right = go.Mesh3d(
        x=coords_R[:, 0] + offset, y=coords_R[:, 1], z=coords_R[:, 2],
        i=faces_R[:, 0], j=faces_R[:, 1], k=faces_R[:, 2],
        intensity=first[n_verts_L:], name="Right", **mesh_kw)

    # Frames carry only the per-vertex intensities; the geometry stays in the
    # base traces, which keeps the HTML payload small.
    frames = [
        go.Frame(
            data=[go.Mesh3d(intensity=vn[:n_verts_L]),
                  go.Mesh3d(intensity=vn[n_verts_L:])],
            name=str(t),
            layout=go.Layout(title_text=f"t = {t} s"),
        )
        for t, vn in enumerate(normed)
    ]

    slider_steps = [
        dict(args=[[str(t)], dict(frame=dict(duration=0, redraw=True),
                                  mode="immediate", transition=dict(duration=0))],
             label=str(t), method="animate")
        for t in range(n_t)
    ]

    fig = go.Figure(
        data=[trace_left, trace_right],
        frames=frames,
        layout=go.Layout(
            height=500,
            paper_bgcolor=bg,
            plot_bgcolor=bg,
            scene=dict(
                bgcolor=bg,
                xaxis=dict(visible=False),
                yaxis=dict(visible=False),
                zaxis=dict(visible=False),
                camera=dict(
                    eye=dict(x=0, y=-1.9, z=0.4),
                    up=dict(x=0, y=0, z=1),
                ),
                aspectmode="data",
            ),
            margin=dict(l=0, r=0, t=8, b=70),
            title=dict(
                text="t = 0 s — drag to rotate · scroll to zoom",
                font=dict(color="#9ca3af", family=mono, size=11),
                x=0.5,
            ),
            updatemenus=[],
            sliders=[dict(
                active=0, steps=slider_steps,
                currentvalue=dict(
                    prefix="t = ", suffix=" s",
                    font=dict(color="#9ca3af", family=mono, size=11),
                    visible=True, xanchor="center",
                ),
                pad=dict(b=8, t=8),
                len=0.85, x=0.5, xanchor="center", y=0,
                bgcolor="#111827", bordercolor="#1f2937",
                tickcolor="#374151",
                font=dict(color="#6b7280", family=mono, size=10),
            )],
        ),
    )

    inner_html = fig.to_html(
        include_plotlyjs=True,
        full_html=True,
        config={"responsive": True, "displayModeBar": False},
    )
    # The whole document becomes an attribute value, so it must be escaped
    # (including quotes) before being placed in srcdoc.
    srcdoc = _html.escape(inner_html, quote=True)
    return (
        f'<iframe srcdoc="{srcdoc}" '
        f'style="width:100%;height:520px;border:none;background:{bg};" '
        f'scrolling="no"></iframe>'
    )
268
+
269
+
270
+ # ── Core inference ─────────────────────────────────────────────────────────────
271
@spaces.GPU(duration=300)
def run_prediction(input_type, video_file, audio_file, text_input, n_timesteps, vmin_val, show_stimuli):
    """Run the model on the selected input and build both visualisations.

    Args:
        input_type: Selected modality: ``"Video"``, ``"Audio"`` or ``"Text"``.
        video_file: Path of the uploaded video, or ``None``.
        audio_file: Path of the uploaded audio file, or ``None``.
        text_input: Raw text from the textbox (may be empty or ``None``).
        n_timesteps: Maximum number of leading timesteps to visualise.
        vmin_val: Activation threshold forwarded to both plots.
        show_stimuli: Whether to overlay stimulus frames; only honoured for
            video input.

    Returns:
        Tuple ``(brain_3d_html, timeline_fig, status)``: the iframe HTML from
        ``build_3d_figure``, the figure from ``plotter.plot_timesteps`` and a
        one-line summary string.

    Raises:
        gr.Error: If no input was provided for the chosen modality, or the
            model returned no predictions.
    """
    model, plotter = _load_model()

    # The textbox value may be None (e.g. through the API); treat that as empty.
    text = (text_input or "").strip()

    if input_type == "Video" and video_file is not None:
        df = model.get_events_dataframe(video_path=video_file)
        stimuli = show_stimuli
    elif input_type == "Audio" and audio_file is not None:
        df = model.get_events_dataframe(audio_path=audio_file)
        stimuli = False
    elif input_type == "Text" and text:
        # The model takes a file path, so the text goes through a temp file
        # that is removed as soon as the events dataframe is built.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as tmp:
            tmp.write(text)
            fpath = tmp.name
        try:
            df = model.get_events_dataframe(text_path=fpath)
        finally:
            os.unlink(fpath)
        stimuli = False
    else:
        raise gr.Error("Please provide an input for the selected modality.")

    # ZeroGPU runs in a daemon process — DataLoader cannot spawn children.
    # Force single-process loading for the duration of the prediction.
    import torch.utils.data
    _orig_init = torch.utils.data.DataLoader.__init__

    def _single_process_init(self, *args, **kwargs):
        kwargs["num_workers"] = 0
        # These options are only valid with worker processes; DataLoader
        # raises ValueError if they are set while num_workers == 0.
        kwargs.pop("prefetch_factor", None)
        kwargs.pop("persistent_workers", None)
        kwargs.pop("multiprocessing_context", None)
        _orig_init(self, *args, **kwargs)

    torch.utils.data.DataLoader.__init__ = _single_process_init
    try:
        preds, segments = model.predict(events=df)
    finally:
        # Always restore the original constructor, even if prediction fails.
        torch.utils.data.DataLoader.__init__ = _orig_init

    n = min(int(n_timesteps), len(preds))
    if n == 0:
        raise gr.Error("Model returned no predictions for this input.")

    preds_n = preds[:n]

    timeline_fig = plotter.plot_timesteps(
        preds_n, segments=segments[:n],
        cmap="fire", norm_percentile=99, vmin=vmin_val,
        alpha_cmap=(0.0, 0.2), show_stimuli=stimuli,
    )
    timeline_fig.set_dpi(180)
    brain_3d_html = build_3d_figure(preds_n, vmin_val=vmin_val)

    status = (
        f"{preds.shape[0]} timesteps × {preds.shape[1]:,} vertices "
        f"(fsaverage5) — showing first {n}"
    )
    return brain_3d_html, timeline_fig, status
324
+
325
+
326
def download_sample_video():
    """Fetch the sample clip into the cache folder and return its path.

    The download is delegated to ``tribev2.demo_utils.download_file``; whether
    that helper skips files that already exist is not visible from here.

    Returns:
        The local path of ``sintel_trailer.mp4`` as a string.
    """
    from tribev2.demo_utils import download_file

    target = CACHE_FOLDER / "sintel_trailer.mp4"
    download_file(SAMPLE_VIDEO_URL, target)
    return str(target)
331
+
332
+
333
+ # ── CSS ────────────────────────────────────────────────────────────────────────
334
# Stylesheet passed to demo.launch(css=...). Selectors target the elem_id /
# elem_classes assigned in the UI section. The section comments use real
# UTF-8 rule and dash characters, and the two `.modality-selector > .wrap`
# rules are merged into one.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap');

*, *::before, *::after { box-sizing: border-box; }

body, .gradio-container {
    background: #0b0e17 !important;
    color: #c9d4e8 !important;
    font-family: 'Inter', system-ui, sans-serif !important;
}
.gradio-container {
    max-width: 100% !important;
    width: 100% !important;
    margin: 0 !important;
    padding: 0 28px 56px !important;
}

/* ── Header ── */
#tribe-header {
    padding: 36px 0 22px;
    text-align: center;
    border-bottom: 1px solid #1a2235;
}
.tribe-wordmark {
    font-size: 2.4rem;
    font-weight: 600;
    letter-spacing: -0.03em;
    color: #edf2ff;
    line-height: 1;
    margin-bottom: 10px;
}
.tribe-subtitle {
    font-size: 0.87rem;
    color: #5a6a88;
    margin: 0 0 12px;
    line-height: 1.6;
}
.tribe-links { font-size: 0.76rem; }
.tribe-links a { color: #5a7aaa; text-decoration: none; transition: color 0.15s; }
.tribe-links a:hover { color: #a0b8d8; }
.tribe-links .sep { margin: 0 8px; color: #1e2a3a; }

/* ── Notice ── */
.tribe-notice {
    background: #0d1120;
    border: 1px solid #1a2235;
    border-left: 3px solid #1b4f8a;
    border-radius: 4px;
    padding: 11px 16px;
    font-size: 0.79rem;
    color: #5a7aaa;
    line-height: 1.6;
    margin: 16px 0 0;
}
.notice-label {
    font-weight: 600;
    color: #4a9fd4;
    margin-right: 8px;
    text-transform: uppercase;
    font-size: 0.66rem;
    letter-spacing: 0.1em;
}

/* ── Panel box — applied via elem_classes ── */
.tribe-box {
    background: #0d1120 !important;
    border: 1px solid #1a2235 !important;
    border-radius: 6px !important;
    overflow: hidden !important;
    padding: 0 !important;
}

/* ── Section label ── */
.sec-label {
    font-size: 0.7rem;
    font-weight: 600;
    letter-spacing: 0.1em;
    text-transform: uppercase;
    padding: 11px 16px;
    border-bottom: 1px solid #1a2235;
    margin: 0;
}
.sec-label-input { color: #4a9fd4; }
.sec-label-brain { color: #4a9fd4; }
.sec-label-timeline { color: #4a9fd4; }

/* ── Inner padding for input col ── */
.input-col-inner { padding: 14px 16px 14px; }
.input-col-inner > .gr-group,
.input-col-inner > div { margin-bottom: 10px; }

/* ── Modality buttons ── */
.modality-selector { width: 100% !important; }
/* margin-top adds breathing room below the "Modality" title */
.modality-selector > .wrap {
    display: grid !important;
    grid-template-columns: 1fr 1fr 1fr !important;
    gap: 5px !important;
    background: transparent !important;
    border: none !important;
    padding: 0 !important;
    width: 100% !important;
    margin-top: 6px !important;
}
.modality-selector label {
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    padding: 9px 4px !important;
    border-radius: 4px !important;
    font-size: 0.82rem !important;
    font-weight: 600 !important;
    cursor: pointer !important;
    transition: all 0.18s !important;
    user-select: none !important;
    text-align: center !important;
    border: 1px solid transparent !important;
}
/* Force white text on ALL spans inside modality labels */
.modality-selector label span,
.modality-selector label > span,
.modality-selector span {
    color: #ffffff !important;
    display: inline !important;
}
/* Video — blue */
.modality-selector label:nth-child(1) {
    background: #1a4a7a !important;
    border-color: #2478bb !important;
}
.modality-selector label:nth-child(1):has(input:checked) {
    background: #2478bb !important;
    border-color: #4a9fd4 !important;
    box-shadow: 0 0 10px rgba(36,120,187,0.5) !important;
}
/* Audio — teal */
.modality-selector label:nth-child(2) {
    background: #0d4a3a !important;
    border-color: #0f9e80 !important;
}
.modality-selector label:nth-child(2):has(input:checked) {
    background: #0f9e80 !important;
    border-color: #2dbba3 !important;
    box-shadow: 0 0 10px rgba(15,158,128,0.5) !important;
}
/* Text — indigo */
.modality-selector label:nth-child(3) {
    background: #2a2060 !important;
    border-color: #4a5eab !important;
}
.modality-selector label:nth-child(3):has(input:checked) {
    background: #4a5eab !important;
    border-color: #7080d0 !important;
    box-shadow: 0 0 10px rgba(74,94,171,0.5) !important;
}
.modality-selector input[type=radio] { display: none !important; }

/* ── Gradio component labels ── */
label > span {
    font-size: 0.69rem !important;
    color: #3a4f6a !important;
    font-weight: 500 !important;
    text-transform: uppercase !important;
    letter-spacing: 0.09em !important;
}

/* ── Upload / video / audio ── */
.gr-video, .gr-audio,
[data-testid="video"], [data-testid="audio"] {
    background: #080c18 !important;
    border: 1px solid #1a2235 !important;
    border-radius: 4px !important;
    width: 100% !important;
    color: #c9d4e8 !important;
}

/* Wrapper group: no border, no padding, invisible groups leave zero trace */
.upload-slot-wrap {
    border: none !important;
    background: transparent !important;
    padding: 0 !important;
    margin: 0 !important;
}

/* The actual component (Video/Audio) — fixed height */
.upload-slot {
    height: 220px !important;
    min-height: 220px !important;
    max-height: 220px !important;
    overflow: hidden !important;
    position: relative !important;
}
.upload-slot > * { max-height: 220px !important; overflow: hidden !important; }
.upload-slot video {
    width: 100% !important;
    height: 170px !important;
    max-height: 170px !important;
    object-fit: contain !important;
    display: block !important;
    background: #080c18 !important;
}

/* ── Main row: panels align to top, NOT stretched to equal height ── */
#main-row {
    align-items: flex-start !important;
}
/* panel-brain shrinks to fit its content (the plot), no empty space */
.panel-brain {
    align-self: flex-start !important;
}

/* ── Textarea ── */
textarea {
    background: #080c18 !important;
    border: 1px solid #1a2235 !important;
    border-radius: 4px !important;
    color: #c9d4e8 !important;
    font-size: 0.86rem !important;
    line-height: 1.6 !important;
    resize: vertical !important;
    width: 100% !important;
}
textarea::placeholder { color: #3a4f6a !important; }
textarea:focus { border-color: #1b4f8a !important; outline: none !important; }

/* ── Slider & checkbox ── */
input[type=range] { accent-color: #2478bb !important; }
input[type=checkbox] { accent-color: #2478bb !important; }

/* ── Run button ── */
.btn-run button {
    background: #edf2ff !important;
    color: #0b0e17 !important;
    font-weight: 600 !important;
    font-size: 0.87rem !important;
    letter-spacing: 0.03em !important;
    border: none !important;
    border-radius: 4px !important;
    padding: 11px 0 !important;
    width: 100% !important;
    cursor: pointer !important;
    transition: background 0.15s !important;
    margin-top: 8px !important;
}
.btn-run button:hover { background: #c0cfe8 !important; }

/* ── Sample button ── */
.btn-sample button {
    background: transparent !important;
    color: #3a4f6a !important;
    border: 1px solid #1a2235 !important;
    border-radius: 4px !important;
    font-size: 0.74rem !important;
    padding: 5px 12px !important;
    cursor: pointer !important;
    transition: all 0.15s !important;
    width: 100% !important;
    margin-top: 6px !important;
}
.btn-sample button:hover { color: #7a9abf !important; border-color: #1b4f8a !important; }

/* ── Status ── */
.status-line p {
    font-size: 0.72rem !important;
    color: #3a4f6a !important;
    margin: 8px 0 0 !important;
    font-variant-numeric: tabular-nums !important;
    font-family: ui-monospace, monospace !important;
}

/* ── Plot containers ── */
.plot-3d {
    width: 100% !important;
    min-height: 500px !important;
    overflow: hidden !important;
    padding: 0 !important;
    margin: 0 !important;
    display: block !important;
}
.plot-3d > div { width: 100% !important; }
.plot-timeline {
    background: #07090f !important;
    width: 100% !important;
    min-height: 340px !important;
    overflow: hidden !important;
    padding: 0 !important;
    margin: 0 !important;
}
.plot-timeline .label-wrap { display: none !important; }
.plot-timeline .wrap { padding: 0 !important; margin: 0 !important; }
.panel-brain .wrap,
.panel-brain > * { gap: 0 !important; padding-top: 0 !important; margin-top: 0 !important; }

/* ── Accordion ── */
.gr-accordion > .label-wrap {
    background: transparent !important;
    border: none !important;
    border-top: 1px solid #1a2235 !important;
    padding: 9px 0 !important;
    font-size: 0.74rem !important;
    color: #3a4f6a !important;
}
.gr-accordion > .label-wrap:hover { color: #5a7aaa !important; }

/* ── Model info ── */
.info-grid { display: flex; flex-direction: column; }
.info-item {
    display: flex; gap: 20px; padding: 9px 0;
    border-bottom: 1px solid #0e1220;
    font-size: 0.79rem; line-height: 1.55;
}
.info-item:last-child { border-bottom: none; }
.info-key {
    min-width: 120px; color: #3a4f6a; font-weight: 500;
    flex-shrink: 0; font-size: 0.71rem;
    text-transform: uppercase; letter-spacing: 0.07em; padding-top: 2px;
}
.info-val { color: #5a7aaa; }

/* ── Footer ── */
.tribe-footer {
    margin-top: 24px; padding-top: 16px;
    border-top: 1px solid #1a2235;
    font-size: 0.74rem; color: #3a4f6a; line-height: 1.7;
}
.footer-label {
    display: block; font-weight: 600; text-transform: uppercase;
    letter-spacing: 0.09em; font-size: 0.63rem; color: #1e2a3a; margin-bottom: 8px;
}
.tribe-footer ul { margin: 0; padding-left: 16px; }
.tribe-footer li { margin-bottom: 4px; }
.tribe-footer a { color: #3a4f6a; text-decoration: none; }
.tribe-footer a:hover { color: #5a7aaa; }
.tribe-footer strong { color: #4a6080; font-weight: 500; }
"""
670
+
671
+ # ── Brain placeholder ─────────────────────────────────────────────────────────
672
# Shown in the 3-D panel until the first prediction finishes: a schematic
# two-hemisphere outline (inline SVG) above a short prompt, centred in a box
# as tall as the Plotly figure that later replaces it.
BRAIN_PLACEHOLDER = """
<div style="
width:100%; height:500px;
display:flex; flex-direction:column;
align-items:center; justify-content:center;
color:#1e2a3a; font-family:ui-monospace,'Cascadia Code','Source Code Pro',monospace;
font-size:0.78rem; letter-spacing:0.06em; gap:14px;
background:#0d1120;
">
<svg width="54" height="54" viewBox="0 0 54 54" fill="none" xmlns="http://www.w3.org/2000/svg">
<ellipse cx="19" cy="27" rx="13" ry="17" stroke="#1e3a5a" stroke-width="1.5"/>
<ellipse cx="35" cy="27" rx="13" ry="17" stroke="#1e3a5a" stroke-width="1.5"/>
<path d="M19 10 Q27 6 35 10" stroke="#1e3a5a" stroke-width="1.5" fill="none"/>
<path d="M19 44 Q27 48 35 44" stroke="#1e3a5a" stroke-width="1.5" fill="none"/>
<line x1="27" y1="10" x2="27" y2="44" stroke="#1e3a5a" stroke-width="1" stroke-dasharray="3 3"/>
<path d="M12 20 Q9 27 12 34" stroke="#1e3a5a" stroke-width="1.2" fill="none"/>
<path d="M42 20 Q45 27 42 34" stroke="#1e3a5a" stroke-width="1.2" fill="none"/>
</svg>
<span style="color:#1e3a5a; text-transform:uppercase; letter-spacing:0.12em;">
Run prediction to visualize cortical activity
</span>
</div>
"""
695
+
696
+ # ── UI ─────────────────────────────────────────────────────────────────────────
697
# Layout: header and notice, an "about" accordion, a main row with the input
# panel (left) and the 3-D brain (right), a full-width timeline row, and the
# usage notes. Component labels use real em dashes.
with gr.Blocks() as demo:

    gr.HTML(HEADER)
    gr.HTML(NOTICE)

    with gr.Accordion("About the model", open=False):
        gr.HTML(MODEL_INFO)

    with gr.Row(elem_id="main-row"):

        # ── Col left: Input ──
        with gr.Column(scale=1, elem_classes=["tribe-box", "panel-input"]):
            gr.HTML('<div class="sec-label sec-label-input">Input</div>')
            with gr.Column(elem_classes=["input-col-inner"]):

                input_type = gr.Radio(
                    choices=["Video", "Audio", "Text"],
                    value="Video",
                    label="Modality",
                    elem_classes=["modality-selector"],
                )

                # One group per modality; only the selected one is visible.
                with gr.Group(visible=True, elem_classes=["upload-slot-wrap"]) as video_group:
                    video_file = gr.Video(label="Video file — mp4, mkv, avi", elem_classes=["upload-slot"])

                sample_btn = gr.Button(
                    "Load sample (Sintel trailer)",
                    elem_classes=["btn-sample"],
                    visible=True,
                )

                with gr.Group(visible=False, elem_classes=["upload-slot-wrap"]) as audio_group:
                    audio_file = gr.Audio(
                        label="Audio file — wav, mp3, flac",
                        type="filepath",
                        elem_classes=["upload-slot"],
                    )

                with gr.Group(visible=False) as text_group:
                    text_input = gr.Textbox(
                        label="Text",
                        placeholder="Enter text. Converted to speech internally.",
                        lines=4, max_lines=8,
                    )

                with gr.Accordion("Settings", open=True):
                    n_timesteps = gr.Slider(
                        minimum=1, maximum=30, value=10, step=1,
                        label="Timesteps to visualize (1 TR = 1 s)",
                    )
                    vmin_slider = gr.Slider(
                        minimum=-0.5, maximum=1.0, value=0.5, step=0.05,
                        label="Activation threshold (vmin) — lower = more brain covered",
                    )
                    show_stimuli = gr.Checkbox(
                        value=True,
                        label="Overlay stimulus frames (video only)",
                    )

                run_btn = gr.Button("Run prediction", elem_classes=["btn-run"])
                status_md = gr.Markdown(value="", elem_classes=["status-line"])

        # ── Col right: 3D Brain ──
        with gr.Column(scale=2, elem_classes=["tribe-box", "panel-brain"]):
            gr.HTML('<div class="sec-label sec-label-brain">Cortical surface &mdash; predicted BOLD response &nbsp;&middot;&nbsp; drag to rotate &nbsp;&middot;&nbsp; scroll to zoom</div>')
            brain_3d = gr.HTML(value=BRAIN_PLACEHOLDER, elem_classes=["plot-3d"])

    with gr.Row():
        with gr.Column(elem_classes=["tribe-box"]):
            gr.HTML('<div class="sec-label sec-label-timeline">Timeline &mdash; stimulus and predicted brain response per timestep</div>')
            timeline_plot = gr.Plot(elem_classes=["plot-timeline"])

    gr.HTML(NOTES_HTML)

    # ── Callbacks ──
    def toggle_inputs(choice):
        """Show only the widgets that belong to the selected modality.

        Returns four updates in the order of the ``outputs`` list below:
        video group, audio group, text group, sample button.
        """
        return (
            gr.update(visible=choice == "Video"),
            gr.update(visible=choice == "Audio"),
            gr.update(visible=choice == "Text"),
            # The sample clip is a video, so its button follows the video group.
            gr.update(visible=choice == "Video"),
        )

    input_type.change(
        fn=toggle_inputs, inputs=[input_type],
        outputs=[video_group, audio_group, text_group, sample_btn],
    )
    sample_btn.click(fn=download_sample_video, inputs=[], outputs=[video_file])
    run_btn.click(
        fn=run_prediction,
        inputs=[input_type, video_file, audio_file, text_input, n_timesteps, vmin_slider, show_stimuli],
        outputs=[brain_3d, timeline_plot, status_md],
        show_progress="full",
    )
791
+
792
# Slate-toned base theme with the Inter font; built separately so the launch
# call stays on one line.
_app_theme = gr.themes.Base(
    primary_hue=gr.themes.colors.slate,
    neutral_hue=gr.themes.colors.slate,
    font=gr.themes.GoogleFont("Inter"),
)

# Start the app with server-side rendering disabled and the custom CSS applied.
demo.launch(ssr_mode=False, css=CSS, theme=_app_theme)
packages.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ libosmesa6
2
+ libgl1
3
+ libegl1
4
+ libgles2
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TRIBE v2 — HuggingFace Spaces requirements
2
+ #
3
+ # NOTE: Do NOT pin gradio here — HF Spaces pre-installs its own version
4
+ # (currently 6.10.0) and any conflicting pin will break the build.
5
+
6
+ # Install tribev2 directly from GitHub with the [plotting] extra
7
+ tribev2[plotting] @ git+https://github.com/facebookresearch/tribev2.git
8
+
9
+ # ZeroGPU support (pre-installed on HF Spaces but listed for clarity)
10
+ spaces>=0.19.4
11
+
12
+ # Ensure headless matplotlib backend works
13
+ matplotlib>=3.8.0
14
+
15
+ # huggingface_hub for HF_TOKEN login helper
16
+ huggingface_hub>=0.23.0
17
+
18
+ # tribev2 uses `uvx` internally to run WhisperX (audio transcription).
19
+ # Installing uv via pip makes the `uvx` binary available in PATH.
20
+ uv>=0.4.0
21
+
22
+ # 3D brain visualization
23
+ plotly>=5.18.0