Spaces:

hmb
/

tribe2

Configuration error

hmb HF Staff Claude Opus 4.6 (1M context) committed 26 days ago

Commit

752b527

1 Parent(s): 247b81b

Redesign: dark neural observatory aesthetic

Syne + DM Mono typography, charcoal background, amber accent,
glassmorphic zone cards, stat grid, radial glow header.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show

app.py +398 -160

app.py CHANGED Viewed

@@ -26,29 +26,20 @@ def get_model():
         return _model
 # fsaverage5 cortical region mapping (approximate vertex index ranges)
-# These map TRIBE's ~20k output vertices to broad cortical zones.
 ZONE_VERTEX_RANGES = {
-    "Frontal zone": (0, 3500),
-    "Action zone": (3500, 6500),        # motor / premotor cortex
-    "Attention zone": (6500, 10000),     # parietal / dorsal attention
-    "Speech & recognition zone": (10000, 14500),  # temporal / auditory
-    "Visual zone": (14500, 20484),       # occipital / visual cortex
 }
-ZONE_DESCRIPTIONS = {
-    "Frontal zone": "Evaluates meaning, intent, and what matters most in the frame.",
-    "Action zone": "Tracks movement, gestures, physical action, and body dynamics.",
-    "Attention zone": "Relates to how attention spreads across the frame and where the viewer is likely to look.",
-    "Speech & recognition zone": "Supports speech, sound, face, and familiar-object recognition.",
-    "Visual zone": "Processes the image itself: shape, contrast, color, motion, and detail.",
-}
-ZONE_COLORS = {
-    "Frontal zone": "#d4af37",
-    "Action zone": "#e07830",
-    "Attention zone": "#3cb371",
-    "Speech & recognition zone": "#cd5c5c",
-    "Visual zone": "#4682b4",
 }
@@ -63,37 +54,32 @@ def normalize(arr):
 @spaces.GPU
 def analyze_video(video_path):
-    """Run TRIBE v2 inference and extract zone signals + composite engagement."""
     m = get_model()
     df = m.get_events_dataframe(video_path=video_path)
     preds, segments = m.predict(events=df)
-    # preds shape: (n_timesteps, n_vertices)
     n_steps = preds.shape[0]
     if n_steps < 2:
         raise gr.Error("Video too short to analyze.")
-    # Build timestamps from segments (each step ~1s with 5s hemodynamic offset)
     if hasattr(segments, "start_time"):
         timestamps = [float(s.start_time) for s in segments]
     else:
         timestamps = list(np.arange(n_steps) * 1.0)
     duration = timestamps[-1] if timestamps else n_steps
-    # Extract per-zone activation over time
     zones = {}
     for zone_name, (start, end) in ZONE_VERTEX_RANGES.items():
         end = min(end, preds.shape[1])
         zone_signal = np.mean(preds[:, start:end], axis=1)
         zones[zone_name] = normalize(zone_signal)
-    # Composite engagement = weighted average of all zones
     engagement = normalize(
-        0.25 * zones["Frontal zone"]
-        + 0.20 * zones["Action zone"]
-        + 0.25 * zones["Attention zone"]
-        + 0.15 * zones["Speech & recognition zone"]
-        + 0.15 * zones["Visual zone"]
     )
     return timestamps, engagement, zones, duration
@@ -103,36 +89,51 @@ def analyze_video(video_path):
 def make_engagement_chart(timestamps, engagement):
     fig = go.Figure()
     fig.add_trace(go.Scatter(
         x=timestamps, y=engagement,
         mode="lines",
-        line=dict(color="#c8a84e", width=2),
         fill="tozeroy",
-        fillcolor="rgba(200,168,78,0.08)",
-        hovertemplate="Time: %{x:.1f}s<br>Response: %{y:.0f}<extra></extra>",
     ))
     fig.update_layout(
         template="plotly_dark",
-        paper_bgcolor="#1a1a1a",
-        plot_bgcolor="#1a1a1a",
-        margin=dict(l=40, r=20, t=10, b=40),
-        height=220,
-        xaxis=dict(title="", showgrid=False, color="#888"),
-        yaxis=dict(title="", showgrid=False, range=[0, 105], color="#888"),
     )
     return fig
 # ── Feedback generation ─────────────────────────────────────────────────────
-def zone_activity_label(mean_val):
-    if mean_val >= 70:
-        return "more active now"
-    if mean_val >= 40:
-        return "active now"
-    return "low activity"
 def generate_feedback(engagement, zones):
     avg = float(np.mean(engagement))
     mx = float(np.max(engagement))
@@ -146,91 +147,115 @@ def generate_feedback(engagement, zones):
     end_avg = float(np.mean(engagement[-hook_len:]))
     if avg >= 72:
-        strength = "Strong"
     elif avg >= 50:
-        strength = "Average"
     else:
-        strength = "Weak"
     zone_avgs = {name: float(np.mean(sig)) for name, sig in zones.items()}
     weakest_zone = min(zone_avgs, key=zone_avgs.get)
     recs = []
     if hook_avg < 55:
-        recs.append("**Weak hook.** The first few seconds don't grab attention. Start with your strongest visual or a surprising moment.")
     if end_avg < 45:
-        recs.append("**Flat ending.** Viewers drop off at the end. Add a payoff, callback, or cliffhanger in the final seconds.")
     if weak_pct > 30:
-        recs.append(f"**{weak_pct:.0f}% dead air.** Too many flat moments. Cut or speed up the low-engagement stretches.")
     zone_recs = {
-        "Action zone": "Add more movement, gestures, or physical action to keep the body-dynamics signal alive.",
-        "Attention zone": "Vary your shots more — the visual pace is too uniform. Add cuts, zooms, or angle changes.",
-        "Speech & recognition zone": "More face time or recognizable elements. Faces and familiar objects are powerful anchors.",
-        "Visual zone": "The visual texture is flat. Play with contrast, color grading, or compositional variety.",
-        "Frontal zone": "The content lacks a clear narrative thread. Give the viewer something to follow or anticipate.",
     }
     if zone_avgs[weakest_zone] < 55:
-        recs.append(f"**Weakest zone: {weakest_zone}.** {zone_recs.get(weakest_zone, '')}")
     if not recs:
-        recs.append("Looking solid. Minor gains possible from tightening pacing in the middle third.")
-    summary = f"""### Overall: {strength} (avg {avg:.0f}/100)
-| Metric | Value |
-|--------|-------|
-| Average | {avg:.0f} |
-| Peak | {mx:.0f} |
-| Floor | {mn:.0f} |
-| Hook (first 10%) | {hook_avg:.0f} |
-| Ending (last 10%) | {end_avg:.0f} |
-| Dead air | {weak_pct:.0f}% |
-### Recommendations
-"""
-    for r in recs:
-        summary += f"- {r}\n"
-    return summary, zone_avgs, strength
-# ── Zone bars HTML ──────────────────────────────────────────────────────────
 def make_zone_html(zone_avgs):
-    html = '<div style="display:flex;flex-direction:column;gap:16px;">'
-    for zone_name, desc in ZONE_DESCRIPTIONS.items():
         val = zone_avgs.get(zone_name, 0)
-        color = ZONE_COLORS[zone_name]
-        label = zone_activity_label(val)
-        html += f"""
-        <div style="background:#fafaf7;border-radius:12px;padding:16px 18px;border:1px solid #e8e5dd;">
-            <div style="display:flex;align-items:center;gap:8px;margin-bottom:4px;">
-                <span style="width:10px;height:10px;border-radius:50%;background:{color};display:inline-block;"></span>
-                <strong style="font-size:14px;">{zone_name}</strong>
             </div>
-            <div style="font-size:12px;color:#888;margin-bottom:2px;">{label}</div>
-            <div style="font-size:12px;color:#666;margin-bottom:8px;">{desc}</div>
-            <div style="background:#e8e5dd;border-radius:4px;height:6px;overflow:hidden;">
-                <div style="background:{color};height:100%;width:{val:.0f}%;border-radius:4px;"></div>
             </div>
         </div>"""
-    html += "</div>"
-    return html
 # ── Main handlers ───────────────────────────────────────────────────────────
 def process_video(video):
     if video is None:
-        raise gr.Error("Please upload a video.")
     timestamps, engagement, zones, duration = analyze_video(video)
     chart = make_engagement_chart(timestamps, engagement)
-    feedback, zone_avgs, strength = generate_feedback(engagement, zones)
     zone_html = make_zone_html(zone_avgs)
-    stats_text = f"Avg: {np.mean(engagement):.0f}  ·  Max: {np.max(engagement):.0f}  ·  Min: {np.min(engagement):.0f}  ·  Duration: {duration:.1f}s"
-    return chart, zone_html, feedback, stats_text
 def process_comparison(video_a, video_b):
@@ -243,93 +268,309 @@ def process_comparison(video_a, video_b):
     fig = go.Figure()
     fig.add_trace(go.Scatter(
         x=ts_a, y=eng_a, mode="lines", name="Video A",
-        line=dict(color="#c8a84e", width=2),
     ))
     fig.add_trace(go.Scatter(
         x=ts_b, y=eng_b, mode="lines", name="Video B",
-        line=dict(color="#4682b4", width=2),
     ))
     fig.update_layout(
         template="plotly_dark",
-        paper_bgcolor="#1a1a1a", plot_bgcolor="#1a1a1a",
-        margin=dict(l=40, r=20, t=10, b=40),
-        height=280,
-        xaxis=dict(title="Time (s)", showgrid=False, color="#888"),
-        yaxis=dict(title="Response", showgrid=False, range=[0, 105], color="#888"),
-        legend=dict(font=dict(color="#ccc")),
     )
     avg_a, avg_b = np.mean(eng_a), np.mean(eng_b)
     winner = "A" if avg_a > avg_b else "B"
     diff = abs(avg_a - avg_b)
-    summary = f"""### A/B Comparison
-| | Video A | Video B |
 |--|---------|---------|
-| **Avg response** | {avg_a:.0f} | {avg_b:.0f} |
 | **Peak** | {np.max(eng_a):.0f} | {np.max(eng_b):.0f} |
 | **Floor** | {np.min(eng_a):.0f} | {np.min(eng_b):.0f} |
 | **Duration** | {dur_a:.1f}s | {dur_b:.1f}s |
-**Video {winner}** has stronger predicted engagement (+{diff:.0f} avg).
 """
     return fig, summary
-# ── UI ──────────────────────────────────────────────────────────────────────
-HEADER_TEXT = (
-    "The model shows where the video seems to affect the viewer more or less strongly. "
-    "It is based on an average viewer model, not one real person. "
-    "Treat each timestamp as an approximate editing zone and inspect the nearby window around it."
-)
 theme = gr.themes.Base(
     primary_hue=gr.themes.colors.amber,
-    secondary_hue=gr.themes.colors.stone,
-    neutral_hue=gr.themes.colors.stone,
-    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
-    font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
 ).set(
-    body_background_fill="#f5f3ee",
-    block_background_fill="#ffffff",
-    block_border_width="1px",
-    block_border_color="#e8e5dd",
-    block_radius="12px",
-    block_shadow="0 1px 3px rgba(0,0,0,0.04)",
-    input_background_fill="#fafaf7",
 )
-with gr.Blocks(theme=theme, title="TRIBE2 – Video Engagement Analyzer") as demo:
-    gr.Markdown(f"# TRIBE2\n\n{HEADER_TEXT}")
-    with gr.Tab("Workspace"):
         with gr.Row():
-            video_input = gr.Video(label="Video", height=400)
-            with gr.Column(scale=2):
-                stats = gr.Textbox(label="Stats", interactive=False, lines=1)
-                chart = gr.Plot(label="Predicted response over time")
             with gr.Column(scale=1):
-                zone_html = gr.HTML(label="Brain zones")
-        analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
-        feedback = gr.Markdown(label="Feedback")
         analyze_btn.click(
             fn=process_video,
             inputs=[video_input],
-            outputs=[chart, zone_html, feedback, stats],
         )
-    with gr.Tab("Compare (A/B)"):
         with gr.Row():
             vid_a = gr.Video(label="Video A")
             vid_b = gr.Video(label="Video B")
         compare_btn = gr.Button("Compare", variant="primary", size="lg")
-        compare_chart = gr.Plot(label="Response comparison")
         compare_md = gr.Markdown()
         compare_btn.click(
@@ -338,24 +579,21 @@ with gr.Blocks(theme=theme, title="TRIBE2 – Video Engagement Analyzer") as dem
             outputs=[compare_chart, compare_md],
         )
-    with gr.Tab("Details"):
-        gr.Markdown("""### How it works
-**TRIBE2** uses Meta's [TRIBE v2](https://huggingface.co/facebook/tribev2) brain-encoding foundation model to predict fMRI-level cortical responses to your video.
-The model combines **V-JEPA2** (vision), **Wav2Vec-BERT 2.0** (audio), and **LLaMA 3.2** (language) to predict activation across ~20,000 cortical vertices on the fsaverage5 brain surface.
-We aggregate those vertex-level predictions into five broad cortical zones:
-- **Frontal zone** — prefrontal cortex: meaning, intent, narrative comprehension
-- **Action zone** — motor / premotor cortex: movement, gestures, body dynamics
-- **Attention zone** — parietal / dorsal attention: gaze, spatial focus, visual salience
-- **Speech & recognition zone** — temporal cortex: speech, sound, face and object recognition
-- **Visual zone** — occipital cortex: shape, contrast, color, motion, detail
-These are combined into a single **predicted response curve** that estimates where viewers engage or tune out.
-Note: predictions include a ~5s hemodynamic offset (inherent to fMRI). Treat timestamps as approximate editing zones.
 """)

         return _model
 # fsaverage5 cortical region mapping (approximate vertex index ranges)
 ZONE_VERTEX_RANGES = {
+    "Frontal": (0, 3500),
+    "Action": (3500, 6500),
+    "Attention": (6500, 10000),
+    "Speech": (10000, 14500),
+    "Visual": (14500, 20484),
 }
+ZONE_META = {
+    "Frontal":   {"desc": "Meaning, intent, narrative focus",    "color": "#D4A017", "icon": "01"},
+    "Action":    {"desc": "Movement, gestures, body dynamics",   "color": "#E8651A", "icon": "02"},
+    "Attention": {"desc": "Gaze direction, spatial salience",    "color": "#22C55E", "icon": "03"},
+    "Speech":    {"desc": "Voice, faces, object recognition",    "color": "#EF4444", "icon": "04"},
+    "Visual":    {"desc": "Shape, contrast, color, motion",      "color": "#3B82F6", "icon": "05"},
 }
 @spaces.GPU
 def analyze_video(video_path):
     m = get_model()
     df = m.get_events_dataframe(video_path=video_path)
     preds, segments = m.predict(events=df)
     n_steps = preds.shape[0]
     if n_steps < 2:
         raise gr.Error("Video too short to analyze.")
     if hasattr(segments, "start_time"):
         timestamps = [float(s.start_time) for s in segments]
     else:
         timestamps = list(np.arange(n_steps) * 1.0)
     duration = timestamps[-1] if timestamps else n_steps
     zones = {}
     for zone_name, (start, end) in ZONE_VERTEX_RANGES.items():
         end = min(end, preds.shape[1])
         zone_signal = np.mean(preds[:, start:end], axis=1)
         zones[zone_name] = normalize(zone_signal)
     engagement = normalize(
+        0.25 * zones["Frontal"]
+        + 0.20 * zones["Action"]
+        + 0.25 * zones["Attention"]
+        + 0.15 * zones["Speech"]
+        + 0.15 * zones["Visual"]
     )
     return timestamps, engagement, zones, duration
 def make_engagement_chart(timestamps, engagement):
     fig = go.Figure()
+    # Subtle grid area
     fig.add_trace(go.Scatter(
         x=timestamps, y=engagement,
         mode="lines",
+        line=dict(color="#D4A017", width=2.5),
         fill="tozeroy",
+        fillcolor="rgba(212,160,23,0.06)",
+        hovertemplate="<b>%{x:.1f}s</b><br>Response: %{y:.0f}/100<extra></extra>",
     ))
     fig.update_layout(
         template="plotly_dark",
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        margin=dict(l=45, r=15, t=15, b=40),
+        height=260,
+        xaxis=dict(
+            title="",
+            showgrid=True,
+            gridcolor="rgba(255,255,255,0.04)",
+            zeroline=False,
+            color="#666",
+            tickfont=dict(size=11, family="DM Mono, monospace"),
+        ),
+        yaxis=dict(
+            title="",
+            showgrid=True,
+            gridcolor="rgba(255,255,255,0.04)",
+            zeroline=False,
+            range=[0, 105],
+            color="#666",
+            tickfont=dict(size=11, family="DM Mono, monospace"),
+        ),
+        hoverlabel=dict(
+            bgcolor="#1a1a1a",
+            bordercolor="#D4A017",
+            font=dict(color="#fff", family="DM Mono, monospace", size=12),
+        ),
     )
     return fig
 # ── Feedback generation ─────────────────────────────────────────────────────
 def generate_feedback(engagement, zones):
     avg = float(np.mean(engagement))
     mx = float(np.max(engagement))
     end_avg = float(np.mean(engagement[-hook_len:]))
     if avg >= 72:
+        strength, strength_color = "STRONG", "#22C55E"
     elif avg >= 50:
+        strength, strength_color = "AVERAGE", "#D4A017"
     else:
+        strength, strength_color = "WEAK", "#EF4444"
     zone_avgs = {name: float(np.mean(sig)) for name, sig in zones.items()}
     weakest_zone = min(zone_avgs, key=zone_avgs.get)
     recs = []
     if hook_avg < 55:
+        recs.append("**Weak hook** — first seconds don't grab. Lead with your strongest visual or a surprise.")
     if end_avg < 45:
+        recs.append("**Flat ending** — viewers drop off. Add a payoff or cliffhanger in the final seconds.")
     if weak_pct > 30:
+        recs.append(f"**{weak_pct:.0f}% dead air** — too many flat stretches. Cut or accelerate the lows.")
     zone_recs = {
+        "Action": "More movement, gestures, or physical dynamics.",
+        "Attention": "Vary shots — add cuts, zooms, angle changes.",
+        "Speech": "More face time or recognizable elements.",
+        "Visual": "Flat texture. Try contrast, color grading, composition.",
+        "Frontal": "No clear thread. Give viewers something to follow.",
     }
     if zone_avgs[weakest_zone] < 55:
+        recs.append(f"**{weakest_zone} zone is dragging** — {zone_recs.get(weakest_zone, '')}")
     if not recs:
+        recs.append("Solid cut. Minor gains from tightening the middle third.")
+    rec_lines = "\n".join(f"- {r}" for r in recs)
+    return zone_avgs, strength, strength_color, avg, mx, mn, hook_avg, end_avg, weak_pct, rec_lines
+# ── HTML builders ───────────────────────────────────────────────────────────
+def make_stats_html(strength, strength_color, avg, mx, mn, hook_avg, end_avg, weak_pct):
+    return f"""
+    <div class="stats-grid">
+        <div class="stat-card stat-verdict" style="border-color: {strength_color}40;">
+            <div class="stat-label">Verdict</div>
+            <div class="stat-value" style="color: {strength_color};">{strength}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Average</div>
+            <div class="stat-value">{avg:.0f}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Peak</div>
+            <div class="stat-value">{mx:.0f}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Floor</div>
+            <div class="stat-value">{mn:.0f}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Hook</div>
+            <div class="stat-value">{hook_avg:.0f}</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-label">Dead air</div>
+            <div class="stat-value">{weak_pct:.0f}%</div>
+        </div>
+    </div>
+    """
 def make_zone_html(zone_avgs):
+    cards = ""
+    for zone_name, meta in ZONE_META.items():
         val = zone_avgs.get(zone_name, 0)
+        color = meta["color"]
+        idx = meta["icon"]
+        if val >= 70:
+            activity = "HIGH"
+        elif val >= 40:
+            activity = "MID"
+        else:
+            activity = "LOW"
+        cards += f"""
+        <div class="zone-card">
+            <div class="zone-header">
+                <span class="zone-idx" style="color: {color};">{idx}</span>
+                <span class="zone-name">{zone_name}</span>
+                <span class="zone-activity" style="color: {color};">{activity}</span>
             </div>
+            <div class="zone-desc">{meta['desc']}</div>
+            <div class="zone-bar-track">
+                <div class="zone-bar-fill" style="width:{val:.0f}%;background:{color};"></div>
             </div>
+            <div class="zone-val" style="color: {color};">{val:.0f}</div>
         </div>"""
+    return f'<div class="zone-stack">{cards}</div>'
 # ── Main handlers ───────────────────────────────────────────────────────────
 def process_video(video):
     if video is None:
+        raise gr.Error("Upload a video first.")
     timestamps, engagement, zones, duration = analyze_video(video)
     chart = make_engagement_chart(timestamps, engagement)
+    zone_avgs, strength, strength_color, avg, mx, mn, hook_avg, end_avg, weak_pct, recs = generate_feedback(engagement, zones)
+    stats_html = make_stats_html(strength, strength_color, avg, mx, mn, hook_avg, end_avg, weak_pct)
     zone_html = make_zone_html(zone_avgs)
+    return chart, stats_html, zone_html, recs
 def process_comparison(video_a, video_b):
     fig = go.Figure()
     fig.add_trace(go.Scatter(
         x=ts_a, y=eng_a, mode="lines", name="Video A",
+        line=dict(color="#D4A017", width=2.5),
     ))
     fig.add_trace(go.Scatter(
         x=ts_b, y=eng_b, mode="lines", name="Video B",
+        line=dict(color="#3B82F6", width=2.5),
     ))
     fig.update_layout(
         template="plotly_dark",
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        margin=dict(l=45, r=15, t=15, b=40),
+        height=300,
+        xaxis=dict(showgrid=True, gridcolor="rgba(255,255,255,0.04)", zeroline=False, color="#666",
+                   tickfont=dict(size=11, family="DM Mono, monospace")),
+        yaxis=dict(showgrid=True, gridcolor="rgba(255,255,255,0.04)", zeroline=False, range=[0, 105], color="#666",
+                   tickfont=dict(size=11, family="DM Mono, monospace")),
+        legend=dict(font=dict(color="#999", family="DM Mono, monospace", size=11),
+                    bgcolor="rgba(0,0,0,0)", orientation="h", y=1.08),
+        hoverlabel=dict(bgcolor="#1a1a1a", bordercolor="#D4A017",
+                        font=dict(color="#fff", family="DM Mono, monospace", size=12)),
     )
     avg_a, avg_b = np.mean(eng_a), np.mean(eng_b)
     winner = "A" if avg_a > avg_b else "B"
     diff = abs(avg_a - avg_b)
+    summary = f"""| | Video A | Video B |
 |--|---------|---------|
+| **Average** | {avg_a:.0f} | {avg_b:.0f} |
 | **Peak** | {np.max(eng_a):.0f} | {np.max(eng_b):.0f} |
 | **Floor** | {np.min(eng_a):.0f} | {np.min(eng_b):.0f} |
 | **Duration** | {dur_a:.1f}s | {dur_b:.1f}s |
+**Video {winner}** wins by **+{diff:.0f}** avg response.
 """
     return fig, summary
+# ── Custom CSS ──────────────────────────────────────────────────────────────
+CSS = """
+@import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@300;400;500&family=Syne:wght@400;600;700;800&display=swap');
+/* Global overrides */
+.gradio-container {
+    max-width: 1400px !important;
+    font-family: 'Syne', sans-serif !important;
+    background: #0A0A0B !important;
+}
+.dark .gradio-container { background: #0A0A0B !important; }
+/* Header */
+.hero-header {
+    text-align: center;
+    padding: 48px 20px 32px;
+    position: relative;
+}
+.hero-header::before {
+    content: '';
+    position: absolute;
+    top: 0; left: 50%;
+    transform: translateX(-50%);
+    width: 400px; height: 400px;
+    background: radial-gradient(circle, rgba(212,160,23,0.08) 0%, transparent 70%);
+    pointer-events: none;
+}
+.hero-title {
+    font-family: 'Syne', sans-serif;
+    font-size: 56px;
+    font-weight: 800;
+    letter-spacing: -2px;
+    color: #FAFAFA;
+    margin: 0;
+    line-height: 1;
+}
+.hero-title span { color: #D4A017; }
+.hero-sub {
+    font-family: 'DM Mono', monospace;
+    font-size: 13px;
+    color: #666;
+    margin-top: 12px;
+    letter-spacing: 0.5px;
+    line-height: 1.6;
+    max-width: 600px;
+    margin-left: auto;
+    margin-right: auto;
+}
+/* Tabs */
+.tab-nav { border: none !important; }
+.tab-nav button {
+    font-family: 'DM Mono', monospace !important;
+    font-size: 12px !important;
+    letter-spacing: 1px !important;
+    text-transform: uppercase !important;
+    color: #555 !important;
+    border: none !important;
+    background: transparent !important;
+    padding: 10px 20px !important;
+}
+.tab-nav button.selected {
+    color: #D4A017 !important;
+    border-bottom: 2px solid #D4A017 !important;
+    background: transparent !important;
+}
+/* Stat cards */
+.stats-grid {
+    display: grid;
+    grid-template-columns: repeat(6, 1fr);
+    gap: 10px;
+    margin-top: 8px;
+}
+.stat-card {
+    background: rgba(255,255,255,0.03);
+    border: 1px solid rgba(255,255,255,0.06);
+    border-radius: 10px;
+    padding: 14px 16px;
+    text-align: center;
+}
+.stat-card.stat-verdict {
+    border-width: 2px;
+}
+.stat-label {
+    font-family: 'DM Mono', monospace;
+    font-size: 10px;
+    text-transform: uppercase;
+    letter-spacing: 1.5px;
+    color: #555;
+    margin-bottom: 6px;
+}
+.stat-value {
+    font-family: 'Syne', sans-serif;
+    font-size: 22px;
+    font-weight: 700;
+    color: #FAFAFA;
+}
+/* Zone cards */
+.zone-stack {
+    display: flex;
+    flex-direction: column;
+    gap: 8px;
+}
+.zone-card {
+    background: rgba(255,255,255,0.02);
+    border: 1px solid rgba(255,255,255,0.06);
+    border-radius: 10px;
+    padding: 14px 16px;
+}
+.zone-header {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    margin-bottom: 4px;
+}
+.zone-idx {
+    font-family: 'DM Mono', monospace;
+    font-size: 11px;
+    font-weight: 500;
+    opacity: 0.7;
+}
+.zone-name {
+    font-family: 'Syne', sans-serif;
+    font-size: 14px;
+    font-weight: 600;
+    color: #E0E0E0;
+    flex: 1;
+}
+.zone-activity {
+    font-family: 'DM Mono', monospace;
+    font-size: 10px;
+    letter-spacing: 1px;
+    font-weight: 500;
+}
+.zone-desc {
+    font-family: 'DM Mono', monospace;
+    font-size: 11px;
+    color: #555;
+    margin-bottom: 10px;
+    line-height: 1.4;
+}
+.zone-bar-track {
+    height: 4px;
+    background: rgba(255,255,255,0.06);
+    border-radius: 2px;
+    overflow: hidden;
+    margin-bottom: 6px;
+}
+.zone-bar-fill {
+    height: 100%;
+    border-radius: 2px;
+    transition: width 0.8s cubic-bezier(0.22, 1, 0.36, 1);
+}
+.zone-val {
+    font-family: 'DM Mono', monospace;
+    font-size: 12px;
+    font-weight: 500;
+    text-align: right;
+}
+/* Plot container */
+.plot-container {
+    border-radius: 12px;
+    overflow: hidden;
+}
+/* Blocks overrides */
+.block { background: transparent !important; border: none !important; box-shadow: none !important; }
+.label-wrap { display: none !important; }
+.prose { color: #999 !important; }
+.prose h3 { color: #E0E0E0 !important; font-family: 'Syne', sans-serif !important; }
+.prose strong { color: #FAFAFA !important; }
+.prose table { border-color: rgba(255,255,255,0.08) !important; }
+.prose th, .prose td { border-color: rgba(255,255,255,0.08) !important; color: #999 !important; }
+.prose th { color: #ccc !important; }
+.prose a { color: #D4A017 !important; }
+/* Video component */
+.video-container { border-radius: 12px; overflow: hidden; }
+/* Button */
+button.primary {
+    background: #D4A017 !important;
+    border: none !important;
+    color: #0A0A0B !important;
+    font-family: 'Syne', sans-serif !important;
+    font-weight: 700 !important;
+    letter-spacing: 0.5px !important;
+    border-radius: 10px !important;
+}
+button.primary:hover {
+    background: #E8B12A !important;
+}
+/* Markdown feedback section */
+.feedback-section .prose {
+    font-family: 'DM Mono', monospace !important;
+    font-size: 13px !important;
+    line-height: 1.7 !important;
+}
+/* Responsive */
+@media (max-width: 768px) {
+    .stats-grid { grid-template-columns: repeat(3, 1fr); }
+    .hero-title { font-size: 36px; }
+}
+"""
+# ── UI ──────────────────────────────────────────────────────────────────────
 theme = gr.themes.Base(
     primary_hue=gr.themes.colors.amber,
+    neutral_hue=gr.themes.colors.zinc,
 ).set(
+    body_background_fill="#0A0A0B",
+    body_text_color="#999999",
+    block_background_fill="transparent",
+    block_border_width="0px",
+    block_shadow="none",
+    input_background_fill="#111113",
+    input_border_color="rgba(255,255,255,0.08)",
+    input_border_width="1px",
+    button_primary_background_fill="#D4A017",
+    button_primary_text_color="#0A0A0B",
 )
+with gr.Blocks(theme=theme, css=CSS, title="TRIBE2") as demo:
+    gr.HTML("""
+    <div class="hero-header">
+        <h1 class="hero-title">TRIBE<span>2</span></h1>
+        <p class="hero-sub">
+            Predict where your video loses the viewer. Powered by Meta's brain-encoding
+            model — maps cortical response across 20,000 vertices in real time.
+        </p>
+    </div>
+    """)
+    with gr.Tab("Analyze"):
+        video_input = gr.Video(label="", height=360)
+        analyze_btn = gr.Button("Run analysis", variant="primary", size="lg")
+        stats_html = gr.HTML()
+        chart = gr.Plot()
         with gr.Row():
+            with gr.Column(scale=2, elem_classes=["feedback-section"]):
+                feedback = gr.Markdown()
             with gr.Column(scale=1):
+                zone_html = gr.HTML()
         analyze_btn.click(
             fn=process_video,
             inputs=[video_input],
+            outputs=[chart, stats_html, zone_html, feedback],
         )
+    with gr.Tab("A/B Compare"):
         with gr.Row():
             vid_a = gr.Video(label="Video A")
             vid_b = gr.Video(label="Video B")
         compare_btn = gr.Button("Compare", variant="primary", size="lg")
+        compare_chart = gr.Plot()
         compare_md = gr.Markdown()
         compare_btn.click(
             outputs=[compare_chart, compare_md],
         )
+    with gr.Tab("How it works"):
+        gr.Markdown("""### The model
+**TRIBE v2** is Meta's brain-encoding foundation model. It predicts fMRI-level cortical
+activation from video, audio, and text using three extractors:
+V-JEPA2 (vision) + Wav2Vec-BERT 2.0 (audio) + LLaMA 3.2 (language)
+The output is a prediction across ~20,000 cortical vertices on the fsaverage5 surface.
+We aggregate those into five zones and derive a composite engagement signal.
+Predictions carry a ~5s hemodynamic offset inherent to fMRI.
+Treat each timestamp as an approximate editing window.
+[Model card](https://huggingface.co/facebook/tribev2)
 """)