MHMisinfo

Sleeping

App Files Files Community

rocky250 committed on Apr 19

Commit

44bafbe

verified ·

1 Parent(s): dceedff

Update app.py

Browse files

Files changed (1) hide show

app.py +786 -422

app.py CHANGED Viewed

@@ -1,440 +1,804 @@
 """
-MHMisinfo — Mental Health Misinformation Detector
-Gradio Space: paste a YouTube URL → fetch metadata + transcripts → run 4-stream SeTa-Attention model → show verdict
 """
-import os, re, json, sys, warnings
-warnings.filterwarnings("ignore")
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import gradio as gr
-from dataclasses import dataclass
-from typing import Dict, List, Optional
-from huggingface_hub import hf_hub_download
-# ── YouTube helpers ────────────────────────────────────────────────────────────
-from googleapiclient.discovery import build as yt_build
-from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
-# ── Model + Data (inline, no src/ import needed) ──────────────────────────────
-import re as _re
-TAG_SPLIT_RE = _re.compile(r"[\s,]+")
-TEXT_RE = _re.compile(r"[A-Za-z0-9']+")
-@dataclass
-class Vocab:
-    """Token/index mapping with padding and unknown-token support."""
-    token_to_idx: Dict[str, int]
-    idx_to_token: List[str]
-    pad_token: str = "<pad>"
-    unk_token: str = "<unk>"
-    # Index of the padding token; raises KeyError if pad_token is missing from token_to_idx.
-    @property
-    def pad_idx(self): return self.token_to_idx[self.pad_token]
-    # Index of the unknown token; encode() substitutes it for tokens not in token_to_idx.
-    @property
-    def unk_idx(self): return self.token_to_idx[self.unk_token]
-    # Map tokens to ids, then truncate or right-pad with pad_idx so the result has exactly max_len ids.
-    def encode(self, tokens, max_len):
-        ids = [self.token_to_idx.get(t, self.unk_idx) for t in tokens]
-        if len(ids) >= max_len: return ids[:max_len]
-        return ids + [self.pad_idx] * (max_len - len(ids))
-    # Rebuild a Vocab from a plain mapping; pad/unk tokens default to "<pad>"/"<unk>" when absent.
-    @staticmethod
-    def from_serializable(obj):
-        return Vocab(token_to_idx=obj["token_to_idx"],
-                     idx_to_token=obj["idx_to_token"],
-                     pad_token=obj.get("pad_token","<pad>"),
-                     unk_token=obj.get("unk_token","<unk>"))
-def tokenize_tags(text):
-    """Split a tag string into lowercase tokens on whitespace/commas, treating '#' as a separator.
-
-    Non-string input yields an empty list.
-    """
-    if isinstance(text, str):
-        lowered = text.replace("#"," ").lower()
-        return [piece for piece in TAG_SPLIT_RE.split(lowered) if piece]
-    return []
-def tokenize_text(text):
-    """Return the lowercase TEXT_RE matches found in *text*; non-string input yields an empty list."""
-    if isinstance(text, str):
-        return [match.lower() for match in TEXT_RE.findall(text)]
-    return []
-# ── Model Architecture (identical to src/model.py) ────────────────────────────
-class SeTaAttention(nn.Module):
-    """Attention pooling with two learned queries over one shared projection.
-
-    The sequence is pooled once per query (sem_query, task_query); the two pooled
-    vectors are concatenated and mapped back to input_dim by ``out``.
-    """
-    def __init__(self, input_dim, attn_dim, dropout=0.1):
-        super().__init__()
-        self.proj = nn.Linear(input_dim, attn_dim)
-        self.sem_query = nn.Parameter(torch.randn(attn_dim))
-        self.task_query = nn.Parameter(torch.randn(attn_dim))
-        # Input is the concatenation of the two pooled vectors, hence input_dim * 2.
-        self.out = nn.Linear(input_dim * 2, input_dim)
-        self.dropout = nn.Dropout(dropout)
-    def _attend(self, h, query, mask):
-        # Pool h along dim 1 with softmax weights derived from tanh(proj(h)) scored against query.
-        # presumably h is (batch, seq, input_dim) and mask is a (batch, seq) bool tensor — TODO confirm
-        proj = torch.tanh(self.proj(h))
-        scores = torch.matmul(proj, query)
-        # Positions where mask is False get a large negative score so softmax gives them ~0 weight.
-        scores = scores.masked_fill(~mask, -1e9)
-        weights = torch.softmax(scores, dim=1)
-        return torch.sum(h * weights.unsqueeze(-1), dim=1)
-    def forward(self, h, mask):
-        # Pool once per query, then fuse: concat -> linear -> tanh -> dropout.
-        sem = self._attend(h, self.sem_query, mask)
-        task = self._attend(h, self.task_query, mask)
-        return self.dropout(torch.tanh(self.out(torch.cat([sem, task], dim=-1))))
-class StreamEncoder(nn.Module):
-    """Encode one token-id stream: embedding -> bidirectional GRU -> SeTaAttention -> projection MLP."""
-    def __init__(self, vocab_size, emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout=0.2):
-        super().__init__()
-        # NOTE(review): padding_idx=0 here and `x != 0` in forward() both assume the
-        # vocabulary's pad index is 0 — confirm against Vocab.pad_idx for the loaded vocabs.
-        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
-        self.gru = nn.GRU(emb_dim, hidden_dim, batch_first=True, bidirectional=True)
-        # The GRU is bidirectional, so its output feature size is hidden_dim*2.
-        self.attn = SeTaAttention(hidden_dim*2, attn_dim, dropout=dropout)
-        self.proj = nn.Sequential(
-            nn.Linear(hidden_dim*2, mlp_dim), nn.ReLU(), nn.Dropout(dropout), nn.Linear(mlp_dim, proj_dim)
         )
-        self.dropout = nn.Dropout(dropout)
-    def forward(self, x):
-        # Returns (attn_vec, proj): the attention-pooled vector and its projected form.
-        mask = x != 0
-        emb = self.dropout(self.embedding(x))
-        h, _ = self.gru(emb)
-        attn_vec = self.attn(h, mask)
-        proj = self.dropout(torch.tanh(self.proj(attn_vec)))
-        return attn_vec, proj
-class MultiStreamModel(nn.Module):
-    """Four-stream classifier over tags, text, audio transcript and video transcript.
-
-    Each stream has its own StreamEncoder. The forward pass derives pairwise cosine
-    similarities (``ccm``), softmax trust weights, and per-stream sigmas, fuses the
-    stream projections with trust/sigma weighting, and classifies the concatenation.
-    """
-    def __init__(self, vocab_sizes, num_classes, emb_dim=128, hidden_dim=128, attn_dim=128,
-                 proj_dim=128, mlp_dim=256, dropout=0.2, include_tags_ccm=False, per_modality_trust=False):
-        super().__init__()
-        self.include_tags_ccm = include_tags_ccm
-        self.per_modality_trust = per_modality_trust
-        self.num_classes = num_classes
-        # Encoder attention vectors have size hidden_dim * 2 (bidirectional GRU output).
-        h_dim = hidden_dim * 2
-        self.encoders = nn.ModuleDict({
-            "tags": StreamEncoder(vocab_sizes["tags"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
-            "text": StreamEncoder(vocab_sizes["text"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
-            "audio_transcript": StreamEncoder(vocab_sizes["audio_transcript"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
-            "video_transcript": StreamEncoder(vocab_sizes["video_transcript"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
-        })
-        # 3 similarities among text/audio/video, plus 3 more against tags when enabled.
-        ccm_dim = 3 + (3 if include_tags_ccm else 0)
-        trust_in = h_dim + ccm_dim
-        # Exactly one of trust_mlps (one MLP per stream) / trust_mlp (shared) is set; the other is None.
-        if per_modality_trust:
-            self.trust_mlps = nn.ModuleDict({k: self._make_mlp(trust_in, mlp_dim, 1, dropout)
-                                              for k in ["text","audio_transcript","video_transcript","tags"]})
-            self.trust_mlp = None
         else:
-            self.trust_mlp = self._make_mlp(trust_in, mlp_dim, 1, dropout)
-            self.trust_mlps = None
-        self.uncertainty_mlp = self._make_mlp(h_dim, mlp_dim, 1, dropout)
-        # Mirrors the concatenation in forward(): 4 stream projections + fused vector
-        # (proj_dim * 5), the ccm features, 4 trust weights and 4 sigmas.
-        classifier_in = proj_dim * 5 + ccm_dim + 4 + 4
-        # Two-class models emit a single logit; otherwise one logit per class.
-        out_dim = 1 if num_classes == 2 else num_classes
-        self.mlp = nn.Sequential(
-            nn.Linear(classifier_in, mlp_dim), nn.ReLU(), nn.Dropout(dropout), nn.Linear(mlp_dim, out_dim)
         )
-    # Linear -> ReLU -> Dropout -> Linear helper used for the trust and uncertainty heads.
-    @staticmethod
-    def _make_mlp(in_dim, hidden_dim, out_dim, dropout):
-        return nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden_dim, out_dim))
-    # Cosine similarity along the last dim, kept as a trailing size-1 axis so results can be concatenated.
-    @staticmethod
-    def _cosine(a, b):
-        return F.cosine_similarity(a, b, dim=-1, eps=1e-8).unsqueeze(-1)
-    def _compute_ccm(self, h_text, h_audio, h_video, h_tags):
-        # Order: text-audio, text-video, audio-video, then (optionally) text/audio/video vs tags.
-        sims = [self._cosine(h_text, h_audio), self._cosine(h_text, h_video), self._cosine(h_audio, h_video)]
-        if self.include_tags_ccm:
-            sims += [self._cosine(h_text, h_tags), self._cosine(h_audio, h_tags), self._cosine(h_video, h_tags)]
-        return torch.cat(sims, dim=-1)
-    def _trust_logit(self, key, h_i, ccm):
-        # Score one stream from its attention vector plus the shared ccm features.
-        x = torch.cat([h_i, ccm], dim=-1)
-        return self.trust_mlps[key](x) if self.per_modality_trust else self.trust_mlp(x)
-    def forward(self, batch, return_details=False):
-        # batch maps each stream name to its token-id tensor.
-        # Returns logits, or (logits, details) when return_details is True.
-        h_tags, p_tags = self.encoders["tags"](batch["tags"])
-        h_text, p_text = self.encoders["text"](batch["text"])
-        h_audio, p_audio = self.encoders["audio_transcript"](batch["audio_transcript"])
-        h_video, p_video = self.encoders["video_transcript"](batch["video_transcript"])
-        ccm = self._compute_ccm(h_text, h_audio, h_video, h_tags)
-        # Stream order from here on is text, audio, video, tags.
-        trust_logits = torch.cat([self._trust_logit("text", h_text, ccm),
-                                   self._trust_logit("audio_transcript", h_audio, ccm),
-                                   self._trust_logit("video_transcript", h_video, ccm),
-                                   self._trust_logit("tags", h_tags, ccm)], dim=-1)
-        trust_w = torch.softmax(trust_logits, dim=-1)
-        # softplus(...) + 1e-6 keeps every sigma strictly positive, so 1/sigma below is finite.
-        sigmas = torch.cat([F.softplus(self.uncertainty_mlp(h)) + 1e-6
-                             for h in [h_text, h_audio, h_video, h_tags]], dim=-1)
-        confidence = 1.0 / sigmas
-        # Fusion weight = trust * confidence, renormalised to sum to 1 across streams.
-        fusion_w = trust_w * confidence
-        fusion_w = fusion_w / (fusion_w.sum(dim=-1, keepdim=True) + 1e-8)
-        proj_stack = torch.stack([p_text, p_audio, p_video, p_tags], dim=1)
-        fused = torch.sum(proj_stack * fusion_w.unsqueeze(-1), dim=1)
-        combined = torch.cat([p_text, p_audio, p_video, p_tags, fused, ccm, trust_w, sigmas], dim=-1)
-        logits = self.mlp(combined)
-        if not return_details: return logits
-        return logits, {"ccm": ccm, "trust_w": trust_w, "sigma": sigmas, "fusion_w": fusion_w}
-# ── Globals ────────────────────────────────────────────────────────────────────
-_model = None
-_vocabs = None
-_max_lens = None
-_config = None
-_device = "cpu"
-REPO_ID = "rocky250/MHMisinfo"
-YT_API_KEY = os.environ.get("YT_API_KEY", "")
-def _load_model():
-    """Download the checkpoint and populate the module-level model state (no-op once loaded).
-
-    Sets the globals _model, _vocabs, _max_lens and _config from the checkpoint's
-    "vocabs", "max_lens", "config", "num_classes" and "model_state" entries.
-    """
-    global _model, _vocabs, _max_lens, _config
-    # Already initialised by an earlier call.
-    if _model is not None:
-        return
-    ckpt_path = hf_hub_download(repo_id=REPO_ID, filename="best_multimodal.pt")
-    # NOTE(review): weights_only=False unpickles arbitrary objects from the downloaded
-    # file; only safe while the REPO_ID checkpoint is trusted. Consider weights_only=True
-    # if the checkpoint contents allow it.
-    ckpt = torch.load(ckpt_path, map_location=_device, weights_only=False)
-    vocabs_raw = ckpt["vocabs"]
-    _vocabs = {k: Vocab.from_serializable(v) for k, v in vocabs_raw.items()}
-    _max_lens = ckpt["max_lens"]
-    _config = ckpt["config"]
-    num_classes = ckpt["num_classes"]
-    _model = MultiStreamModel(
-        vocab_sizes={k: len(v.token_to_idx) for k, v in _vocabs.items()},
-        num_classes=num_classes,
-        emb_dim=_config["emb_dim"], hidden_dim=_config["hidden_dim"],
-        attn_dim=_config["attn_dim"], proj_dim=_config["proj_dim"],
-        mlp_dim=_config["mlp_dim"], dropout=_config["dropout"],
-        include_tags_ccm=_config.get("include_tags_ccm", False),
-        per_modality_trust=_config.get("per_modality_trust", False),
-    ).to(_device)
-    _model.load_state_dict(ckpt["model_state"])
-    # eval() turns off dropout for inference.
-    _model.eval()
-def _extract_video_id(url: str) -> Optional[str]:
-    """Return the 11-character video ID following ``v=``, ``youtu.be/``, ``embed/`` or ``shorts/`` in *url*, else None."""
-    found = re.search(r"(?:v=|youtu\.be/|embed/|shorts/)([A-Za-z0-9_-]{11})", url)
-    if found is None:
-        return None
-    return found.group(1)
-def _fetch_yt_metadata(video_id: str):
-    """Look up a video's snippet via the YouTube Data API v3.
-
-    Returns ``(title, description, tags, error)``. On success ``error`` is None and
-    ``tags`` is the tag list joined with spaces; on failure the first three are None
-    and ``error`` is a user-facing message.
-    """
-    if not YT_API_KEY:
-        return None, None, None, "⚠️ No YouTube API key set. Set the YT_API_KEY secret in Space settings."
-    try:
-        client = yt_build("youtube", "v3", developerKey=YT_API_KEY, cache_discovery=False)
-        response = client.videos().list(part="snippet", id=video_id).execute()
-        items = response.get("items")
-        if not items:
-            return None, None, None, "❌ Video not found or unavailable."
-        snippet = items[0]["snippet"]
-        return (
-            snippet.get("title", ""),
-            snippet.get("description", ""),
-            " ".join(snippet.get("tags", [])),
-            None,
-        )
-    except Exception as e:
-        # Any client or response-shape failure is reported to the caller as a message.
-        return None, None, None, f"❌ YouTube API error: {e}"
-def _fetch_transcript(video_id: str, field: str) -> str:
-    """Return the English transcript as one space-joined string, or "" on any failure.
-
-    ``field`` is accepted but not used by this function.
-    """
-    try:
-        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
-        joined = " ".join(segment["text"] for segment in segments)
-    except (NoTranscriptFound, TranscriptsDisabled):
-        # No usable transcript for this video.
-        joined = ""
-    except Exception:
-        # Best-effort: any other failure is also treated as "no transcript".
-        joined = ""
-    return joined
-def _encode_single(text: str, tags: str, audio_t: str, video_t: str) -> Dict[str, torch.Tensor]:
-    """Tokenize the four streams and encode each as a single-row long tensor on _device."""
-    _load_model()
-    token_streams = {
-        "tags": tokenize_tags(tags),
-        "text": tokenize_text(text),
-        "audio_transcript": tokenize_text(audio_t),
-        "video_transcript": tokenize_text(video_t),
-    }
-    # Each stream is truncated/padded to its own max length before being wrapped as a batch of one.
-    return {
-        name: torch.tensor([_vocabs[name].encode(toks, _max_lens[name])], dtype=torch.long).to(_device)
-        for name, toks in token_streams.items()
-    }
-def _run_inference(text, tags, audio_t, video_t):
-    """Score one example and return ``(prob, pred, trust, sigma, ccm)``.
-
-    ``prob`` is the sigmoid of the model's logit and ``pred`` is 1 when prob >= 0.5;
-    the remaining three are the first-row "trust_w", "sigma" and "ccm" details as lists.
-    """
-    inputs = _encode_single(text, tags, audio_t, video_t)
-    with torch.no_grad():
-        logits, details = _model(inputs, return_details=True)
-    # float() needs a single-element tensor, i.e. the one-logit (two-class) head.
-    prob = float(torch.sigmoid(logits).squeeze())
-    pred = int(prob >= 0.5)
-    trust, sigma, ccm = (
-        details[key][0].cpu().numpy().tolist() for key in ("trust_w", "sigma", "ccm")
-    )
-    return prob, pred, trust, sigma, ccm
-# ── Gradio logic ───────────────────────────────────────────────────────────────
-MODALITIES = ["text", "audio_transcript", "video_transcript", "tags"]
-CCM_LABELS_3 = ["text↔audio", "text↔video", "audio↔video"]
-CCM_LABELS_6 = CCM_LABELS_3 + ["text↔tags", "audio↔tags", "video↔tags"]
-LABEL_COLORS = {0: "#22c55e", 1: "#ef4444"}
-LABEL_NAMES = {0: "✅ Credible / Not Misinformation", 1: "⚠️ Potential Misinformation"}
-def _bar(value: float, color: str) -> str:
-    """Render *value* as an HTML progress bar in *color* with a numeric caption.
-
-    The bar width is ``value`` expressed as a whole percentage, clamped to 0–100;
-    the caption below the bar always shows the unclamped value to three decimals.
-    """
-    # Clamp so an out-of-range value cannot yield a negative or >100% CSS width.
-    pct = max(0, min(100, int(value * 100)))
-    return (
-        f'<div style="background:#e5e7eb;border-radius:6px;height:14px;width:100%;margin:2px 0">'
-        f'<div style="background:{color};width:{pct}%;height:100%;border-radius:6px;transition:width 0.4s"></div>'
-        f'</div><small style="color:#6b7280">{value:.3f}</small>'
     )
-def analyze_url(url: str):
-    """Run the full pipeline for one YouTube URL and build the HTML result panels.
-
-    Returns five values in the order the Gradio outputs are wired: updates for the
-    verdict, trust, sigma and CCM panels, followed by the status string. On any
-    failure the four panels are hidden and the status carries the error message.
-    """
-    import html  # local import: only needed here, for escaping untrusted text
-    hidden = [gr.update(visible=False)] * 4
-    # Also covers a None value, which would otherwise crash on .strip().
-    if not url or not url.strip():
-        return hidden + ["Please enter a YouTube URL."]
-    video_id = _extract_video_id(url.strip())
-    if not video_id:
-        return hidden + ["❌ Could not extract a valid YouTube video ID from that URL."]
-    # Fetch metadata
-    title, desc, tags, err = _fetch_yt_metadata(video_id)
-    if err:
-        return hidden + [err]
-    # Fetch transcript (the same text feeds both transcript streams)
-    transcript = _fetch_transcript(video_id, "transcript")
-    text_field = f"{title} {desc}".strip()
-    # Run model
-    try:
-        _load_model()
-        prob, pred, trust, sigma, ccm = _run_inference(text_field, tags, transcript, transcript)
-    except Exception as e:
-        return hidden + [f"❌ Model error: {e}"]
-    # ── Verdict card ──────────────────────────────────────────────────────────
-    color = LABEL_COLORS[pred]
-    label_text = LABEL_NAMES[pred]
-    # Confidence is reported for whichever class was predicted.
-    conf_pct = int(prob * 100) if pred == 1 else int((1 - prob) * 100)
-    # Title and tags come from YouTube and the URL from the user: escape them before
-    # embedding in HTML. The link is rebuilt from the validated video ID so the raw
-    # input (which could carry another scheme or markup) never reaches the href.
-    safe_url = html.escape(f"https://www.youtube.com/watch?v={video_id}", quote=True)
-    safe_title = html.escape(title)
-    safe_tags = html.escape(tags[:120] + '…' if len(tags)>120 else (tags or '(none)'))
-    verdict_html = f"""
-    <div style="border:2px solid {color};border-radius:12px;padding:20px 24px;background:{color}18;margin-bottom:8px">
-      <div style="font-size:1.5rem;font-weight:700;color:{color}">{label_text}</div>
-      <div style="font-size:2.5rem;font-weight:800;color:{color};margin:6px 0">{conf_pct}% confident</div>
-      <div style="color:#6b7280;font-size:0.9rem">Raw misinfo probability: <b>{prob:.4f}</b></div>
-    </div>
-    <div style="background:#f9fafb;border-radius:10px;padding:14px 16px;margin-top:6px">
-      <b>🎬 Video:</b> <a href="{safe_url}" target="_blank">{safe_title}</a><br>
-      <b>🏷️ Tags:</b> {safe_tags}<br>
-      <b>📝 Transcript:</b> {('Available (' + str(len(transcript.split())) + ' words)') if transcript else '(not available — model used title/description only)'}
-    </div>
-    """
-    # ── Modality trust weights ─────────────────────────────────────────────────
-    trust_html = "<h4 style='margin-bottom:8px'>Modality Trust Weights</h4>"
-    trust_html += "<small style='color:#6b7280'>How much the model relied on each stream</small><br><br>"
-    for m, t in zip(MODALITIES, trust):
-        trust_html += f"<b>{m.replace('_',' ').title()}</b>{_bar(t, '#3b82f6')}"
-    # ── Uncertainty (sigma) ───────────────────────────────────────────────────
-    sigma_html = "<h4 style='margin-bottom:8px'>Uncertainty (σ)</h4>"
-    sigma_html += "<small style='color:#6b7280'>Higher = encoder less certain about this stream</small><br><br>"
-    # Bars are scaled relative to the largest sigma.
-    max_s = max(sigma) if sigma else 1
-    for m, s in zip(MODALITIES, sigma):
-        sigma_html += f"<b>{m.replace('_',' ').title()}</b>{_bar(s/max_s, '#f59e0b')}"
-    # ── CCM ───────────────────────────────────────────────────────────────────
-    ccm_labels = CCM_LABELS_6 if len(ccm) == 6 else CCM_LABELS_3
-    ccm_html = "<h4 style='margin-bottom:8px'>Cross-Channel Consistency (CCM)</h4>"
-    ccm_html += "<small style='color:#6b7280'>Cosine similarity between modality representations (−1 to 1)</small><br><br>"
-    for lbl, val in zip(ccm_labels, ccm):
-        norm = (val + 1) / 2  # map [-1,1] → [0,1]
-        ccm_html += f"<b>{lbl}</b>{_bar(norm, '#8b5cf6')}<small style='color:#9ca3af'>raw: {val:.3f}</small><br>"
-    status = "✅ Analysis complete."
-    return (
-        gr.update(value=verdict_html, visible=True),
-        gr.update(value=trust_html, visible=True),
-        gr.update(value=sigma_html, visible=True),
-        gr.update(value=ccm_html, visible=True),
-        status,
     )
-# ── UI ─────────────────────────────────────────────────────────────────────────
-CSS = """
-#header { text-align:center; margin-bottom: 20px; }
-#header h1 { font-size: 2rem; font-weight: 800; margin: 0; }
-#header p  { color: #6b7280; margin: 4px 0 0; }
-.panel { border-radius:12px !important; }
-footer { display:none !important; }
-"""
-with gr.Blocks(css=CSS, title="MHMisinfo — Mental Health Misinformation Detector") as demo:
-    gr.HTML("""
-    <div id="header">
-      <h1>🧠 MHMisinfo</h1>
-      <p>4-Stream SeTa-Attention model for detecting mental health misinformation on YouTube</p>
-      <p style="font-size:0.8rem;color:#9ca3af">
-        Based on: <i>"Supporters and Skeptics: LLM-based Analysis of Engagement with Mental Health (Mis)Information Content on Video-sharing Platforms"</i>
-      </p>
-    </div>
-    """)
-    with gr.Row():
-        with gr.Column(scale=3):
-            url_input = gr.Textbox(
-                placeholder="Paste a YouTube URL here, e.g. https://www.youtube.com/watch?v=...",
-                label="YouTube Video URL",
-                lines=1,
             )
-        with gr.Column(scale=1, min_width=120):
-            analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
-    status_box = gr.Textbox(label="Status", interactive=False, lines=1, visible=True)
-    with gr.Row():
-        verdict_out = gr.HTML(visible=False, elem_classes="panel")
-    with gr.Row():
-        with gr.Column():
-            trust_out = gr.HTML(visible=False, elem_classes="panel")
-        with gr.Column():
-            sigma_out = gr.HTML(visible=False, elem_classes="panel")
-    with gr.Row():
-        ccm_out = gr.HTML(visible=False, elem_classes="panel")
-    gr.HTML("""
-    <hr style="margin:28px 0 16px">
-    <details>
-      <summary style="cursor:pointer;font-weight:600;color:#374151">ℹ️ How it works</summary>
-      <div style="padding:12px 0;color:#6b7280;font-size:0.9rem">
-        <b>4 streams:</b> video title+description (text), hashtags/tags, audio transcript, video transcript.<br>
-        Each stream is encoded by a BiGRU with SeTa dual-attention. The model computes:<br>
-        &nbsp;• <b>CCM</b> (Cross-Channel Consistency Matrix) — cosine similarity between stream representations<br>
-        &nbsp;• <b>Trust weights</b> — learned per-stream reliability given CCM context<br>
-        &nbsp;• <b>Uncertainty (σ)</b> — calibrated confidence per stream via DMTE<br>
-        These are fused into a single classification head.<br><br>
-        <b>Note:</b> The model was trained on short YouTube mental health videos. Results on other content types may vary.
-        ROC-AUC on held-out test: <b>0.967</b>. Positive-class F1: <b>0.828</b>.
-      </div>
-    </details>
-    """)
-    analyze_btn.click(
-        fn=analyze_url,
-        inputs=[url_input],
-        outputs=[verdict_out, trust_out, sigma_out, ccm_out, status_box],
-    )
-    url_input.submit(
-        fn=analyze_url,
-        inputs=[url_input],
-        outputs=[verdict_out, trust_out, sigma_out, ccm_out, status_box],
-    )
-demo.launch()

 """
+app.py — Video Verifier & Sentiment Analyzer
+Professional dark-mode Streamlit application.
 """
+import os
+import time
+import streamlit as st
+import pandas as pd
+from fetcher import (
+    extract_video_id,
+    fetch_video_metadata,
+    fetch_transcript,
+    fetch_comments,
+    search_videos_by_title,
+)
+from analyzer import (
+    detect_misinformation,
+    analyze_sentiment_batch,
+    sentiment_summary,
+    extract_keywords,
+    sentiment_weighted_keywords,
+)
+from charts import (
+    misinfo_gauge,
+    sentiment_donut,
+    keyword_bar,
+    stream_trust_bars,
+    sentiment_timeline,
+    keyword_comparison,
+)
+# ══════════════════════════════════════════════════════════════════════════════
+#  PAGE CONFIG & GLOBAL STYLES
+# ══════════════════════════════════════════════════════════════════════════════
+st.set_page_config(
+    page_title="VideoVerifier — MHMisinfo",
+    page_icon="🔬",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+st.markdown("""
+<style>
+/* ── Google Fonts ── */
+@import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
+/* ── Root palette ── */
+:root {
+    --bg:         #0d0f14;
+    --card:       #13161e;
+    --border:     #1e2330;
+    --text:       #e8eaf0;
+    --dim:        #5a6070;
+    --cyan:       #00d4ff;
+    --green:      #00e5a0;
+    --red:        #ff4757;
+    --amber:      #ffb347;
+    --purple:     #b388ff;
+    --blue:       #4a8eff;
+}
+/* ── App shell ── */
+html, body, [class*="css"] {
+    background-color: var(--bg) !important;
+    color: var(--text) !important;
+    font-family: 'IBM Plex Sans', sans-serif !important;
+}
+.stApp { background: var(--bg) !important; }
+/* ── Hide Streamlit chrome ── */
+#MainMenu, footer, header { visibility: hidden; }
+.block-container { padding: 1.5rem 2rem !important; max-width: 1400px; }
+/* ── Sidebar ── */
+section[data-testid="stSidebar"] {
+    background: var(--card) !important;
+    border-right: 1px solid var(--border) !important;
+}
+section[data-testid="stSidebar"] * { color: var(--text) !important; }
+/* ── Inputs ── */
+input, textarea, select, .stTextInput input {
+    background: #1a1d27 !important;
+    border: 1px solid var(--border) !important;
+    color: var(--text) !important;
+    border-radius: 8px !important;
+    font-family: 'DM Mono', monospace !important;
+    font-size: 0.88rem !important;
+}
+input:focus, textarea:focus {
+    border-color: var(--cyan) !important;
+    box-shadow: 0 0 0 2px rgba(0,212,255,0.15) !important;
+}
+/* ── Buttons ── */
+.stButton > button {
+    background: linear-gradient(135deg, #00d4ff22, #4a8eff22) !important;
+    border: 1px solid var(--cyan) !important;
+    color: var(--cyan) !important;
+    border-radius: 8px !important;
+    font-family: 'DM Mono', monospace !important;
+    font-size: 0.85rem !important;
+    letter-spacing: 0.05em !important;
+    padding: 0.45rem 1.2rem !important;
+    transition: all 0.2s ease !important;
+}
+.stButton > button:hover {
+    background: linear-gradient(135deg, #00d4ff44, #4a8eff33) !important;
+    box-shadow: 0 0 16px rgba(0,212,255,0.25) !important;
+    transform: translateY(-1px) !important;
+}
+.stButton > button[kind="primary"] {
+    background: linear-gradient(135deg, var(--cyan), var(--blue)) !important;
+    border: none !important;
+    color: var(--bg) !important;
+    font-weight: 600 !important;
+}
+/* ── Cards ── */
+.vv-card {
+    background: var(--card);
+    border: 1px solid var(--border);
+    border-radius: 12px;
+    padding: 1.2rem 1.4rem;
+    margin-bottom: 1rem;
+}
+.vv-card-accent {
+    background: var(--card);
+    border-top: 2px solid var(--cyan);
+    border-left: 1px solid var(--border);
+    border-right: 1px solid var(--border);
+    border-bottom: 1px solid var(--border);
+    border-radius: 0 0 12px 12px;
+    padding: 1.2rem 1.4rem;
+    margin-bottom: 1rem;
+}
+/* ── Section headers ── */
+.vv-section-title {
+    font-family: 'Syne', sans-serif;
+    font-size: 0.7rem;
+    font-weight: 700;
+    letter-spacing: 0.18em;
+    text-transform: uppercase;
+    color: var(--dim);
+    margin-bottom: 0.6rem;
+}
+/* ── Hero title ── */
+.vv-hero {
+    font-family: 'Syne', sans-serif;
+    font-size: 1.6rem;
+    font-weight: 800;
+    background: linear-gradient(135deg, var(--cyan), var(--blue));
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+    letter-spacing: -0.02em;
+    line-height: 1.2;
+    margin: 0 0 0.2rem;
+}
+/* ── Stat chips ── */
+.vv-stat {
+    display: inline-block;
+    background: #1a1d27;
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    padding: 0.25rem 0.7rem;
+    font-family: 'DM Mono', monospace;
+    font-size: 0.78rem;
+    color: var(--cyan);
+    margin: 0.15rem 0.2rem 0.15rem 0;
+}
+/* ── Badge ── */
+.vv-badge-green {
+    display: inline-block;
+    background: rgba(0,229,160,0.12);
+    border: 1px solid var(--green);
+    color: var(--green);
+    border-radius: 20px;
+    padding: 0.2rem 0.8rem;
+    font-size: 0.78rem;
+    font-family: 'DM Mono', monospace;
+}
+.vv-badge-red {
+    display: inline-block;
+    background: rgba(255,71,87,0.12);
+    border: 1px solid var(--red);
+    color: var(--red);
+    border-radius: 20px;
+    padding: 0.2rem 0.8rem;
+    font-size: 0.78rem;
+    font-family: 'DM Mono', monospace;
+}
+.vv-badge-amber {
+    display: inline-block;
+    background: rgba(255,179,71,0.12);
+    border: 1px solid var(--amber);
+    color: var(--amber);
+    border-radius: 20px;
+    padding: 0.2rem 0.8rem;
+    font-size: 0.78rem;
+    font-family: 'DM Mono', monospace;
+}
+/* ── Reasoning box ── */
+.vv-reasoning {
+    background: #0d1119;
+    border-left: 3px solid var(--amber);
+    padding: 0.7rem 1rem;
+    border-radius: 0 8px 8px 0;
+    font-size: 0.83rem;
+    color: #c0c4cc;
+    line-height: 1.6;
+    font-family: 'IBM Plex Sans', sans-serif;
+    margin-top: 0.6rem;
+}
+/* ── Dataframe ── */
+.stDataFrame {
+    background: var(--card) !important;
+    border: 1px solid var(--border) !important;
+    border-radius: 8px !important;
+}
+.stDataFrame th {
+    background: #1a1d27 !important;
+    color: var(--cyan) !important;
+    font-family: 'DM Mono', monospace !important;
+    font-size: 0.78rem !important;
+}
+.stDataFrame td {
+    color: var(--text) !important;
+    font-size: 0.8rem !important;
+    border-color: var(--border) !important;
+}
+/* ── Tabs ── */
+.stTabs [data-baseweb="tab-list"] {
+    background: transparent !important;
+    border-bottom: 1px solid var(--border) !important;
+    gap: 0 !important;
+}
+.stTabs [data-baseweb="tab"] {
+    background: transparent !important;
+    color: var(--dim) !important;
+    font-family: 'DM Mono', monospace !important;
+    font-size: 0.82rem !important;
+    letter-spacing: 0.05em !important;
+    border: none !important;
+    padding: 0.5rem 1.2rem !important;
+}
+.stTabs [aria-selected="true"] {
+    color: var(--cyan) !important;
+    border-bottom: 2px solid var(--cyan) !important;
+}
+/* ── Spinner ── */
+.stSpinner > div { border-top-color: var(--cyan) !important; }
+/* ── Alerts ── */
+.stAlert { border-radius: 8px !important; font-size: 0.85rem !important; }
+/* ── Divider ── */
+hr { border-color: var(--border) !important; }
+/* ── Select box ── */
+.stSelectbox > div > div {
+    background: #1a1d27 !important;
+    border-color: var(--border) !important;
+    color: var(--text) !important;
+}
+/* ── File uploader ── */
+.stFileUploader {
+    background: #1a1d27 !important;
+    border: 1px dashed var(--border) !important;
+    border-radius: 8px !important;
+}
+/* ── Progress bar ── */
+.stProgress > div > div > div {
+    background: linear-gradient(90deg, var(--cyan), var(--blue)) !important;
+}
+/* ── Number input ── */
+.stNumberInput input {
+    background: #1a1d27 !important;
+    border-color: var(--border) !important;
+}
+/* ── Expander ── */
+.streamlit-expanderHeader {
+    background: var(--card) !important;
+    border-color: var(--border) !important;
+    color: var(--text) !important;
+    font-family: 'DM Mono', monospace !important;
+    font-size: 0.85rem !important;
+}
+</style>
+""", unsafe_allow_html=True)
+# ══════════════════════════════════════════════════════════════════════════════
+#  SESSION STATE HELPERS
+# ══════════════════════════════════════════════════════════════════════════════
+def init_state():
+    """Give each session-state key listed below its default value unless it is already set."""
+    initial_values = {
+        "metadata":     None,
+        "transcript":   "",
+        "comments_df":  pd.DataFrame(),
+        "sentiments":   [],
+        "sent_summary": {},
+        "misinfo":      None,
+        "keywords":     [],
+        "pos_kw":       [],
+        "neg_kw":       [],
+        "video_id":     None,
+        "analysed":     False,
+        "status_log":   [],
+    }
+    for key, default in initial_values.items():
+        # Values already stored in the session are left untouched.
+        if key in st.session_state:
+            continue
+        st.session_state[key] = default
+init_state()
+# ══════════════════════════════════════════════════════════════════════════════
+#  SIDEBAR
+# ══════════════════════════════════════════════════════════════════════════════
+with st.sidebar:
+    st.markdown('<p class="vv-hero" style="font-size:1.1rem">🔬 VideoVerifier</p>', unsafe_allow_html=True)
+    st.markdown('<p style="color:#5a6070;font-size:0.78rem;font-family:\'DM Mono\',monospace;margin-top:-8px">Mental Health Misinfo Detector</p>', unsafe_allow_html=True)
+    st.markdown("---")
+    st.markdown('<p class="vv-section-title">⚙️ Configuration</p>', unsafe_allow_html=True)
+    api_key = st.text_input(
+        "YouTube API v3 Key",
+        value=os.environ.get("YT_API_KEY", ""),
+        type="password",
+        placeholder="AIza...",
+        help="Get a free key at console.cloud.google.com",
+    )
+    sentiment_method = st.selectbox(
+        "Sentiment Engine",
+        ["vader", "hf"],
+        format_func=lambda x: "VADER (fast, CPU)" if x == "vader" else "DistilBERT (accurate, ~500MB)",
+        help="VADER is ~100× faster and works offline. DistilBERT downloads ~500MB on first run.",
+    )
+    max_comments = st.number_input(
+        "Max comments to fetch",
+        min_value=10, max_value=500, value=150, step=10,
+        help="YouTube API quota: ~1 unit per comment request",
+    )
+    st.markdown("---")
+    st.markdown('<p class="vv-section-title">📋 About</p>', unsafe_allow_html=True)
+    st.markdown(
+        '<p style="font-size:0.78rem;color:#5a6070;line-height:1.6">'
+        '4-stream SeTa-Attention model for mental health misinformation detection. '
+        'Plug your <code style="background:#1a1d27;padding:1px 4px;border-radius:3px;color:#00d4ff">detect_misinformation()</code> '
+        'function in <b>analyzer.py</b> to connect your trained checkpoint.'
+        '</p>',
+        unsafe_allow_html=True,
+    )
+    if st.session_state.status_log:
+        st.markdown("---")
+        st.markdown('<p class="vv-section-title">📜 Log</p>', unsafe_allow_html=True)
+        for msg in st.session_state.status_log[-6:]:
+            st.markdown(f'<p style="font-size:0.72rem;color:#5a6070;font-family:\'DM Mono\',monospace;margin:2px 0">{msg}</p>', unsafe_allow_html=True)
+# ══════════════════════════════════════════════════════════════════════════════
+#  HEADER
+# ══════════════════════════════════════════════════════════════════════════════
+st.markdown(
+    '<h1 class="vv-hero" style="font-size:2rem">Video Verifier & Sentiment Analyzer</h1>'
+    '<p style="color:#5a6070;font-size:0.85rem;margin-top:-4px;font-family:\'DM Mono\',monospace">'
+    'Detect mental health misinformation · Analyze public sentiment · Understand video content at a glance'
+    '</p>',
+    unsafe_allow_html=True,
+)
+st.markdown("---")
+# ══════════════════════════════════════════════════════════════════════════════
+#  INPUT SECTION
+# ══════════════════════════════════════════════════════════════════════════════
+# Two input modes. Whichever one resolves a video ID stores it in
+# video_id_to_analyze, which the pipeline dispatch further down reads.
+input_tab1, input_tab2 = st.tabs(["🔗 YouTube URL", "📁 Upload Video File"])
+video_id_to_analyze = None
+with input_tab1:
+    # Wide URL box on the left, narrow action button on the right.
+    col_url, col_btn = st.columns([5, 1])
+    with col_url:
+        yt_url = st.text_input(
+            "YouTube URL",
+            placeholder="https://www.youtube.com/watch?v=... or youtu.be/...",
+            label_visibility="collapsed",
         )
+    with col_btn:
+        analyze_url_btn = st.button("🔍 Analyze", type="primary", use_container_width=True)
+    # Only act on a click that comes with a non-empty URL.
+    if analyze_url_btn and yt_url:
+        vid = extract_video_id(yt_url)
+        if not vid:
+            st.error("❌ Could not extract a valid YouTube video ID. Check the URL format.")
         else:
+            video_id_to_analyze = vid
+with input_tab2:
+    # Upload flow: the file itself is not analysed here; the user searches
+    # YouTube by title/keyword and picks the matching video.
+    # Imported under an alias so it cannot clash with a page-level `html` name.
+    import html as _html
+    st.markdown(
+        '<div class="vv-card">'
+        '<p class="vv-section-title">Upload a video file</p>'
+        '<p style="font-size:0.82rem;color:#5a6070;line-height:1.6">'
+        '⚠️ <b>Important:</b> The YouTube Data API cannot search by raw video bytes. '
+        'After uploading, enter the video title or a keyword to find the matching YouTube entry. '
+        'For local-only analysis, the system will run misinformation detection on the filename metadata.'
+        '</p></div>',
+        unsafe_allow_html=True,
+    )
+    uploaded = st.file_uploader(
+        "Drop a video file",
+        type=["mp4", "mov", "avi", "mkv", "webm"],
+        label_visibility="collapsed",
+    )
+    if uploaded:
+        # Suggest the bare filename as a search hint. Drop whatever extension
+        # was uploaded; the uploader accepts more than ".mp4".
+        name_hint = uploaded.name.rsplit(".", 1)[0].replace("_", " ")
+        col_kw, col_search = st.columns([4, 1])
+        with col_kw:
+            kw = st.text_input(
+                "Video title / keyword to search on YouTube",
+                placeholder=f"e.g. {name_hint}",
+            )
+        with col_search:
+            search_btn = st.button("🔎 Find on YT", use_container_width=True)
+        if search_btn and kw and api_key:
+            with st.spinner("Searching YouTube…"):
+                found = search_videos_by_title(kw, api_key, max_results=5)
+            # Persist the hits across reruns. Clicking "Select" reruns the
+            # script with search_btn False, so results drawn only inside this
+            # branch would disappear before the click could be registered.
+            st.session_state["yt_search_results"] = found
+            if not found:
+                st.warning("No results found. Try a different keyword or check your API key.")
+        elif search_btn and not api_key:
+            st.error("Please enter your YouTube API key in the sidebar first.")
+        results = st.session_state.get("yt_search_results")
+        if results:
+            st.markdown('<p class="vv-section-title">Select the matching video</p>', unsafe_allow_html=True)
+            for r in results:
+                c1, c2, c3 = st.columns([1, 4, 1])
+                with c1:
+                    if r["thumbnail_url"]:
+                        st.image(r["thumbnail_url"], width=80)
+                with c2:
+                    # These strings come from the API and land in raw HTML.
+                    # Unescape first in case they arrive entity-encoded
+                    # (NOTE(review): confirm what search_videos_by_title
+                    # returns), then escape exactly once.
+                    title_txt, channel_txt, date_txt = (
+                        _html.escape(_html.unescape(str(r[field])))
+                        for field in ("title", "channel_title", "published_at")
+                    )
+                    st.markdown(
+                        f'<p style="margin:0;font-size:0.85rem;font-weight:500">{title_txt}</p>'
+                        f'<p style="margin:0;font-size:0.75rem;color:#5a6070">{channel_txt} · {date_txt}</p>',
+                        unsafe_allow_html=True,
+                    )
+                with c3:
+                    if st.button("Select", key=f"sel_{r['video_id']}"):
+                        video_id_to_analyze = r["video_id"]
+# ══════════════════════════════════════════════════════════════════════════════
+#  DATA FETCHING & ANALYSIS PIPELINE
+# ══════════════════════════════════════════════════════════════════════════════
+def run_full_pipeline(video_id: str):
+    """Fetch data for one video, run every analysis, and cache the results.
+
+    Stages run in order: metadata, transcript, comments, misinformation
+    detection, keyword extraction, and comment sentiment. Each result is
+    stored on ``st.session_state``; the script is rerun at the end.
+    If the metadata fetch reports an error, it is displayed and the function
+    returns before any later stage runs.
+    """
+    state = st.session_state
+    steps = []
+    # 1. Metadata
+    with st.spinner("Fetching video metadata…"):
+        meta, err = fetch_video_metadata(video_id, api_key)
+    if err:
+        st.error(f"❌ {err}")
+        return
+    steps.append(f"✅ Metadata: {meta['title'][:50]}")
+    state.metadata = meta
+    # 2. Transcript
+    with st.spinner("Fetching transcript…"):
+        transcript, t_status = fetch_transcript(video_id)
+    steps.append(t_status)
+    state.transcript = transcript
+    # 3. Comments
+    with st.spinner(f"Fetching up to {max_comments} comments…"):
+        comments_df, c_status = fetch_comments(video_id, api_key, max_comments=int(max_comments))
+    steps.append(c_status)
+    state.comments_df = comments_df
+    # 4. Misinformation — the same transcript feeds both transcript inputs.
+    title_and_description = f"{meta['title']} {meta['description']}"
+    with st.spinner("Running misinformation detection…"):
+        misinfo = detect_misinformation(
+            text=title_and_description,
+            tags=meta["tags"],
+            audio_transcript=transcript,
+            video_transcript=transcript,
         )
+    steps.append(f"🔬 Misinfo score: {misinfo['confidence_pct']}%")
+    state.misinfo = misinfo
+    # 5. Keywords
+    state.keywords = extract_keywords(f"{meta['title']} {meta['description']} {transcript}", meta["tags"])
+    # 6. Sentiment — skipped entirely when there are no comments.
+    if comments_df.empty:
+        state.sentiments = []
+        state.sent_summary = {}
+        steps.append("💬 Skipped (no comments)")
+    else:
+        comment_texts = comments_df["text"].fillna("").tolist()
+        total = len(comment_texts)
+        with st.spinner(f"Analyzing sentiment of {total} comments ({sentiment_method.upper()})…"):
+            bar = st.progress(0, text="Sentiment analysis…")
+            batch_size = 64
+            scored = []
+            # Score in fixed-size slices so the progress bar can advance.
+            for start in range(0, total, batch_size):
+                stop = start + batch_size
+                scored.extend(
+                    analyze_sentiment_batch(comment_texts[start:stop], method=sentiment_method, batch_size=batch_size)
+                )
+                bar.progress(min(stop / total, 1.0),
+                             text=f"Analyzed {min(stop, total)}/{total} comments…")
+            bar.empty()
+        state.sentiments = scored
+        state.sent_summary = sentiment_summary(scored)
+        state.pos_kw, state.neg_kw = sentiment_weighted_keywords(comments_df, scored)
+        steps.append(f"💬 Sentiment: {state.sent_summary['pos_pct']}% pos / {state.sent_summary['neg_pct']}% neg")
+    state.video_id = video_id
+    state.analysed = True
+    state.status_log = steps
+    st.rerun()
+# Start the pipeline once an input tab has produced a video ID; without an
+# API key, show an error instead.
+if video_id_to_analyze:
+    if api_key:
+        run_full_pipeline(video_id_to_analyze)
+    else:
+        st.error("⚠️ Please enter your YouTube API key in the sidebar before analyzing.")
+# ══════════════════════════════════════════════════════════════════════════════
+#  RESULTS DASHBOARD
+# ══════════════════════════════════════════════════════════════════════════════
+if not st.session_state.analysed:
+    # Landing state: nothing has been analysed yet, so show a prompt and halt
+    # the script before the dashboard code below runs.
+    _landing_html = (
+        '<div style="text-align:center;padding:4rem 2rem">'
+        '<p style="font-size:3rem">🔬</p>'
+        '<p style="font-family:\'Syne\',sans-serif;font-size:1.1rem;color:#5a6070">'
+        'Paste a YouTube URL above and click <b style="color:#00d4ff">Analyze</b> to begin</p>'
+        '<p style="font-size:0.8rem;color:#3a3f50;font-family:\'DM Mono\',monospace">'
+        'Misinformation detection · Sentiment analysis · Comment insights</p>'
+        '</div>'
+    )
+    st.markdown(_landing_html, unsafe_allow_html=True)
+    st.stop()
+# Pull the cached analysis results into plain names for the dashboard below.
+_ss = st.session_state
+meta, transcript, comments_df = _ss.metadata, _ss.transcript, _ss.comments_df
+misinfo, keywords = _ss.misinfo, _ss.keywords
+sentiments, sent_sum = _ss.sentiments, _ss.sent_summary
+pos_kw, neg_kw = _ss.pos_kw, _ss.neg_kw
+video_id = _ss.video_id
+# ── Layout: left (info) / right (analytics) ───────────────────────────────────
+left_col, right_col = st.columns([2, 3], gap="large")
+# ╔══════════════════════════════╗
+# ║  LEFT COLUMN — Video Info   ║
+# ╚══════════════════════════════╝
+with left_col:
+    # Video info panel. Every value below is interpolated into raw HTML
+    # (unsafe_allow_html=True) and originates from video metadata or the
+    # transcript, so it is escaped first to keep stray or hostile markup
+    # from being rendered.
+    # Imported under an alias so it cannot clash with a page-level `html` name.
+    import html as _html
+    def _esc(value):
+        """Return *value* as text that is safe to interpolate into HTML."""
+        return _html.escape(str(value))
+    # Thumbnail + embed
+    # NOTE(review): newer Streamlit releases deprecate use_column_width on
+    # st.image — confirm the pinned version before switching the argument.
+    if meta.get("thumbnail_url"):
+        st.image(meta["thumbnail_url"], use_column_width=True)
+    st.markdown(
+        f'<a href="https://www.youtube.com/watch?v={_esc(video_id)}" target="_blank" '
+        f'style="display:block;text-align:center;font-family:\'DM Mono\',monospace;'
+        f'font-size:0.78rem;color:#5a6070;text-decoration:none;margin:4px 0 12px">▶ Open on YouTube</a>',
+        unsafe_allow_html=True,
+    )
+    # Title & channel
+    st.markdown(
+        f'<div class="vv-card">'
+        f'<p class="vv-section-title">Video</p>'
+        f'<p style="font-family:\'Syne\',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 4px">{_esc(meta["title"])}</p>'
+        f'<p style="font-size:0.82rem;color:#5a6070;margin:0">by <b style="color:#b0b4c0">{_esc(meta["channel_title"])}</b> · {_esc(meta["published_at"])}</p>'
+        f'</div>',
+        unsafe_allow_html=True,
     )
+    # Stats
+    st.markdown('<p class="vv-section-title">Metrics</p>', unsafe_allow_html=True)
+    s1, s2 = st.columns(2)
+    with s1:
+        st.markdown(f'<span class="vv-stat">👁 {meta["view_count"]:,}</span>', unsafe_allow_html=True)
+        st.markdown(f'<span class="vv-stat">👍 {meta["like_count"]:,}</span>', unsafe_allow_html=True)
+    with s2:
+        st.markdown(f'<span class="vv-stat">💬 {meta["comment_count"]:,}</span>', unsafe_allow_html=True)
+        st.markdown(f'<span class="vv-stat">⏱ {_esc(meta["duration"])}</span>', unsafe_allow_html=True)
+    # Tags (first 20 only)
+    if meta.get("tags"):
+        st.markdown('<p class="vv-section-title" style="margin-top:1rem">Tags</p>', unsafe_allow_html=True)
+        tag_html = "".join(
+            f'<span style="display:inline-block;background:#1a1d27;border:1px solid #1e2330;border-radius:4px;'
+            f'padding:2px 8px;font-family:\'DM Mono\',monospace;font-size:0.7rem;color:#8090a0;margin:2px">'
+            f'#{_esc(t)}</span>'
+            for t in meta["tags"][:20]
+        )
+        st.markdown(tag_html, unsafe_allow_html=True)
+    # Description (collapsed)
+    description = meta.get("description")
+    if description:
+        with st.expander("📄 Description", expanded=False):
+            # Truncate first, then escape, so an entity is never cut in half.
+            shown_description = _esc(description[:1200]) + ("…" if len(description) > 1200 else "")
+            st.markdown(
+                f'<p style="font-size:0.8rem;color:#8090a0;line-height:1.65;white-space:pre-wrap">'
+                f'{shown_description}</p>',
+                unsafe_allow_html=True,
+            )
+    # Transcript (collapsed)
+    with st.expander(f"📝 Transcript ({len(transcript.split()) if transcript else 0} words)", expanded=False):
+        if transcript:
+            shown_transcript = _esc(transcript[:2500]) + ("…" if len(transcript) > 2500 else "")
+            st.markdown(
+                f'<p style="font-size:0.78rem;color:#8090a0;line-height:1.65">'
+                f'{shown_transcript}</p>',
+                unsafe_allow_html=True,
+            )
+        else:
+            st.info("No transcript available for this video.")
+# ╔══════════════════════════════╗
+# ║  RIGHT COLUMN — Analytics   ║
+# ╚══════════════════════════════╝
+with right_col:
+    # ── Misinfo verdict ──────────────────────────────────────────────────────
+    st.markdown('<p class="vv-section-title">🔬 Misinformation Analysis</p>', unsafe_allow_html=True)
+    score = misinfo["score"]
+    # Three verdict bands: below 0.35, below 0.65, and everything else.
+    badge = (
+        '<span class="vv-badge-green">✅ Appears Credible</span>'
+        if score < 0.35
+        else '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
+        if score < 0.65
+        else '<span class="vv-badge-red">🚨 Likely Misinformation</span>'
+    )
+    st.markdown(badge, unsafe_allow_html=True)
+    # Gauge on the left, per-stream bars on the right.
+    gauge_col, streams_col = st.columns([1, 1])
+    with gauge_col:
+        gauge_fig = misinfo_gauge(score, "Misinfo Confidence")
+        st.plotly_chart(gauge_fig, use_container_width=True, config={"displayModeBar": False})
+    with streams_col:
+        streams_fig = stream_trust_bars(misinfo["stream_details"])
+        st.plotly_chart(streams_fig, use_container_width=True, config={"displayModeBar": False})
+    st.markdown(
+        f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>',
+        unsafe_allow_html=True,
     )
+    st.markdown("---")
+    # ── Sentiment analytics ──────────────────────────────────────────────────
+    st.markdown('<p class="vv-section-title">💬 Comment Sentiment</p>', unsafe_allow_html=True)
+    if not sent_sum:
+        # No sentiment summary: show a notice and, if available, the video keywords.
+        st.info("⚠️ No comment sentiment data — comments may be disabled or unavailable.")
+        if keywords:
+            st.plotly_chart(
+                keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff"),
+                use_container_width=True,
+                config={"displayModeBar": False},
             )
+    else:
+        # One headline card per sentiment class: (text colour, summary key, caption).
+        card_specs = (
+            ("#00e5a0", "pos_pct", "Positive"),
+            ("#ff4757", "neg_pct", "Negative"),
+            ("#5a6070", "neu_pct", "Neutral"),
+        )
+        for card_col, (colour, pct_key, caption) in zip(st.columns(3), card_specs):
+            with card_col:
+                st.markdown(
+                    f'<div class="vv-card" style="text-align:center">'
+                    f'<p style="color:{colour};font-family:\'DM Mono\',monospace;font-size:1.6rem;font-weight:700;margin:0">{sent_sum[pct_key]}%</p>'
+                    f'<p style="color:#5a6070;font-size:0.75rem;margin:0">{caption}</p></div>',
+                    unsafe_allow_html=True,
+                )
+        # Distribution donut beside the sentiment timeline.
+        donut_col, timeline_col = st.columns([1, 1])
+        with donut_col:
+            donut_fig = sentiment_donut(sent_sum)
+            st.plotly_chart(donut_fig, use_container_width=True, config={"displayModeBar": False})
+        with timeline_col:
+            timeline_fig = sentiment_timeline(comments_df, sentiments)
+            st.plotly_chart(timeline_fig, use_container_width=True, config={"displayModeBar": False})
+        # Keyword charts
+        video_kw_col, comment_kw_col = st.columns(2)
+        with video_kw_col:
+            video_kw_fig = keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff")
+            st.plotly_chart(video_kw_fig, use_container_width=True, config={"displayModeBar": False})
+        with comment_kw_col:
+            comparison_fig = keyword_comparison(pos_kw, neg_kw)
+            st.plotly_chart(comparison_fig, use_container_width=True, config={"displayModeBar": False})
+    # ── Comments table ───────────────────────────────────────────────────────
+    # Tabbed view of the fetched comments: all, positive, negative, most liked.
+    st.markdown("---")
+    st.markdown('<p class="vv-section-title">📊 Comments Deep-Dive</p>', unsafe_allow_html=True)
+    if not comments_df.empty:
+        # Work on a copy so the cached frame in session state is not mutated.
+        display_df = comments_df.copy()
+        if sentiments:
+            display_df["sentiment"] = [s["label"] for s in sentiments]
+            display_df["compound"]  = [round(s.get("compound", 0), 3) for s in sentiments]
+        has_sentiment = "sentiment" in display_df.columns
+        tab_all, tab_pos, tab_neg, tab_top = st.tabs([
+            f"All ({len(display_df)})",
+            f"Positive ({sent_sum.get('POSITIVE',0)})",
+            f"Negative ({sent_sum.get('NEGATIVE',0)})",
+            "Most Liked",
+        ])
+        show_cols = ["author", "text", "likes", "published_at"]
+        if has_sentiment:
+            show_cols += ["sentiment", "compound"]
+        with tab_all:
+            st.dataframe(display_df[show_cols].head(100), use_container_width=True, height=320)
+        # The positive and negative tabs differ only in label and empty-state text.
+        label_tabs = (
+            (tab_pos, "POSITIVE", "No positive comments in this dataset."),
+            (tab_neg, "NEGATIVE", "No negative comments in this dataset."),
+        )
+        for label_tab, label, empty_msg in label_tabs:
+            with label_tab:
+                # Index the column directly: it is only read when present, so
+                # no throwaway empty Series default is built on every run.
+                subset = display_df[display_df["sentiment"] == label] if has_sentiment else pd.DataFrame()
+                if not subset.empty:
+                    st.dataframe(subset[show_cols].head(50), use_container_width=True, height=320)
+                else:
+                    st.info(empty_msg)
+        with tab_top:
+            top_df = display_df.sort_values("likes", ascending=False).head(20)
+            st.dataframe(top_df[show_cols], use_container_width=True, height=320)
+    else:
+        st.info("No comments available for this video.")