rocky250 committed on
Commit
44bafbe
·
verified ·
1 Parent(s): dceedff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +786 -422
app.py CHANGED
@@ -1,440 +1,804 @@
1
  """
2
- MHMisinfo — Mental Health Misinformation Detector
3
- Gradio Space: paste a YouTube URL → fetch metadata + transcripts → run 4-stream SeTa-Attention model → show verdict
4
  """
5
 
6
- import os, re, json, sys, warnings
7
- warnings.filterwarnings("ignore")
8
-
9
- import numpy as np
10
- import torch
11
- import torch.nn as nn
12
- import torch.nn.functional as F
13
- import gradio as gr
14
- from dataclasses import dataclass
15
- from typing import Dict, List, Optional
16
- from huggingface_hub import hf_hub_download
17
-
18
- # ── YouTube helpers ────────────────────────────────────────────────────────────
19
- from googleapiclient.discovery import build as yt_build
20
- from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
21
-
22
- # ── Model + Data (inline, no src/ import needed) ──────────────────────────────
23
- import re as _re
24
-
25
- TAG_SPLIT_RE = _re.compile(r"[\s,]+")
26
- TEXT_RE = _re.compile(r"[A-Za-z0-9']+")
27
-
28
- @dataclass
29
- class Vocab:
30
- token_to_idx: Dict[str, int]
31
- idx_to_token: List[str]
32
- pad_token: str = "<pad>"
33
- unk_token: str = "<unk>"
34
-
35
- @property
36
- def pad_idx(self): return self.token_to_idx[self.pad_token]
37
- @property
38
- def unk_idx(self): return self.token_to_idx[self.unk_token]
39
-
40
- def encode(self, tokens, max_len):
41
- ids = [self.token_to_idx.get(t, self.unk_idx) for t in tokens]
42
- if len(ids) >= max_len: return ids[:max_len]
43
- return ids + [self.pad_idx] * (max_len - len(ids))
44
-
45
- @staticmethod
46
- def from_serializable(obj):
47
- return Vocab(token_to_idx=obj["token_to_idx"],
48
- idx_to_token=obj["idx_to_token"],
49
- pad_token=obj.get("pad_token","<pad>"),
50
- unk_token=obj.get("unk_token","<unk>"))
51
-
52
- def tokenize_tags(text):
53
- if not isinstance(text, str): return []
54
- cleaned = text.replace("#"," ")
55
- return [t for t in TAG_SPLIT_RE.split(cleaned.lower()) if t]
56
-
57
- def tokenize_text(text):
58
- if not isinstance(text, str): return []
59
- return [t.lower() for t in TEXT_RE.findall(text)]
60
-
61
-
62
- # ── Model Architecture (identical to src/model.py) ────────────────────────────
63
- class SeTaAttention(nn.Module):
64
- def __init__(self, input_dim, attn_dim, dropout=0.1):
65
- super().__init__()
66
- self.proj = nn.Linear(input_dim, attn_dim)
67
- self.sem_query = nn.Parameter(torch.randn(attn_dim))
68
- self.task_query = nn.Parameter(torch.randn(attn_dim))
69
- self.out = nn.Linear(input_dim * 2, input_dim)
70
- self.dropout = nn.Dropout(dropout)
71
-
72
- def _attend(self, h, query, mask):
73
- proj = torch.tanh(self.proj(h))
74
- scores = torch.matmul(proj, query)
75
- scores = scores.masked_fill(~mask, -1e9)
76
- weights = torch.softmax(scores, dim=1)
77
- return torch.sum(h * weights.unsqueeze(-1), dim=1)
78
-
79
- def forward(self, h, mask):
80
- sem = self._attend(h, self.sem_query, mask)
81
- task = self._attend(h, self.task_query, mask)
82
- return self.dropout(torch.tanh(self.out(torch.cat([sem, task], dim=-1))))
83
-
84
-
85
- class StreamEncoder(nn.Module):
86
- def __init__(self, vocab_size, emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout=0.2):
87
- super().__init__()
88
- self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
89
- self.gru = nn.GRU(emb_dim, hidden_dim, batch_first=True, bidirectional=True)
90
- self.attn = SeTaAttention(hidden_dim*2, attn_dim, dropout=dropout)
91
- self.proj = nn.Sequential(
92
- nn.Linear(hidden_dim*2, mlp_dim), nn.ReLU(), nn.Dropout(dropout), nn.Linear(mlp_dim, proj_dim)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  )
94
- self.dropout = nn.Dropout(dropout)
95
-
96
- def forward(self, x):
97
- mask = x != 0
98
- emb = self.dropout(self.embedding(x))
99
- h, _ = self.gru(emb)
100
- attn_vec = self.attn(h, mask)
101
- proj = self.dropout(torch.tanh(self.proj(attn_vec)))
102
- return attn_vec, proj
103
-
104
-
105
- class MultiStreamModel(nn.Module):
106
- def __init__(self, vocab_sizes, num_classes, emb_dim=128, hidden_dim=128, attn_dim=128,
107
- proj_dim=128, mlp_dim=256, dropout=0.2, include_tags_ccm=False, per_modality_trust=False):
108
- super().__init__()
109
- self.include_tags_ccm = include_tags_ccm
110
- self.per_modality_trust = per_modality_trust
111
- self.num_classes = num_classes
112
- h_dim = hidden_dim * 2
113
- self.encoders = nn.ModuleDict({
114
- "tags": StreamEncoder(vocab_sizes["tags"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
115
- "text": StreamEncoder(vocab_sizes["text"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
116
- "audio_transcript": StreamEncoder(vocab_sizes["audio_transcript"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
117
- "video_transcript": StreamEncoder(vocab_sizes["video_transcript"], emb_dim, hidden_dim, attn_dim, proj_dim, mlp_dim, dropout),
118
- })
119
- ccm_dim = 3 + (3 if include_tags_ccm else 0)
120
- trust_in = h_dim + ccm_dim
121
- if per_modality_trust:
122
- self.trust_mlps = nn.ModuleDict({k: self._make_mlp(trust_in, mlp_dim, 1, dropout)
123
- for k in ["text","audio_transcript","video_transcript","tags"]})
124
- self.trust_mlp = None
125
  else:
126
- self.trust_mlp = self._make_mlp(trust_in, mlp_dim, 1, dropout)
127
- self.trust_mlps = None
128
- self.uncertainty_mlp = self._make_mlp(h_dim, mlp_dim, 1, dropout)
129
- classifier_in = proj_dim * 5 + ccm_dim + 4 + 4
130
- out_dim = 1 if num_classes == 2 else num_classes
131
- self.mlp = nn.Sequential(
132
- nn.Linear(classifier_in, mlp_dim), nn.ReLU(), nn.Dropout(dropout), nn.Linear(mlp_dim, out_dim)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- @staticmethod
136
- def _make_mlp(in_dim, hidden_dim, out_dim, dropout):
137
- return nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden_dim, out_dim))
138
-
139
- @staticmethod
140
- def _cosine(a, b):
141
- return F.cosine_similarity(a, b, dim=-1, eps=1e-8).unsqueeze(-1)
142
-
143
- def _compute_ccm(self, h_text, h_audio, h_video, h_tags):
144
- sims = [self._cosine(h_text, h_audio), self._cosine(h_text, h_video), self._cosine(h_audio, h_video)]
145
- if self.include_tags_ccm:
146
- sims += [self._cosine(h_text, h_tags), self._cosine(h_audio, h_tags), self._cosine(h_video, h_tags)]
147
- return torch.cat(sims, dim=-1)
148
-
149
- def _trust_logit(self, key, h_i, ccm):
150
- x = torch.cat([h_i, ccm], dim=-1)
151
- return self.trust_mlps[key](x) if self.per_modality_trust else self.trust_mlp(x)
152
-
153
- def forward(self, batch, return_details=False):
154
- h_tags, p_tags = self.encoders["tags"](batch["tags"])
155
- h_text, p_text = self.encoders["text"](batch["text"])
156
- h_audio, p_audio = self.encoders["audio_transcript"](batch["audio_transcript"])
157
- h_video, p_video = self.encoders["video_transcript"](batch["video_transcript"])
158
- ccm = self._compute_ccm(h_text, h_audio, h_video, h_tags)
159
- trust_logits = torch.cat([self._trust_logit("text", h_text, ccm),
160
- self._trust_logit("audio_transcript", h_audio, ccm),
161
- self._trust_logit("video_transcript", h_video, ccm),
162
- self._trust_logit("tags", h_tags, ccm)], dim=-1)
163
- trust_w = torch.softmax(trust_logits, dim=-1)
164
- sigmas = torch.cat([F.softplus(self.uncertainty_mlp(h)) + 1e-6
165
- for h in [h_text, h_audio, h_video, h_tags]], dim=-1)
166
- confidence = 1.0 / sigmas
167
- fusion_w = trust_w * confidence
168
- fusion_w = fusion_w / (fusion_w.sum(dim=-1, keepdim=True) + 1e-8)
169
- proj_stack = torch.stack([p_text, p_audio, p_video, p_tags], dim=1)
170
- fused = torch.sum(proj_stack * fusion_w.unsqueeze(-1), dim=1)
171
- combined = torch.cat([p_text, p_audio, p_video, p_tags, fused, ccm, trust_w, sigmas], dim=-1)
172
- logits = self.mlp(combined)
173
- if not return_details: return logits
174
- return logits, {"ccm": ccm, "trust_w": trust_w, "sigma": sigmas, "fusion_w": fusion_w}
175
-
176
-
177
- # ── Globals ────────────────────────────────────────────────────────────────────
178
- _model = None
179
- _vocabs = None
180
- _max_lens = None
181
- _config = None
182
- _device = "cpu"
183
-
184
- REPO_ID = "rocky250/MHMisinfo"
185
- YT_API_KEY = os.environ.get("YT_API_KEY", "")
186
-
187
-
188
- def _load_model():
189
- global _model, _vocabs, _max_lens, _config
190
- if _model is not None:
191
- return
192
- ckpt_path = hf_hub_download(repo_id=REPO_ID, filename="best_multimodal.pt")
193
- ckpt = torch.load(ckpt_path, map_location=_device, weights_only=False)
194
- vocabs_raw = ckpt["vocabs"]
195
- _vocabs = {k: Vocab.from_serializable(v) for k, v in vocabs_raw.items()}
196
- _max_lens = ckpt["max_lens"]
197
- _config = ckpt["config"]
198
- num_classes = ckpt["num_classes"]
199
- _model = MultiStreamModel(
200
- vocab_sizes={k: len(v.token_to_idx) for k, v in _vocabs.items()},
201
- num_classes=num_classes,
202
- emb_dim=_config["emb_dim"], hidden_dim=_config["hidden_dim"],
203
- attn_dim=_config["attn_dim"], proj_dim=_config["proj_dim"],
204
- mlp_dim=_config["mlp_dim"], dropout=_config["dropout"],
205
- include_tags_ccm=_config.get("include_tags_ccm", False),
206
- per_modality_trust=_config.get("per_modality_trust", False),
207
- ).to(_device)
208
- _model.load_state_dict(ckpt["model_state"])
209
- _model.eval()
210
-
211
-
212
- def _extract_video_id(url: str) -> Optional[str]:
213
- patterns = [
214
- r"(?:v=|youtu\.be/|embed/|shorts/)([A-Za-z0-9_-]{11})",
215
- ]
216
- for p in patterns:
217
- m = re.search(p, url)
218
- if m: return m.group(1)
219
- return None
220
-
221
-
222
- def _fetch_yt_metadata(video_id: str):
223
- """Fetch title, description, tags via YouTube Data API v3."""
224
- if not YT_API_KEY:
225
- return None, None, None, "⚠️ No YouTube API key set. Set the YT_API_KEY secret in Space settings."
226
- try:
227
- yt = yt_build("youtube", "v3", developerKey=YT_API_KEY, cache_discovery=False)
228
- resp = yt.videos().list(part="snippet", id=video_id).execute()
229
- if not resp.get("items"):
230
- return None, None, None, "❌ Video not found or unavailable."
231
- snippet = resp["items"][0]["snippet"]
232
- title = snippet.get("title", "")
233
- desc = snippet.get("description", "")
234
- tags = " ".join(snippet.get("tags", []))
235
- return title, desc, tags, None
236
- except Exception as e:
237
- return None, None, None, f"❌ YouTube API error: {e}"
238
-
239
-
240
- def _fetch_transcript(video_id: str, field: str) -> str:
241
- """Fetch transcript text (same text used for both audio & video transcript streams)."""
242
- try:
243
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
244
- return " ".join(t["text"] for t in transcript_list)
245
- except (NoTranscriptFound, TranscriptsDisabled):
246
- return ""
247
- except Exception:
248
- return ""
249
-
250
-
251
- def _encode_single(text: str, tags: str, audio_t: str, video_t: str) -> Dict[str, torch.Tensor]:
252
- _load_model()
253
- streams = {
254
- "tags": tokenize_tags(tags),
255
- "text": tokenize_text(text),
256
- "audio_transcript": tokenize_text(audio_t),
257
- "video_transcript": tokenize_text(video_t),
258
- }
259
- batch = {}
260
- for s, tokens in streams.items():
261
- ids = _vocabs[s].encode(tokens, _max_lens[s])
262
- batch[s] = torch.tensor([ids], dtype=torch.long).to(_device)
263
- return batch
264
-
265
-
266
- def _run_inference(text, tags, audio_t, video_t):
267
- batch = _encode_single(text, tags, audio_t, video_t)
268
- with torch.no_grad():
269
- logits, details = _model(batch, return_details=True)
270
- prob = float(torch.sigmoid(logits).squeeze())
271
- pred = int(prob >= 0.5)
272
- trust = details["trust_w"][0].cpu().numpy().tolist()
273
- sigma = details["sigma"][0].cpu().numpy().tolist()
274
- ccm = details["ccm"][0].cpu().numpy().tolist()
275
- return prob, pred, trust, sigma, ccm
276
-
277
-
278
- # ── Gradio logic ───────────────────────────────────────────────────────────────
279
- MODALITIES = ["text", "audio_transcript", "video_transcript", "tags"]
280
- CCM_LABELS_3 = ["text↔audio", "text↔video", "audio↔video"]
281
- CCM_LABELS_6 = CCM_LABELS_3 + ["text↔tags", "audio↔tags", "video↔tags"]
282
-
283
- LABEL_COLORS = {0: "#22c55e", 1: "#ef4444"}
284
- LABEL_NAMES = {0: "✅ Credible / Not Misinformation", 1: "⚠️ Potential Misinformation"}
285
-
286
-
287
- def _bar(value: float, color: str) -> str:
288
- pct = int(value * 100)
289
- return (
290
- f'<div style="background:#e5e7eb;border-radius:6px;height:14px;width:100%;margin:2px 0">'
291
- f'<div style="background:{color};width:{pct}%;height:100%;border-radius:6px;transition:width 0.4s"></div>'
292
- f'</div><small style="color:#6b7280">{value:.3f}</small>'
293
  )
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
- def analyze_url(url: str):
297
- if not url.strip():
298
- return [gr.update(visible=False)] * 4 + ["Please enter a YouTube URL."]
 
 
 
 
 
 
 
299
 
300
- video_id = _extract_video_id(url.strip())
301
- if not video_id:
302
- return [gr.update(visible=False)] * 4 + ["❌ Could not extract a valid YouTube video ID from that URL."]
303
 
304
- # Fetch metadata
305
- title, desc, tags, err = _fetch_yt_metadata(video_id)
306
- if err:
307
- return [gr.update(visible=False)] * 4 + [err]
308
-
309
- # Fetch transcript
310
- transcript = _fetch_transcript(video_id, "transcript")
311
- text_field = f"{title} {desc}".strip()
312
-
313
- # Run model
314
- try:
315
- _load_model()
316
- prob, pred, trust, sigma, ccm = _run_inference(text_field, tags, transcript, transcript)
317
- except Exception as e:
318
- return [gr.update(visible=False)] * 4 + [f"❌ Model error: {e}"]
319
-
320
- # ── Verdict card ──────────────────────────────────────────────────────────
321
- color = LABEL_COLORS[pred]
322
- label_text = LABEL_NAMES[pred]
323
- conf_pct = int(prob * 100) if pred == 1 else int((1 - prob) * 100)
324
- verdict_html = f"""
325
- <div style="border:2px solid {color};border-radius:12px;padding:20px 24px;background:{color}18;margin-bottom:8px">
326
- <div style="font-size:1.5rem;font-weight:700;color:{color}">{label_text}</div>
327
- <div style="font-size:2.5rem;font-weight:800;color:{color};margin:6px 0">{conf_pct}% confident</div>
328
- <div style="color:#6b7280;font-size:0.9rem">Raw misinfo probability: <b>{prob:.4f}</b></div>
329
- </div>
330
- <div style="background:#f9fafb;border-radius:10px;padding:14px 16px;margin-top:6px">
331
- <b>🎬 Video:</b> <a href="{url}" target="_blank">{title}</a><br>
332
- <b>🏷️ Tags:</b> {tags[:120] + '…' if len(tags)>120 else (tags or '(none)')}<br>
333
- <b>πŸ“ Transcript:</b> {('Available (' + str(len(transcript.split())) + ' words)') if transcript else '(not available β€” model used title/description only)'}
334
- </div>
335
- """
336
-
337
- # ── Modality trust weights ─────────────────────────────────────────────────
338
- trust_html = "<h4 style='margin-bottom:8px'>Modality Trust Weights</h4>"
339
- trust_html += "<small style='color:#6b7280'>How much the model relied on each stream</small><br><br>"
340
- for m, t in zip(MODALITIES, trust):
341
- trust_html += f"<b>{m.replace('_',' ').title()}</b>{_bar(t, '#3b82f6')}"
342
-
343
- # ── Uncertainty (sigma) ───────────────────────────────────────────────────
344
- sigma_html = "<h4 style='margin-bottom:8px'>Uncertainty (σ)</h4>"
345
- sigma_html += "<small style='color:#6b7280'>Higher = encoder less certain about this stream</small><br><br>"
346
- max_s = max(sigma) if sigma else 1
347
- for m, s in zip(MODALITIES, sigma):
348
- sigma_html += f"<b>{m.replace('_',' ').title()}</b>{_bar(s/max_s, '#f59e0b')}"
349
-
350
- # ── CCM ───────────────────────────────────────────────────────────────────
351
- ccm_labels = CCM_LABELS_6 if len(ccm) == 6 else CCM_LABELS_3
352
- ccm_html = "<h4 style='margin-bottom:8px'>Cross-Channel Consistency (CCM)</h4>"
353
- ccm_html += "<small style='color:#6b7280'>Cosine similarity between modality representations (−1 to 1)</small><br><br>"
354
- for lbl, val in zip(ccm_labels, ccm):
355
- norm = (val + 1) / 2 # map [-1,1] → [0,1]
356
- ccm_html += f"<b>{lbl}</b>{_bar(norm, '#8b5cf6')}<small style='color:#9ca3af'>raw: {val:.3f}</small><br>"
357
-
358
- status = "✅ Analysis complete."
359
- return (
360
- gr.update(value=verdict_html, visible=True),
361
- gr.update(value=trust_html, visible=True),
362
- gr.update(value=sigma_html, visible=True),
363
- gr.update(value=ccm_html, visible=True),
364
- status,
365
  )
366
 
 
367
 
368
- # ── UI ─────────────────────────────────────────────────────────────────────────
369
- CSS = """
370
- #header { text-align:center; margin-bottom: 20px; }
371
- #header h1 { font-size: 2rem; font-weight: 800; margin: 0; }
372
- #header p { color: #6b7280; margin: 4px 0 0; }
373
- .panel { border-radius:12px !important; }
374
- footer { display:none !important; }
375
- """
376
 
377
- with gr.Blocks(css=CSS, title="MHMisinfo — Mental Health Misinformation Detector") as demo:
378
- gr.HTML("""
379
- <div id="header">
380
- <h1>🧠 MHMisinfo</h1>
381
- <p>4-Stream SeTa-Attention model for detecting mental health misinformation on YouTube</p>
382
- <p style="font-size:0.8rem;color:#9ca3af">
383
- Based on: <i>"Supporters and Skeptics: LLM-based Analysis of Engagement with Mental Health (Mis)Information Content on Video-sharing Platforms"</i>
384
- </p>
385
- </div>
386
- """)
387
-
388
- with gr.Row():
389
- with gr.Column(scale=3):
390
- url_input = gr.Textbox(
391
- placeholder="Paste a YouTube URL here, e.g. https://www.youtube.com/watch?v=...",
392
- label="YouTube Video URL",
393
- lines=1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  )
395
- with gr.Column(scale=1, min_width=120):
396
- analyze_btn = gr.Button("πŸ” Analyze", variant="primary", size="lg")
397
-
398
- status_box = gr.Textbox(label="Status", interactive=False, lines=1, visible=True)
399
-
400
- with gr.Row():
401
- verdict_out = gr.HTML(visible=False, elem_classes="panel")
402
-
403
- with gr.Row():
404
- with gr.Column():
405
- trust_out = gr.HTML(visible=False, elem_classes="panel")
406
- with gr.Column():
407
- sigma_out = gr.HTML(visible=False, elem_classes="panel")
408
-
409
- with gr.Row():
410
- ccm_out = gr.HTML(visible=False, elem_classes="panel")
411
-
412
- gr.HTML("""
413
- <hr style="margin:28px 0 16px">
414
- <details>
415
- <summary style="cursor:pointer;font-weight:600;color:#374151">ℹ️ How it works</summary>
416
- <div style="padding:12px 0;color:#6b7280;font-size:0.9rem">
417
- <b>4 streams:</b> video title+description (text), hashtags/tags, audio transcript, video transcript.<br>
418
- Each stream is encoded by a BiGRU with SeTa dual-attention. The model computes:<br>
419
- &nbsp;β€’ <b>CCM</b> (Cross-Channel Consistency Matrix) β€” cosine similarity between stream representations<br>
420
- &nbsp;β€’ <b>Trust weights</b> β€” learned per-stream reliability given CCM context<br>
421
- &nbsp;β€’ <b>Uncertainty (Οƒ)</b> β€” calibrated confidence per stream via DMTE<br>
422
- These are fused into a single classification head.<br><br>
423
- <b>Note:</b> The model was trained on short YouTube mental health videos. Results on other content types may vary.
424
- ROC-AUC on held-out test: <b>0.967</b>. Positive-class F1: <b>0.828</b>.
425
- </div>
426
- </details>
427
- """)
428
-
429
- analyze_btn.click(
430
- fn=analyze_url,
431
- inputs=[url_input],
432
- outputs=[verdict_out, trust_out, sigma_out, ccm_out, status_box],
433
- )
434
- url_input.submit(
435
- fn=analyze_url,
436
- inputs=[url_input],
437
- outputs=[verdict_out, trust_out, sigma_out, ccm_out, status_box],
438
- )
439
 
440
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ app.py — Video Verifier & Sentiment Analyzer
3
+ Professional dark-mode Streamlit application.
4
  """
5
 
6
+ import os
7
+ import time
8
+ import streamlit as st
9
+ import pandas as pd
10
+
11
+ from fetcher import (
12
+ extract_video_id,
13
+ fetch_video_metadata,
14
+ fetch_transcript,
15
+ fetch_comments,
16
+ search_videos_by_title,
17
+ )
18
+ from analyzer import (
19
+ detect_misinformation,
20
+ analyze_sentiment_batch,
21
+ sentiment_summary,
22
+ extract_keywords,
23
+ sentiment_weighted_keywords,
24
+ )
25
+ from charts import (
26
+ misinfo_gauge,
27
+ sentiment_donut,
28
+ keyword_bar,
29
+ stream_trust_bars,
30
+ sentiment_timeline,
31
+ keyword_comparison,
32
+ )
33
+
34
+ # ══════════════════════════════════════════════════════════════════════════════
35
+ # PAGE CONFIG & GLOBAL STYLES
36
+ # ══════════════════════════════════════════════════════════════════════════════
37
+
38
+ st.set_page_config(
39
+ page_title="VideoVerifier — MHMisinfo",
40
+ page_icon="πŸ”¬",
41
+ layout="wide",
42
+ initial_sidebar_state="expanded",
43
+ )
44
+
45
+ st.markdown("""
46
+ <style>
47
+ /* ── Google Fonts ── */
48
+ @import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
49
+
50
+ /* ── Root palette ── */
51
+ :root {
52
+ --bg: #0d0f14;
53
+ --card: #13161e;
54
+ --border: #1e2330;
55
+ --text: #e8eaf0;
56
+ --dim: #5a6070;
57
+ --cyan: #00d4ff;
58
+ --green: #00e5a0;
59
+ --red: #ff4757;
60
+ --amber: #ffb347;
61
+ --purple: #b388ff;
62
+ --blue: #4a8eff;
63
+ }
64
+
65
+ /* ── App shell ── */
66
+ html, body, [class*="css"] {
67
+ background-color: var(--bg) !important;
68
+ color: var(--text) !important;
69
+ font-family: 'IBM Plex Sans', sans-serif !important;
70
+ }
71
+
72
+ .stApp { background: var(--bg) !important; }
73
+
74
+ /* ── Hide Streamlit chrome ── */
75
+ #MainMenu, footer, header { visibility: hidden; }
76
+ .block-container { padding: 1.5rem 2rem !important; max-width: 1400px; }
77
+
78
+ /* ── Sidebar ── */
79
+ section[data-testid="stSidebar"] {
80
+ background: var(--card) !important;
81
+ border-right: 1px solid var(--border) !important;
82
+ }
83
+ section[data-testid="stSidebar"] * { color: var(--text) !important; }
84
+
85
+ /* ── Inputs ── */
86
+ input, textarea, select, .stTextInput input {
87
+ background: #1a1d27 !important;
88
+ border: 1px solid var(--border) !important;
89
+ color: var(--text) !important;
90
+ border-radius: 8px !important;
91
+ font-family: 'DM Mono', monospace !important;
92
+ font-size: 0.88rem !important;
93
+ }
94
+ input:focus, textarea:focus {
95
+ border-color: var(--cyan) !important;
96
+ box-shadow: 0 0 0 2px rgba(0,212,255,0.15) !important;
97
+ }
98
+
99
+ /* ── Buttons ── */
100
+ .stButton > button {
101
+ background: linear-gradient(135deg, #00d4ff22, #4a8eff22) !important;
102
+ border: 1px solid var(--cyan) !important;
103
+ color: var(--cyan) !important;
104
+ border-radius: 8px !important;
105
+ font-family: 'DM Mono', monospace !important;
106
+ font-size: 0.85rem !important;
107
+ letter-spacing: 0.05em !important;
108
+ padding: 0.45rem 1.2rem !important;
109
+ transition: all 0.2s ease !important;
110
+ }
111
+ .stButton > button:hover {
112
+ background: linear-gradient(135deg, #00d4ff44, #4a8eff33) !important;
113
+ box-shadow: 0 0 16px rgba(0,212,255,0.25) !important;
114
+ transform: translateY(-1px) !important;
115
+ }
116
+ .stButton > button[kind="primary"] {
117
+ background: linear-gradient(135deg, var(--cyan), var(--blue)) !important;
118
+ border: none !important;
119
+ color: var(--bg) !important;
120
+ font-weight: 600 !important;
121
+ }
122
+
123
+ /* ── Cards ── */
124
+ .vv-card {
125
+ background: var(--card);
126
+ border: 1px solid var(--border);
127
+ border-radius: 12px;
128
+ padding: 1.2rem 1.4rem;
129
+ margin-bottom: 1rem;
130
+ }
131
+ .vv-card-accent {
132
+ background: var(--card);
133
+ border-top: 2px solid var(--cyan);
134
+ border-left: 1px solid var(--border);
135
+ border-right: 1px solid var(--border);
136
+ border-bottom: 1px solid var(--border);
137
+ border-radius: 0 0 12px 12px;
138
+ padding: 1.2rem 1.4rem;
139
+ margin-bottom: 1rem;
140
+ }
141
+
142
+ /* ── Section headers ── */
143
+ .vv-section-title {
144
+ font-family: 'Syne', sans-serif;
145
+ font-size: 0.7rem;
146
+ font-weight: 700;
147
+ letter-spacing: 0.18em;
148
+ text-transform: uppercase;
149
+ color: var(--dim);
150
+ margin-bottom: 0.6rem;
151
+ }
152
+
153
+ /* ── Hero title ── */
154
+ .vv-hero {
155
+ font-family: 'Syne', sans-serif;
156
+ font-size: 1.6rem;
157
+ font-weight: 800;
158
+ background: linear-gradient(135deg, var(--cyan), var(--blue));
159
+ -webkit-background-clip: text;
160
+ -webkit-text-fill-color: transparent;
161
+ background-clip: text;
162
+ letter-spacing: -0.02em;
163
+ line-height: 1.2;
164
+ margin: 0 0 0.2rem;
165
+ }
166
+
167
+ /* ── Stat chips ── */
168
+ .vv-stat {
169
+ display: inline-block;
170
+ background: #1a1d27;
171
+ border: 1px solid var(--border);
172
+ border-radius: 6px;
173
+ padding: 0.25rem 0.7rem;
174
+ font-family: 'DM Mono', monospace;
175
+ font-size: 0.78rem;
176
+ color: var(--cyan);
177
+ margin: 0.15rem 0.2rem 0.15rem 0;
178
+ }
179
+
180
+ /* ── Badge ── */
181
+ .vv-badge-green {
182
+ display: inline-block;
183
+ background: rgba(0,229,160,0.12);
184
+ border: 1px solid var(--green);
185
+ color: var(--green);
186
+ border-radius: 20px;
187
+ padding: 0.2rem 0.8rem;
188
+ font-size: 0.78rem;
189
+ font-family: 'DM Mono', monospace;
190
+ }
191
+ .vv-badge-red {
192
+ display: inline-block;
193
+ background: rgba(255,71,87,0.12);
194
+ border: 1px solid var(--red);
195
+ color: var(--red);
196
+ border-radius: 20px;
197
+ padding: 0.2rem 0.8rem;
198
+ font-size: 0.78rem;
199
+ font-family: 'DM Mono', monospace;
200
+ }
201
+ .vv-badge-amber {
202
+ display: inline-block;
203
+ background: rgba(255,179,71,0.12);
204
+ border: 1px solid var(--amber);
205
+ color: var(--amber);
206
+ border-radius: 20px;
207
+ padding: 0.2rem 0.8rem;
208
+ font-size: 0.78rem;
209
+ font-family: 'DM Mono', monospace;
210
+ }
211
+
212
+ /* ── Reasoning box ── */
213
+ .vv-reasoning {
214
+ background: #0d1119;
215
+ border-left: 3px solid var(--amber);
216
+ padding: 0.7rem 1rem;
217
+ border-radius: 0 8px 8px 0;
218
+ font-size: 0.83rem;
219
+ color: #c0c4cc;
220
+ line-height: 1.6;
221
+ font-family: 'IBM Plex Sans', sans-serif;
222
+ margin-top: 0.6rem;
223
+ }
224
+
225
+ /* ── Dataframe ── */
226
+ .stDataFrame {
227
+ background: var(--card) !important;
228
+ border: 1px solid var(--border) !important;
229
+ border-radius: 8px !important;
230
+ }
231
+ .stDataFrame th {
232
+ background: #1a1d27 !important;
233
+ color: var(--cyan) !important;
234
+ font-family: 'DM Mono', monospace !important;
235
+ font-size: 0.78rem !important;
236
+ }
237
+ .stDataFrame td {
238
+ color: var(--text) !important;
239
+ font-size: 0.8rem !important;
240
+ border-color: var(--border) !important;
241
+ }
242
+
243
+ /* ── Tabs ── */
244
+ .stTabs [data-baseweb="tab-list"] {
245
+ background: transparent !important;
246
+ border-bottom: 1px solid var(--border) !important;
247
+ gap: 0 !important;
248
+ }
249
+ .stTabs [data-baseweb="tab"] {
250
+ background: transparent !important;
251
+ color: var(--dim) !important;
252
+ font-family: 'DM Mono', monospace !important;
253
+ font-size: 0.82rem !important;
254
+ letter-spacing: 0.05em !important;
255
+ border: none !important;
256
+ padding: 0.5rem 1.2rem !important;
257
+ }
258
+ .stTabs [aria-selected="true"] {
259
+ color: var(--cyan) !important;
260
+ border-bottom: 2px solid var(--cyan) !important;
261
+ }
262
+
263
+ /* ── Spinner ── */
264
+ .stSpinner > div { border-top-color: var(--cyan) !important; }
265
+
266
+ /* ── Alerts ── */
267
+ .stAlert { border-radius: 8px !important; font-size: 0.85rem !important; }
268
+
269
+ /* ── Divider ── */
270
+ hr { border-color: var(--border) !important; }
271
+
272
+ /* ── Select box ── */
273
+ .stSelectbox > div > div {
274
+ background: #1a1d27 !important;
275
+ border-color: var(--border) !important;
276
+ color: var(--text) !important;
277
+ }
278
+
279
+ /* ── File uploader ── */
280
+ .stFileUploader {
281
+ background: #1a1d27 !important;
282
+ border: 1px dashed var(--border) !important;
283
+ border-radius: 8px !important;
284
+ }
285
+
286
+ /* ── Progress bar ── */
287
+ .stProgress > div > div > div {
288
+ background: linear-gradient(90deg, var(--cyan), var(--blue)) !important;
289
+ }
290
+
291
+ /* ── Number input ── */
292
+ .stNumberInput input {
293
+ background: #1a1d27 !important;
294
+ border-color: var(--border) !important;
295
+ }
296
+
297
+ /* ── Expander ── */
298
+ .streamlit-expanderHeader {
299
+ background: var(--card) !important;
300
+ border-color: var(--border) !important;
301
+ color: var(--text) !important;
302
+ font-family: 'DM Mono', monospace !important;
303
+ font-size: 0.85rem !important;
304
+ }
305
+ </style>
306
+ """, unsafe_allow_html=True)
307
+
308
+
309
+ # ══════════════════════════════════════════════════════════════════════════════
310
+ # SESSION STATE HELPERS
311
+ # ══════════════════════════════════════════════════════════════════════════════
312
+
313
+ def init_state():
314
+ defaults = {
315
+ "metadata": None,
316
+ "transcript": "",
317
+ "comments_df": pd.DataFrame(),
318
+ "sentiments": [],
319
+ "sent_summary": {},
320
+ "misinfo": None,
321
+ "keywords": [],
322
+ "pos_kw": [],
323
+ "neg_kw": [],
324
+ "video_id": None,
325
+ "analysed": False,
326
+ "status_log": [],
327
+ }
328
+ for k, v in defaults.items():
329
+ if k not in st.session_state:
330
+ st.session_state[k] = v
331
+
332
+ init_state()
333
+
334
+
335
+ # ══════════════════════════════════════════════════════════════════════════════
336
+ # SIDEBAR
337
+ # ══════════════════════════════════════════════════════════════════════════════
338
+
339
+ with st.sidebar:
340
+ st.markdown('<p class="vv-hero" style="font-size:1.1rem">πŸ”¬ VideoVerifier</p>', unsafe_allow_html=True)
341
+ st.markdown('<p style="color:#5a6070;font-size:0.78rem;font-family:\'DM Mono\',monospace;margin-top:-8px">Mental Health Misinfo Detector</p>', unsafe_allow_html=True)
342
+ st.markdown("---")
343
+
344
+ st.markdown('<p class="vv-section-title">βš™οΈ Configuration</p>', unsafe_allow_html=True)
345
+
346
+ api_key = st.text_input(
347
+ "YouTube API v3 Key",
348
+ value=os.environ.get("YT_API_KEY", ""),
349
+ type="password",
350
+ placeholder="AIza...",
351
+ help="Get a free key at console.cloud.google.com",
352
+ )
353
+
354
+ sentiment_method = st.selectbox(
355
+ "Sentiment Engine",
356
+ ["vader", "hf"],
357
+ format_func=lambda x: "VADER (fast, CPU)" if x == "vader" else "DistilBERT (accurate, ~500MB)",
358
+ help="VADER is ~100× faster and works offline. DistilBERT downloads ~500MB on first run.",
359
+ )
360
+
361
+ max_comments = st.number_input(
362
+ "Max comments to fetch",
363
+ min_value=10, max_value=500, value=150, step=10,
364
+ help="YouTube API quota: ~1 unit per comment request",
365
+ )
366
+
367
+ st.markdown("---")
368
+ st.markdown('<p class="vv-section-title">πŸ“‹ About</p>', unsafe_allow_html=True)
369
+ st.markdown(
370
+ '<p style="font-size:0.78rem;color:#5a6070;line-height:1.6">'
371
+ '4-stream SeTa-Attention model for mental health misinformation detection. '
372
+ 'Plug your <code style="background:#1a1d27;padding:1px 4px;border-radius:3px;color:#00d4ff">detect_misinformation()</code> '
373
+ 'function in <b>analyzer.py</b> to connect your trained checkpoint.'
374
+ '</p>',
375
+ unsafe_allow_html=True,
376
+ )
377
+
378
+ if st.session_state.status_log:
379
+ st.markdown("---")
380
+ st.markdown('<p class="vv-section-title">πŸ“œ Log</p>', unsafe_allow_html=True)
381
+ for msg in st.session_state.status_log[-6:]:
382
+ st.markdown(f'<p style="font-size:0.72rem;color:#5a6070;font-family:\'DM Mono\',monospace;margin:2px 0">{msg}</p>', unsafe_allow_html=True)
383
+
384
+
385
+ # ══════════════════════════════════════════════════════════════════════════════
386
+ # HEADER
387
+ # ══════════════════════════════════════════════════════════════════════════════
388
+
389
# Page title and tagline, followed by a horizontal rule.
_header_html = (
    '<h1 class="vv-hero" style="font-size:2rem">Video Verifier & Sentiment Analyzer</h1>'
    '<p style="color:#5a6070;font-size:0.85rem;margin-top:-4px;font-family:\'DM Mono\',monospace">'
    'Detect mental health misinformation Β· Analyze public sentiment Β· Understand video content at a glance'
    '</p>'
)
st.markdown(_header_html, unsafe_allow_html=True)
st.markdown("---")
397
+
398
+
399
+ # ══════════════════════════════════════════════════════════════════════════════
400
+ # INPUT SECTION
401
+ # ══════════════════════════════════════════════════════════════════════════════
402
+
403
input_tab1, input_tab2 = st.tabs(["πŸ”— YouTube URL", "πŸ“ Upload Video File"])

# Set by either input tab when the user picks a video during this run.
video_id_to_analyze = None

with input_tab1:
    # Wide URL field next to a narrow "Analyze" button.
    url_slot, button_slot = st.columns([5, 1])
    with url_slot:
        yt_url = st.text_input(
            "YouTube URL",
            placeholder="https://www.youtube.com/watch?v=... or youtu.be/...",
            label_visibility="collapsed",
        )
    with button_slot:
        analyze_url_btn = st.button("πŸ” Analyze", type="primary", use_container_width=True)

    if analyze_url_btn and yt_url:
        # A falsy result from extract_video_id means the URL was not recognised.
        video_id_to_analyze = extract_video_id(yt_url) or None
        if video_id_to_analyze is None:
            st.error("❌ Could not extract a valid YouTube video ID. Check the URL format.")
424
+
425
with input_tab2:
    # Upload tab: the file itself is not analysed here; it only unlocks a
    # keyword search so the user can pick the matching YouTube entry.
    from html import escape as _html_escape  # stdlib, local to this section

    st.markdown(
        '<div class="vv-card">'
        '<p class="vv-section-title">Upload a video file</p>'
        '<p style="font-size:0.82rem;color:#5a6070;line-height:1.6">'
        '⚠️ <b>Important:</b> The YouTube Data API cannot search by raw video bytes. '
        'After uploading, enter the video title or a keyword to find the matching YouTube entry. '
        'For local-only analysis, the system will run misinformation detection on the filename metadata.'
        '</p></div>',
        unsafe_allow_html=True,
    )
    uploaded = st.file_uploader(
        "Drop a video file",
        type=["mp4", "mov", "avi", "mkv", "webm"],
        label_visibility="collapsed",
    )
    if uploaded:
        # Suggest the file name without its extension; every accepted
        # extension is handled, not just ".mp4".
        suggested = os.path.splitext(uploaded.name)[0].replace("_", " ")

        col_kw, col_search = st.columns([4, 1])
        with col_kw:
            kw = st.text_input(
                "Video title / keyword to search on YouTube",
                placeholder=f"e.g. {suggested}",
            )
        with col_search:
            search_btn = st.button("πŸ”Ž Find on YT", use_container_width=True)

        if search_btn:
            if not api_key:
                st.error("Please enter your YouTube API key in the sidebar first.")
            elif not kw:
                st.warning("Enter a video title or keyword to search for.")
            else:
                with st.spinner("Searching YouTube…"):
                    found = search_videos_by_title(kw, api_key, max_results=5)
                # Keep the results across reruns: clicking "Select" below
                # reruns the script with search_btn == False, so the list must
                # not depend on the search button still being pressed.
                st.session_state["yt_search_results"] = found or []
                if not found:
                    st.warning("No results found. Try a different keyword or check your API key.")

        results = st.session_state.get("yt_search_results") or []
        if results:
            st.markdown('<p class="vv-section-title">Select the matching video</p>', unsafe_allow_html=True)
            for r in results:
                c1, c2, c3 = st.columns([1, 4, 1])
                with c1:
                    if r["thumbnail_url"]:
                        st.image(r["thumbnail_url"], width=80)
                with c2:
                    # Search-result text comes from YouTube; escape it before
                    # placing it inside raw HTML.
                    title = _html_escape(str(r["title"]))
                    channel = _html_escape(str(r["channel_title"]))
                    published = _html_escape(str(r["published_at"]))
                    st.markdown(
                        f'<p style="margin:0;font-size:0.85rem;font-weight:500">{title}</p>'
                        f'<p style="margin:0;font-size:0.75rem;color:#5a6070">{channel} Β· {published}</p>',
                        unsafe_allow_html=True,
                    )
                with c3:
                    if st.button("Select", key=f"sel_{r['video_id']}"):
                        video_id_to_analyze = r["video_id"]
474
+
475
+
476
+ # ══════════════════════════════════════════════════════════════════════════════
477
+ # DATA FETCHING & ANALYSIS PIPELINE
478
+ # ══════════════════════════════════════════════════════════════════════════════
479
+
480
def run_full_pipeline(video_id: str):
    """Fetch and analyse one video, then publish the results to session state.

    Steps, in order: metadata, transcript, comments, misinformation
    detection, keyword extraction, comment sentiment.  The module-level
    sidebar values ``api_key``, ``max_comments`` and ``sentiment_method`` are
    read directly.

    All results are gathered in local variables and written to
    ``st.session_state`` in a single pass at the very end.  If any step
    raises, the previously displayed analysis is therefore left untouched
    rather than being half-overwritten.

    Args:
        video_id: YouTube video ID to analyse.

    Returns:
        ``None``.  When the metadata fetch reports an error, the error is
        shown and the function returns early without changing session state.
        On success the function ends by calling ``st.rerun()``.
    """
    log = []

    # 1. Metadata
    with st.spinner("Fetching video metadata…"):
        meta, err = fetch_video_metadata(video_id, api_key)
    if err:
        st.error(f"❌ {err}")
        return
    log.append(f"βœ… Metadata: {meta['title'][:50]}")

    # 2. Transcript
    with st.spinner("Fetching transcript…"):
        transcript, t_status = fetch_transcript(video_id)
    log.append(t_status)

    # 3. Comments
    with st.spinner(f"Fetching up to {max_comments} comments…"):
        comments_df, c_status = fetch_comments(video_id, api_key, max_comments=int(max_comments))
    log.append(c_status)

    # 4. Misinformation (the same transcript feeds both transcript streams)
    with st.spinner("Running misinformation detection…"):
        misinfo = detect_misinformation(
            text=f"{meta['title']} {meta['description']}",
            tags=meta["tags"],
            audio_transcript=transcript,
            video_transcript=transcript,
        )
    log.append(f"πŸ”¬ Misinfo score: {misinfo['confidence_pct']}%")

    # 5. Keywords
    keywords = extract_keywords(f"{meta['title']} {meta['description']} {transcript}", meta["tags"])

    # 6. Sentiment -- the defaults also clear keyword lists left over from a
    #    previous video when this one has no comments.
    sentiments, summary, pos_kw, neg_kw = [], {}, [], []
    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        with st.spinner(f"Analyzing sentiment of {len(texts)} comments ({sentiment_method.upper()})…"):
            progress = st.progress(0, text="Sentiment analysis…")
            batch_size = 64
            try:
                for i in range(0, len(texts), batch_size):
                    chunk = texts[i: i + batch_size]
                    sentiments += analyze_sentiment_batch(chunk, method=sentiment_method, batch_size=batch_size)
                    done = min(i + batch_size, len(texts))
                    progress.progress(min((i + batch_size) / len(texts), 1.0),
                                      text=f"Analyzed {done}/{len(texts)} comments…")
            finally:
                # Remove the bar even if a batch fails.
                progress.empty()
        summary = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)
        log.append(f"πŸ’¬ Sentiment: {summary['pos_pct']}% pos / {summary['neg_pct']}% neg")
    else:
        log.append("πŸ’¬ Skipped (no comments)")

    # Commit everything at once so the dashboard never sees a mix of old and
    # new data.
    outcome = {
        "metadata": meta,
        "transcript": transcript,
        "comments_df": comments_df,
        "misinfo": misinfo,
        "keywords": keywords,
        "sentiments": sentiments,
        "sent_summary": summary,
        "pos_kw": pos_kw,
        "neg_kw": neg_kw,
        "video_id": video_id,
        "analysed": True,
        "status_log": log,
    }
    for key, value in outcome.items():
        st.session_state[key] = value
    st.rerun()
547
+
548
+
549
# Start the pipeline only when a video was chosen in this run; without an
# API key the user is asked to provide one instead.
if video_id_to_analyze:
    if api_key:
        run_full_pipeline(video_id_to_analyze)
    else:
        st.error("⚠️ Please enter your YouTube API key in the sidebar before analyzing.")
553
+
554
+
555
+ # ══════════════════════════════════════════════════════════════════════════════
556
+ # RESULTS DASHBOARD
557
+ # ══════════════════════════════════════════════════════════════════════════════
558
+
559
if not st.session_state.analysed:
    # Nothing analysed yet: show the landing placeholder and end this run.
    _landing_html = (
        '<div style="text-align:center;padding:4rem 2rem">'
        '<p style="font-size:3rem">πŸ”¬</p>'
        '<p style="font-family:\'Syne\',sans-serif;font-size:1.1rem;color:#5a6070">'
        'Paste a YouTube URL above and click <b style="color:#00d4ff">Analyze</b> to begin</p>'
        '<p style="font-size:0.8rem;color:#3a3f50;font-family:\'DM Mono\',monospace">'
        'Misinformation detection Β· Sentiment analysis Β· Comment insights</p>'
        '</div>'
    )
    st.markdown(_landing_html, unsafe_allow_html=True)
    st.stop()

# Pull the stored analysis into short names used by the dashboard below.
meta, transcript, comments_df = (
    st.session_state.metadata,
    st.session_state.transcript,
    st.session_state.comments_df,
)
misinfo, keywords = st.session_state.misinfo, st.session_state.keywords
sentiments, sent_sum = st.session_state.sentiments, st.session_state.sent_summary
pos_kw, neg_kw = st.session_state.pos_kw, st.session_state.neg_kw
video_id = st.session_state.video_id
583
+
584
+
585
+ # ── Layout: left (info) / right (analytics) ───────────────────────────────────
586
+
587
left_col, right_col = st.columns([2, 3], gap="large")

# LEFT COLUMN -- video info: thumbnail, link, title, metrics, tags,
# description and transcript.
with left_col:
    # Title, channel, tags, description and transcript all originate from
    # YouTube; they are escaped before being placed inside raw HTML so that
    # markup in them is shown as text instead of being rendered.
    from html import escape as _html_escape  # stdlib, local to this section

    # Thumbnail + link
    if meta.get("thumbnail_url"):
        st.image(meta["thumbnail_url"], use_column_width=True)

    st.markdown(
        f'<a href="https://www.youtube.com/watch?v={_html_escape(str(video_id))}" target="_blank" '
        f'style="display:block;text-align:center;font-family:\'DM Mono\',monospace;'
        f'font-size:0.78rem;color:#5a6070;text-decoration:none;margin:4px 0 12px">β–Ά Open on YouTube</a>',
        unsafe_allow_html=True,
    )

    # Title & channel
    safe_title = _html_escape(str(meta["title"]))
    safe_channel = _html_escape(str(meta["channel_title"]))
    safe_published = _html_escape(str(meta["published_at"]))
    st.markdown(
        f'<div class="vv-card">'
        f'<p class="vv-section-title">Video</p>'
        f'<p style="font-family:\'Syne\',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 4px">{safe_title}</p>'
        f'<p style="font-size:0.82rem;color:#5a6070;margin:0">by <b style="color:#b0b4c0">{safe_channel}</b> Β· {safe_published}</p>'
        f'</div>',
        unsafe_allow_html=True,
    )

    # Stats (the counts are formatted with thousands separators)
    st.markdown('<p class="vv-section-title">Metrics</p>', unsafe_allow_html=True)
    s1, s2 = st.columns(2)
    with s1:
        st.markdown(f'<span class="vv-stat">πŸ‘ {meta["view_count"]:,}</span>', unsafe_allow_html=True)
        st.markdown(f'<span class="vv-stat">πŸ‘ {meta["like_count"]:,}</span>', unsafe_allow_html=True)
    with s2:
        st.markdown(f'<span class="vv-stat">πŸ’¬ {meta["comment_count"]:,}</span>', unsafe_allow_html=True)
        st.markdown(f'<span class="vv-stat">⏱ {_html_escape(str(meta["duration"]))}</span>', unsafe_allow_html=True)

    # Tags (at most the first 20)
    if meta.get("tags"):
        st.markdown('<p class="vv-section-title" style="margin-top:1rem">Tags</p>', unsafe_allow_html=True)
        tag_html = "".join(
            f'<span style="display:inline-block;background:#1a1d27;border:1px solid #1e2330;border-radius:4px;'
            f'padding:2px 8px;font-family:\'DM Mono\',monospace;font-size:0.7rem;color:#8090a0;margin:2px">'
            f'#{_html_escape(str(t))}</span>'
            for t in meta["tags"][:20]
        )
        st.markdown(tag_html, unsafe_allow_html=True)

    # Description (collapsed, truncated to 1200 characters)
    if meta.get("description"):
        with st.expander("πŸ“„ Description", expanded=False):
            description = meta["description"]
            # Truncate first, then escape, so an entity is never cut in half.
            shown_description = _html_escape(description[:1200])
            st.markdown(
                f'<p style="font-size:0.8rem;color:#8090a0;line-height:1.65;white-space:pre-wrap">'
                f'{shown_description}{"…" if len(description)>1200 else ""}</p>',
                unsafe_allow_html=True,
            )

    # Transcript (collapsed, truncated to 2500 characters)
    with st.expander(f"πŸ“ Transcript ({len(transcript.split()) if transcript else 0} words)", expanded=False):
        if transcript:
            shown_transcript = _html_escape(transcript[:2500])
            st.markdown(
                f'<p style="font-size:0.78rem;color:#8090a0;line-height:1.65">'
                f'{shown_transcript}{"…" if len(transcript)>2500 else ""}</p>',
                unsafe_allow_html=True,
            )
        else:
            st.info("No transcript available for this video.")
655
 
 
 
 
656
 
657
+ # ╔══════════════════════════════╗
658
+ # β•‘ RIGHT COLUMN β€” Analytics β•‘
659
+ # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
660
with right_col:

    def _show_chart(figure):
        """Render a Plotly figure at container width with the mode bar hidden."""
        st.plotly_chart(
            figure,
            use_container_width=True,
            config={"displayModeBar": False},
        )

    # -- Misinfo verdict ------------------------------------------------------
    st.markdown('<p class="vv-section-title">πŸ”¬ Misinformation Analysis</p>', unsafe_allow_html=True)

    score = misinfo["score"]
    # The first ceiling the score falls below decides the badge; anything that
    # is below neither ceiling gets the red badge.
    _badge_tiers = (
        (0.35, '<span class="vv-badge-green">βœ… Appears Credible</span>'),
        (0.65, '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'),
    )
    badge = next(
        (markup for ceiling, markup in _badge_tiers if score < ceiling),
        '<span class="vv-badge-red">🚨 Likely Misinformation</span>',
    )

    st.markdown(badge, unsafe_allow_html=True)

    ga_col, detail_col = st.columns([1, 1])
    with ga_col:
        _show_chart(misinfo_gauge(score, "Misinfo Confidence"))
    with detail_col:
        _show_chart(stream_trust_bars(misinfo["stream_details"]))

    st.markdown(
        f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>',
        unsafe_allow_html=True,
    )

    st.markdown("---")

    # -- Sentiment analytics --------------------------------------------------
    st.markdown('<p class="vv-section-title">πŸ’¬ Comment Sentiment</p>', unsafe_allow_html=True)

    if sent_sum:
        # One card per sentiment class: (number colour, summary key, caption).
        _card_specs = (
            ("#00e5a0", "pos_pct", "Positive"),
            ("#ff4757", "neg_pct", "Negative"),
            ("#5a6070", "neu_pct", "Neutral"),
        )
        for card_col, (hue, pct_key, caption) in zip(st.columns(3), _card_specs):
            with card_col:
                st.markdown(
                    f'<div class="vv-card" style="text-align:center">'
                    f'<p style="color:{hue};font-family:\'DM Mono\',monospace;font-size:1.6rem;font-weight:700;margin:0">{sent_sum[pct_key]}%</p>'
                    f'<p style="color:#5a6070;font-size:0.75rem;margin:0">{caption}</p></div>',
                    unsafe_allow_html=True,
                )

        d_col, t_col = st.columns([1, 1])
        with d_col:
            _show_chart(sentiment_donut(sent_sum))
        with t_col:
            _show_chart(sentiment_timeline(comments_df, sentiments))

        # Keyword charts
        kw_col1, kw_col2 = st.columns(2)
        with kw_col1:
            _show_chart(keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff"))
        with kw_col2:
            _show_chart(keyword_comparison(pos_kw, neg_kw))

    else:
        st.info("⚠️ No comment sentiment data β€” comments may be disabled or unavailable.")
        if keywords:
            _show_chart(keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
 
761
+ # ── Comments table ───────────────────────────────────────────────────────
762
+ st.markdown("---")
763
+ st.markdown('<p class="vv-section-title">πŸ“Š Comments Deep-Dive</p>', unsafe_allow_html=True)
764
+
765
+ if not comments_df.empty:
766
+ display_df = comments_df.copy()
767
+ if sentiments:
768
+ display_df["sentiment"] = [s["label"] for s in sentiments]
769
+ display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
770
+
771
+ tab_all, tab_pos, tab_neg, tab_top = st.tabs([
772
+ f"All ({len(display_df)})",
773
+ f"Positive ({sent_sum.get('POSITIVE',0)})",
774
+ f"Negative ({sent_sum.get('NEGATIVE',0)})",
775
+ "Most Liked",
776
+ ])
777
+
778
+ show_cols = ["author", "text", "likes", "published_at"]
779
+ if "sentiment" in display_df.columns:
780
+ show_cols += ["sentiment", "compound"]
781
+
782
+ with tab_all:
783
+ st.dataframe(display_df[show_cols].head(100), use_container_width=True, height=320)
784
+
785
+ with tab_pos:
786
+ pos_df = display_df[display_df.get("sentiment", pd.Series()) == "POSITIVE"] if "sentiment" in display_df else pd.DataFrame()
787
+ if not pos_df.empty:
788
+ st.dataframe(pos_df[show_cols].head(50), use_container_width=True, height=320)
789
+ else:
790
+ st.info("No positive comments in this dataset.")
791
+
792
+ with tab_neg:
793
+ neg_df = display_df[display_df.get("sentiment", pd.Series()) == "NEGATIVE"] if "sentiment" in display_df else pd.DataFrame()
794
+ if not neg_df.empty:
795
+ st.dataframe(neg_df[show_cols].head(50), use_container_width=True, height=320)
796
+ else:
797
+ st.info("No negative comments in this dataset.")
798
+
799
+ with tab_top:
800
+ top_df = display_df.sort_values("likes", ascending=False).head(20)
801
+ st.dataframe(top_df[show_cols], use_container_width=True, height=320)
802
+
803
+ else:
804
+ st.info("No comments available for this video.")