Abdullah172 committed on
Commit
15440a9
·
verified ·
1 Parent(s): 7370cf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +540 -830
app.py CHANGED
@@ -1,885 +1,595 @@
1
  """
2
- app.py — Video Verifier & Sentiment Analyzer (Gradio 6.x)
3
-
4
  """
5
 
6
- import os
 
7
  import pandas as pd
8
- import gradio as gr
9
-
10
- from fetcher import (
11
- extract_video_id,
12
- fetch_video_metadata,
13
- fetch_transcript,
14
- fetch_comments,
15
- search_videos_by_title,
16
- )
17
- from analyzer import (
18
- detect_misinformation,
19
- analyze_sentiment_batch,
20
- sentiment_summary,
21
- extract_keywords,
22
- sentiment_weighted_keywords,
23
- )
24
- from charts import (
25
- sentiment_donut,
26
- keyword_bar,
27
- sentiment_timeline,
28
- keyword_comparison,
29
- modality_misinfo_distribution,
30
- trust_score_by_modality,
31
- uncertainty_analysis,
 
 
 
 
 
 
 
32
  )
33
 
34
 
35
- # CSS — full-viewport dark theme, zero white bleed
36
-
37
-
38
- CSS = """
39
- @import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
40
-
41
- /* Variables*/
42
- :root {
43
- --bg: #0d0f14;
44
- --card: #13161e;
45
- --border: #1e2330;
46
- --text: #e8eaf0;
47
- --dim: #5a6070;
48
- --cyan: #00d4ff;
49
- --green: #00e5a0;
50
- --red: #ff4757;
51
- --amber: #ffb347;
52
- --blue: #4a8eff;
53
- }
54
-
55
- /* Force dark everywhere — prevent white bleed */
56
- html, body {
57
- background: var(--bg) !important;
58
- color: var(--text) !important;
59
- margin: 0; padding: 0;
60
- }
61
- .gradio-container, #root, #app, main, .main, .wrap, .svelte-1kyws56 {
62
- background: var(--bg) !important;
63
- max-width: 100% !important;
64
- width: 100% !important;
65
- margin: 0 auto !important;
66
- padding: 0 1.5rem !important;
67
- box-sizing: border-box !important;
68
- }
69
- /* kill Gradio's default white blocks */
70
- .block, .wrap, .panel, .padded, div.form,
71
- div[class*="block"], div[class*="wrap"],
72
- div[class*="panel"], div[class*="gap"],
73
- .gap { background: transparent !important; border: none !important; }
74
-
75
- /* Cards / Groups ─ */
76
- .gr-group, .gr-box, .vv-section {
77
- background: var(--card) !important;
78
- border: 1px solid var(--border) !important;
79
- border-radius: 12px !important;
80
- padding: 1rem 1.25rem !important;
81
- }
82
-
83
- /* Tabs */
84
- .tab-nav button {
85
- background: transparent !important;
86
- border: none !important;
87
- color: var(--dim) !important;
88
- font-family: 'DM Mono', monospace !important;
89
- font-size: 0.82rem !important;
90
- letter-spacing: 0.05em !important;
91
- border-bottom: 2px solid transparent !important;
92
- padding: 0.5rem 1.2rem !important;
93
- transition: color 0.18s;
94
- }
95
- .tab-nav button.selected {
96
- color: var(--cyan) !important;
97
- border-bottom-color: var(--cyan) !important;
98
- }
99
- .tab-nav { border-bottom: 1px solid var(--border) !important; }
100
-
101
- /* Inputs */
102
- input[type="text"], input[type="password"], input[type="number"], textarea, select {
103
- background: #1a1d27 !important;
104
- border: 1px solid var(--border) !important;
105
- color: var(--text) !important;
106
- border-radius: 8px !important;
107
- font-family: 'DM Mono', monospace !important;
108
- font-size: 0.88rem !important;
109
- }
110
- input:focus, textarea:focus, select:focus {
111
- border-color: var(--cyan) !important;
112
- box-shadow: 0 0 0 2px rgba(0,212,255,0.15) !important;
113
- outline: none !important;
114
- }
115
- label, .gr-label, span.svelte-1b6s6s {
116
- color: var(--dim) !important;
117
- font-family: 'DM Mono', monospace !important;
118
- font-size: 0.75rem !important;
119
- letter-spacing: 0.08em !important;
120
- text-transform: uppercase;
121
- }
122
-
123
- /* Slider */
124
- input[type="range"] { accent-color: var(--cyan); }
125
-
126
- /* Buttons ─ */
127
- button.primary, button[variant="primary"], .primary {
128
- background: linear-gradient(135deg, var(--cyan), var(--blue)) !important;
129
- border: none !important;
130
- color: #0d0f14 !important;
131
- font-weight: 700 !important;
132
- font-family: 'DM Mono', monospace !important;
133
- border-radius: 8px !important;
134
- letter-spacing: 0.06em !important;
135
- }
136
- button.secondary {
137
- background: rgba(0,212,255,0.08) !important;
138
- border: 1px solid var(--cyan) !important;
139
- color: var(--cyan) !important;
140
- border-radius: 8px !important;
141
- font-family: 'DM Mono', monospace !important;
142
- }
143
- button:hover { opacity: 0.88; transform: translateY(-1px); transition: all 0.15s; }
144
-
145
- /* Dropdowns ─ */
146
- .dropdown, ul[role="listbox"], li[role="option"] {
147
- background: #1a1d27 !important;
148
- border-color: var(--border) !important;
149
- color: var(--text) !important;
150
- }
151
- li[role="option"]:hover { background: #242736 !important; }
152
-
153
- /* Dataframe ─ */
154
- .gr-dataframe, table { background: var(--card) !important; }
155
- .gr-dataframe th {
156
- background: #1a1d27 !important;
157
- color: var(--cyan) !important;
158
- font-family: 'DM Mono', monospace !important;
159
- font-size: 0.72rem !important;
160
- padding: 6px 10px;
161
- border-bottom: 1px solid var(--border);
162
- text-transform: uppercase;
163
- letter-spacing: 0.08em;
164
- }
165
- .gr-dataframe td {
166
- color: var(--text) !important;
167
- font-size: 0.77rem !important;
168
- padding: 5px 10px;
169
- border-bottom: 1px solid var(--border);
170
- }
171
- .gr-dataframe tr:hover td { background: rgba(0,212,255,0.04) !important; }
172
-
173
- /* Accordion ─ */
174
- details > summary {
175
- color: var(--dim) !important;
176
- font-family: 'DM Mono', monospace !important;
177
- font-size: 0.82rem !important;
178
- cursor: pointer;
179
- list-style: none;
180
- }
181
- details[open] > summary { color: var(--cyan) !important; }
182
-
183
- /* Plot containers ─ */
184
- .js-plotly-plot, .plotly { background: transparent !important; }
185
- .modebar { display: none !important; }
186
-
187
- /* Scrollbar ─ */
188
- ::-webkit-scrollbar { width: 6px; height: 6px; }
189
- ::-webkit-scrollbar-track { background: var(--bg); }
190
- ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
191
- ::-webkit-scrollbar-thumb:hover { background: var(--dim); }
192
-
193
-
194
- /* Shared HTML component classes */
195
-
196
- .vv-hero {
197
- font-family: 'Syne', sans-serif;
198
- font-size: 1.65rem;
199
- font-weight: 800;
200
- background: linear-gradient(135deg, #00d4ff, #4a8eff);
201
- -webkit-background-clip: text;
202
- -webkit-text-fill-color: transparent;
203
- background-clip: text;
204
- letter-spacing: -0.02em;
205
- line-height: 1.2;
206
- }
207
- .vv-section-title {
208
- font-family: 'Syne', sans-serif;
209
- font-size: 0.68rem;
210
- font-weight: 700;
211
- letter-spacing: 0.18em;
212
- text-transform: uppercase;
213
- color: #5a6070;
214
- margin-bottom: 0.5rem;
215
- margin-top: 0;
216
- }
217
- .vv-card {
218
- background: #13161e;
219
- border: 1px solid #1e2330;
220
- border-radius: 12px;
221
- padding: 1.1rem 1.3rem;
222
- margin-bottom: 0.7rem;
223
- }
224
- .vv-stat {
225
- display: inline-block;
226
- background: #1a1d27;
227
- border: 1px solid #1e2330;
228
- border-radius: 6px;
229
- padding: 0.25rem 0.75rem;
230
- font-family: 'DM Mono', monospace;
231
- font-size: 0.77rem;
232
- color: #00d4ff;
233
- margin: 0.15rem 0.2rem;
234
- }
235
- .vv-badge-green {
236
- display: inline-block;
237
- background: rgba(0,229,160,0.12);
238
- border: 1px solid #00e5a0;
239
- color: #00e5a0;
240
- border-radius: 20px;
241
- padding: 0.32rem 1.1rem;
242
- font-size: 0.85rem;
243
- font-family: 'DM Mono', monospace;
244
- font-weight: 600;
245
- }
246
- .vv-badge-red {
247
- display: inline-block;
248
- background: rgba(255,71,87,0.12);
249
- border: 1px solid #ff4757;
250
- color: #ff4757;
251
- border-radius: 20px;
252
- padding: 0.32rem 1.1rem;
253
- font-size: 0.85rem;
254
- font-family: 'DM Mono', monospace;
255
- font-weight: 600;
256
- }
257
- .vv-badge-amber {
258
- display: inline-block;
259
- background: rgba(255,179,71,0.12);
260
- border: 1px solid #ffb347;
261
- color: #ffb347;
262
- border-radius: 20px;
263
- padding: 0.32rem 1.1rem;
264
- font-size: 0.85rem;
265
- font-family: 'DM Mono', monospace;
266
- font-weight: 600;
267
- }
268
- .vv-reasoning {
269
- background: #0d1119;
270
- border-left: 3px solid #ffb347;
271
- padding: 0.8rem 1rem;
272
- border-radius: 0 8px 8px 0;
273
- font-size: 0.83rem;
274
- color: #c0c4cc;
275
- line-height: 1.65;
276
- font-family: 'IBM Plex Sans', sans-serif;
277
- margin-top: 8px;
278
- }
279
- .vv-tag {
280
- display: inline-block;
281
- background: #1a1d27;
282
- border: 1px solid #1e2330;
283
- border-radius: 4px;
284
- padding: 2px 8px;
285
- font-family: 'DM Mono', monospace;
286
- font-size: 0.7rem;
287
- color: #8090a0;
288
- margin: 2px;
289
- }
290
- .vv-stat-big-green { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #00e5a0; margin: 0; }
291
- .vv-stat-big-red { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #ff4757; margin: 0; }
292
- .vv-stat-big-dim { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #5a6070; margin: 0; }
293
- .vv-log-line { font-size: 0.72rem; color: #5a6070; font-family: 'DM Mono', monospace; margin: 2px 0; }
294
- .vv-hr { border: none; border-top: 1px solid #1e2330; margin: 1.1rem 0; }
295
- """
296
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- # HELPERS
 
 
 
 
 
 
 
 
 
299
 
 
300
 
301
def _empty_plotly(msg: str = "Run analysis to see data", h: int = 230):
    """Return a transparent placeholder Plotly figure that shows only ``msg``.

    Used wherever a chart slot must be filled but there is nothing to plot
    (no analysis run yet, no comments, or a chart builder failed).

    Args:
        msg: Text centred in the figure.
        h: Figure height in pixels.

    Returns:
        A ``plotly.graph_objects.Figure`` with no traces and one annotation.
    """
    # Imported locally, as in the original, so the module-level import list
    # of this file stays unchanged.
    import plotly.graph_objects as go

    fig = go.Figure()
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font=dict(color="#5a6070"),
        margin=dict(l=10, r=10, t=10, b=10),
        height=h,
        # A figure without traces still renders default cartesian axes
        # (ticks, grid lines, zero line). Hide them so that the placeholder
        # shows nothing except the message.
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
    )
    fig.add_annotation(
        text=msg,
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=12, color="#5a6070"),
    )
    return fig
313
 
314
-
315
def _blank_outputs(status_msg: str):
    """Build the 19-item placeholder tuple for ALL_OUTPUTS when nothing has run.

    ``status_msg`` is inserted verbatim into the red status paragraph (callers
    pass small HTML fragments such as ``<code>`` in it). Every plot slot gets
    the same empty placeholder figure and every table slot an empty DataFrame.
    """
    placeholder_fig = _empty_plotly()

    status_html = (
        f'<p style="color:#ff4757;font-family:DM Mono,monospace;padding:8px">{status_msg}</p>'
    )
    log_html = "<p class='vv-log-line'>—</p>"
    left_panel = (
        "<div style='padding:3rem;text-align:center;color:#5a6070;font-family:DM Mono,monospace'>No data yet.</div>"
    )

    # Slots 5-11: modality_dist, trust, uncertainty, donut, timeline,
    # kw_bar, kw_comp — all share one placeholder figure object.
    plot_slots = (placeholder_fig,) * 7
    # Slots 15-18: df_all, df_pos, df_neg, df_top.
    table_slots = tuple(pd.DataFrame() for _ in range(4))

    return (
        status_html,    # 0 status
        log_html,       # 1 log
        left_panel,     # 2 left panel
        "",             # 3 badge
        "",             # 4 reasoning
        *plot_slots,    # 5-11 plots
        "", "", "",     # 12 stat_pos, 13 stat_neg, 14 stat_neu
        *table_slots,   # 15-18 dataframes
    )
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
- # PIPELINE
 
 
 
 
 
 
331
 
 
332
 
333
def run_pipeline(
    url_or_id: str,
    sentiment_method: str,
    max_comments: int,
    progress=gr.Progress(track_tqdm=False),
):
    """Run the full analysis for one video and yield the UI output tuple.

    This is a generator used as a Gradio event handler. It yields exactly one
    tuple: either the placeholder from ``_blank_outputs`` (invalid input,
    missing API key, metadata error) or the result of ``_build_outputs``.

    Args:
        url_or_id: YouTube URL or raw video ID typed by the user.
        sentiment_method: Engine key forwarded to ``analyze_sentiment_batch``.
        max_comments: Upper bound on comments to fetch; coerced to ``int``.
        progress: Gradio progress tracker.
    """
    # The API key is read from the environment only, never from the UI.
    api_key = os.environ.get("YT_API_KEY", "").strip()

    # Guards
    raw_input = (url_or_id or "").strip()
    if not raw_input:
        yield _blank_outputs("⚠️ Please enter a YouTube URL or video ID.")
        return

    video_id = extract_video_id(raw_input)
    if not video_id:
        yield _blank_outputs("❌ Could not parse a valid YouTube video ID.")
        return

    if not api_key:
        yield _blank_outputs(
            "⚠️ YouTube API key not found. "
            "Set the <code>YT_API_KEY</code> environment variable / Space secret."
        )
        return

    # Coerce once so the progress text and the fetch call agree (a slider may
    # deliver a float, which would otherwise be shown as e.g. "150.0").
    comment_limit = int(max_comments)

    # 1 — Metadata
    progress(0.05, desc="Fetching video metadata…")
    meta, err = fetch_video_metadata(video_id, api_key)
    if err:
        yield _blank_outputs(f"❌ {err}")
        return

    # 2 — Transcript
    progress(0.20, desc="Fetching transcript…")
    transcript, t_status = fetch_transcript(video_id)
    # Normalise a missing transcript to "" so that it is never interpolated
    # as the literal text "None" into the keyword corpus below.
    # NOTE(review): assumes fetch_transcript returns a str or a falsy value
    # when no captions exist — confirm against fetcher.
    transcript = transcript or ""

    # 3 — Comments
    progress(0.35, desc=f"Fetching up to {comment_limit} comments…")
    comments_df, c_status = fetch_comments(video_id, api_key, max_comments=comment_limit)

    # 4 — Misinformation
    # Both transcript arguments receive the spoken transcript. According to
    # the original fix note, detect_misinformation() builds the video segment
    # internally as transcript + title + tags, so the three modalities get
    # distinct content (and distinct scores) even when captions are missing.
    progress(0.50, desc="Running misinformation detection…")
    misinfo = detect_misinformation(
        text=f"{meta['title']} {meta['description']}",
        tags=meta["tags"],
        audio_transcript=transcript,  # speech/audio stream
        video_transcript=transcript,  # enriched inside analyzer with title+tags
    )

    # 5 — Keywords
    keywords = extract_keywords(
        f"{meta['title']} {meta['description']} {transcript}",
        meta["tags"],
    )

    # 6 — Sentiment (processed in chunks so progress can be reported)
    sentiments, sent_sum, pos_kw, neg_kw = [], {}, [], []

    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        total = len(texts)
        batch = 64
        for start in range(0, total, batch):
            chunk = texts[start:start + batch]
            sentiments += analyze_sentiment_batch(chunk, method=sentiment_method, batch_size=batch)
            done = min(start + batch, total)
            # Sentiment occupies the 0.60–0.90 span of the progress bar.
            frac = 0.60 + 0.30 * min((start + batch) / max(total, 1), 1.0)
            progress(frac, desc=f"Sentiment {done}/{total}…")

        sent_sum = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)

    # 7 — Build outputs
    progress(0.97, desc="Building charts…")
    if sent_sum:
        sentiment_line = f"💬 Sentiment: {sent_sum['pos_pct']}% pos / {sent_sum['neg_pct']}% neg"
    else:
        sentiment_line = "💬 No comments — sentiment skipped"

    yield _build_outputs(
        meta=meta,
        video_id=video_id,
        transcript=transcript,
        comments_df=comments_df,
        misinfo=misinfo,
        keywords=keywords,
        sentiments=sentiments,
        sent_sum=sent_sum,
        pos_kw=pos_kw,
        neg_kw=neg_kw,
        status_log=[
            f"✅ Metadata: {meta['title'][:55]}",
            t_status,
            c_status,
            f"🔬 Misinfo score: {misinfo['confidence_pct']}%",
            sentiment_line,
        ],
    )
433
 
 
 
434
 
435
- # OUTPUT BUILDER
 
 
 
 
 
 
 
 
436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
def _chart_or_placeholder(build, fallback_msg=None):
    """Call ``build()``; if it raises, log the error and return a placeholder figure."""
    import logging

    try:
        return build()
    except Exception:
        # Deliberate best-effort: one broken chart must not take down the
        # whole result view. The failure is logged rather than dropped.
        logging.getLogger(__name__).exception("Chart rendering failed; showing placeholder")
        return _empty_plotly(fallback_msg) if fallback_msg is not None else _empty_plotly()


def _build_outputs(
    meta, video_id, transcript, comments_df,
    misinfo, keywords, sentiments, sent_sum, pos_kw, neg_kw, status_log,
):
    """Assemble the 19-item tuple that fills ALL_OUTPUTS after a successful run.

    All text that originates from YouTube (title, channel, description, tags,
    transcript, thumbnail URL) is HTML-escaped before being placed in markup,
    because it is untrusted external input.

    Args:
        meta: Video metadata mapping (title, channel_title, published_at,
            view_count, like_count, comment_count, duration, and optionally
            thumbnail_url, tags, description).
        video_id: YouTube video ID used for the "Open on YouTube" link.
        transcript: Transcript text; may be empty or None.
        comments_df: DataFrame of comments with at least the columns
            author, text, likes, published_at.
        misinfo: Mapping with ``score``, ``reasoning`` and optionally
            ``modality_analysis``.
        keywords: Input for ``keyword_bar``.
        sentiments: One mapping per comment with ``label`` and optionally
            ``compound``; may be empty.
        sent_sum: Mapping with pos_pct / neg_pct / neu_pct, or empty.
        pos_kw, neg_kw: Inputs for ``keyword_comparison``.
        status_log: Lines for the activity log.

    Returns:
        Tuple in the order of ALL_OUTPUTS: status, log, left panel, badge,
        reasoning, seven figures, three stat boxes, four DataFrames.
    """
    from html import escape

    # Status
    status_html = (
        '<p style="color:#00e5a0;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
        "✅ Analysis complete</p>"
    )

    # Log — lines are shown as plain text; they can contain the video title.
    log_html = "".join(
        f'<p class="vv-log-line">{escape(str(line))}</p>' for line in status_log
    )

    # Left panel
    thumbnail_url = meta.get("thumbnail_url")
    thumb_html = (
        f'<img src="{escape(str(thumbnail_url), quote=True)}" '
        'style="width:100%;border-radius:8px;margin-bottom:8px;display:block">'
        if thumbnail_url else ""
    )
    tags = meta.get("tags") or []
    tag_html = "".join(
        f'<span class="vv-tag">#{escape(str(tag))}</span>' for tag in tags[:20]
    )
    tags_block = tag_html or '<span style="color:#5a6070;font-size:0.78rem">(none)</span>'

    # Truncate first, then escape, so an entity is never cut in half.
    desc_short = escape((meta.get("description") or "")[:1200])
    word_count = len(transcript.split()) if transcript else 0
    if transcript:
        transcript_short = transcript[:2500] + "…" if len(transcript) > 2500 else transcript
    else:
        transcript_short = "(not available)"
    transcript_short = escape(transcript_short)

    title_html = escape(str(meta["title"]))
    channel_html = escape(str(meta["channel_title"]))
    published_html = escape(str(meta["published_at"]))
    duration_html = escape(str(meta["duration"]))
    video_id_attr = escape(str(video_id), quote=True)

    left_html = f"""
    {thumb_html}
    <a href="https://www.youtube.com/watch?v={video_id_attr}" target="_blank"
       style="display:block;text-align:center;font-family:'DM Mono',monospace;
              font-size:0.75rem;color:#5a6070;text-decoration:none;margin:4px 0 10px">
      ▶ Open on YouTube
    </a>
    <div class="vv-card">
      <p class="vv-section-title">Video</p>
      <p style="font-family:'Syne',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 4px;color:#e8eaf0">
        {title_html}
      </p>
      <p style="font-size:0.82rem;color:#5a6070;margin:0">
        by <b style="color:#b0b4c0">{channel_html}</b> · {published_html}
      </p>
    </div>

    <p class="vv-section-title">Metrics</p>
    <span class="vv-stat">👁 {meta['view_count']:,}</span>
    <span class="vv-stat">👍 {meta['like_count']:,}</span>
    <span class="vv-stat">💬 {meta['comment_count']:,}</span>
    <span class="vv-stat">⏱ {duration_html}</span>

    <p class="vv-section-title" style="margin-top:1rem">Tags</p>
    {tags_block}

    <details style="margin-top:1rem">
      <summary>📄 Description</summary>
      <p style="font-size:0.78rem;color:#8090a0;line-height:1.65;white-space:pre-wrap;margin-top:6px">{desc_short}</p>
    </details>
    <details style="margin-top:0.5rem">
      <summary>📝 Transcript ({word_count} words)</summary>
      <p style="font-size:0.75rem;color:#8090a0;line-height:1.65;margin-top:6px">{transcript_short}</p>
    </details>
    """

    # Misinfo badge: < 0.35 credible, < 0.65 uncertain, otherwise likely misinformation.
    score = misinfo["score"]
    if score < 0.35:
        badge_html = '<span class="vv-badge-green">✅ Appears Credible</span>'
    elif score < 0.65:
        badge_html = '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
    else:
        badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'

    # NOTE(review): the reasoning text is produced by the project's analyzer
    # and is inserted as-is, as before — confirm it can never echo raw video
    # text, otherwise it should be escaped as well.
    reasoning_html = (
        f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>'
    )

    # Modality charts
    mod_analysis = misinfo.get("modality_analysis", {})
    fig_mod_dist = _chart_or_placeholder(
        lambda: modality_misinfo_distribution(mod_analysis),
        "Modality distribution unavailable",
    )
    fig_trust = _chart_or_placeholder(
        lambda: trust_score_by_modality(mod_analysis),
        "Trust score unavailable",
    )
    fig_uncert = _chart_or_placeholder(
        lambda: uncertainty_analysis(mod_analysis),
        "Uncertainty analysis unavailable",
    )

    # Sentiment charts
    fig_donut = _chart_or_placeholder(
        lambda: sentiment_donut(sent_sum) if sent_sum else _empty_plotly("No comments analysed")
    )
    fig_timeline = _chart_or_placeholder(
        lambda: (
            sentiment_timeline(comments_df, sentiments)
            if (sent_sum and not comments_df.empty)
            else _empty_plotly("No comments analysed")
        )
    )
    fig_kw = _chart_or_placeholder(
        lambda: keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff")
    )
    fig_kw_comp = _chart_or_placeholder(
        lambda: (
            keyword_comparison(pos_kw, neg_kw)
            if (pos_kw or neg_kw)
            else _empty_plotly("No keyword comparison — no comments")
        )
    )

    # Sentiment stat boxes
    def _stat_card(value_class, pct, label):
        return (
            f'<div class="vv-card" style="text-align:center">'
            f'<p class="{value_class}">{pct}%</p>'
            f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">{label}</p></div>'
        )

    if sent_sum:
        stat_pos = _stat_card("vv-stat-big-green", sent_sum["pos_pct"], "Positive")
        stat_neg = _stat_card("vv-stat-big-red", sent_sum["neg_pct"], "Negative")
        stat_neu = _stat_card("vv-stat-big-dim", sent_sum["neu_pct"], "Neutral")
    else:
        placeholder = (
            '<div class="vv-card" style="text-align:center;color:#5a6070;'
            'font-family:DM Mono,monospace;font-size:0.8rem;padding:1.2rem">N/A</div>'
        )
        stat_pos = stat_neg = stat_neu = placeholder

    # Comment DataFrames
    show_cols = ["author", "text", "likes", "published_at"]
    df_all = df_pos = df_neg = df_top = pd.DataFrame()

    if not comments_df.empty:
        display_df = comments_df.copy()
        if sentiments:
            display_df["sentiment"] = [s["label"] for s in sentiments]
            display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
            cols = show_cols + ["sentiment", "compound"]
        else:
            cols = show_cols

        df_all = display_df[cols].head(100).reset_index(drop=True)
        df_top = (
            display_df.sort_values("likes", ascending=False)
            .head(20)[cols]
            .reset_index(drop=True)
        )
        if "sentiment" in display_df.columns:
            is_positive = display_df["sentiment"] == "POSITIVE"
            is_negative = display_df["sentiment"] == "NEGATIVE"
            df_pos = display_df[is_positive][cols].head(50).reset_index(drop=True)
            df_neg = display_df[is_negative][cols].head(50).reset_index(drop=True)

    return (
        status_html,     # 0  status_box
        log_html,        # 1  log_html_out
        left_html,       # 2  left_panel_html
        badge_html,      # 3  misinfo_badge_html
        reasoning_html,  # 4  misinfo_reasoning_html
        fig_mod_dist,    # 5  modality_dist_plot
        fig_trust,       # 6  trust_score_plot
        fig_uncert,      # 7  uncertainty_plot
        fig_donut,       # 8  donut_plot
        fig_timeline,    # 9  timeline_plot
        fig_kw,          # 10 kw_bar_plot
        fig_kw_comp,     # 11 kw_comp_plot
        stat_pos,        # 12 stat_pos_html
        stat_neg,        # 13 stat_neg_html
        stat_neu,        # 14 stat_neu_html
        df_all,          # 15 df_all_out
        df_pos,          # 16 df_pos_out
        df_neg,          # 17 df_neg_out
        df_top,          # 18 df_top_out
    )
626
 
 
 
 
 
 
 
 
627
 
628
- # UPLOAD / SEARCH HELPERS
629
 
630
 
631
def do_search(keyword: str):
    """Search YouTube for ``keyword`` and return result cards plus a radio update.

    Args:
        keyword: Title or keyword typed by the user.

    Returns:
        A ``(html, radio_update)`` pair. ``html`` is either a coloured notice
        (missing key, empty keyword, no results) or one card per result.
        ``radio_update`` hides and clears the radio for notices, or shows it
        with one ``(title, watch_url)`` choice per result.
    """
    from html import escape

    def _notice(color: str, text: str):
        # Notice paragraph plus a hidden, emptied radio.
        return (
            f"<p style='color:{color};font-family:DM Mono,monospace'>{text}</p>",
            gr.update(choices=[], value=None, visible=False),
        )

    api_key = os.environ.get("YT_API_KEY", "").strip()
    if not api_key:
        return _notice("#ff4757", "⚠️ YT_API_KEY secret not set.")

    query = (keyword or "").strip()
    if not query:
        return _notice("#ffb347", "Enter a keyword to search.")

    results = search_videos_by_title(query, api_key, max_results=5)
    if not results:
        return _notice("#ffb347", "No results found.")

    cards = []
    choices = []
    for r in results:
        vid = r["video_id"]
        url = f"https://www.youtube.com/watch?v={vid}"
        # Radio labels are passed as plain strings, so they are not escaped here.
        choices.append((r["title"][:70], url))

        # Search results are untrusted external text: escape everything that
        # goes into markup (truncate first so no entity is cut in half).
        thumb_attr = escape(str(r["thumbnail_url"]), quote=True)
        title_html = escape(r["title"][:80])
        channel_html = escape(str(r["channel_title"]))
        published_html = escape(str(r["published_at"]))
        vid_html = escape(str(vid))
        cards.append(
            f'<div class="vv-card" style="display:flex;align-items:center;gap:12px;margin-bottom:6px">'
            f'<img src="{thumb_attr}" '
            f' style="width:72px;height:54px;object-fit:cover;border-radius:6px;flex-shrink:0">'
            f'<div>'
            f'<p style="margin:0;font-size:0.85rem;font-weight:600;color:#e8eaf0">{title_html}</p>'
            f'<p style="margin:0;font-size:0.75rem;color:#5a6070">'
            f'{channel_html} · {published_html} · '
            f'<code style="color:#00d4ff">v={vid_html}</code></p>'
            f'</div></div>'
        )

    return "".join(cards), gr.update(choices=choices, value=None, visible=True)
669
-
670
-
671
def pick_and_analyze(selected_url, sentiment_method, max_comments):
    """Run the pipeline for the video chosen in the search-results radio.

    Generator: yields the outputs of ``run_pipeline`` for ``selected_url``,
    or a single placeholder tuple asking the user to pick a video when
    nothing is selected.
    """
    if selected_url:
        yield from run_pipeline(selected_url, sentiment_method, max_comments)
    else:
        yield _blank_outputs("Select a video from the search results above.")
676
-
677
-
678
- # GRADIO BLOCKS UI
679
-
680
-
681
# Layout and event wiring for the whole app. Component creation order defines
# the page layout; ALL_OUTPUTS at the bottom defines the order of the tuples
# returned by _build_outputs / _blank_outputs.
with gr.Blocks(title="VideoVerifier — MHMisinfo") as demo:

    # Header
    gr.HTML("""
    <div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #1e2330;margin-bottom:1.2rem">
      <h1 class="vv-hero">🔬 Video Verifier & Sentiment Analyzer</h1>
      <p style="color:#5a6070;font-size:0.85rem;margin-top:4px;font-family:'DM Mono',monospace">
        mental health misinformation detection
      </p>
    </div>
    """)

    # Settings — NO API key field
    with gr.Accordion("⚙️ Settings", open=False):
        gr.HTML("""
        <div style="background:#0d1119;border:1px solid #1e2330;border-radius:8px;
                    padding:0.7rem 1rem;margin-bottom:0.8rem;font-family:'DM Mono',monospace;
                    font-size:0.78rem;color:#5a6070">
          🔑 YouTube API key is read from the <code style="color:#00d4ff">YT_API_KEY</code>
          Space secret — it is never exposed in the UI.
        </div>
        """)
        with gr.Row():
            sentiment_selector = gr.Dropdown(
                choices=[
                    ("VADER — fast, CPU-only (~5 000 comments/sec)", "vader"),
                    ("DistilBERT — accurate, downloads ~500 MB on first run", "hf"),
                ],
                value="vader",
                label="Sentiment Engine",
                scale=3,
            )
            max_comments_slider = gr.Slider(
                minimum=10, maximum=500, value=150, step=10,
                label="Max comments to fetch",
                scale=3,
                info="YouTube API quota: ~1 unit per comment request",
            )

    # Input tabs
    with gr.Tabs():

        with gr.TabItem("🔗 YouTube URL"):
            with gr.Row():
                url_input = gr.Textbox(
                    placeholder="https://www.youtube.com/watch?v=... or youtu.be/... or raw 11-char ID",
                    label="YouTube URL / Video ID",
                    scale=5,
                )
                analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1, min_width=130)

        with gr.TabItem("📁 Upload / Search by Title"):
            gr.HTML("""
            <div class="vv-card" style="margin-bottom:8px">
              <p class="vv-section-title">Search by video title or keyword</p>
              <p style="font-size:0.82rem;color:#5a6070;line-height:1.6;margin:0">
                Upload your file, then type the title or keyword below to locate the matching YouTube entry.
              </p>
            </div>
            """)
            # NOTE(review): the uploaded file is not wired to any event in
            # this block; only the keyword search below drives the analysis.
            upload_file = gr.File(
                label="Drop a video file (mp4, mov, avi, mkv, webm)",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
            )
            with gr.Row():
                kw_input = gr.Textbox(placeholder="Enter video title or keyword…", label="Search keyword", scale=4)
                search_btn = gr.Button("🔎 Find on YouTube", scale=1)
            search_results_html = gr.HTML()
            search_radio = gr.Radio(label="Select a video to analyze", choices=[], visible=False)

    # Status
    status_box = gr.HTML(
        '<p style="color:#5a6070;font-family:DM Mono,monospace;font-size:0.8rem;padding:6px 0">'
        "Enter a URL above and click Analyze.</p>"
    )

    # Main results layout
    with gr.Row(equal_height=False):

        # LEFT — video info
        with gr.Column(scale=2):
            left_panel_html = gr.HTML(
                "<div style='padding:3rem;text-align:center;color:#5a6070;"
                "font-family:DM Mono,monospace'>No data yet.</div>"
            )

        # RIGHT — analytics
        with gr.Column(scale=3):

            # ── Misinformation Analysis ───────────────────────────────────────
            gr.HTML('<p class="vv-section-title" style="margin-top:0">🔬 Misinformation Analysis</p>')
            misinfo_badge_html = gr.HTML()

            # Row 1 — Modality Misinformation Distribution (full width)
            with gr.Row():
                modality_dist_plot = gr.Plot(label="", show_label=False)

            # Row 2 — Trust Score | Uncertainty Analysis (side by side)
            with gr.Row():
                trust_score_plot = gr.Plot(label="", show_label=False)
                uncertainty_plot = gr.Plot(label="", show_label=False)

            misinfo_reasoning_html = gr.HTML()

            gr.HTML('<hr class="vv-hr">')

            # ── Comment Sentiment ─────────────────────────────────────────────
            gr.HTML('<p class="vv-section-title">💬 Comment Sentiment</p>')
            with gr.Row():
                stat_pos_html = gr.HTML()
                stat_neg_html = gr.HTML()
                stat_neu_html = gr.HTML()
            with gr.Row():
                donut_plot = gr.Plot(label="", show_label=False)
                timeline_plot = gr.Plot(label="", show_label=False)
            with gr.Row():
                kw_bar_plot = gr.Plot(label="", show_label=False)
                kw_comp_plot = gr.Plot(label="", show_label=False)

            gr.HTML('<hr class="vv-hr">')

            # ── Comments Deep-Dive ────────────────────────────────────────────
            gr.HTML('<p class="vv-section-title">📊 Comments Deep-Dive</p>')
            with gr.Tabs():
                with gr.TabItem("All"):
                    df_all_out = gr.Dataframe(
                        headers=["author", "text", "likes", "published_at", "sentiment", "compound"],
                        datatype=["str", "str", "number", "str", "str", "number"],
                        wrap=True,
                        max_height=320,
                    )
                with gr.TabItem("Positive"):
                    df_pos_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Negative"):
                    df_neg_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Most Liked"):
                    df_top_out = gr.Dataframe(wrap=True, max_height=320)

    # Activity log
    with gr.Accordion("📜 Activity Log", open=False):
        log_html_out = gr.HTML('<p class="vv-log-line">—</p>')

    # Footer
    gr.HTML("""
    <div style="margin-top:2rem;padding-top:1rem;border-top:1px solid #1e2330;
                text-align:center;font-family:'DM Mono',monospace;font-size:0.72rem;color:#3a3f50">
      4-stream SeTa-Attention BiGRU · CCM / DMTE / Uncertainty Fusion ·
      Test ROC-AUC 0.967
    </div>
    """)

    # ── Output list — order must match _build_outputs / _blank_outputs exactly ─
    ALL_OUTPUTS = [
        status_box,              # 0
        log_html_out,            # 1
        left_panel_html,         # 2
        misinfo_badge_html,      # 3
        misinfo_reasoning_html,  # 4
        modality_dist_plot,      # 5
        trust_score_plot,        # 6
        uncertainty_plot,        # 7
        donut_plot,              # 8
        timeline_plot,           # 9
        kw_bar_plot,             # 10
        kw_comp_plot,            # 11
        stat_pos_html,           # 12
        stat_neg_html,           # 13
        stat_neu_html,           # 14
        df_all_out,              # 15
        df_pos_out,              # 16
        df_neg_out,              # 17
        df_top_out,              # 18
    ]

    # Pipeline inputs (no api_key_input — read from env)
    _pipeline_inputs = [url_input, sentiment_selector, max_comments_slider]

    # Events: URL tab
    analyze_btn.click(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)
    url_input.submit(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)

    # Events: Upload/Search tab
    search_btn.click(
        fn=do_search,
        inputs=[kw_input],
        outputs=[search_results_html, search_radio],
    )
    # `input` fires only when the user picks an option. `change` would also
    # fire when do_search() resets the radio value to None, and that would
    # replace results already on screen with a "Select a video…" placeholder.
    search_radio.input(
        fn=pick_and_analyze,
        inputs=[search_radio, sentiment_selector, max_comments_slider],
        outputs=ALL_OUTPUTS,
    )
873
 
 
874
 
875
- # Launch — css and theme go HERE in Gradio 6.x (NOT in gr.Blocks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
876
 
877
if __name__ == "__main__":
    # Build the theme first, then hand both the stylesheet and the theme to
    # launch(). NOTE(review): passing css/theme to launch() rather than to
    # gr.Blocks() follows the file's own Gradio 6.x note — confirm against
    # the installed Gradio version.
    app_theme = gr.themes.Base(
        primary_hue=gr.themes.colors.cyan,
        neutral_hue=gr.themes.colors.gray,
        font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
    )
    demo.launch(css=CSS, theme=app_theme)
 
 
 
 
 
 
 
 
1
  """
2
+ charts.py — Plotly chart builders for Mental Health Information Verification.
3
+ Pure functions, no Streamlit/Gradio imports.
4
  """
5
 
6
+ from typing import Dict, List, Tuple
7
+ import plotly.graph_objects as go
8
  import pandas as pd
9
+ import numpy as np
10
+
11
+
12
# ============================================================
# Medical / Mental Health Information Theme
# ============================================================

# Surface and text colours. NOTE: DARK_BG keeps its historical name but
# holds white — the theme defined here is a light one.
DARK_BG = "#ffffff"
CARD_BG = "#f8fafc"
BORDER = "#e2e8f0"
TEXT_MAIN = "#1e293b"
TEXT_DIM = "#64748b"

# Medical information-verification palette
CYAN = "#0891b2"    # clinical cyan
GREEN = "#10b981"   # reliable / safe information
RED = "#ef4444"     # misinformation risk
AMBER = "#f59e0b"   # uncertain / mixed
PURPLE = "#8b5cf6"
BLUE = "#2563eb"

# Layout keyword arguments shared by every chart; the builders splat this
# into ``fig.update_layout(**PLOTLY_LAYOUT, ...)``.
PLOTLY_LAYOUT = {
    "paper_bgcolor": "#ffffff",
    "plot_bgcolor": "#ffffff",
    "font": {
        "family": "'Inter', 'IBM Plex Sans', sans-serif",
        "color": TEXT_MAIN,
        "size": 12,
    },
    "margin": {"l": 20, "r": 20, "t": 45, "b": 25},
    "hoverlabel": {
        "bgcolor": "#ffffff",
        "bordercolor": CYAN,
        "font": {"color": TEXT_MAIN, "family": "'Inter', sans-serif", "size": 12},
    },
}
41
 
42
 
43
def make_interactive(fig: go.Figure, height: int = 300) -> go.Figure:
    """Apply the shared interactive settings to ``fig`` and return it.

    The figure is modified in place: height, hover/drag modes, transition,
    legend click behaviour, mode-bar colours and axis spike/grid styling.

    Args:
        fig: Figure to configure.
        height: Figure height passed to the layout.

    Returns:
        The same ``fig`` object, for call chaining.
    """
    see_through = "rgba(255,255,255,0)"

    legend_cfg = dict(
        itemclick="toggle",
        itemdoubleclick="toggleothers",
        bgcolor=see_through,
        font=dict(size=11, color=TEXT_MAIN),
    )
    modebar_cfg = dict(
        bgcolor=see_through,
        color=TEXT_DIM,
        activecolor=CYAN,
    )
    fig.update_layout(
        height=height,
        hovermode="closest",
        dragmode="zoom",
        transition=dict(duration=400, easing="cubic-in-out"),
        legend=legend_cfg,
        modebar=modebar_cfg,
    )

    # Both axes receive exactly the same spike-line and grid styling.
    axis_cfg = dict(
        showspikes=True,
        spikecolor=CYAN,
        spikethickness=1,
        spikedash="dot",
        showline=True,
        linecolor=BORDER,
        gridcolor="#edf2f7",
        zerolinecolor=BORDER,
    )
    fig.update_xaxes(**axis_cfg)
    fig.update_yaxes(**axis_cfg)

    return fig
86
 
87
+
88
+ # ============================================================
89
+ # Overall Misinformation Gauge
90
+ # ============================================================
91
+
92
def misinfo_gauge(score: float, label: str) -> go.Figure:
    """Build a gauge for the misinformation confidence score.

    Args:
        score: Confidence score on a 0–1 scale; shown as a percentage.
        label: Heading text placed above the risk description.

    Returns:
        An indicator figure coloured by risk band (below 0.35, below 0.65,
        otherwise high risk).
    """
    pct = score * 100

    # Bands are checked in ascending order; anything not below an upper
    # bound falls through to the high-risk default.
    bar_color, risk_text = RED, "Likely Mental Health Misinformation"
    for upper_bound, band_color, band_text in (
        (0.35, GREEN, "Likely Reliable Health Information"),
        (0.65, AMBER, "Uncertain / Mixed Health Claims"),
    ):
        if score < upper_bound:
            bar_color, risk_text = band_color, band_text
            break

    number_cfg = {
        "suffix": "%",
        "font": {
            "size": 34,
            "color": bar_color,
            "family": "'Inter', sans-serif",
        },
    }
    delta_cfg = {
        "reference": 50,
        "increasing": {"color": RED},
        "decreasing": {"color": GREEN},
    }
    title_cfg = {
        "text": f"{label}<br><span style='font-size:11px;color:{TEXT_DIM}'>{risk_text}</span>",
        "font": {"size": 13, "color": TEXT_DIM},
    }
    gauge_cfg = {
        "axis": {
            "range": [0, 100],
            "tickwidth": 1,
            "tickcolor": BORDER,
            "tickfont": {"color": TEXT_DIM, "size": 10},
        },
        "bar": {"color": bar_color, "thickness": 0.32},
        "bgcolor": CARD_BG,
        "borderwidth": 0,
        # Pale background bands mirroring the three risk ranges.
        "steps": [
            {"range": [0, 35], "color": "#ecfdf5"},
            {"range": [35, 65], "color": "#fffbeb"},
            {"range": [65, 100], "color": "#fef2f2"},
        ],
        "threshold": {
            "line": {"color": TEXT_MAIN, "width": 2},
            "thickness": 0.75,
            "value": pct,
        },
    }

    indicator = go.Indicator(
        mode="gauge+number+delta",
        value=pct,
        number=number_cfg,
        delta=delta_cfg,
        title=title_cfg,
        gauge=gauge_cfg,
    )
    fig = go.Figure(indicator)
    fig.update_layout(**PLOTLY_LAYOUT)
    return make_interactive(fig, height=260)
151
+
152
+
153
+ # ============================================================
154
+ # Sentiment Donut
155
+ # ============================================================
156
+
157
def sentiment_donut(summary: Dict) -> go.Figure:
    """Build a donut chart of positive / neutral / negative comment counts.

    Args:
        summary: Mapping read for the keys ``"POSITIVE"``, ``"NEUTRAL"``,
            ``"NEGATIVE"`` (counts), ``"total"`` and ``"avg_compound"``.
            Missing counts default to 0; a missing ``"total"`` falls back to
            the sum of the three counts.

    Returns:
        The donut figure, or the shared empty placeholder when all three
        counts are zero.
    """
    chart_title = "Audience Sentiment Around Health Information"

    labels = ["Supportive / Positive", "Neutral / Informational", "Concerned / Negative"]
    values = [
        summary.get("POSITIVE", 0) or 0,
        summary.get("NEUTRAL", 0) or 0,
        summary.get("NEGATIVE", 0) or 0,
    ]
    colors = [GREEN, "#cbd5e1", RED]

    # A pie with only zero slices renders blank; show the same placeholder
    # the other chart builders use for missing data.
    if not any(values):
        return _empty_fig(chart_title)

    fig = go.Figure(go.Pie(
        labels=labels,
        values=values,
        hole=0.62,
        pull=[0.04, 0.02, 0.04],
        marker=dict(colors=colors, line=dict(color="#ffffff", width=3)),
        textinfo="label+percent",
        hoverinfo="label+value+percent",
        insidetextorientation="radial",
        textfont=dict(size=11, color=TEXT_MAIN),
        hovertemplate="<b>%{label}</b><br>%{value} comments<br>%{percent}<extra></extra>",
        rotation=90,
    ))

    avg = summary.get("avg_compound", 0) or 0
    if avg > 0.05:
        overall = "Supportive Discussion"
    elif avg < -0.05:
        overall = "Concerned Discussion"
    else:
        overall = "Mixed Discussion"

    total = summary.get("total", sum(values))

    # Pin the label to paper coordinates so it stays in the donut hole even
    # though make_interactive() configures cartesian axes on the figure.
    fig.add_annotation(
        text=f"<b>{overall}</b><br><span style='font-size:11px;color:{TEXT_DIM}'>{total} comments</span>",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=13, color=TEXT_MAIN),
        align="center",
    )

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text=chart_title, font=dict(size=13, color=TEXT_DIM), x=0),
        legend=dict(orientation="h", y=-0.10, font=dict(size=10)),
    )

    return make_interactive(fig, height=310)
198
+
199
+
200
+ # ============================================================
201
+ # Keyword Bar
202
+ # ============================================================
203
+
204
def keyword_bar(
    keywords: List[Tuple[str, float]],
    title: str = "Key Mental Health Information Signals",
    color: str = CYAN,
) -> go.Figure:
    """Build a horizontal bar chart of the top 15 weighted keywords.

    Args:
        keywords: ``(word, weight)`` pairs; only the first 15 are plotted.
        title: Chart title.
        color: Colour at the top end of the bar colour scale.

    Returns:
        The bar figure, or the empty placeholder when ``keywords`` is empty.
    """
    if not keywords:
        return _empty_fig(title)

    words, weights = zip(*keywords[:15])

    # Bar lengths are relative to the largest weight; ``or 1`` avoids a
    # division by zero when that largest weight is 0.
    largest = max(weights) or 1
    scaled = [weight / largest * 100 for weight in weights]

    bar_style = dict(
        color=scaled,
        colorscale=[[0, "#e0f2fe"], [1, color]],
        line=dict(color="#ffffff", width=1),
    )
    raw_labels = [f"{weight:.0f}" for weight in weights]

    bars = go.Bar(
        x=scaled,
        y=words,
        orientation="h",
        marker=bar_style,
        text=raw_labels,
        textposition="inside",
        textfont=dict(size=10, color="#ffffff"),
        hovertemplate="<b>%{y}</b><br>Signal weight: %{text}<br>Normalised: %{x:.1f}%<extra></extra>",
    )
    fig = go.Figure(bars)

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text=title, font=dict(size=13, color=TEXT_DIM), x=0),
        # Reversed so the first keyword is drawn at the top.
        yaxis=dict(autorange="reversed", tickfont=dict(size=11), gridcolor="#edf2f7"),
        xaxis=dict(showticklabels=False, gridcolor="#edf2f7"),
        bargap=0.35,
    )

    return make_interactive(fig, height=380)
240
 
 
 
 
 
 
 
 
 
241
 
242
+ # ============================================================
243
+ # Stream Misinformation Bars
244
+ # ============================================================
 
245
 
246
def stream_trust_bars(stream_details: Dict) -> go.Figure:
    """Build a horizontal bar chart of per-stream misinformation scores.

    Args:
        stream_details: Mapping of stream name to score; each score is
            multiplied by 100 and rounded to one decimal for display.

    Returns:
        The bar figure, or the shared empty placeholder when
        ``stream_details`` is empty.
    """
    chart_title = "Per-Stream Health Information Risk"

    # Match the other builders: no data yields the placeholder figure
    # instead of a blank set of axes.
    if not stream_details:
        return _empty_fig(chart_title)

    stream_names = [name.replace("_", " ").title() for name in stream_details]
    values = [round(score * 100, 1) for score in stream_details.values()]
    colors = [RED if v > 50 else (AMBER if v > 30 else GREEN) for v in values]

    fig = go.Figure(go.Bar(
        x=values,
        y=stream_names,
        orientation="h",
        marker=dict(color=colors, line=dict(color="#ffffff", width=1)),
        text=[f"{v}%" for v in values],
        textposition="outside",
        textfont=dict(size=11, color=TEXT_MAIN),
        hovertemplate="<b>%{y}</b><br>Misinformation signal: %{x:.1f}%<extra></extra>",
    ))

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text=chart_title, font=dict(size=13, color=TEXT_DIM), x=0),
        # Range extends past 100 to leave room for the outside text labels.
        xaxis=dict(range=[0, 110], showticklabels=False, gridcolor="#edf2f7"),
        yaxis=dict(tickfont=dict(size=11)),
        bargap=0.4,
    )

    return make_interactive(fig, height=220)
272
+
273
+
274
+ # ============================================================
275
+ # Modality Distribution
276
+ # ============================================================
277
+
278
def modality_misinfo_distribution(modality_analysis: Dict) -> go.Figure:
    """Build a grouped bar chart of misinformation vs reliable scores.

    Args:
        modality_analysis: Mapping keyed by ``"text"``, ``"audio"`` and
            ``"video"``; each entry is read for ``misinfo_pct``,
            ``credible_pct`` (default 50.0) and ``misinfo_logit``,
            ``credible_logit`` (default 0.0).

    Returns:
        A figure with one red and one green bar per modality.
    """
    MODALITIES = ["Text", "Audio", "Video"]
    KEYS = ["text", "audio", "video"]

    # Look each modality up once; a missing modality behaves as an empty dict.
    entries = [modality_analysis.get(key, {}) for key in KEYS]

    misinfo_pcts = [entry.get("misinfo_pct", 50.0) for entry in entries]
    credible_pcts = [entry.get("credible_pct", 50.0) for entry in entries]
    logit_tips = [
        f"logit_m={entry.get('misinfo_logit', 0.0):+.4f} | "
        f"logit_r={entry.get('credible_logit', 0.0):+.4f}"
        for entry in entries
    ]

    fig = go.Figure()

    # The two series differ only in name, values and colour.
    series = (
        ("Misinformation Signal", misinfo_pcts, RED),
        ("Reliable Health Information", credible_pcts, GREEN),
    )
    for series_name, pcts, colour in series:
        fig.add_trace(go.Bar(
            name=series_name,
            x=MODALITIES,
            y=pcts,
            marker=dict(
                color=[colour] * len(MODALITIES),
                opacity=0.88,
                line=dict(color="#ffffff", width=1),
            ),
            text=[f"{v:.1f}%" for v in pcts],
            textposition="outside",
            textfont=dict(size=11, color=colour),
            customdata=logit_tips,
            hovertemplate=(
                "<b>%{x} — " + series_name + "</b><br>"
                "Softmax score: %{y:.2f}%<br>"
                "%{customdata}<extra></extra>"
            ),
        ))

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Modality-Level Health Information Assessment", font=dict(size=13, color=TEXT_DIM), x=0),
        barmode="group",
        xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor="#edf2f7"),
        yaxis=dict(title="Model Score (%)", range=[0, 115], gridcolor="#edf2f7", ticksuffix="%"),
        legend=dict(orientation="h", y=1.12, font=dict(size=11), bgcolor="rgba(255,255,255,0)"),
        bargap=0.22,
        bargroupgap=0.06,
    )

    return make_interactive(fig, height=290)
340
+
341
+
342
+ # ============================================================
343
+ # Trust Score
344
+ # ============================================================
345
+
346
def trust_score_by_modality(modality_analysis: Dict) -> go.Figure:
    """Build a vertical bar chart of the trust score per modality.

    Args:
        modality_analysis: Mapping keyed by ``"text"``, ``"audio"`` and
            ``"video"``; each entry is read for ``trust_score``
            (default 0.0), plotted on a percentage axis.

    Returns:
        A bar figure with two dotted reference lines at 80 and 50.
    """
    MODALITIES = ["Text", "Audio", "Video"]
    KEYS = ["text", "audio", "video"]

    trust_vals = []
    bar_colors = []
    for key in KEYS:
        value = modality_analysis.get(key, {}).get("trust_score", 0.0)
        trust_vals.append(value)
        # NOTE(review): colour cut-offs (60 / 35) differ from the reference
        # lines drawn below (80 / 50) — confirm this is intended.
        if value >= 60:
            bar_colors.append(GREEN)
        elif value >= 35:
            bar_colors.append(AMBER)
        else:
            bar_colors.append(RED)

    bars = go.Bar(
        x=MODALITIES,
        y=trust_vals,
        marker=dict(color=bar_colors, opacity=0.88, line=dict(color="#ffffff", width=1)),
        text=[f"{v:.1f}%" for v in trust_vals],
        textposition="outside",
        textfont=dict(size=11, color=TEXT_MAIN),
        hovertemplate=(
            "<b>%{x}</b><br>"
            "Reliability level: %{y:.2f}%<br>"
            "<i>Higher score means the modality provides stronger health-information evidence.</i>"
            "<extra></extra>"
        ),
    )
    fig = go.Figure(bars)

    reference_lines = (
        (80, "High Reliability", GREEN),
        (50, "Moderate Reliability", AMBER),
    )
    for level, caption, line_color in reference_lines:
        fig.add_hline(
            y=level,
            line=dict(color=line_color, width=1, dash="dot"),
            annotation_text=caption,
            annotation_position="right",
            annotation_font=dict(size=9, color=line_color),
        )

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Reliability Score by Modality", font=dict(size=13, color=TEXT_DIM), x=0),
        xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor="#edf2f7"),
        yaxis=dict(title="Reliability Level (%)", range=[0, 115], gridcolor="#edf2f7", ticksuffix="%"),
        bargap=0.38,
    )

    return make_interactive(fig, height=280)
387
+
388
+
389
+ # ============================================================
390
+ # Uncertainty Analysis
391
+ # ============================================================
392
+
393
def uncertainty_analysis(modality_analysis: Dict) -> go.Figure:
    """Build a vertical bar chart of model uncertainty per modality.

    Args:
        modality_analysis: Mapping keyed by ``"text"``, ``"audio"`` and
            ``"video"``; each entry is read for ``uncertainty``
            (default 100.0) and ``misinfo_pct`` (default 50.0, shown only
            in the hover text).

    Returns:
        A bar figure with dotted reference lines at 100 and 50.
    """
    MODALITIES = ["Text", "Audio", "Video"]
    KEYS = ["text", "audio", "video"]

    entries = [modality_analysis.get(key, {}) for key in KEYS]
    uncertainty_vals = [entry.get("uncertainty", 100.0) for entry in entries]
    misinfo_pcts = [entry.get("misinfo_pct", 50.0) for entry in entries]

    def _band_color(value):
        # Low uncertainty is green, mid is amber, everything else red.
        if value <= 35:
            return GREEN
        if value <= 65:
            return AMBER
        return RED

    bar_colors = [_band_color(value) for value in uncertainty_vals]

    bars = go.Bar(
        x=MODALITIES,
        y=uncertainty_vals,
        marker=dict(color=bar_colors, opacity=0.88, line=dict(color="#ffffff", width=1)),
        text=[f"{v:.1f}%" for v in uncertainty_vals],
        textposition="outside",
        textfont=dict(size=11, color=TEXT_MAIN),
        # One single-element row per bar, addressed as customdata[0] below.
        customdata=[[f"p_misinformation={m:.1f}%"] for m in misinfo_pcts],
        hovertemplate=(
            "<b>%{x}</b><br>"
            "Uncertainty: %{y:.2f}%<br>"
            "%{customdata[0]}<br>"
            "<i>Higher uncertainty means the model is less confident about the health claim.</i>"
            "<extra></extra>"
        ),
    )
    fig = go.Figure(bars)

    for level, caption, line_color in (
        (100, "Maximum Uncertainty", RED),
        (50, "Moderate Uncertainty", AMBER),
    ):
        fig.add_hline(
            y=level,
            line=dict(color=line_color, width=1, dash="dot"),
            annotation_text=caption,
            annotation_position="right",
            annotation_font=dict(size=9, color=line_color),
        )

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Model Uncertainty in Health Information Assessment", font=dict(size=13, color=TEXT_DIM), x=0),
        xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor="#edf2f7"),
        yaxis=dict(title="Uncertainty (%)", range=[0, 120], gridcolor="#edf2f7", ticksuffix="%"),
        bargap=0.38,
    )

    return make_interactive(fig, height=280)
445
 
446
 
447
+ # ============================================================
448
+ # Comment Sentiment Timeline
449
+ # ============================================================
 
 
 
 
 
 
 
 
 
450
 
451
def sentiment_timeline(comments_df: pd.DataFrame, sentiments: List[Dict]) -> go.Figure:
    """Build a scatter plot of comment position vs sentiment compound score.

    Args:
        comments_df: Comments table; the ``"text"`` column is read for hover
            text and the ``"likes"`` column (optional) for marker size.
        sentiments: One dict per comment, in the same order as the rows of
            ``comments_df``; read for ``"compound"`` (default 0) and
            ``"label"`` (default ``"NEUTRAL"``).

    Returns:
        The scatter figure with one trace per sentiment label that has at
        least one comment, or the empty placeholder when ``comments_df`` is
        empty.
    """
    if comments_df.empty:
        return _empty_fig("Audience Response Distribution")

    # Sentiments are matched to rows by position, so plot against a clean
    # 0..n-1 index rather than whatever index the caller's frame carries.
    df = comments_df.reset_index(drop=True)
    df["compound"] = [s.get("compound", 0) for s in sentiments]
    df["label"] = [s.get("label", "NEUTRAL") for s in sentiments]

    # Add the ellipsis only when the comment was actually cut.
    full_text = df["text"].fillna("").astype(str)
    clipped = full_text.str[:80]
    df["text_short"] = clipped.where(full_text.str.len() <= 80, clipped + "…")

    # Coerce likes to non-negative whole numbers so marker sizes never hit
    # NaN and the hover shows "12" rather than "12.0".
    if "likes" in df.columns:
        likes = pd.to_numeric(df["likes"], errors="coerce").fillna(0).clip(lower=0)
    else:
        likes = pd.Series(0, index=df.index)
    df["likes"] = likes.astype(int)

    fig = go.Figure()

    for lbl, clr, display_name in [
        ("POSITIVE", GREEN, "Supportive / Positive"),
        ("NEGATIVE", RED, "Concerned / Negative"),
        ("NEUTRAL", AMBER, "Neutral / Informational"),
    ]:
        sub = df[df["label"] == lbl]
        if sub.empty:
            continue

        fig.add_trace(go.Scatter(
            x=sub.index,
            y=sub["compound"],
            mode="markers",
            name=display_name,
            marker=dict(
                # Log scaling keeps heavily-liked comments from dwarfing the rest.
                size=np.clip(np.log1p(sub["likes"]) * 4 + 4, 4, 20),
                color=clr,
                opacity=0.78,
                line=dict(width=1, color="#ffffff"),
            ),
            text=sub["text_short"],
            customdata=np.stack(
                [
                    sub["likes"].astype(str),
                    sub["label"].astype(str),
                ],
                axis=-1,
            ),
            hovertemplate=(
                "<b>%{text}</b><br>"
                "Audience response: %{customdata[1]}<br>"
                "Compound score: %{y:.2f}<br>"
                "Likes: %{customdata[0]}<extra></extra>"
            ),
        ))

    fig.add_hline(y=0, line=dict(color=BORDER, width=1, dash="dot"))

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Audience Response to Mental Health Information", font=dict(size=13, color=TEXT_DIM), x=0),
        xaxis=dict(title="Comment Index", gridcolor="#edf2f7", showgrid=False),
        yaxis=dict(title="Sentiment Score", gridcolor="#edf2f7", range=[-1.1, 1.1]),
        legend=dict(orientation="h", y=1.12, font=dict(size=10)),
    )

    return make_interactive(fig, height=320)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
 
 
512
 
513
+ # ============================================================
514
+ # Keyword Comparison
515
+ # ============================================================
516
 
517
def keyword_comparison(
    pos_kw: List[Tuple[str, float]],
    neg_kw: List[Tuple[str, float]],
) -> go.Figure:
    """Build a diverging bar chart of supportive vs concern keywords.

    Supportive keywords extend to the right and concern keywords to the
    left; each side is scaled so its largest score becomes 100.

    Args:
        pos_kw: ``(word, score)`` pairs for supportive keywords.
        neg_kw: ``(word, score)`` pairs for concern keywords.
            Only the first 10 of each list are plotted.

    Returns:
        The bar figure, or the empty placeholder when both lists are empty.
    """
    if not pos_kw and not neg_kw:
        return _empty_fig("Audience Keyword Signals")

    top = 10
    pos_kw = pos_kw[:top]
    neg_kw = neg_kw[:top]

    fig = go.Figure()

    if pos_kw:
        pw, pv = zip(*pos_kw)
        max_p = max(pv) or 1  # guard against dividing by a zero maximum

        fig.add_trace(go.Bar(
            name="Supportive / Reliable Signals",
            y=list(pw),
            x=[v / max_p * 100 for v in pv],
            orientation="h",
            marker=dict(color=GREEN, line=dict(color="#ffffff", width=1)),
            hovertemplate="<b>%{y}</b><br>Supportive keyword score: %{x:.1f}<extra></extra>",
        ))

    if neg_kw:
        nw, nv = zip(*neg_kw)
        max_n = max(nv) or 1  # guard against dividing by a zero maximum
        neg_scores = [v / max_n * 100 for v in nv]

        fig.add_trace(go.Bar(
            name="Concern / Misinformation Signals",
            y=list(nw),
            # Negated only to draw the bars leftwards of zero.
            x=[-score for score in neg_scores],
            orientation="h",
            marker=dict(color=RED, line=dict(color="#ffffff", width=1)),
            # Hover reports the unsigned score; the sign of x is purely a
            # layout device and would read as a negative keyword score.
            customdata=neg_scores,
            hovertemplate="<b>%{y}</b><br>Concern keyword score: %{customdata:.1f}<extra></extra>",
        ))

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Audience Keyword Signals", font=dict(size=13, color=TEXT_DIM), x=0),
        barmode="overlay",
        xaxis=dict(
            title="← Concern / Misinformation Signals | Supportive / Reliable Signals →",
            gridcolor="#edf2f7",
            zeroline=True,
            zerolinecolor=BORDER,
            zerolinewidth=2,
        ),
        yaxis=dict(tickfont=dict(size=10)),
        legend=dict(orientation="h", y=1.1),
    )

    return make_interactive(fig, height=360)
573
+
574
+
575
+ # ============================================================
576
+ # Empty Figure Helper
577
+ # ============================================================
578
+
579
def _empty_fig(title: str) -> go.Figure:
    """Build the placeholder figure shown when a chart has no data.

    Args:
        title: Title of the chart the placeholder stands in for.

    Returns:
        A trace-less figure with a centred "no data" message and hidden axes.
    """
    fig = go.Figure()

    # Paper coordinates keep the message centred in the plot area; without
    # them the annotation is positioned against the data axes and shifts
    # when the user zooms or pans.
    fig.add_annotation(
        text="No health-information data available",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=14, color=TEXT_DIM),
    )

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text=title, x=0, font=dict(size=13, color=TEXT_DIM)),
        # Nothing is plotted, so ticks and grid lines would only add noise.
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
    )

    return make_interactive(fig, height=250)