rocky250 committed on
Commit
a78e55d
·
verified ·
1 Parent(s): 3d4063f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +802 -531
app.py CHANGED
@@ -33,583 +33,854 @@ from charts import (
33
  uncertainty_analysis,
34
  )
35
 
36
-
37
-
38
- DARK_BG = "#ffffff" # FIXED: white background
39
- CARD_BG = "#ffffff" # FIXED: white cards
40
- BORDER = "#e2e8f0"
41
- TEXT_MAIN = "#1e293b"
42
- TEXT_DIM = "#64748b"
43
-
44
- CYAN = "#0891b2"
45
- GREEN = "#10b981"
46
- RED = "#ef4444"
47
- AMBER = "#f59e0b"
48
- PURPLE = "#8b5cf6"
49
- BLUE = "#2563eb"
50
-
51
- PLOTLY_LAYOUT = dict(
52
- paper_bgcolor="#ffffff",
53
- plot_bgcolor="#ffffff",
54
- font=dict(family="'Inter', 'IBM Plex Sans', sans-serif", color=TEXT_MAIN, size=12),
55
- margin=dict(l=20, r=20, t=45, b=25),
56
- hoverlabel=dict(
57
- bgcolor="#ffffff",
58
- bordercolor=CYAN,
59
- font=dict(color=TEXT_MAIN, size=12),
60
- ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  )
62
 
63
- def make_interactive(fig: go.Figure, height: int = 300) -> go.Figure:
64
- """Apply shared interactive behaviour to every chart."""
 
65
  fig.update_layout(
66
- height=height,
67
- hovermode="closest",
68
- dragmode="zoom",
69
- transition=dict(duration=400, easing="cubic-in-out"),
70
- legend=dict(
71
- itemclick="toggle",
72
- itemdoubleclick="toggleothers",
73
- bgcolor="rgba(255,255,255,0)",
74
- font=dict(size=11, color=TEXT_MAIN),
75
- ),
76
- modebar=dict(
77
- bgcolor="rgba(255,255,255,0)",
78
- color=TEXT_DIM,
79
- activecolor=CYAN,
80
- ),
81
  )
82
-
83
- fig.update_xaxes(
84
- showspikes=True,
85
- spikecolor=CYAN,
86
- spikethickness=1,
87
- spikedash="dot",
88
- showline=True,
89
- linecolor=BORDER,
90
- gridcolor="#edf2f7",
91
- zerolinecolor=BORDER,
92
- )
93
-
94
- fig.update_yaxes(
95
- showspikes=True,
96
- spikecolor=CYAN,
97
- spikethickness=1,
98
- spikedash="dot",
99
- showline=True,
100
- linecolor=BORDER,
101
- gridcolor="#edf2f7",
102
- zerolinecolor=BORDER,
103
  )
104
-
105
  return fig
106
 
107
 
108
- # ============================================================
109
- # Overall Misinformation Gauge
110
- # ============================================================
111
-
112
- def misinfo_gauge(score: float, label: str) -> go.Figure:
113
- """Gauge chart for mental-health misinformation confidence score (0–1)."""
114
- pct = score * 100
115
-
116
- if score < 0.35:
117
- bar_color = GREEN
118
- risk_text = "Likely Reliable Health Information"
119
- elif score < 0.65:
120
- bar_color = AMBER
121
- risk_text = "Uncertain / Mixed Health Claims"
122
- else:
123
- bar_color = RED
124
- risk_text = "Likely Mental Health Misinformation"
125
-
126
- fig = go.Figure(go.Indicator(
127
- mode="gauge+number+delta",
128
- value=pct,
129
- number={
130
- "suffix": "%",
131
- "font": {
132
- "size": 34,
133
- "color": bar_color,
134
- "family": "'Inter', sans-serif",
135
- },
136
- },
137
- delta={
138
- "reference": 50,
139
- "increasing": {"color": RED},
140
- "decreasing": {"color": GREEN},
141
- },
142
- title={
143
- "text": f"{label}<br><span style='font-size:11px;color:{TEXT_DIM}'>{risk_text}</span>",
144
- "font": {"size": 13, "color": TEXT_DIM},
145
- },
146
- gauge={
147
- "axis": {
148
- "range": [0, 100],
149
- "tickwidth": 1,
150
- "tickcolor": BORDER,
151
- "tickfont": {"color": TEXT_DIM, "size": 10},
152
- },
153
- "bar": {"color": bar_color, "thickness": 0.32},
154
- "bgcolor": CARD_BG,
155
- "borderwidth": 0,
156
- "steps": [
157
- {"range": [0, 35], "color": "#ecfdf5"},
158
- {"range": [35, 65], "color": "#fffbeb"},
159
- {"range": [65, 100], "color": "#fef2f2"},
160
- ],
161
- "threshold": {
162
- "line": {"color": TEXT_MAIN, "width": 2},
163
- "thickness": 0.75,
164
- "value": pct,
165
- },
166
- },
167
- ))
168
-
169
- fig.update_layout(**PLOTLY_LAYOUT)
170
- return make_interactive(fig, height=260)
171
-
172
-
173
- # ============================================================
174
- # Sentiment Donut
175
- # ============================================================
176
-
177
- def sentiment_donut(summary: Dict) -> go.Figure:
178
- """Donut chart: Positive / Negative / Neutral audience sentiment."""
179
- labels = ["Supportive / Positive", "Neutral / Informational", "Concerned / Negative"]
180
- values = [summary["POSITIVE"], summary["NEUTRAL"], summary["NEGATIVE"]]
181
- colors = [GREEN, "#cbd5e1", RED]
182
-
183
- fig = go.Figure(go.Pie(
184
- labels=labels,
185
- values=values,
186
- hole=0.62,
187
- pull=[0.04, 0.02, 0.04],
188
- marker=dict(colors=colors, line=dict(color="#ffffff", width=3)),
189
- textinfo="label+percent",
190
- hoverinfo="label+value+percent",
191
- insidetextorientation="radial",
192
- textfont=dict(size=11, color=TEXT_MAIN),
193
- hovertemplate="<b>%{label}</b><br>%{value} comments<br>%{percent}<extra></extra>",
194
- rotation=90,
195
- ))
196
-
197
- avg = summary.get("avg_compound", 0)
198
- overall = "Supportive Discussion" if avg > 0.05 else (
199
- "Concerned Discussion" if avg < -0.05 else "Mixed Discussion"
200
  )
201
 
202
- fig.add_annotation(
203
- text=f"<b>{overall}</b><br><span style='font-size:11px;color:{TEXT_DIM}'>{summary['total']} comments</span>",
204
- x=0.5,
205
- y=0.5,
206
- showarrow=False,
207
- font=dict(size=13, color=TEXT_MAIN),
208
- align="center",
209
- )
210
 
211
- fig.update_layout(
212
- **PLOTLY_LAYOUT,
213
- title=dict(text="Audience Sentiment Around Health Information", font=dict(size=13, color=TEXT_DIM), x=0),
214
- legend=dict(orientation="h", y=-0.10, font=dict(size=10)),
215
- )
216
 
217
- return make_interactive(fig, height=310)
218
-
219
-
220
- # ============================================================
221
- # Keyword Bar
222
- # ============================================================
223
-
224
- def keyword_bar(
225
- keywords: List[Tuple[str, float]],
226
- title: str = "Key Mental Health Information Signals",
227
- color: str = CYAN,
228
- ) -> go.Figure:
229
- if not keywords:
230
- return _empty_fig(title)
231
-
232
- words, weights = zip(*keywords[:15])
233
- max_w = max(weights) or 1
234
- norm = [w / max_w * 100 for w in weights]
235
-
236
- fig = go.Figure(go.Bar(
237
- x=norm,
238
- y=words,
239
- orientation="h",
240
- marker=dict(
241
- color=norm,
242
- colorscale=[[0, "#e0f2fe"], [1, color]],
243
- line=dict(color="#ffffff", width=1),
244
- ),
245
- text=[f"{w:.0f}" for w in weights],
246
- textposition="inside",
247
- textfont=dict(size=10, color="#ffffff"),
248
- hovertemplate="<b>%{y}</b><br>Signal weight: %{text}<br>Normalised: %{x:.1f}%<extra></extra>",
249
- ))
250
 
251
- fig.update_layout(
252
- **PLOTLY_LAYOUT,
253
- title=dict(text=title, font=dict(size=13, color=TEXT_DIM), x=0),
254
- yaxis=dict(autorange="reversed", tickfont=dict(size=11), gridcolor="#edf2f7"),
255
- xaxis=dict(showticklabels=False, gridcolor="#edf2f7"),
256
- bargap=0.35,
257
- )
 
258
 
259
- return make_interactive(fig, height=380)
 
 
 
260
 
 
 
 
 
261
 
262
- # ============================================================
263
- # Stream Misinformation Bars
264
- # ============================================================
265
-
266
- def stream_trust_bars(stream_details: Dict) -> go.Figure:
267
- """Horizontal bar chart for per-stream misinformation scores."""
268
- labels = list(stream_details.keys())
269
- values = [round(v * 100, 1) for v in stream_details.values()]
270
- colors = [RED if v > 50 else (AMBER if v > 30 else GREEN) for v in values]
271
-
272
- fig = go.Figure(go.Bar(
273
- x=values,
274
- y=[l.replace("_", " ").title() for l in labels],
275
- orientation="h",
276
- marker=dict(color=colors, line=dict(color="#ffffff", width=1)),
277
- text=[f"{v}%" for v in values],
278
- textposition="outside",
279
- textfont=dict(size=11, color=TEXT_MAIN),
280
- hovertemplate="<b>%{y}</b><br>Misinformation signal: %{x:.1f}%<extra></extra>",
281
- ))
282
-
283
- fig.update_layout(
284
- **PLOTLY_LAYOUT,
285
- title=dict(text="Per-Stream Health Information Risk", font=dict(size=13, color=TEXT_DIM), x=0),
286
- xaxis=dict(range=[0, 110], showticklabels=False, gridcolor="#edf2f7"),
287
- yaxis=dict(tickfont=dict(size=11)),
288
- bargap=0.4,
 
 
 
 
 
 
 
 
 
 
 
 
289
  )
290
 
291
- return make_interactive(fig, height=220)
292
-
 
 
 
293
 
294
- # ============================================================
295
- # Modality Distribution
296
- # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- def modality_misinfo_distribution(modality_analysis: Dict) -> go.Figure:
299
- """Grouped bar chart β€” Misinformation vs Reliable Health Information per modality."""
300
- MODALITIES = ["Text", "Audio", "Video"]
301
- KEYS = ["text", "audio", "video"]
302
 
303
- misinfo_pcts = [modality_analysis.get(k, {}).get("misinfo_pct", 50.0) for k in KEYS]
304
- credible_pcts = [modality_analysis.get(k, {}).get("credible_pct", 50.0) for k in KEYS]
305
 
306
- logit_tips = [
307
- (
308
- f"logit_m={modality_analysis.get(k, {}).get('misinfo_logit', 0.0):+.4f} | "
309
- f"logit_r={modality_analysis.get(k, {}).get('credible_logit', 0.0):+.4f}"
310
- )
311
- for k in KEYS
312
- ]
313
 
314
- fig = go.Figure()
315
-
316
- fig.add_trace(go.Bar(
317
- name="Misinformation Signal",
318
- x=MODALITIES,
319
- y=misinfo_pcts,
320
- marker=dict(color=[RED, RED, RED], opacity=0.88, line=dict(color="#ffffff", width=1)),
321
- text=[f"{v:.1f}%" for v in misinfo_pcts],
322
- textposition="outside",
323
- textfont=dict(size=11, color=RED),
324
- customdata=logit_tips,
325
- hovertemplate=(
326
- "<b>%{x} β€” Misinformation Signal</b><br>"
327
- "Softmax score: %{y:.2f}%<br>"
328
- "%{customdata}<extra></extra>"
329
- ),
330
- ))
331
-
332
- fig.add_trace(go.Bar(
333
- name="Reliable Health Information",
334
- x=MODALITIES,
335
- y=credible_pcts,
336
- marker=dict(color=[GREEN, GREEN, GREEN], opacity=0.88, line=dict(color="#ffffff", width=1)),
337
- text=[f"{v:.1f}%" for v in credible_pcts],
338
- textposition="outside",
339
- textfont=dict(size=11, color=GREEN),
340
- customdata=logit_tips,
341
- hovertemplate=(
342
- "<b>%{x} β€” Reliable Health Information</b><br>"
343
- "Softmax score: %{y:.2f}%<br>"
344
- "%{customdata}<extra></extra>"
345
- ),
346
- ))
347
-
348
- fig.update_layout(
349
- **PLOTLY_LAYOUT,
350
- title=dict(text="Modality-Level Health Information Assessment", font=dict(size=13, color=TEXT_DIM), x=0),
351
- barmode="group",
352
- xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor="#edf2f7"),
353
- yaxis=dict(title="Model Score (%)", range=[0, 115], gridcolor="#edf2f7", ticksuffix="%"),
354
- legend=dict(orientation="h", y=1.12, font=dict(size=11), bgcolor="rgba(255,255,255,0)"),
355
- bargap=0.22,
356
- bargroupgap=0.06,
357
  )
358
 
359
- return make_interactive(fig, height=290)
360
-
361
-
362
- # ============================================================
363
- # Trust Score
364
- # ============================================================
365
-
366
- def trust_score_by_modality(modality_analysis: Dict) -> go.Figure:
367
- """Vertical bar chart β€” reliability/trustworthiness coefficient per modality."""
368
- MODALITIES = ["Text", "Audio", "Video"]
369
- KEYS = ["text", "audio", "video"]
370
-
371
- trust_vals = [modality_analysis.get(k, {}).get("trust_score", 0.0) for k in KEYS]
372
- bar_colors = [GREEN if v >= 60 else (AMBER if v >= 35 else RED) for v in trust_vals]
373
-
374
- fig = go.Figure(go.Bar(
375
- x=MODALITIES,
376
- y=trust_vals,
377
- marker=dict(color=bar_colors, opacity=0.88, line=dict(color="#ffffff", width=1)),
378
- text=[f"{v:.1f}%" for v in trust_vals],
379
- textposition="outside",
380
- textfont=dict(size=11, color=TEXT_MAIN),
381
- hovertemplate=(
382
- "<b>%{x}</b><br>"
383
- "Reliability level: %{y:.2f}%<br>"
384
- "<i>Higher score means the modality provides stronger health-information evidence.</i>"
385
- "<extra></extra>"
386
- ),
387
- ))
388
-
389
- for level, label, color in [(80, "High Reliability", GREEN), (50, "Moderate Reliability", AMBER)]:
390
- fig.add_hline(
391
- y=level,
392
- line=dict(color=color, width=1, dash="dot"),
393
- annotation_text=label,
394
- annotation_position="right",
395
- annotation_font=dict(size=9, color=color),
396
- )
397
-
398
- fig.update_layout(
399
- **PLOTLY_LAYOUT,
400
- title=dict(text="Reliability Score by Modality", font=dict(size=13, color=TEXT_DIM), x=0),
401
- xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor="#edf2f7"),
402
- yaxis=dict(title="Reliability Level (%)", range=[0, 115], gridcolor="#edf2f7", ticksuffix="%"),
403
- bargap=0.38,
404
- )
405
 
406
- return make_interactive(fig, height=280)
407
-
408
-
409
- # ============================================================
410
- # Uncertainty Analysis
411
- # ============================================================
412
-
413
- def uncertainty_analysis(modality_analysis: Dict) -> go.Figure:
414
- """Vertical bar chart β€” Shannon entropy uncertainty per modality."""
415
- MODALITIES = ["Text", "Audio", "Video"]
416
- KEYS = ["text", "audio", "video"]
417
-
418
- uncertainty_vals = [modality_analysis.get(k, {}).get("uncertainty", 100.0) for k in KEYS]
419
- misinfo_pcts = [modality_analysis.get(k, {}).get("misinfo_pct", 50.0) for k in KEYS]
420
-
421
- bar_colors = [GREEN if v <= 35 else (AMBER if v <= 65 else RED) for v in uncertainty_vals]
422
-
423
- fig = go.Figure(go.Bar(
424
- x=MODALITIES,
425
- y=uncertainty_vals,
426
- marker=dict(color=bar_colors, opacity=0.88, line=dict(color="#ffffff", width=1)),
427
- text=[f"{v:.1f}%" for v in uncertainty_vals],
428
- textposition="outside",
429
- textfont=dict(size=11, color=TEXT_MAIN),
430
- customdata=[[f"p_misinformation={m:.1f}%"] for m in misinfo_pcts],
431
- hovertemplate=(
432
- "<b>%{x}</b><br>"
433
- "Uncertainty: %{y:.2f}%<br>"
434
- "%{customdata[0]}<br>"
435
- "<i>Higher uncertainty means the model is less confident about the health claim.</i>"
436
- "<extra></extra>"
437
- ),
438
- ))
439
-
440
- fig.add_hline(
441
- y=100,
442
- line=dict(color=RED, width=1, dash="dot"),
443
- annotation_text="Maximum Uncertainty",
444
- annotation_position="right",
445
- annotation_font=dict(size=9, color=RED),
446
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
- fig.add_hline(
449
- y=50,
450
- line=dict(color=AMBER, width=1, dash="dot"),
451
- annotation_text="Moderate Uncertainty",
452
- annotation_position="right",
453
- annotation_font=dict(size=9, color=AMBER),
454
  )
455
 
456
- fig.update_layout(
457
- **PLOTLY_LAYOUT,
458
- title=dict(text="Model Uncertainty in Health Information Assessment", font=dict(size=13, color=TEXT_DIM), x=0),
459
- xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor="#edf2f7"),
460
- yaxis=dict(title="Uncertainty (%)", range=[0, 120], gridcolor="#edf2f7", ticksuffix="%"),
461
- bargap=0.38,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  )
463
 
464
- return make_interactive(fig, height=280)
465
 
 
466
 
467
- # ============================================================
468
- # Comment Sentiment Timeline
469
- # ============================================================
470
 
471
- def sentiment_timeline(comments_df: pd.DataFrame, sentiments: List[Dict]) -> go.Figure:
472
- """Scatter plot: comment index vs sentiment compound score."""
473
- if comments_df.empty:
474
- return _empty_fig("Audience Response Distribution")
475
-
476
- df = comments_df.copy()
477
- df["compound"] = [s.get("compound", 0) for s in sentiments]
478
- df["label"] = [s.get("label", "NEUTRAL") for s in sentiments]
479
- df["color"] = df["label"].map({"POSITIVE": GREEN, "NEGATIVE": RED, "NEUTRAL": AMBER})
480
- df["text_short"] = df["text"].str[:80] + "…"
 
 
481
 
482
- fig = go.Figure()
 
 
 
 
 
483
 
484
- for lbl, clr, display_name in [
485
- ("POSITIVE", GREEN, "Supportive / Positive"),
486
- ("NEGATIVE", RED, "Concerned / Negative"),
487
- ("NEUTRAL", AMBER, "Neutral / Informational"),
488
- ]:
489
- sub = df[df["label"] == lbl]
490
- if sub.empty:
491
- continue
492
-
493
- fig.add_trace(go.Scatter(
494
- x=sub.index,
495
- y=sub["compound"],
496
- mode="markers",
497
- name=display_name,
498
- marker=dict(
499
- size=np.clip(np.log1p(sub["likes"].fillna(0)) * 4 + 4, 4, 20),
500
- color=clr,
501
- opacity=0.78,
502
- line=dict(width=1, color="#ffffff"),
503
- ),
504
- text=sub["text_short"],
505
- customdata=np.stack(
506
- [
507
- sub["likes"].fillna(0).astype(str),
508
- sub["label"].astype(str),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  ],
510
- axis=-1,
511
- ),
512
- hovertemplate=(
513
- "<b>%{text}</b><br>"
514
- "Audience response: %{customdata[1]}<br>"
515
- "Compound score: %{y:.2f}<br>"
516
- "Likes: %{customdata[0]}<extra></extra>"
517
- ),
518
- ))
519
-
520
- fig.add_hline(y=0, line=dict(color=BORDER, width=1, dash="dot"))
521
-
522
- fig.update_layout(
523
- **PLOTLY_LAYOUT,
524
- title=dict(text="Audience Response to Mental Health Information", font=dict(size=13, color=TEXT_DIM), x=0),
525
- xaxis=dict(title="Comment Index", gridcolor="#edf2f7", showgrid=False),
526
- yaxis=dict(title="Sentiment Score", gridcolor="#edf2f7", range=[-1.1, 1.1]),
527
- legend=dict(orientation="h", y=1.12, font=dict(size=10)),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  )
529
 
530
- return make_interactive(fig, height=320)
531
-
532
-
533
- # ============================================================
534
- # Keyword Comparison
535
- # ============================================================
536
-
537
- def keyword_comparison(
538
- pos_kw: List[Tuple[str, float]],
539
- neg_kw: List[Tuple[str, float]],
540
- ) -> go.Figure:
541
- """Diverging bar chart: supportive vs concerned health-information keywords."""
542
- if not pos_kw and not neg_kw:
543
- return _empty_fig("Audience Keyword Signals")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
- top = 10
546
- pos_kw = pos_kw[:top]
547
- neg_kw = neg_kw[:top]
548
 
549
- fig = go.Figure()
 
 
550
 
551
- if pos_kw:
552
- pw, pv = zip(*pos_kw)
553
- max_p = max(pv) or 1
554
-
555
- fig.add_trace(go.Bar(
556
- name="Supportive / Reliable Signals",
557
- y=list(pw),
558
- x=[v / max_p * 100 for v in pv],
559
- orientation="h",
560
- marker=dict(color=GREEN, line=dict(color="#ffffff", width=1)),
561
- hovertemplate="<b>%{y}</b><br>Supportive keyword score: %{x:.1f}<extra></extra>",
562
- ))
563
-
564
- if neg_kw:
565
- nw, nv = zip(*neg_kw)
566
- max_n = max(nv) or 1
567
-
568
- fig.add_trace(go.Bar(
569
- name="Concern / Misinformation Signals",
570
- y=list(nw),
571
- x=[-v / max_n * 100 for v in nv],
572
- orientation="h",
573
- marker=dict(color=RED, line=dict(color="#ffffff", width=1)),
574
- hovertemplate="<b>%{y}</b><br>Concern keyword score: %{x:.1f}<extra></extra>",
575
- ))
576
-
577
- fig.update_layout(
578
- **PLOTLY_LAYOUT,
579
- title=dict(text="Audience Keyword Signals", font=dict(size=13, color=TEXT_DIM), x=0),
580
- barmode="overlay",
581
- xaxis=dict(
582
- title="← Concern / Misinformation Signals | Supportive / Reliable Signals β†’",
583
- gridcolor="#edf2f7",
584
- zeroline=True,
585
- zerolinecolor=BORDER,
586
- zerolinewidth=2,
587
- ),
588
- yaxis=dict(tickfont=dict(size=10)),
589
- legend=dict(orientation="h", y=1.1),
590
  )
591
 
592
- return make_interactive(fig, height=360)
593
-
594
-
595
- # ============================================================
596
- # Empty Figure Helper
597
- # ============================================================
598
-
599
- def _empty_fig(title: str) -> go.Figure:
600
- fig = go.Figure()
601
 
602
- fig.add_annotation(
603
- text="No health-information data available",
604
- x=0.5,
605
- y=0.5,
606
- showarrow=False,
607
- font=dict(size=14, color=TEXT_DIM),
608
- )
609
 
610
- fig.update_layout(
611
- **PLOTLY_LAYOUT,
612
- title=dict(text=title, x=0, font=dict(size=13, color=TEXT_DIM)),
 
 
 
 
 
613
  )
614
 
615
- return make_interactive(fig, height=250)
 
33
  uncertainty_analysis,
34
  )
35
 
36
+ CSS = """
37
+ @import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
38
+
39
+ /* Variables*/
40
+ :root {
41
+ --bg: #0d0f14;
42
+ --card: #13161e;
43
+ --border: #1e2330;
44
+ --text: #e8eaf0;
45
+ --dim: #5a6070;
46
+ --cyan: #00d4ff;
47
+ --green: #00e5a0;
48
+ --red: #ff4757;
49
+ --amber: #ffb347;
50
+ --blue: #4a8eff;
51
+ }
52
+
53
+ /* Force dark everywhere — prevent white bleed */
54
+ html, body {
55
+ background: var(--bg) !important;
56
+ color: var(--text) !important;
57
+ margin: 0; padding: 0;
58
+ }
59
+ .gradio-container, #root, #app, main, .main, .wrap, .svelte-1kyws56 {
60
+ background: var(--bg) !important;
61
+ max-width: 100% !important;
62
+ width: 100% !important;
63
+ margin: 0 auto !important;
64
+ padding: 0 1.5rem !important;
65
+ box-sizing: border-box !important;
66
+ }
67
+ /* kill Gradio's default white blocks */
68
+ .block, .wrap, .panel, .padded, div.form,
69
+ div[class*="block"], div[class*="wrap"],
70
+ div[class*="panel"], div[class*="gap"],
71
+ .gap { background: transparent !important; border: none !important; }
72
+
73
+ /* Cards / Groups ─ */
74
+ .gr-group, .gr-box, .vv-section {
75
+ background: var(--card) !important;
76
+ border: 1px solid var(--border) !important;
77
+ border-radius: 12px !important;
78
+ padding: 1rem 1.25rem !important;
79
+ }
80
+
81
+ /* Tabs */
82
+ .tab-nav button {
83
+ background: transparent !important;
84
+ border: none !important;
85
+ color: var(--dim) !important;
86
+ font-family: 'DM Mono', monospace !important;
87
+ font-size: 0.82rem !important;
88
+ letter-spacing: 0.05em !important;
89
+ border-bottom: 2px solid transparent !important;
90
+ padding: 0.5rem 1.2rem !important;
91
+ transition: color 0.18s;
92
+ }
93
+ .tab-nav button.selected {
94
+ color: var(--cyan) !important;
95
+ border-bottom-color: var(--cyan) !important;
96
+ }
97
+ .tab-nav { border-bottom: 1px solid var(--border) !important; }
98
+
99
+ /* Inputs */
100
+ input[type="text"], input[type="password"], input[type="number"], textarea, select {
101
+ background: #1a1d27 !important;
102
+ border: 1px solid var(--border) !important;
103
+ color: var(--text) !important;
104
+ border-radius: 8px !important;
105
+ font-family: 'DM Mono', monospace !important;
106
+ font-size: 0.88rem !important;
107
+ }
108
+ input:focus, textarea:focus, select:focus {
109
+ border-color: var(--cyan) !important;
110
+ box-shadow: 0 0 0 2px rgba(0,212,255,0.15) !important;
111
+ outline: none !important;
112
+ }
113
+ label, .gr-label, span.svelte-1b6s6s {
114
+ color: var(--dim) !important;
115
+ font-family: 'DM Mono', monospace !important;
116
+ font-size: 0.75rem !important;
117
+ letter-spacing: 0.08em !important;
118
+ text-transform: uppercase;
119
+ }
120
+
121
+ /* Slider */
122
+ input[type="range"] { accent-color: var(--cyan); }
123
+
124
+ /* Buttons ─ */
125
+ button.primary, button[variant="primary"], .primary {
126
+ background: linear-gradient(135deg, var(--cyan), var(--blue)) !important;
127
+ border: none !important;
128
+ color: #0d0f14 !important;
129
+ font-weight: 700 !important;
130
+ font-family: 'DM Mono', monospace !important;
131
+ border-radius: 8px !important;
132
+ letter-spacing: 0.06em !important;
133
+ }
134
+ button.secondary {
135
+ background: rgba(0,212,255,0.08) !important;
136
+ border: 1px solid var(--cyan) !important;
137
+ color: var(--cyan) !important;
138
+ border-radius: 8px !important;
139
+ font-family: 'DM Mono', monospace !important;
140
+ }
141
+ button:hover { opacity: 0.88; transform: translateY(-1px); transition: all 0.15s; }
142
+
143
+ /* Dropdowns ─ */
144
+ .dropdown, ul[role="listbox"], li[role="option"] {
145
+ background: #1a1d27 !important;
146
+ border-color: var(--border) !important;
147
+ color: var(--text) !important;
148
+ }
149
+ li[role="option"]:hover { background: #242736 !important; }
150
+
151
+ /* Dataframe ─ */
152
+ .gr-dataframe, table { background: var(--card) !important; }
153
+ .gr-dataframe th {
154
+ background: #1a1d27 !important;
155
+ color: var(--cyan) !important;
156
+ font-family: 'DM Mono', monospace !important;
157
+ font-size: 0.72rem !important;
158
+ padding: 6px 10px;
159
+ border-bottom: 1px solid var(--border);
160
+ text-transform: uppercase;
161
+ letter-spacing: 0.08em;
162
+ }
163
+ .gr-dataframe td {
164
+ color: var(--text) !important;
165
+ font-size: 0.77rem !important;
166
+ padding: 5px 10px;
167
+ border-bottom: 1px solid var(--border);
168
+ }
169
+ .gr-dataframe tr:hover td { background: rgba(0,212,255,0.04) !important; }
170
+
171
+ /* Accordion ─ */
172
+ details > summary {
173
+ color: var(--dim) !important;
174
+ font-family: 'DM Mono', monospace !important;
175
+ font-size: 0.82rem !important;
176
+ cursor: pointer;
177
+ list-style: none;
178
+ }
179
+ details[open] > summary { color: var(--cyan) !important; }
180
+
181
+ /* Plot containers ─ */
182
+ .js-plotly-plot, .plotly { background: transparent !important; }
183
+ .modebar { display: none !important; }
184
+
185
+ /* Scrollbar ─ */
186
+ ::-webkit-scrollbar { width: 6px; height: 6px; }
187
+ ::-webkit-scrollbar-track { background: var(--bg); }
188
+ ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
189
+ ::-webkit-scrollbar-thumb:hover { background: var(--dim); }
190
+
191
+
192
+ /* Shared HTML component classes */
193
+
194
+ .vv-hero {
195
+ font-family: 'Syne', sans-serif;
196
+ font-size: 1.65rem;
197
+ font-weight: 800;
198
+ background: linear-gradient(135deg, #00d4ff, #4a8eff);
199
+ -webkit-background-clip: text;
200
+ -webkit-text-fill-color: transparent;
201
+ background-clip: text;
202
+ letter-spacing: -0.02em;
203
+ line-height: 1.2;
204
+ }
205
+ .vv-section-title {
206
+ font-family: 'Syne', sans-serif;
207
+ font-size: 0.68rem;
208
+ font-weight: 700;
209
+ letter-spacing: 0.18em;
210
+ text-transform: uppercase;
211
+ color: #5a6070;
212
+ margin-bottom: 0.5rem;
213
+ margin-top: 0;
214
+ }
215
+ .vv-card {
216
+ background: #13161e;
217
+ border: 1px solid #1e2330;
218
+ border-radius: 12px;
219
+ padding: 1.1rem 1.3rem;
220
+ margin-bottom: 0.7rem;
221
+ }
222
+ .vv-stat {
223
+ display: inline-block;
224
+ background: #1a1d27;
225
+ border: 1px solid #1e2330;
226
+ border-radius: 6px;
227
+ padding: 0.25rem 0.75rem;
228
+ font-family: 'DM Mono', monospace;
229
+ font-size: 0.77rem;
230
+ color: #00d4ff;
231
+ margin: 0.15rem 0.2rem;
232
+ }
233
+ .vv-badge-green {
234
+ display: inline-block;
235
+ background: rgba(0,229,160,0.12);
236
+ border: 1px solid #00e5a0;
237
+ color: #00e5a0;
238
+ border-radius: 20px;
239
+ padding: 0.32rem 1.1rem;
240
+ font-size: 0.85rem;
241
+ font-family: 'DM Mono', monospace;
242
+ font-weight: 600;
243
+ }
244
+ .vv-badge-red {
245
+ display: inline-block;
246
+ background: rgba(255,71,87,0.12);
247
+ border: 1px solid #ff4757;
248
+ color: #ff4757;
249
+ border-radius: 20px;
250
+ padding: 0.32rem 1.1rem;
251
+ font-size: 0.85rem;
252
+ font-family: 'DM Mono', monospace;
253
+ font-weight: 600;
254
+ }
255
+ .vv-badge-amber {
256
+ display: inline-block;
257
+ background: rgba(255,179,71,0.12);
258
+ border: 1px solid #ffb347;
259
+ color: #ffb347;
260
+ border-radius: 20px;
261
+ padding: 0.32rem 1.1rem;
262
+ font-size: 0.85rem;
263
+ font-family: 'DM Mono', monospace;
264
+ font-weight: 600;
265
+ }
266
+ .vv-reasoning {
267
+ background: #0d1119;
268
+ border-left: 3px solid #ffb347;
269
+ padding: 0.8rem 1rem;
270
+ border-radius: 0 8px 8px 0;
271
+ font-size: 0.83rem;
272
+ color: #c0c4cc;
273
+ line-height: 1.65;
274
+ font-family: 'IBM Plex Sans', sans-serif;
275
+ margin-top: 8px;
276
+ }
277
+ .vv-tag {
278
+ display: inline-block;
279
+ background: #1a1d27;
280
+ border: 1px solid #1e2330;
281
+ border-radius: 4px;
282
+ padding: 2px 8px;
283
+ font-family: 'DM Mono', monospace;
284
+ font-size: 0.7rem;
285
+ color: #8090a0;
286
+ margin: 2px;
287
+ }
288
+ .vv-stat-big-green { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #00e5a0; margin: 0; }
289
+ .vv-stat-big-red { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #ff4757; margin: 0; }
290
+ .vv-stat-big-dim { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #5a6070; margin: 0; }
291
+ .vv-log-line { font-size: 0.72rem; color: #5a6070; font-family: 'DM Mono', monospace; margin: 2px 0; }
292
+ .vv-hr { border: none; border-top: 1px solid #1e2330; margin: 1.1rem 0; }
293
+ """
294
+
295
# Base Gradio theme: cyan primary hue, gray neutral hue, and IBM Plex Sans
# (a Google font) as the UI font with a generic sans-serif fallback.
THEME = gr.themes.Base(
    primary_hue=gr.themes.colors.cyan,
    neutral_hue=gr.themes.colors.gray,
    font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
)
300
 
301
def _empty_plotly(msg: str = "Run analysis to see data", h: int = 230):
    """Build a transparent, trace-less figure that displays a single message.

    Args:
        msg: Text shown by the annotation.
        h: Value passed to the layout's ``height``.

    Returns:
        The Plotly figure carrying the layout and the annotation.
    """
    import plotly.graph_objects as go

    muted = "#5a6070"
    clear = "rgba(0,0,0,0)"

    placeholder = go.Figure()
    placeholder.update_layout(
        paper_bgcolor=clear,
        plot_bgcolor=clear,
        font={"color": muted},
        margin={"l": 10, "r": 10, "t": 10, "b": 10},
        height=h,
    )
    # x/y are given in paper coordinates (0.5, 0.5), not data coordinates.
    placeholder.add_annotation(
        text=msg,
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font={"size": 12, "color": muted},
    )
    return placeholder
313
 
314
 
315
def _blank_outputs(status_msg: str):
    """Return the 19 placeholder values used when no analysis result exists.

    Args:
        status_msg: Message rendered inside the red status paragraph. It is
            interpolated into the HTML without escaping, so callers can pass
            inline markup such as ``<code>``.

    Returns:
        A 19-tuple in the order annotated below: status HTML, log HTML,
        left-panel HTML, two empty strings, seven placeholder figures, three
        empty strings, and four empty DataFrames.
        NOTE(review): presumably this order mirrors ``ALL_OUTPUTS`` — confirm
        against the component list, which is not visible here.
    """
    # A single placeholder figure is reused for every plot slot.
    ep = _empty_plotly()
    return (
        f'<p style="color:#ff4757;font-family:DM Mono,monospace;padding:8px">{status_msg}</p>',  # 0 status
        "<p class='vv-log-line'>—</p>",  # 1 log
        "<div style='padding:3rem;text-align:center;color:#5a6070;font-family:DM Mono,monospace'>No data yet.</div>",  # 2 left panel
        "", "",  # 3 badge, 4 reasoning
        ep, ep, ep,  # 5 modality_dist, 6 trust, 7 uncertainty
        ep, ep, ep, ep,  # 8 donut, 9 timeline, 10 kw_bar, 11 kw_comp
        "", "", "",  # 12 stat_pos, 13 stat_neg, 14 stat_neu
        pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),  # 15 df_all, 16 df_pos, 17 df_neg, 18 df_top
    )
328
 
 
 
 
 
 
 
 
 
329
 
330
+ # PIPELINE
 
 
 
 
331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
def run_pipeline(
    url_or_id: str,
    sentiment_method: str,
    max_comments: int,
    progress=gr.Progress(track_tqdm=False),
):
    """Run the full analysis for one video and yield the ALL_OUTPUTS tuple.

    This is a generator (Gradio streams its yields to the UI). It yields
    exactly once: either a ``_blank_outputs`` tuple carrying an error message
    (empty input, unparsable ID, missing API key, metadata error) or the
    populated tuple from ``_build_outputs``.

    Args:
        url_or_id: YouTube URL or raw video ID typed by the user.
        sentiment_method: Engine key forwarded to ``analyze_sentiment_batch``.
        max_comments: Upper bound on comments to fetch (coerced to ``int``).
        progress: Gradio progress tracker, injected by Gradio.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # The API key is read from the environment only, never from the UI.
    api_key = os.environ.get("YT_API_KEY", "").strip()

    # Guards
    cleaned_input = (url_or_id or "").strip()
    if not cleaned_input:
        yield _blank_outputs("⚠️ Please enter a YouTube URL or video ID.")
        return

    video_id = extract_video_id(cleaned_input)
    if not video_id:
        yield _blank_outputs("❌ Could not parse a valid YouTube video ID.")
        return

    if not api_key:
        yield _blank_outputs(
            "⚠️ YouTube API key not found. "
            "Set the <code>YT_API_KEY</code> environment variable / Space secret."
        )
        return

    # 1 - Metadata
    progress(0.05, desc="Fetching video metadata…")
    meta, err = fetch_video_metadata(video_id, api_key)
    if err:
        yield _blank_outputs(f"❌ {err}")
        return

    # Tolerate absent/None fields the same way _build_outputs does with .get().
    title = meta.get("title") or ""
    description = meta.get("description") or ""
    tags = meta.get("tags") or []

    # 2 - Transcript
    progress(0.20, desc="Fetching transcript…")
    transcript, t_status = fetch_transcript(video_id)
    # A missing transcript must be "" so it is not rendered as the text "None"
    # when concatenated into the keyword corpus below.
    transcript = transcript or ""

    # 3 - Comments
    max_comments = int(max_comments)
    progress(0.35, desc=f"Fetching up to {max_comments} comments…")
    comments_df, c_status = fetch_comments(video_id, api_key, max_comments=max_comments)
    if comments_df is None:
        # Defensive: treat "no frame returned" as "no comments".
        comments_df = pd.DataFrame()

    # 4 - Misinformation
    # Both transcript arguments receive the spoken transcript. Per the original
    # author's note, detect_misinformation() builds the video segment itself as
    # transcript + title + tags, so the three modalities get distinct content
    # even when the video has no captions.
    progress(0.50, desc="Running misinformation detection…")
    misinfo = detect_misinformation(
        text=f"{title} {description}",
        tags=tags,
        audio_transcript=transcript,   # speech/audio stream
        video_transcript=transcript,   # enriched inside analyzer with title+tags
    )

    # 5 - Keywords
    keywords = extract_keywords(f"{title} {description} {transcript}", tags)

    # 6 - Sentiment
    sentiments, sent_sum, pos_kw, neg_kw = [], {}, [], []

    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        total = len(texts)
        batch = 64
        for start in range(0, total, batch):
            chunk = texts[start: start + batch]
            sentiments += analyze_sentiment_batch(chunk, method=sentiment_method, batch_size=batch)
            done = min(start + batch, total)
            # Sentiment occupies the 0.60 -> 0.90 slice of the progress bar.
            progress(0.60 + 0.30 * done / total, desc=f"Sentiment {done}/{total}…")

        sent_sum = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)

    # 7 - Build outputs
    if sent_sum:
        sentiment_line = f"πŸ’¬ Sentiment: {sent_sum['pos_pct']}% pos / {sent_sum['neg_pct']}% neg"
    else:
        sentiment_line = "πŸ’¬ No comments β€” sentiment skipped"

    progress(0.97, desc="Building charts…")
    yield _build_outputs(
        meta=meta, video_id=video_id, transcript=transcript,
        comments_df=comments_df, misinfo=misinfo, keywords=keywords,
        sentiments=sentiments, sent_sum=sent_sum,
        pos_kw=pos_kw, neg_kw=neg_kw,
        status_log=[
            # Log lines are rendered as HTML; the title is uploader-controlled,
            # so escape it (after truncation, to keep 55 visible characters).
            f"βœ… Metadata: {escape(title[:55])}",
            t_status,
            c_status,
            f"πŸ”¬ Misinfo score: {misinfo['confidence_pct']}%",
            sentiment_line,
        ],
    )
433
 
 
 
 
 
434
 
435
+ # OUTPUT BUILDER
 
436
 
 
 
 
 
 
 
 
437
 
438
def _safe_chart(builder, fallback_msg=None):
    """Call a zero-argument chart builder; on failure log it and return a blank figure."""
    import logging

    try:
        return builder()
    except Exception:
        # UI boundary: one broken chart must not abort the whole result, but
        # the failure should leave a trace instead of vanishing silently.
        logging.getLogger(__name__).exception("Chart rendering failed")
        return _empty_plotly(fallback_msg) if fallback_msg else _empty_plotly()


def _build_outputs(
    meta, video_id, transcript, comments_df,
    misinfo, keywords, sentiments, sent_sum, pos_kw, neg_kw, status_log,
):
    """Assemble the 19-tuple of Gradio output values for a finished analysis.

    Returned order: 0 status, 1 log, 2 left panel, 3 badge, 4 reasoning,
    5 modality distribution, 6 trust score, 7 uncertainty, 8 donut,
    9 timeline, 10 keyword bar, 11 keyword comparison, 12-14 sentiment stat
    boxes (pos/neg/neu), 15-18 comment tables (all/positive/negative/top).

    Text that originates from YouTube (title, channel, description, tags,
    transcript, thumbnail URL) is HTML-escaped before being placed in markup.
    ``status_log`` lines are inserted as-is.
    """
    from html import escape

    def esc(value):
        # quote=True so the result is also safe inside attribute values.
        return escape(str(value), quote=True)

    # Status
    status_html = (
        '<p style="color:#00e5a0;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
        "βœ… Analysis complete</p>"
    )

    # Log
    log_html = "".join(f'<p class="vv-log-line">{line}</p>' for line in status_log)

    # Left panel
    thumb_url = meta.get("thumbnail_url")
    thumb_html = (
        f'<img src="{esc(thumb_url)}" '
        'style="width:100%;border-radius:8px;margin-bottom:8px;display:block">'
        if thumb_url else ""
    )
    tag_html = "".join(
        f'<span class="vv-tag">#{esc(t)}</span>' for t in (meta.get("tags") or [])[:20]
    )
    # Truncate first, escape second, so the cut never lands inside an entity.
    desc_short = esc((meta.get("description") or "")[:1200])
    word_count = len(transcript.split()) if transcript else 0
    if transcript:
        clipped = transcript[:2500] + "…" if len(transcript) > 2500 else transcript
        transcript_short = esc(clipped)
    else:
        transcript_short = "(not available)"

    title = esc(meta['title'])
    channel = esc(meta['channel_title'])
    published = esc(meta['published_at'])
    duration = esc(meta['duration'])
    video_ref = esc(video_id)

    left_html = f"""
    {thumb_html}
    <a href="https://www.youtube.com/watch?v={video_ref}" target="_blank"
    style="display:block;text-align:center;font-family:'DM Mono',monospace;
    font-size:0.75rem;color:#5a6070;text-decoration:none;margin:4px 0 10px">
    β–Ά Open on YouTube
    </a>
    <div class="vv-card">
    <p class="vv-section-title">Video</p>
    <p style="font-family:'Syne',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 4px;color:#e8eaf0">
    {title}
    </p>
    <p style="font-size:0.82rem;color:#5a6070;margin:0">
    by <b style="color:#b0b4c0">{channel}</b> Β· {published}
    </p>
    </div>

    <p class="vv-section-title">Metrics</p>
    <span class="vv-stat">πŸ‘ {meta['view_count']:,}</span>
    <span class="vv-stat">πŸ‘ {meta['like_count']:,}</span>
    <span class="vv-stat">πŸ’¬ {meta['comment_count']:,}</span>
    <span class="vv-stat">⏱ {duration}</span>

    <p class="vv-section-title" style="margin-top:1rem">Tags</p>
    {tag_html or '<span style="color:#5a6070;font-size:0.78rem">(none)</span>'}

    <details style="margin-top:1rem">
    <summary>πŸ“„ Description</summary>
    <p style="font-size:0.78rem;color:#8090a0;line-height:1.65;white-space:pre-wrap;margin-top:6px">{desc_short}</p>
    </details>
    <details style="margin-top:0.5rem">
    <summary>πŸ“ Transcript ({word_count} words)</summary>
    <p style="font-size:0.75rem;color:#8090a0;line-height:1.65;margin-top:6px">{transcript_short}</p>
    </details>
    """

    # Misinfo badge: <0.35 credible, <0.65 uncertain, otherwise likely misinformation.
    score = misinfo["score"]
    if score < 0.35:
        badge_html = '<span class="vv-badge-green">βœ… Appears Credible</span>'
    elif score < 0.65:
        badge_html = '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
    else:
        badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'

    # NOTE(review): the reasoning text is inserted unescaped, as before;
    # presumably it is analyzer-generated -- confirm it never echoes raw video text.
    reasoning_html = (
        f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>'
    )

    # Modality charts, all driven by the analyzer's per-modality breakdown.
    mod_analysis = misinfo.get("modality_analysis", {})
    fig_mod_dist = _safe_chart(
        lambda: modality_misinfo_distribution(mod_analysis),
        "Modality distribution unavailable",
    )
    fig_trust = _safe_chart(
        lambda: trust_score_by_modality(mod_analysis),
        "Trust score unavailable",
    )
    fig_uncert = _safe_chart(
        lambda: uncertainty_analysis(mod_analysis),
        "Uncertainty analysis unavailable",
    )

    # Sentiment charts
    has_comments = comments_df is not None and not comments_df.empty
    fig_donut = _safe_chart(
        lambda: sentiment_donut(sent_sum) if sent_sum else _empty_plotly("No comments analysed")
    )
    fig_timeline = _safe_chart(
        lambda: (
            sentiment_timeline(comments_df, sentiments)
            if (sent_sum and has_comments)
            else _empty_plotly("No comments analysed")
        )
    )
    fig_kw = _safe_chart(
        lambda: keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff")
    )
    fig_kw_comp = _safe_chart(
        lambda: (
            keyword_comparison(pos_kw, neg_kw)
            if (pos_kw or neg_kw)
            else _empty_plotly("No keyword comparison β€” no comments")
        )
    )

    # Sentiment stat boxes
    if sent_sum:
        stat_pos = (
            f'<div class="vv-card" style="text-align:center">'
            f'<p class="vv-stat-big-green">{sent_sum["pos_pct"]}%</p>'
            f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">Positive</p></div>'
        )
        stat_neg = (
            f'<div class="vv-card" style="text-align:center">'
            f'<p class="vv-stat-big-red">{sent_sum["neg_pct"]}%</p>'
            f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">Negative</p></div>'
        )
        stat_neu = (
            f'<div class="vv-card" style="text-align:center">'
            f'<p class="vv-stat-big-dim">{sent_sum["neu_pct"]}%</p>'
            f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">Neutral</p></div>'
        )
    else:
        placeholder = (
            '<div class="vv-card" style="text-align:center;color:#5a6070;'
            'font-family:DM Mono,monospace;font-size:0.8rem;padding:1.2rem">N/A</div>'
        )
        stat_pos = stat_neg = stat_neu = placeholder

    # Comment DataFrames
    show_cols = ["author", "text", "likes", "published_at"]
    df_all = df_pos = df_neg = df_top = pd.DataFrame()

    if has_comments:
        display_df = comments_df.copy()
        # Only attach sentiment columns when there is one result per comment;
        # a length mismatch would otherwise raise on column assignment.
        if sentiments and len(sentiments) == len(display_df):
            display_df["sentiment"] = [s["label"] for s in sentiments]
            display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
            cols = show_cols + ["sentiment", "compound"]
        else:
            cols = show_cols

        df_all = display_df[cols].head(100).reset_index(drop=True)
        df_top = (
            display_df.sort_values("likes", ascending=False)
            .head(20)[cols]
            .reset_index(drop=True)
        )
        if "sentiment" in display_df.columns:
            df_pos = display_df[display_df["sentiment"] == "POSITIVE"][cols].head(50).reset_index(drop=True)
            df_neg = display_df[display_df["sentiment"] == "NEGATIVE"][cols].head(50).reset_index(drop=True)

    return (
        status_html,     # 0  status_box
        log_html,        # 1  log_html_out
        left_html,       # 2  left_panel_html
        badge_html,      # 3  misinfo_badge_html
        reasoning_html,  # 4  misinfo_reasoning_html
        fig_mod_dist,    # 5  modality_dist_plot
        fig_trust,       # 6  trust_score_plot
        fig_uncert,      # 7  uncertainty_plot
        fig_donut,       # 8  donut_plot
        fig_timeline,    # 9  timeline_plot
        fig_kw,          # 10 kw_bar_plot
        fig_kw_comp,     # 11 kw_comp_plot
        stat_pos,        # 12 stat_pos_html
        stat_neg,        # 13 stat_neg_html
        stat_neu,        # 14 stat_neu_html
        df_all,          # 15 df_all_out
        df_pos,          # 16 df_pos_out
        df_neg,          # 17 df_neg_out
        df_top,          # 18 df_top_out
    )
626
 
 
627
 
628
+ # UPLOAD / SEARCH HELPERS
629
 
 
 
 
630
 
631
def do_search(keyword: str):
    """Search YouTube for ``keyword`` and render the hits for the search tab.

    Returns a 2-tuple ``(results_html, radio_update)``:
    - on a missing API key, blank keyword, or no results: a short notice and
      an update that empties and hides the radio;
    - otherwise: one HTML card per result and an update that shows the radio
      with ``(title[:70], watch_url)`` choices and no selection.

    Result text comes from YouTube and is HTML-escaped before being placed in
    the card markup.
    """
    from html import escape

    def esc(value):
        # quote=True so the result is also safe inside attribute values.
        return escape(str(value), quote=True)

    def hidden_radio():
        return gr.update(choices=[], value=None, visible=False)

    api_key = os.environ.get("YT_API_KEY", "").strip()
    if not api_key:
        return (
            "<p style='color:#ff4757;font-family:DM Mono,monospace'>⚠️ YT_API_KEY secret not set.</p>",
            hidden_radio(),
        )

    query = (keyword or "").strip()
    if not query:
        return (
            "<p style='color:#ffb347;font-family:DM Mono,monospace'>Enter a keyword to search.</p>",
            hidden_radio(),
        )

    results = search_videos_by_title(query, api_key, max_results=5)
    if not results:
        return (
            "<p style='color:#ffb347;font-family:DM Mono,monospace'>No results found.</p>",
            hidden_radio(),
        )

    cards = []
    choices = []
    for r in results:
        vid = r["video_id"]
        url = f"https://www.youtube.com/watch?v={vid}"
        # Radio labels are passed through unescaped, as before; only the
        # hand-built card markup below is escaped here.
        choices.append((r["title"][:70], url))
        cards.append(
            f'<div class="vv-card" style="display:flex;align-items:center;gap:12px;margin-bottom:6px">'
            f'<img src="{esc(r["thumbnail_url"])}" '
            f' style="width:72px;height:54px;object-fit:cover;border-radius:6px;flex-shrink:0">'
            f'<div>'
            f'<p style="margin:0;font-size:0.85rem;font-weight:600;color:#e8eaf0">{esc(r["title"][:80])}</p>'
            f'<p style="margin:0;font-size:0.75rem;color:#5a6070">'
            f'{esc(r["channel_title"])} Β· {esc(r["published_at"])} Β· '
            f'<code style="color:#00d4ff">v={esc(vid)}</code></p>'
            f'</div></div>'
        )
    return "".join(cards), gr.update(choices=choices, value=None, visible=True)
669
+
670
+
671
def pick_and_analyze(selected_url, sentiment_method, max_comments):
    """Analyze the video chosen in the search-results radio.

    Generator: with a selection it relays everything ``run_pipeline`` yields;
    with no selection it yields a single blank-output tuple asking the user
    to pick a video.
    """
    if selected_url:
        yield from run_pipeline(selected_url, sentiment_method, max_comments)
    else:
        yield _blank_outputs("Select a video from the search results above.")
676
+
677
+
678
+ # GRADIO BLOCKS UI
679
+
680
+
681
# Declarative UI definition. Components are created in display order; the
# event wiring at the bottom must stay inside this context manager.
# NOTE(review): the nesting below (which sections sit inside the right-hand
# column) was reconstructed from the section comments -- confirm against the
# rendered layout.
with gr.Blocks(title="VideoVerifier β€” MHMisinfo") as demo:

    # Header
    gr.HTML("""
    <div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #1e2330;margin-bottom:1.2rem">
    <h1 class="vv-hero">πŸ”¬ Video Verifier & Sentiment Analyzer</h1>
    <p style="color:#5a6070;font-size:0.85rem;margin-top:4px;font-family:'DM Mono',monospace">
    mental health misinformation detection
    </p>
    </div>
    """)

    # Settings: deliberately no API-key field; the key comes from the environment.
    with gr.Accordion("βš™οΈ Settings", open=False):
        gr.HTML("""
        <div style="background:#0d1119;border:1px solid #1e2330;border-radius:8px;
        padding:0.7rem 1rem;margin-bottom:0.8rem;font-family:'DM Mono',monospace;
        font-size:0.78rem;color:#5a6070">
        πŸ”‘ YouTube API key is read from the <code style="color:#00d4ff">YT_API_KEY</code>
        Space secret β€” it is never exposed in the UI.
        </div>
        """)
        with gr.Row():
            # Choices are (label shown to the user, value passed to the pipeline).
            sentiment_selector = gr.Dropdown(
                choices=[
                    ("VADER β€” fast, CPU-only (~5 000 comments/sec)", "vader"),
                    ("DistilBERT β€” accurate, downloads ~500 MB on first run", "hf"),
                ],
                value="vader",
                label="Sentiment Engine",
                scale=3,
            )
            max_comments_slider = gr.Slider(
                minimum=10, maximum=500, value=150, step=10,
                label="Max comments to fetch",
                scale=3,
                info="YouTube API quota: ~1 unit per comment request",
            )

    # Input tabs
    with gr.Tabs():

        with gr.TabItem("πŸ”— YouTube URL"):
            with gr.Row():
                url_input = gr.Textbox(
                    placeholder="https://www.youtube.com/watch?v=... or youtu.be/... or raw 11-char ID",
                    label="YouTube URL / Video ID",
                    scale=5,
                )
                analyze_btn = gr.Button("πŸ” Analyze", variant="primary", scale=1, min_width=130)

        with gr.TabItem("πŸ“ Upload / Search by Title"):
            gr.HTML("""
            <div class="vv-card" style="margin-bottom:8px">
            <p class="vv-section-title">Search by video title or keyword</p>
            <p style="font-size:0.82rem;color:#5a6070;line-height:1.6;margin:0">
            Upload your file, then type the title or keyword below to locate the matching YouTube entry.
            </p>
            </div>
            """)
            # NOTE(review): upload_file is not wired to any event in this block.
            upload_file = gr.File(
                label="Drop a video file (mp4, mov, avi, mkv, webm)",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
            )
            with gr.Row():
                kw_input = gr.Textbox(placeholder="Enter video title or keyword…", label="Search keyword", scale=4)
                search_btn = gr.Button("πŸ”Ž Find on YouTube", scale=1)
            search_results_html = gr.HTML()
            # Starts hidden; do_search fills in the choices and makes it visible.
            search_radio = gr.Radio(label="Select a video to analyze", choices=[], visible=False)

    # Status
    status_box = gr.HTML(
        '<p style="color:#5a6070;font-family:DM Mono,monospace;font-size:0.8rem;padding:6px 0">'
        "Enter a URL above and click Analyze.</p>"
    )

    # Main results layout
    with gr.Row(equal_height=False):

        # LEFT: video info
        with gr.Column(scale=2):
            left_panel_html = gr.HTML(
                "<div style='padding:3rem;text-align:center;color:#5a6070;"
                "font-family:DM Mono,monospace'>No data yet.</div>"
            )

        # RIGHT: analytics
        with gr.Column(scale=3):

            # Misinformation analysis
            gr.HTML('<p class="vv-section-title" style="margin-top:0">πŸ”¬ Misinformation Analysis</p>')
            misinfo_badge_html = gr.HTML()

            # Row 1: modality misinformation distribution (full width)
            with gr.Row():
                modality_dist_plot = gr.Plot(label="", show_label=False)

            # Row 2: trust score | uncertainty analysis (side by side)
            with gr.Row():
                trust_score_plot = gr.Plot(label="", show_label=False)
                uncertainty_plot = gr.Plot(label="", show_label=False)

            misinfo_reasoning_html = gr.HTML()

            gr.HTML('<hr class="vv-hr">')

            # Comment sentiment
            gr.HTML('<p class="vv-section-title">πŸ’¬ Comment Sentiment</p>')
            with gr.Row():
                stat_pos_html = gr.HTML()
                stat_neg_html = gr.HTML()
                stat_neu_html = gr.HTML()
            with gr.Row():
                donut_plot = gr.Plot(label="", show_label=False)
                timeline_plot = gr.Plot(label="", show_label=False)
            with gr.Row():
                kw_bar_plot = gr.Plot(label="", show_label=False)
                kw_comp_plot = gr.Plot(label="", show_label=False)

            gr.HTML('<hr class="vv-hr">')

            # Comments deep-dive
            gr.HTML('<p class="vv-section-title">πŸ“Š Comments Deep-Dive</p>')
            with gr.Tabs():
                with gr.TabItem("All"):
                    df_all_out = gr.Dataframe(
                        headers=["author", "text", "likes", "published_at", "sentiment", "compound"],
                        datatype=["str", "str", "number", "str", "str", "number"],
                        wrap=True,
                        max_height=320,
                    )
                with gr.TabItem("Positive"):
                    df_pos_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Negative"):
                    df_neg_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Most Liked"):
                    df_top_out = gr.Dataframe(wrap=True, max_height=320)

    # Activity log
    with gr.Accordion("πŸ“œ Activity Log", open=False):
        log_html_out = gr.HTML('<p class="vv-log-line">β€”</p>')

    # Footer
    gr.HTML("""
    <div style="margin-top:2rem;padding-top:1rem;border-top:1px solid #1e2330;
    text-align:center;font-family:'DM Mono',monospace;font-size:0.72rem;color:#3a3f50">
    4-stream SeTa-Attention BiGRU Β· CCM / DMTE / Uncertainty Fusion Β·
    Test ROC-AUC 0.967
    </div>
    """)

    # Output list: the order must match the tuples returned by
    # _build_outputs and _blank_outputs exactly (index shown on each line).
    ALL_OUTPUTS = [
        status_box,              # 0
        log_html_out,            # 1
        left_panel_html,         # 2
        misinfo_badge_html,      # 3
        misinfo_reasoning_html,  # 4
        modality_dist_plot,      # 5
        trust_score_plot,        # 6
        uncertainty_plot,        # 7
        donut_plot,              # 8
        timeline_plot,           # 9
        kw_bar_plot,             # 10
        kw_comp_plot,            # 11
        stat_pos_html,           # 12
        stat_neg_html,           # 13
        stat_neu_html,           # 14
        df_all_out,              # 15
        df_pos_out,              # 16
        df_neg_out,              # 17
        df_top_out,              # 18
    ]

    # Pipeline inputs (no api_key input; the key is read from the environment)
    _pipeline_inputs = [url_input, sentiment_selector, max_comments_slider]

    # Events: URL tab. The button click and Enter in the textbox run the same pipeline.
    analyze_btn.click(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)
    url_input.submit(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)

    # Events: Upload/Search tab. Searching fills the radio; changing the radio
    # value starts the analysis for the selected URL.
    search_btn.click(
        fn=do_search,
        inputs=[kw_input],
        outputs=[search_results_html, search_radio],
    )
    search_radio.change(
        fn=pick_and_analyze,
        inputs=[search_radio, sentiment_selector, max_comments_slider],
        outputs=ALL_OUTPUTS,
    )
873
 
 
 
 
 
 
 
 
 
 
874
 
875
# Launch: per the original author's note, css and theme are passed to
# launch() in Gradio 6.x rather than to gr.Blocks().
if __name__ == "__main__":
    app_theme = gr.themes.Base(
        primary_hue=gr.themes.colors.cyan,
        neutral_hue=gr.themes.colors.gray,
        font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
    )
    demo.launch(css=CSS, theme=app_theme)
886