rocky250 committed on
Commit
b1690db
·
verified ·
1 Parent(s): 5a3aba5

Update fetcher.py

Browse files
Files changed (1) hide show
  1. fetcher.py +541 -196
fetcher.py CHANGED
@@ -1,206 +1,551 @@
1
  """
2
- fetcher.py — YouTube Data API v3 helpers
3
  """
4
 
5
- import re
6
- import requests
 
7
  import pandas as pd
8
-
9
-
10
-
11
- # Video ID extraction
12
-
13
-
14
- def extract_video_id(url_or_id: str) -> str | None:
15
- """Return an 11-char YouTube video ID, or None if not found."""
16
- patterns = [
17
- r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})",
18
- r"^([a-zA-Z0-9_-]{11})$",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ]
20
- for pattern in patterns:
21
- m = re.search(pattern, url_or_id)
22
- if m:
23
- return m.group(1)
24
- return None
25
-
26
-
27
-
28
- # Duration parser
29
-
30
-
31
- def _parse_duration(iso: str) -> str:
32
- m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "PT0S")
33
- if not m:
34
- return "0:00"
35
- h, mn, s = (int(x or 0) for x in m.groups())
36
- return f"{h}:{mn:02d}:{s:02d}" if h else f"{mn}:{s:02d}"
37
-
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- # Metadata
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str | None]:
    """Look up one video via the YouTube Data API ``videos`` endpoint.

    Returns ``(meta, None)`` on success or ``(None, error_message)`` on
    failure; exactly one element of the pair is ``None``. All exceptions are
    converted into an error message rather than propagated.
    """
    query = {
        "id": video_id,
        "key": api_key,
        "part": "snippet,statistics,contentDetails",
    }
    try:
        payload = requests.get(
            "https://www.googleapis.com/youtube/v3/videos",
            params=query,
            timeout=15,
        ).json()

        if "error" in payload:
            return None, payload["error"].get("message", "YouTube API error")

        videos = payload.get("items", [])
        if not videos:
            return None, "Video not found — check the ID or URL."

        video = videos[0]
        snippet = video.get("snippet", {})
        stats = video.get("statistics", {})
        details = video.get("contentDetails", {})
        thumbs = snippet.get("thumbnails", {})

        return {
            "title": snippet.get("title", "Unknown"),
            "description": snippet.get("description", ""),
            "channel_title": snippet.get("channelTitle", "Unknown"),
            # Keep only the date part (first 10 characters) of the timestamp.
            "published_at": snippet.get("publishedAt", "")[:10],
            "tags": snippet.get("tags", []),
            # Prefer the "high" thumbnail, fall back to "medium".
            "thumbnail_url": (
                thumbs.get("high", {}).get("url", "")
                or thumbs.get("medium", {}).get("url", "")
            ),
            "view_count": int(stats.get("viewCount", 0)),
            "like_count": int(stats.get("likeCount", 0)),
            "comment_count": int(stats.get("commentCount", 0)),
            "duration": _parse_duration(details.get("duration", "PT0S")),
        }, None

    except requests.exceptions.Timeout:
        return None, "Request timed out. Check your internet connection."
    except Exception as exc:
        return None, str(exc)
89
-
90
-
91
-
92
- # Transcript
93
-
94
-
95
def fetch_transcript(video_id: str) -> tuple[str, str]:
    """Fetch the transcript for *video_id* and return ``(text, status_message)``.

    The transcript segments are joined with single spaces. On any failure
    (library missing, transcript unavailable, network error, ...) the text is
    empty and the status message carries the first 80 characters of the error.
    """
    try:
        # Imported lazily so the module still loads without the optional package.
        from youtube_transcript_api import YouTubeTranscriptApi

        segments = YouTubeTranscriptApi.get_transcript(video_id)
        text = " ".join(segment["text"] for segment in segments)
        return text, f" Transcript: {len(text.split())} words"
    except Exception as exc:  # best-effort: a missing transcript must not abort the caller
        short = str(exc)[:80]
        return "", f" Transcript unavailable: {short}"
105
-
106
-
107
-
108
- # Comments
109
-
110
-
111
def fetch_comments(
    video_id: str,
    api_key: str,
    max_comments: int = 150,
) -> tuple[pd.DataFrame, str]:
    """Fetch up to *max_comments* top-level comments, ordered by relevance.

    Returns ``(DataFrame, status_message)``. The frame has the columns
    ``author``, ``text``, ``likes`` and ``published_at``; it is empty when
    nothing could be fetched. Pages of at most 100 comments are requested
    until the limit is reached or the API reports no further page.

    If the API returns an error payload, its message is included in the
    status instead of being discarded: as a "Comments error" when nothing was
    fetched, or as a "stopped early" note when earlier pages succeeded.
    """
    rows = []
    next_token = None
    api_error = None

    try:
        while len(rows) < max_comments:
            want = min(100, max_comments - len(rows))
            params = {
                "videoId": video_id,
                "key": api_key,
                "part": "snippet",
                "maxResults": want,
                "order": "relevance",
            }
            if next_token:
                params["pageToken"] = next_token

            resp = requests.get(
                "https://www.googleapis.com/youtube/v3/commentThreads",
                params=params,
                timeout=15,
            )
            data = resp.json()

            if "error" in data:
                api_error = str(data["error"].get("message", "Comment API error"))
                break

            for item in data.get("items", []):
                c = item["snippet"]["topLevelComment"]["snippet"]
                rows.append({
                    "author": c.get("authorDisplayName", ""),
                    "text": c.get("textDisplay", ""),
                    "likes": int(c.get("likeCount", 0)),
                    "published_at": c.get("publishedAt", "")[:10],
                })

            next_token = data.get("nextPageToken")
            if not next_token or not data.get("items"):
                break

        if not rows:
            if api_error:
                return pd.DataFrame(), f" Comments error: {api_error[:80]}"
            return pd.DataFrame(), " No comments fetched (comments may be disabled)"

        df = pd.DataFrame(rows)
        status = f" Comments: {len(df)} fetched"
        if api_error:
            # Keep the partial result but tell the caller why paging stopped.
            status += f" (stopped early: {api_error[:80]})"
        return df, status

    except requests.exceptions.Timeout:
        return pd.DataFrame(), " Comments request timed out"
    except Exception as exc:
        return pd.DataFrame(), f" Comments error: {str(exc)[:80]}"
167
-
168
-
169
-
170
- # Search by keyword
171
-
172
-
173
def search_videos_by_title(
    keyword: str,
    api_key: str,
    max_results: int = 5,
) -> list[dict]:
    """Search YouTube for videos matching *keyword*.

    Returns a list of dicts with ``video_id``, ``title``, ``channel_title``,
    ``published_at`` (date part only) and ``thumbnail_url`` (the "medium"
    thumbnail). Items without a video ID are skipped. Any failure results in
    an empty list.
    """
    query = {
        "q": keyword,
        "key": api_key,
        "part": "snippet",
        "type": "video",
        "maxResults": max_results,
    }
    try:
        payload = requests.get(
            "https://www.googleapis.com/youtube/v3/search",
            params=query,
            timeout=15,
        ).json()

        hits = []
        for entry in payload.get("items", []):
            video_id = entry.get("id", {}).get("videoId", "")
            snippet = entry.get("snippet", {})
            if video_id:
                hits.append({
                    "video_id": video_id,
                    "title": snippet.get("title", ""),
                    "channel_title": snippet.get("channelTitle", ""),
                    "published_at": snippet.get("publishedAt", "")[:10],
                    "thumbnail_url": snippet.get("thumbnails", {}).get("medium", {}).get("url", ""),
                })
        return hits
    except Exception:
        return []
 
1
  """
2
+ charts.py — All Plotly chart builders. Pure functions, no Streamlit imports.
3
  """
4
 
5
+ from typing import Dict, List, Tuple, Optional
6
+ import plotly.graph_objects as go
7
+ import plotly.express as px
8
  import pandas as pd
9
+ import numpy as np
10
+
11
+ # Shared theme ─
12
# Surface colours.
DARK_BG = "#0d0f14"
CARD_BG = "#13161e"
BORDER = "#1e2330"

# Text colours.
TEXT_MAIN = "#e8eaf0"
TEXT_DIM = "#5a6070"

# Accent colours used by the chart builders.
CYAN = "#00d4ff"
GREEN = "#00e5a0"
RED = "#ff4757"
AMBER = "#ffb347"
PURPLE = "#b388ff"
BLUE = "#4a8eff"

# Layout settings every figure starts from: transparent backgrounds,
# monospace font in the main text colour, and tight margins.
PLOTLY_LAYOUT = {
    "paper_bgcolor": "rgba(0,0,0,0)",
    "plot_bgcolor": "rgba(0,0,0,0)",
    "font": {"family": "'DM Mono', monospace", "color": TEXT_MAIN, "size": 12},
    "margin": {"l": 20, "r": 20, "t": 40, "b": 20},
}
30
+
31
+
32
+ # Misinformation Gauge
33
+
34
def misinfo_gauge(score: float, label: str) -> go.Figure:
    """Build a gauge indicator for a misinformation confidence score.

    Parameters
    ----------
    score : float
        Confidence on a 0–1 scale; it is multiplied by 100 for display.
    label : str
        Title text shown with the gauge.
    """
    percent = score * 100
    # Same cut-offs as the background bands below: <35 green, <65 amber, else red.
    bar_color = GREEN if score < 0.35 else (AMBER if score < 0.65 else RED)

    mono = "'DM Mono', monospace"
    axis = {
        "range": [0, 100],
        "tickwidth": 1,
        "tickcolor": BORDER,
        "tickfont": {"color": TEXT_DIM, "size": 10},
    }
    bands = [
        {"range": [0, 35], "color": "#0d1f18"},
        {"range": [35, 65], "color": "#1f1a0d"},
        {"range": [65, 100], "color": "#1f0d0d"},
    ]
    # Marker line drawn at the current value.
    marker_line = {
        "line": {"color": TEXT_MAIN, "width": 2},
        "thickness": 0.75,
        "value": percent,
    }

    indicator = go.Indicator(
        mode="gauge+number+delta",
        value=percent,
        number={"suffix": "%", "font": {"size": 32, "color": bar_color, "family": mono}},
        # Delta is measured against the 50 % midpoint.
        delta={"reference": 50, "increasing": {"color": RED}, "decreasing": {"color": GREEN}},
        title={"text": label, "font": {"size": 13, "color": TEXT_DIM}},
        gauge={
            "axis": axis,
            "bar": {"color": bar_color, "thickness": 0.3},
            "bgcolor": CARD_BG,
            "borderwidth": 0,
            "steps": bands,
            "threshold": marker_line,
        },
    )

    fig = go.Figure(indicator)
    fig.update_layout(**PLOTLY_LAYOUT, height=260)
    return fig
74
+
75
+
76
+ # Sentiment Donut ─
77
+
78
def sentiment_donut(summary: Dict) -> go.Figure:
    """Donut chart of the Positive / Neutral / Negative comment breakdown.

    Parameters
    ----------
    summary : dict
        Must contain the keys ``"POSITIVE"``, ``"NEUTRAL"``, ``"NEGATIVE"``
        and ``"total"``. ``"avg_compound"`` is optional (treated as 0 when
        absent) and selects the headline shown in the centre of the donut.
    """
    mono = "'DM Mono', monospace"
    slices = [
        ("Positive", summary["POSITIVE"], GREEN),
        ("Neutral", summary["NEUTRAL"], TEXT_DIM),
        ("Negative", summary["NEGATIVE"], RED),
    ]

    pie = go.Pie(
        labels=[name for name, _, _ in slices],
        values=[count for _, count, _ in slices],
        hole=0.62,
        marker=dict(
            colors=[tone for _, _, tone in slices],
            line=dict(color=DARK_BG, width=3),
        ),
        textinfo="label+percent",
        textfont=dict(family=mono, size=11, color=TEXT_MAIN),
        hovertemplate="<b>%{label}</b><br>%{value} comments (%{percent})<extra></extra>",
        rotation=90,
    )
    fig = go.Figure(pie)

    # Headline in the hole: a ±0.05 dead zone around zero counts as "Mixed".
    avg = summary.get("avg_compound", 0)
    if avg > 0.05:
        overall = "😊 Positive"
    elif avg < -0.05:
        overall = "😟 Negative"
    else:
        overall = "😐 Mixed"

    fig.add_annotation(
        text=f"<b>{overall}</b><br><span style='font-size:11px;color:{TEXT_DIM}'>{summary['total']} comments</span>",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=13, color=TEXT_MAIN, family=mono),
        align="center",
    )
    fig.update_layout(
        **PLOTLY_LAYOUT,
        height=300,
        legend=dict(orientation="h", y=-0.08, font=dict(size=11)),
    )
    return fig
108
+
109
+
110
+ # Keyword Bar Chart ─
111
+
112
def keyword_bar(
    keywords: List[Tuple[str, float]],
    title: str = "Top Keywords",
    color: str = CYAN,
) -> go.Figure:
    """Horizontal bar chart of the first 15 ``(word, weight)`` pairs.

    Bar lengths are scaled so the largest weight is 100; the bar labels show
    the raw weight formatted with no decimals. An empty *keywords* list
    produces the "No data available" placeholder figure.
    """
    if not keywords:
        return _empty_fig(title)

    terms, raw_weights = zip(*keywords[:15])
    # Guard against division by zero when the largest weight is 0.
    peak = max(raw_weights) or 1
    scaled = [weight / peak * 100 for weight in raw_weights]

    bars = go.Bar(
        x=scaled,
        y=terms,
        orientation="h",
        marker=dict(
            color=scaled,
            # Gradient from the colour with "33" appended (hex alpha) to the full colour.
            colorscale=[[0, f"{color}33"], [1, color]],
            line=dict(width=0),
        ),
        text=[f"{weight:.0f}" for weight in raw_weights],
        textposition="inside",
        textfont=dict(size=10, color=DARK_BG),
        hovertemplate="<b>%{y}</b><br>Weight: %{text}<extra></extra>",
    )

    fig = go.Figure(bars)
    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text=title, font=dict(size=13, color=TEXT_DIM), x=0),
        height=380,
        # Reversed so the first keyword appears at the top.
        yaxis=dict(autorange="reversed", tickfont=dict(size=11), gridcolor=BORDER),
        xaxis=dict(showticklabels=False, gridcolor=BORDER),
        bargap=0.35,
    )
    return fig
148
+
149
+
150
+ # Stream Trust Bars ─
151
+
152
def stream_trust_bars(stream_details: Dict) -> go.Figure:
    """Horizontal bar chart of per-stream misinformation scores.

    Parameters
    ----------
    stream_details : dict
        Maps a stream name to a numeric score. Each score is multiplied by
        100 and rounded to one decimal; names are shown with underscores
        replaced by spaces and title-cased.
    """

    def _tone(percent):
        # Above 50 → red, above 30 → amber, otherwise green.
        if percent > 50:
            return RED
        if percent > 30:
            return AMBER
        return GREEN

    percents = [round(raw * 100, 1) for raw in stream_details.values()]
    display_names = [key.replace("_", " ").title() for key in stream_details]

    bars = go.Bar(
        x=percents,
        y=display_names,
        orientation="h",
        marker=dict(color=[_tone(p) for p in percents], line=dict(width=0)),
        text=[f"{p}%" for p in percents],
        textposition="outside",
        textfont=dict(size=11, color=TEXT_MAIN),
        hovertemplate="<b>%{y}</b><br>Score: %{x}%<extra></extra>",
    )

    fig = go.Figure(bars)
    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Per-Stream Analysis", font=dict(size=13, color=TEXT_DIM), x=0),
        height=220,
        # Range runs past 100 to leave room for the outside labels.
        xaxis=dict(range=[0, 110], showticklabels=False, gridcolor=BORDER),
        yaxis=dict(tickfont=dict(size=11)),
        bargap=0.4,
    )
    return fig
177
+
178
+
179
+ # Modality Misinformation Distribution ─
180
+
181
def modality_misinfo_distribution(modality_analysis: Dict) -> go.Figure:
    """
    Grouped bar chart — "Misinformation Score" vs "Not Misinformation" per modality.

    For each of the ``"text"``, ``"audio"`` and ``"video"`` entries this reads
    ``misinfo_pct`` and ``credible_pct`` (each defaulting to 50.0 when the
    entry or key is missing) for the bar heights, and ``misinfo_logit`` /
    ``credible_logit`` (defaulting to 0.0) for the hover text.

    NOTE(review): the values are presumably complementary softmax percentages
    produced upstream by the analyzer; this function does not check that each
    pair sums to 100.

    Parameters
    ----------
    modality_analysis : dict
        Mapping {"text": {...}, "audio": {...}, "video": {...}} — one
        sub-dict per stream.
    """
    MODALITIES = ["Text", "Audio", "Video"]
    KEYS = ["text", "audio", "video"]

    streams = [modality_analysis.get(key, {}) for key in KEYS]
    misinfo_pcts = [stream.get("misinfo_pct", 50.0) for stream in streams]
    credible_pcts = [stream.get("credible_pct", 50.0) for stream in streams]
    # One hover line per modality showing both raw logits.
    logit_tips = [
        f"logit_m={stream.get('misinfo_logit', 0.0):+.4f} | "
        f"logit_c={stream.get('credible_logit', 0.0):+.4f}"
        for stream in streams
    ]

    # (legend name, bar heights, colour, label used in the hover headline)
    series = [
        ("Misinformation Score", misinfo_pcts, RED, "Misinformation"),
        ("Not Misinformation", credible_pcts, GREEN, "Credible"),
    ]

    fig = go.Figure()
    for legend_name, heights, tone, hover_label in series:
        fig.add_trace(go.Bar(
            name=legend_name,
            x=MODALITIES,
            y=heights,
            marker=dict(
                color=[tone] * 3,
                opacity=0.88,
                line=dict(color=DARK_BG, width=1),
            ),
            text=[f"{value:.1f}%" for value in heights],
            textposition="outside",
            textfont=dict(size=11, color=tone),
            customdata=logit_tips,
            hovertemplate=(
                "<b>%{x} — " + hover_label + "</b><br>"
                "Softmax: %{y:.2f}%<br>"
                "%{customdata}<extra></extra>"
            ),
        ))

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(
            text="Modality Misinformation Distribution",
            font=dict(size=13, color=TEXT_DIM),
            x=0,
        ),
        barmode="group",
        height=280,
        xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor=BORDER),
        yaxis=dict(
            title="Softmax Score (%)",
            range=[0, 115],  # headroom for the outside labels
            gridcolor=BORDER,
            ticksuffix="%",
        ),
        legend=dict(
            orientation="h",
            y=1.12,
            font=dict(size=11),
            bgcolor="rgba(0,0,0,0)",
        ),
        bargap=0.22,
        bargroupgap=0.06,
    )
    return fig
280
+
281
+
282
+ # Trust Score by Modality ─
283
+
284
def trust_score_by_modality(modality_analysis: Dict) -> go.Figure:
    """
    Vertical bar chart — the ``trust_score`` value of each stream.

    Reads ``trust_score`` from the ``"text"``, ``"audio"`` and ``"video"``
    entries (0.0 when the entry or key is missing). Bars are green at 60 and
    above, amber at 35 and above, red otherwise; dotted reference lines are
    drawn at 80 ("High Trust") and 50 ("Threshold").

    NOTE(review): the score is computed upstream. The hover text describes it
    as "(1 – H_entropy) × content_richness" — confirm the exact formula
    against the analyzer.

    Parameters
    ----------
    modality_analysis : dict
        Mapping {"text": {...}, "audio": {...}, "video": {...}} — one
        sub-dict per stream.
    """
    MODALITIES = ["Text", "Audio", "Video"]
    KEYS = ["text", "audio", "video"]

    trust_vals = []
    bar_colors = []
    for key in KEYS:
        trust = modality_analysis.get(key, {}).get("trust_score", 0.0)
        trust_vals.append(trust)
        bar_colors.append(GREEN if trust >= 60 else (AMBER if trust >= 35 else RED))

    bars = go.Bar(
        x=MODALITIES,
        y=trust_vals,
        marker=dict(
            color=bar_colors,
            opacity=0.88,
            line=dict(color=DARK_BG, width=1),
        ),
        text=[f"{trust:.1f}%" for trust in trust_vals],
        textposition="outside",
        textfont=dict(size=11, color=TEXT_MAIN),
        hovertemplate=(
            "<b>%{x}</b><br>"
            "Trust Level: %{y:.2f}%<br>"
            "<i>Derived from (1 – H_entropy) × content_richness</i>"
            "<extra></extra>"
        ),
    )
    fig = go.Figure(bars)

    # Reference lines
    reference_lines = ((80, "High Trust", GREEN), (50, "Threshold", AMBER))
    for height, caption, tone in reference_lines:
        fig.add_hline(
            y=height,
            line=dict(color=tone, width=1, dash="dot"),
            annotation_text=caption,
            annotation_position="right",
            annotation_font=dict(size=9, color=tone),
        )

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(
            text="Trust Score by Modality",
            font=dict(size=13, color=TEXT_DIM),
            x=0,
        ),
        height=280,
        xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor=BORDER),
        yaxis=dict(
            title="Trust Level (%)",
            range=[0, 115],  # headroom for the outside labels
            gridcolor=BORDER,
            ticksuffix="%",
        ),
        bargap=0.38,
    )
    return fig
356
+
357
+
358
+ # Uncertainty Analysis
359
+
360
def uncertainty_analysis(modality_analysis: Dict) -> go.Figure:
    """
    Vertical bar chart — the ``uncertainty`` value of each stream.

    Reads ``uncertainty`` (100.0 when missing) for the bar heights and
    ``misinfo_pct`` (50.0 when missing) for the hover text from the
    ``"text"``, ``"audio"`` and ``"video"`` entries. Bars are green up to 35,
    amber up to 65 and red above; dotted reference lines mark 100
    ("Max Entropy (no signal)") and 50 ("Mid Uncertainty").

    NOTE(review): per the chart labels the values are Shannon entropy
    H = –Σ p·log₂(p) normalised to a percentage; that computation happens
    upstream and is not verified here.

    Parameters
    ----------
    modality_analysis : dict
        Mapping {"text": {...}, "audio": {...}, "video": {...}} — one
        sub-dict per stream.
    """
    MODALITIES = ["Text", "Audio", "Video"]
    KEYS = ["text", "audio", "video"]

    streams = [modality_analysis.get(key, {}) for key in KEYS]
    uncertainty_vals = [stream.get("uncertainty", 100.0) for stream in streams]
    misinfo_pcts = [stream.get("misinfo_pct", 50.0) for stream in streams]

    def _tone(level):
        # Low uncertainty reads as confident (green); high as unsure (red).
        return GREEN if level <= 35 else (AMBER if level <= 65 else RED)

    bars = go.Bar(
        x=MODALITIES,
        y=uncertainty_vals,
        marker=dict(
            color=[_tone(level) for level in uncertainty_vals],
            opacity=0.88,
            line=dict(color=DARK_BG, width=1),
        ),
        text=[f"{level:.1f}%" for level in uncertainty_vals],
        textposition="outside",
        textfont=dict(size=11, color=TEXT_MAIN),
        # One-element rows so the template can address customdata[0].
        customdata=[[f"p_misinfo={pct:.1f}%"] for pct in misinfo_pcts],
        hovertemplate=(
            "<b>%{x}</b><br>"
            "Uncertainty (H): %{y:.2f}%<br>"
            "%{customdata[0]}<br>"
            "<i>H = –Σ p·log₂(p), normalised to %</i>"
            "<extra></extra>"
        ),
    )
    fig = go.Figure(bars)

    reference_lines = (
        (100, RED, "Max Entropy (no signal)"),
        (50, AMBER, "Mid Uncertainty"),
    )
    for height, tone, caption in reference_lines:
        fig.add_hline(
            y=height,
            line=dict(color=tone, width=1, dash="dot"),
            annotation_text=caption,
            annotation_position="right",
            annotation_font=dict(size=9, color=tone),
        )

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(
            text="Uncertainty Analysis (Shannon Entropy)",
            font=dict(size=13, color=TEXT_DIM),
            x=0,
        ),
        height=280,
        xaxis=dict(title="Modality", tickfont=dict(size=12), gridcolor=BORDER),
        yaxis=dict(
            title="Uncertainty (%)",
            range=[0, 120],  # headroom above the 100 % reference line
            gridcolor=BORDER,
            ticksuffix="%",
        ),
        bargap=0.38,
    )
    return fig
444
+
445
+
446
+ # Comment Sentiment Timeline
447
+
448
def sentiment_timeline(comments_df: pd.DataFrame, sentiments: List[Dict]) -> go.Figure:
    """Scatter plot of each comment's compound sentiment, marker size scaled by likes.

    Parameters
    ----------
    comments_df : pd.DataFrame
        Must provide ``text`` and ``likes`` columns. An empty frame yields the
        "No data available" placeholder figure.
    sentiments : list of dict
        One dict per row of *comments_df*, in the same order. ``compound``
        defaults to 0 and ``label`` to ``"NEUTRAL"`` when missing.

    Notes
    -----
    The hover text shows the real like count (passed via ``customdata``)
    rather than the log-scaled marker size, and the ellipsis is only appended
    to comments that were actually cut at 80 characters.
    """
    if comments_df.empty:
        return _empty_fig("Comment Sentiment Distribution")

    df = comments_df.copy()
    df["compound"] = [s.get("compound", 0) for s in sentiments]
    df["label"] = [s.get("label", "NEUTRAL") for s in sentiments]
    text = df["text"]
    df["text_short"] = text.where(text.str.len() <= 80, text.str[:80] + "…")

    fig = go.Figure()
    for lbl, clr in [("POSITIVE", GREEN), ("NEGATIVE", RED), ("NEUTRAL", AMBER)]:
        sub = df[df["label"] == lbl]
        if sub.empty:
            continue
        likes = sub["likes"].fillna(0)
        fig.add_trace(go.Scatter(
            x=sub.index,
            y=sub["compound"],
            mode="markers",
            name=lbl,
            marker=dict(
                # Log scale keeps very popular comments from dwarfing the rest;
                # the size is clamped to the 4–20 range.
                size=np.clip(np.log1p(likes) * 4 + 4, 4, 20),
                color=clr,
                opacity=0.75,
                line=dict(width=0),
            ),
            text=sub["text_short"],
            customdata=likes,
            hovertemplate="<b>%{text}</b><br>Sentiment: %{y:.2f}<br>Likes: %{customdata}<extra></extra>",
        ))

    fig.add_hline(y=0, line=dict(color=BORDER, width=1, dash="dot"))
    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Comment Sentiment (size = likes)", font=dict(size=13, color=TEXT_DIM), x=0),
        height=320,
        xaxis=dict(title="Comment index", gridcolor=BORDER, showgrid=False),
        yaxis=dict(title="Compound score", gridcolor=BORDER, range=[-1.1, 1.1]),
        legend=dict(orientation="h", y=1.12, font=dict(size=11)),
    )
    return fig
489
+
490
+
491
+ # Positive vs Negative Keyword Comparison ─
492
+
493
def keyword_comparison(
    pos_kw: List[Tuple[str, float]],
    neg_kw: List[Tuple[str, float]],
) -> go.Figure:
    """Diverging bar chart: positive keywords to the right, negative to the left.

    Only the first 10 ``(word, score)`` pairs of each list are drawn. Scores
    are scaled per list so the largest becomes 100 (positive) or -100
    (negative). When both lists are empty the "No data available"
    placeholder figure is returned.
    """
    if not (pos_kw or neg_kw):
        return _empty_fig("Sentiment Keywords")

    limit = 10
    # (legend name, keyword pairs, direction on the x axis, bar colour)
    sides = [
        ("Positive", pos_kw[:limit], 1, GREEN),
        ("Negative", neg_kw[:limit], -1, RED),
    ]

    fig = go.Figure()
    for legend_name, pairs, direction, tone in sides:
        if not pairs:
            continue
        terms, scores = zip(*pairs)
        # Guard against division by zero when the largest score is 0.
        peak = max(scores) or 1
        fig.add_trace(go.Bar(
            name=legend_name,
            y=list(terms),
            x=[direction * score / peak * 100 for score in scores],
            orientation="h",
            marker_color=tone,
            hovertemplate="<b>%{y}</b><br>Score: %{x:.1f}<extra></extra>",
        ))

    fig.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text="Sentiment-Weighted Keywords", font=dict(size=13, color=TEXT_DIM), x=0),
        height=360,
        barmode="overlay",
        xaxis=dict(
            title="← Negative | Positive →",
            gridcolor=BORDER,
            zeroline=True,
            zerolinecolor=BORDER,
            zerolinewidth=2,
        ),
        yaxis=dict(tickfont=dict(size=10)),
        legend=dict(orientation="h", y=1.1),
    )
    return fig
542
+
543
+
544
+ # Helpers
545
+
546
def _empty_fig(title: str) -> go.Figure:
    """Return a placeholder figure titled *title* that reads "No data available"."""
    placeholder = go.Figure()
    placeholder.add_annotation(
        text="No data available",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=14, color=TEXT_DIM),
    )
    placeholder.update_layout(
        **PLOTLY_LAYOUT,
        title=dict(text=title, x=0),
        height=250,
    )
    return placeholder