rocky250 committed on
Commit
7bb49de
·
verified ·
1 Parent(s): df99890

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +525 -707
app.py CHANGED
@@ -1,12 +1,12 @@
1
  """
2
  app.py — Video Verifier & Sentiment Analyzer
3
- Professional dark-mode Streamlit application.
 
4
  """
5
 
6
  import os
7
- import time
8
- import streamlit as st
9
  import pandas as pd
 
10
 
11
  from fetcher import (
12
  extract_video_id,
@@ -32,773 +32,591 @@ from charts import (
32
  )
33
 
34
  # ══════════════════════════════════════════════════════════════════════════════
35
- # PAGE CONFIG & GLOBAL STYLES
36
  # ══════════════════════════════════════════════════════════════════════════════
37
 
38
- st.set_page_config(
39
- page_title="VideoVerifier — MHMisinfo",
40
- page_icon="🔬",
41
- layout="wide",
42
- initial_sidebar_state="expanded",
43
- )
44
-
45
- st.markdown("""
46
- <style>
47
- /* ── Google Fonts ── */
48
  @import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
49
 
50
- /* ── Root palette ── */
51
  :root {
52
- --bg: #0d0f14;
53
- --card: #13161e;
54
- --border: #1e2330;
55
- --text: #e8eaf0;
56
- --dim: #5a6070;
57
- --cyan: #00d4ff;
58
- --green: #00e5a0;
59
- --red: #ff4757;
60
- --amber: #ffb347;
61
- --purple: #b388ff;
62
- --blue: #4a8eff;
63
- }
64
-
65
- /* ── App shell ── */
66
- html, body, [class*="css"] {
67
- background-color: var(--bg) !important;
68
- color: var(--text) !important;
69
- font-family: 'IBM Plex Sans', sans-serif !important;
70
- }
71
-
72
- .stApp { background: var(--bg) !important; }
73
-
74
- /* ── Hide Streamlit chrome ── */
75
- #MainMenu, footer, header { visibility: hidden; }
76
- .block-container { padding: 1.5rem 2rem !important; max-width: 1400px; }
77
-
78
- /* ── Sidebar ── */
79
- section[data-testid="stSidebar"] {
80
- background: var(--card) !important;
81
- border-right: 1px solid var(--border) !important;
82
- }
83
- section[data-testid="stSidebar"] * { color: var(--text) !important; }
84
-
85
- /* ── Inputs ── */
86
- input, textarea, select, .stTextInput input {
87
- background: #1a1d27 !important;
88
- border: 1px solid var(--border) !important;
89
- color: var(--text) !important;
90
- border-radius: 8px !important;
91
- font-family: 'DM Mono', monospace !important;
92
- font-size: 0.88rem !important;
93
- }
94
- input:focus, textarea:focus {
95
- border-color: var(--cyan) !important;
96
- box-shadow: 0 0 0 2px rgba(0,212,255,0.15) !important;
97
- }
98
-
99
- /* ── Buttons ── */
100
- .stButton > button {
101
- background: linear-gradient(135deg, #00d4ff22, #4a8eff22) !important;
102
- border: 1px solid var(--cyan) !important;
103
- color: var(--cyan) !important;
104
- border-radius: 8px !important;
105
- font-family: 'DM Mono', monospace !important;
106
- font-size: 0.85rem !important;
107
- letter-spacing: 0.05em !important;
108
- padding: 0.45rem 1.2rem !important;
109
- transition: all 0.2s ease !important;
110
- }
111
- .stButton > button:hover {
112
- background: linear-gradient(135deg, #00d4ff44, #4a8eff33) !important;
113
- box-shadow: 0 0 16px rgba(0,212,255,0.25) !important;
114
- transform: translateY(-1px) !important;
115
- }
116
- .stButton > button[kind="primary"] {
117
- background: linear-gradient(135deg, var(--cyan), var(--blue)) !important;
118
- border: none !important;
119
- color: var(--bg) !important;
120
- font-weight: 600 !important;
121
- }
122
-
123
- /* ── Cards ── */
124
- .vv-card {
125
- background: var(--card);
126
- border: 1px solid var(--border);
127
- border-radius: 12px;
128
- padding: 1.2rem 1.4rem;
129
- margin-bottom: 1rem;
130
- }
131
- .vv-card-accent {
132
- background: var(--card);
133
- border-top: 2px solid var(--cyan);
134
- border-left: 1px solid var(--border);
135
- border-right: 1px solid var(--border);
136
- border-bottom: 1px solid var(--border);
137
- border-radius: 0 0 12px 12px;
138
- padding: 1.2rem 1.4rem;
139
- margin-bottom: 1rem;
140
- }
141
-
142
- /* ── Section headers ── */
143
- .vv-section-title {
144
- font-family: 'Syne', sans-serif;
145
- font-size: 0.7rem;
146
- font-weight: 700;
147
- letter-spacing: 0.18em;
148
- text-transform: uppercase;
149
- color: var(--dim);
150
- margin-bottom: 0.6rem;
151
- }
152
-
153
- /* ── Hero title ── */
154
- .vv-hero {
155
- font-family: 'Syne', sans-serif;
156
- font-size: 1.6rem;
157
- font-weight: 800;
158
- background: linear-gradient(135deg, var(--cyan), var(--blue));
159
- -webkit-background-clip: text;
160
- -webkit-text-fill-color: transparent;
161
- background-clip: text;
162
- letter-spacing: -0.02em;
163
- line-height: 1.2;
164
- margin: 0 0 0.2rem;
165
- }
166
-
167
- /* ── Stat chips ── */
168
- .vv-stat {
169
- display: inline-block;
170
- background: #1a1d27;
171
- border: 1px solid var(--border);
172
- border-radius: 6px;
173
- padding: 0.25rem 0.7rem;
174
- font-family: 'DM Mono', monospace;
175
- font-size: 0.78rem;
176
- color: var(--cyan);
177
- margin: 0.15rem 0.2rem 0.15rem 0;
178
- }
179
-
180
- /* ── Badge ── */
181
- .vv-badge-green {
182
- display: inline-block;
183
- background: rgba(0,229,160,0.12);
184
- border: 1px solid var(--green);
185
- color: var(--green);
186
- border-radius: 20px;
187
- padding: 0.2rem 0.8rem;
188
- font-size: 0.78rem;
189
- font-family: 'DM Mono', monospace;
190
- }
191
- .vv-badge-red {
192
- display: inline-block;
193
- background: rgba(255,71,87,0.12);
194
- border: 1px solid var(--red);
195
- color: var(--red);
196
- border-radius: 20px;
197
- padding: 0.2rem 0.8rem;
198
- font-size: 0.78rem;
199
- font-family: 'DM Mono', monospace;
200
- }
201
- .vv-badge-amber {
202
- display: inline-block;
203
- background: rgba(255,179,71,0.12);
204
- border: 1px solid var(--amber);
205
- color: var(--amber);
206
- border-radius: 20px;
207
- padding: 0.2rem 0.8rem;
208
- font-size: 0.78rem;
209
- font-family: 'DM Mono', monospace;
210
- }
211
-
212
- /* ── Reasoning box ── */
213
- .vv-reasoning {
214
- background: #0d1119;
215
- border-left: 3px solid var(--amber);
216
- padding: 0.7rem 1rem;
217
- border-radius: 0 8px 8px 0;
218
- font-size: 0.83rem;
219
- color: #c0c4cc;
220
- line-height: 1.6;
221
- font-family: 'IBM Plex Sans', sans-serif;
222
- margin-top: 0.6rem;
223
- }
224
-
225
- /* ── Dataframe ── */
226
- .stDataFrame {
227
- background: var(--card) !important;
228
- border: 1px solid var(--border) !important;
229
- border-radius: 8px !important;
230
- }
231
- .stDataFrame th {
232
- background: #1a1d27 !important;
233
- color: var(--cyan) !important;
234
- font-family: 'DM Mono', monospace !important;
235
- font-size: 0.78rem !important;
236
- }
237
- .stDataFrame td {
238
- color: var(--text) !important;
239
- font-size: 0.8rem !important;
240
- border-color: var(--border) !important;
241
- }
242
-
243
- /* ── Tabs ── */
244
- .stTabs [data-baseweb="tab-list"] {
245
- background: transparent !important;
246
- border-bottom: 1px solid var(--border) !important;
247
- gap: 0 !important;
248
- }
249
- .stTabs [data-baseweb="tab"] {
250
- background: transparent !important;
251
- color: var(--dim) !important;
252
- font-family: 'DM Mono', monospace !important;
253
- font-size: 0.82rem !important;
254
- letter-spacing: 0.05em !important;
255
- border: none !important;
256
- padding: 0.5rem 1.2rem !important;
257
- }
258
- .stTabs [aria-selected="true"] {
259
- color: var(--cyan) !important;
260
- border-bottom: 2px solid var(--cyan) !important;
261
- }
262
-
263
- /* ── Spinner ── */
264
- .stSpinner > div { border-top-color: var(--cyan) !important; }
265
-
266
- /* ── Alerts ── */
267
- .stAlert { border-radius: 8px !important; font-size: 0.85rem !important; }
268
-
269
- /* ── Divider ── */
270
- hr { border-color: var(--border) !important; }
271
-
272
- /* ── Select box ── */
273
- .stSelectbox > div > div {
274
- background: #1a1d27 !important;
275
- border-color: var(--border) !important;
276
- color: var(--text) !important;
277
- }
278
-
279
- /* ── File uploader ── */
280
- .stFileUploader {
281
- background: #1a1d27 !important;
282
- border: 1px dashed var(--border) !important;
283
- border-radius: 8px !important;
284
  }
285
 
286
- /* ── Progress bar ── */
287
- .stProgress > div > div > div {
288
- background: linear-gradient(90deg, var(--cyan), var(--blue)) !important;
289
- }
290
-
291
- /* ── Number input ── */
292
- .stNumberInput input {
293
- background: #1a1d27 !important;
294
- border-color: var(--border) !important;
295
- }
296
-
297
- /* ── Expander ── */
298
- .streamlit-expanderHeader {
299
- background: var(--card) !important;
300
- border-color: var(--border) !important;
301
- color: var(--text) !important;
302
- font-family: 'DM Mono', monospace !important;
303
- font-size: 0.85rem !important;
304
- }
305
- </style>
306
- """, unsafe_allow_html=True)
307
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
  # ══════════════════════════════════════════════════════════════════════════════
310
- # SESSION STATE HELPERS
311
  # ══════════════════════════════════════════════════════════════════════════════
312
 
313
- def init_state():
314
- defaults = {
315
- "metadata": None,
316
- "transcript": "",
317
- "comments_df": pd.DataFrame(),
318
- "sentiments": [],
319
- "sent_summary": {},
320
- "misinfo": None,
321
- "keywords": [],
322
- "pos_kw": [],
323
- "neg_kw": [],
324
- "video_id": None,
325
- "analysed": False,
326
- "status_log": [],
327
- }
328
- for k, v in defaults.items():
329
- if k not in st.session_state:
330
- st.session_state[k] = v
331
-
332
- init_state()
 
 
 
 
333
 
334
 
335
  # ══════════════════════════════════════════════════════════════════════════════
336
- # SIDEBAR
337
  # ══════════════════════════════════════════════════════════════════════════════
338
 
339
- with st.sidebar:
340
- st.markdown('<p class="vv-hero" style="font-size:1.1rem">πŸ”¬ VideoVerifier</p>', unsafe_allow_html=True)
341
- st.markdown('<p style="color:#5a6070;font-size:0.78rem;font-family:\'DM Mono\',monospace;margin-top:-8px">Mental Health Misinfo Detector</p>', unsafe_allow_html=True)
342
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- st.markdown('<p class="vv-section-title">βš™οΈ Configuration</p>', unsafe_allow_html=True)
 
 
 
345
 
346
- api_key = st.text_input(
347
- "YouTube API v3 Key",
348
- value=os.environ.get("YT_API_KEY", ""),
349
- type="password",
350
- placeholder="AIza...",
351
- help="Get a free key at console.cloud.google.com",
352
- )
353
 
354
- sentiment_method = st.selectbox(
355
- "Sentiment Engine",
356
- ["vader", "hf"],
357
- format_func=lambda x: "VADER (fast, CPU)" if x == "vader" else "DistilBERT (accurate, ~500MB)",
358
- help="VADER is ~100× faster and works offline. DistilBERT downloads ~500MB on first run.",
359
- )
360
 
361
- max_comments = st.number_input(
362
- "Max comments to fetch",
363
- min_value=10, max_value=500, value=150, step=10,
364
- help="YouTube API quota: ~1 unit per comment request",
 
 
 
 
 
 
 
 
 
 
 
365
  )
366
 
367
- st.markdown("---")
368
- st.markdown('<p class="vv-section-title">πŸ“‹ About</p>', unsafe_allow_html=True)
369
- st.markdown(
370
- '<p style="font-size:0.78rem;color:#5a6070;line-height:1.6">'
371
- '4-stream SeTa-Attention model for mental health misinformation detection. '
372
- 'Plug your <code style="background:#1a1d27;padding:1px 4px;border-radius:3px;color:#00d4ff">detect_misinformation()</code> '
373
- 'function in <b>analyzer.py</b> to connect your trained checkpoint.'
374
- '</p>',
375
- unsafe_allow_html=True,
376
  )
377
 
378
- if st.session_state.status_log:
379
- st.markdown("---")
380
- st.markdown('<p class="vv-section-title">πŸ“œ Log</p>', unsafe_allow_html=True)
381
- for msg in st.session_state.status_log[-6:]:
382
- st.markdown(f'<p style="font-size:0.72rem;color:#5a6070;font-family:\'DM Mono\',monospace;margin:2px 0">{msg}</p>', unsafe_allow_html=True)
383
-
384
 
385
- # ══════════════════════════════════════════════════════════════════════════════
386
- # HEADER
387
- # ══════════════════════════════════════════════════════════════════════════════
388
-
389
- st.markdown(
390
- '<h1 class="vv-hero" style="font-size:2rem">Video Verifier & Sentiment Analyzer</h1>'
391
- '<p style="color:#5a6070;font-size:0.85rem;margin-top:-4px;font-family:\'DM Mono\',monospace">'
392
- 'Detect mental health misinformation · Analyze public sentiment · Understand video content at a glance'
393
- '</p>',
394
- unsafe_allow_html=True,
395
- )
396
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
 
399
  # ══════════════════════════════════════════════════════════════════════════════
400
- # INPUT SECTION
401
  # ══════════════════════════════════════════════════════════════════════════════
402
 
403
- input_tab1, input_tab2 = st.tabs(["πŸ”— YouTube URL", "πŸ“ Upload Video File"])
404
-
405
- video_id_to_analyze = None
 
 
 
 
 
 
406
 
407
- with input_tab1:
408
- col_url, col_btn = st.columns([5, 1])
409
- with col_url:
410
- yt_url = st.text_input(
411
- "YouTube URL",
412
- placeholder="https://www.youtube.com/watch?v=... or youtu.be/...",
413
- label_visibility="collapsed",
414
- )
415
- with col_btn:
416
- analyze_url_btn = st.button("πŸ” Analyze", type="primary", use_container_width=True)
417
 
418
- if analyze_url_btn and yt_url:
419
- vid = extract_video_id(yt_url)
420
- if vid:
421
- video_id_to_analyze = vid
422
- else:
423
- st.error("❌ Could not extract a valid YouTube video ID. Check the URL format.")
424
-
425
- with input_tab2:
426
- st.markdown(
427
- '<div class="vv-card">'
428
- '<p class="vv-section-title">Upload a video file</p>'
429
- '<p style="font-size:0.82rem;color:#5a6070;line-height:1.6">'
430
- '⚠️ <b>Important:</b> The YouTube Data API cannot search by raw video bytes. '
431
- 'After uploading, enter the video title or a keyword to find the matching YouTube entry. '
432
- 'For local-only analysis, the system will run misinformation detection on the filename metadata.'
433
- '</p></div>',
434
- unsafe_allow_html=True,
435
- )
436
- uploaded = st.file_uploader(
437
- "Drop a video file",
438
- type=["mp4", "mov", "avi", "mkv", "webm"],
439
- label_visibility="collapsed",
440
  )
441
- if uploaded:
442
- col_kw, col_search = st.columns([4, 1])
443
- with col_kw:
444
- kw = st.text_input(
445
- "Video title / keyword to search on YouTube",
446
- placeholder=f"e.g. {uploaded.name.replace('.mp4','').replace('_',' ')}",
447
- )
448
- with col_search:
449
- search_btn = st.button("πŸ”Ž Find on YT", use_container_width=True)
450
-
451
- if search_btn and kw and api_key:
452
- with st.spinner("Searching YouTube…"):
453
- results = search_videos_by_title(kw, api_key, max_results=5)
454
- if results:
455
- st.markdown('<p class="vv-section-title">Select the matching video</p>', unsafe_allow_html=True)
456
- for r in results:
457
- c1, c2, c3 = st.columns([1, 4, 1])
458
- with c1:
459
- if r["thumbnail_url"]:
460
- st.image(r["thumbnail_url"], width=80)
461
- with c2:
462
- st.markdown(
463
- f'<p style="margin:0;font-size:0.85rem;font-weight:500">{r["title"]}</p>'
464
- f'<p style="margin:0;font-size:0.75rem;color:#5a6070">{r["channel_title"]} Β· {r["published_at"]}</p>',
465
- unsafe_allow_html=True,
466
- )
467
- with c3:
468
- if st.button("Select", key=f"sel_{r['video_id']}"):
469
- video_id_to_analyze = r["video_id"]
470
- else:
471
- st.warning("No results found. Try a different keyword or check your API key.")
472
- elif search_btn and not api_key:
473
- st.error("Please enter your YouTube API key in the sidebar first.")
474
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
- # ══════════════════════════════════════════════════════════════════════════════
477
- # DATA FETCHING & ANALYSIS PIPELINE
478
- # ══════════════════════════════════════════════════════════════════════════════
 
 
479
 
480
- def run_full_pipeline(video_id: str):
481
- log = []
 
 
 
 
 
482
 
483
- # 1. Metadata
484
- with st.spinner("Fetching video metadata…"):
485
- meta, err = fetch_video_metadata(video_id, api_key)
486
- if err:
487
- st.error(f"❌ {err}")
488
- return
489
- log.append(f"βœ… Metadata: {meta['title'][:50]}")
490
- st.session_state.metadata = meta
491
-
492
- # 2. Transcript
493
- with st.spinner("Fetching transcript…"):
494
- transcript, t_status = fetch_transcript(video_id)
495
- log.append(t_status)
496
- st.session_state.transcript = transcript
497
-
498
- # 3. Comments
499
- with st.spinner(f"Fetching up to {max_comments} comments…"):
500
- comments_df, c_status = fetch_comments(video_id, api_key, max_comments=int(max_comments))
501
- log.append(c_status)
502
- st.session_state.comments_df = comments_df
503
-
504
- # 4. Misinformation
505
- with st.spinner("Running misinformation detection…"):
506
- misinfo = detect_misinformation(
507
- text=f"{meta['title']} {meta['description']}",
508
- tags=meta["tags"],
509
- audio_transcript=transcript,
510
- video_transcript=transcript,
511
- )
512
- log.append(f"πŸ”¬ Misinfo score: {misinfo['confidence_pct']}%")
513
- st.session_state.misinfo = misinfo
514
 
515
- # 5. Keywords
516
- kw = extract_keywords(f"{meta['title']} {meta['description']} {transcript}", meta["tags"])
517
- st.session_state.keywords = kw
518
 
519
- # 6. Sentiment
520
  if not comments_df.empty:
521
- texts = comments_df["text"].fillna("").tolist()
522
- with st.spinner(f"Analyzing sentiment of {len(texts)} comments ({sentiment_method.upper()})…"):
523
- progress = st.progress(0, text="Sentiment analysis…")
524
- batch_size = 64
525
- results = []
526
- for i in range(0, len(texts), batch_size):
527
- chunk = texts[i: i + batch_size]
528
- results += analyze_sentiment_batch(chunk, method=sentiment_method, batch_size=batch_size)
529
- progress.progress(min((i + batch_size) / len(texts), 1.0),
530
- text=f"Analyzed {min(i+batch_size, len(texts))}/{len(texts)} comments…")
531
- progress.empty()
532
- st.session_state.sentiments = results
533
- st.session_state.sent_summary = sentiment_summary(results)
534
- pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, results)
535
- st.session_state.pos_kw = pos_kw
536
- st.session_state.neg_kw = neg_kw
537
- log.append(f"πŸ’¬ Sentiment: {st.session_state.sent_summary['pos_pct']}% pos / {st.session_state.sent_summary['neg_pct']}% neg")
538
- else:
539
- st.session_state.sentiments = []
540
- st.session_state.sent_summary = {}
541
- log.append("πŸ’¬ Skipped (no comments)")
542
-
543
- st.session_state.video_id = video_id
544
- st.session_state.analysed = True
545
- st.session_state.status_log = log
546
- st.rerun()
547
 
 
 
548
 
549
- if video_id_to_analyze and api_key:
550
- run_full_pipeline(video_id_to_analyze)
551
- elif video_id_to_analyze and not api_key:
552
- st.error("⚠️ Please enter your YouTube API key in the sidebar before analyzing.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
 
554
 
555
  # ══════════════════════════════════════════════════════════════════════════════
556
- # RESULTS DASHBOARD
557
  # ══════════════════════════════════════════════════════════════════════════════
558
 
559
- if not st.session_state.analysed:
560
- # Landing state
561
- st.markdown(
562
- '<div style="text-align:center;padding:4rem 2rem">'
563
- '<p style="font-size:3rem">πŸ”¬</p>'
564
- '<p style="font-family:\'Syne\',sans-serif;font-size:1.1rem;color:#5a6070">'
565
- 'Paste a YouTube URL above and click <b style="color:#00d4ff">Analyze</b> to begin</p>'
566
- '<p style="font-size:0.8rem;color:#3a3f50;font-family:\'DM Mono\',monospace">'
567
- 'Misinformation detection · Sentiment analysis · Comment insights</p>'
568
- '</div>',
569
- unsafe_allow_html=True,
570
- )
571
- st.stop()
572
-
573
- meta = st.session_state.metadata
574
- transcript = st.session_state.transcript
575
- comments_df= st.session_state.comments_df
576
- misinfo = st.session_state.misinfo
577
- keywords = st.session_state.keywords
578
- sentiments = st.session_state.sentiments
579
- sent_sum = st.session_state.sent_summary
580
- pos_kw = st.session_state.pos_kw
581
- neg_kw = st.session_state.neg_kw
582
- video_id = st.session_state.video_id
583
-
584
-
585
- # ── Layout: left (info) / right (analytics) ───────────────────────────────────
586
-
587
- left_col, right_col = st.columns([2, 3], gap="large")
588
-
589
- # ╔══════════════════════════════╗
590
- # β•‘ LEFT COLUMN β€” Video Info β•‘
591
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
592
- with left_col:
593
-
594
- # Thumbnail + embed
595
- if meta.get("thumbnail_url"):
596
- st.image(meta["thumbnail_url"], use_column_width=True)
597
-
598
- st.markdown(
599
- f'<a href="https://www.youtube.com/watch?v={video_id}" target="_blank" '
600
- f'style="display:block;text-align:center;font-family:\'DM Mono\',monospace;'
601
- f'font-size:0.78rem;color:#5a6070;text-decoration:none;margin:4px 0 12px">β–Ά Open on YouTube</a>',
602
- unsafe_allow_html=True,
603
- )
604
-
605
- # Title & channel
606
- st.markdown(
607
- f'<div class="vv-card">'
608
- f'<p class="vv-section-title">Video</p>'
609
- f'<p style="font-family:\'Syne\',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 4px">{meta["title"]}</p>'
610
- f'<p style="font-size:0.82rem;color:#5a6070;margin:0">by <b style="color:#b0b4c0">{meta["channel_title"]}</b> Β· {meta["published_at"]}</p>'
611
- f'</div>',
612
- unsafe_allow_html=True,
613
- )
614
-
615
- # Stats
616
- st.markdown('<p class="vv-section-title">Metrics</p>', unsafe_allow_html=True)
617
- s1, s2 = st.columns(2)
618
- with s1:
619
- st.markdown(f'<span class="vv-stat">πŸ‘ {meta["view_count"]:,}</span>', unsafe_allow_html=True)
620
- st.markdown(f'<span class="vv-stat">πŸ‘ {meta["like_count"]:,}</span>', unsafe_allow_html=True)
621
- with s2:
622
- st.markdown(f'<span class="vv-stat">πŸ’¬ {meta["comment_count"]:,}</span>', unsafe_allow_html=True)
623
- st.markdown(f'<span class="vv-stat">⏱ {meta["duration"]}</span>', unsafe_allow_html=True)
624
-
625
- # Tags
626
- if meta.get("tags"):
627
- st.markdown('<p class="vv-section-title" style="margin-top:1rem">Tags</p>', unsafe_allow_html=True)
628
- tag_html = "".join(
629
- f'<span style="display:inline-block;background:#1a1d27;border:1px solid #1e2330;border-radius:4px;'
630
- f'padding:2px 8px;font-family:\'DM Mono\',monospace;font-size:0.7rem;color:#8090a0;margin:2px">'
631
- f'#{t}</span>'
632
- for t in meta["tags"][:20]
633
  )
634
- st.markdown(tag_html, unsafe_allow_html=True)
635
-
636
- # Description (collapsed)
637
- if meta.get("description"):
638
- with st.expander("πŸ“„ Description", expanded=False):
639
- st.markdown(
640
- f'<p style="font-size:0.8rem;color:#8090a0;line-height:1.65;white-space:pre-wrap">'
641
- f'{meta["description"][:1200]}{"…" if len(meta["description"])>1200 else ""}</p>',
642
- unsafe_allow_html=True,
643
- )
644
-
645
- # Transcript (collapsed)
646
- with st.expander(f"πŸ“ Transcript ({len(transcript.split()) if transcript else 0} words)", expanded=False):
647
- if transcript:
648
- st.markdown(
649
- f'<p style="font-size:0.78rem;color:#8090a0;line-height:1.65">'
650
- f'{transcript[:2500]}{"…" if len(transcript)>2500 else ""}</p>',
651
- unsafe_allow_html=True,
652
- )
653
- else:
654
- st.info("No transcript available for this video.")
655
-
656
-
657
- # ╔══════════════════════════════╗
658
- # β•‘ RIGHT COLUMN β€” Analytics β•‘
659
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
660
- with right_col:
661
-
662
- # ── Misinfo verdict ──────────────────────────────────────────────────────
663
- st.markdown('<p class="vv-section-title">πŸ”¬ Misinformation Analysis</p>', unsafe_allow_html=True)
664
-
665
- score = misinfo["score"]
666
- if score < 0.35:
667
- badge = '<span class="vv-badge-green">βœ… Appears Credible</span>'
668
- elif score < 0.65:
669
- badge = '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
670
- else:
671
- badge = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'
672
-
673
- st.markdown(badge, unsafe_allow_html=True)
674
-
675
- ga_col, detail_col = st.columns([1, 1])
676
- with ga_col:
677
- st.plotly_chart(
678
- misinfo_gauge(score, "Misinfo Confidence"),
679
- use_container_width=True,
680
- config={"displayModeBar": False},
681
  )
682
- with detail_col:
683
- st.plotly_chart(
684
- stream_trust_bars(misinfo["stream_details"]),
685
- use_container_width=True,
686
- config={"displayModeBar": False},
 
 
 
 
 
 
 
 
687
  )
 
688
 
689
- st.markdown(
690
- f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>',
691
- unsafe_allow_html=True,
692
- )
693
 
694
- st.markdown("---")
 
 
 
 
 
695
 
696
- # ── Sentiment analytics ──────────────────────────────────────────────────
697
- st.markdown('<p class="vv-section-title">πŸ’¬ Comment Sentiment</p>', unsafe_allow_html=True)
698
 
699
- if sent_sum:
700
- s_col1, s_col2, s_col3 = st.columns(3)
701
- with s_col1:
702
- st.markdown(
703
- f'<div class="vv-card" style="text-align:center">'
704
- f'<p style="color:#00e5a0;font-family:\'DM Mono\',monospace;font-size:1.6rem;font-weight:700;margin:0">{sent_sum["pos_pct"]}%</p>'
705
- f'<p style="color:#5a6070;font-size:0.75rem;margin:0">Positive</p></div>',
706
- unsafe_allow_html=True,
707
- )
708
- with s_col2:
709
- st.markdown(
710
- f'<div class="vv-card" style="text-align:center">'
711
- f'<p style="color:#ff4757;font-family:\'DM Mono\',monospace;font-size:1.6rem;font-weight:700;margin:0">{sent_sum["neg_pct"]}%</p>'
712
- f'<p style="color:#5a6070;font-size:0.75rem;margin:0">Negative</p></div>',
713
- unsafe_allow_html=True,
714
- )
715
- with s_col3:
716
- st.markdown(
717
- f'<div class="vv-card" style="text-align:center">'
718
- f'<p style="color:#5a6070;font-family:\'DM Mono\',monospace;font-size:1.6rem;font-weight:700;margin:0">{sent_sum["neu_pct"]}%</p>'
719
- f'<p style="color:#5a6070;font-size:0.75rem;margin:0">Neutral</p></div>',
720
- unsafe_allow_html=True,
721
- )
722
 
723
- d_col, t_col = st.columns([1, 1])
724
- with d_col:
725
- st.plotly_chart(
726
- sentiment_donut(sent_sum),
727
- use_container_width=True,
728
- config={"displayModeBar": False},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  )
730
- with t_col:
731
- st.plotly_chart(
732
- sentiment_timeline(comments_df, sentiments),
733
- use_container_width=True,
734
- config={"displayModeBar": False},
 
 
 
735
  )
736
-
737
- # Keyword charts
738
- kw_col1, kw_col2 = st.columns(2)
739
- with kw_col1:
740
- st.plotly_chart(
741
- keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff"),
742
- use_container_width=True,
743
- config={"displayModeBar": False},
744
- )
745
- with kw_col2:
746
- st.plotly_chart(
747
- keyword_comparison(pos_kw, neg_kw),
748
- use_container_width=True,
749
- config={"displayModeBar": False},
750
  )
751
 
752
- else:
753
- st.info("⚠️ No comment sentiment data — comments may be disabled or unavailable.")
754
- if keywords:
755
- st.plotly_chart(
756
- keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff"),
757
- use_container_width=True,
758
- config={"displayModeBar": False},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
  )
760
 
761
- # ── Comments table ───────────────────────────────────────────────────────
762
- st.markdown("---")
763
- st.markdown('<p class="vv-section-title">πŸ“Š Comments Deep-Dive</p>', unsafe_allow_html=True)
764
-
765
- if not comments_df.empty:
766
- display_df = comments_df.copy()
767
- if sentiments:
768
- display_df["sentiment"] = [s["label"] for s in sentiments]
769
- display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
770
-
771
- tab_all, tab_pos, tab_neg, tab_top = st.tabs([
772
- f"All ({len(display_df)})",
773
- f"Positive ({sent_sum.get('POSITIVE',0)})",
774
- f"Negative ({sent_sum.get('NEGATIVE',0)})",
775
- "Most Liked",
776
- ])
777
 
778
- show_cols = ["author", "text", "likes", "published_at"]
779
- if "sentiment" in display_df.columns:
780
- show_cols += ["sentiment", "compound"]
781
 
782
- with tab_all:
783
- st.dataframe(display_df[show_cols].head(100), use_container_width=True, height=320)
 
 
 
 
784
 
785
- with tab_pos:
786
- pos_df = display_df[display_df.get("sentiment", pd.Series()) == "POSITIVE"] if "sentiment" in display_df else pd.DataFrame()
787
- if not pos_df.empty:
788
- st.dataframe(pos_df[show_cols].head(50), use_container_width=True, height=320)
789
- else:
790
- st.info("No positive comments in this dataset.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
 
792
- with tab_neg:
793
- neg_df = display_df[display_df.get("sentiment", pd.Series()) == "NEGATIVE"] if "sentiment" in display_df else pd.DataFrame()
794
- if not neg_df.empty:
795
- st.dataframe(neg_df[show_cols].head(50), use_container_width=True, height=320)
796
- else:
797
- st.info("No negative comments in this dataset.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
 
799
- with tab_top:
800
- top_df = display_df.sort_values("likes", ascending=False).head(20)
801
- st.dataframe(top_df[show_cols], use_container_width=True, height=320)
 
 
802
 
803
- else:
804
- st.info("No comments available for this video.")
 
 
1
  """
2
  app.py — Video Verifier & Sentiment Analyzer
3
+ Gradio dark-mode application.
4
+ fetcher.py, analyzer.py, charts.py are UNCHANGED — only this file replaces the Streamlit version.
5
  """
6
 
7
  import os
 
 
8
  import pandas as pd
9
+ import gradio as gr
10
 
11
  from fetcher import (
12
  extract_video_id,
 
32
  )
33
 
34
  # ══════════════════════════════════════════════════════════════════════════════
35
+ # CSS — same dark palette as the Streamlit version
36
  # ══════════════════════════════════════════════════════════════════════════════
37
 
38
+ CSS = """
 
 
 
 
 
 
 
 
 
39
  @import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
40
 
 
41
  :root {
42
+ --bg: #0d0f14;
43
+ --card: #13161e;
44
+ --border: #1e2330;
45
+ --text: #e8eaf0;
46
+ --dim: #5a6070;
47
+ --cyan: #00d4ff;
48
+ --green: #00e5a0;
49
+ --red: #ff4757;
50
+ --amber: #ffb347;
51
+ --blue: #4a8eff;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
 
54
+ body, .gradio-container { background: var(--bg) !important; font-family: 'IBM Plex Sans', sans-serif !important; color: var(--text) !important; max-width: 1400px !important; margin: 0 auto; }
55
+ footer { display: none !important; }
56
+
57
+ /* Panels */
58
+ .gr-group, .gr-box, .gr-panel, div[class*="block"] { background: var(--card) !important; border: 1px solid var(--border) !important; border-radius: 12px !important; }
59
+
60
+ /* Tabs */
61
+ .tab-nav button { background: transparent !important; border: none !important; color: var(--dim) !important; font-family: 'DM Mono', monospace !important; font-size: 0.82rem !important; letter-spacing: 0.05em !important; border-bottom: 2px solid transparent !important; padding: 0.5rem 1.2rem !important; transition: color 0.2s; }
62
+ .tab-nav button.selected { color: var(--cyan) !important; border-bottom-color: var(--cyan) !important; }
63
+ .tab-nav { border-bottom: 1px solid var(--border) !important; }
64
+
65
+ /* Inputs */
66
+ input[type="text"], input[type="password"], textarea { background: #1a1d27 !important; border: 1px solid var(--border) !important; color: var(--text) !important; border-radius: 8px !important; font-family: 'DM Mono', monospace !important; font-size: 0.88rem !important; }
67
+ input:focus, textarea:focus { border-color: var(--cyan) !important; box-shadow: 0 0 0 2px rgba(0,212,255,0.15) !important; outline: none !important; }
68
+ label, .gr-label { color: var(--dim) !important; font-family: 'DM Mono', monospace !important; font-size: 0.75rem !important; letter-spacing: 0.08em !important; text-transform: uppercase; }
69
+
70
+ /* Buttons */
71
+ button.primary, button[variant="primary"] { background: linear-gradient(135deg, var(--cyan), var(--blue)) !important; border: none !important; color: var(--bg) !important; font-weight: 700 !important; font-family: 'DM Mono', monospace !important; border-radius: 8px !important; letter-spacing: 0.06em !important; }
72
+ button.secondary { background: rgba(0,212,255,0.08) !important; border: 1px solid var(--cyan) !important; color: var(--cyan) !important; border-radius: 8px !important; font-family: 'DM Mono', monospace !important; }
73
+ button:hover { opacity: 0.88; transform: translateY(-1px); transition: all 0.15s; }
74
+
75
+ /* Dropdown */
76
+ select { background: #1a1d27 !important; border: 1px solid var(--border) !important; color: var(--text) !important; border-radius: 8px !important; }
77
+
78
+ /* Slider */
79
+ input[type="range"] { accent-color: var(--cyan); }
80
+
81
+ /* Dataframe */
82
+ .gr-dataframe table { background: var(--card) !important; border-collapse: collapse; width: 100%; }
83
+ .gr-dataframe th { background: #1a1d27 !important; color: var(--cyan) !important; font-family: 'DM Mono', monospace !important; font-size: 0.75rem !important; padding: 6px 10px; border-bottom: 1px solid var(--border); }
84
+ .gr-dataframe td { color: var(--text) !important; font-size: 0.78rem !important; padding: 5px 10px; border-bottom: 1px solid var(--border); }
85
+ .gr-dataframe tr:hover td { background: rgba(0,212,255,0.04) !important; }
86
+
87
+ /* Shared HTML helpers */
88
+ .vv-hero { font-family: 'Syne', sans-serif; font-size: 1.6rem; font-weight: 800; background: linear-gradient(135deg, #00d4ff, #4a8eff); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; letter-spacing: -0.02em; line-height: 1.2; }
89
+ .vv-section-title { font-family: 'Syne', sans-serif; font-size: 0.68rem; font-weight: 700; letter-spacing: 0.18em; text-transform: uppercase; color: #5a6070; margin-bottom: 0.5rem; }
90
+ .vv-card { background: #13161e; border: 1px solid #1e2330; border-radius: 12px; padding: 1.2rem 1.4rem; margin-bottom: 0.8rem; }
91
+ .vv-stat { display: inline-block; background: #1a1d27; border: 1px solid #1e2330; border-radius: 6px; padding: 0.25rem 0.75rem; font-family: 'DM Mono', monospace; font-size: 0.78rem; color: #00d4ff; margin: 0.15rem 0.2rem; }
92
+ .vv-badge-green { display: inline-block; background: rgba(0,229,160,0.12); border: 1px solid #00e5a0; color: #00e5a0; border-radius: 20px; padding: 0.3rem 1rem; font-size: 0.82rem; font-family: 'DM Mono', monospace; }
93
+ .vv-badge-red { display: inline-block; background: rgba(255,71,87,0.12); border: 1px solid #ff4757; color: #ff4757; border-radius: 20px; padding: 0.3rem 1rem; font-size: 0.82rem; font-family: 'DM Mono', monospace; }
94
+ .vv-badge-amber { display: inline-block; background: rgba(255,179,71,0.12); border: 1px solid #ffb347; color: #ffb347; border-radius: 20px; padding: 0.3rem 1rem; font-size: 0.82rem; font-family: 'DM Mono', monospace; }
95
+ .vv-reasoning { background: #0d1119; border-left: 3px solid #ffb347; padding: 0.75rem 1rem; border-radius: 0 8px 8px 0; font-size: 0.83rem; color: #c0c4cc; line-height: 1.65; font-family: 'IBM Plex Sans', sans-serif; }
96
+ .vv-tag { display: inline-block; background: #1a1d27; border: 1px solid #1e2330; border-radius: 4px; padding: 2px 8px; font-family: 'DM Mono', monospace; font-size: 0.7rem; color: #8090a0; margin: 2px; }
97
+ .vv-stat-big-green { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #00e5a0; margin: 0; }
98
+ .vv-stat-big-red { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #ff4757; margin: 0; }
99
+ .vv-stat-big-dim { font-family: 'DM Mono', monospace; font-size: 1.6rem; font-weight: 700; color: #5a6070; margin: 0; }
100
+ .vv-log-line { font-size: 0.72rem; color: #5a6070; font-family: 'DM Mono', monospace; margin: 2px 0; }
101
+ """
102
 
103
  # ══════════════════════════════════════════════════════════════════════════════
104
+ # SHARED HELPERS
105
  # ══════════════════════════════════════════════════════════════════════════════
106
 
107
def _empty_plotly():
    """Return a transparent placeholder figure shown before any analysis has run.

    The figure carries a single centred annotation and no traces, so it can
    stand in for any of the chart outputs.
    """
    import plotly.graph_objects as go

    dim_grey = "#5a6070"
    transparent = "rgba(0,0,0,0)"

    placeholder = go.Figure()
    placeholder.update_layout(
        paper_bgcolor=transparent,
        plot_bgcolor=transparent,
        font={"color": dim_grey},
        margin={"l": 10, "r": 10, "t": 10, "b": 10},
        height=200,
    )
    placeholder.add_annotation(
        text="Run analysis to see data",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 13, "color": dim_grey},
    )
    return placeholder
117
+
118
+
119
def _blank_outputs(status_msg: str):
    """Build the 18-element output tuple used when there is nothing to show.

    The element order matches the tuple returned by ``_build_outputs``:
    status, log, left panel, badge, reasoning, six charts, three stat boxes
    and four comment tables.

    Args:
        status_msg: Plain-text message rendered in the status bar. It is
            HTML-escaped here because callers may forward error text that
            originates from an external API.

    Returns:
        A tuple of 18 values: HTML strings, placeholder figures and empty
        DataFrames.
    """
    from html import escape  # local import keeps this block self-contained

    ep = _empty_plotly()
    safe_msg = escape(str(status_msg))
    return (
        f'<p style="color:#ff4757;font-family:DM Mono,monospace;padding:8px">{safe_msg}</p>',  # status
        "<p class='vv-log-line'>β€”</p>",                                                       # log
        "<div style='padding:3rem;text-align:center;color:#5a6070'>No data yet.</div>",         # left panel
        "", "",                                                                                 # badge, reasoning
        ep, ep, ep, ep, ep, ep,                                                                 # 6 charts
        "", "", "",                                                                             # 3 stat boxes
        pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),                         # 4 dataframes
    )
131
 
132
 
133
  # ══════════════════════════════════════════════════════════════════════════════
134
+ # PIPELINE (replaces Streamlit's run_full_pipeline + session_state writes)
135
  # ══════════════════════════════════════════════════════════════════════════════
136
 
137
def run_pipeline(
    url_or_id: str,
    api_key: str,
    sentiment_method: str,
    max_comments: int,
    progress=gr.Progress(track_tqdm=False),
):
    """Run the full analysis for one video and yield the UI output tuple.

    This is a generator that yields exactly once: either a blank/error tuple
    from ``_blank_outputs`` when an input guard or the metadata fetch fails,
    or the populated tuple from ``_build_outputs``.

    Args:
        url_or_id: YouTube URL or raw video ID typed by the user.
        api_key: YouTube Data API key; surrounding whitespace is ignored.
        sentiment_method: Engine identifier forwarded to
            ``analyze_sentiment_batch`` as ``method``.
        max_comments: Upper bound on the number of comments to fetch.
        progress: Gradio progress tracker used to report each stage.

    Yields:
        An 18-element tuple matching the output component list.
    """
    # ── Input guards ──────────────────────────────────────────────────────────
    raw_input = (url_or_id or "").strip()
    if not raw_input:
        yield _blank_outputs("⚠️ Please enter a YouTube URL or video ID.")
        return

    video_id = extract_video_id(raw_input)
    if not video_id:
        yield _blank_outputs("❌ Could not parse a valid YouTube video ID from that input.")
        return

    # Normalise the key once so the value that passed the guard is the value
    # actually sent to the API (a pasted key often carries a trailing newline).
    api_key = (api_key or "").strip()
    if not api_key:
        yield _blank_outputs("⚠️ YouTube API key is required. Set it in the βš™οΈ Settings tab.")
        return

    # The slider may deliver a float; use one integer everywhere.
    comment_limit = int(max_comments)

    # 1 ── Metadata ─────────────────────────────────────────────────────────────
    progress(0.05, desc="Fetching video metadata…")
    meta, err_msg = fetch_video_metadata(video_id, api_key)
    if err_msg:
        yield _blank_outputs(f"❌ {err_msg}")
        return

    # 2 ── Transcript ───────────────────────────────────────────────────────────
    progress(0.20, desc="Fetching transcript…")
    transcript, t_status = fetch_transcript(video_id)

    # 3 ── Comments ─────────────────────────────────────────────────────────────
    progress(0.35, desc=f"Fetching up to {comment_limit} comments…")
    comments_df, c_status = fetch_comments(video_id, api_key, max_comments=comment_limit)

    # 4 ── Misinformation detection ─────────────────────────────────────────────
    progress(0.50, desc="Running misinformation detection…")
    title_and_description = f"{meta['title']} {meta['description']}"
    misinfo = detect_misinformation(
        text=title_and_description,
        tags=meta["tags"],
        audio_transcript=transcript,
        video_transcript=transcript,
    )

    # 5 ── Keywords ─────────────────────────────────────────────────────────────
    # `transcript or ''` keeps a missing transcript from being formatted into
    # the keyword text as the literal word "None".
    # NOTE(review): assumes fetch_transcript may return None — confirm.
    keywords = extract_keywords(
        f"{title_and_description} {transcript or ''}",
        meta["tags"],
    )

    # 6 ── Sentiment (processed in batches so progress can be reported) ─────────
    sentiments, sent_sum, pos_kw, neg_kw = [], {}, [], []

    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        total = len(texts)
        batch_size = 64
        for start in range(0, total, batch_size):
            chunk = texts[start: start + batch_size]
            sentiments += analyze_sentiment_batch(chunk, method=sentiment_method, batch_size=batch_size)
            done = min(start + batch_size, total)
            # Sentiment occupies the 60%–90% band of the progress bar.
            progress(0.60 + 0.30 * (done / total), desc=f"Sentiment: {done}/{total}…")

        sent_sum = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)

    # 7 ── Assemble and yield ───────────────────────────────────────────────────
    progress(0.97, desc="Building charts…")
    if sent_sum:
        sentiment_line = f"πŸ’¬ Sentiment: {sent_sum['pos_pct']}% pos / {sent_sum['neg_pct']}% neg"
    else:
        sentiment_line = "πŸ’¬ Skipped (no comments available)"

    yield _build_outputs(
        meta=meta, video_id=video_id, transcript=transcript,
        comments_df=comments_df, misinfo=misinfo, keywords=keywords,
        sentiments=sentiments, sent_sum=sent_sum,
        pos_kw=pos_kw, neg_kw=neg_kw,
        status_log=[
            f"βœ… Metadata: {meta['title'][:55]}",
            t_status,
            c_status,
            f"πŸ”¬ Misinfo score: {misinfo['confidence_pct']}%",
            sentiment_line,
        ],
    )
226
 
227
 
228
  # ══════════════════════════════════════════════════════════════════════════════
229
+ # OUTPUT BUILDER (assembles the 18-element tuple from all computed data)
230
  # ══════════════════════════════════════════════════════════════════════════════
231
 
232
def _build_outputs(
    meta, video_id, transcript, comments_df,
    misinfo, keywords, sentiments, sent_sum, pos_kw, neg_kw, status_log,
):
    """Assemble the 18-element output tuple from the computed analysis data.

    Text that originates from the video (title, channel, tags, description,
    transcript, thumbnail URL) and the log lines are HTML-escaped before being
    placed into markup, so characters such as ``<`` or ``&`` render literally
    instead of being parsed as HTML.

    Args:
        meta: Video metadata mapping; the keys read here are ``title``,
            ``channel_title``, ``published_at``, ``view_count``,
            ``like_count``, ``comment_count``, ``duration`` and optionally
            ``thumbnail_url``, ``tags`` and ``description``.
        video_id: YouTube video ID used to build the watch link.
        transcript: Transcript text; a falsy value is shown as unavailable.
        comments_df: DataFrame of comments; the columns ``author``, ``text``,
            ``likes`` and ``published_at`` are displayed.
        misinfo: Detection result; ``score``, ``reasoning`` and
            ``stream_details`` are read.
        keywords: Keyword data passed to ``keyword_bar``.
        sentiments: Per-comment sentiment dicts with ``label`` and optionally
            ``compound``; may be empty.
        sent_sum: Sentiment summary with ``pos_pct``/``neg_pct``/``neu_pct``,
            or an empty mapping when no sentiment was computed.
        pos_kw: Positive-weighted keywords for the comparison chart.
        neg_kw: Negative-weighted keywords for the comparison chart.
        status_log: Lines to render in the activity log.

    Returns:
        Tuple of 18 values in the order: status, log, left panel, badge,
        reasoning, six figures, three stat boxes, four DataFrames.
    """
    from html import escape  # local import keeps this block self-contained

    def _stat_card(css_class, pct, label):
        """Render one centred percentage card for the sentiment summary."""
        return (f'<div class="vv-card" style="text-align:center">'
                f'<p class="{css_class}">{pct}%</p>'
                f'<p style="color:#5a6070;font-size:0.75rem;margin:0">{label}</p></div>')

    # ── Status ────────────────────────────────────────────────────────────────
    status_html = (
        '<p style="color:#00e5a0;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
        'βœ… Analysis complete</p>'
    )

    # ── Log ───────────────────────────────────────────────────────────────────
    # Log lines embed the video title, so they are escaped as well.
    log_html = "".join(
        f'<p class="vv-log-line">{escape(str(line))}</p>' for line in status_log
    )

    # ── Left panel (video info) ───────────────────────────────────────────────
    thumb_url = meta.get("thumbnail_url")
    thumb_html = (
        f'<img src="{escape(str(thumb_url))}" '
        f'style="width:100%;border-radius:8px;margin-bottom:8px">'
        if thumb_url else ""
    )
    tag_html = "".join(
        f'<span class="vv-tag">#{escape(str(t))}</span>'
        for t in meta.get("tags", [])[:20]
    )

    # Truncate first, then escape, so an entity is never cut in half.
    desc_text = meta.get("description", "")
    desc_short = escape(desc_text[:1200]) + ("…" if len(desc_text) > 1200 else "")
    word_count = len(transcript.split()) if transcript else 0
    if transcript:
        transcript_short = escape(transcript[:2500]) + ("…" if len(transcript) > 2500 else "")
    else:
        transcript_short = "(not available)"

    safe_title = escape(str(meta['title']))
    safe_channel = escape(str(meta['channel_title']))
    safe_published = escape(str(meta['published_at']))
    safe_duration = escape(str(meta['duration']))
    safe_video_id = escape(str(video_id))

    left_html = f"""
    {thumb_html}
    <a href="https://www.youtube.com/watch?v={safe_video_id}" target="_blank"
    style="display:block;text-align:center;font-family:'DM Mono',monospace;
    font-size:0.76rem;color:#5a6070;text-decoration:none;margin:4px 0 10px">
    β–Ά Open on YouTube
    </a>
    <div class="vv-card">
    <p class="vv-section-title">Video</p>
    <p style="font-family:'Syne',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 4px;color:#e8eaf0">
    {safe_title}
    </p>
    <p style="font-size:0.82rem;color:#5a6070;margin:0">
    by <b style="color:#b0b4c0">{safe_channel}</b> Β· {safe_published}
    </p>
    </div>
    <p class="vv-section-title">Metrics</p>
    <span class="vv-stat">πŸ‘ {meta['view_count']:,}</span>
    <span class="vv-stat">πŸ‘ {meta['like_count']:,}</span>
    <span class="vv-stat">πŸ’¬ {meta['comment_count']:,}</span>
    <span class="vv-stat">⏱ {safe_duration}</span>
    <p class="vv-section-title" style="margin-top:1rem">Tags</p>
    {tag_html or '<span style="color:#5a6070;font-size:0.78rem">(none)</span>'}
    <details style="margin-top:1rem">
    <summary style="cursor:pointer;font-family:'DM Mono',monospace;font-size:0.78rem;color:#5a6070">
    πŸ“„ Description
    </summary>
    <p style="font-size:0.78rem;color:#8090a0;line-height:1.65;white-space:pre-wrap;margin-top:6px">
    {desc_short}
    </p>
    </details>
    <details style="margin-top:0.5rem">
    <summary style="cursor:pointer;font-family:'DM Mono',monospace;font-size:0.78rem;color:#5a6070">
    πŸ“ Transcript ({word_count} words)
    </summary>
    <p style="font-size:0.75rem;color:#8090a0;line-height:1.65;margin-top:6px">
    {transcript_short}
    </p>
    </details>
    """

    # ── Misinfo badge + reasoning ─────────────────────────────────────────────
    score = misinfo["score"]
    if score < 0.35:
        badge_html = '<span class="vv-badge-green">βœ… Appears Credible</span>'
    elif score < 0.65:
        badge_html = '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
    else:
        badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'

    # NOTE(review): the reasoning text is inserted as-is because it is produced
    # by detect_misinformation and may contain intentional markup — confirm,
    # and escape it if it can echo raw video text.
    reasoning_html = (
        f'<div class="vv-reasoning" style="margin-top:8px">'
        f'🧠 <b>Reasoning:</b> {misinfo["reasoning"]}'
        f'</div>'
    )

    # ── Plotly charts ─────────────────────────────────────────────────────────
    has_comments = not comments_df.empty
    fig_gauge = misinfo_gauge(score, "Misinfo Confidence")
    fig_streams = stream_trust_bars(misinfo["stream_details"])
    fig_donut = sentiment_donut(sent_sum) if sent_sum else _empty_plotly()
    fig_timeline = (
        sentiment_timeline(comments_df, sentiments)
        if (sent_sum and has_comments) else _empty_plotly()
    )
    fig_kw = keyword_bar(keywords, title="Top Video Keywords", color="#00d4ff")
    fig_kw_comp = keyword_comparison(pos_kw, neg_kw) if (pos_kw or neg_kw) else _empty_plotly()

    # ── Sentiment stat boxes ──────────────────────────────────────────────────
    if sent_sum:
        stat_pos = _stat_card("vv-stat-big-green", sent_sum["pos_pct"], "Positive")
        stat_neg = _stat_card("vv-stat-big-red", sent_sum["neg_pct"], "Negative")
        stat_neu = _stat_card("vv-stat-big-dim", sent_sum["neu_pct"], "Neutral")
    else:
        placeholder = '<div class="vv-card" style="text-align:center;color:#5a6070;font-size:0.8rem">N/A</div>'
        stat_pos = stat_neg = stat_neu = placeholder

    # ── Comment DataFrames for the 4 tabs ─────────────────────────────────────
    show_cols = ["author", "text", "likes", "published_at"]
    df_all = df_pos = df_neg = df_top = pd.DataFrame()

    if has_comments:
        display_df = comments_df.copy()
        if sentiments:
            display_df["sentiment"] = [s["label"] for s in sentiments]
            display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
            cols = show_cols + ["sentiment", "compound"]
        else:
            cols = show_cols

        df_all = display_df[cols].head(100).reset_index(drop=True)
        df_top = display_df.sort_values("likes", ascending=False).head(20)[cols].reset_index(drop=True)

        if "sentiment" in display_df.columns:
            df_pos = display_df[display_df["sentiment"] == "POSITIVE"][cols].head(50).reset_index(drop=True)
            df_neg = display_df[display_df["sentiment"] == "NEGATIVE"][cols].head(50).reset_index(drop=True)

    return (
        status_html,     # 0  status_box
        log_html,        # 1  log_html_out
        left_html,       # 2  left_panel_html
        badge_html,      # 3  misinfo_badge_html
        reasoning_html,  # 4  misinfo_reasoning_html
        fig_gauge,       # 5  misinfo_gauge_plot
        fig_streams,     # 6  stream_bars_plot
        fig_donut,       # 7  donut_plot
        fig_timeline,    # 8  timeline_plot
        fig_kw,          # 9  kw_bar_plot
        fig_kw_comp,     # 10 kw_comp_plot
        stat_pos,        # 11 stat_pos_html
        stat_neg,        # 12 stat_neg_html
        stat_neu,        # 13 stat_neu_html
        df_all,          # 14 df_all_out
        df_pos,          # 15 df_pos_out
        df_neg,          # 16 df_neg_out
        df_top,          # 17 df_top_out
    )
376
 
377
 
378
  # ══════════════════════════════════════════════════════════════════════════════
379
+ # UPLOAD TAB — search by keyword helper
380
  # ══════════════════════════════════════════════════════════════════════════════
381
 
382
def do_search(keyword: str, api_key: str):
    """Search YouTube by title/keyword and prepare the result list for the UI.

    Args:
        keyword: Search text entered by the user; surrounding whitespace is
            ignored and a blank value is rejected without calling the API.
        api_key: YouTube Data API key; surrounding whitespace is ignored.

    Returns:
        A pair ``(html, radio_update)``: an HTML string (result cards or a
        message) and a ``gr.update`` for the selection radio. The radio is
        hidden and emptied unless results were found; when shown, each choice
        is ``(title truncated to 70 chars, watch URL)``.
    """
    from html import escape  # local import keeps this block self-contained

    def _message(color, text):
        """Return a one-line status paragraph paired with a hidden, empty radio."""
        return (
            f"<p style='color:{color};font-family:DM Mono,monospace'>{text}</p>",
            gr.update(choices=[], value=None, visible=False),
        )

    api_key = (api_key or "").strip()
    if not api_key:
        return _message("#ff4757", "⚠️ API key required.")

    # A blank query would only spend API quota, so stop before the request.
    keyword = (keyword or "").strip()
    if not keyword:
        return _message("#ffb347", "⚠️ Enter a video title or keyword to search.")

    results = search_videos_by_title(keyword, api_key, max_results=5)
    if not results:
        return _message("#ffb347", "No results found. Try a different keyword.")

    cards = []
    choices = []
    for r in results:
        vid = r["video_id"]
        url = f"https://www.youtube.com/watch?v={vid}"
        # Radio labels are passed as plain values, so only the card markup
        # below needs escaping.
        choices.append((r["title"][:70], url))
        cards.append(
            f'<div class="vv-card" style="display:flex;align-items:center;gap:12px;margin-bottom:6px">'
            f'<img src="{escape(str(r["thumbnail_url"]))}" style="width:72px;height:54px;object-fit:cover;border-radius:6px;flex-shrink:0">'
            f'<div><p style="margin:0;font-size:0.85rem;font-weight:600;color:#e8eaf0">{escape(r["title"][:80])}</p>'
            f'<p style="margin:0;font-size:0.75rem;color:#5a6070">{escape(str(r["channel_title"]))} Β· {escape(str(r["published_at"]))} Β· '
            f'<code style="color:#00d4ff">v={escape(str(vid))}</code></p></div></div>'
        )
    return "".join(cards), gr.update(choices=choices, value=None, visible=True)
409
 
 
 
 
 
410
 
411
def pick_and_analyze(selected_url, api_key, sentiment_method, max_comments):
    """Run the full pipeline on the search result the user selected.

    Yields the blank-output tuple with a prompt when nothing is selected;
    otherwise delegates to ``run_pipeline`` and re-yields whatever it yields.
    """
    if selected_url:
        yield from run_pipeline(selected_url, api_key, sentiment_method, max_comments)
    else:
        yield _blank_outputs("Select a video from the search results above.")
417
 
 
 
418
 
419
+ # ══════════════════════════════════════════════════════════════════════════════
420
+ # GRADIO BLOCKS UI
421
+ # ══════════════════════════════════════════════════════════════════════════════
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
 
423
# Top-level UI definition: builds every component inside one Blocks context,
# collects the output components into ALL_OUTPUTS, and wires the events.
with gr.Blocks(css=CSS, title="VideoVerifier β€” MHMisinfo", theme=gr.themes.Base()) as demo:

    # ── Header ────────────────────────────────────────────────────────────────
    gr.HTML("""
    <div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #1e2330;margin-bottom:1.2rem">
    <h1 class="vv-hero">πŸ”¬ Video Verifier & Sentiment Analyzer</h1>
    <p style="color:#5a6070;font-size:0.85rem;margin-top:4px;font-family:'DM Mono',monospace">
    Detect mental health misinformation Β· Analyze public sentiment Β· Understand video content at a glance
    </p>
    </div>
    """)

    # ── Settings row (collapsed accordion at the top of the page) ─────────────
    # These three components are inputs to both the URL flow and the search flow.
    with gr.Accordion("βš™οΈ Settings", open=False):
        with gr.Row():
            # NOTE(review): the field is pre-filled from the server's
            # YT_API_KEY environment variable — confirm this does not expose
            # the key to visitors of a public deployment.
            api_key_input = gr.Textbox(
                value=os.environ.get("YT_API_KEY", ""),
                placeholder="AIza…",
                label="YouTube Data API v3 Key",
                type="password",
                scale=3,
                info="Get a free key at console.cloud.google.com β†’ Enable YouTube Data API v3",
            )
            # Each choice is (label shown to the user, value passed to the pipeline).
            sentiment_selector = gr.Dropdown(
                choices=[
                    ("VADER β€” fast, CPU-only (~5 000 comments/sec)", "vader"),
                    ("DistilBERT β€” accurate, downloads ~500 MB on first run", "hf"),
                ],
                value="vader",
                label="Sentiment Engine",
                scale=2,
            )
            max_comments_slider = gr.Slider(
                minimum=10, maximum=500, value=150, step=10,
                label="Max comments to fetch",
                scale=2,
                info="YouTube API quota: ~1 unit per comment request",
            )

    # ── Input tabs ────────────────────────────────────────────────────────────
    with gr.Tabs():

        # Tab 1 — direct URL / video ID entry
        with gr.TabItem("πŸ”— YouTube URL"):
            with gr.Row():
                url_input = gr.Textbox(
                    placeholder="https://www.youtube.com/watch?v=... or youtu.be/... or raw 11-char ID",
                    label="YouTube URL / Video ID",
                    scale=5,
                )
                analyze_btn = gr.Button("πŸ” Analyze", variant="primary", scale=1, min_width=130)

        # Tab 2 — upload a file, then search YouTube by title/keyword
        with gr.TabItem("πŸ“ Upload / Search by Title"):
            gr.HTML("""
            <div class="vv-card" style="margin-bottom:8px">
            <p class="vv-section-title">Upload a video file β†’ find matching YouTube metadata</p>
            <p style="font-size:0.82rem;color:#5a6070;line-height:1.6;margin:0">
            ⚠️ The YouTube Data API cannot search by raw video bytes.
            Upload your file, then type the title or a keyword below to find the matching YouTube entry.
            </p>
            </div>
            """)
            # No event in this block reads upload_file; the search below is
            # driven only by the keyword text box.
            upload_file = gr.File(
                label="Drop a video file (mp4, mov, avi, mkv, webm)",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
            )
            with gr.Row():
                kw_input = gr.Textbox(placeholder="Enter video title or keyword…", label="Search keyword", scale=4)
                search_btn = gr.Button("πŸ”Ž Find on YouTube", scale=1)
            search_results_html = gr.HTML()
            # Starts hidden and empty; do_search fills and reveals it.
            search_radio = gr.Radio(
                label="Select a video to analyze",
                choices=[],
                visible=False,
            )

    # ── Status bar ────────────────────────────────────────────────────────────
    status_box = gr.HTML(
        '<p style="color:#5a6070;font-family:DM Mono,monospace;font-size:0.8rem;padding:6px 0">'
        'Enter a URL above and click Analyze.</p>'
    )

    # ── Main results layout ───────────────────────────────────────────────────
    with gr.Row(equal_height=False):

        # LEFT — video info
        with gr.Column(scale=2):
            left_panel_html = gr.HTML(
                '<div style="padding:3rem;text-align:center;color:#5a6070;'
                'font-family:DM Mono,monospace">No data yet.</div>'
            )

        # RIGHT — analytics
        with gr.Column(scale=3):

            # Misinfo block
            gr.HTML('<p class="vv-section-title" style="margin-top:0">πŸ”¬ Misinformation Analysis</p>')
            misinfo_badge_html = gr.HTML()
            with gr.Row():
                misinfo_gauge_plot = gr.Plot(label="", show_label=False)
                stream_bars_plot = gr.Plot(label="", show_label=False)
            misinfo_reasoning_html = gr.HTML()

            gr.HTML('<hr style="border-color:#1e2330;margin:1rem 0">')

            # Sentiment block
            gr.HTML('<p class="vv-section-title">πŸ’¬ Comment Sentiment</p>')
            with gr.Row():
                stat_pos_html = gr.HTML()
                stat_neg_html = gr.HTML()
                stat_neu_html = gr.HTML()
            with gr.Row():
                donut_plot = gr.Plot(label="", show_label=False)
                timeline_plot = gr.Plot(label="", show_label=False)
            with gr.Row():
                kw_bar_plot = gr.Plot(label="", show_label=False)
                kw_comp_plot = gr.Plot(label="", show_label=False)

            gr.HTML('<hr style="border-color:#1e2330;margin:1rem 0">')

            # Comments deep-dive
            gr.HTML('<p class="vv-section-title">πŸ“Š Comments Deep-Dive</p>')
            with gr.Tabs():
                with gr.TabItem("All"):
                    # Only this table declares headers and datatypes up front.
                    df_all_out = gr.Dataframe(
                        headers=["author","text","likes","published_at","sentiment","compound"],
                        datatype=["str","str","number","str","str","number"],
                        wrap=True, height=320,
                    )
                with gr.TabItem("Positive"):
                    df_pos_out = gr.Dataframe(wrap=True, height=320)
                with gr.TabItem("Negative"):
                    df_neg_out = gr.Dataframe(wrap=True, height=320)
                with gr.TabItem("Most Liked"):
                    df_top_out = gr.Dataframe(wrap=True, height=320)

    # ── Activity log ──────────────────────────────────────────────────────────
    with gr.Accordion("πŸ“œ Activity Log", open=False):
        log_html_out = gr.HTML(
            '<p class="vv-log-line">β€”</p>'
        )

    # ── Footer ────────────────────────────────────────────────────────────────
    gr.HTML("""
    <div style="margin-top:2rem;padding-top:1rem;border-top:1px solid #1e2330;
    text-align:center;font-family:'DM Mono',monospace;font-size:0.72rem;color:#3a3f50">
    4-stream SeTa-Attention BiGRU Β· CCM / DMTE / Uncertainty Fusion Β·
    Plug your checkpoint into
    <code style="color:#00d4ff">detect_misinformation()</code> in analyzer.py Β·
    Test ROC-AUC 0.967
    </div>
    """)

    # ══════════════════════════════════════════════════════════════════════════
    # OUTPUT LIST — order must exactly match _build_outputs / _blank_outputs
    # ══════════════════════════════════════════════════════════════════════════
    # Every handler below returns/yields one tuple that is mapped onto these
    # components by position, so adding or reordering an entry requires the
    # same change in both builder functions.
    ALL_OUTPUTS = [
        status_box,            # 0
        log_html_out,          # 1
        left_panel_html,       # 2
        misinfo_badge_html,    # 3
        misinfo_reasoning_html,# 4
        misinfo_gauge_plot,    # 5
        stream_bars_plot,      # 6
        donut_plot,            # 7
        timeline_plot,         # 8
        kw_bar_plot,           # 9
        kw_comp_plot,          # 10
        stat_pos_html,         # 11
        stat_neg_html,         # 12
        stat_neu_html,         # 13
        df_all_out,            # 14
        df_pos_out,            # 15
        df_neg_out,            # 16
        df_top_out,            # 17
    ]

    # ── Events: URL tab ───────────────────────────────────────────────────────
    # Input order matches run_pipeline's positional parameters.
    _pipeline_inputs = [url_input, api_key_input, sentiment_selector, max_comments_slider]

    # Clicking the button and pressing Enter in the URL box run the same pipeline.
    analyze_btn.click(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)
    url_input.submit(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)

    # ── Events: Upload/Search tab ─────────────────────────────────────────────
    # Search fills the result cards and the radio choices.
    search_btn.click(
        fn=do_search,
        inputs=[kw_input, api_key_input],
        outputs=[search_results_html, search_radio],
    )

    # Selecting a radio entry runs the full analysis on that video's URL.
    search_radio.change(
        fn=pick_and_analyze,
        inputs=[search_radio, api_key_input, sentiment_selector, max_comments_slider],
        outputs=ALL_OUTPUTS,
    )

# ══════════════════════════════════════════════════════════════════════════════
# Launch the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch(show_api=False)