Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
app.py —
|
| 3 |
|
| 4 |
"""
|
| 5 |
|
|
@@ -341,17 +341,17 @@ def run_pipeline(
|
|
| 341 |
|
| 342 |
# Guards
|
| 343 |
if not (url_or_id or "").strip():
|
| 344 |
-
yield _blank_outputs("
|
| 345 |
return
|
| 346 |
|
| 347 |
video_id = extract_video_id(url_or_id.strip())
|
| 348 |
if not video_id:
|
| 349 |
-
yield _blank_outputs("
|
| 350 |
return
|
| 351 |
|
| 352 |
if not api_key:
|
| 353 |
yield _blank_outputs(
|
| 354 |
-
"
|
| 355 |
"Set the <code>YT_API_KEY</code> environment variable / Space secret."
|
| 356 |
)
|
| 357 |
return
|
|
@@ -360,7 +360,7 @@ def run_pipeline(
|
|
| 360 |
progress(0.05, desc="Fetching video metadata…")
|
| 361 |
meta, err = fetch_video_metadata(video_id, api_key)
|
| 362 |
if err:
|
| 363 |
-
yield _blank_outputs(f"
|
| 364 |
return
|
| 365 |
|
| 366 |
# 2 — Transcript
|
|
@@ -372,16 +372,7 @@ def run_pipeline(
|
|
| 372 |
comments_df, c_status = fetch_comments(video_id, api_key, max_comments=int(max_comments))
|
| 373 |
|
| 374 |
# 4 — Misinformation
|
| 375 |
-
|
| 376 |
-
# to the same `transcript` variable. When the transcript was empty (no
|
| 377 |
-
# captions), ALL three modalities hit the empty-string fallback inside
|
| 378 |
-
# _compute_modality_analysis and returned a fixed 50/50 split with
|
| 379 |
-
# logit_m = logit_c = 0, trust = 0 %, uncertainty = 100 % — values that
|
| 380 |
-
# never changed across videos.
|
| 381 |
-
# The fix keeps audio_transcript = spoken transcript (speech stream) and
|
| 382 |
-
# video_transcript = spoken transcript too, but detect_misinformation()
|
| 383 |
-
# now internally builds the video segment as transcript + title + tags,
|
| 384 |
-
# giving all three modalities distinct content and therefore distinct scores.
|
| 385 |
progress(0.50, desc="Running misinformation detection…")
|
| 386 |
misinfo = detect_misinformation(
|
| 387 |
text=f"{meta['title']} {meta['description']}",
|
|
@@ -419,7 +410,7 @@ def run_pipeline(
|
|
| 419 |
sentiments=sentiments, sent_sum=sent_sum,
|
| 420 |
pos_kw=pos_kw, neg_kw=neg_kw,
|
| 421 |
status_log=[
|
| 422 |
-
f"
|
| 423 |
t_status,
|
| 424 |
c_status,
|
| 425 |
f"🔬 Misinfo score: {misinfo['confidence_pct']}%",
|
|
@@ -442,7 +433,7 @@ def _build_outputs(
|
|
| 442 |
# Status
|
| 443 |
status_html = (
|
| 444 |
'<p style="color:#00e5a0;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
|
| 445 |
-
"
|
| 446 |
)
|
| 447 |
|
| 448 |
# Log
|
|
@@ -498,9 +489,9 @@ def _build_outputs(
|
|
| 498 |
# Misinfo badge
|
| 499 |
score = misinfo["score"]
|
| 500 |
if score < 0.35:
|
| 501 |
-
badge_html = '<span class="vv-badge-green">
|
| 502 |
elif score < 0.65:
|
| 503 |
-
badge_html = '<span class="vv-badge-amber">
|
| 504 |
else:
|
| 505 |
badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'
|
| 506 |
|
|
@@ -560,12 +551,12 @@ def _build_outputs(
|
|
| 560 |
stat_pos = (
|
| 561 |
f'<div class="vv-card" style="text-align:center">'
|
| 562 |
f'<p class="vv-stat-big-green">{sent_sum["pos_pct"]}%</p>'
|
| 563 |
-
f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">
|
| 564 |
)
|
| 565 |
stat_neg = (
|
| 566 |
f'<div class="vv-card" style="text-align:center">'
|
| 567 |
f'<p class="vv-stat-big-red">{sent_sum["neg_pct"]}%</p>'
|
| 568 |
-
f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">
|
| 569 |
)
|
| 570 |
stat_neu = (
|
| 571 |
f'<div class="vv-card" style="text-align:center">'
|
|
@@ -599,8 +590,8 @@ def _build_outputs(
|
|
| 599 |
.reset_index(drop=True)
|
| 600 |
)
|
| 601 |
if "sentiment" in display_df.columns:
|
| 602 |
-
df_pos = display_df[display_df["sentiment"] == "
|
| 603 |
-
df_neg = display_df[display_df["sentiment"] == "
|
| 604 |
|
| 605 |
return (
|
| 606 |
status_html, # 0 status_box
|
|
@@ -632,7 +623,7 @@ def do_search(keyword: str):
|
|
| 632 |
api_key = os.environ.get("YT_API_KEY", "").strip()
|
| 633 |
if not api_key:
|
| 634 |
return (
|
| 635 |
-
"<p style='color:#ff4757;font-family:DM Mono,monospace'>
|
| 636 |
gr.update(choices=[], value=None, visible=False),
|
| 637 |
)
|
| 638 |
if not (keyword or "").strip():
|
|
@@ -678,14 +669,14 @@ def pick_and_analyze(selected_url, sentiment_method, max_comments):
|
|
| 678 |
# GRADIO BLOCKS UI
|
| 679 |
|
| 680 |
|
| 681 |
-
with gr.Blocks(title="
|
| 682 |
|
| 683 |
# Header
|
| 684 |
gr.HTML("""
|
| 685 |
<div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #1e2330;margin-bottom:1.2rem">
|
| 686 |
-
<h1 class="vv-hero">🔬
|
| 687 |
<p style="color:#5a6070;font-size:0.85rem;margin-top:4px;font-family:'DM Mono',monospace">
|
| 688 |
-
|
| 689 |
</p>
|
| 690 |
</div>
|
| 691 |
""")
|
|
@@ -809,9 +800,9 @@ with gr.Blocks(title="VideoVerifier — MHMisinfo") as demo:
|
|
| 809 |
wrap=True,
|
| 810 |
max_height=320,
|
| 811 |
)
|
| 812 |
-
with gr.TabItem("
|
| 813 |
df_pos_out = gr.Dataframe(wrap=True, max_height=320)
|
| 814 |
-
with gr.TabItem("
|
| 815 |
df_neg_out = gr.Dataframe(wrap=True, max_height=320)
|
| 816 |
with gr.TabItem("Most Liked"):
|
| 817 |
df_top_out = gr.Dataframe(wrap=True, max_height=320)
|
|
|
|
| 1 |
"""
|
| 2 |
+
app.py — Misinformation Detection & Public Engagement (Gradio 6.x)
|
| 3 |
|
| 4 |
"""
|
| 5 |
|
|
|
|
| 341 |
|
| 342 |
# Guards
|
| 343 |
if not (url_or_id or "").strip():
|
| 344 |
+
yield _blank_outputs(" Please enter a YouTube URL or video ID.")
|
| 345 |
return
|
| 346 |
|
| 347 |
video_id = extract_video_id(url_or_id.strip())
|
| 348 |
if not video_id:
|
| 349 |
+
yield _blank_outputs(" Could not parse a valid YouTube video ID.")
|
| 350 |
return
|
| 351 |
|
| 352 |
if not api_key:
|
| 353 |
yield _blank_outputs(
|
| 354 |
+
" YouTube API key not found. "
|
| 355 |
"Set the <code>YT_API_KEY</code> environment variable / Space secret."
|
| 356 |
)
|
| 357 |
return
|
|
|
|
| 360 |
progress(0.05, desc="Fetching video metadata…")
|
| 361 |
meta, err = fetch_video_metadata(video_id, api_key)
|
| 362 |
if err:
|
| 363 |
+
yield _blank_outputs(f" {err}")
|
| 364 |
return
|
| 365 |
|
| 366 |
# 2 — Transcript
|
|
|
|
| 372 |
comments_df, c_status = fetch_comments(video_id, api_key, max_comments=int(max_comments))
|
| 373 |
|
| 374 |
# 4 — Misinformation
|
| 375 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
progress(0.50, desc="Running misinformation detection…")
|
| 377 |
misinfo = detect_misinformation(
|
| 378 |
text=f"{meta['title']} {meta['description']}",
|
|
|
|
| 410 |
sentiments=sentiments, sent_sum=sent_sum,
|
| 411 |
pos_kw=pos_kw, neg_kw=neg_kw,
|
| 412 |
status_log=[
|
| 413 |
+
f" Metadata: {meta['title'][:55]}",
|
| 414 |
t_status,
|
| 415 |
c_status,
|
| 416 |
f"🔬 Misinfo score: {misinfo['confidence_pct']}%",
|
|
|
|
| 433 |
# Status
|
| 434 |
status_html = (
|
| 435 |
'<p style="color:#00e5a0;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
|
| 436 |
+
" Analysis complete</p>"
|
| 437 |
)
|
| 438 |
|
| 439 |
# Log
|
|
|
|
| 489 |
# Misinfo badge
|
| 490 |
score = misinfo["score"]
|
| 491 |
if score < 0.35:
|
| 492 |
+
badge_html = '<span class="vv-badge-green"> Appears Credible</span>'
|
| 493 |
elif score < 0.65:
|
| 494 |
+
badge_html = '<span class="vv-badge-amber"> Uncertain / Mixed Signals</span>'
|
| 495 |
else:
|
| 496 |
badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'
|
| 497 |
|
|
|
|
| 551 |
stat_pos = (
|
| 552 |
f'<div class="vv-card" style="text-align:center">'
|
| 553 |
f'<p class="vv-stat-big-green">{sent_sum["pos_pct"]}%</p>'
|
| 554 |
+
f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">Positively Engagement</p></div>'
|
| 555 |
)
|
| 556 |
stat_neg = (
|
| 557 |
f'<div class="vv-card" style="text-align:center">'
|
| 558 |
f'<p class="vv-stat-big-red">{sent_sum["neg_pct"]}%</p>'
|
| 559 |
+
f'<p style="color:#5a6070;font-size:0.75rem;margin:4px 0 0">Negatively Engagement</p></div>'
|
| 560 |
)
|
| 561 |
stat_neu = (
|
| 562 |
f'<div class="vv-card" style="text-align:center">'
|
|
|
|
| 590 |
.reset_index(drop=True)
|
| 591 |
)
|
| 592 |
if "sentiment" in display_df.columns:
|
| 593 |
+
df_pos = display_df[display_df["sentiment"] == "Positively Engagement"][cols].head(50).reset_index(drop=True)
|
| 594 |
+
df_neg = display_df[display_df["sentiment"] == "Negatively Engagement"][cols].head(50).reset_index(drop=True)
|
| 595 |
|
| 596 |
return (
|
| 597 |
status_html, # 0 status_box
|
|
|
|
| 623 |
api_key = os.environ.get("YT_API_KEY", "").strip()
|
| 624 |
if not api_key:
|
| 625 |
return (
|
| 626 |
+
"<p style='color:#ff4757;font-family:DM Mono,monospace'> YT_API_KEY secret not set.</p>",
|
| 627 |
gr.update(choices=[], value=None, visible=False),
|
| 628 |
)
|
| 629 |
if not (keyword or "").strip():
|
|
|
|
| 669 |
# GRADIO BLOCKS UI
|
| 670 |
|
| 671 |
|
| 672 |
+
with gr.Blocks(title="Misinformation Detection & Public Engagement ") as demo:
|
| 673 |
|
| 674 |
# Header
|
| 675 |
gr.HTML("""
|
| 676 |
<div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #1e2330;margin-bottom:1.2rem">
|
| 677 |
+
<h1 class="vv-hero">🔬 Misinformation Detection & Public Engagement </h1>
|
| 678 |
<p style="color:#5a6070;font-size:0.85rem;margin-top:4px;font-family:'DM Mono',monospace">
|
| 679 |
+
Misinformation detection
|
| 680 |
</p>
|
| 681 |
</div>
|
| 682 |
""")
|
|
|
|
| 800 |
wrap=True,
|
| 801 |
max_height=320,
|
| 802 |
)
|
| 803 |
+
with gr.TabItem("Positively Engagement"):
|
| 804 |
df_pos_out = gr.Dataframe(wrap=True, max_height=320)
|
| 805 |
+
with gr.TabItem("Negatively Engagement"):
|
| 806 |
df_neg_out = gr.Dataframe(wrap=True, max_height=320)
|
| 807 |
with gr.TabItem("Most Liked"):
|
| 808 |
df_top_out = gr.Dataframe(wrap=True, max_height=320)
|