"""MHMisinfo: Gradio app for YouTube misinformation detection and public engagement.

Fetches a video's metadata, transcript and comments, scores the video for
misinformation, analyses comment sentiment and keywords, and renders the
results as HTML panels, Plotly charts and tables.
"""

import os
import pandas as pd
import gradio as gr

from fetcher import (
    extract_video_id,
    fetch_video_metadata,
    fetch_transcript,
    fetch_comments,
    search_videos_by_title,
)
from analyzer import (
    detect_misinformation,
    analyze_sentiment_batch,
    sentiment_summary,
    extract_keywords,
    sentiment_weighted_keywords,
)
from charts import (
    sentiment_donut,
    keyword_bar,
    sentiment_timeline,
    keyword_comparison,
    modality_misinfo_distribution,
    trust_score_by_modality,
    uncertainty_analysis,
)


# Global stylesheet for the app; passed to ``demo.launch(css=CSS)`` at the
# bottom of this file. It defines the colour palette as CSS variables on
# ``:root``, restyles stock Gradio elements (containers, tabs, inputs,
# buttons, dropdowns, dataframes, Plotly wrappers, scrollbars) and declares
# the ``vv-*`` helper classes used by the HTML fragments this module emits
# (cards, metric grid, badges, tags, stat numbers, log lines, rules).
# The content is a runtime string: edit it as CSS, not as Python.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');

:root {
    --bg:          #FFFFE3;
    --card:        #FFFFFF;
    --border:      #BDDDFC;
    --text:        #4A4A4A;
    --dim:         #7b7b7b;
    --primary:     #269ccc;
    --ink-dark:    #384959;
    --stormy-sky:  #88BDF2;
    --stormy-slate:#6A89A7;
    --ink-grey:    #CBCBCB;
    --green:       #2e9e6b;
    --red:         #c0392b;
    --amber:       #d4841a;
}

html, body {
    background: var(--bg) !important;
    color:      var(--text) !important;
    margin: 0; padding: 0;
}
.gradio-container, #root, #app, main, .main, .wrap, .svelte-1kyws56 {
    background: var(--bg) !important;
    max-width: 100% !important;
    width: 100% !important;
    margin: 0 auto !important;
    padding: 0 1.5rem !important;
    box-sizing: border-box !important;
}
.block, .wrap, .panel, .padded, div.form,
div[class*="block"], div[class*="wrap"],
div[class*="panel"], div[class*="gap"],
.gap { background: transparent !important; border: none !important; }

.gr-group, .gr-box, .vv-section {
    background: var(--card) !important;
    border: 1px solid var(--border) !important;
    border-radius: 12px !important;
    padding: 1rem 1.25rem !important;
}

.tab-nav button {
    background: transparent !important;
    border: none !important;
    color: var(--dim) !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.82rem !important;
    letter-spacing: 0.05em !important;
    border-bottom: 2px solid transparent !important;
    padding: 0.5rem 1.2rem !important;
    transition: color 0.18s;
}
.tab-nav button.selected {
    color: var(--primary) !important;
    border-bottom-color: var(--primary) !important;
}
.tab-nav { border-bottom: 1px solid var(--border) !important; }

input[type="text"], input[type="password"], input[type="number"], textarea, select {
    background: #f5f7fa !important;
    border: 1px solid var(--border) !important;
    color: var(--text) !important;
    border-radius: 8px !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.88rem !important;
}
input:focus, textarea:focus, select:focus {
    border-color: var(--primary) !important;
    box-shadow: 0 0 0 2px rgba(38,156,204,0.18) !important;
    outline: none !important;
}
label, .gr-label, span.svelte-1b6s6s {
    color: var(--dim) !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.75rem !important;
    letter-spacing: 0.08em !important;
    text-transform: uppercase;
}

input[type="range"] { accent-color: var(--primary); }

button.primary, button[variant="primary"], .primary {
    background: linear-gradient(135deg, var(--primary), #1a7aaa) !important;
    border: none !important;
    color: #ffffff !important;
    font-weight: 700 !important;
    font-family: 'DM Mono', monospace !important;
    border-radius: 8px !important;
    letter-spacing: 0.06em !important;
}
button.secondary {
    background: rgba(38,156,204,0.08) !important;
    border: 1px solid var(--primary) !important;
    color: var(--primary) !important;
    border-radius: 8px !important;
    font-family: 'DM Mono', monospace !important;
}
button:hover { opacity: 0.88; transform: translateY(-1px); transition: all 0.15s; }

.dropdown, ul[role="listbox"], li[role="option"] {
    background: #f5f7fa !important;
    border-color: var(--border) !important;
    color: var(--text) !important;
}
li[role="option"]:hover { background: #e8f4fb !important; }

.gr-dataframe, table { background: var(--card) !important; }
.gr-dataframe th {
    background: #EEF6FD !important;
    color: var(--primary) !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.72rem !important;
    padding: 6px 10px;
    border-bottom: 1px solid var(--border);
    text-transform: uppercase;
    letter-spacing: 0.08em;
}
.gr-dataframe td {
    color: var(--text) !important;
    font-size: 0.77rem !important;
    padding: 5px 10px;
    border-bottom: 1px solid var(--border);
}
.gr-dataframe tr:hover td { background: rgba(38,156,204,0.05) !important; }

details > summary {
    color: var(--dim) !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.82rem !important;
    cursor: pointer;
    list-style: none;
}
details[open] > summary { color: var(--primary) !important; }

.js-plotly-plot, .plotly { background: transparent !important; }
.modebar { display: none !important; }

::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg); }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: var(--dim); }

.vv-hero {
    font-family: 'Syne', sans-serif !important;
    font-size: 1.65rem !important;
    font-weight: 800 !important;
    background: linear-gradient(135deg, #269ccc, #384959);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    letter-spacing: -0.02em;
    line-height: 1.2;
}
.vv-section-title {
    font-family: 'Syne', sans-serif !important;
    font-size: 0.68rem !important;
    font-weight: 700 !important;
    letter-spacing: 0.18em !important;
    text-transform: uppercase !important;
    color: #384959 !important;
    margin-bottom: 0.5rem !important;
    margin-top: 0 !important;
}

.vv-card {
    background: #FFFFFF !important;
    border: 1px solid #BDDDFC !important;
    border-radius: 12px !important;
    padding: 1.1rem 1.3rem !important;
    margin-bottom: 0.7rem !important;
}

.vv-metric-grid {
    display: grid !important;
    grid-template-columns: repeat(4, 1fr) !important;
    gap: 0.55rem !important;
    margin: 0.4rem 0 1rem !important;
}
.vv-metric-card {
    background: #FFFFFF !important;
    border: 1px solid #BDDDFC !important;
    border-radius: 12px !important;
    padding: 0.8rem 0.7rem !important;
    text-align: center !important;
    transition: transform 0.18s ease, box-shadow 0.18s ease !important;
    cursor: default !important;
}
.vv-metric-card:hover {
    transform: translateY(-4px) !important;
    box-shadow: 0 8px 24px rgba(38,156,204,0.18) !important;
}
.vv-metric-value {
    display: block !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 1.15rem !important;
    font-weight: 700 !important;
    color: #269ccc !important;
    margin: 0 !important;
    line-height: 1.2 !important;
}
.vv-metric-label {
    display: block !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.62rem !important;
    letter-spacing: 0.1em !important;
    text-transform: uppercase !important;
    color: #7b7b7b !important;
    margin: 4px 0 0 !important;
}

.vv-stat {
    display: inline-block !important;
    background: #EEF6FD !important;
    border: 1px solid #BDDDFC !important;
    border-radius: 6px !important;
    padding: 0.25rem 0.75rem !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.77rem !important;
    color: #269ccc !important;
    margin: 0.15rem 0.2rem !important;
}

.vv-badge-green {
    display: inline-block !important;
    background: rgba(46,158,107,0.10) !important;
    border: 1px solid #2e9e6b !important;
    color: #2e9e6b !important;
    border-radius: 20px !important;
    padding: 0.32rem 1.1rem !important;
    font-size: 0.85rem !important;
    font-family: 'DM Mono', monospace !important;
    font-weight: 600 !important;
}
.vv-badge-red {
    display: inline-block !important;
    background: rgba(192,57,43,0.10) !important;
    border: 1px solid #c0392b !important;
    color: #c0392b !important;
    border-radius: 20px !important;
    padding: 0.32rem 1.1rem !important;
    font-size: 0.85rem !important;
    font-family: 'DM Mono', monospace !important;
    font-weight: 600 !important;
}
.vv-badge-amber {
    display: inline-block !important;
    background: rgba(212,132,26,0.10) !important;
    border: 1px solid #d4841a !important;
    color: #d4841a !important;
    border-radius: 20px !important;
    padding: 0.32rem 1.1rem !important;
    font-size: 0.85rem !important;
    font-family: 'DM Mono', monospace !important;
    font-weight: 600 !important;
}

.vv-reasoning {
    background: #f7f9fb !important;
    border-left: 3px solid #d4841a !important;
    padding: 0.8rem 1rem !important;
    border-radius: 0 8px 8px 0 !important;
    font-size: 0.83rem !important;
    color: #4A4A4A !important;
    line-height: 1.65 !important;
    font-family: 'IBM Plex Sans', sans-serif !important;
    margin-top: 8px !important;
}

.vv-tag {
    display: inline-block !important;
    background: #BDDDFC !important;
    border: none !important;
    border-radius: 20px !important;
    padding: 3px 10px !important;
    font-family: 'DM Mono', monospace !important;
    font-size: 0.7rem !important;
    color: #384959 !important;
    margin: 2px !important;
    font-weight: 500 !important;
}

.vv-stat-big-green {
    font-family: 'DM Mono', monospace !important;
    font-size: 1.6rem !important;
    font-weight: 700 !important;
    color: #2e9e6b !important;
    margin: 0 !important;
}
.vv-stat-big-red {
    font-family: 'DM Mono', monospace !important;
    font-size: 1.6rem !important;
    font-weight: 700 !important;
    color: #c0392b !important;
    margin: 0 !important;
}
.vv-stat-big-dim {
    font-family: 'DM Mono', monospace !important;
    font-size: 1.6rem !important;
    font-weight: 700 !important;
    color: #7b7b7b !important;
    margin: 0 !important;
}
.vv-log-line {
    font-size: 0.72rem !important;
    color: #7b7b7b !important;
    font-family: 'DM Mono', monospace !important;
    margin: 2px 0 !important;
}
.vv-hr { border: none; border-top: 1px solid #BDDDFC; margin: 1.1rem 0; }
"""


def _empty_plotly(msg: str = "Run analysis to see data", h: int = 230):
    """Return a data-less Plotly figure that shows ``msg`` in its centre.

    Used as a placeholder wherever a real chart is not (yet) available.

    Args:
        msg: Text drawn in the middle of the plot area.
        h: Figure height passed to the layout.

    Returns:
        A ``plotly.graph_objects.Figure`` with a transparent paper background
        and a single centred annotation.
    """
    import plotly.graph_objects as go

    muted = "#7b7b7b"
    layout = {
        "paper_bgcolor": "rgba(0,0,0,0)",
        "plot_bgcolor": "rgba(189,221,252,0.13)",
        "font": {"color": muted},
        "margin": {"l": 10, "r": 10, "t": 10, "b": 10},
        "height": h,
    }

    placeholder = go.Figure()
    placeholder.update_layout(**layout)
    # Paper coordinates (0..1) keep the text centred regardless of axis ranges.
    placeholder.add_annotation(
        text=msg,
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font={"size": 12, "color": muted},
    )
    return placeholder


def _blank_outputs(status_msg: str):
    """Build the placeholder tuple shown when no analysis result is available.

    The tuple has the same length and ordering as the one returned by
    ``_build_outputs``: status HTML, log HTML, left-panel HTML, two empty
    HTML strings, seven placeholder figures, three empty HTML strings and
    four empty DataFrames.

    Args:
        status_msg: Message rendered in red in the status slot. It is inserted
            as-is, so callers may include inline HTML.
    """
    placeholder_fig = _empty_plotly()

    status_html = (
        '<p style="color:#c0392b;font-family:DM Mono,monospace;padding:8px">'
        f"{status_msg}</p>"
    )
    log_html = "<p class='vv-log-line'>—</p>"
    left_html = (
        "<div style='padding:3rem;text-align:center;color:#7b7b7b;"
        "font-family:DM Mono,monospace'>No data yet.</div>"
    )

    return (
        status_html,
        log_html,
        left_html,
        "", "",
        # The same figure object fills all seven plot slots.
        *([placeholder_fig] * 7),
        "", "", "",
        *(pd.DataFrame() for _ in range(4)),
    )


def run_pipeline(
    url_or_id: str,
    sentiment_method: str,
    max_comments: int,
    progress=gr.Progress(track_tqdm=False),
):
    """Analyse one YouTube video and yield the values for every output widget.

    This generator is used as a Gradio event handler. It yields exactly one
    tuple: the placeholder tuple from ``_blank_outputs`` when the input is
    empty or unparsable, the API key is missing, or the metadata fetch
    reports an error; otherwise the populated tuple from ``_build_outputs``.

    Args:
        url_or_id: YouTube URL or bare video ID as entered by the user.
        sentiment_method: Engine name forwarded to ``analyze_sentiment_batch``.
        max_comments: Upper bound on the number of comments to fetch; coerced
            to ``int`` before use.
        progress: Gradio progress tracker used to report pipeline stages.

    Yields:
        One tuple of output values, ordered as in ``_build_outputs``.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    api_key = os.environ.get("YT_API_KEY", "").strip()
    query = (url_or_id or "").strip()

    if not query:
        yield _blank_outputs("⚠️ Please enter a YouTube URL or video ID.")
        return

    video_id = extract_video_id(query)
    if not video_id:
        yield _blank_outputs("❌ Could not parse a valid YouTube video ID.")
        return

    if not api_key:
        yield _blank_outputs(
            "⚠️ YouTube API key not found. "
            "Set the <code>YT_API_KEY</code> environment variable / Space secret."
        )
        return

    progress(0.05, desc="Fetching video metadata…")
    meta, err = fetch_video_metadata(video_id, api_key)
    if err:
        yield _blank_outputs(f"❌ {err}")
        return

    progress(0.20, desc="Fetching transcript…")
    transcript, t_status = fetch_transcript(video_id)

    # Coerce once so the progress text and the fetch call agree on the value.
    comment_limit = int(max_comments)
    progress(0.35, desc=f"Fetching up to {comment_limit} comments…")
    comments_df, c_status = fetch_comments(video_id, api_key, max_comments=comment_limit)
    if comments_df is None:
        # Defensive: treat a missing frame like "no comments" instead of crashing.
        comments_df = pd.DataFrame()

    progress(0.50, desc="Running misinformation detection…")
    headline = f"{meta['title']} {meta['description']}"
    misinfo = detect_misinformation(
        text=headline,
        tags=meta["tags"],
        audio_transcript=transcript,
        video_transcript=transcript,
    )

    # `transcript or ''` keeps a missing transcript from adding the literal
    # word "None" to the keyword-extraction text.
    keywords = extract_keywords(f"{headline} {transcript or ''}", meta["tags"])

    sentiments, sent_sum, pos_kw, neg_kw = [], {}, [], []

    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        total = len(texts)
        batch = 64
        for start in range(0, total, batch):
            sentiments.extend(
                analyze_sentiment_batch(
                    texts[start: start + batch],
                    method=sentiment_method,
                    batch_size=batch,
                )
            )
            done = min(start + batch, total)
            # Sentiment analysis occupies the 60%..90% band of the progress bar.
            progress(0.60 + 0.30 * (done / total), desc=f"Sentiment {done}/{total}…")

        sent_sum = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)

    if sent_sum:
        sentiment_line = (
            f"💬 Sentiment: {sent_sum['pos_pct']}% pos / {sent_sum['neg_pct']}% neg"
        )
    else:
        sentiment_line = "💬 No comments — sentiment skipped"

    status_log = [
        # The title is uploader-controlled and the log is rendered as HTML,
        # so escape it (after truncation, to avoid cutting an entity in half).
        f"✅ Metadata: {escape(str(meta['title'])[:55])}",
        t_status,
        c_status,
        f"🔬 Misinfo score: {misinfo['confidence_pct']}%",
        sentiment_line,
    ]

    progress(0.97, desc="Building charts…")
    yield _build_outputs(
        meta=meta, video_id=video_id, transcript=transcript,
        comments_df=comments_df, misinfo=misinfo, keywords=keywords,
        sentiments=sentiments, sent_sum=sent_sum,
        pos_kw=pos_kw, neg_kw=neg_kw,
        status_log=status_log,
    )


def _format_count(value) -> str:
    """Format a count with thousands separators, or ``"N/A"`` if it is not numeric."""
    try:
        return f"{int(value):,}"
    except (TypeError, ValueError):
        return "N/A"


def _safe_chart(build, fallback_msg=None):
    """Return ``build()``; on any error log it and return a placeholder figure."""
    import logging

    try:
        return build()
    except Exception:
        # Best-effort by design: one broken chart must not blank the whole page,
        # but the failure is logged instead of being silently discarded.
        logging.getLogger(__name__).exception("Chart rendering failed")
        return _empty_plotly(fallback_msg) if fallback_msg else _empty_plotly()


def _stat_card(value_class: str, pct, label: str) -> str:
    """Render one percentage card of the sentiment summary row."""
    return (
        f'<div class="vv-card" style="text-align:center">'
        f'<p class="{value_class}">{pct}%</p>'
        f'<p style="color:#7b7b7b !important;font-size:0.75rem;margin:4px 0 0;font-family:DM Mono,monospace">{label}</p></div>'
    )


def _build_outputs(
    meta, video_id, transcript, comments_df,
    misinfo, keywords, sentiments, sent_sum, pos_kw, neg_kw, status_log,
):
    """Turn analysis results into the tuple of values for all output widgets.

    Args:
        meta: Video metadata mapping (title, description, tags, counts, ...).
        video_id: YouTube video ID used for the "Open on YouTube" link.
        transcript: Transcript text; falsy when unavailable.
        comments_df: DataFrame of fetched comments (may be empty).
        misinfo: Detection result; reads ``score``, ``reasoning`` and the
            optional ``modality_analysis``.
        keywords: Keywords passed to ``keyword_bar``.
        sentiments: Per-comment sentiment dicts, aligned with ``comments_df``
            rows; each provides ``label`` and optionally ``compound``.
        sent_sum: Sentiment summary with ``pos_pct``/``neg_pct``/``neu_pct``,
            or an empty mapping when no comments were analysed.
        pos_kw: Keywords passed to ``keyword_comparison`` as the positive set.
        neg_kw: Keywords passed to ``keyword_comparison`` as the negative set.
        status_log: Lines for the activity log; inserted as-is.

    Returns:
        A 19-tuple: status HTML, log HTML, left-panel HTML, badge HTML,
        reasoning HTML, seven figures (modality distribution, trust score,
        uncertainty, sentiment donut, sentiment timeline, keyword bar,
        keyword comparison), three stat-card HTML strings (positive,
        negative, neutral) and four DataFrames (all, positive, negative,
        most liked).
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if comments_df is None:
        # Defensive: treat a missing frame like "no comments".
        comments_df = pd.DataFrame()

    status_html = (
        '<p style="color:#2e9e6b;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
        "✅ Analysis complete</p>"
    )

    # Log lines are produced by the caller and inserted unchanged.
    log_html = "".join(f'<p class="vv-log-line">{line}</p>' for line in status_log)

    # ── Left panel ──────────────────────────────────────────────────────
    # Everything below originates from YouTube (uploader-controlled), so it is
    # HTML-escaped before interpolation. Truncation happens before escaping so
    # an entity is never cut in half.
    title = escape(str(meta.get("title", "")))
    channel = escape(str(meta.get("channel_title", "")))
    published = escape(str(meta.get("published_at", "")))
    duration = escape(str(meta.get("duration", "")))
    safe_video_id = escape(str(video_id), quote=True)

    thumb_url = meta.get("thumbnail_url")
    thumb_html = (
        f'<img src="{escape(str(thumb_url), quote=True)}" '
        'style="width:100%;border-radius:8px;margin-bottom:8px;display:block">'
        if thumb_url else ""
    )
    tag_html = "".join(
        f'<span class="vv-tag">#{escape(str(t))}</span>'
        for t in (meta.get("tags") or [])[:20]
    )
    tags_block = tag_html or '<span style="color:#7b7b7b;font-size:0.78rem">(none)</span>'
    desc_short = escape(str(meta.get("description") or "")[:1200])
    word_count = len(transcript.split()) if transcript else 0
    if transcript:
        clipped = transcript[:2500] + "…" if len(transcript) > 2500 else transcript
        transcript_short = escape(clipped)
    else:
        transcript_short = "(not available)"

    left_html = f"""
    {thumb_html}
    <a href="https://www.youtube.com/watch?v={safe_video_id}" target="_blank"
       style="display:block;text-align:center;font-family:'DM Mono',monospace;
              font-size:0.75rem;color:#7b7b7b;text-decoration:none;margin:4px 0 10px">
      ▶ Open on YouTube
    </a>
    <div class="vv-card">
      <p class="vv-section-title">Video</p>
      <p style="font-family:'Syne',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 6px;color:#4A4A4A !important">
        {title}
      </p>
      <p style="font-size:0.82rem;color:#7b7b7b !important;margin:0">
        by <b style="color:#384959 !important">{channel}</b>
        &nbsp;·&nbsp;
        <span style="color:#7b7b7b !important">{published}</span>
      </p>
    </div>

    <p class="vv-section-title">Metrics</p>
    <div class="vv-metric-grid">
      <div class="vv-metric-card">
        <span class="vv-metric-value">👁 {_format_count(meta.get('view_count'))}</span>
        <span class="vv-metric-label">Views</span>
      </div>
      <div class="vv-metric-card">
        <span class="vv-metric-value">👍 {_format_count(meta.get('like_count'))}</span>
        <span class="vv-metric-label">Likes</span>
      </div>
      <div class="vv-metric-card">
        <span class="vv-metric-value">💬 {_format_count(meta.get('comment_count'))}</span>
        <span class="vv-metric-label">Comments</span>
      </div>
      <div class="vv-metric-card">
        <span class="vv-metric-value">⏱ {duration}</span>
        <span class="vv-metric-label">Duration</span>
      </div>
    </div>

    <p class="vv-section-title" style="margin-top:0.8rem">Tags</p>
    {tags_block}

    <details style="margin-top:1rem">
      <summary>📄 Description</summary>
      <p style="font-size:0.78rem;color:#7b7b7b;line-height:1.65;white-space:pre-wrap;margin-top:6px">{desc_short}</p>
    </details>
    <details style="margin-top:0.5rem">
      <summary>📝 Transcript ({word_count} words)</summary>
      <p style="font-size:0.75rem;color:#7b7b7b;line-height:1.65;margin-top:6px">{transcript_short}</p>
    </details>
    """

    # ── Misinformation verdict ──────────────────────────────────────────
    score = misinfo["score"]
    if score < 0.35:
        badge_html = '<span class="vv-badge-green">✅ Appears Credible</span>'
    elif score < 0.65:
        badge_html = '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
    else:
        badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'

    # NOTE(review): the reasoning text comes from the analyzer and is inserted
    # unescaped, as before; confirm it never echoes raw video/comment text.
    reasoning_html = (
        f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>'
    )

    # ── Charts (each one degrades to a placeholder on failure) ──────────
    mod_analysis = misinfo.get("modality_analysis", {})
    fig_mod_dist = _safe_chart(
        lambda: modality_misinfo_distribution(mod_analysis),
        "Modality distribution unavailable",
    )
    fig_trust = _safe_chart(
        lambda: trust_score_by_modality(mod_analysis),
        "Trust score unavailable",
    )
    fig_uncert = _safe_chart(
        lambda: uncertainty_analysis(mod_analysis),
        "Uncertainty analysis unavailable",
    )

    no_comments_msg = "No comments analysed"
    fig_donut = _safe_chart(
        lambda: sentiment_donut(sent_sum) if sent_sum else _empty_plotly(no_comments_msg)
    )
    fig_timeline = _safe_chart(
        lambda: sentiment_timeline(comments_df, sentiments)
        if (sent_sum and not comments_df.empty)
        else _empty_plotly(no_comments_msg)
    )
    fig_kw = _safe_chart(
        lambda: keyword_bar(keywords, title="Top Video Keywords", color="#269ccc")
    )
    fig_kw_comp = _safe_chart(
        lambda: keyword_comparison(pos_kw, neg_kw)
        if (pos_kw or neg_kw)
        else _empty_plotly("No keyword comparison — no comments")
    )

    # ── Sentiment stat cards ────────────────────────────────────────────
    if sent_sum:
        stat_pos = _stat_card("vv-stat-big-green", sent_sum["pos_pct"], "Positively Engagement")
        stat_neg = _stat_card("vv-stat-big-red", sent_sum["neg_pct"], "Negatively Engagement")
        stat_neu = _stat_card("vv-stat-big-dim", sent_sum["neu_pct"], "Neutral")
    else:
        placeholder = (
            '<div class="vv-card" style="text-align:center;color:#7b7b7b !important;'
            'font-family:DM Mono,monospace;font-size:0.8rem;padding:1.2rem">N/A</div>'
        )
        stat_pos = stat_neg = stat_neu = placeholder

    # ── Comment tables ──────────────────────────────────────────────────
    show_cols = ["author", "text", "likes", "published_at"]
    # Separate instances so the four outputs never alias one object.
    df_all, df_pos, df_neg, df_top = (pd.DataFrame() for _ in range(4))

    if not comments_df.empty:
        display_df = comments_df.copy()
        if sentiments:
            display_df["sentiment"] = [s["label"] for s in sentiments]
            display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
            cols = show_cols + ["sentiment", "compound"]
        else:
            cols = show_cols

        if "sentiment" in display_df.columns:
            # Filter on the raw labels first, then swap in the display names.
            labels = display_df["sentiment"]
            df_pos = display_df[labels == "POSITIVE"][cols].head(50).reset_index(drop=True)
            df_neg = display_df[labels == "NEGATIVE"][cols].head(50).reset_index(drop=True)
            display_df["sentiment"] = labels.replace({
                "POSITIVE": "Positively Engagement",
                "NEGATIVE": "Negatively Engagement",
                "NEUTRAL":  "Neutral",
            })
            df_pos["sentiment"] = "Positively Engagement"
            df_neg["sentiment"] = "Negatively Engagement"

        df_all = display_df[cols].head(100).reset_index(drop=True)
        df_top = (
            display_df.sort_values("likes", ascending=False)
            .head(20)[cols]
            .reset_index(drop=True)
        )

    return (
        status_html,
        log_html,
        left_html,
        badge_html,
        reasoning_html,
        fig_mod_dist,
        fig_trust,
        fig_uncert,
        fig_donut,
        fig_timeline,
        fig_kw,
        fig_kw_comp,
        stat_pos,
        stat_neg,
        stat_neu,
        df_all,
        df_pos,
        df_neg,
        df_top,
    )


def do_search(keyword: str):
    """Search YouTube for ``keyword`` and prepare the result list for the UI.

    Args:
        keyword: Title or keyword typed by the user.

    Returns:
        A pair ``(html, radio_update)``. ``html`` is either a short notice
        (missing API key, empty keyword, no results) or one card per result.
        ``radio_update`` is a ``gr.update`` that hides and clears the radio
        on a notice, or shows it with ``(title, watch_url)`` choices.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def _notice(color: str, text: str):
        """Return a coloured one-line message plus a hidden, cleared radio."""
        return (
            f"<p style='color:{color};font-family:DM Mono,monospace'>{text}</p>",
            gr.update(choices=[], value=None, visible=False),
        )

    api_key = os.environ.get("YT_API_KEY", "").strip()
    if not api_key:
        return _notice("#c0392b", "⚠️ YT_API_KEY secret not set.")

    query = (keyword or "").strip()
    if not query:
        return _notice("#d4841a", "Enter a keyword to search.")

    results = search_videos_by_title(query, api_key, max_results=5)
    if not results:
        return _notice("#d4841a", "No results found.")

    cards = []
    choices = []
    for r in results:
        vid = r["video_id"]
        url = f"https://www.youtube.com/watch?v={vid}"
        title = str(r["title"])
        # Radio labels are passed as plain values, so the raw title is kept here.
        choices.append((title[:70], url))
        # Titles, channel names and URLs come from YouTube and are
        # uploader-controlled: escape them before building HTML.
        cards.append(
            f'<div class="vv-card" style="display:flex;align-items:center;gap:12px;margin-bottom:6px">'
            f'<img src="{escape(str(r["thumbnail_url"]), quote=True)}" '
            f'     style="width:72px;height:54px;object-fit:cover;border-radius:6px;flex-shrink:0">'
            f'<div>'
            f'<p style="margin:0;font-size:0.85rem;font-weight:600;color:#4A4A4A !important">{escape(title[:80])}</p>'
            f'<p style="margin:0;font-size:0.75rem;color:#7b7b7b !important">'
            f'{escape(str(r["channel_title"]))} · {escape(str(r["published_at"]))} · '
            f'<code style="color:#269ccc">v={escape(str(vid))}</code></p>'
            f'</div></div>'
        )
    return "".join(cards), gr.update(choices=choices, value=None, visible=True)


def pick_and_analyze(selected_url, sentiment_method, max_comments):
    """Run the analysis pipeline for the video chosen in the search results.

    Yields the placeholder outputs with a prompt when nothing is selected;
    otherwise delegates to ``run_pipeline`` and relays everything it yields.
    """
    if selected_url:
        yield from run_pipeline(selected_url, sentiment_method, max_comments)
    else:
        yield _blank_outputs("Select a video from the search results above.")


# UI definition. Components are created in display order inside the Blocks
# context; the event wiring at the end connects them to the handlers above.
with gr.Blocks(title="Misinformation Detection & Public Engagement") as demo:

    # Page header.
    gr.HTML("""
    <div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #BDDDFC;margin-bottom:1.2rem">
      <h1 class="vv-hero">🔬 Misinformation Detection & Public Engagement</h1>
    </div>
    """)

    # Collapsed settings panel: sentiment engine and comment limit.
    with gr.Accordion("⚙️ Settings", open=False):
        gr.HTML("""
        <div style="background:#f5f7fa;border:1px solid #BDDDFC;border-radius:8px;
                    padding:0.7rem 1rem;margin-bottom:0.8rem;font-family:'DM Mono',monospace;
                    font-size:0.78rem;color:#7b7b7b">
          🔑 YouTube API key is read from the <code style="color:#269ccc">YT_API_KEY</code>
          Space secret — it is never exposed in the UI.
        </div>
        """)
        with gr.Row():
            # The second element of each choice ("vader" / "hf") is the value
            # handed to the handlers as `sentiment_method`.
            sentiment_selector = gr.Dropdown(
                choices=[
                    ("VADER — fast, CPU-only (~5 000 comments/sec)", "vader"),
                    ("DistilBERT — accurate, downloads ~500 MB on first run", "hf"),
                ],
                value="vader",
                label="Sentiment Engine",
                scale=3,
            )
            max_comments_slider = gr.Slider(
                minimum=10, maximum=500, value=150, step=10,
                label="Max comments to fetch",
                scale=3,
                info="YouTube API quota: ~1 unit per comment request",
            )

    # Two alternative ways to choose the video to analyse.
    with gr.Tabs():

        with gr.TabItem("🔗 YouTube URL"):
            with gr.Row():
                url_input   = gr.Textbox(
                    placeholder="https://www.youtube.com/watch?v=...  or  youtu.be/...  or raw 11-char ID",
                    label="YouTube URL / Video ID",
                    scale=5,
                )
                analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1, min_width=130)

        with gr.TabItem("📁 Upload / Search by Title"):
            gr.HTML("""
            <div class="vv-card" style="margin-bottom:8px">
              <p class="vv-section-title">Search by video title or keyword</p>
              <p style="font-size:0.82rem;color:#7b7b7b;line-height:1.6;margin:0">
                Upload your file, then type the title or keyword below to locate the matching YouTube entry.
              </p>
            </div>
            """)
            # NOTE(review): `upload_file` is not referenced by any event
            # handler in this block; the uploaded file is never read here.
            upload_file = gr.File(
                label="Drop a video file (mp4, mov, avi, mkv, webm)",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
            )
            with gr.Row():
                kw_input   = gr.Textbox(placeholder="Enter video title or keyword…", label="Search keyword", scale=4)
                search_btn = gr.Button("🔎 Find on YouTube", scale=1)
            search_results_html = gr.HTML()
            # Hidden until `do_search` returns results and makes it visible.
            search_radio        = gr.Radio(label="Select a video to analyze", choices=[], visible=False)

    # Status line; replaced by the first element of every handler result.
    status_box = gr.HTML(
        '<p style="color:#7b7b7b;font-family:DM Mono,monospace;font-size:0.8rem;padding:6px 0">'
        "Enter a URL above and click Analyze.</p>"
    )

    # Main results area: video details on the left, analysis on the right.
    with gr.Row(equal_height=False):

        with gr.Column(scale=2):
            left_panel_html = gr.HTML(
                "<div style='padding:3rem;text-align:center;color:#7b7b7b;"
                "font-family:DM Mono,monospace'>No data yet.</div>"
            )

        with gr.Column(scale=3):

            # Misinformation verdict, modality charts and reasoning text.
            gr.HTML('<p class="vv-section-title" style="margin-top:0">🔬 Misinformation Analysis</p>')
            misinfo_badge_html = gr.HTML()

            with gr.Row():
                modality_dist_plot = gr.Plot(label="", show_label=False)

            with gr.Row():
                trust_score_plot = gr.Plot(label="", show_label=False)
                uncertainty_plot = gr.Plot(label="", show_label=False)

            misinfo_reasoning_html = gr.HTML()

            gr.HTML('<hr class="vv-hr">')

            # Comment sentiment: three stat cards, then two rows of charts.
            gr.HTML('<p class="vv-section-title">💬 Comment Sentiment</p>')
            with gr.Row():
                stat_pos_html = gr.HTML()
                stat_neg_html = gr.HTML()
                stat_neu_html = gr.HTML()
            with gr.Row():
                donut_plot    = gr.Plot(label="", show_label=False)
                timeline_plot = gr.Plot(label="", show_label=False)
            with gr.Row():
                kw_bar_plot   = gr.Plot(label="", show_label=False)
                kw_comp_plot  = gr.Plot(label="", show_label=False)

            gr.HTML('<hr class="vv-hr">')

            # Comment tables, one tab per view.
            gr.HTML('<p class="vv-section-title">📊 Comments Deep-Dive</p>')
            with gr.Tabs():
                with gr.TabItem("All"):
                    df_all_out = gr.Dataframe(
                        headers=["author", "text", "likes", "published_at", "sentiment", "compound"],
                        datatype=["str", "str", "number", "str", "str", "number"],
                        wrap=True,
                        max_height=320,
                    )
                with gr.TabItem("Positively Engagement"):
                    df_pos_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Negatively Engagement"):
                    df_neg_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Most Liked"):
                    df_top_out = gr.Dataframe(wrap=True, max_height=320)

    # Collapsed log of the pipeline's status lines.
    with gr.Accordion("📜 Activity Log", open=False):
        log_html_out = gr.HTML('<p class="vv-log-line">—</p>')

    # Page footer.
    gr.HTML("""
    <div style="margin-top:2rem;padding-top:1rem;border-top:1px solid #BDDDFC;
                text-align:center;font-family:'DM Mono',monospace;font-size:0.72rem;color:#7b7b7b">
      4-stream SeTa-Attention BiGRU · CCM / DMTE / Uncertainty Fusion ·
      Test ROC-AUC 0.967
    </div>
    """)

    # Output components, in the exact order of the 19-tuples produced by
    # `_build_outputs` and `_blank_outputs`. Keep the three in sync.
    ALL_OUTPUTS = [
        status_box,
        log_html_out,
        left_panel_html,
        misinfo_badge_html,
        misinfo_reasoning_html,
        modality_dist_plot,
        trust_score_plot,
        uncertainty_plot,
        donut_plot,
        timeline_plot,
        kw_bar_plot,
        kw_comp_plot,
        stat_pos_html,
        stat_neg_html,
        stat_neu_html,
        df_all_out,
        df_pos_out,
        df_neg_out,
        df_top_out,
    ]

    # Positional inputs for `run_pipeline(url_or_id, sentiment_method, max_comments)`.
    _pipeline_inputs = [url_input, sentiment_selector, max_comments_slider]

    # URL tab: the button click and the textbox submit both start the pipeline.
    analyze_btn.click(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)
    url_input.submit(fn=run_pipeline,  inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)

    # Search tab: list the matches, then analyse whichever one is picked.
    search_btn.click(
        fn=do_search,
        inputs=[kw_input],
        outputs=[search_results_html, search_radio],
    )
    # NOTE(review): `do_search` resets this radio to value=None. If that reset
    # also fires `change`, `pick_and_analyze` receives an empty selection and
    # replaces the outputs with its "Select a video" prompt. Confirm this is
    # the intended behaviour.
    search_radio.change(
        fn=pick_and_analyze,
        inputs=[search_radio, sentiment_selector, max_comments_slider],
        outputs=ALL_OUTPUTS,
    )


if __name__ == "__main__":
    # Script entry point: build the theme first, then start the app with the
    # custom stylesheet defined at the top of this file.
    app_theme = gr.themes.Base(
        primary_hue=gr.themes.colors.blue,
        neutral_hue=gr.themes.colors.slate,
        font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
    )
    demo.launch(css=CSS, theme=app_theme)