"""Gradio dashboard for YouTube misinformation detection and comment sentiment.

The module wires three project-local packages together:

* ``fetcher``  - YouTube metadata / transcript / comments / search helpers.
* ``analyzer`` - misinformation scoring, sentiment and keyword extraction.
* ``charts``   - Plotly figure builders.

Every callback returns (or yields) a 19-tuple whose order matches
``ALL_OUTPUTS`` defined at the bottom of the ``gr.Blocks`` layout.
"""

import html
import logging
import os

import gradio as gr
import pandas as pd

from analyzer import (
    analyze_sentiment_batch,
    detect_misinformation,
    extract_keywords,
    sentiment_summary,
    sentiment_weighted_keywords,
)
from charts import (
    keyword_bar,
    keyword_comparison,
    modality_misinfo_distribution,
    sentiment_donut,
    sentiment_timeline,
    trust_score_by_modality,
    uncertainty_analysis,
)
from fetcher import (
    extract_video_id,
    fetch_comments,
    fetch_transcript,
    fetch_video_metadata,
    search_videos_by_title,
)

logger = logging.getLogger(__name__)

# Stylesheet handed to ``demo.launch(css=...)``; one rule per line.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap');
:root { --bg: #FFFFE3; --card: #FFFFFF; --border: #BDDDFC; --text: #4A4A4A; --dim: #7b7b7b; --primary: #269ccc; --ink-dark: #384959; --stormy-sky: #88BDF2; --stormy-slate:#6A89A7; --ink-grey: #CBCBCB; --green: #2e9e6b; --red: #c0392b; --amber: #d4841a; }
html, body { background: var(--bg) !important; color: var(--text) !important; margin: 0; padding: 0; }
.gradio-container, #root, #app, main, .main, .wrap, .svelte-1kyws56 { background: var(--bg) !important; max-width: 100% !important; width: 100% !important; margin: 0 auto !important; padding: 0 1.5rem !important; box-sizing: border-box !important; }
.block, .wrap, .panel, .padded, div.form, div[class*="block"], div[class*="wrap"], div[class*="panel"], div[class*="gap"], .gap { background: transparent !important; border: none !important; }
.gr-group, .gr-box, .vv-section { background: var(--card) !important; border: 1px solid var(--border) !important; border-radius: 12px !important; padding: 1rem 1.25rem !important; }
.tab-nav button { background: transparent !important; border: none !important; color: var(--dim) !important; font-family: 'DM Mono', monospace !important; font-size: 0.82rem !important; letter-spacing: 0.05em !important; border-bottom: 2px solid transparent !important; padding: 0.5rem 1.2rem !important; transition: color 0.18s; }
.tab-nav button.selected { color: var(--primary) !important; border-bottom-color: var(--primary) !important; }
.tab-nav { border-bottom: 1px solid var(--border) !important; }
input[type="text"], input[type="password"], input[type="number"], textarea, select { background: #f5f7fa !important; border: 1px solid var(--border) !important; color: var(--text) !important; border-radius: 8px !important; font-family: 'DM Mono', monospace !important; font-size: 0.88rem !important; }
input:focus, textarea:focus, select:focus { border-color: var(--primary) !important; box-shadow: 0 0 0 2px rgba(38,156,204,0.18) !important; outline: none !important; }
label, .gr-label, span.svelte-1b6s6s { color: var(--dim) !important; font-family: 'DM Mono', monospace !important; font-size: 0.75rem !important; letter-spacing: 0.08em !important; text-transform: uppercase; }
input[type="range"] { accent-color: var(--primary); }
button.primary, button[variant="primary"], .primary { background: linear-gradient(135deg, var(--primary), #1a7aaa) !important; border: none !important; color: #ffffff !important; font-weight: 700 !important; font-family: 'DM Mono', monospace !important; border-radius: 8px !important; letter-spacing: 0.06em !important; }
button.secondary { background: rgba(38,156,204,0.08) !important; border: 1px solid var(--primary) !important; color: var(--primary) !important; border-radius: 8px !important; font-family: 'DM Mono', monospace !important; }
button:hover { opacity: 0.88; transform: translateY(-1px); transition: all 0.15s; }
.dropdown, ul[role="listbox"], li[role="option"] { background: #f5f7fa !important; border-color: var(--border) !important; color: var(--text) !important; }
li[role="option"]:hover { background: #e8f4fb !important; }
.gr-dataframe, table { background: var(--card) !important; }
.gr-dataframe th { background: #EEF6FD !important; color: var(--primary) !important; font-family: 'DM Mono', monospace !important; font-size: 0.72rem !important; padding: 6px 10px; border-bottom: 1px solid var(--border); text-transform: uppercase; letter-spacing: 0.08em; }
.gr-dataframe td { color: var(--text) !important; font-size: 0.77rem !important; padding: 5px 10px; border-bottom: 1px solid var(--border); }
.gr-dataframe tr:hover td { background: rgba(38,156,204,0.05) !important; }
details > summary { color: var(--dim) !important; font-family: 'DM Mono', monospace !important; font-size: 0.82rem !important; cursor: pointer; list-style: none; }
details[open] > summary { color: var(--primary) !important; }
.js-plotly-plot, .plotly { background: transparent !important; }
.modebar { display: none !important; }
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg); }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: var(--dim); }
.vv-hero { font-family: 'Syne', sans-serif !important; font-size: 1.65rem !important; font-weight: 800 !important; background: linear-gradient(135deg, #269ccc, #384959); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; letter-spacing: -0.02em; line-height: 1.2; }
.vv-section-title { font-family: 'Syne', sans-serif !important; font-size: 0.68rem !important; font-weight: 700 !important; letter-spacing: 0.18em !important; text-transform: uppercase !important; color: #384959 !important; margin-bottom: 0.5rem !important; margin-top: 0 !important; }
.vv-card { background: #FFFFFF !important; border: 1px solid #BDDDFC !important; border-radius: 12px !important; padding: 1.1rem 1.3rem !important; margin-bottom: 0.7rem !important; }
.vv-metric-grid { display: grid !important; grid-template-columns: repeat(4, 1fr) !important; gap: 0.55rem !important; margin: 0.4rem 0 1rem !important; }
.vv-metric-card { background: #FFFFFF !important; border: 1px solid #BDDDFC !important; border-radius: 12px !important; padding: 0.8rem 0.7rem !important; text-align: center !important; transition: transform 0.18s ease, box-shadow 0.18s ease !important; cursor: default !important; }
.vv-metric-card:hover { transform: translateY(-4px) !important; box-shadow: 0 8px 24px rgba(38,156,204,0.18) !important; }
.vv-metric-value { display: block !important; font-family: 'DM Mono', monospace !important; font-size: 1.15rem !important; font-weight: 700 !important; color: #269ccc !important; margin: 0 !important; line-height: 1.2 !important; }
.vv-metric-label { display: block !important; font-family: 'DM Mono', monospace !important; font-size: 0.62rem !important; letter-spacing: 0.1em !important; text-transform: uppercase !important; color: #7b7b7b !important; margin: 4px 0 0 !important; }
.vv-stat { display: inline-block !important; background: #EEF6FD !important; border: 1px solid #BDDDFC !important; border-radius: 6px !important; padding: 0.25rem 0.75rem !important; font-family: 'DM Mono', monospace !important; font-size: 0.77rem !important; color: #269ccc !important; margin: 0.15rem 0.2rem !important; }
.vv-badge-green { display: inline-block !important; background: rgba(46,158,107,0.10) !important; border: 1px solid #2e9e6b !important; color: #2e9e6b !important; border-radius: 20px !important; padding: 0.32rem 1.1rem !important; font-size: 0.85rem !important; font-family: 'DM Mono', monospace !important; font-weight: 600 !important; }
.vv-badge-red { display: inline-block !important; background: rgba(192,57,43,0.10) !important; border: 1px solid #c0392b !important; color: #c0392b !important; border-radius: 20px !important; padding: 0.32rem 1.1rem !important; font-size: 0.85rem !important; font-family: 'DM Mono', monospace !important; font-weight: 600 !important; }
.vv-badge-amber { display: inline-block !important; background: rgba(212,132,26,0.10) !important; border: 1px solid #d4841a !important; color: #d4841a !important; border-radius: 20px !important; padding: 0.32rem 1.1rem !important; font-size: 0.85rem !important; font-family: 'DM Mono', monospace !important; font-weight: 600 !important; }
.vv-reasoning { background: #f7f9fb !important; border-left: 3px solid #d4841a !important; padding: 0.8rem 1rem !important; border-radius: 0 8px 8px 0 !important; font-size: 0.83rem !important; color: #4A4A4A !important; line-height: 1.65 !important; font-family: 'IBM Plex Sans', sans-serif !important; margin-top: 8px !important; }
.vv-tag { display: inline-block !important; background: #BDDDFC !important; border: none !important; border-radius: 20px !important; padding: 3px 10px !important; font-family: 'DM Mono', monospace !important; font-size: 0.7rem !important; color: #384959 !important; margin: 2px !important; font-weight: 500 !important; }
.vv-stat-big-green { font-family: 'DM Mono', monospace !important; font-size: 1.6rem !important; font-weight: 700 !important; color: #2e9e6b !important; margin: 0 !important; }
.vv-stat-big-red { font-family: 'DM Mono', monospace !important; font-size: 1.6rem !important; font-weight: 700 !important; color: #c0392b !important; margin: 0 !important; }
.vv-stat-big-dim { font-family: 'DM Mono', monospace !important; font-size: 1.6rem !important; font-weight: 700 !important; color: #7b7b7b !important; margin: 0 !important; }
.vv-log-line { font-size: 0.72rem !important; color: #7b7b7b !important; font-family: 'DM Mono', monospace !important; margin: 2px 0 !important; }
.vv-hr { border: none; border-top: 1px solid #BDDDFC; margin: 1.1rem 0; }
"""


def _empty_plotly(msg: str = "Run analysis to see data", h: int = 230):
    """Return an otherwise empty Plotly figure with *msg* centred in it.

    Args:
        msg: Annotation text shown in the middle of the figure.
        h: Figure height passed to the layout.
    """
    import plotly.graph_objects as go

    fig = go.Figure()
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(189,221,252,0.13)",
        font=dict(color="#7b7b7b"),
        margin=dict(l=10, r=10, t=10, b=10),
        height=h,
    )
    fig.add_annotation(
        text=msg,
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=12, color="#7b7b7b"),
    )
    return fig


def _blank_outputs(status_msg: str):
    """Build the 19-tuple shown when there is nothing to display yet.

    Only the status slot carries *status_msg*; plots are placeholders and the
    four tables are empty frames. *status_msg* is inserted verbatim, so
    callers must escape any untrusted text before passing it in.
    """
    ep = _empty_plotly()
    return (
        f"\n\n{status_msg}\n\n",
        "\n\n",
        "\nNo data yet.\n",
        "",
        "",
        ep, ep, ep, ep, ep, ep, ep,
        "",
        "",
        "",
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
    )


def run_pipeline(
    url_or_id: str,
    sentiment_method: str,
    max_comments: int,
    progress=gr.Progress(track_tqdm=False),
):
    """Fetch, analyse and render one video; generator used as a Gradio callback.

    Yields exactly one 19-tuple (see ``ALL_OUTPUTS``): either a blank set with
    a status message when validation or the metadata fetch fails, or the full
    dashboard built by ``_build_outputs``.

    Args:
        url_or_id: YouTube URL or raw video id typed by the user.
        sentiment_method: Engine key forwarded to ``analyze_sentiment_batch``.
        max_comments: Upper bound forwarded to ``fetch_comments``.
        progress: Gradio progress tracker.
    """
    api_key = os.environ.get("YT_API_KEY", "").strip()

    if not (url_or_id or "").strip():
        yield _blank_outputs("⚠️ Please enter a YouTube URL or video ID.")
        return

    video_id = extract_video_id(url_or_id.strip())
    if not video_id:
        yield _blank_outputs("❌ Could not parse a valid YouTube video ID.")
        return

    if not api_key:
        yield _blank_outputs(
            "⚠️ YouTube API key not found. "
            "Set the YT_API_KEY environment variable / Space secret."
        )
        return

    progress(0.05, desc="Fetching video metadata…")
    meta, err = fetch_video_metadata(video_id, api_key)
    if err:
        # The error text comes from the fetch layer and may echo remote
        # content, so escape it before it lands in an HTML component.
        yield _blank_outputs(f"❌ {html.escape(str(err))}")
        return

    progress(0.20, desc="Fetching transcript…")
    transcript, t_status = fetch_transcript(video_id)

    progress(0.35, desc=f"Fetching up to {max_comments} comments…")
    comments_df, c_status = fetch_comments(
        video_id, api_key, max_comments=int(max_comments)
    )

    progress(0.50, desc="Running misinformation detection…")
    misinfo = detect_misinformation(
        text=f"{meta['title']} {meta['description']}",
        tags=meta["tags"],
        audio_transcript=transcript,
        video_transcript=transcript,
    )
    keywords = extract_keywords(
        f"{meta['title']} {meta['description']} {transcript}",
        meta["tags"],
    )

    sentiments, sent_sum, pos_kw, neg_kw = [], {}, [], []
    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        total = len(texts)
        batch = 64
        for i in range(0, total, batch):
            chunk = texts[i: i + batch]
            sentiments += analyze_sentiment_batch(
                chunk, method=sentiment_method, batch_size=batch
            )
            # Sentiment occupies the 0.60 - 0.90 band of the progress bar.
            frac = 0.60 + 0.30 * min((i + batch) / max(total, 1), 1.0)
            progress(frac, desc=f"Sentiment {min(i+batch, total)}/{total}…")
        sent_sum = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)

    progress(0.97, desc="Building charts…")

    if sent_sum:
        sentiment_line = (
            f"💬 Sentiment: {sent_sum['pos_pct']}% pos / {sent_sum['neg_pct']}% neg"
        )
    else:
        sentiment_line = "💬 No comments — sentiment skipped"

    yield _build_outputs(
        meta=meta,
        video_id=video_id,
        transcript=transcript,
        comments_df=comments_df,
        misinfo=misinfo,
        keywords=keywords,
        sentiments=sentiments,
        sent_sum=sent_sum,
        pos_kw=pos_kw,
        neg_kw=neg_kw,
        status_log=[
            # The title is remote text rendered inside gr.HTML: escape it.
            f"✅ Metadata: {html.escape(str(meta['title'])[:55])}",
            t_status,
            c_status,
            f"🔬 Misinfo score: {misinfo['confidence_pct']}%",
            sentiment_line,
        ],
    )


def _safe_chart(builder, fallback_msg, *args, **kwargs):
    """Call a chart *builder*; on any failure log it and return a placeholder.

    Args:
        builder: Figure factory to call with ``*args`` / ``**kwargs``.
        fallback_msg: Placeholder text, or ``None`` for ``_empty_plotly``'s
            default message.
    """
    try:
        return builder(*args, **kwargs)
    except Exception:
        # Best effort by design: one broken chart must not blank the whole
        # dashboard, but the failure is logged instead of vanishing.
        logger.exception(
            "Chart builder %s failed", getattr(builder, "__name__", builder)
        )
        if fallback_msg is None:
            return _empty_plotly()
        return _empty_plotly(fallback_msg)


def _stat_block(pct, label: str) -> str:
    """Return the snippet for one sentiment percentage and its caption."""
    return (
        "\n"
        f"\n\n{pct}%\n\n"
        f"\n\n{label}\n\n"
    )


def _build_outputs(
    meta,
    video_id,
    transcript,
    comments_df,
    misinfo,
    keywords,
    sentiments,
    sent_sum,
    pos_kw,
    neg_kw,
    status_log,
):
    """Assemble the 19-tuple for a successful analysis.

    Text that originates from YouTube (title, channel, description, tags,
    transcript) is HTML-escaped before being interpolated. Chart builders are
    wrapped by ``_safe_chart`` so a failing chart degrades to a placeholder.

    Returns:
        Tuple ordered as ``ALL_OUTPUTS``: status, log, left panel, badge,
        reasoning, seven figures, three sentiment stats, four DataFrames.
    """
    status_html = (
        "\n\n"
        "✅ Analysis complete\n\n"
    )
    log_html = "".join(f"\n\n{line}\n\n" for line in status_log)

    # NOTE(review): both branches are empty in this revision, so no thumbnail
    # is ever rendered - the markup looks missing; confirm.
    thumb_html = "" if meta.get("thumbnail_url") else ""

    tag_html = "".join(
        f"#{html.escape(str(t))}" for t in (meta.get("tags") or [])[:20]
    )
    # Truncate first, then escape, so an entity is never cut in half.
    desc_short = html.escape((meta.get("description") or "")[:1200])
    word_count = len(transcript.split()) if transcript else 0
    if transcript:
        clipped = transcript[:2500] + "…" if len(transcript) > 2500 else transcript
        transcript_short = html.escape(clipped)
    else:
        transcript_short = "(not available)"

    title = html.escape(str(meta["title"]))
    channel = html.escape(str(meta["channel_title"]))
    published = html.escape(str(meta["published_at"]))
    duration = html.escape(str(meta["duration"]))

    left_html = f""" {thumb_html} ▶ Open on YouTube

Video

{title}

by {channel}  ·  {published}

Metrics

👁 {meta['view_count']:,} Views
👍 {meta['like_count']:,} Likes
💬 {meta['comment_count']:,} Comments
⏱ {duration} Duration

Tags

{tag_html or '(none)'}
📄 Description

{desc_short}

📝 Transcript ({word_count} words)

{transcript_short}

"""

    score = misinfo["score"]
    if score < 0.35:
        badge_html = "✅ Appears Credible"
    elif score < 0.65:
        badge_html = "⚠️ Uncertain / Mixed Signals"
    else:
        badge_html = "🚨 Likely Misinformation"

    reasoning_html = f'\n🧠 Reasoning: {misinfo["reasoning"]}\n'

    mod_analysis = misinfo.get("modality_analysis", {})
    fig_mod_dist = _safe_chart(
        modality_misinfo_distribution,
        "Modality distribution unavailable",
        mod_analysis,
    )
    fig_trust = _safe_chart(
        trust_score_by_modality, "Trust score unavailable", mod_analysis
    )
    fig_uncert = _safe_chart(
        uncertainty_analysis, "Uncertainty analysis unavailable", mod_analysis
    )

    if sent_sum:
        fig_donut = _safe_chart(sentiment_donut, None, sent_sum)
    else:
        fig_donut = _empty_plotly("No comments analysed")

    if sent_sum and not comments_df.empty:
        fig_timeline = _safe_chart(
            sentiment_timeline, None, comments_df, sentiments
        )
    else:
        fig_timeline = _empty_plotly("No comments analysed")

    fig_kw = _safe_chart(
        keyword_bar, None, keywords, title="Top Video Keywords", color="#269ccc"
    )

    if pos_kw or neg_kw:
        fig_kw_comp = _safe_chart(keyword_comparison, None, pos_kw, neg_kw)
    else:
        fig_kw_comp = _empty_plotly("No keyword comparison — no comments")

    if sent_sum:
        stat_pos = _stat_block(sent_sum["pos_pct"], "Positively Engagement")
        stat_neg = _stat_block(sent_sum["neg_pct"], "Negatively Engagement")
        stat_neu = _stat_block(sent_sum["neu_pct"], "Neutral")
    else:
        stat_pos = stat_neg = stat_neu = "\nN/A\n"

    show_cols = ["author", "text", "likes", "published_at"]
    # Four distinct frames so no two outputs alias the same object.
    df_all, df_pos, df_neg, df_top = (pd.DataFrame() for _ in range(4))
    if not comments_df.empty:
        display_df = comments_df.copy()
        if sentiments:
            display_df["sentiment"] = [s["label"] for s in sentiments]
            display_df["compound"] = [
                round(s.get("compound", 0), 3) for s in sentiments
            ]
            cols = show_cols + ["sentiment", "compound"]
        else:
            cols = show_cols

        if "sentiment" in display_df.columns:
            # Filter on the raw labels before they are swapped for the
            # display captions below.
            is_pos = display_df["sentiment"] == "POSITIVE"
            is_neg = display_df["sentiment"] == "NEGATIVE"
            df_pos = display_df[is_pos][cols].head(50).reset_index(drop=True)
            df_neg = display_df[is_neg][cols].head(50).reset_index(drop=True)
            display_df["sentiment"] = display_df["sentiment"].replace({
                "POSITIVE": "Positively Engagement",
                "NEGATIVE": "Negatively Engagement",
                "NEUTRAL": "Neutral",
            })
            df_pos["sentiment"] = "Positively Engagement"
            df_neg["sentiment"] = "Negatively Engagement"

        df_all = display_df[cols].head(100).reset_index(drop=True)
        df_top = (
            display_df.sort_values("likes", ascending=False)
            .head(20)[cols]
            .reset_index(drop=True)
        )

    return (
        status_html,
        log_html,
        left_html,
        badge_html,
        reasoning_html,
        fig_mod_dist,
        fig_trust,
        fig_uncert,
        fig_donut,
        fig_timeline,
        fig_kw,
        fig_kw_comp,
        stat_pos,
        stat_neg,
        stat_neu,
        df_all,
        df_pos,
        df_neg,
        df_top,
    )


def do_search(keyword: str):
    """Search YouTube by title/keyword and populate the result picker.

    Returns:
        ``(results_html, radio_update)``: the HTML listing and a ``gr.update``
        for the radio. The radio is hidden and emptied when the API key is
        missing, the keyword is blank, or nothing is found.
    """
    hidden_radio = gr.update(choices=[], value=None, visible=False)

    api_key = os.environ.get("YT_API_KEY", "").strip()
    if not api_key:
        return "\n\n⚠️ YT_API_KEY secret not set.\n\n", hidden_radio

    if not (keyword or "").strip():
        return "\n\nEnter a keyword to search.\n\n", hidden_radio

    results = search_videos_by_title(keyword.strip(), api_key, max_results=5)
    if not results:
        return "\n\nNo results found.\n\n", hidden_radio

    cards = []
    choices = []
    for r in results:
        vid = r["video_id"]
        url = f"https://www.youtube.com/watch?v={vid}"
        choices.append((r["title"][:70], url))
        # Search results are remote text: escape before embedding in HTML.
        title = html.escape(str(r["title"])[:80])
        channel = html.escape(str(r["channel_title"]))
        published = html.escape(str(r["published_at"]))
        cards.append(
            "\n"
            ""
            "\n"
            f"\n\n{title}\n\n"
            "\n\n"
            f"{channel} · {published} · "
            f"v={html.escape(str(vid))}\n\n"
            "\n"
        )

    return "".join(cards), gr.update(choices=choices, value=None, visible=True)


def pick_and_analyze(
    selected_url,
    sentiment_method,
    max_comments,
    progress=gr.Progress(track_tqdm=False),
):
    """Run the pipeline for the search result chosen in the radio.

    Yields a blank output set when nothing is selected; otherwise delegates
    to ``run_pipeline``, forwarding this event's progress tracker.
    """
    if not selected_url:
        yield _blank_outputs("Select a video from the search results above.")
        return
    yield from run_pipeline(selected_url, sentiment_method, max_comments, progress)


with gr.Blocks(title="Misinformation Detection & Public Engagement") as demo:
    gr.HTML("""

🔬 Misinformation Detection & Public Engagement

""")

    with gr.Accordion("⚙️ Settings", open=False):
        gr.HTML("""
🔑 YouTube API key is read from the YT_API_KEY Space secret — it is never exposed in the UI.
""")
        with gr.Row():
            sentiment_selector = gr.Dropdown(
                choices=[
                    ("VADER — fast, CPU-only (~5 000 comments/sec)", "vader"),
                    ("DistilBERT — accurate, downloads ~500 MB on first run", "hf"),
                ],
                value="vader",
                label="Sentiment Engine",
                scale=3,
            )
            max_comments_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=150,
                step=10,
                label="Max comments to fetch",
                scale=3,
                info="YouTube API quota: ~1 unit per comment request",
            )

    with gr.Tabs():
        with gr.TabItem("🔗 YouTube URL"):
            with gr.Row():
                url_input = gr.Textbox(
                    placeholder="https://www.youtube.com/watch?v=... or youtu.be/... or raw 11-char ID",
                    label="YouTube URL / Video ID",
                    scale=5,
                )
                analyze_btn = gr.Button(
                    "🔍 Analyze", variant="primary", scale=1, min_width=130
                )

        with gr.TabItem("📁 Upload / Search by Title"):
            gr.HTML("""

Search by video title or keyword

Upload your file, then type the title or keyword below to locate the matching YouTube entry.

""")
            # NOTE(review): this component is not wired to any event below;
            # the uploaded file is never read - confirm that is intended.
            upload_file = gr.File(
                label="Drop a video file (mp4, mov, avi, mkv, webm)",
                file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"],
            )
            with gr.Row():
                kw_input = gr.Textbox(
                    placeholder="Enter video title or keyword…",
                    label="Search keyword",
                    scale=4,
                )
                search_btn = gr.Button("🔎 Find on YouTube", scale=1)
            search_results_html = gr.HTML()
            search_radio = gr.Radio(
                label="Select a video to analyze", choices=[], visible=False
            )

    status_box = gr.HTML(
        "\n\n"
        "Enter a URL above and click Analyze.\n\n"
    )

    # NOTE(review): the nesting below is inferred - the sentiment and
    # comments sections are assumed to sit in the right-hand column; confirm.
    with gr.Row(equal_height=False):
        with gr.Column(scale=2):
            left_panel_html = gr.HTML(
                "\nNo data yet.\n"
            )

        with gr.Column(scale=3):
            gr.HTML("\n\n🔬 Misinformation Analysis\n\n")
            misinfo_badge_html = gr.HTML()
            with gr.Row():
                modality_dist_plot = gr.Plot(label="", show_label=False)
            with gr.Row():
                trust_score_plot = gr.Plot(label="", show_label=False)
                uncertainty_plot = gr.Plot(label="", show_label=False)
            misinfo_reasoning_html = gr.HTML()

            gr.HTML("\n")
            gr.HTML("\n\n💬 Comment Sentiment\n\n")
            with gr.Row():
                stat_pos_html = gr.HTML()
                stat_neg_html = gr.HTML()
                stat_neu_html = gr.HTML()
            with gr.Row():
                donut_plot = gr.Plot(label="", show_label=False)
                timeline_plot = gr.Plot(label="", show_label=False)
            with gr.Row():
                kw_bar_plot = gr.Plot(label="", show_label=False)
                kw_comp_plot = gr.Plot(label="", show_label=False)

            gr.HTML("\n")
            gr.HTML("\n\n📊 Comments Deep-Dive\n\n")
            with gr.Tabs():
                with gr.TabItem("All"):
                    df_all_out = gr.Dataframe(
                        headers=[
                            "author", "text", "likes",
                            "published_at", "sentiment", "compound",
                        ],
                        datatype=["str", "str", "number", "str", "str", "number"],
                        wrap=True,
                        max_height=320,
                    )
                with gr.TabItem("Positively Engagement"):
                    df_pos_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Negatively Engagement"):
                    df_neg_out = gr.Dataframe(wrap=True, max_height=320)
                with gr.TabItem("Most Liked"):
                    df_top_out = gr.Dataframe(wrap=True, max_height=320)

    with gr.Accordion("📜 Activity Log", open=False):
        log_html_out = gr.HTML("\n\n")

    gr.HTML("""
4-stream SeTa-Attention BiGRU · CCM / DMTE / Uncertainty Fusion · Test ROC-AUC 0.967
""")

    # Order must match the tuples built by _blank_outputs / _build_outputs.
    ALL_OUTPUTS = [
        status_box,
        log_html_out,
        left_panel_html,
        misinfo_badge_html,
        misinfo_reasoning_html,
        modality_dist_plot,
        trust_score_plot,
        uncertainty_plot,
        donut_plot,
        timeline_plot,
        kw_bar_plot,
        kw_comp_plot,
        stat_pos_html,
        stat_neg_html,
        stat_neu_html,
        df_all_out,
        df_pos_out,
        df_neg_out,
        df_top_out,
    ]
    _pipeline_inputs = [url_input, sentiment_selector, max_comments_slider]

    analyze_btn.click(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)
    url_input.submit(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS)
    search_btn.click(
        fn=do_search,
        inputs=[kw_input],
        outputs=[search_results_html, search_radio],
    )
    search_radio.change(
        fn=pick_and_analyze,
        inputs=[search_radio, sentiment_selector, max_comments_slider],
        outputs=ALL_OUTPUTS,
    )


if __name__ == "__main__":
    demo.launch(
        css=CSS,
        theme=gr.themes.Base(
            primary_hue=gr.themes.colors.blue,
            neutral_hue=gr.themes.colors.slate,
            font=[gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"],
        ),
    )