Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| import gradio as gr | |
| from fetcher import ( | |
| extract_video_id, | |
| fetch_video_metadata, | |
| fetch_transcript, | |
| fetch_comments, | |
| search_videos_by_title, | |
| ) | |
| from analyzer import ( | |
| detect_misinformation, | |
| analyze_sentiment_batch, | |
| sentiment_summary, | |
| extract_keywords, | |
| sentiment_weighted_keywords, | |
| ) | |
| from charts import ( | |
| sentiment_donut, | |
| keyword_bar, | |
| sentiment_timeline, | |
| keyword_comparison, | |
| modality_misinfo_distribution, | |
| trust_score_by_modality, | |
| uncertainty_analysis, | |
| ) | |
# Global stylesheet for the app, handed to demo.launch(css=CSS) at the bottom of the file.
# It declares the colour palette as CSS custom properties, restyles stock Gradio widgets,
# and defines the custom `vv-*` classes referenced by the HTML fragments built in this module.
| CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@400;600;700;800&family=IBM+Plex+Sans:wght@300;400;500&display=swap'); | |
| :root { | |
| --bg: #FFFFE3; | |
| --card: #FFFFFF; | |
| --border: #BDDDFC; | |
| --text: #4A4A4A; | |
| --dim: #7b7b7b; | |
| --primary: #269ccc; | |
| --ink-dark: #384959; | |
| --stormy-sky: #88BDF2; | |
| --stormy-slate:#6A89A7; | |
| --ink-grey: #CBCBCB; | |
| --green: #2e9e6b; | |
| --red: #c0392b; | |
| --amber: #d4841a; | |
| } | |
| html, body { | |
| background: var(--bg) !important; | |
| color: var(--text) !important; | |
| margin: 0; padding: 0; | |
| } | |
| .gradio-container, #root, #app, main, .main, .wrap, .svelte-1kyws56 { | |
| background: var(--bg) !important; | |
| max-width: 100% !important; | |
| width: 100% !important; | |
| margin: 0 auto !important; | |
| padding: 0 1.5rem !important; | |
| box-sizing: border-box !important; | |
| } | |
| .block, .wrap, .panel, .padded, div.form, | |
| div[class*="block"], div[class*="wrap"], | |
| div[class*="panel"], div[class*="gap"], | |
| .gap { background: transparent !important; border: none !important; } | |
| .gr-group, .gr-box, .vv-section { | |
| background: var(--card) !important; | |
| border: 1px solid var(--border) !important; | |
| border-radius: 12px !important; | |
| padding: 1rem 1.25rem !important; | |
| } | |
| .tab-nav button { | |
| background: transparent !important; | |
| border: none !important; | |
| color: var(--dim) !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.82rem !important; | |
| letter-spacing: 0.05em !important; | |
| border-bottom: 2px solid transparent !important; | |
| padding: 0.5rem 1.2rem !important; | |
| transition: color 0.18s; | |
| } | |
| .tab-nav button.selected { | |
| color: var(--primary) !important; | |
| border-bottom-color: var(--primary) !important; | |
| } | |
| .tab-nav { border-bottom: 1px solid var(--border) !important; } | |
| input[type="text"], input[type="password"], input[type="number"], textarea, select { | |
| background: #f5f7fa !important; | |
| border: 1px solid var(--border) !important; | |
| color: var(--text) !important; | |
| border-radius: 8px !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.88rem !important; | |
| } | |
| input:focus, textarea:focus, select:focus { | |
| border-color: var(--primary) !important; | |
| box-shadow: 0 0 0 2px rgba(38,156,204,0.18) !important; | |
| outline: none !important; | |
| } | |
| label, .gr-label, span.svelte-1b6s6s { | |
| color: var(--dim) !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.75rem !important; | |
| letter-spacing: 0.08em !important; | |
| text-transform: uppercase; | |
| } | |
| input[type="range"] { accent-color: var(--primary); } | |
| button.primary, button[variant="primary"], .primary { | |
| background: linear-gradient(135deg, var(--primary), #1a7aaa) !important; | |
| border: none !important; | |
| color: #ffffff !important; | |
| font-weight: 700 !important; | |
| font-family: 'DM Mono', monospace !important; | |
| border-radius: 8px !important; | |
| letter-spacing: 0.06em !important; | |
| } | |
| button.secondary { | |
| background: rgba(38,156,204,0.08) !important; | |
| border: 1px solid var(--primary) !important; | |
| color: var(--primary) !important; | |
| border-radius: 8px !important; | |
| font-family: 'DM Mono', monospace !important; | |
| } | |
| button:hover { opacity: 0.88; transform: translateY(-1px); transition: all 0.15s; } | |
| .dropdown, ul[role="listbox"], li[role="option"] { | |
| background: #f5f7fa !important; | |
| border-color: var(--border) !important; | |
| color: var(--text) !important; | |
| } | |
| li[role="option"]:hover { background: #e8f4fb !important; } | |
| .gr-dataframe, table { background: var(--card) !important; } | |
| .gr-dataframe th { | |
| background: #EEF6FD !important; | |
| color: var(--primary) !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.72rem !important; | |
| padding: 6px 10px; | |
| border-bottom: 1px solid var(--border); | |
| text-transform: uppercase; | |
| letter-spacing: 0.08em; | |
| } | |
| .gr-dataframe td { | |
| color: var(--text) !important; | |
| font-size: 0.77rem !important; | |
| padding: 5px 10px; | |
| border-bottom: 1px solid var(--border); | |
| } | |
| .gr-dataframe tr:hover td { background: rgba(38,156,204,0.05) !important; } | |
| details > summary { | |
| color: var(--dim) !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.82rem !important; | |
| cursor: pointer; | |
| list-style: none; | |
| } | |
| details[open] > summary { color: var(--primary) !important; } | |
| .js-plotly-plot, .plotly { background: transparent !important; } | |
| .modebar { display: none !important; } | |
| ::-webkit-scrollbar { width: 6px; height: 6px; } | |
| ::-webkit-scrollbar-track { background: var(--bg); } | |
| ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; } | |
| ::-webkit-scrollbar-thumb:hover { background: var(--dim); } | |
| .vv-hero { | |
| font-family: 'Syne', sans-serif !important; | |
| font-size: 1.65rem !important; | |
| font-weight: 800 !important; | |
| background: linear-gradient(135deg, #269ccc, #384959); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| letter-spacing: -0.02em; | |
| line-height: 1.2; | |
| } | |
| .vv-section-title { | |
| font-family: 'Syne', sans-serif !important; | |
| font-size: 0.68rem !important; | |
| font-weight: 700 !important; | |
| letter-spacing: 0.18em !important; | |
| text-transform: uppercase !important; | |
| color: #384959 !important; | |
| margin-bottom: 0.5rem !important; | |
| margin-top: 0 !important; | |
| } | |
| .vv-card { | |
| background: #FFFFFF !important; | |
| border: 1px solid #BDDDFC !important; | |
| border-radius: 12px !important; | |
| padding: 1.1rem 1.3rem !important; | |
| margin-bottom: 0.7rem !important; | |
| } | |
| .vv-metric-grid { | |
| display: grid !important; | |
| grid-template-columns: repeat(4, 1fr) !important; | |
| gap: 0.55rem !important; | |
| margin: 0.4rem 0 1rem !important; | |
| } | |
| .vv-metric-card { | |
| background: #FFFFFF !important; | |
| border: 1px solid #BDDDFC !important; | |
| border-radius: 12px !important; | |
| padding: 0.8rem 0.7rem !important; | |
| text-align: center !important; | |
| transition: transform 0.18s ease, box-shadow 0.18s ease !important; | |
| cursor: default !important; | |
| } | |
| .vv-metric-card:hover { | |
| transform: translateY(-4px) !important; | |
| box-shadow: 0 8px 24px rgba(38,156,204,0.18) !important; | |
| } | |
| .vv-metric-value { | |
| display: block !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 1.15rem !important; | |
| font-weight: 700 !important; | |
| color: #269ccc !important; | |
| margin: 0 !important; | |
| line-height: 1.2 !important; | |
| } | |
| .vv-metric-label { | |
| display: block !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.62rem !important; | |
| letter-spacing: 0.1em !important; | |
| text-transform: uppercase !important; | |
| color: #7b7b7b !important; | |
| margin: 4px 0 0 !important; | |
| } | |
| .vv-stat { | |
| display: inline-block !important; | |
| background: #EEF6FD !important; | |
| border: 1px solid #BDDDFC !important; | |
| border-radius: 6px !important; | |
| padding: 0.25rem 0.75rem !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.77rem !important; | |
| color: #269ccc !important; | |
| margin: 0.15rem 0.2rem !important; | |
| } | |
| .vv-badge-green { | |
| display: inline-block !important; | |
| background: rgba(46,158,107,0.10) !important; | |
| border: 1px solid #2e9e6b !important; | |
| color: #2e9e6b !important; | |
| border-radius: 20px !important; | |
| padding: 0.32rem 1.1rem !important; | |
| font-size: 0.85rem !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-weight: 600 !important; | |
| } | |
| .vv-badge-red { | |
| display: inline-block !important; | |
| background: rgba(192,57,43,0.10) !important; | |
| border: 1px solid #c0392b !important; | |
| color: #c0392b !important; | |
| border-radius: 20px !important; | |
| padding: 0.32rem 1.1rem !important; | |
| font-size: 0.85rem !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-weight: 600 !important; | |
| } | |
| .vv-badge-amber { | |
| display: inline-block !important; | |
| background: rgba(212,132,26,0.10) !important; | |
| border: 1px solid #d4841a !important; | |
| color: #d4841a !important; | |
| border-radius: 20px !important; | |
| padding: 0.32rem 1.1rem !important; | |
| font-size: 0.85rem !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-weight: 600 !important; | |
| } | |
| .vv-reasoning { | |
| background: #f7f9fb !important; | |
| border-left: 3px solid #d4841a !important; | |
| padding: 0.8rem 1rem !important; | |
| border-radius: 0 8px 8px 0 !important; | |
| font-size: 0.83rem !important; | |
| color: #4A4A4A !important; | |
| line-height: 1.65 !important; | |
| font-family: 'IBM Plex Sans', sans-serif !important; | |
| margin-top: 8px !important; | |
| } | |
| .vv-tag { | |
| display: inline-block !important; | |
| background: #BDDDFC !important; | |
| border: none !important; | |
| border-radius: 20px !important; | |
| padding: 3px 10px !important; | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 0.7rem !important; | |
| color: #384959 !important; | |
| margin: 2px !important; | |
| font-weight: 500 !important; | |
| } | |
| .vv-stat-big-green { | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 1.6rem !important; | |
| font-weight: 700 !important; | |
| color: #2e9e6b !important; | |
| margin: 0 !important; | |
| } | |
| .vv-stat-big-red { | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 1.6rem !important; | |
| font-weight: 700 !important; | |
| color: #c0392b !important; | |
| margin: 0 !important; | |
| } | |
| .vv-stat-big-dim { | |
| font-family: 'DM Mono', monospace !important; | |
| font-size: 1.6rem !important; | |
| font-weight: 700 !important; | |
| color: #7b7b7b !important; | |
| margin: 0 !important; | |
| } | |
| .vv-log-line { | |
| font-size: 0.72rem !important; | |
| color: #7b7b7b !important; | |
| font-family: 'DM Mono', monospace !important; | |
| margin: 2px 0 !important; | |
| } | |
| .vv-hr { border: none; border-top: 1px solid #BDDDFC; margin: 1.1rem 0; } | |
| """ | |
def _empty_plotly(msg: str = "Run analysis to see data", h: int = 230):
    """Build a blank Plotly figure that shows only a centred message.

    Used as a stand-in wherever a real chart is not (yet) available.

    Args:
        msg: Text drawn in the middle of the plot area.
        h: Figure height in pixels.

    Returns:
        A ``plotly.graph_objects.Figure`` with no traces and one annotation.
    """
    import plotly.graph_objects as go

    muted = "#7b7b7b"

    layout_options = dict(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(189,221,252,0.13)",
        font=dict(color=muted),
        margin=dict(l=10, r=10, t=10, b=10),
        height=h,
    )
    # Paper coordinates (0..1) keep the label centred regardless of axis ranges.
    label_options = dict(
        text=msg,
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=12, color=muted),
    )

    placeholder = go.Figure()
    placeholder.update_layout(**layout_options)
    placeholder.add_annotation(**label_options)
    return placeholder
def _blank_outputs(status_msg: str):
    """Return the 19-item output tuple for the "nothing to show" state.

    The tuple order mirrors ``ALL_OUTPUTS``: status, activity log, left panel,
    badge, reasoning, seven plots, three sentiment stat cards, four tables.

    Args:
        status_msg: Message shown in red in the status box. It is inserted
            as an HTML fragment, so callers may include markup.

    Returns:
        Tuple of HTML strings, placeholder figures and empty DataFrames.
    """
    placeholder_fig = _empty_plotly()

    status_html = (
        f'<p style="color:#c0392b;font-family:DM Mono,monospace;padding:8px">{status_msg}</p>'
    )
    log_html = "<p class='vv-log-line'>—</p>"
    left_html = (
        "<div style='padding:3rem;text-align:center;color:#7b7b7b;font-family:DM Mono,monospace'>No data yet.</div>"
    )

    html_head = (status_html, log_html, left_html, "", "")
    # The same placeholder figure object is reused for every plot slot.
    plots = (placeholder_fig,) * 7
    stat_cards = ("", "", "")
    tables = tuple(pd.DataFrame() for _ in range(4))

    return html_head + plots + stat_cards + tables
def run_pipeline(
    url_or_id: str,
    sentiment_method: str,
    max_comments: int,
    progress=gr.Progress(track_tqdm=False),
):
    """Analyse one YouTube video end-to-end and yield the dashboard outputs.

    Validates the input, fetches metadata, transcript and comments, runs
    misinformation detection and keyword extraction, and (when comments
    exist) batched sentiment analysis, then hands everything to
    ``_build_outputs``.

    Args:
        url_or_id: YouTube URL or bare video ID typed by the user.
        sentiment_method: Engine key forwarded to ``analyze_sentiment_batch``
            (the UI offers ``"vader"`` and ``"hf"``).
        max_comments: Upper bound on comments to fetch; coerced with ``int``.
        progress: Gradio progress tracker used to report pipeline stages.

    Yields:
        Exactly one tuple per run, ordered like ``ALL_OUTPUTS``: either the
        placeholders from ``_blank_outputs`` (validation / fetch error) or
        the populated result of ``_build_outputs``.
    """
    from html import escape  # local import keeps this block self-contained

    api_key = os.environ.get("YT_API_KEY", "").strip()

    if not (url_or_id or "").strip():
        yield _blank_outputs("⚠️ Please enter a YouTube URL or video ID.")
        return

    video_id = extract_video_id(url_or_id.strip())
    if not video_id:
        yield _blank_outputs("❌ Could not parse a valid YouTube video ID.")
        return

    if not api_key:
        yield _blank_outputs(
            "⚠️ YouTube API key not found. "
            "Set the <code>YT_API_KEY</code> environment variable / Space secret."
        )
        return

    progress(0.05, desc="Fetching video metadata…")
    meta, err = fetch_video_metadata(video_id, api_key)
    if err:
        yield _blank_outputs(f"❌ {err}")
        return

    progress(0.20, desc="Fetching transcript…")
    transcript, t_status = fetch_transcript(video_id)

    # Coerce once so the progress text and the fetch call agree on the number.
    comment_limit = int(max_comments)
    progress(0.35, desc=f"Fetching up to {comment_limit} comments…")
    comments_df, c_status = fetch_comments(video_id, api_key, max_comments=comment_limit)

    progress(0.50, desc="Running misinformation detection…")
    misinfo = detect_misinformation(
        text=f"{meta['title']} {meta['description']}",
        tags=meta["tags"],
        # The same transcript is supplied for both the audio and video streams.
        audio_transcript=transcript,
        video_transcript=transcript,
    )

    # `transcript or ""` prevents a missing transcript from contributing the
    # literal word "None" to the keyword text.
    keywords = extract_keywords(
        f"{meta['title']} {meta['description']} {transcript or ''}",
        meta["tags"],
    )

    sentiments, sent_sum, pos_kw, neg_kw = [], {}, [], []
    if not comments_df.empty:
        texts = comments_df["text"].fillna("").tolist()
        total = len(texts)
        batch = 64
        # Chunked so the progress bar can advance between batches.
        for start in range(0, total, batch):
            chunk = texts[start: start + batch]
            sentiments.extend(
                analyze_sentiment_batch(chunk, method=sentiment_method, batch_size=batch)
            )
            done = min(start + batch, total)
            # Sentiment occupies the 0.60–0.90 band of the progress bar.
            frac = 0.60 + 0.30 * min((start + batch) / max(total, 1), 1.0)
            progress(frac, desc=f"Sentiment {done}/{total}…")
        sent_sum = sentiment_summary(sentiments)
        pos_kw, neg_kw = sentiment_weighted_keywords(comments_df, sentiments)

    if sent_sum:
        sentiment_line = (
            f"💬 Sentiment: {sent_sum['pos_pct']}% pos / {sent_sum['neg_pct']}% neg"
        )
    else:
        sentiment_line = "💬 No comments — sentiment skipped"

    progress(0.97, desc="Building charts…")
    yield _build_outputs(
        meta=meta, video_id=video_id, transcript=transcript,
        comments_df=comments_df, misinfo=misinfo, keywords=keywords,
        sentiments=sentiments, sent_sum=sent_sum,
        pos_kw=pos_kw, neg_kw=neg_kw,
        status_log=[
            # Log lines are rendered as HTML; the title is uploader-controlled
            # text, so escape it (after truncation, to avoid cutting an entity).
            f"✅ Metadata: {escape(meta['title'][:55])}",
            t_status,
            c_status,
            f"🔬 Misinfo score: {misinfo['confidence_pct']}%",
            sentiment_line,
        ],
    )
def _chart_or_placeholder(build, fallback_msg=None):
    """Return ``build()``; on failure log the error and return a placeholder figure."""
    import logging

    try:
        return build()
    except Exception:
        # Best-effort by design: one broken chart must not blank the whole
        # dashboard, but the failure is logged instead of vanishing silently.
        logging.getLogger(__name__).exception("Chart rendering failed")
        return _empty_plotly(fallback_msg) if fallback_msg else _empty_plotly()


def _build_outputs(
    meta, video_id, transcript, comments_df,
    misinfo, keywords, sentiments, sent_sum, pos_kw, neg_kw, status_log,
):
    """Assemble every UI output for a completed analysis run.

    Args:
        meta: Video metadata mapping. Keys read here: ``title``,
            ``channel_title``, ``published_at``, ``view_count``,
            ``like_count``, ``comment_count``, ``duration`` and optionally
            ``thumbnail_url``, ``tags``, ``description``.
        video_id: YouTube video ID used for the "Open on YouTube" link.
        transcript: Transcript text; a falsy value renders "(not available)".
        comments_df: DataFrame of comments; columns ``author``, ``text``,
            ``likes`` and ``published_at`` are displayed.
        misinfo: Detection result; ``score`` and ``reasoning`` are required,
            ``modality_analysis`` is optional.
        keywords: Keyword data forwarded to ``keyword_bar``.
        sentiments: Per-comment sentiment dicts (``label`` and optional
            ``compound``), aligned row-for-row with ``comments_df``.
        sent_sum: Summary dict with ``pos_pct`` / ``neg_pct`` / ``neu_pct``,
            or an empty dict when no comments were analysed.
        pos_kw: Positive-sentiment keywords for ``keyword_comparison``.
        neg_kw: Negative-sentiment keywords for ``keyword_comparison``.
        status_log: Lines for the activity log. Each line is inserted as an
            HTML fragment, so callers must escape untrusted text themselves.

    Returns:
        A 19-item tuple ordered like ``ALL_OUTPUTS``: five HTML strings,
        seven figures, three stat-card HTML strings and four DataFrames.
    """
    from html import escape  # local import keeps this block self-contained

    status_html = (
        '<p style="color:#2e9e6b;font-family:DM Mono,monospace;font-size:0.82rem;padding:6px 0">'
        "✅ Analysis complete</p>"
    )
    log_html = "".join(f'<p class="vv-log-line">{line}</p>' for line in status_log)

    # ── Left panel ────────────────────────────────────────────────────────
    # Everything in `meta` and the transcript is uploader-controlled text
    # from YouTube, so it is HTML-escaped before being interpolated.
    thumbnail_url = meta.get("thumbnail_url")
    thumb_html = (
        f'<img src="{escape(str(thumbnail_url))}" '
        'style="width:100%;border-radius:8px;margin-bottom:8px;display:block">'
        if thumbnail_url else ""
    )
    tag_html = "".join(
        f'<span class="vv-tag">#{escape(str(tag))}</span>'
        for tag in (meta.get("tags") or [])[:20]
    )
    tags_block = tag_html or '<span style="color:#7b7b7b;font-size:0.78rem">(none)</span>'
    desc_short = escape((meta.get("description") or "")[:1200])
    word_count = len(transcript.split()) if transcript else 0
    if not transcript:
        transcript_short = "(not available)"
    elif len(transcript) > 2500:
        # Truncate first, then escape, so an entity is never cut in half.
        transcript_short = escape(transcript[:2500]) + "…"
    else:
        transcript_short = escape(transcript)

    safe_video_id = escape(str(video_id))
    title = escape(str(meta["title"]))
    channel = escape(str(meta["channel_title"]))
    published = escape(str(meta["published_at"]))
    duration = escape(str(meta["duration"]))

    left_html = f"""
    {thumb_html}
    <a href="https://www.youtube.com/watch?v={safe_video_id}" target="_blank"
    style="display:block;text-align:center;font-family:'DM Mono',monospace;
    font-size:0.75rem;color:#7b7b7b;text-decoration:none;margin:4px 0 10px">
    ▶ Open on YouTube
    </a>
    <div class="vv-card">
    <p class="vv-section-title">Video</p>
    <p style="font-family:'Syne',sans-serif;font-size:1.05rem;font-weight:700;margin:0 0 6px;color:#4A4A4A !important">
    {title}
    </p>
    <p style="font-size:0.82rem;color:#7b7b7b !important;margin:0">
    by <b style="color:#384959 !important">{channel}</b>
    ·
    <span style="color:#7b7b7b !important">{published}</span>
    </p>
    </div>
    <p class="vv-section-title">Metrics</p>
    <div class="vv-metric-grid">
    <div class="vv-metric-card">
    <span class="vv-metric-value">👁 {meta['view_count']:,}</span>
    <span class="vv-metric-label">Views</span>
    </div>
    <div class="vv-metric-card">
    <span class="vv-metric-value">👍 {meta['like_count']:,}</span>
    <span class="vv-metric-label">Likes</span>
    </div>
    <div class="vv-metric-card">
    <span class="vv-metric-value">💬 {meta['comment_count']:,}</span>
    <span class="vv-metric-label">Comments</span>
    </div>
    <div class="vv-metric-card">
    <span class="vv-metric-value">⏱ {duration}</span>
    <span class="vv-metric-label">Duration</span>
    </div>
    </div>
    <p class="vv-section-title" style="margin-top:0.8rem">Tags</p>
    {tags_block}
    <details style="margin-top:1rem">
    <summary>📄 Description</summary>
    <p style="font-size:0.78rem;color:#7b7b7b;line-height:1.65;white-space:pre-wrap;margin-top:6px">{desc_short}</p>
    </details>
    <details style="margin-top:0.5rem">
    <summary>📝 Transcript ({word_count} words)</summary>
    <p style="font-size:0.75rem;color:#7b7b7b;line-height:1.65;margin-top:6px">{transcript_short}</p>
    </details>
    """

    # ── Misinformation verdict ────────────────────────────────────────────
    score = misinfo["score"]
    if score < 0.35:
        badge_html = '<span class="vv-badge-green">✅ Appears Credible</span>'
    elif score < 0.65:
        badge_html = '<span class="vv-badge-amber">⚠️ Uncertain / Mixed Signals</span>'
    else:
        badge_html = '<span class="vv-badge-red">🚨 Likely Misinformation</span>'
    # NOTE(review): `reasoning` comes from the analyzer and is inserted as-is;
    # if it can quote video text verbatim it should be escaped too — confirm.
    reasoning_html = (
        f'<div class="vv-reasoning">🧠 <b>Reasoning:</b> {misinfo["reasoning"]}</div>'
    )

    # ── Charts (each one degrades to a placeholder on failure) ────────────
    mod_analysis = misinfo.get("modality_analysis", {})
    fig_mod_dist = _chart_or_placeholder(
        lambda: modality_misinfo_distribution(mod_analysis),
        "Modality distribution unavailable",
    )
    fig_trust = _chart_or_placeholder(
        lambda: trust_score_by_modality(mod_analysis),
        "Trust score unavailable",
    )
    fig_uncert = _chart_or_placeholder(
        lambda: uncertainty_analysis(mod_analysis),
        "Uncertainty analysis unavailable",
    )
    fig_donut = _chart_or_placeholder(
        lambda: sentiment_donut(sent_sum) if sent_sum
        else _empty_plotly("No comments analysed")
    )
    fig_timeline = _chart_or_placeholder(
        lambda: sentiment_timeline(comments_df, sentiments)
        if (sent_sum and not comments_df.empty)
        else _empty_plotly("No comments analysed")
    )
    fig_kw = _chart_or_placeholder(
        lambda: keyword_bar(keywords, title="Top Video Keywords", color="#269ccc")
    )
    fig_kw_comp = _chart_or_placeholder(
        lambda: keyword_comparison(pos_kw, neg_kw)
        if (pos_kw or neg_kw)
        else _empty_plotly("No keyword comparison — no comments")
    )

    # ── Sentiment stat cards ──────────────────────────────────────────────
    def _stat_card(css_class, value, label):
        return (
            f'<div class="vv-card" style="text-align:center">'
            f'<p class="{css_class}">{value}%</p>'
            f'<p style="color:#7b7b7b !important;font-size:0.75rem;margin:4px 0 0;font-family:DM Mono,monospace">{label}</p></div>'
        )

    if sent_sum:
        stat_pos = _stat_card("vv-stat-big-green", sent_sum["pos_pct"], "Positively Engagement")
        stat_neg = _stat_card("vv-stat-big-red", sent_sum["neg_pct"], "Negatively Engagement")
        stat_neu = _stat_card("vv-stat-big-dim", sent_sum["neu_pct"], "Neutral")
    else:
        placeholder = (
            '<div class="vv-card" style="text-align:center;color:#7b7b7b !important;'
            'font-family:DM Mono,monospace;font-size:0.8rem;padding:1.2rem">N/A</div>'
        )
        stat_pos = stat_neg = stat_neu = placeholder

    # ── Comment tables ────────────────────────────────────────────────────
    show_cols = ["author", "text", "likes", "published_at"]
    df_all = df_pos = df_neg = df_top = pd.DataFrame()
    if not comments_df.empty:
        display_df = comments_df.copy()
        if sentiments:
            display_df["sentiment"] = [s["label"] for s in sentiments]
            display_df["compound"] = [round(s.get("compound", 0), 3) for s in sentiments]
            cols = show_cols + ["sentiment", "compound"]
        else:
            cols = show_cols
        if "sentiment" in display_df.columns:
            # Filter on the raw labels first; display names are applied after.
            df_pos = display_df[display_df["sentiment"] == "POSITIVE"][cols].head(50).reset_index(drop=True)
            df_neg = display_df[display_df["sentiment"] == "NEGATIVE"][cols].head(50).reset_index(drop=True)
            display_df["sentiment"] = display_df["sentiment"].replace({
                "POSITIVE": "Positively Engagement",
                "NEGATIVE": "Negatively Engagement",
                "NEUTRAL": "Neutral",
            })
            df_pos["sentiment"] = "Positively Engagement"
            df_neg["sentiment"] = "Negatively Engagement"
        df_all = display_df[cols].head(100).reset_index(drop=True)
        df_top = (
            display_df.sort_values("likes", ascending=False)
            .head(20)[cols]
            .reset_index(drop=True)
        )

    return (
        status_html,
        log_html,
        left_html,
        badge_html,
        reasoning_html,
        fig_mod_dist,
        fig_trust,
        fig_uncert,
        fig_donut,
        fig_timeline,
        fig_kw,
        fig_kw_comp,
        stat_pos,
        stat_neg,
        stat_neu,
        df_all,
        df_pos,
        df_neg,
        df_top,
    )
def do_search(keyword: str):
    """Search YouTube by keyword and render the top results.

    Args:
        keyword: Free-text title or keyword entered by the user.

    Returns:
        A pair ``(html, radio_update)``: an HTML string (result cards, or a
        coloured message when there is nothing to show) and a ``gr.update``
        for the selection radio. The radio is hidden with no choices unless
        results were found; each choice maps a truncated title to the
        video's watch URL.
    """
    from html import escape  # local import keeps this block self-contained

    def _message(color, text):
        # Every "nothing to show" outcome hides and clears the radio.
        return (
            f"<p style='color:{color};font-family:DM Mono,monospace'>{text}</p>",
            gr.update(choices=[], value=None, visible=False),
        )

    api_key = os.environ.get("YT_API_KEY", "").strip()
    if not api_key:
        return _message("#c0392b", "⚠️ YT_API_KEY secret not set.")
    if not (keyword or "").strip():
        return _message("#d4841a", "Enter a keyword to search.")

    results = search_videos_by_title(keyword.strip(), api_key, max_results=5)
    if not results:
        return _message("#d4841a", "No results found.")

    cards = []
    choices = []
    for r in results:
        vid = r["video_id"]
        url = f"https://www.youtube.com/watch?v={vid}"
        choices.append((r["title"][:70], url))
        # Titles, channel names and URLs are uploader-controlled text from
        # YouTube; escape them before embedding in HTML (truncate first so an
        # entity is never cut in half).
        cards.append(
            f'<div class="vv-card" style="display:flex;align-items:center;gap:12px;margin-bottom:6px">'
            f'<img src="{escape(str(r["thumbnail_url"]))}" '
            f' style="width:72px;height:54px;object-fit:cover;border-radius:6px;flex-shrink:0">'
            f'<div>'
            f'<p style="margin:0;font-size:0.85rem;font-weight:600;color:#4A4A4A !important">{escape(r["title"][:80])}</p>'
            f'<p style="margin:0;font-size:0.75rem;color:#7b7b7b !important">'
            f'{escape(str(r["channel_title"]))} · {escape(str(r["published_at"]))} · '
            f'<code style="color:#269ccc">v={escape(str(vid))}</code></p>'
            f'</div></div>'
        )
    return "".join(cards), gr.update(choices=choices, value=None, visible=True)
def pick_and_analyze(selected_url, sentiment_method, max_comments):
    """Run the analysis pipeline for the video picked from the search results.

    Args:
        selected_url: Value of the search-result radio (a watch URL), or a
            falsy value when nothing is selected.
        sentiment_method: Forwarded unchanged to ``run_pipeline``.
        max_comments: Forwarded unchanged to ``run_pipeline``.

    Yields:
        Whatever ``run_pipeline`` yields, or a single blank-output tuple
        prompting the user to pick a video when there is no selection.
    """
    if selected_url:
        yield from run_pipeline(selected_url, sentiment_method, max_comments)
    else:
        yield _blank_outputs("Select a video from the search results above.")
# UI definition: builds every component of the app inside the `demo` Blocks context,
# then wires the buttons/inputs to the handler functions defined above.
# Components are created in display order, so statement order here is the page layout.
| with gr.Blocks(title="Misinformation Detection & Public Engagement") as demo: | |
| gr.HTML(""" | |
| <div style="padding:1.5rem 0 0.8rem;border-bottom:1px solid #BDDDFC;margin-bottom:1.2rem"> | |
| <h1 class="vv-hero">🔬 Misinformation Detection & Public Engagement</h1> | |
| </div> | |
| """) | |
# Settings panel (collapsed by default): sentiment engine and comment fetch limit.
# Both controls are passed as inputs to run_pipeline / pick_and_analyze below.
| with gr.Accordion("⚙️ Settings", open=False): | |
| gr.HTML(""" | |
| <div style="background:#f5f7fa;border:1px solid #BDDDFC;border-radius:8px; | |
| padding:0.7rem 1rem;margin-bottom:0.8rem;font-family:'DM Mono',monospace; | |
| font-size:0.78rem;color:#7b7b7b"> | |
| 🔑 YouTube API key is read from the <code style="color:#269ccc">YT_API_KEY</code> | |
| Space secret — it is never exposed in the UI. | |
| </div> | |
| """) | |
| with gr.Row(): | |
| sentiment_selector = gr.Dropdown( | |
| choices=[ | |
| ("VADER — fast, CPU-only (~5 000 comments/sec)", "vader"), | |
| ("DistilBERT — accurate, downloads ~500 MB on first run", "hf"), | |
| ], | |
| value="vader", | |
| label="Sentiment Engine", | |
| scale=3, | |
| ) | |
| max_comments_slider = gr.Slider( | |
| minimum=10, maximum=500, value=150, step=10, | |
| label="Max comments to fetch", | |
| scale=3, | |
| info="YouTube API quota: ~1 unit per comment request", | |
| ) | |
# Two input modes: paste a URL / video ID directly, or search YouTube by keyword.
| with gr.Tabs(): | |
| with gr.TabItem("🔗 YouTube URL"): | |
| with gr.Row(): | |
| url_input = gr.Textbox( | |
| placeholder="https://www.youtube.com/watch?v=... or youtu.be/... or raw 11-char ID", | |
| label="YouTube URL / Video ID", | |
| scale=5, | |
| ) | |
| analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1, min_width=130) | |
| with gr.TabItem("📁 Upload / Search by Title"): | |
| gr.HTML(""" | |
| <div class="vv-card" style="margin-bottom:8px"> | |
| <p class="vv-section-title">Search by video title or keyword</p> | |
| <p style="font-size:0.82rem;color:#7b7b7b;line-height:1.6;margin:0"> | |
| Upload your file, then type the title or keyword below to locate the matching YouTube entry. | |
| </p> | |
| </div> | |
| """) | |
# NOTE(review): upload_file is not used as an input to any event handler in this
# section, so the uploaded file appears to be ignored — confirm this is intended.
| upload_file = gr.File( | |
| label="Drop a video file (mp4, mov, avi, mkv, webm)", | |
| file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm"], | |
| ) | |
| with gr.Row(): | |
| kw_input = gr.Textbox(placeholder="Enter video title or keyword…", label="Search keyword", scale=4) | |
| search_btn = gr.Button("🔎 Find on YouTube", scale=1) | |
| search_results_html = gr.HTML() | |
# Starts hidden with no choices; do_search fills it and makes it visible when results exist.
| search_radio = gr.Radio(label="Select a video to analyze", choices=[], visible=False) | |
# Status line: first entry of ALL_OUTPUTS, so every pipeline run overwrites it.
| status_box = gr.HTML( | |
| '<p style="color:#7b7b7b;font-family:DM Mono,monospace;font-size:0.8rem;padding:6px 0">' | |
| "Enter a URL above and click Analyze.</p>" | |
| ) | |
# Results area: video details panel plus the misinformation, sentiment and comment views.
| with gr.Row(equal_height=False): | |
| with gr.Column(scale=2): | |
| left_panel_html = gr.HTML( | |
| "<div style='padding:3rem;text-align:center;color:#7b7b7b;" | |
| "font-family:DM Mono,monospace'>No data yet.</div>" | |
| ) | |
| with gr.Column(scale=3): | |
| gr.HTML('<p class="vv-section-title" style="margin-top:0">🔬 Misinformation Analysis</p>') | |
| misinfo_badge_html = gr.HTML() | |
| with gr.Row(): | |
| modality_dist_plot = gr.Plot(label="", show_label=False) | |
| with gr.Row(): | |
| trust_score_plot = gr.Plot(label="", show_label=False) | |
| uncertainty_plot = gr.Plot(label="", show_label=False) | |
| misinfo_reasoning_html = gr.HTML() | |
| gr.HTML('<hr class="vv-hr">') | |
| gr.HTML('<p class="vv-section-title">💬 Comment Sentiment</p>') | |
| with gr.Row(): | |
| stat_pos_html = gr.HTML() | |
| stat_neg_html = gr.HTML() | |
| stat_neu_html = gr.HTML() | |
| with gr.Row(): | |
| donut_plot = gr.Plot(label="", show_label=False) | |
| timeline_plot = gr.Plot(label="", show_label=False) | |
| with gr.Row(): | |
| kw_bar_plot = gr.Plot(label="", show_label=False) | |
| kw_comp_plot = gr.Plot(label="", show_label=False) | |
| gr.HTML('<hr class="vv-hr">') | |
| gr.HTML('<p class="vv-section-title">📊 Comments Deep-Dive</p>') | |
# Comment tables: all comments, the positive and negative subsets, and the most-liked.
| with gr.Tabs(): | |
| with gr.TabItem("All"): | |
| df_all_out = gr.Dataframe( | |
| headers=["author", "text", "likes", "published_at", "sentiment", "compound"], | |
| datatype=["str", "str", "number", "str", "str", "number"], | |
| wrap=True, | |
| max_height=320, | |
| ) | |
| with gr.TabItem("Positively Engagement"): | |
| df_pos_out = gr.Dataframe(wrap=True, max_height=320) | |
| with gr.TabItem("Negatively Engagement"): | |
| df_neg_out = gr.Dataframe(wrap=True, max_height=320) | |
| with gr.TabItem("Most Liked"): | |
| df_top_out = gr.Dataframe(wrap=True, max_height=320) | |
# Activity log: shows the per-stage status lines produced by a pipeline run.
| with gr.Accordion("📜 Activity Log", open=False): | |
| log_html_out = gr.HTML('<p class="vv-log-line">—</p>') | |
| gr.HTML(""" | |
| <div style="margin-top:2rem;padding-top:1rem;border-top:1px solid #BDDDFC; | |
| text-align:center;font-family:'DM Mono',monospace;font-size:0.72rem;color:#7b7b7b"> | |
| 4-stream SeTa-Attention BiGRU · CCM / DMTE / Uncertainty Fusion · | |
| Test ROC-AUC 0.967 | |
| </div> | |
| """) | |
# Output components in the exact order of the 19-item tuples returned by
# _blank_outputs and _build_outputs; changing one requires changing the others.
| ALL_OUTPUTS = [ | |
| status_box, | |
| log_html_out, | |
| left_panel_html, | |
| misinfo_badge_html, | |
| misinfo_reasoning_html, | |
| modality_dist_plot, | |
| trust_score_plot, | |
| uncertainty_plot, | |
| donut_plot, | |
| timeline_plot, | |
| kw_bar_plot, | |
| kw_comp_plot, | |
| stat_pos_html, | |
| stat_neg_html, | |
| stat_neu_html, | |
| df_all_out, | |
| df_pos_out, | |
| df_neg_out, | |
| df_top_out, | |
| ] | |
# Event wiring: the Analyze button and pressing Enter in the URL box run the same pipeline.
| _pipeline_inputs = [url_input, sentiment_selector, max_comments_slider] | |
| analyze_btn.click(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS) | |
| url_input.submit(fn=run_pipeline, inputs=_pipeline_inputs, outputs=ALL_OUTPUTS) | |
| search_btn.click( | |
| fn=do_search, | |
| inputs=[kw_input], | |
| outputs=[search_results_html, search_radio], | |
| ) | |
# A change of the radio value starts the analysis; the radio's values are watch URLs
# (see do_search), which pick_and_analyze forwards to run_pipeline.
| search_radio.change( | |
| fn=pick_and_analyze, | |
| inputs=[search_radio, sentiment_selector, max_comments_slider], | |
| outputs=ALL_OUTPUTS, | |
| ) | |
if __name__ == "__main__":
    # Script entry point: start the Gradio server with the custom stylesheet
    # and a Base theme (blue / slate palette, IBM Plex Sans body font).
    # NOTE(review): passing css/theme to launch() assumes a Gradio release
    # whose launch() accepts them — confirm against the pinned version.
    _body_font = [gr.themes.GoogleFont("IBM Plex Sans"), "sans-serif"]
    _app_theme = gr.themes.Base(
        primary_hue=gr.themes.colors.blue,
        neutral_hue=gr.themes.colors.slate,
        font=_body_font,
    )
    demo.launch(css=CSS, theme=_app_theme)