"""FakeScope — Streamlit app that scores news text with a pre-trained BERT model.

The script offers two modes:

* paste a headline/article (optionally with its source URL), or
* fetch recent articles from NewsAPI for a search query,

and for each text reports the classifier's fake/real probabilities, a
source-credibility score looked up from a static table, and a list of
regex-based linguistic red flags.
"""

import html
import os
import re
from datetime import datetime  # kept: not referenced by the code visible here
from urllib.parse import urlparse

import numpy as np
import pandas as pd  # kept: not referenced by the code visible here
import plotly.express as px  # kept: not referenced by the code visible here
import plotly.graph_objects as go
import requests
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# ─────────────────────────────────────────────
# 🔑 API KEY — read from the NEWS_API_KEY environment variable.
# Get your free key at https://newsapi.org/register
NEWS_API_KEY = os.getenv("NEWS_API_KEY")
NEWS_API_URL = "https://newsapi.org/v2/everything"
# ─────────────────────────────────────────────

# ─────────────────────────────────────────────
# Model: Pre-trained fine-tuned BERT for fake news
# No training needed — loaded directly from HuggingFace Hub
MODEL_NAME = "jy46604790/Fake-News-Bert-Detect"
# ─────────────────────────────────────────────

# Labels of the mode selector; compared against the radio widget's value.
MODE_PASTE = "📝 Paste Article / Text"
MODE_LIVE = "🌐 Fetch Live News"

# Source credibility database: registered domain -> score in [0, 1].
SOURCE_CREDIBILITY = {
    "bbc.com": 0.97,
    "bbc.co.uk": 0.97,
    "reuters.com": 0.96,
    "apnews.com": 0.95,
    "theguardian.com": 0.93,
    "nytimes.com": 0.92,
    "washingtonpost.com": 0.91,
    "npr.org": 0.92,
    "bloomberg.com": 0.90,
    "economist.com": 0.92,
    "ft.com": 0.91,
    "nature.com": 0.97,
    "science.org": 0.97,
    "who.int": 0.98,
    "cdc.gov": 0.97,
    "gov.uk": 0.94,
    "thehindu.com": 0.88,
    "ndtv.com": 0.82,
    "hindustantimes.com": 0.80,
    "timesofindia.com": 0.79,
    "cnn.com": 0.78,
    "foxnews.com": 0.65,
    "huffpost.com": 0.70,
    "buzzfeed.com": 0.62,
    "vice.com": 0.68,
    "vox.com": 0.74,
    "medium.com": 0.52,
    "substack.com": 0.50,
    "infowars.com": 0.05,
    "naturalnews.com": 0.08,
    "beforeitsnews.com": 0.06,
    "worldnewsdailyreport.com": 0.04,
    "empirenews.net": 0.04,
    "theonion.com": 0.10,
}

# (low, high) score band -> (label, colour). Bands share their end points;
# lookup takes the first band that contains the score, so insertion order
# decides ties (0.85 is "Highly Credible", 0.65 is "Moderately Credible").
CREDIBILITY_LABELS = {
    (0.85, 1.0): ("🟢 Highly Credible", "#22c55e"),
    (0.65, 0.85): ("🟡 Moderately Credible", "#f59e0b"),
    (0.40, 0.65): ("🟠 Low Credibility", "#f97316"),
    (0.0, 0.40): ("🔴 Very Low / Known Misinformation", "#ef4444"),
}

# (regex, human-readable description) pairs used as linguistic red flags.
FAKE_INDICATORS = [
    (r'\b(SHOCKING|BOMBSHELL|BREAKING|EXCLUSIVE)\b', "ALL-CAPS sensational trigger words"),
    (r'(!{2,}|\?{2,})', "Excessive punctuation (!! or ??)"),
    (r'\b(they don\'t want you to know|mainstream media won\'t tell)\b',
     "Anti-establishment conspiracy framing"),
    (r'\b(miracle|cure|secret|censored|banned)\b', "Clickbait / pseudoscience language"),
    # "%" is not a word character, so a trailing \b after "100%" could never
    # match before a space or at end of text; only the phrases keep it.
    (r'\b(100%|(proven fact|scientists hate)\b)', "Overconfident absolute claims"),
    (r'(share before deleted|share before banned)', "Urgency/fear-of-censorship manipulation"),
    (r'\b(deep state|new world order|illuminati|cabal)\b', "Conspiracy theory terminology"),
    (r'\baccording to sources\b(?!.*\bnamed\b)', "Vague anonymous sourcing"),
]

# Indicators that must be matched case-sensitively: ignoring case would make
# the ALL-CAPS rule fire on ordinary words such as "breaking".
_CASE_SENSITIVE_INDICATORS = frozenset({"ALL-CAPS sensational trigger words"})

# Patterns compiled once at import time instead of on every call.
_INDICATOR_PATTERNS = [
    (
        re.compile(pattern, 0 if description in _CASE_SENSITIVE_INDICATORS else re.IGNORECASE),
        description,
    )
    for pattern, description in FAKE_INDICATORS
]


@st.cache_resource(show_spinner=False)
def load_model():
    """Load the tokenizer and classifier for MODEL_NAME and return them.

    The result is cached by Streamlit, and the model is switched to eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    model.eval()
    return tokenizer, model


def _resolve_fake_index(id2label):
    """Return the class index whose label mentions "fake", or 0 if none does."""
    for idx, name in id2label.items():
        if "fake" in str(name).lower():
            return int(idx)
    # Generic label names (e.g. "LABEL_0") carry no hint; fall back to index 0,
    # which is what the original code always ended up using.
    return 0


def classify_text(text, tokenizer, model):
    """Classify ``text`` and return ``(prediction, confidence, fake_prob, real_prob)``.

    ``prediction`` is "FAKE" or "REAL"; ``confidence`` is the larger of the two
    probabilities. Input is truncated to 512 tokens. The model is treated as a
    binary classifier (real index = 1 - fake index).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True,
                       max_length=512, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # A single text is passed in, so row 0 holds its class probabilities.
    probs = torch.softmax(outputs.logits, dim=1)[0].numpy()

    fake_idx = _resolve_fake_index(model.config.id2label)
    real_idx = 1 - fake_idx
    fake_prob = float(probs[fake_idx])
    real_prob = float(probs[real_idx])
    prediction = "FAKE" if fake_prob > real_prob else "REAL"
    confidence = max(fake_prob, real_prob)
    return prediction, confidence, fake_prob, real_prob


def _extract_domain(url_or_domain):
    """Return the lower-cased host of a URL or bare domain, without "www."."""
    candidate = url_or_domain.strip()
    if "://" not in candidate:
        # urlparse only fills in the host when a "//" authority marker is
        # present, so bare domains like "bbc.com/news" need one added.
        candidate = "//" + candidate.lstrip("/")
    try:
        host = urlparse(candidate).hostname or ""
    except ValueError:
        # Raised for malformed authorities such as an unclosed IPv6 bracket.
        return ""
    if host.startswith("www."):
        host = host[len("www."):]
    return host


def _lookup_known_score(domain):
    """Return the table score for ``domain`` or one of its parent domains."""
    parts = domain.split(".")
    # Drop leading labels one at a time ("edition.cnn.com" -> "cnn.com") but
    # never test a bare top-level label on its own.
    for start in range(len(parts) - 1):
        candidate = ".".join(parts[start:])
        if candidate in SOURCE_CREDIBILITY:
            return SOURCE_CREDIBILITY[candidate]
    return None


def get_source_credibility(url_or_domain):
    """Score a source and return ``(domain, score, label, color)``.

    Accepts a full URL or a bare domain. When no input is given, or no host
    can be extracted from it, returns ``(None, 0.5, "Unknown Source",
    "#94a3b8")``. Domains missing from SOURCE_CREDIBILITY get a default based
    on their suffix: .gov/.edu 0.90, .org 0.65, anything else 0.45.
    """
    unknown = (None, 0.5, "Unknown Source", "#94a3b8")
    if not url_or_domain:
        return unknown
    domain = _extract_domain(url_or_domain)
    if not domain:
        return unknown

    score = _lookup_known_score(domain)
    if score is None:
        if domain.endswith((".gov", ".edu")):
            score = 0.90
        elif domain.endswith(".org"):
            score = 0.65
        else:
            score = 0.45

    label, color = "Unknown", "#94a3b8"
    for (low, high), (band_label, band_color) in CREDIBILITY_LABELS.items():
        if low <= score <= high:
            label, color = band_label, band_color
            break
    return domain, score, label, color


def detect_fake_indicators(text):
    """Return the descriptions of every FAKE_INDICATORS pattern found in ``text``."""
    return [description for pattern, description in _INDICATOR_PATTERNS
            if pattern.search(text)]


def fetch_news(query, api_key, max_articles=6):
    """Query NewsAPI's ``everything`` endpoint for English articles.

    Returns ``(articles, None)`` on success and ``(None, error_message)`` on
    any failure (missing key, network/HTTP error, non-JSON body, API error).
    """
    if not api_key or api_key == "YOUR_NEWSAPI_KEY_HERE":
        return None, "⚠️ No API key provided. Set the NEWS_API_KEY environment variable."

    params = {
        "q": query,
        "language": "en",
        "sortBy": "publishedAt",
        "pageSize": max_articles,
    }
    try:
        # The key travels in a header rather than the URL so that it cannot
        # end up in an exception message that is shown to the user.
        resp = requests.get(NEWS_API_URL, params=params,
                            headers={"X-Api-Key": api_key}, timeout=10)
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        return None, str(e)

    if not isinstance(data, dict):
        return None, "API error"
    if data.get("status") != "ok":
        return None, data.get("message", "API error")
    return data.get("articles") or [], None


def make_confidence_gauge(fake_prob, real_prob):
    """Build a gauge figure showing ``fake_prob`` as a percentage.

    ``real_prob`` is accepted for a uniform chart signature but is not used.
    """
    # NOTE(review): the mode includes "delta" but no delta reference is
    # configured — confirm the delta readout is intended.
    fig = go.Figure(go.Indicator(
        mode="gauge+number+delta",
        value=round(fake_prob * 100, 1),
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": "Fake Probability %", "font": {"size": 18, "color": "#e2e8f0"}},
        number={"font": {"size": 36, "color": "#f8fafc"}, "suffix": "%"},
        gauge={
            "axis": {"range": [0, 100], "tickcolor": "#64748b",
                     "tickfont": {"color": "#94a3b8"}},
            "bar": {"color": "#6366f1"},
            "steps": [
                {"range": [0, 30], "color": "#14532d"},
                {"range": [30, 55], "color": "#713f12"},
                {"range": [55, 100], "color": "#7f1d1d"},
            ],
            "threshold": {
                "line": {"color": "#fbbf24", "width": 4},
                "thickness": 0.85,
                "value": 50,
            },
        },
    ))
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font={"color": "#e2e8f0"},
        height=280,
        margin=dict(t=50, b=10, l=30, r=30),
    )
    return fig


def make_prob_bar(fake_prob, real_prob):
    """Build a two-bar figure comparing the FAKE and REAL probabilities."""
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=["FAKE", "REAL"],
        y=[fake_prob * 100, real_prob * 100],
        marker_color=["#ef4444", "#22c55e"],
        text=[f"{fake_prob*100:.1f}%", f"{real_prob*100:.1f}%"],
        textposition="outside",
        textfont=dict(color="#f8fafc", size=14),
        width=0.45,
    ))
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font=dict(color="#e2e8f0"),
        # Upper bound above 100 leaves room for the "outside" bar labels.
        yaxis=dict(range=[0, 115], gridcolor="#1e293b", ticksuffix="%",
                   tickfont=dict(color="#64748b")),
        xaxis=dict(tickfont=dict(color="#e2e8f0", size=14)),
        height=260,
        margin=dict(t=10, b=10, l=10, r=10),
        showlegend=False,
    )
    return fig


def credibility_bar_chart(domain, score):
    """Build a single horizontal bar showing ``score`` (0–1) on a 0–100 scale."""
    fig = go.Figure(go.Bar(
        x=[score * 100],
        y=[domain or "Unknown"],
        orientation="h",
        marker=dict(
            color=score * 100,
            colorscale=[[0, "#ef4444"], [0.5, "#f59e0b"], [1, "#22c55e"]],
            cmin=0,
            cmax=100,
        ),
        text=[f"{score*100:.0f}/100"],
        textposition="outside",
        textfont=dict(color="#f8fafc"),
    ))
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        xaxis=dict(range=[0, 115], gridcolor="#1e293b", ticksuffix="",
                   tickfont=dict(color="#64748b")),
        yaxis=dict(tickfont=dict(color="#e2e8f0", size=13)),
        height=120,
        margin=dict(t=5, b=5, l=10, r=60),
    )
    return fig


# ══════════════════════════════════════════════
# UI HELPERS
# ══════════════════════════════════════════════
_PLOTLY_CONFIG = {"displayModeBar": False}


def _section_header(title):
    """Render a section heading."""
    st.markdown(f'\n{title}\n', unsafe_allow_html=True)


def _show_metric(column, value, label):
    """Render one value/label summary card inside ``column``."""
    with column:
        st.markdown(f'\n{value}\n' f'\n{label}\n', unsafe_allow_html=True)


def _show_chart(fig):
    """Render a Plotly figure at container width without the mode bar."""
    st.plotly_chart(fig, use_container_width=True, config=_PLOTLY_CONFIG)


def _analyze_article(art, tokenizer, model):
    """Score one NewsAPI article dict; return a result dict, or None if it has no text.

    Fields that are missing or null in the API payload fall back to
    placeholders so that later slicing/formatting cannot fail on ``None``.
    """
    title = art.get("title") or ""
    text = title + " " + (art.get("description") or "")
    if not text.strip():
        return None
    url = art.get("url") or ""
    pred, conf, fp, rp = classify_text(text, tokenizer, model)
    _domain, cscore, clabel, _ccolor = get_source_credibility(url)
    return {
        "title": title or "No title",
        "source": (art.get("source") or {}).get("name") or "Unknown",
        "url": url or "#",
        "publishedAt": art.get("publishedAt") or "",
        "prediction": pred,
        "confidence": conf,
        "fake_prob": fp,
        "real_prob": rp,
        "cred_score": cscore,
        "cred_label": clabel,
        "indicators": detect_fake_indicators(text),
    }


def _make_batch_chart(results):
    """Build the horizontal bar chart of fake probability per analyzed article."""
    titles_short = [r["title"][:45] + "…" if len(r["title"]) > 45 else r["title"]
                    for r in results]
    colors = ["#ef4444" if r["prediction"] == "FAKE" else "#22c55e" for r in results]
    fig_batch = go.Figure(go.Bar(
        y=titles_short,
        x=[r["fake_prob"] * 100 for r in results],
        orientation="h",
        marker_color=colors,
        text=[f"{r['fake_prob']*100:.0f}%" for r in results],
        textposition="outside",
        textfont=dict(color="#e2e8f0", size=11),
    ))
    fig_batch.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        xaxis=dict(range=[0, 120], ticksuffix="%", gridcolor="#1e293b",
                   tickfont=dict(color="#64748b")),
        yaxis=dict(tickfont=dict(color="#e2e8f0", size=11)),
        height=max(300, len(results) * 55),
        margin=dict(t=10, b=10, l=10, r=80),
        title=dict(text="Fake Probability per Article",
                   font=dict(color="#94a3b8", size=13)),
    )
    return fig_batch


# ════════════════════════════════════════════
# MODE 1 — Paste Text
# ════════════════════════════════════════════
def _render_paste_mode(tokenizer, model):
    """Render the paste-text form and, once submitted, the full analysis."""
    _section_header("Paste news article or headline")
    col_in, col_meta = st.columns([3, 1])
    with col_in:
        # A non-empty label is supplied for accessibility; it stays hidden.
        news_text = st.text_area(
            "News text", height=180,
            placeholder="Paste a news headline, paragraph, or full article here…",
            label_visibility="collapsed")
    with col_meta:
        source_url = st.text_input("Source URL (optional)",
                                   placeholder="https://bbc.com/…")
        analyze_btn = st.button("🔍 Analyze", use_container_width=True)

    if not analyze_btn:
        return
    if not news_text.strip():
        st.warning("Please paste some text to analyze.")
        return

    with st.spinner("Running BERT inference…"):
        prediction, confidence, fake_prob, real_prob = classify_text(
            news_text, tokenizer, model)
        indicators = detect_fake_indicators(news_text)
        # NOTE(review): cred_color is not referenced by the markup visible
        # below — presumably meant for inline styling; confirm.
        domain, cred_score, cred_label, cred_color = get_source_credibility(source_url)

    # ── Verdict ──────────────────────────────
    st.markdown("---")
    vcol1, vcol2, vcol3 = st.columns([1, 2, 1])
    with vcol2:
        if prediction == "FAKE":
            low_conf = confidence < 0.75
            warning = (
                "\n"
                "⚠ Low confidence — verify manually before concluding\n"
                if low_conf else ""
            )
            st.markdown(
                f"\n⚠ FAKE NEWS\nConfidence: {confidence*100:.1f}%\n{warning}\n",
                unsafe_allow_html=True,
            )
        else:
            st.markdown(
                f"\n✅ LIKELY REAL\nConfidence: {confidence*100:.1f}%\n",
                unsafe_allow_html=True,
            )
    st.markdown("\n", unsafe_allow_html=True)

    # ── Charts ───────────────────────────────
    ch1, ch2 = st.columns(2)
    with ch1:
        _section_header("Confidence Gauge")
        _show_chart(make_confidence_gauge(fake_prob, real_prob))
    with ch2:
        _section_header("Probability Distribution")
        _show_chart(make_prob_bar(fake_prob, real_prob))

    # ── Source Credibility ───────────────────
    _section_header("Source Credibility Score")
    # The domain originates from user input and is rendered as raw HTML.
    shown_domain = html.escape(domain) if domain else 'Unknown domain'
    st.markdown(f"\n{cred_label} {shown_domain}\n", unsafe_allow_html=True)
    _show_chart(credibility_bar_chart(domain or "Unknown", cred_score))

    # ── Why it might be fake ─────────────────
    _section_header("🧠 Explanation — Why it may be Fake")
    with st.container():
        if indicators:
            st.markdown("**Linguistic red flags detected:**")
            pills_html = "".join(f'⚠ {i}' for i in indicators)
            st.markdown(pills_html, unsafe_allow_html=True)
        else:
            st.success("No obvious linguistic red flags detected in the text.")

        if prediction == "FAKE":
            reasons = []
            if fake_prob > 0.85:
                reasons.append("Very high BERT fake-probability score (>85%)")
            if cred_score < 0.5:
                reasons.append(
                    f"Source '{domain}' has very low credibility ({cred_score*100:.0f}/100)")
            if indicators:
                reasons.append(
                    f"{len(indicators)} sensational/clickbait linguistic patterns found")
            if reasons:
                st.markdown("**Key reasons for FAKE classification:**")
                for reason in reasons:
                    st.markdown(f"  🔸 {reason}")

    # ── Stats ────────────────────────────────
    _section_header("Analytics Summary")
    m1, m2, m3, m4 = st.columns(4)
    _show_metric(m1, f"{fake_prob*100:.0f}%", "FAKE PROB")
    _show_metric(m2, f"{real_prob*100:.0f}%", "REAL PROB")
    _show_metric(m3, f"{cred_score*100:.0f}", "SOURCE SCORE")
    _show_metric(m4, f"{len(indicators)}", "RED FLAGS")


# ════════════════════════════════════════════
# MODE 2 — Live News Feed
# ════════════════════════════════════════════
def _render_live_mode(tokenizer, model):
    """Render the search form and, once submitted, the batch analysis."""
    _section_header("Fetch & analyze live news articles")
    qcol, bcol = st.columns([4, 1])
    with qcol:
        # A non-empty label is supplied for accessibility; it stays hidden.
        query = st.text_input(
            "Search topic",
            placeholder="Search topic e.g. 'climate change', 'election 2024'…",
            label_visibility="collapsed")
    with bcol:
        fetch_btn = st.button("📡 Fetch News", use_container_width=True)

    if not fetch_btn:
        return
    if not query.strip():
        st.warning("Enter a search query.")
        return

    with st.spinner(f"Fetching news for: **{query}**…"):
        articles, err = fetch_news(query, NEWS_API_KEY)
    if err:
        st.error(f"NewsAPI error: {err}")
        return
    if not articles:
        st.info("No articles found. Try a different query.")
        return

    results = []
    progress = st.progress(0)
    for i, art in enumerate(articles):
        result = _analyze_article(art, tokenizer, model)
        if result is not None:
            results.append(result)
        progress.progress((i + 1) / len(articles))
    progress.empty()

    if not results:
        # Every article lacked both title and description; averaging an empty
        # list below would yield NaN.
        st.info("No articles found. Try a different query.")
        return

    fake_count = sum(1 for r in results if r["prediction"] == "FAKE")
    real_count = len(results) - fake_count
    avg_conf = np.mean([r["confidence"] for r in results]) * 100

    st.markdown("---")
    _section_header("Batch Analysis Summary")
    sm1, sm2, sm3, sm4 = st.columns(4)
    _show_metric(sm1, f"{len(results)}", "ARTICLES")
    _show_metric(sm2, f"{fake_count}", "FLAGGED FAKE")
    _show_metric(sm3, f"{real_count}", "LIKELY REAL")
    _show_metric(sm4, f"{avg_conf:.0f}%", "AVG CONFIDENCE")
    st.markdown("\n", unsafe_allow_html=True)

    _show_chart(_make_batch_chart(results))

    _section_header("Individual Article Results")
    for r in results:
        # NOTE(review): badge_color and r["url"] are not referenced by the
        # markup visible below — presumably meant for inline styling and the
        # "Read original" link; confirm.
        badge_color = "#ef4444" if r["prediction"] == "FAKE" else "#22c55e"
        badge_text = "⚠ FAKE" if r["prediction"] == "FAKE" else "✅ REAL"
        ind_html = "".join(
            f'{ind}' for ind in r["indicators"][:2]
        ) if r["indicators"] else ""
        # Title and source come from a remote API and are rendered as raw
        # HTML, so they are escaped first.
        safe_title = html.escape(r["title"])
        safe_source = html.escape(r["source"])
        st.markdown(
            f"\n\n{safe_title}\n\n{badge_text}\n\n"
            f"📰 {safe_source}  ·  Confidence: {r['confidence']*100:.1f}%"
            f"  ·  Source credibility: {r['cred_label']}\n\n"
            f"{ind_html}\n\nRead original →\n\n",
            unsafe_allow_html=True,
        )


# ══════════════════════════════════════════════
# STREAMLIT UI
# ══════════════════════════════════════════════
st.set_page_config(
    page_title="FakeScope — AI News Verifier",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): this call injects only a single space — presumably a slot for
# custom page styling; confirm.
st.markdown(""" """, unsafe_allow_html=True)

# ── Sidebar ──────────────────────────────────
with st.sidebar:
    st.markdown("## 🔍 FakeScope")
    st.markdown("---")
    mode = st.radio("**Mode**", [MODE_PASTE, MODE_LIVE])
    st.markdown("---")
    st.markdown("**About the Model**")
    st.caption(f"`{MODEL_NAME}`")
    st.caption("Fine-tuned BERT — no local training required.")
    st.markdown("---")
    st.markdown("**Credibility DB**")
    st.caption(f"{len(SOURCE_CREDIBILITY)} known sources indexed.")
    st.markdown("---")
    st.caption("Built with 🤗 Transformers + Streamlit")

# ── Hero ─────────────────────────────────────
st.markdown("""

🔎 FakeScope

AI-powered fake news detector · BERT · Source Credibility · Real-time News · Explainability

""", unsafe_allow_html=True)

# ── Load model ───────────────────────────────
with st.spinner("⚡ Loading BERT model from HuggingFace (first run only)…"):
    try:
        tokenizer, model = load_model()
        st.success("✅ Model loaded successfully!", icon="🤖")
    except Exception as e:
        # Top-level boundary: report any load failure and halt the script.
        st.error(f"Model load failed: {e}")
        st.stop()

if mode == MODE_PASTE:
    _render_paste_mode(tokenizer, model)
else:
    _render_live_mode(tokenizer, model)

# ── Footer ───────────────────────────────────
st.markdown(
    '\n\n'
    'FakeScope · Powered by 🤗 Transformers · For educational use only'
    '\n\n',
    unsafe_allow_html=True,
)