| import gradio as gr |
| from transformers import pipeline |
| from PIL import Image, ImageEnhance, ImageOps |
| from duckduckgo_search import DDGS |
| import pytesseract |
| import numpy as np |
| import re |
| import os |
| import traceback |
| import time |
|
|
| |
| |
| |
# Shared text-classification pipeline. It stays None when loading fails, which
# is the condition classify_with_bert() checks before using the heuristic path.
classifier = None
# String form of the load failure, if any; None when loading succeeded.
classifier_error = None

print("Loading BERT Fake News Classifier...")
try:
    classifier = pipeline(
        "text-classification",
        model="jy46604790/Fake-News-Bert-Detect",
        device=-1,
        truncation=True,
        max_length=512,
    )
except Exception as load_err:
    # Best-effort: the app keeps running without the model.
    classifier_error = str(load_err)
    print(f"BERT load failed: {load_err}")
else:
    print("BERT Classifier loaded.")
|
|
|
|
| |
| |
| |
def analyze_tone_fallback(text):
    """Score *text* for sensational / fake-news styling with fixed heuristics.

    Points are added for shouting (ALL-CAPS), heavy or mixed punctuation,
    known clickbait/conspiracy phrases, rumor-forwarding wording and
    anonymous-source claims. The total is capped at 100.

    Returns:
        A ``(risk_score, label, engine_name)`` tuple where a higher
        ``risk_score`` means the text looks more sensational, and
        ``engine_name`` lists the reasons that contributed to the score.
    """
    tokens = text.split()
    if not tokens:
        return 0, "π’ Neutral / Standard", "Heuristic Engine"

    lowered = text.lower()
    total = 0
    findings = []

    # Shouting: share of words longer than 3 characters written in capitals.
    shouted = sum(1 for tok in tokens if tok.isupper() and len(tok) > 3)
    if shouted / max(len(tokens), 1) > 0.20:
        total += 18
        findings.append("Excessive ALL-CAPS")

    # Punctuation abuse.
    if text.count("!") >= 3:
        total += 12
        findings.append("Multiple exclamation marks")
    if re.search(r'\?\!|\!\?', text):
        total += 8
        findings.append("Sensationalist punctuation (?! or !?)")

    # Phrases that are strongly associated with clickbait / conspiracy posts.
    fake_phrases = (
        "you won't believe", "mind-blowing", "miracle cure",
        "they don't want you to know", "secret they're hiding",
        "doctors hate", "one weird trick", "illuminati", "plandemic",
        "scamdemic", "sheeple", "share before deleted", "banned video",
        "censored truth", "99% won't share", "forwarded as received",
        "going viral now", "wake up people", "deep state agenda",
        "government is hiding", "they are hiding", "what they don't tell you",
    )
    phrase_hits = [phrase for phrase in fake_phrases if phrase in lowered]
    if phrase_hits:
        # 18 points per phrase, capped at 45 for this category.
        total += min(len(phrase_hits) * 18, 45)
        findings.append(f"Fake-specific phrases: {', '.join(phrase_hits[:3])}")

    # Wording typical of forwarded rumors; every matching pattern counts.
    rumor_patterns = (
        r'forwarded as received',
        r'circulating on (whatsapp|telegram|social media)',
        r'cannot be independently verified',
        r'unverified (claim|source|report)',
        r'we cannot confirm',
        r'rumou?r (has it|is spreading|is going around)',
    )
    rumor_hits = len([p for p in rumor_patterns if re.search(p, lowered)])
    if rumor_hits:
        total += rumor_hits * 15
        findings.append(f"Rumor-forwarding language ({rumor_hits} pattern(s))")

    # Anonymous sourcing is scored once, no matter how many patterns match.
    anon_patterns = (
        r'anonymous source(s)? (claim|say|report)',
        r'insider (reveals|claims|exposes)',
        r'unnamed official (says|claims)',
        r'unidentified group (claimed|said|announced)',
    )
    if any(re.search(p, lowered) for p in anon_patterns):
        total += 12
        findings.append("Anonymous source making strong claim")

    risk_score = min(total, 100)
    if risk_score >= 40:
        label = "π΄ High Risk: Sensationalized / Clickbait Style"
    elif risk_score >= 20:
        label = "π‘ Medium Risk: Slightly Sensational"
    else:
        label = "π’ Low Risk: Standard Journalistic Style"

    summary = "; ".join(findings) if findings else "No significant fake signals"
    return risk_score, label, f"Heuristic Engine ({summary})"
|
|
|
|
| |
| |
| |
def classify_with_bert(text):
    """Estimate how likely *text* is fake using the loaded classifier.

    The code treats the model label ``LABEL_1`` as "fake" and any other label
    as "real". When the module-level ``classifier`` is None, or inference
    raises, the heuristic ``analyze_tone_fallback`` result is returned instead.

    Returns:
        A ``(fake_probability, label, engine)`` tuple; ``fake_probability`` is
        a percentage rounded to one decimal when the model is used.
    """
    if classifier is None:
        return analyze_tone_fallback(text)

    try:
        prediction = classifier(text, truncation=True, max_length=512)[0]
        predicted_label = prediction["label"]
        confidence = prediction["score"]

        if predicted_label == "LABEL_1":
            # Model confidence is already the probability of "fake".
            fake_prob = round(confidence * 100, 1)
            label = "π΄ High Risk: Model flagged as Fake"
        else:
            # Confidence refers to "real", so invert it.
            fake_prob = round((1 - confidence) * 100, 1)
            label = "π’ Low Risk: Model classifies as Real"

        engine = f"BERT Deep Neural Classifier ({confidence*100:.1f}% model confidence)"
        return fake_prob, label, engine
    except Exception as err:
        # Deliberate best-effort: any inference problem degrades to heuristics.
        print(f"BERT inference failed: {err}")
        return analyze_tone_fallback(text)
|
|
|
|
| |
| |
| |
def detect_spoofed_source_label(raw_text):
    """Detect text that name-drops a credible outlet without linking to it.

    The text is scanned for attribution phrases ("Source: Reuters",
    "BBC confirms", ...). A match counts as spoofed when none of the URLs in
    the text contains the outlet's URL key.

    Args:
        raw_text: The unprocessed user text; ``None`` is treated as empty.

    Returns:
        ``(True, outlet_name)`` for the first unbacked attribution found,
        otherwise ``(False, "")``.
    """
    DECEPTIVE_PATTERNS = [
        r'(as reported|according|source|sourced|published|confirmed|breaking news from|exclusive from|via)\s*:?\s*(by\s+)?(reuters|bbc|ndtv|cnn|apnews|ap news|the hindu|indian express|bloomberg|aljazeera|times of india|washington post|new york times|the guardian|firstpost|thewire|snopes)',
        r'(reuters|bbc|ndtv|cnn|apnews)\s*(exclusive|breaking|confirms|confirmed|reports|reported)\b',
        r'\b(reuters|bbc|ndtv|cnn)\b.{0,15}(verif|confirm|report)',
    ]
    CREDIBLE_OUTLETS = [
        "reuters", "bbc", "ndtv", "cnn", "apnews", "ap news", "the hindu",
        "indian express", "bloomberg", "aljazeera", "times of india",
        "washington post", "new york times", "the guardian", "firstpost",
        "thewire", "snopes", "politifact", "factcheck", "boomlive", "altnews",
    ]
    # Outlets whose web address is NOT just the name with spaces removed.
    # Without this, a genuine nytimes.com link never matched "newyorktimes"
    # and the text was wrongly reported as spoofed.
    URL_KEY_OVERRIDES = {
        "new york times": "nytimes",
    }

    raw_text = raw_text or ""
    text_lower = raw_text.lower()
    urls_lower = [u.lower() for u in re.findall(r'https?://\S+', raw_text)]

    for pattern in DECEPTIVE_PATTERNS:
        match = re.search(pattern, text_lower)
        if not match:
            continue
        matched_text = match.group(0)
        outlet_found = next((o for o in CREDIBLE_OUTLETS if o in matched_text), "")
        if not outlet_found:
            continue
        outlet_key = URL_KEY_OVERRIDES.get(outlet_found, outlet_found.replace(" ", ""))
        if not any(outlet_key in u for u in urls_lower):
            return True, outlet_found.title()
    return False, ""
|
|
|
|
| |
| |
| |
# Registrable domains treated as trustworthy news / fact-check / government
# sources. Sub-domains of these entries are accepted as well.
TRUSTED_DOMAINS = [
    "livelaw.in", "barandbench.com", "reuters.com", "apnews.com",
    "bbc.com", "bbc.co.uk", "nytimes.com", "washingtonpost.com",
    "theguardian.com", "indianexpress.com", "thehindu.com",
    "ndtv.com", "bloomberg.com", "economist.com", "snopes.com",
    "politifact.com", "factcheck.org", "altnews.in", "boomlive.in",
    "independent.co.uk", "cnn.com", "aljazeera.com",
    "timesofindia.indiatimes.com", "thewire.in", "firstpost.com",
    "pib.gov.in", "mea.gov.in", "mohfw.gov.in", "hindustantimes.com",
    "scroll.in", "theprint.in", "news18.com", "zeenews.india.com",
    "wionews.com", "businesstoday.in", "livemint.com", "moneycontrol.com",
]


def is_trusted_domain(url):
    """Tell whether *url* is hosted on one of ``TRUSTED_DOMAINS``.

    The host is taken from the parsed URL (port and userinfo excluded), a
    single leading ``www.`` is dropped, and the result must equal a trusted
    domain or be a sub-domain of one.

    Args:
        url: Absolute URL string, typically pulled out of free text.

    Returns:
        ``(is_trusted, host)``; ``host`` is ``""`` when no host could be
        parsed.
    """
    try:
        from urllib.parse import urlparse
        # ``hostname`` is lower-cased and excludes ``user@`` and ``:port``.
        # URLs scraped from prose often carry trailing punctuation.
        host = (urlparse(url).hostname or "").rstrip(".,;:!?)\"'")
    except (ValueError, TypeError, AttributeError):
        return False, ""

    # Strip only a *leading* "www."; removing it anywhere would turn
    # "reuters.www.com" into the trusted "reuters.com".
    if host.startswith("www."):
        host = host[4:]

    trusted = any(
        host == domain or host.endswith("." + domain)
        for domain in TRUSTED_DOMAINS
    )
    return trusted, host
|
|
def fetch_url_title(url):
    """Download *url* and return the text of its ``<title>`` element.

    HTML entities in the title are decoded with ``html.unescape``. The
    returned value is therefore plain text and must be HTML-escaped again by
    any caller that embeds it in markup.

    Args:
        url: Page address to fetch (5 second timeout).

    Returns:
        The stripped title string, or ``None`` when the request fails, the
        status is not 200, or no ``<title>`` element is found.
    """
    try:
        import html
        import requests
        response = requests.get(url, timeout=5, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        if response.status_code == 200:
            # Allow attributes on the tag (e.g. <title data-x="...">).
            found = re.search(r'<title\b[^>]*>(.*?)</title>', response.text,
                              re.IGNORECASE | re.DOTALL)
            if found:
                # Decodes every named/numeric entity instead of a fixed,
                # hand-maintained replacement table.
                return html.unescape(found.group(1)).strip()
    except Exception as e:
        # Best-effort lookup: callers fall back to a generic title on None.
        print(f"URL title fetch error: {e}")
    return None
|
|
|
|
| |
| |
| |
def normalize_english(text):
    """Reduce *text* to plain ASCII prose suitable for classification.

    Applied in order: links and e-mail addresses are blanked out, every
    character outside letters, digits, whitespace and ``. , ! ? ' " -`` is
    blanked out, and runs of whitespace collapse to a single space.

    Returns:
        The cleaned, stripped string; ``""`` for empty or ``None`` input.
    """
    if not text:
        return ""

    passes = (
        (r'http\S+|www\.\S+|\S+@\S+', ' '),
        (r'[^a-zA-Z0-9\s.,!?\'"\-]', ' '),
        (r'\s+', ' '),
    )
    cleaned = text
    for pattern, replacement in passes:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
|
|
def process_image_to_text(image):
    """Run OCR on *image* and return the normalized text.

    Non-PIL input is converted with ``Image.fromarray`` after a ``uint8``
    cast. The image is converted to grayscale and its contrast boosted
    (factor 2.5) before being handed to Tesseract.

    Returns:
        ``""`` when *image* is None, the cleaned OCR text on success, or a
        string starting with ``"OCR Extraction Failed: "`` when any step
        raises.
    """
    if image is None:
        return ""

    try:
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(np.uint8(image))
        gray = ImageOps.grayscale(pil_image)
        boosted = ImageEnhance.Contrast(gray).enhance(2.5)
        extracted = pytesseract.image_to_string(boosted, config=r'--oem 3 --psm 3')
        return normalize_english(extracted)
    except Exception as err:
        traceback.print_exc()
        return f"OCR Extraction Failed: {str(err)}"
|
|
|
|
| |
| |
| |
|
|
| |
| |
# Fact-checking outlets, written either as a bare domain or as
# "domain/path" for the fact-check section of a general news site.
FACT_CHECK_OUTLETS = [
    "snopes.com",
    "politifact.com",
    "factcheck.org",
    "altnews.in",
    "boomlive.in",
    "thequint.com",
    "vishvasnews.com",
    "logically.ai",
    "reuters.com/fact-check",
    "apnews.com/hub/ap-fact-check",
    "bbc.com/news/reality_check",
    "thehindu.com/specials/fact-check",
]


# Lower-case phrases which, when present in a search result's title or
# snippet, count as a sign that the claim is being debunked.
DEBUNK_KEYWORDS = [
    "fake news",
    "hoax",
    "debunked",
    "fabricated",
    "false claim",
    "misleading",
    "misinformation",
    "no such attack",
    "no evidence",
    "fact-check finds",
    "did not happen",
    "not verified",
    "unverified claim",
    "rumour spreading",
    "rumor spreading",
    "manipulated video",
    "out of context",
    "satire",
    "this is false",
    "this claim is false",
    "claim is misleading",
    "viral claim",
    "incorrect claim",
]


# Lower-case phrases which, when present in a search result's title or
# snippet, count as a sign of ordinary first-hand news reporting.
REAL_NEWS_SIGNALS = [
    "police said",
    "official said",
    "government said",
    "confirmed by",
    "spokesperson said",
    "press conference",
    "fir registered",
    "arrested",
    "minister said",
    "court order",
    "official statement",
    "pib confirmed",
    "reported by",
    "according to officials",
    "sources confirmed",
    "eyewitnesses said",
    "investigation reveals",
    "charged with",
    "published report",
    "breaking news confirmed",
    "official release",
]
|
|
def build_search_query(text):
    """Condense article text into a short keyword query.

    URLs are removed, punctuation becomes whitespace, and the first seven
    words that are longer than two characters and not in the stop-word list
    are joined with spaces (original casing kept).
    """
    stop_words = frozenset((
        "the", "and", "but", "or", "for", "with", "about", "against", "from", "into",
        "through", "during", "before", "after", "above", "below", "under", "over",
        "again", "further", "then", "once", "here", "there", "when", "where", "why",
        "how", "all", "any", "both", "each", "few", "more", "most", "other", "some",
        "such", "than", "too", "very", "can", "will", "just", "should", "would",
        "these", "those", "this", "that", "in", "on", "at", "to", "of", "by", "an", "a",
        "is", "are", "was", "were", "be", "has", "have", "had", "says", "said", "its",
        "it", "as", "up", "do", "he", "she", "they", "we", "our", "their", "his", "her",
    ))

    without_urls = re.sub(r'https?://\S+', '', text)
    tokens = re.sub(r'[^\w\s]', ' ', without_urls).split()

    keywords = []
    for token in tokens:
        if len(token) <= 2 or token.lower() in stop_words:
            continue
        keywords.append(token)
        if len(keywords) == 7:
            # Only the first seven keywords are ever used.
            break
    return " ".join(keywords)
|
|
|
|
def ddg_search(query, max_results=6, retries=2):
    """Run a DuckDuckGo text search, retrying on errors or empty results.

    Args:
        query: Search string.
        max_results: Upper bound passed to ``DDGS.text``.
        retries: Total number of attempts; ``0`` performs no search.

    Returns:
        The list of results from the first attempt that yields any, or
        ``[]`` when every attempt fails or comes back empty.
    """
    for attempt in range(1, retries + 1):
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=max_results))
        except Exception as e:
            print(f"DDG attempt {attempt} failed: {e}")
            # Back off only when another attempt follows; sleeping after the
            # final failure would just delay the caller.
            if attempt < retries:
                time.sleep(1)
            continue
        if results:
            return results
    return []
|
|
|
|
def _link_matches_outlet(link, outlet):
    """Return True when *link* is hosted on *outlet* (``host`` or ``host/path``)."""
    from urllib.parse import urlparse

    outlet_host, _, outlet_path = outlet.lower().partition("/")
    try:
        parsed = urlparse(link)
        host = parsed.hostname or ""
    except ValueError:
        return False
    if host.startswith("www."):
        host = host[4:]
    if host != outlet_host and not host.endswith("." + outlet_host):
        return False
    # Entries such as "reuters.com/fact-check" also constrain the path.
    return not outlet_path or outlet_path in parsed.path.lower()


def verify_facts_online(text):
    """Search the web for *text* and classify each result.

    Classification of every search hit:
      - fact-check outlet + debunk keyword -> debunk_score += 2
      - fact-check outlet, no debunk keyword -> real_corroboration += 1
      - trusted news domain, >= 2 debunk keywords and no reporting
        signal -> debunk_score += 1; otherwise real_corroboration += 1
      - any other site, >= 2 debunk keywords and no reporting
        signal -> debunk_score += 1; with a reporting signal ->
        real_corroboration += 1; otherwise neutral

    Result titles, snippets and links come from the open web, so they are
    HTML-escaped before being placed in the returned markup.

    Returns:
        ``(results_list, html_markup, hits_count, debunk_score,
        real_corroboration_count)``. When the query is too short or nothing
        is found, the list is empty, the counters are 0 and the markup is a
        single explanatory card.
    """
    from html import escape

    query = build_search_query(text)
    if len(query.split()) < 2:
        no_result_html = _card(
            "UNVERIFIED", "Headline Too Short",
            "The text is too short to run a meaningful web search.",
            "debunk", "#fb923c")
        return [], no_result_html, 0, 0, 0

    print(f"[Web Search] Query: '{query}'")

    results = ddg_search(query, max_results=8)

    # Second pass with a shorter query when the full one finds nothing.
    if not results:
        short_q = " ".join(query.split()[:4])
        print(f"[Web Search] Fallback query: '{short_q}'")
        results = ddg_search(short_q, max_results=5)

    if not results:
        no_result_html = _card(
            "UNVERIFIED", "No Online Matches Found",
            "No mainstream news outlets or fact-check databases are reporting this claim. "
            "This may be a fabricated rumor, hyper-local event, or newly generated hoax. "
            "Treat with caution until a direct source is found.",
            "debunk", "#fb923c")
        return [], no_result_html, 0, 0, 0

    # NOTE(review): the icon prefixes in the badge labels below look
    # mis-encoded (mojibake) in this copy of the file - confirm the encoding.
    cards = []
    debunk_score = 0
    real_corroboration = 0

    for idx, res in enumerate(results):
        title = res.get('title') or 'Source'
        body = res.get('body') or ''
        link = res.get('href') or '#'
        combined = (title + " " + body).lower()

        # Match on the parsed host rather than a substring of the whole URL,
        # so "evil.com/?u=reuters.com" is not mistaken for a trusted outlet.
        is_fact_check_site = any(_link_matches_outlet(link, fc) for fc in FACT_CHECK_OUTLETS)
        is_trusted_news = is_trusted_domain(link)[0]

        debunk_hits = sum(1 for kw in DEBUNK_KEYWORDS if kw in combined)
        real_hits = sum(1 for sig in REAL_NEWS_SIGNALS if sig in combined)

        if is_fact_check_site:
            if debunk_hits > 0:
                # A fact-checker using debunk wording weighs double.
                debunk_score += 2
                status = _badge("π¨ FACT-CHECKER: FALSE", "danger")
                card_class = "source-debunk"
            else:
                real_corroboration += 1
                status = _badge("β FACT-CHECK CORROBORATED", "success")
                card_class = "source-credible"
        elif is_trusted_news:
            if debunk_hits >= 2 and real_hits == 0:
                debunk_score += 1
                status = _badge("β οΈ REPORTED AS MISINFORMATION", "warning")
                card_class = "source-debunk"
            else:
                real_corroboration += 1
                status = _badge("β TRUSTED SOURCE", "success")
                card_class = "source-credible"
        else:
            if debunk_hits >= 2 and real_hits == 0:
                debunk_score += 1
                status = _badge("π¨ DEBUNK SIGNALS", "danger")
                card_class = "source-debunk"
            elif real_hits > 0:
                real_corroboration += 1
                status = _badge("β NEWS CORROBORATION", "success")
                card_class = "source-credible"
            else:
                status = _badge("β RELATED RESULT", "neutral")
                card_class = "source-credible"

        # Truncate first, then escape, so an entity is never cut in half.
        display_body = body[:280] + ("..." if len(body) > 280 else "")
        # Only plain web links are allowed into the href attribute.
        safe_link = link if link.lower().startswith(("http://", "https://")) else "#"

        cards.append(
            f"<div class='source-card {card_class}'>"
            f"<div class='source-header'>"
            f"<span class='source-idx'>#{idx+1}</span>"
            f"<h4>{escape(title)}</h4>"
            f"{status}"
            f"</div>"
            f"<p class='source-body'>\"{escape(display_body)}\"</p>"
            f"<div class='source-footer'>"
            f"<a href='{escape(safe_link, quote=True)}' target='_blank' class='source-link'>π View source</a>"
            f"</div></div>\n"
        )

    markdown_out = "".join(cards)
    return results, markdown_out, len(results), debunk_score, real_corroboration
|
|
|
|
def _badge(text, kind):
    """Render a small coloured status pill as an HTML ``<span>``.

    *kind* selects the palette (``success``, ``danger``, ``warning`` or
    ``neutral``); unknown kinds use the neutral palette. *text* is inserted
    as given.
    """
    palettes = {
        "success": ("rgba(16,185,129,0.1)", "#10b981", "rgba(16,185,129,0.15)"),
        "danger": ("rgba(239,68,68,0.1)", "#ef4444", "rgba(239,68,68,0.15)"),
        "warning": ("rgba(245,158,11,0.1)", "#f59e0b", "rgba(245,158,11,0.15)"),
        "neutral": ("rgba(100,116,139,0.1)", "#64748b", "rgba(100,116,139,0.15)"),
    }
    background, foreground, outline = palettes.get(kind, palettes["neutral"])
    inline_style = (
        f"background:{background};color:{foreground};"
        f"border:1px solid {outline};"
    )
    return f"<span class='badge' style='{inline_style}'>{text}</span>"
|
|
def _card(idx_label, title, body_text, cls, color):
    """Render a stand-alone information card as HTML.

    *cls* is appended to ``source-`` to pick the card's CSS class and
    *color* tints the left border and the index label. All arguments are
    inserted into the markup as given.
    """
    parts = [
        f"<div class='source-card source-{cls}' style='border-left-color:{color};'>",
        "<div class='source-header'>",
        f"<span class='source-idx' style='color:{color};'>{idx_label}</span>",
        f"<h4>{title}</h4></div>",
        f"<p class='source-body'>{body_text}</p>",
        "</div>",
    ]
    return "".join(parts)
|
|
|
|
| |
| |
| |
def compute_verdict(is_url_verified, verified_domain_name,
                    hits_count, debunk_score, real_corroboration,
                    bert_fake_prob, is_label_spoofed, is_url_verified_flag):
    """Combine all signals into one reliability verdict.

    The first matching rule wins:
      1. A directly linked trusted URL: 96, or 78 when the classifier's
         fake probability is 50 or more.
      2. ``debunk_score`` >= 3, == 2, == 1: base scores 6, 15 and 32.
      3. ``real_corroboration`` >= 3, == 2, == 1: high to moderate scores,
         lowered when the classifier dislikes the writing style.
      4. Some web hits without a clear signal: 58 or 38.
      5. No web hits: 22, 38 or 52 depending on the classifier alone.

    A 12-point penalty applies to most non-URL rules when an outlet is
    claimed without a verified link (``is_label_spoofed`` and not
    ``is_url_verified_flag``); each penalised rule has its own floor.

    Returns:
        ``(reliability, title, css_class, color, description_html)``.
    """
    penalty = 0
    if is_label_spoofed and not is_url_verified_flag:
        penalty = 12

    genuine_css, genuine_color = "verdict-genuine-title", "#10b981"
    sensational_css, sensational_color = "verdict-sensationalized-title", "#f59e0b"
    unverified_css, unverified_color = "verdict-unverified-title", "#fb923c"
    fabricated_css, fabricated_color = "verdict-fabricated-title", "#ef4444"
    url_tip = "<br><small style='color:#6b7280;'>Tip: Paste the direct article URL if you have one.</small>"

    # --- Rule 1: the text links straight to a trusted domain -------------
    if is_url_verified:
        if bert_fake_prob < 50:
            description = (
                f"Directly linked to trusted domain <strong>{verified_domain_name}</strong> "
                f"and written in an objective style. High confidence this is genuine."
            )
            return 96, "π VERIFIED GENUINE", genuine_css, genuine_color, description
        description = (
            f"Verified via <strong>{verified_domain_name}</strong> but writing style is sensational. "
            f"Core facts are likely authentic; specific details may be exaggerated."
        )
        return 78, "π SENSATIONALIZED β CORE FACTS REAL", sensational_css, sensational_color, description

    # --- Rule 2: debunk signals outrank any corroboration ----------------
    if debunk_score >= 3:
        description = (
            "Multiple fact-checkers and credible sources have debunked this claim. "
            "This is almost certainly misinformation. Do NOT share."
        )
        return (max(6 - penalty, 3), "π¨ FABRICATED / DEBUNKED",
                fabricated_css, fabricated_color, description)

    if debunk_score == 2:
        description = (
            "Two or more credible sources flag this as false or misleading. "
            "Strong evidence this is misinformation."
        )
        return (max(15 - penalty, 5), "π¨ LIKELY FAKE / DEBUNKED",
                fabricated_css, fabricated_color, description)

    if debunk_score == 1:
        description = (
            "At least one credible source contradicts or flags this claim. "
            "Treat with strong skepticism and verify from primary sources."
        )
        return (max(32 - penalty, 10), "β οΈ SUSPICIOUS CLAIM",
                fabricated_css, fabricated_color, description)

    # --- Rule 3: corroboration from credible sources ---------------------
    if real_corroboration >= 3:
        description = (
            f"Found in <strong>{real_corroboration}</strong> credible/trusted sources with no debunking signals. "
            f"High confidence this is genuine news."
        )
        return (min(88 + real_corroboration - penalty, 95), "π VERIFIED GENUINE",
                genuine_css, genuine_color, description)

    if real_corroboration == 2:
        if bert_fake_prob > 60:
            # This branch returns a fixed score; the penalty is not applied.
            description = (
                "Found in 2 credible sources, but writing style raises some flags. "
                "Core story appears real; verify specific claims independently."
            )
            return (68, "π LIKELY REAL β VERIFY DETAILS",
                    sensational_css, sensational_color, description)
        description = (
            "Found in 2 credible sources with objective writing style. "
            "High confidence this is genuine."
        )
        return (80 - penalty, "π VERIFIED GENUINE",
                genuine_css, genuine_color, description)

    if real_corroboration == 1:
        if bert_fake_prob < 40:
            description = (
                "Found in one credible source with objective writing. "
                "Likely genuine but seek additional confirmation."
            )
            return (max(68 - penalty, 50), "π LIKELY REAL β NEEDS MORE SOURCES",
                    sensational_css, sensational_color, description)
        description = (
            "Only one corroborating source found and writing style is questionable. "
            "Exercise caution and verify from a primary source."
        )
        return (max(50 - penalty, 30), "β οΈ UNVERIFIED β MIXED SIGNALS",
                unverified_css, unverified_color, description)

    # --- Rule 4: web hits exist but carry no clear signal ----------------
    if hits_count > 0:
        if bert_fake_prob < 35:
            description = (
                "Some web results found but from non-trusted domains. "
                "Writing style appears objective. Likely real but needs a primary source link."
            )
            return (max(58 - penalty, 40), "β οΈ UNVERIFIED β PROBABLY REAL",
                    unverified_css, unverified_color, description)
        description = (
            "Some web results found but content is not clearly corroborated by trusted outlets. "
            "Sensational writing style detected. Verify before sharing."
        )
        return (max(38 - penalty, 20), "β οΈ UNVERIFIED β SUSPICIOUS",
                unverified_css, unverified_color, description)

    # --- Rule 5: nothing online, classifier decides ----------------------
    if bert_fake_prob >= 65:
        description = (
            "No online corroboration found AND the AI model flags this as likely fake. "
            "This is possibly a fabricated or circulating rumor. "
            + url_tip
        )
        return (max(22 - penalty, 8), "β οΈ UNVERIFIED β HIGH FAKE RISK",
                unverified_css, unverified_color, description)

    if bert_fake_prob >= 40:
        description = (
            "No online corroboration found. Could be a very recent, hyper-local, or fabricated story. "
            "Seek a direct source before sharing. "
            + url_tip
        )
        return (max(38 - penalty, 20), "β οΈ UNVERIFIED β UNCERTAIN",
                unverified_css, unverified_color, description)

    description = (
        "No mainstream coverage found, but writing style appears legitimate. "
        "Could be a hyper-local or very recent story. "
        "Seek a primary source before sharing. "
        + url_tip
    )
    return (max(52 - penalty, 35), "β οΈ UNVERIFIED β POSSIBLY REAL",
            unverified_css, unverified_color, description)
|
|
|
|
| |
| |
| |
def _input_error_outputs(heading, message_html, ocr_value):
    """Build the 7-value output tuple returned when the input cannot be analysed."""
    error_html = (
        f"<div class='source-card source-debunk'><h4>{heading}</h4>"
        f"<p>{message_html}</p></div>"
    )
    # Same arity and order as the success return of process_and_verdict().
    # NOTE(review): the message sits in the second slot, where the original
    # error returns placed it - confirm against the UI output wiring.
    return (
        gr.update(visible=False),
        error_html,
        "", "", "",
        ocr_value,
        gr.update(open=False),
    )


def process_and_verdict(text_input, ocr_output, img_input, source_tab):
    """Run the full analysis pipeline and build every output of the UI.

    Steps: pick the text (typed text, existing OCR text, or fresh OCR of the
    image), look for a directly linked trusted URL, look for an outlet that
    is claimed without a link, classify the cleaned text, search the web,
    then merge everything through ``compute_verdict``.

    Args:
        text_input: Text typed by the user; ``None`` is treated as empty.
        ocr_output: OCR text already extracted for the image tab, if any.
        img_input: Uploaded image, used only when no OCR text exists yet.
        source_tab: ``"image"`` selects the OCR path; anything else selects
            ``text_input``.

    Returns:
        A 7-tuple ``(results_update, banner_html, verdict_html,
        ai_report_html, web_html, ocr_text, panel_update)``. Error returns
        have the same length, with the error card in the second position and
        empty strings in the next three.
    """
    from html import escape

    # NOTE(review): the icon prefixes in the labels below look mis-encoded
    # (mojibake) in this copy of the file - confirm the file's encoding.
    updated_ocr = ocr_output

    # --- 1. Choose the text to analyse -----------------------------------
    if source_tab == "image":
        if ocr_output and ocr_output.strip():
            raw_text = ocr_output
        elif img_input is None:
            return _input_error_outputs(
                "β οΈ Input Error",
                "Please upload an image or extract OCR text first.",
                ocr_output,
            )
        else:
            raw_text = process_image_to_text(img_input)
            updated_ocr = raw_text
        # process_image_to_text reports failures as text; do not fact-check
        # an error message as if it were a news claim.
        if raw_text.startswith("OCR Extraction Failed:"):
            return _input_error_outputs("β οΈ Input Error", escape(raw_text), ocr_output)
    else:
        raw_text = text_input or ""

    # --- 2. Direct link to a trusted domain ------------------------------
    is_url_verified = False
    verified_domain_name = ""
    verified_url_card = ""

    for url in re.findall(r'(https?://\S+)', raw_text):
        is_trusted, domain = is_trusted_domain(url)
        if not is_trusted:
            continue
        is_url_verified = True
        verified_domain_name = domain
        page_title = fetch_url_title(url) or f"Verified Article on {domain.title()}"
        # The title comes from a remote page and the URL from user text:
        # both are escaped before being placed in markup.
        verified_url_card = (
            f"<div class='source-card source-credible' style='border-left-width:6px;'>"
            f"<div class='source-header'>"
            f"<span class='source-idx' style='background:#10b981;color:white;'>β DIRECT SOURCE</span>"
            f"<h4>{escape(page_title)}</h4>"
            f"{_badge('β TRUSTED DOMAIN', 'success')}"
            f"</div>"
            f"<p class='source-body'>Direct link verified from trusted domain "
            f"<strong>{escape(domain)}</strong>.</p>"
            f"<div class='source-footer'>"
            f"<a href='{escape(url, quote=True)}' target='_blank' class='source-link'>"
            f"π View on {escape(domain.title())}</a>"
            f"</div></div>"
        )
        break  # the first trusted link is enough

    # --- 3. Outlet named without a matching link -------------------------
    is_label_spoofed, spoofed_outlet = detect_spoofed_source_label(raw_text)
    show_spoof_warning = is_label_spoofed and not is_url_verified
    spoofed_warning_card = ""
    if show_spoof_warning:
        spoofed_warning_card = (
            f"<div class='source-card source-debunk' style='border-left-color:#f59e0b;'>"
            f"<div class='source-header'>"
            f"<span class='source-idx' style='color:#f59e0b;'>β οΈ CLAIM CHECK</span>"
            f"<h4>Outlet Claimed Without Verifiable Link</h4>"
            f"{_badge('π¨ UNVERIFIED CLAIM', 'danger')}"
            f"</div>"
            f"<p class='source-body'>Content claims to be from <strong>{spoofed_outlet}</strong> "
            f"but no verified URL from that outlet was found. "
            f"This is a common credibility manipulation tactic.</p>"
            f"</div>"
        )

    # --- 4. Clean the text and reject input that is too short ------------
    cleaned_text = normalize_english(raw_text)
    if not cleaned_text or len(cleaned_text) < 10:
        return _input_error_outputs(
            "β οΈ Too Short",
            "Please provide a full sentence or news headline (min 10 characters).",
            updated_ocr,
        )

    # --- 5. Classifier and web corroboration -----------------------------
    bert_fake_prob, bert_label, bert_engine = classify_with_bert(cleaned_text)
    (search_results, web_markdown, hits_count,
     debunk_score, real_corroboration) = verify_facts_online(cleaned_text)

    # Warning card first, then the verified-link card on top of everything.
    if spoofed_warning_card:
        web_markdown = spoofed_warning_card + "\n" + web_markdown
    if is_url_verified:
        web_markdown = verified_url_card + "\n" + web_markdown

    # --- 6. Final verdict ------------------------------------------------
    reliability, verdict_title, verdict_class, verdict_color, verdict_desc = compute_verdict(
        is_url_verified, verified_domain_name,
        hits_count, debunk_score, real_corroboration,
        bert_fake_prob, is_label_spoofed, is_url_verified
    )

    # --- 7. Render -------------------------------------------------------
    # Fragments are prepared up front: a backslash inside an f-string
    # expression is a SyntaxError before Python 3.12.
    spoof_metric = ""
    outlet_claim_html = ""
    if show_spoof_warning:
        spoof_metric = ("<div class='metric-item'>"
                        "<span class='metric-val' style='color:#f59e0b;'>β οΈ CLAIMED</span>"
                        "<span class='metric-lbl'>OUTLET UNVERIFIED</span></div>")
        outlet_claim_html = (
            "<div class='intel-item'><h5>β οΈ Outlet Claim</h5>"
            "<div class='intel-val' style='color:#f59e0b;'>"
            f"\"{spoofed_outlet}\" claimed without verified URL.</div></div>"
        )

    plural = "" if debunk_score == 1 else "s"
    short_label = bert_label.split(":")[0]
    fake_prob_color = "#ef4444" if bert_fake_prob > 50 else "#10b981"
    text_preview = cleaned_text[:800] + ("..." if len(cleaned_text) > 800 else "")

    verdict_html = f"""
    <div class='verdict-dashboard'>
      <div class='verdict-score-wrapper'>
        <div class='verdict-circle-progress'
             style='background:conic-gradient({verdict_color} {reliability}%,#334155 {reliability}%);'>
          <div class='verdict-circle-inner'>
            <span class='verdict-score'>{reliability}%</span>
            <span class='verdict-score-label'>RELIABILITY</span>
          </div>
        </div>
      </div>
      <div class='verdict-details'>
        <div class='verdict-title {verdict_class}'>{verdict_title}</div>
        <p class='verdict-desc'>{verdict_desc}</p>
        <div class='verdict-metrics'>
          <div class='metric-item'>
            <span class='metric-val'>{short_label}</span>
            <span class='metric-lbl'>AI CLASSIFICATION</span>
          </div>
          <div class='metric-item'>
            <span class='metric-val'>{real_corroboration} trusted</span>
            <span class='metric-lbl'>REAL SOURCES FOUND</span>
          </div>
          <div class='metric-item'>
            <span class='metric-val'>{debunk_score} flag{plural}</span>
            <span class='metric-lbl'>DEBUNK SIGNALS</span>
          </div>
          <div class='metric-item'>
            <span class='metric-val'>{hits_count} total</span>
            <span class='metric-lbl'>WEB RESULTS</span>
          </div>
          {spoof_metric}
        </div>
      </div>
    </div>
    """

    ai_report_html = f"""
    <div class='intel-card'>
      <div class='intel-item'>
        <h5>Classifier Engine</h5>
        <div class='intel-val' style='color:#06b6d4;'>{bert_engine}</div>
      </div>
      <div class='intel-item'>
        <h5>AI Classification</h5>
        <div class='intel-val'>{bert_label}</div>
      </div>
      <div class='intel-item'>
        <h5>Fake Probability (AI)</h5>
        <div class='intel-val' style='color:{fake_prob_color};'>{bert_fake_prob}%</div>
      </div>
      <div class='intel-item'>
        <h5>Web Corroboration</h5>
        <div class='intel-val'>{real_corroboration} trusted source(s) |
          <span style='color:#ef4444;'>{debunk_score} debunk signal{plural}</span>
        </div>
      </div>
      {outlet_claim_html}
      <div class='intel-item'>
        <h5>Analyzed Text</h5>
        <div class='intel-val' style='font-weight:normal;font-size:0.88rem;font-family:monospace;
             background:#0b0f19;padding:12px;border-radius:8px;border:1px solid #1e293b;
             color:#cbd5e1;word-break:break-all;'>{text_preview}</div>
      </div>
    </div>
    """

    if reliability >= 60:
        banner_html = "<div class='verdict-banner-true'>π’ LIKELY TRUE NEWS</div>"
    else:
        banner_html = "<div class='verdict-banner-false'>π΄ LIKELY FALSE / UNVERIFIED</div>"

    return (
        gr.update(visible=True),
        banner_html,
        verdict_html,
        ai_report_html,
        web_markdown,
        updated_ocr,
        gr.update(open=False),
    )
|
|
|
|
def handle_ocr_scan(img):
    """Return OCR text for *img*, or a notice when no image was supplied."""
    return "β οΈ Image not uploaded yet." if img is None else process_image_to_text(img)
|
|
|
|
| |
| |
| |
| css = """ |
| @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;800;900&family=Inter:wght@300;400;600;700&display=swap'); |
| |
| body { background-color: #0b0f19 !important; } |
| .gradio-container { |
| background-color: #0b0f19 !important; |
| font-family: 'Inter','Outfit',sans-serif !important; |
| color: #f1f5f9 !important; |
| max-width: 1200px !important; |
| margin: 0 auto !important; |
| padding: 20px !important; |
| } |
| .cyber-title { |
| text-align: center; |
| background: linear-gradient(135deg,#06b6d4 0%,#10b981 50%,#3b82f6 100%); |
| -webkit-background-clip: text; |
| -webkit-text-fill-color: transparent; |
| font-size: 3rem; font-weight: 900; |
| margin-top: 10px; margin-bottom: 2px; |
| letter-spacing: -0.04em; |
| font-family: 'Outfit',sans-serif !important; |
| } |
| .cyber-subtitle { |
| text-align: center; color: #94a3b8; |
| font-size: 1.15rem; margin-bottom: 35px; |
| font-weight: 400; |
| } |
| .block { |
| background-color: #111827 !important; |
| border: 1px solid #1f2937 !important; |
| border-radius: 16px !important; |
| box-shadow: 0 15px 35px -10px rgba(0,0,0,0.6) !important; |
| overflow: hidden !important; |
| } |
| textarea, input[type="text"] { |
| background-color: #030712 !important; |
| border: 1px solid #1f2937 !important; |
| color: #f1f5f9 !important; |
| font-size: 0.95rem !important; |
| border-radius: 8px !important; |
| } |
| textarea:focus, input[type="text"]:focus { |
| border-color: #06b6d4 !important; |
| box-shadow: 0 0 10px rgba(6,182,212,0.2) !important; |
| } |
| .tab-nav { |
| border-bottom: 1px solid #1f2937 !important; |
| background-color: #0b0f19 !important; |
| padding: 8px 12px 0 12px !important; |
| } |
| .tab-nav button { |
| color: #6b7280 !important; font-weight: 700 !important; |
| font-size: 0.9rem !important; border: none !important; |
| background: transparent !important; padding: 10px 18px !important; |
| border-radius: 8px 8px 0 0 !important; |
| } |
| .tab-nav button.selected { |
| color: #06b6d4 !important; background-color: #111827 !important; |
| border: 1px solid #1f2937 !important; |
| border-bottom: 1px solid #111827 !important; |
| } |
| .cyber-btn { |
| background: linear-gradient(135deg,#111827 0%,#1f2937 100%) !important; |
| border: 1px dashed #06b6d4 !important; color: #06b6d4 !important; |
| font-weight: 800 !important; letter-spacing: 0.04em; |
| text-transform: uppercase; |
| border-radius: 12px !important; |
| font-family: 'Outfit',sans-serif !important; |
| } |
| .cyber-btn:hover { |
| background: linear-gradient(135deg,#06b6d4 0%,#3b82f6 100%) !important; |
| color: #ffffff !important; border-style: solid !important; |
| box-shadow: 0 0 20px rgba(6,182,212,0.5) !important; |
| } |
| .verdict-dashboard { |
| display: flex; align-items: center; |
| background: linear-gradient(135deg,#030712 0%,#111827 100%); |
| border: 1px solid #1f2937; border-radius: 16px; |
| padding: 24px; gap: 24px; margin-bottom: 25px; |
| } |
| @media(max-width:768px){.verdict-dashboard{flex-direction:column;text-align:center;}} |
| .verdict-score-wrapper{flex-shrink:0;} |
| .verdict-circle-progress { |
| width:140px; height:140px; border-radius:50%; |
| display:flex; align-items:center; justify-content:center; |
| box-shadow:0 8px 16px rgba(0,0,0,0.4); |
| } |
| .verdict-circle-inner { |
| width:114px; height:114px; background-color:#0b0f19; border-radius:50%; |
| display:flex; flex-direction:column; align-items:center; justify-content:center; |
| } |
| .verdict-score{font-size:2.1rem;font-weight:900;color:#fff;line-height:1;font-family:'Outfit',sans-serif !important;} |
| .verdict-score-label{font-size:0.65rem;color:#6b7280;text-transform:uppercase;letter-spacing:0.12em;margin-top:4px;} |
| .verdict-details{flex-grow:1;} |
| .verdict-title{ |
| font-size:1.7rem;font-weight:900;letter-spacing:-0.02em; |
| margin-bottom:6px;text-transform:uppercase;font-family:'Outfit',sans-serif !important; |
| } |
| .verdict-desc{font-size:0.95rem;color:#9ca3af;line-height:1.5;margin-bottom:16px;} |
| .verdict-metrics{ |
| display:flex;gap:20px;flex-wrap:wrap; |
| border-top:1px solid #1f2937;padding-top:14px; |
| } |
| .metric-item{display:flex;flex-direction:column;} |
| .metric-val{font-size:0.9rem;font-weight:700;color:#f3f4f6;} |
| .metric-lbl{font-size:0.65rem;color:#4b5563;text-transform:uppercase;letter-spacing:0.05em;} |
| .verdict-genuine-title {color:#10b981;text-shadow:0 0 20px rgba(16,185,129,0.35);} |
| .verdict-sensationalized-title{color:#f59e0b;text-shadow:0 0 20px rgba(245,158,11,0.35);} |
| .verdict-unverified-title {color:#fb923c;text-shadow:0 0 20px rgba(251,146,60,0.35);} |
| .verdict-fabricated-title {color:#ef4444;text-shadow:0 0 20px rgba(239,68,68,0.35);} |
| .source-card{ |
| background-color:#030712;border:1px solid #1f2937; |
| border-radius:12px;padding:16px;margin-bottom:14px; |
| } |
| .source-card.source-credible{border-left:4px solid #10b981;} |
| .source-card.source-debunk{border-left:4px solid #ef4444;} |
| .source-header{ |
| display:flex;justify-content:space-between;align-items:center; |
| margin-bottom:10px;gap:12px;flex-wrap:wrap; |
| } |
| .source-idx{ |
| font-size:0.75rem;font-weight:800;background-color:#111827; |
| color:#9ca3af;padding:2px 7px;border-radius:4px;white-space:nowrap; |
| } |
| .source-header h4{margin:0;font-size:0.95rem;font-weight:700;color:#fff;flex-grow:1;line-height:1.35;} |
| .badge{font-size:0.65rem;font-weight:800;padding:3px 9px;border-radius:20px;text-transform:uppercase;white-space:nowrap;} |
| .source-body{font-size:0.85rem;color:#9ca3af;line-height:1.45;margin:0 0 12px 0;font-style:italic;} |
| .source-footer{display:flex;justify-content:flex-end;} |
| .source-link{font-size:0.75rem;color:#06b6d4;text-decoration:none;font-weight:700;} |
| .source-link:hover{color:#3b82f6;text-decoration:underline;} |
| .intel-card{background-color:#030712;border:1px solid #1f2937;border-radius:12px;padding:20px;} |
| .intel-item{margin-bottom:18px;} |
| .intel-item:last-child{margin-bottom:0;} |
| .intel-item h5{ |
| margin:0 0 6px 0;font-size:0.8rem;color:#4b5563; |
| text-transform:uppercase;letter-spacing:0.06em; |
| } |
| .intel-val{font-size:1.05rem;font-weight:600;color:#fff;} |
| .verdict-banner-true{ |
| text-align:center; |
| background:linear-gradient(135deg,rgba(16,185,129,0.08) 0%,rgba(16,185,129,0.18) 100%); |
| border:2px solid #10b981;color:#10b981;font-size:1.3rem;font-weight:800; |
| padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase; |
| font-family:'Outfit',sans-serif !important; |
| } |
| .verdict-banner-false{ |
| text-align:center; |
| background:linear-gradient(135deg,rgba(239,68,68,0.08) 0%,rgba(239,68,68,0.18) 100%); |
| border:2px solid #ef4444;color:#ef4444;font-size:1.3rem;font-weight:800; |
| padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase; |
| font-family:'Outfit',sans-serif !important; |
| } |
| """ |
|
|
| |
| |
| |
# Assemble the Gradio UI: input widgets (screenshot OCR tab and direct-text tab)
# on the left, verdict/report widgets on the right, then wire the button events.
#
# NOTE(review): the nesting below was reconstructed from the order of the
# context managers; confirm the two placements marked further down against
# the intended layout.
# NOTE(review): several user-facing labels look mis-encoded (emoji and the
# middle dot rendered as Greek letters). They are reproduced unchanged here;
# confirm the intended characters and re-save the file as UTF-8.
with gr.Blocks(css=css, theme=gr.themes.Base(), title="True Fact Checker & Fake News Detector") as app:

    # Remembers which input tab is active ("image" or "text"); it is passed to
    # process_and_verdict together with the raw inputs.
    tab_state = gr.State(value="image")

    # Page header.
    gr.HTML("<div class='cyber-title'>βοΈ TRUE FACT CHECKER</div>")
    gr.HTML("<div class='cyber-subtitle'>Web-First Verification Β· BERT Neural Analysis Β· Live Fact-Check Consensus</div>")

    with gr.Row():
        # ── Left column: inputs ──────────────────────────────────────────
        with gr.Column(scale=1):
            with gr.Tabs() as input_tabs:
                with gr.Tab("πΈ Screenshot Scanner", id=0) as tab_img:
                    gr.HTML("<div style='margin-bottom:10px;color:#9ca3af;font-size:0.9rem;'>"
                            "Upload an article screenshot, social media post, or newspaper clipping.</div>")
                    img_input = gr.Image(
                        type="numpy", sources=["upload", "clipboard"],
                        label="Drag screenshot here or paste from clipboard", height=240
                    )
                    ocr_scan_btn = gr.Button("π Extract Screenshot Text", variant="secondary", elem_classes="cyber-btn")
                    # Editable so the user can correct the OCR result before verifying.
                    ocr_output_box = gr.Textbox(
                        label="OCR Extracted Text (Review/Edit before verifying)",
                        lines=5,
                        placeholder="OCR text appears here. Edit for accuracy if needed...",
                        interactive=True
                    )
                    ocr_scan_btn.click(handle_ocr_scan, inputs=img_input, outputs=ocr_output_box)

                with gr.Tab("π Direct Text / Headline", id=1) as tab_txt:
                    gr.HTML("<div style='margin-bottom:10px;color:#9ca3af;font-size:0.9rem;'>"
                            "Paste a rumor headline, WhatsApp forward, news claim, or full article text.</div>")
                    text_input = gr.Textbox(
                        lines=8,
                        placeholder="e.g. 'Breaking: Scientists discover miracle cure but government is hiding it...'",
                        label="Paste news text or headline here"
                    )

            # Keep tab_state in sync with whichever tab the user selects.
            tab_img.select(lambda: "image", outputs=tab_state)
            tab_txt.select(lambda: "text", outputs=tab_state)

            # NOTE(review): placed in the left column, below the tabs — confirm.
            verify_btn = gr.Button(
                "β‘ Analyze & Verify Fact Consensus",
                variant="primary", elem_classes="cyber-btn", size="lg"
            )

        # ── Right column: results ────────────────────────────────────────
        with gr.Column(scale=1):
            # Shown until the first verification is requested.
            placeholder_card = gr.HTML(
                """<div class='intel-card' style='text-align:center;padding:40px 20px;border-style:dashed;'>
                <div style='font-size:3rem;color:#1f2937;margin-bottom:15px;'>π‘</div>
                <h4 style='color:#6b7280;font-family:Outfit,sans-serif;font-size:1.15rem;margin-bottom:8px;'>Telemetry Idle</h4>
                <p style='color:#4b5563;font-size:0.9rem;max-width:320px;margin:0 auto;'>
                Upload an image or paste a news claim, then click Analyze to begin verification.</p>
                </div>""",
                visible=True
            )

            # Created hidden; it is the first output of process_and_verdict.
            verdict_panel = gr.Column(visible=False)
            with verdict_panel:
                banner_output = gr.HTML()
                with gr.Accordion("π View Detailed Confidence & Telemetry", open=False) as confidence_accordion:
                    verdict_html_output = gr.HTML()
                    # NOTE(review): these tabs are assumed to live inside the
                    # accordion; they may instead be its sibling — confirm.
                    with gr.Tabs():
                        with gr.Tab("π Live Web Sources"):
                            sources_markdown_output = gr.HTML()
                        with gr.Tab("π§  AI Intel"):
                            ai_report_html_output = gr.HTML()

    # Main action: run the verification pipeline and fill every result widget.
    # ocr_output_box is both an input and an output of the handler.
    verify_btn.click(
        fn=process_and_verdict,
        inputs=[text_input, ocr_output_box, img_input, tab_state],
        outputs=[
            verdict_panel, banner_output, verdict_html_output,
            ai_report_html_output, sources_markdown_output,
            ocr_output_box, confidence_accordion
        ]
    )
    # Second listener on the same click: hide the idle placeholder card.
    verify_btn.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=placeholder_card)
|
|
# Script entry point: start the Gradio server only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    # NOTE(review): share=True requests a publicly reachable Gradio share link;
    # confirm that public exposure is intended for this deployment.
    app.launch(share=True)