import html
import os
import re
import time
import traceback
from urllib.parse import urlparse

import gradio as gr
import numpy as np
import pytesseract
from duckduckgo_search import DDGS
from PIL import Image, ImageEnhance, ImageOps
from transformers import pipeline
# ==========================================
# 1. Load AI Models
# ==========================================
# Module-level handle to the text-classification pipeline. It stays None when
# loading fails, which makes classify_with_bert() use the heuristic analyzer.
classifier = None
# Text of the load failure, if any (kept for diagnostics).
classifier_error = None
print("Loading BERT Fake News Classifier...")
try:
    # This model: LABEL_0 = Real, LABEL_1 = Fake
    classifier = pipeline(
        "text-classification",
        model="jy46604790/Fake-News-Bert-Detect",
        device=-1,
        truncation=True,
        max_length=512,
    )
    print("BERT Classifier loaded.")
except Exception as e:
    # Best-effort: the app must still start without the model.
    classifier_error = str(e)
    print(f"BERT load failed: {e}")
# ==========================================
# 2. Improved Tone / Heuristic Analyzer
# ==========================================
# Hard fake-specific phrases (almost never in real journalism).
_FAKE_PHRASES = (
    "you won't believe", "mind-blowing", "miracle cure",
    "they don't want you to know", "secret they're hiding",
    "doctors hate", "one weird trick", "illuminati", "plandemic",
    "scamdemic", "sheeple", "share before deleted", "banned video",
    "censored truth", "99% won't share", "forwarded as received",
    "going viral now", "wake up people", "deep state agenda",
    "government is hiding", "they are hiding", "what they don't tell you",
)

# Rumor-forwarding language (matched against lower-cased text).
_RUMOR_PATTERNS = tuple(re.compile(p) for p in (
    r'forwarded as received',
    r'circulating on (whatsapp|telegram|social media)',
    r'cannot be independently verified',
    r'unverified (claim|source|report)',
    r'we cannot confirm',
    r'rumou?r (has it|is spreading|is going around)',
))

# Anonymous source combined with a strong claim.
_ANON_PATTERNS = tuple(re.compile(p) for p in (
    r'anonymous source(s)? (claim|say|report)',
    r'insider (reveals|claims|exposes)',
    r'unnamed official (says|claims)',
    r'unidentified group (claimed|said|announced)',
))

_SENSATIONAL_PUNCT = re.compile(r'\?\!|\!\?')


def analyze_tone_fallback(text):
    """
    Heuristic analyzer — only fires on strong fake-specific signals.

    Returns (risk_score 0-100, label, engine_name).
    Higher score = more likely fake/sensational. The engine name carries a
    ';'-separated list of the signals that contributed to the score.
    """
    words = text.split()
    if not words:
        return 0, "🟢 Neutral / Standard", "Heuristic Engine"

    text_lower = text.lower()
    score = 0
    reasons = []

    # ALL-CAPS overuse (clickbait): share of upper-case words longer than 3 chars.
    caps_count = sum(1 for w in words if w.isupper() and len(w) > 3)
    if caps_count / len(words) > 0.20:
        score += 18
        reasons.append("Excessive ALL-CAPS")

    # Sensational punctuation.
    if text.count("!") >= 3:
        score += 12
        reasons.append("Multiple exclamation marks")
    if _SENSATIONAL_PUNCT.search(text):
        score += 8
        reasons.append("Sensationalist punctuation (?! or !?)")

    # Each phrase hit is worth 18 points, capped at 45 in total.
    hits = [p for p in _FAKE_PHRASES if p in text_lower]
    if hits:
        score += min(len(hits) * 18, 45)
        reasons.append(f"Fake-specific phrases: {', '.join(hits[:3])}")

    rumor_hits = sum(1 for p in _RUMOR_PATTERNS if p.search(text_lower))
    if rumor_hits:
        score += rumor_hits * 15
        reasons.append(f"Rumor-forwarding language ({rumor_hits} pattern(s))")

    # Counted once, however many anonymous-source patterns match.
    if any(p.search(text_lower) for p in _ANON_PATTERNS):
        score += 12
        reasons.append("Anonymous source making strong claim")

    risk_score = min(score, 100)
    if risk_score >= 40:
        label = "🔴 High Risk: Sensationalized / Clickbait Style"
    elif risk_score >= 20:
        label = "🟡 Medium Risk: Slightly Sensational"
    else:
        label = "🟢 Low Risk: Standard Journalistic Style"

    reason_str = "; ".join(reasons) if reasons else "No significant fake signals"
    return risk_score, label, f"Heuristic Engine ({reason_str})"
# ==========================================
# 3. BERT Classification (corrected label mapping)
# ==========================================
def classify_with_bert(text):
    """
    Classify text with the BERT pipeline, falling back to the heuristic analyzer.

    Returns (fake_probability 0-100, label, engine).
    For jy46604790/Fake-News-Bert-Detect:
        LABEL_0 = Real news
        LABEL_1 = Fake news
    The heuristic analyzer is used when the model did not load or when
    inference raises.
    """
    if classifier is None:
        return analyze_tone_fallback(text)
    try:
        result = classifier(text, truncation=True, max_length=512)[0]
        label_raw = result["label"]  # "LABEL_0" or "LABEL_1"
        score = result["score"]      # confidence in that label
        if label_raw == "LABEL_1":
            # Model thinks it's fake, with `score` confidence.
            fake_prob = round(score * 100, 1)
            label = "🔴 High Risk: Model flagged as Fake"
        else:
            # Model thinks it's real, so the fake probability is the complement.
            fake_prob = round((1 - score) * 100, 1)
            label = "🟢 Low Risk: Model classifies as Real"
        return fake_prob, label, f"BERT Deep Neural Classifier ({score*100:.1f}% model confidence)"
    except Exception as e:
        # Best-effort boundary: any inference failure degrades to heuristics.
        print(f"BERT inference failed: {e}")
        return analyze_tone_fallback(text)
# ==========================================
# 4. Outlet Spoof Detector
# ==========================================
# Outlet keyword (as written in the patterns below) -> (display name, real domains).
_OUTLET_DOMAINS = {
    "reuters": ("Reuters", ("reuters.com",)),
    "bbc": ("BBC", ("bbc.com", "bbc.co.uk")),
    "ndtv": ("NDTV", ("ndtv.com",)),
    "cnn": ("CNN", ("cnn.com",)),
    "apnews": ("AP News", ("apnews.com",)),
    "ap news": ("AP News", ("apnews.com",)),
    "the hindu": ("The Hindu", ("thehindu.com",)),
    "indian express": ("Indian Express", ("indianexpress.com",)),
    "bloomberg": ("Bloomberg", ("bloomberg.com",)),
    "aljazeera": ("Al Jazeera", ("aljazeera.com",)),
    "times of india": ("Times of India", ("timesofindia.indiatimes.com", "timesofindia.com")),
    "washington post": ("Washington Post", ("washingtonpost.com",)),
    "new york times": ("New York Times", ("nytimes.com",)),
    "the guardian": ("The Guardian", ("theguardian.com",)),
    "firstpost": ("Firstpost", ("firstpost.com",)),
    "thewire": ("The Wire", ("thewire.in",)),
    "snopes": ("Snopes", ("snopes.com",)),
}

# Phrasings that attribute the text to a credible outlet. The named group
# `outlet` always captures a key of _OUTLET_DOMAINS.
_DECEPTIVE_PATTERNS = tuple(re.compile(p) for p in (
    r'(as reported|according|source|sourced|published|confirmed|breaking news from|exclusive from|via)'
    r'\s*:?\s*((?:by|to|from)\s+)?'
    r'(?P<outlet>reuters|bbc|ndtv|cnn|apnews|ap news|the hindu|indian express|bloomberg|aljazeera'
    r'|times of india|washington post|new york times|the guardian|firstpost|thewire|snopes)',
    r'(?P<outlet>reuters|bbc|ndtv|cnn|apnews)\s*(exclusive|breaking|confirms|confirmed|reports|reported)\b',
    r'\b(?P<outlet>reuters|bbc|ndtv|cnn)\b.{0,15}(verif|confirm|report)',
))


def detect_spoofed_source_label(raw_text):
    """
    Detect text that name-drops a credible outlet without linking to it.

    Returns (True, outlet display name) when an attribution phrase names an
    outlet and none of the URLs in the text is hosted on that outlet's real
    domain; otherwise (False, "").
    """
    text_lower = raw_text.lower()

    # Compare host names only: a path such as evil.example/cnn-report must not
    # count as a link to the outlet.
    hosts = []
    for url in re.findall(r'https?://\S+', raw_text):
        try:
            host = urlparse(url).hostname
        except ValueError:
            continue
        if host:
            hosts.append(host.rstrip("."))

    for pattern in _DECEPTIVE_PATTERNS:
        match = pattern.search(text_lower)
        if not match:
            continue
        display_name, domains = _OUTLET_DOMAINS[match.group("outlet")]
        has_real_url = any(
            host == domain or host.endswith("." + domain)
            for host in hosts
            for domain in domains
        )
        if not has_real_url:
            return True, display_name
    return False, ""
# ==========================================
# 5. URL Verification
# ==========================================
# Registrable domains treated as trustworthy news / fact-check / government sources.
TRUSTED_DOMAINS = [
    "livelaw.in", "barandbench.com", "reuters.com", "apnews.com",
    "bbc.com", "bbc.co.uk", "nytimes.com", "washingtonpost.com",
    "theguardian.com", "indianexpress.com", "thehindu.com",
    "ndtv.com", "bloomberg.com", "economist.com", "snopes.com",
    "politifact.com", "factcheck.org", "altnews.in", "boomlive.in",
    "independent.co.uk", "cnn.com", "aljazeera.com",
    "timesofindia.indiatimes.com", "thewire.in", "firstpost.com",
    "pib.gov.in", "mea.gov.in", "mohfw.gov.in", "hindustantimes.com",
    "scroll.in", "theprint.in", "news18.com", "zeenews.india.com",
    "wionews.com", "businesstoday.in", "livemint.com", "moneycontrol.com",
]


def is_trusted_domain(url):
    """
    Check whether a URL is hosted on a trusted domain or one of its subdomains.

    Returns (is_trusted, domain). `domain` is the lower-cased host name without
    port, credentials or a leading "www."; it is "" when no host can be parsed.
    """
    try:
        # .hostname drops any port/userinfo and is already lower-cased.
        host = urlparse(url).hostname
    except (ValueError, TypeError, AttributeError):
        return False, ""
    if not host:
        return False, ""

    domain = host.rstrip(".")
    # Strip only a leading "www." — removing it anywhere would turn a host like
    # "reuwww.ters.com" into the trusted "reuters.com".
    if domain.startswith("www."):
        domain = domain[len("www."):]

    for trusted in TRUSTED_DOMAINS:
        if domain == trusted or domain.endswith("." + trusted):
            return True, domain
    return False, domain
def fetch_url_title(url):
    """
    Fetch a page and return the plain text of its <title>, or None.

    Best-effort: any failure (missing `requests`, network error, non-200
    status, no title) is logged and yields None. HTML entities in the title are
    decoded, so callers must escape the result before embedding it in markup.
    """
    try:
        import requests
        r = requests.get(url, timeout=5, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        if r.status_code == 200:
            m = re.search(r'<title[^>]*>(.*?)</title>', r.text, re.IGNORECASE | re.DOTALL)
            if m:
                # Decode entities and collapse the whitespace/newlines pages often
                # put inside <title>.
                title = re.sub(r'\s+', ' ', html.unescape(m.group(1))).strip()
                return title or None
    except Exception as e:
        print(f"URL title fetch error: {e}")
    return None
# ==========================================
# 6. OCR
# ==========================================
def normalize_english(text):
    """
    Reduce text to plain English characters for analysis.

    Removes URLs and e-mail addresses, replaces every character other than
    ASCII letters, digits, whitespace and . , ! ? ' " - with a space, then
    collapses whitespace runs. Returns "" for empty or None input.
    """
    if not text:
        return ""
    text = re.sub(r'http\S+|www\.\S+|\S+@\S+', ' ', text)
    text = re.sub(r'[^a-zA-Z0-9\s.,!?\'"\-]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def process_image_to_text(image):
    """
    Run OCR on an image and return the normalized text.

    Accepts a PIL image or an array-like (converted through uint8). The image
    is converted to grayscale and contrast-boosted before Tesseract runs.
    Returns "" when `image` is None and a string starting with
    "OCR Extraction Failed:" when anything in the pipeline raises.
    """
    if image is None:
        return ""
    try:
        if not isinstance(image, Image.Image):
            image = Image.fromarray(np.uint8(image))
        image = ImageOps.grayscale(image)
        image = ImageEnhance.Contrast(image).enhance(2.5)
        text = pytesseract.image_to_string(image, config=r'--oem 3 --psm 3')
        return normalize_english(text)
    except Exception as e:
        traceback.print_exc()
        return f"OCR Extraction Failed: {str(e)}"
# ==========================================
# 7. Web Fact-Check Engine (IMPROVED)
# ==========================================
# Outlets that carry real debunking/fact-check content — their mention of
# "fake" or "false" is EVIDENCE of real journalism, not a fake signal.
# Entries are matched as substrings of a result URL, so some include a path.
FACT_CHECK_OUTLETS = [
"snopes.com","politifact.com","factcheck.org","altnews.in",
"boomlive.in","thequint.com","vishvasnews.com","logically.ai",
"reuters.com/fact-check","apnews.com/hub/ap-fact-check",
"bbc.com/news/reality_check","thehindu.com/specials/fact-check"
]
# Debunk signals — only count when the SOURCE is not a fact-checker reporting on fakes.
# Matched as lower-case substrings of a result's title + snippet.
DEBUNK_KEYWORDS = [
"fake news","hoax","debunked","fabricated","false claim","misleading",
"misinformation","no such attack","no evidence","fact-check finds",
"did not happen","not verified","unverified claim","rumour spreading",
"rumor spreading","manipulated video","out of context","satire",
"this is false","this claim is false","claim is misleading",
"viral claim","incorrect claim"
]
# Signals that confirm REAL reporting (same lower-case substring matching).
REAL_NEWS_SIGNALS = [
"police said","official said","government said","confirmed by",
"spokesperson said","press conference","fir registered","arrested",
"minister said","court order","official statement","pib confirmed",
"reported by","according to officials","sources confirmed",
"eyewitnesses said","investigation reveals","charged with",
"published report","breaking news confirmed","official release"
]
# Words that add nothing to a search query; built once rather than per call.
_QUERY_STOPWORDS = frozenset({
    "the", "and", "but", "or", "for", "with", "about", "against", "from", "into",
    "through", "during", "before", "after", "above", "below", "under", "over",
    "again", "further", "then", "once", "here", "there", "when", "where", "why",
    "how", "all", "any", "both", "each", "few", "more", "most", "other", "some",
    "such", "than", "too", "very", "can", "will", "just", "should", "would",
    "these", "those", "this", "that", "in", "on", "at", "to", "of", "by", "an", "a",
    "is", "are", "was", "were", "be", "has", "have", "had", "says", "said", "its",
    "it", "as", "up", "do", "he", "she", "they", "we", "our", "their", "his", "her",
})


def build_search_query(text):
    """
    Build a focused, short search query from article text.

    Drops URLs and punctuation, removes stop words and words of two characters
    or fewer, and joins the first seven remaining words with spaces.
    """
    clean = re.sub(r'https?://\S+', '', text)
    clean = re.sub(r'[^\w\s]', ' ', clean)
    keywords = [
        w for w in clean.split()
        if len(w) > 2 and w.lower() not in _QUERY_STOPWORDS
    ]
    # Use the first 7 meaningful words for a tight query.
    return " ".join(keywords[:7])
def ddg_search(query, max_results=6, retries=2):
    """
    Search DuckDuckGo with retry logic.

    Makes up to `retries` attempts and returns the first non-empty result list.
    Returns [] when every attempt fails or comes back empty. Failures are
    logged, never raised.
    """
    for attempt in range(retries):
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=max_results))
            if results:
                return results
        except Exception as e:
            print(f"DDG attempt {attempt+1} failed: {e}")
        # Back off between attempts only; sleeping after the last one just
        # delays the empty answer.
        if attempt < retries - 1:
            time.sleep(1)
    return []
def verify_facts_online(text):
    """
    Multi-pass web fact-checking.

    Returns: (results_list, html_markdown, hits_count, debunk_score, real_corroboration)

    KEY LOGIC (per search result):
    - Fact-check site with debunk keywords → debunk_score += 2
    - Fact-check site without debunk keywords → real_corroboration += 1
    - Trusted news domain with 2+ debunk keywords and no real-news signals
      → debunk_score += 1; otherwise → real_corroboration += 1
    - Other site with 2+ debunk keywords and no real-news signals
      → debunk_score += 1; with real-news signals → real_corroboration += 1
    - Query too short or nothing found → a single "UNVERIFIED" card and zeros
    """
    query = build_search_query(text)
    if len(query.split()) < 2:
        no_result_html = _card("UNVERIFIED", "Headline Too Short",
                               "The text is too short to run a meaningful web search.",
                               "debunk", "#fb923c")
        return [], no_result_html, 0, 0, 0

    print(f"[Web Search] Query: '{query}'")
    # Pass 1: full query
    results = ddg_search(query, max_results=8)
    # Pass 2: shorter query fallback
    if not results:
        short_q = " ".join(query.split()[:4])
        print(f"[Web Search] Fallback query: '{short_q}'")
        results = ddg_search(short_q, max_results=5)
    if not results:
        no_result_html = _card(
            "UNVERIFIED", "No Online Matches Found",
            "No mainstream news outlets or fact-check databases are reporting this claim. "
            "This may be a fabricated rumor, hyper-local event, or newly generated hoax. "
            "Treat with caution until a direct source is found.",
            "debunk", "#fb923c")
        return [], no_result_html, 0, 0, 0

    cards = []
    debunk_score = 0
    real_corroboration = 0
    for idx, res in enumerate(results):
        title = res.get('title') or 'Source'
        body = res.get('body') or ''
        link = res.get('href') or '#'
        combined = (title + " " + body).lower()
        link_lower = link.lower()

        # Fact-check entries may include a URL path, hence the substring test.
        is_fact_check_site = any(fc in link_lower for fc in FACT_CHECK_OUTLETS)
        # Host-based test: a substring test would also accept URLs that merely
        # mention a trusted domain in their path or query string.
        is_trusted_news = is_trusted_domain(link)[0]
        debunk_hits = sum(1 for kw in DEBUNK_KEYWORDS if kw in combined)
        real_hits = sum(1 for sig in REAL_NEWS_SIGNALS if sig in combined)

        if is_fact_check_site:
            if debunk_hits > 0:
                # Fact-checker found it FALSE
                debunk_score += 2
                status = _badge("🚨 FACT-CHECKER: FALSE", "danger")
                card_class = "source-debunk"
            else:
                # Fact-checker article exists but doesn't debunk it
                real_corroboration += 1
                status = _badge("✅ FACT-CHECK CORROBORATED", "success")
                card_class = "source-credible"
        elif is_trusted_news:
            if debunk_hits >= 2 and real_hits == 0:
                # Trusted news reporting it AS fake news
                debunk_score += 1
                status = _badge("⚠️ REPORTED AS MISINFORMATION", "warning")
                card_class = "source-debunk"
            else:
                # Trusted news covering the story normally
                real_corroboration += 1
                status = _badge("✅ TRUSTED SOURCE", "success")
                card_class = "source-credible"
        else:
            # General web result
            if debunk_hits >= 2 and real_hits == 0:
                debunk_score += 1
                status = _badge("🚨 DEBUNK SIGNALS", "danger")
                card_class = "source-debunk"
            elif real_hits > 0:
                real_corroboration += 1
                status = _badge("✅ NEWS CORROBORATION", "success")
                card_class = "source-credible"
            else:
                status = _badge("➖ RELATED RESULT", "neutral")
                card_class = "source-credible"

        # Truncate body for display
        display_body = body[:280] + ("..." if len(body) > 280 else "")
        # Search results are untrusted: escape all text and only link http(s) URLs.
        if link_lower.startswith(("http://", "https://")):
            safe_link = html.escape(link, quote=True)
        else:
            safe_link = "#"
        cards.append(
            f"<div class='source-card {card_class}'>"
            f"<div class='source-header'><span class='source-idx'>#{idx + 1}</span>"
            f"<h4>{html.escape(title)}</h4>{status}</div>"
            f"<p class='source-body'>\"{html.escape(display_body)}\"</p>"
            f"<div class='source-footer'><a class='source-link' href='{safe_link}' "
            f"target='_blank' rel='noopener noreferrer'>View source</a></div>"
            f"</div>\n"
        )

    return results, "".join(cards), len(results), debunk_score, real_corroboration
def _badge(text, kind):
colors = {
"success": ("rgba(16,185,129,0.1)", "#10b981", "rgba(16,185,129,0.15)"),
"danger": ("rgba(239,68,68,0.1)", "#ef4444", "rgba(239,68,68,0.15)"),
"warning": ("rgba(245,158,11,0.1)", "#f59e0b", "rgba(245,158,11,0.15)"),
"neutral": ("rgba(100,116,139,0.1)","#64748b", "rgba(100,116,139,0.15)"),
}
bg, color, border = colors.get(kind, colors["neutral"])
return (f"{text}")
def _card(idx_label, title, body_text, cls, color):
return (
f""
)
# ==========================================
# 8. MAIN VERDICT ENGINE (Redesigned)
# ==========================================
def compute_verdict(is_url_verified, verified_domain_name,
                    hits_count, debunk_score, real_corroboration,
                    bert_fake_prob, is_label_spoofed, is_url_verified_flag):
    """
    Combine URL, web and classifier signals into a final verdict.

    Returns (reliability %, title, title CSS class, color, description HTML).

    DECISION HIERARCHY (first match wins):
    1. Direct trusted URL present → 96, or 78 when the classifier score is ≥ 50
    2. debunk_score ≥ 3 → 6; == 2 → 15; == 1 → 32
    3. real_corroboration ≥ 3 → 88 + count (max 95); == 2 → 80, or 68 when the
       classifier score is > 60; == 1 → 68, or 50 when the score is ≥ 40
    4. Web results but no clear signal → 58, or 38 when the score is ≥ 35
    5. No web results → classifier only: 22 (≥ 65), 38 (≥ 40), else 52

    Outside case 1, an unverified outlet claim subtracts 12 points, bounded by
    a per-branch floor.
    """
    genuine = ("verdict-genuine-title", "#10b981")
    sensational = ("verdict-sensationalized-title", "#f59e0b")
    unverified = ("verdict-unverified-title", "#fb923c")
    fabricated = ("verdict-fabricated-title", "#ef4444")
    tip = "<br><b>Tip:</b> Paste the direct article URL if you have one."

    spoof_penalty = 12 if is_label_spoofed and not is_url_verified_flag else 0

    # ── Case 1: Direct trusted URL ──
    if is_url_verified:
        domain = html.escape(verified_domain_name)
        if bert_fake_prob < 50:
            return 96, "🌟 VERIFIED GENUINE", *genuine, (
                f"Directly linked to trusted domain {domain} "
                f"and written in an objective style. High confidence this is genuine."
            )
        return 78, "🔎 SENSATIONALIZED — CORE FACTS REAL", *sensational, (
            f"Verified via {domain} but writing style is sensational. "
            f"Core facts are likely authentic; specific details may be exaggerated."
        )

    # ── Case 2: Fact-checker or multiple debunk signals ──
    if debunk_score >= 3:
        return max(6 - spoof_penalty, 3), "🚨 FABRICATED / DEBUNKED", *fabricated, (
            "Multiple fact-checkers and credible sources have debunked this claim. "
            "This is almost certainly misinformation. Do NOT share."
        )
    if debunk_score == 2:
        return max(15 - spoof_penalty, 5), "🚨 LIKELY FAKE / DEBUNKED", *fabricated, (
            "Two or more credible sources flag this as false or misleading. "
            "Strong evidence this is misinformation."
        )
    if debunk_score == 1:
        return max(32 - spoof_penalty, 10), "⚠️ SUSPICIOUS CLAIM", *fabricated, (
            "At least one credible source contradicts or flags this claim. "
            "Treat with strong skepticism and verify from primary sources."
        )

    # ── Case 3: Real corroboration found ──
    if real_corroboration >= 3:
        rel = min(88 + real_corroboration - spoof_penalty, 95)
        return rel, "🌟 VERIFIED GENUINE", *genuine, (
            f"Found in {real_corroboration} credible/trusted sources with no debunking signals. "
            f"High confidence this is genuine news."
        )
    if real_corroboration == 2:
        if bert_fake_prob > 60:
            # The spoof penalty applies here too; otherwise a spoofed,
            # sensational text would score the same as a spoofed, objective one.
            return max(68 - spoof_penalty, 50), "🔎 LIKELY REAL — VERIFY DETAILS", *sensational, (
                "Found in 2 credible sources, but writing style raises some flags. "
                "Core story appears real; verify specific claims independently."
            )
        return 80 - spoof_penalty, "🌟 VERIFIED GENUINE", *genuine, (
            "Found in 2 credible sources with objective writing style. "
            "High confidence this is genuine."
        )
    if real_corroboration == 1:
        if bert_fake_prob < 40:
            return max(68 - spoof_penalty, 50), "🔎 LIKELY REAL — NEEDS MORE SOURCES", *sensational, (
                "Found in one credible source with objective writing. "
                "Likely genuine but seek additional confirmation."
            )
        return max(50 - spoof_penalty, 30), "⚠️ UNVERIFIED — MIXED SIGNALS", *unverified, (
            "Only one corroborating source found and writing style is questionable. "
            "Exercise caution and verify from a primary source."
        )

    # ── Case 4: Web results exist but no clear real/fake signal ──
    if hits_count > 0:
        if bert_fake_prob < 35:
            return max(58 - spoof_penalty, 40), "⚠️ UNVERIFIED — PROBABLY REAL", *unverified, (
                "Some web results found but from non-trusted domains. "
                "Writing style appears objective. Likely real but needs a primary source link."
            )
        return max(38 - spoof_penalty, 20), "⚠️ UNVERIFIED — SUSPICIOUS", *unverified, (
            "Some web results found but content is not clearly corroborated by trusted outlets. "
            "Sensational writing style detected. Verify before sharing."
        )

    # ── Case 5: No web results — rely on BERT ──
    if bert_fake_prob >= 65:
        return max(22 - spoof_penalty, 8), "⚠️ UNVERIFIED — HIGH FAKE RISK", *unverified, (
            "No online corroboration found AND the AI model flags this as likely fake. "
            "This is possibly a fabricated or circulating rumor. " + tip
        )
    if bert_fake_prob >= 40:
        return max(38 - spoof_penalty, 20), "⚠️ UNVERIFIED — UNCERTAIN", *unverified, (
            "No online corroboration found. Could be a very recent, hyper-local, or fabricated story. "
            "Seek a direct source before sharing. " + tip
        )
    return max(52 - spoof_penalty, 35), "⚠️ UNVERIFIED — POSSIBLY REAL", *unverified, (
        "No mainstream coverage found, but writing style appears legitimate. "
        "Could be a hyper-local or very recent story. "
        "Seek a primary source before sharing. " + tip
    )
# ==========================================
# 9. Master Process Function
# ==========================================
def _error_outputs(heading, message, ocr_value):
    """Build the seven Gradio outputs for an input error, with the banner visible."""
    banner = (
        f"<div class='verdict-banner-false'>⚠️ {heading}</div>"
        f"<p class='verdict-desc' style='text-align:center;'>{message}</p>"
    )
    # The banner lives inside the verdict panel, so the panel must be shown.
    return (gr.update(visible=True), banner, "", "", "", ocr_value, gr.update(open=False))


def _metric(value, label, value_style=""):
    """Render one value/label pair of the verdict metrics row."""
    return (
        f"<div class='metric-item'><span class='metric-val'{value_style}>{value}</span>"
        f"<span class='metric-lbl'>{label}</span></div>"
    )


def _intel_item(heading, value_html):
    """Render one heading/value entry of the AI report card."""
    return (
        f"<div class='intel-item'><h5>{heading}</h5>"
        f"<div class='intel-val'>{value_html}</div></div>"
    )


def process_and_verdict(text_input, ocr_output, img_input, source_tab):
    """
    Run the full verification pipeline for the active input tab.

    `source_tab` is "image" (use the OCR text box, running OCR on `img_input`
    when the box is empty) or anything else (use `text_input`).

    Returns the seven values bound to the Analyze button's outputs, in order:
    verdict panel update, banner HTML, verdict dashboard HTML, AI report HTML,
    web sources HTML, OCR text box value, accordion update.
    """
    updated_ocr = ocr_output
    if source_tab == "image":
        if ocr_output and ocr_output.strip():
            raw_text = ocr_output
        elif img_input is None:
            return _error_outputs(
                "Input Error",
                "Please upload an image or extract OCR text first.",
                ocr_output,
            )
        else:
            raw_text = process_image_to_text(img_input)
            # An OCR failure message is not news text; report it instead of
            # analyzing it, and leave the OCR box untouched so a retry re-runs OCR.
            if raw_text.startswith("OCR Extraction Failed"):
                return _error_outputs("OCR Failed", html.escape(raw_text), ocr_output)
            updated_ocr = raw_text
    else:
        raw_text = text_input or ""

    # ── Extract URLs: the first trusted one verifies the text ──
    is_url_verified = False
    verified_domain_name = ""
    verified_url_card = ""
    for url in re.findall(r'(https?://\S+)', raw_text):
        is_trusted, domain = is_trusted_domain(url)
        if not is_trusted:
            continue
        is_url_verified = True
        verified_domain_name = domain
        page_title = fetch_url_title(url) or f"Verified Article on {domain.title()}"
        # The title comes from a remote page and the URL from user text: escape both.
        verified_url_card = (
            f"<div class='source-card source-credible'>"
            f"<div class='source-header'><span class='source-idx'>URL</span>"
            f"<h4>{html.escape(page_title)}</h4>{_badge('✅ VERIFIED DOMAIN', 'success')}</div>"
            f"<p class='source-body'>Direct link verified from trusted domain "
            f"<b>{html.escape(domain)}</b>.</p>"
            f"<div class='source-footer'><a class='source-link' "
            f"href='{html.escape(url, quote=True)}' target='_blank' "
            f"rel='noopener noreferrer'>View source</a></div>"
            f"</div>"
        )
        break

    # ── Spoof detection ──
    is_label_spoofed, spoofed_outlet = detect_spoofed_source_label(raw_text)
    show_spoof_warning = is_label_spoofed and not is_url_verified
    spoofed_warning_card = ""
    if show_spoof_warning:
        spoofed_warning_card = (
            f"<div class='source-card source-debunk'>"
            f"<div class='source-header'><span class='source-idx'>⚠️</span>"
            f"<h4>Unverified Outlet Claim</h4>{_badge('⚠️ OUTLET UNVERIFIED', 'warning')}</div>"
            f"<p class='source-body'>Content claims to be from <b>{html.escape(spoofed_outlet)}</b> "
            f"but no verified URL from that outlet was found. "
            f"This is a common credibility manipulation tactic.</p>"
            f"</div>"
        )

    # ── Clean text for analysis ──
    cleaned_text = normalize_english(raw_text)
    if not cleaned_text or len(cleaned_text) < 10:
        return _error_outputs(
            "Too Short",
            "Please provide a full sentence or news headline (min 10 characters).",
            updated_ocr,
        )

    # ── BERT Classification ──
    bert_fake_prob, bert_label, bert_engine = classify_with_bert(cleaned_text)

    # ── Web Fact-Check ──
    search_results, web_markdown, hits_count, debunk_score, real_corroboration = \
        verify_facts_online(cleaned_text)

    # Prepend cards (the verified-URL card ends up first).
    if spoofed_warning_card:
        web_markdown = spoofed_warning_card + "\n" + web_markdown
    if is_url_verified:
        web_markdown = verified_url_card + "\n" + web_markdown

    # ── Compute final verdict ──
    reliability, verdict_title, verdict_class, verdict_color, verdict_desc = compute_verdict(
        is_url_verified, verified_domain_name,
        hits_count, debunk_score, real_corroboration,
        bert_fake_prob, is_label_spoofed, is_url_verified
    )

    # ── Build Verdict HTML ──
    debunk_plural = "s" if debunk_score != 1 else ""
    metrics = [
        _metric(html.escape(bert_label.split(":")[0]), "AI CLASSIFICATION"),
        _metric(f"{real_corroboration} trusted", "REAL SOURCES FOUND"),
        _metric(f"{debunk_score} flag{debunk_plural}", "DEBUNK SIGNALS"),
        _metric(f"{hits_count} total", "WEB RESULTS"),
    ]
    if show_spoof_warning:
        metrics.append(_metric("⚠️ CLAIMED", "OUTLET UNVERIFIED", " style='color:#f59e0b;'"))

    verdict_html = (
        f"<div class='verdict-dashboard'>"
        f"<div class='verdict-score-wrapper'>"
        f"<div class='verdict-circle-progress' "
        f"style='background:conic-gradient({verdict_color} {reliability}%, #1f2937 0);'>"
        f"<div class='verdict-circle-inner'>"
        f"<span class='verdict-score'>{reliability}%</span>"
        f"<span class='verdict-score-label'>RELIABILITY</span>"
        f"</div></div></div>"
        f"<div class='verdict-details'>"
        f"<div class='verdict-title {verdict_class}'>{verdict_title}</div>"
        f"<div class='verdict-desc'>{verdict_desc}</div>"
        f"<div class='verdict-metrics'>{''.join(metrics)}</div>"
        f"</div></div>"
    )

    shown_text = html.escape(cleaned_text[:800]) + ("..." if len(cleaned_text) > 800 else "")
    intel_items = [
        _intel_item("Classifier Engine", html.escape(bert_engine)),
        _intel_item("AI Classification", html.escape(bert_label)),
        _intel_item("Fake Probability (AI)", f"{bert_fake_prob}%"),
        _intel_item(
            "Web Corroboration",
            f"{real_corroboration} trusted source(s) | "
            f"{debunk_score} debunk signal{debunk_plural}",
        ),
    ]
    if show_spoof_warning:
        intel_items.append(_intel_item(
            "⚠️ Outlet Claim",
            f"\"{html.escape(spoofed_outlet)}\" claimed without verified URL.",
        ))
    intel_items.append(_intel_item("Analyzed Text", shown_text))
    ai_report_html = f"<div class='intel-card'>{''.join(intel_items)}</div>"

    if reliability >= 60:
        banner_html = "<div class='verdict-banner-true'>🟢 LIKELY TRUE NEWS</div>"
    else:
        banner_html = "<div class='verdict-banner-false'>🔴 LIKELY FALSE / UNVERIFIED</div>"

    return (
        gr.update(visible=True),
        banner_html,
        verdict_html,
        ai_report_html,
        web_markdown,
        updated_ocr,
        gr.update(open=False)
    )
def handle_ocr_scan(img):
    """Return the OCR text for `img`, or a notice when no image was uploaded."""
    if img is None:
        return "⚠️ Image not uploaded yet."
    return process_image_to_text(img)
# ==========================================
# 10. Stylesheet
# ==========================================
css = """
@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;800;900&family=Inter:wght@300;400;600;700&display=swap');
body { background-color: #0b0f19 !important; }
.gradio-container {
background-color: #0b0f19 !important;
font-family: 'Inter','Outfit',sans-serif !important;
color: #f1f5f9 !important;
max-width: 1200px !important;
margin: 0 auto !important;
padding: 20px !important;
}
.cyber-title {
text-align: center;
background: linear-gradient(135deg,#06b6d4 0%,#10b981 50%,#3b82f6 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 3rem; font-weight: 900;
margin-top: 10px; margin-bottom: 2px;
letter-spacing: -0.04em;
font-family: 'Outfit',sans-serif !important;
}
.cyber-subtitle {
text-align: center; color: #94a3b8;
font-size: 1.15rem; margin-bottom: 35px;
font-weight: 400;
}
.block {
background-color: #111827 !important;
border: 1px solid #1f2937 !important;
border-radius: 16px !important;
box-shadow: 0 15px 35px -10px rgba(0,0,0,0.6) !important;
overflow: hidden !important;
}
textarea, input[type="text"] {
background-color: #030712 !important;
border: 1px solid #1f2937 !important;
color: #f1f5f9 !important;
font-size: 0.95rem !important;
border-radius: 8px !important;
}
textarea:focus, input[type="text"]:focus {
border-color: #06b6d4 !important;
box-shadow: 0 0 10px rgba(6,182,212,0.2) !important;
}
.tab-nav {
border-bottom: 1px solid #1f2937 !important;
background-color: #0b0f19 !important;
padding: 8px 12px 0 12px !important;
}
.tab-nav button {
color: #6b7280 !important; font-weight: 700 !important;
font-size: 0.9rem !important; border: none !important;
background: transparent !important; padding: 10px 18px !important;
border-radius: 8px 8px 0 0 !important;
}
.tab-nav button.selected {
color: #06b6d4 !important; background-color: #111827 !important;
border: 1px solid #1f2937 !important;
border-bottom: 1px solid #111827 !important;
}
.cyber-btn {
background: linear-gradient(135deg,#111827 0%,#1f2937 100%) !important;
border: 1px dashed #06b6d4 !important; color: #06b6d4 !important;
font-weight: 800 !important; letter-spacing: 0.04em;
text-transform: uppercase;
border-radius: 12px !important;
font-family: 'Outfit',sans-serif !important;
}
.cyber-btn:hover {
background: linear-gradient(135deg,#06b6d4 0%,#3b82f6 100%) !important;
color: #ffffff !important; border-style: solid !important;
box-shadow: 0 0 20px rgba(6,182,212,0.5) !important;
}
.verdict-dashboard {
display: flex; align-items: center;
background: linear-gradient(135deg,#030712 0%,#111827 100%);
border: 1px solid #1f2937; border-radius: 16px;
padding: 24px; gap: 24px; margin-bottom: 25px;
}
@media(max-width:768px){.verdict-dashboard{flex-direction:column;text-align:center;}}
.verdict-score-wrapper{flex-shrink:0;}
.verdict-circle-progress {
width:140px; height:140px; border-radius:50%;
display:flex; align-items:center; justify-content:center;
box-shadow:0 8px 16px rgba(0,0,0,0.4);
}
.verdict-circle-inner {
width:114px; height:114px; background-color:#0b0f19; border-radius:50%;
display:flex; flex-direction:column; align-items:center; justify-content:center;
}
.verdict-score{font-size:2.1rem;font-weight:900;color:#fff;line-height:1;font-family:'Outfit',sans-serif !important;}
.verdict-score-label{font-size:0.65rem;color:#6b7280;text-transform:uppercase;letter-spacing:0.12em;margin-top:4px;}
.verdict-details{flex-grow:1;}
.verdict-title{
font-size:1.7rem;font-weight:900;letter-spacing:-0.02em;
margin-bottom:6px;text-transform:uppercase;font-family:'Outfit',sans-serif !important;
}
.verdict-desc{font-size:0.95rem;color:#9ca3af;line-height:1.5;margin-bottom:16px;}
.verdict-metrics{
display:flex;gap:20px;flex-wrap:wrap;
border-top:1px solid #1f2937;padding-top:14px;
}
.metric-item{display:flex;flex-direction:column;}
.metric-val{font-size:0.9rem;font-weight:700;color:#f3f4f6;}
.metric-lbl{font-size:0.65rem;color:#4b5563;text-transform:uppercase;letter-spacing:0.05em;}
.verdict-genuine-title {color:#10b981;text-shadow:0 0 20px rgba(16,185,129,0.35);}
.verdict-sensationalized-title{color:#f59e0b;text-shadow:0 0 20px rgba(245,158,11,0.35);}
.verdict-unverified-title {color:#fb923c;text-shadow:0 0 20px rgba(251,146,60,0.35);}
.verdict-fabricated-title {color:#ef4444;text-shadow:0 0 20px rgba(239,68,68,0.35);}
.source-card{
background-color:#030712;border:1px solid #1f2937;
border-radius:12px;padding:16px;margin-bottom:14px;
}
.source-card.source-credible{border-left:4px solid #10b981;}
.source-card.source-debunk{border-left:4px solid #ef4444;}
.source-header{
display:flex;justify-content:space-between;align-items:center;
margin-bottom:10px;gap:12px;flex-wrap:wrap;
}
.source-idx{
font-size:0.75rem;font-weight:800;background-color:#111827;
color:#9ca3af;padding:2px 7px;border-radius:4px;white-space:nowrap;
}
.source-header h4{margin:0;font-size:0.95rem;font-weight:700;color:#fff;flex-grow:1;line-height:1.35;}
.badge{font-size:0.65rem;font-weight:800;padding:3px 9px;border-radius:20px;text-transform:uppercase;white-space:nowrap;}
.source-body{font-size:0.85rem;color:#9ca3af;line-height:1.45;margin:0 0 12px 0;font-style:italic;}
.source-footer{display:flex;justify-content:flex-end;}
.source-link{font-size:0.75rem;color:#06b6d4;text-decoration:none;font-weight:700;}
.source-link:hover{color:#3b82f6;text-decoration:underline;}
.intel-card{background-color:#030712;border:1px solid #1f2937;border-radius:12px;padding:20px;}
.intel-item{margin-bottom:18px;}
.intel-item:last-child{margin-bottom:0;}
.intel-item h5{
margin:0 0 6px 0;font-size:0.8rem;color:#4b5563;
text-transform:uppercase;letter-spacing:0.06em;
}
.intel-val{font-size:1.05rem;font-weight:600;color:#fff;}
.verdict-banner-true{
text-align:center;
background:linear-gradient(135deg,rgba(16,185,129,0.08) 0%,rgba(16,185,129,0.18) 100%);
border:2px solid #10b981;color:#10b981;font-size:1.3rem;font-weight:800;
padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase;
font-family:'Outfit',sans-serif !important;
}
.verdict-banner-false{
text-align:center;
background:linear-gradient(135deg,rgba(239,68,68,0.08) 0%,rgba(239,68,68,0.18) 100%);
border:2px solid #ef4444;color:#ef4444;font-size:1.3rem;font-weight:800;
padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase;
font-family:'Outfit',sans-serif !important;
}
"""
# ==========================================
# 11. Gradio App
# ==========================================
# Two-column layout: inputs (screenshot/OCR tab and text tab) on the left,
# idle placeholder and verdict panel on the right.
with gr.Blocks(css=css, theme=gr.themes.Base(), title="True Fact Checker & Fake News Detector") as app:
    # Which input tab is active: "image" or "text". The image tab is shown first.
    tab_state = gr.State(value="image")
    gr.HTML("<div class='cyber-title'>⚖️ TRUE FACT CHECKER</div>")
    gr.HTML(
        "<div class='cyber-subtitle'>Web-First Verification · BERT Neural Analysis · "
        "Live Fact-Check Consensus</div>"
    )
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Tabs() as input_tabs:
                with gr.Tab("📸 Screenshot Scanner", id=0) as tab_img:
                    gr.HTML(
                        "<p style='color:#94a3b8;font-size:0.9rem;margin:0 0 10px 0;'>"
                        "Upload an article screenshot, social media post, or newspaper clipping.</p>"
                    )
                    img_input = gr.Image(
                        type="numpy", sources=["upload", "clipboard"],
                        label="Drag screenshot here or paste from clipboard", height=240
                    )
                    ocr_scan_btn = gr.Button(
                        "🔍 Extract Screenshot Text", variant="secondary", elem_classes="cyber-btn"
                    )
                    ocr_output_box = gr.Textbox(
                        label="OCR Extracted Text (Review/Edit before verifying)",
                        lines=5,
                        placeholder="OCR text appears here. Edit for accuracy if needed...",
                        interactive=True
                    )
                    ocr_scan_btn.click(handle_ocr_scan, inputs=img_input, outputs=ocr_output_box)
                with gr.Tab("📝 Direct Text / Headline", id=1) as tab_txt:
                    gr.HTML(
                        "<p style='color:#94a3b8;font-size:0.9rem;margin:0 0 10px 0;'>"
                        "Paste a rumor headline, WhatsApp forward, news claim, or full article text.</p>"
                    )
                    text_input = gr.Textbox(
                        lines=8,
                        placeholder="e.g. 'Breaking: Scientists discover miracle cure but government is hiding it...'",
                        label="Paste news text or headline here"
                    )
            # Track the active tab so the pipeline knows which input to read.
            tab_img.select(lambda: "image", outputs=tab_state)
            tab_txt.select(lambda: "text", outputs=tab_state)
            verify_btn = gr.Button(
                "⚡ Analyze & Verify Fact Consensus",
                variant="primary", elem_classes="cyber-btn", size="lg"
            )
        with gr.Column(scale=1):
            placeholder_card = gr.HTML(
                """
                <div style='text-align:center;padding:60px 20px;color:#6b7280;'>
                <div style='font-size:3rem;'>📡</div>
                <h3 style='color:#9ca3af;margin:10px 0 6px 0;'>Telemetry Idle</h3>
                <p>Upload an image or paste a news claim, then click Analyze to begin verification.</p>
                </div>
                """,
                visible=True
            )
            verdict_panel = gr.Column(visible=False)
            with verdict_panel:
                banner_output = gr.HTML()
                with gr.Accordion("📊 View Detailed Confidence & Telemetry", open=False) as confidence_accordion:
                    verdict_html_output = gr.HTML()
                    with gr.Tabs():
                        with gr.Tab("🌐 Live Web Sources"):
                            sources_markdown_output = gr.HTML()
                        with gr.Tab("🧠 AI Intel"):
                            ai_report_html_output = gr.HTML()
    verify_btn.click(
        fn=process_and_verdict,
        inputs=[text_input, ocr_output_box, img_input, tab_state],
        outputs=[
            verdict_panel, banner_output, verdict_html_output,
            ai_report_html_output, sources_markdown_output,
            ocr_output_box, confidence_accordion
        ]
    )
    # Second listener on the same click hides the idle placeholder.
    verify_btn.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=placeholder_card)
# Start the web UI only when run as a script; share=True also requests a
# public Gradio share link.
if __name__ == "__main__":
    app.launch(share=True)