Spaces:

jkg012
/

FNN

Sleeping

App Files Files Community

FNN / app.py

jkg012

Update app.py

fe536af verified 12 days ago

raw

history blame contribute delete

43.5 kB

	import gradio as gr
	from transformers import pipeline
	from PIL import Image, ImageEnhance, ImageOps
	from duckduckgo_search import DDGS
	import pytesseract
	import numpy as np
	import re
	import os
	import traceback
	import time

	# ==========================================
	# 1. Load AI Models
	# ==========================================
	# Module-level handle to the text-classification pipeline. It stays None when
	# loading fails; the failure text is kept in classifier_error.
	classifier = None
	classifier_error = None

	print("Loading BERT Fake News Classifier...")
	try:
	    # Label convention of this checkpoint: LABEL_0 = Real, LABEL_1 = Fake
	    _pipeline_options = {
	        "model": "jy46604790/Fake-News-Bert-Detect",
	        "device": -1,
	        "truncation": True,
	        "max_length": 512,
	    }
	    classifier = pipeline("text-classification", **_pipeline_options)
	    print("BERT Classifier loaded.")
	except Exception as load_exc:
	    classifier_error = str(load_exc)
	    print(f"BERT load failed: {load_exc}")


	# ==========================================
	# 2. Improved Tone / Heuristic Analyzer
	# ==========================================
	# Hard fake-specific phrases (almost never found in real journalism).
	_FAKE_PHRASES = (
	    "you won't believe", "mind-blowing", "miracle cure",
	    "they don't want you to know", "secret they're hiding",
	    "doctors hate", "one weird trick", "illuminati", "plandemic",
	    "scamdemic", "sheeple", "share before deleted", "banned video",
	    "censored truth", "99% won't share", "forwarded as received",
	    "going viral now", "wake up people", "deep state agenda",
	    "government is hiding", "they are hiding", "what they don't tell you",
	)

	# Rumor-forwarding language. Compiled once; matched against lower-cased text.
	_RUMOR_PATTERNS = tuple(re.compile(p) for p in (
	    r'forwarded as received',
	    r'circulating on (whatsapp|telegram|social media)',
	    r'cannot be independently verified',
	    r'unverified (claim|source|report)',
	    r'we cannot confirm',
	    r'rumou?r (has it|is spreading|is going around)',
	))

	# Anonymous source combined with a strong claim.
	_ANON_PATTERNS = tuple(re.compile(p) for p in (
	    r'anonymous source(s)? (claim|say|report)',
	    r'insider (reveals|claims|exposes)',
	    r'unnamed official (says|claims)',
	    r'unidentified group (claimed|said|announced)',
	))

	# "?!" or "!?" anywhere in the text.
	_MIXED_PUNCTUATION = re.compile(r'\?!|!\?')


	def analyze_tone_fallback(text):
	    """
	    Heuristic analyzer that only fires on strong fake-specific signals.

	    Args:
	        text: The text to score. None or whitespace-only text is treated
	            as having no signals.

	    Returns:
	        (risk_score, label, engine_name) where risk_score is an int in
	        0-100 (higher = more likely fake/sensational), label is the
	        risk-band string and engine_name names this engine together with
	        the reasons that contributed to the score.
	    """
	    text = text or ""
	    words = text.split()
	    if not words:
	        return 0, "🟢 Neutral / Standard", "Heuristic Engine"

	    text_lower = text.lower()
	    score = 0
	    reasons = []

	    # ALL-CAPS overuse (clickbait): more than 20% of words are upper-case
	    # words longer than three characters.
	    caps_count = sum(1 for w in words if w.isupper() and len(w) > 3)
	    if caps_count / len(words) > 0.20:
	        score += 18
	        reasons.append("Excessive ALL-CAPS")

	    # Sensational punctuation
	    if text.count("!") >= 3:
	        score += 12
	        reasons.append("Multiple exclamation marks")
	    if _MIXED_PUNCTUATION.search(text):
	        score += 8
	        reasons.append("Sensationalist punctuation (?! or !?)")

	    # Fake-specific phrases: 18 points each, capped at 45.
	    hits = [p for p in _FAKE_PHRASES if p in text_lower]
	    if hits:
	        score += min(len(hits) * 18, 45)
	        reasons.append(f"Fake-specific phrases: {', '.join(hits[:3])}")

	    # Rumor forwarding language: 15 points per matching pattern.
	    rumor_hits = sum(1 for p in _RUMOR_PATTERNS if p.search(text_lower))
	    if rumor_hits:
	        score += rumor_hits * 15
	        reasons.append(f"Rumor-forwarding language ({rumor_hits} pattern(s))")

	    # Anonymous + strong claim combo: counted at most once.
	    if any(p.search(text_lower) for p in _ANON_PATTERNS):
	        score += 12
	        reasons.append("Anonymous source making strong claim")

	    risk_score = min(score, 100)
	    if risk_score >= 40:
	        label = "🔴 High Risk: Sensationalized / Clickbait Style"
	    elif risk_score >= 20:
	        label = "🟡 Medium Risk: Slightly Sensational"
	    else:
	        label = "🟢 Low Risk: Standard Journalistic Style"

	    reason_str = "; ".join(reasons) if reasons else "No significant fake signals"
	    return risk_score, label, f"Heuristic Engine ({reason_str})"


	# ==========================================
	# 3. BERT Classification (corrected label mapping)
	# ==========================================
	def classify_with_bert(text):
	    """
	    Score text with the loaded classifier, or with the heuristic fallback.

	    Returns (fake_probability 0-100, label, engine).
	    For jy46604790/Fake-News-Bert-Detect:
	        LABEL_0 = Real news
	        LABEL_1 = Fake news

	    When no classifier is loaded, or inference raises, the result of
	    analyze_tone_fallback(text) is returned instead.
	    """
	    if classifier is None:
	        return analyze_tone_fallback(text)

	    try:
	        prediction = classifier(text, truncation=True, max_length=512)[0]
	        raw_label = prediction["label"]    # "LABEL_0" or "LABEL_1"
	        confidence = prediction["score"]   # confidence in that label

	        if raw_label == "LABEL_1":
	            # Flagged fake: the confidence is the fake probability.
	            fake_prob = round(confidence * 100, 1)
	            label = "🔴 High Risk: Model flagged as Fake"
	        else:
	            # Classified real: the fake probability is the complement.
	            fake_prob = round((1 - confidence) * 100, 1)
	            label = "🟢 Low Risk: Model classifies as Real"

	        engine = f"BERT Deep Neural Classifier ({confidence*100:.1f}% model confidence)"
	        return fake_prob, label, engine
	    except Exception as inference_exc:
	        print(f"BERT inference failed: {inference_exc}")
	        return analyze_tone_fallback(text)


	# ==========================================
	# 4. Outlet Spoof Detector
	# ==========================================
	def detect_spoofed_source_label(raw_text):
	    """
	    Detect text that name-drops a credible outlet without linking to it.

	    The lower-cased text is searched with each attribution pattern in
	    turn. For the first match of a pattern, the named outlet is looked up
	    and compared against the host names of the http(s) URLs found in the
	    text. An outlet claim with no URL on a matching host is a spoof.

	    Args:
	        raw_text: The unprocessed input text (may be empty or None).

	    Returns:
	        (True, outlet_name_title_cased) for the first unbacked claim,
	        otherwise (False, "").
	    """
	    from urllib.parse import urlparse  # function-scope import keeps the block self-contained

	    DECEPTIVE_PATTERNS = [
	        # "source: reuters", "published by bbc", "via cnn", ...
	        # Whitespace before the colon is optional so "Source: X" matches.
	        r'(as reported|according|source|sourced|published|confirmed|breaking news from|exclusive from|via)\s*:?\s+(by\s+)?(reuters|bbc|ndtv|cnn|apnews|ap news|the hindu|indian express|bloomberg|aljazeera|times of india|washington post|new york times|the guardian|firstpost|thewire|snopes)',
	        # "reuters exclusive", "bbc confirms", ...
	        r'(reuters|bbc|ndtv|cnn|apnews)\s*(exclusive|breaking|confirms|confirmed|reports|reported)\b',
	        # outlet name followed within 15 characters by verif/confirm/report
	        r'\b(reuters|bbc|ndtv|cnn)\b.{0,15}(verif|confirm|report)',
	    ]
	    CREDIBLE_OUTLETS = [
	        "reuters", "bbc", "ndtv", "cnn", "apnews", "ap news", "the hindu",
	        "indian express", "bloomberg", "aljazeera", "times of india",
	        "washington post", "new york times", "the guardian", "firstpost",
	        "thewire", "snopes", "politifact", "factcheck", "boomlive", "altnews",
	    ]
	    # Outlets whose host name is not simply the outlet name without spaces.
	    HOST_KEY_OVERRIDES = {"new york times": "nytimes"}

	    if not raw_text:
	        return False, ""

	    text_lower = raw_text.lower()

	    # Compare against host names only, so an outlet name in the path or
	    # query of an unrelated site does not count as a link to that outlet.
	    hosts = []
	    for url in re.findall(r'https?://\S+', raw_text):
	        try:
	            host = urlparse(url).hostname
	        except ValueError:
	            # e.g. malformed IPv6 literal; treat as "no usable host"
	            host = None
	        if host:
	            hosts.append(host.lower())

	    for pattern in DECEPTIVE_PATTERNS:
	        match = re.search(pattern, text_lower)
	        if not match:
	            continue
	        matched_text = match.group(0)
	        outlet_found = next((o for o in CREDIBLE_OUTLETS if o in matched_text), "")
	        if not outlet_found:
	            continue
	        outlet_key = HOST_KEY_OVERRIDES.get(outlet_found, outlet_found.replace(" ", ""))
	        if not any(outlet_key in host for host in hosts):
	            return True, outlet_found.title()
	    return False, ""


	# ==========================================
	# 5. URL Verification
	# ==========================================
	# Registered domains treated as trusted sources. Sub-domains of these are
	# accepted as well.
	TRUSTED_DOMAINS = [
	    "livelaw.in", "barandbench.com", "reuters.com", "apnews.com",
	    "bbc.com", "bbc.co.uk", "nytimes.com", "washingtonpost.com",
	    "theguardian.com", "indianexpress.com", "thehindu.com",
	    "ndtv.com", "bloomberg.com", "economist.com", "snopes.com",
	    "politifact.com", "factcheck.org", "altnews.in", "boomlive.in",
	    "independent.co.uk", "cnn.com", "aljazeera.com",
	    "timesofindia.indiatimes.com", "thewire.in", "firstpost.com",
	    "pib.gov.in", "mea.gov.in", "mohfw.gov.in", "hindustantimes.com",
	    "scroll.in", "theprint.in", "news18.com", "zeenews.india.com",
	    "wionews.com", "businesstoday.in", "livemint.com", "moneycontrol.com",
	]


	def is_trusted_domain(url):
	    """
	    Check whether a URL's host is a trusted domain or a sub-domain of one.

	    Args:
	        url: The URL string to inspect.

	    Returns:
	        (is_trusted, host) where host is the lower-cased host name without
	        a leading "www." and without port or credentials. Returns
	        (False, "") when the URL has no host or cannot be parsed.
	    """
	    from urllib.parse import urlparse

	    try:
	        # .hostname drops any port/userinfo and lower-cases the host, so
	        # "bbc.com:443" compares equal to "bbc.com".
	        host = (urlparse(url).hostname or "").rstrip(".")
	        # Strip only a *leading* "www."; removing it anywhere would turn
	        # "bbc.www.com" into the trusted "bbc.com".
	        if host.startswith("www."):
	            host = host[len("www."):]
	    except (ValueError, TypeError, AttributeError):
	        return False, ""

	    for trusted in TRUSTED_DOMAINS:
	        if host == trusted or host.endswith("." + trusted):
	            return True, host
	    return False, host

	def fetch_url_title(url):
	    """
	    Fetch a page and return the text of its <title> element.

	    Args:
	        url: The URL to request (5 second timeout, browser-like User-Agent).

	    Returns:
	        The title with HTML entities decoded and runs of whitespace
	        collapsed, or None when the request fails, the status is not 200,
	        or no <title> element is found. Never raises; errors are printed.
	    """
	    try:
	        import html
	        import requests

	        response = requests.get(url, timeout=5, headers={
	            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
	        })
	        if response.status_code == 200:
	            # DOTALL lets the title span several lines; attributes on the
	            # tag (e.g. <title lang="en">) are tolerated.
	            match = re.search(r'<title\b[^>]*>(.*?)</title>', response.text,
	                              re.IGNORECASE | re.DOTALL)
	            if match:
	                # html.unescape decodes &amp;, &quot;, &#39;, &lt;, &gt; and
	                # every other named/numeric entity in one pass.
	                return " ".join(html.unescape(match.group(1)).split())
	    except Exception as e:
	        # Best-effort helper: any failure just means "no title available".
	        print(f"URL title fetch error: {e}")
	    return None


	# ==========================================
	# 6. OCR
	# ==========================================
	def normalize_english(text):
	    """
	    Reduce text to plain English characters suitable for analysis.

	    Removes URLs ("http..." and "www." forms) and e-mail addresses,
	    replaces every character other than ASCII letters, digits, whitespace
	    and . , ! ? ' " - with a space, then collapses whitespace.

	    Args:
	        text: Input string; None or empty yields "".

	    Returns:
	        The cleaned, stripped string.
	    """
	    if not text:
	        return ""
	    # Links and addresses are dropped as whole tokens so their fragments
	    # ("https", "com", ...) do not leak into the analysed text.
	    text = re.sub(r'http\S+|www\.\S+|\S+@\S+', ' ', text)
	    text = re.sub(r'[^a-zA-Z0-9\s.,!?\'"\-]', ' ', text)
	    text = re.sub(r'\s+', ' ', text)
	    return text.strip()

	def process_image_to_text(image):
	    """
	    Run OCR on an image and return the normalized text.

	    Accepts a PIL image or anything Image.fromarray can take after a
	    uint8 conversion. The image is converted to grayscale and its contrast
	    is enhanced by a factor of 2.5 before being handed to Tesseract.

	    Returns "" for a None image. On any failure the traceback is printed
	    and a string starting with "OCR Extraction Failed: " is returned.
	    """
	    if image is None:
	        return ""
	    try:
	        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(np.uint8(image))
	        grayscale = ImageOps.grayscale(pil_image)
	        boosted = ImageEnhance.Contrast(grayscale).enhance(2.5)
	        ocr_text = pytesseract.image_to_string(boosted, config=r'--oem 3 --psm 3')
	        return normalize_english(ocr_text)
	    except Exception as ocr_exc:
	        traceback.print_exc()
	        return f"OCR Extraction Failed: {str(ocr_exc)}"


	# ==========================================
	# 7. Web Fact-Check Engine (IMPROVED)
	# ==========================================

	# URL fragments identifying fact-checking outlets. A result from one of
	# these is scored separately from ordinary news results, because their use
	# of words like "fake" or "false" is the verdict of a fact-check rather than
	# a sign of low-quality content.
	FACT_CHECK_OUTLETS = [
	    "snopes.com",
	    "politifact.com",
	    "factcheck.org",
	    "altnews.in",
	    "boomlive.in",
	    "thequint.com",
	    "vishvasnews.com",
	    "logically.ai",
	    "reuters.com/fact-check",
	    "apnews.com/hub/ap-fact-check",
	    "bbc.com/news/reality_check",
	    "thehindu.com/specials/fact-check",
	]

	# Lower-case phrases that signal a claim is being debunked.
	DEBUNK_KEYWORDS = [
	    "fake news",
	    "hoax",
	    "debunked",
	    "fabricated",
	    "false claim",
	    "misleading",
	    "misinformation",
	    "no such attack",
	    "no evidence",
	    "fact-check finds",
	    "did not happen",
	    "not verified",
	    "unverified claim",
	    "rumour spreading",
	    "rumor spreading",
	    "manipulated video",
	    "out of context",
	    "satire",
	    "this is false",
	    "this claim is false",
	    "claim is misleading",
	    "viral claim",
	    "incorrect claim",
	]

	# Lower-case phrases typical of genuine, attributed reporting.
	REAL_NEWS_SIGNALS = [
	    "police said",
	    "official said",
	    "government said",
	    "confirmed by",
	    "spokesperson said",
	    "press conference",
	    "fir registered",
	    "arrested",
	    "minister said",
	    "court order",
	    "official statement",
	    "pib confirmed",
	    "reported by",
	    "according to officials",
	    "sources confirmed",
	    "eyewitnesses said",
	    "investigation reveals",
	    "charged with",
	    "published report",
	    "breaking news confirmed",
	    "official release",
	]

	# Common English function words dropped from search queries. Built once at
	# import time instead of on every call.
	_QUERY_STOP_WORDS = frozenset({
	    "the", "and", "but", "or", "for", "with", "about", "against", "from", "into",
	    "through", "during", "before", "after", "above", "below", "under", "over",
	    "again", "further", "then", "once", "here", "there", "when", "where", "why",
	    "how", "all", "any", "both", "each", "few", "more", "most", "other", "some",
	    "such", "than", "too", "very", "can", "will", "just", "should", "would",
	    "these", "those", "this", "that", "in", "on", "at", "to", "of", "by", "an", "a",
	    "is", "are", "was", "were", "be", "has", "have", "had", "says", "said", "its",
	    "it", "as", "up", "do", "he", "she", "they", "we", "our", "their", "his", "her",
	})


	def build_search_query(text, max_words=7):
	    """
	    Build a focused, short search query from article text.

	    URLs are removed, punctuation is replaced by spaces, and words that
	    are stop words or shorter than three characters are dropped.

	    Args:
	        text: The article text or headline (None is treated as "").
	        max_words: Maximum number of meaningful words kept, in original
	            order. Defaults to 7.

	    Returns:
	        The selected words joined by single spaces ("" if none remain).
	    """
	    without_urls = re.sub(r'https?://\S+', '', text or "")
	    clean = re.sub(r'[^\w\s]', ' ', without_urls)
	    words = [
	        w for w in clean.split()
	        if len(w) > 2 and w.lower() not in _QUERY_STOP_WORDS
	    ]
	    return " ".join(words[:max_words])


	def ddg_search(query, max_results=6, retries=2, delay=1.0):
	    """
	    Search DuckDuckGo with retry logic.

	    Args:
	        query: The search string.
	        max_results: Maximum number of results requested per attempt.
	        retries: Total number of attempts.
	        delay: Seconds to wait after a failed attempt before the next one.

	    Returns:
	        The list of results from the first attempt that yields any, or []
	        when every attempt raises or comes back empty. Exceptions are
	        printed, never propagated.
	    """
	    for attempt in range(1, retries + 1):
	        try:
	            with DDGS() as ddgs:
	                results = list(ddgs.text(query, max_results=max_results))
	        except Exception as e:
	            print(f"DDG attempt {attempt} failed: {e}")
	            # Back off only when another attempt follows; sleeping after
	            # the last failure would just delay the empty result.
	            if attempt < retries:
	                time.sleep(delay)
	            continue
	        if results:
	            return results
	    return []


	def verify_facts_online(text):
	    """
	    Multi-pass web fact-checking.

	    Builds a query from the text, searches with it (falling back to its
	    first four words when nothing is found), then classifies every result
	    and renders it as an HTML card.

	    Per-result scoring:
	    - fact-check outlet + any debunk keyword      -> debunk_score += 2
	    - fact-check outlet, no debunk keyword        -> real_corroboration += 1
	    - trusted domain, >= 2 debunk keywords and no
	      real-reporting signal                       -> debunk_score += 1
	    - trusted domain otherwise                    -> real_corroboration += 1
	    - other site, >= 2 debunk keywords and no
	      real-reporting signal                       -> debunk_score += 1
	    - other site with a real-reporting signal     -> real_corroboration += 1
	    - anything else                               -> shown as related, unscored

	    Args:
	        text: Cleaned article text or headline.

	    Returns:
	        (results_list, html_markdown, hits_count, debunk_score,
	        real_corroboration). When the query has fewer than two words or no
	        results are found, results_list is [] and all counts are 0, with a
	        single explanatory card as html_markdown.
	    """
	    # Search-result fields are third-party content, so they are escaped
	    # before being interpolated into HTML.
	    import html as html_lib

	    query = build_search_query(text)
	    if len(query.split()) < 2:
	        too_short_card = _card(
	            "UNVERIFIED", "Headline Too Short",
	            "The text is too short to run a meaningful web search.",
	            "debunk", "#fb923c")
	        return [], too_short_card, 0, 0, 0

	    print(f"[Web Search] Query: '{query}'")

	    # Pass 1: full query
	    results = ddg_search(query, max_results=8)

	    # Pass 2: shorter query fallback
	    if not results:
	        short_q = " ".join(query.split()[:4])
	        print(f"[Web Search] Fallback query: '{short_q}'")
	        results = ddg_search(short_q, max_results=5)

	    if not results:
	        no_result_card = _card(
	            "UNVERIFIED", "No Online Matches Found",
	            "No mainstream news outlets or fact-check databases are reporting this claim. "
	            "This may be a fabricated rumor, hyper-local event, or newly generated hoax. "
	            "Treat with caution until a direct source is found.",
	            "debunk", "#fb923c")
	        return [], no_result_card, 0, 0, 0

	    cards = []
	    debunk_score = 0
	    real_corroboration = 0

	    for position, res in enumerate(results, start=1):
	        title = res.get('title') or 'Source'
	        body = res.get('body') or ''
	        link = res.get('href') or '#'
	        combined = (title + " " + body).lower()
	        link_lower = link.lower()

	        # Fact-check entries may include a path, so they are matched
	        # against the whole URL.
	        is_fact_check_site = any(fc in link_lower for fc in FACT_CHECK_OUTLETS)

	        # Trusted news is decided on the host name so that a trusted
	        # domain appearing in another site's path or query does not count.
	        is_trusted_news, _ = is_trusted_domain(link)

	        debunk_hits = sum(1 for kw in DEBUNK_KEYWORDS if kw in combined)
	        real_hits = sum(1 for sig in REAL_NEWS_SIGNALS if sig in combined)
	        looks_debunked = debunk_hits >= 2 and real_hits == 0

	        if is_fact_check_site:
	            if debunk_hits > 0:
	                # Fact-checker found it FALSE
	                debunk_score += 2
	                status = _badge("🚨 FACT-CHECKER: FALSE", "danger")
	                card_class = "source-debunk"
	            else:
	                # Fact-checker article exists but doesn't debunk it
	                real_corroboration += 1
	                status = _badge("✓ FACT-CHECK CORROBORATED", "success")
	                card_class = "source-credible"
	        elif is_trusted_news:
	            if looks_debunked:
	                # Trusted news reporting it AS fake news
	                debunk_score += 1
	                status = _badge("⚠️ REPORTED AS MISINFORMATION", "warning")
	                card_class = "source-debunk"
	            else:
	                # Trusted news covering the story normally
	                real_corroboration += 1
	                status = _badge("✓ TRUSTED SOURCE", "success")
	                card_class = "source-credible"
	        else:
	            # General web result
	            if looks_debunked:
	                debunk_score += 1
	                status = _badge("🚨 DEBUNK SIGNALS", "danger")
	                card_class = "source-debunk"
	            elif real_hits > 0:
	                real_corroboration += 1
	                status = _badge("✓ NEWS CORROBORATION", "success")
	                card_class = "source-credible"
	            else:
	                status = _badge("◉ RELATED RESULT", "neutral")
	                card_class = "source-credible"

	        # Truncate the body for display, then escape all untrusted fields.
	        display_body = body[:280] + ("..." if len(body) > 280 else "")
	        safe_title = html_lib.escape(title)
	        safe_body = html_lib.escape(display_body)
	        safe_link = html_lib.escape(link, quote=True)

	        cards.append(
	            f"<div class='source-card {card_class}'>"
	            f"<div class='source-header'>"
	            f"<span class='source-idx'>#{position}</span>"
	            f"<h4>{safe_title}</h4>"
	            f"{status}"
	            f"</div>"
	            f"<p class='source-body'>\"{safe_body}\"</p>"
	            f"<div class='source-footer'>"
	            f"<a href='{safe_link}' target='_blank' class='source-link'>🔗 View source</a>"
	            f"</div></div>\n"
	        )

	    return results, "".join(cards), len(results), debunk_score, real_corroboration


	def _badge(text, kind):
	colors = {
	"success": ("rgba(16,185,129,0.1)", "#10b981", "rgba(16,185,129,0.15)"),
	"danger": ("rgba(239,68,68,0.1)", "#ef4444", "rgba(239,68,68,0.15)"),
	"warning": ("rgba(245,158,11,0.1)", "#f59e0b", "rgba(245,158,11,0.15)"),
	"neutral": ("rgba(100,116,139,0.1)","#64748b", "rgba(100,116,139,0.15)"),
	}
	bg, color, border = colors.get(kind, colors["neutral"])
	return (f"<span class='badge' style='background:{bg};color:{color};"
	f"border:1px solid {border};'>{text}</span>")

	def _card(idx_label, title, body_text, cls, color):
	return (
	f"<div class='source-card source-{cls}' style='border-left-color:{color};'>"
	f"<div class='source-header'>"
	f"<span class='source-idx' style='color:{color};'>{idx_label}</span>"
	f"<h4>{title}</h4></div>"
	f"<p class='source-body'>{body_text}</p>"
	f"</div>"
	)


	# ==========================================
	# 8. MAIN VERDICT ENGINE (Redesigned)
	# ==========================================
	def compute_verdict(is_url_verified, verified_domain_name,
	                    hits_count, debunk_score, real_corroboration,
	                    bert_fake_prob, is_label_spoofed, is_url_verified_flag):
	    """
	    Combine all signals into the final verdict.

	    Returns (reliability, title, css_class, color, description_html).

	    The branches are checked in this order and the first match wins:
	      1. is_url_verified: 96, or 78 when bert_fake_prob >= 50.
	      2. debunk_score >= 3 / == 2 / == 1: base 6 / 15 / 32.
	      3. real_corroboration >= 3: 88 + count (capped at 95);
	         == 2: 80, or a fixed 68 when bert_fake_prob > 60;
	         == 1: base 68 when bert_fake_prob < 40, else base 50.
	      4. hits_count > 0: base 58 when bert_fake_prob < 35, else base 38.
	      5. No web results: base 22 / 38 / 52 for bert_fake_prob
	         >= 65 / >= 40 / lower.

	    When is_label_spoofed is true and is_url_verified_flag is false, 12
	    points are subtracted from the base values (not from the fixed 96, 78
	    and 68), and most branches clamp the result to a per-branch floor.
	    """
	    GREEN, AMBER, ORANGE, RED = "#10b981", "#f59e0b", "#fb923c", "#ef4444"
	    GENUINE = "verdict-genuine-title"
	    SENSATIONAL = "verdict-sensationalized-title"
	    UNVERIFIED = "verdict-unverified-title"
	    FABRICATED = "verdict-fabricated-title"
	    URL_TIP = "<br><small style='color:#6b7280;'>Tip: Paste the direct article URL if you have one.</small>"

	    penalty = 12 if (is_label_spoofed and not is_url_verified_flag) else 0

	    def floored(base, floor):
	        # Apply the spoof penalty but never drop below the branch's floor.
	        return max(base - penalty, floor)

	    # -- Case 1: direct trusted URL --
	    if is_url_verified:
	        if bert_fake_prob < 50:
	            return 96, "🏆 VERIFIED GENUINE", GENUINE, GREEN, (
	                f"Directly linked to trusted domain <strong>{verified_domain_name}</strong> "
	                f"and written in an objective style. High confidence this is genuine."
	            )
	        return 78, "📝 SENSATIONALIZED — CORE FACTS REAL", SENSATIONAL, AMBER, (
	            f"Verified via <strong>{verified_domain_name}</strong> but writing style is sensational. "
	            f"Core facts are likely authentic; specific details may be exaggerated."
	        )

	    # -- Case 2: debunk signals --
	    if debunk_score >= 3:
	        return floored(6, 3), "🚨 FABRICATED / DEBUNKED", FABRICATED, RED, (
	            "Multiple fact-checkers and credible sources have debunked this claim. "
	            "This is almost certainly misinformation. Do NOT share."
	        )
	    if debunk_score == 2:
	        return floored(15, 5), "🚨 LIKELY FAKE / DEBUNKED", FABRICATED, RED, (
	            "Two or more credible sources flag this as false or misleading. "
	            "Strong evidence this is misinformation."
	        )
	    if debunk_score == 1:
	        return floored(32, 10), "⚠️ SUSPICIOUS CLAIM", FABRICATED, RED, (
	            "At least one credible source contradicts or flags this claim. "
	            "Treat with strong skepticism and verify from primary sources."
	        )

	    # -- Case 3: real corroboration --
	    if real_corroboration >= 3:
	        reliability = min(88 + real_corroboration - penalty, 95)
	        return reliability, "🏆 VERIFIED GENUINE", GENUINE, GREEN, (
	            f"Found in <strong>{real_corroboration}</strong> credible/trusted sources with no debunking signals. "
	            f"High confidence this is genuine news."
	        )
	    if real_corroboration == 2:
	        if bert_fake_prob > 60:
	            return 68, "📝 LIKELY REAL — VERIFY DETAILS", SENSATIONAL, AMBER, (
	                "Found in 2 credible sources, but writing style raises some flags. "
	                "Core story appears real; verify specific claims independently."
	            )
	        return 80 - penalty, "🏆 VERIFIED GENUINE", GENUINE, GREEN, (
	            "Found in 2 credible sources with objective writing style. "
	            "High confidence this is genuine."
	        )
	    if real_corroboration == 1:
	        if bert_fake_prob < 40:
	            return floored(68, 50), "📝 LIKELY REAL — NEEDS MORE SOURCES", SENSATIONAL, AMBER, (
	                "Found in one credible source with objective writing. "
	                "Likely genuine but seek additional confirmation."
	            )
	        return floored(50, 30), "⚠️ UNVERIFIED — MIXED SIGNALS", UNVERIFIED, ORANGE, (
	            "Only one corroborating source found and writing style is questionable. "
	            "Exercise caution and verify from a primary source."
	        )

	    # -- Case 4: web results exist but carry no clear signal --
	    if hits_count > 0:
	        if bert_fake_prob < 35:
	            return floored(58, 40), "⚠️ UNVERIFIED — PROBABLY REAL", UNVERIFIED, ORANGE, (
	                "Some web results found but from non-trusted domains. "
	                "Writing style appears objective. Likely real but needs a primary source link."
	            )
	        return floored(38, 20), "⚠️ UNVERIFIED — SUSPICIOUS", UNVERIFIED, ORANGE, (
	            "Some web results found but content is not clearly corroborated by trusted outlets. "
	            "Sensational writing style detected. Verify before sharing."
	        )

	    # -- Case 5: no web results, rely on the classifier score --
	    if bert_fake_prob >= 65:
	        return floored(22, 8), "⚠️ UNVERIFIED — HIGH FAKE RISK", UNVERIFIED, ORANGE, (
	            "No online corroboration found AND the AI model flags this as likely fake. "
	            "This is possibly a fabricated or circulating rumor. "
	            + URL_TIP
	        )
	    if bert_fake_prob >= 40:
	        return floored(38, 20), "⚠️ UNVERIFIED — UNCERTAIN", UNVERIFIED, ORANGE, (
	            "No online corroboration found. Could be a very recent, hyper-local, or fabricated story. "
	            "Seek a direct source before sharing. "
	            + URL_TIP
	        )
	    return floored(52, 35), "⚠️ UNVERIFIED — POSSIBLY REAL", UNVERIFIED, ORANGE, (
	        "No mainstream coverage found, but writing style appears legitimate. "
	        "Could be a hyper-local or very recent story. "
	        "Seek a primary source before sharing. "
	        + URL_TIP
	    )


	# ==========================================
	# 9. Master Process Function
	# ==========================================
	def process_and_verdict(text_input, ocr_output, img_input, source_tab):
	    """
	    Run the full analysis pipeline and build every output for the UI.

	    Steps: pick the input text (typed text, existing OCR text, or a fresh
	    OCR pass over the image), look for a URL on a trusted domain, check
	    for an outlet claimed without a matching link, classify the cleaned
	    text, search the web, compute the verdict and render the HTML.

	    Args:
	        text_input: Text typed by the user (used unless source_tab is "image").
	        ocr_output: Previously extracted OCR text, if any.
	        img_input: The uploaded image, or None.
	        source_tab: "image" to analyse the image/OCR text; anything else
	            analyses text_input.

	    Returns:
	        A 7-tuple on every path:
	        (results visibility update, banner_html, verdict_html,
	        ai_report_html, web_markdown, updated_ocr, gr.update(open=False)).
	        On input errors the error card is returned in the banner slot and
	        the other HTML slots are empty.
	    """
	    # Stdlib escaping for text that originates outside this program
	    # (fetched page titles, URLs typed by the user, OCR error messages).
	    import html as html_lib

	    def _error_outputs(card_html, ocr_value):
	        # Same arity and ordering as the success return below, so the
	        # number of returned values is identical on every path.
	        # NOTE(review): the results container is hidden here as in the
	        # original; confirm the banner component sits outside it.
	        return (
	            gr.update(visible=False),
	            card_html,
	            "", "", "",
	            ocr_value,
	            gr.update(open=False),
	        )

	    updated_ocr = ocr_output

	    # -- Select the text to analyse --
	    if source_tab == "image":
	        if ocr_output and ocr_output.strip():
	            raw_text = ocr_output
	        elif img_input is None:
	            return _error_outputs(
	                "<div class='source-card source-debunk'><h4>⚠️ Input Error</h4>"
	                "<p>Please upload an image or extract OCR text first.</p></div>",
	                ocr_output,
	            )
	        else:
	            raw_text = process_image_to_text(img_input)
	            updated_ocr = raw_text
	    else:
	        raw_text = text_input
	    raw_text = raw_text or ""

	    # An OCR failure message is not news text; report it instead of
	    # fact-checking it.
	    if source_tab == "image" and raw_text.startswith("OCR Extraction Failed:"):
	        return _error_outputs(
	            "<div class='source-card source-debunk'><h4>⚠️ OCR Error</h4>"
	            f"<p>{html_lib.escape(raw_text)}</p></div>",
	            updated_ocr,
	        )

	    # -- Look for a URL on a trusted domain (first one wins) --
	    is_url_verified = False
	    verified_domain_name = ""
	    verified_url_card = ""

	    for url in re.findall(r'(https?://\S+)', raw_text):
	        is_trusted, domain = is_trusted_domain(url)
	        if not is_trusted:
	            continue
	        is_url_verified = True
	        verified_domain_name = domain
	        page_title = fetch_url_title(url) or f"Verified Article on {domain.title()}"
	        verified_url_card = (
	            f"<div class='source-card source-credible' style='border-left-width:6px;'>"
	            f"<div class='source-header'>"
	            f"<span class='source-idx' style='background:#10b981;color:white;'>✓ DIRECT SOURCE</span>"
	            f"<h4>{html_lib.escape(page_title)}</h4>"
	            f"{_badge('✓ TRUSTED DOMAIN','success')}"
	            f"</div>"
	            f"<p class='source-body'>Direct link verified from trusted domain "
	            f"<strong>{html_lib.escape(domain)}</strong>.</p>"
	            f"<div class='source-footer'>"
	            f"<a href='{html_lib.escape(url, quote=True)}' target='_blank' class='source-link'>"
	            f"🔗 View on {html_lib.escape(domain.title())}</a>"
	            f"</div></div>"
	        )
	        break

	    # -- Spoof detection --
	    is_label_spoofed, spoofed_outlet = detect_spoofed_source_label(raw_text)
	    show_spoof_warning = is_label_spoofed and not is_url_verified
	    spoofed_warning_card = ""
	    if show_spoof_warning:
	        spoofed_warning_card = (
	            f"<div class='source-card source-debunk' style='border-left-color:#f59e0b;'>"
	            f"<div class='source-header'>"
	            f"<span class='source-idx' style='color:#f59e0b;'>⚠️ CLAIM CHECK</span>"
	            f"<h4>Outlet Claimed Without Verifiable Link</h4>"
	            f"{_badge('🚨 UNVERIFIED CLAIM','danger')}"
	            f"</div>"
	            f"<p class='source-body'>Content claims to be from <strong>{spoofed_outlet}</strong> "
	            f"but no verified URL from that outlet was found. "
	            f"This is a common credibility manipulation tactic.</p>"
	            f"</div>"
	        )

	    # -- Clean text for analysis --
	    cleaned_text = normalize_english(raw_text)
	    if not cleaned_text or len(cleaned_text) < 10:
	        return _error_outputs(
	            "<div class='source-card source-debunk'><h4>⚠️ Too Short</h4>"
	            "<p>Please provide a full sentence or news headline (min 10 characters).</p></div>",
	            updated_ocr,
	        )

	    # -- Classification --
	    bert_fake_prob, bert_label, bert_engine = classify_with_bert(cleaned_text)

	    # -- Web fact-check --
	    (search_results, web_markdown, hits_count,
	     debunk_score, real_corroboration) = verify_facts_online(cleaned_text)

	    # Prepend cards: the verified-URL card ends up first.
	    if spoofed_warning_card:
	        web_markdown = spoofed_warning_card + "\n" + web_markdown
	    if is_url_verified:
	        web_markdown = verified_url_card + "\n" + web_markdown

	    # -- Final verdict --
	    reliability, verdict_title, verdict_class, verdict_color, verdict_desc = compute_verdict(
	        is_url_verified, verified_domain_name,
	        hits_count, debunk_score, real_corroboration,
	        bert_fake_prob, is_label_spoofed, is_url_verified
	    )

	    # -- Values shared by the HTML fragments --
	    # Computed up front so no quoting or backslashes are needed inside
	    # f-string expressions.
	    debunk_plural = "" if debunk_score == 1 else "s"
	    bert_headline = bert_label.split(":")[0]
	    fake_prob_color = "#ef4444" if bert_fake_prob > 50 else "#10b981"
	    text_preview = cleaned_text[:800] + ("..." if len(cleaned_text) > 800 else "")

	    spoof_metric = ""
	    outlet_claim_item = ""
	    if show_spoof_warning:
	        spoof_metric = (
	            "<div class='metric-item'>"
	            "<span class='metric-val' style='color:#f59e0b;'>⚠️ CLAIMED</span>"
	            "<span class='metric-lbl'>OUTLET UNVERIFIED</span></div>"
	        )
	        outlet_claim_item = (
	            "<div class='intel-item'><h5>⚠️ Outlet Claim</h5>"
	            "<div class='intel-val' style='color:#f59e0b;'>"
	            f"\"{spoofed_outlet}\" claimed without verified URL.</div></div>"
	        )

	    # -- Verdict dashboard --
	    verdict_html = (
	        "<div class='verdict-dashboard'>"
	        "<div class='verdict-score-wrapper'>"
	        "<div class='verdict-circle-progress' "
	        f"style='background:conic-gradient({verdict_color} {reliability}%,#334155 {reliability}%);'>"
	        "<div class='verdict-circle-inner'>"
	        f"<span class='verdict-score'>{reliability}%</span>"
	        "<span class='verdict-score-label'>RELIABILITY</span>"
	        "</div>"
	        "</div>"
	        "</div>"
	        "<div class='verdict-details'>"
	        f"<div class='verdict-title {verdict_class}'>{verdict_title}</div>"
	        f"<p class='verdict-desc'>{verdict_desc}</p>"
	        "<div class='verdict-metrics'>"
	        "<div class='metric-item'>"
	        f"<span class='metric-val'>{bert_headline}</span>"
	        "<span class='metric-lbl'>AI CLASSIFICATION</span>"
	        "</div>"
	        "<div class='metric-item'>"
	        f"<span class='metric-val'>{real_corroboration} trusted</span>"
	        "<span class='metric-lbl'>REAL SOURCES FOUND</span>"
	        "</div>"
	        "<div class='metric-item'>"
	        f"<span class='metric-val'>{debunk_score} flag{debunk_plural}</span>"
	        "<span class='metric-lbl'>DEBUNK SIGNALS</span>"
	        "</div>"
	        "<div class='metric-item'>"
	        f"<span class='metric-val'>{hits_count} total</span>"
	        "<span class='metric-lbl'>WEB RESULTS</span>"
	        "</div>"
	        f"{spoof_metric}"
	        "</div>"
	        "</div>"
	        "</div>"
	    )

	    # -- Detailed report --
	    # cleaned_text has already been reduced by normalize_english to
	    # letters, digits, whitespace and . , ! ? ' " - so it cannot carry markup.
	    ai_report_html = (
	        "<div class='intel-card'>"
	        "<div class='intel-item'>"
	        "<h5>Classifier Engine</h5>"
	        f"<div class='intel-val' style='color:#06b6d4;'>{bert_engine}</div>"
	        "</div>"
	        "<div class='intel-item'>"
	        "<h5>AI Classification</h5>"
	        f"<div class='intel-val'>{bert_label}</div>"
	        "</div>"
	        "<div class='intel-item'>"
	        "<h5>Fake Probability (AI)</h5>"
	        f"<div class='intel-val' style='color:{fake_prob_color};'>{bert_fake_prob}%</div>"
	        "</div>"
	        "<div class='intel-item'>"
	        "<h5>Web Corroboration</h5>"
	        f"<div class='intel-val'>{real_corroboration} trusted source(s) &nbsp;|&nbsp; "
	        f"<span style='color:#ef4444;'>{debunk_score} debunk signal{debunk_plural}</span>"
	        "</div>"
	        "</div>"
	        f"{outlet_claim_item}"
	        "<div class='intel-item'>"
	        "<h5>Analyzed Text</h5>"
	        "<div class='intel-val' style='font-weight:normal;font-size:0.88rem;font-family:monospace;"
	        "background:#0b0f19;padding:12px;border-radius:8px;border:1px solid #1e293b;"
	        f"color:#cbd5e1;word-break:break-all;'>{text_preview}</div>"
	        "</div>"
	        "</div>"
	    )

	    if reliability >= 60:
	        banner_html = "<div class='verdict-banner-true'>🟢 LIKELY TRUE NEWS</div>"
	    else:
	        banner_html = "<div class='verdict-banner-false'>🔴 LIKELY FALSE / UNVERIFIED</div>"

	    return (
	        gr.update(visible=True),
	        banner_html,
	        verdict_html,
	        ai_report_html,
	        web_markdown,
	        updated_ocr,
	        gr.update(open=False),
	    )


	def handle_ocr_scan(img):
	    """Return the OCR text for img, or a warning string when img is None."""
	    return "⚠️ Image not uploaded yet." if img is None else process_image_to_text(img)


	# ==========================================
	# 10. Stylesheet
	# ==========================================
	css = """
	@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;800;900&family=Inter:wght@300;400;600;700&display=swap');

	body { background-color: #0b0f19 !important; }
	.gradio-container {
	background-color: #0b0f19 !important;
	font-family: 'Inter','Outfit',sans-serif !important;
	color: #f1f5f9 !important;
	max-width: 1200px !important;
	margin: 0 auto !important;
	padding: 20px !important;
	}
	.cyber-title {
	text-align: center;
	background: linear-gradient(135deg,#06b6d4 0%,#10b981 50%,#3b82f6 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 3rem; font-weight: 900;
	margin-top: 10px; margin-bottom: 2px;
	letter-spacing: -0.04em;
	font-family: 'Outfit',sans-serif !important;
	}
	.cyber-subtitle {
	text-align: center; color: #94a3b8;
	font-size: 1.15rem; margin-bottom: 35px;
	font-weight: 400;
	}
	.block {
	background-color: #111827 !important;
	border: 1px solid #1f2937 !important;
	border-radius: 16px !important;
	box-shadow: 0 15px 35px -10px rgba(0,0,0,0.6) !important;
	overflow: hidden !important;
	}
	textarea, input[type="text"] {
	background-color: #030712 !important;
	border: 1px solid #1f2937 !important;
	color: #f1f5f9 !important;
	font-size: 0.95rem !important;
	border-radius: 8px !important;
	}
	textarea:focus, input[type="text"]:focus {
	border-color: #06b6d4 !important;
	box-shadow: 0 0 10px rgba(6,182,212,0.2) !important;
	}
	.tab-nav {
	border-bottom: 1px solid #1f2937 !important;
	background-color: #0b0f19 !important;
	padding: 8px 12px 0 12px !important;
	}
	.tab-nav button {
	color: #6b7280 !important; font-weight: 700 !important;
	font-size: 0.9rem !important; border: none !important;
	background: transparent !important; padding: 10px 18px !important;
	border-radius: 8px 8px 0 0 !important;
	}
	.tab-nav button.selected {
	color: #06b6d4 !important; background-color: #111827 !important;
	border: 1px solid #1f2937 !important;
	border-bottom: 1px solid #111827 !important;
	}
	.cyber-btn {
	background: linear-gradient(135deg,#111827 0%,#1f2937 100%) !important;
	border: 1px dashed #06b6d4 !important; color: #06b6d4 !important;
	font-weight: 800 !important; letter-spacing: 0.04em;
	text-transform: uppercase;
	border-radius: 12px !important;
	font-family: 'Outfit',sans-serif !important;
	}
	.cyber-btn:hover {
	background: linear-gradient(135deg,#06b6d4 0%,#3b82f6 100%) !important;
	color: #ffffff !important; border-style: solid !important;
	box-shadow: 0 0 20px rgba(6,182,212,0.5) !important;
	}
	.verdict-dashboard {
	display: flex; align-items: center;
	background: linear-gradient(135deg,#030712 0%,#111827 100%);
	border: 1px solid #1f2937; border-radius: 16px;
	padding: 24px; gap: 24px; margin-bottom: 25px;
	}
	@media(max-width:768px){.verdict-dashboard{flex-direction:column;text-align:center;}}
	.verdict-score-wrapper{flex-shrink:0;}
	.verdict-circle-progress {
	width:140px; height:140px; border-radius:50%;
	display:flex; align-items:center; justify-content:center;
	box-shadow:0 8px 16px rgba(0,0,0,0.4);
	}
	.verdict-circle-inner {
	width:114px; height:114px; background-color:#0b0f19; border-radius:50%;
	display:flex; flex-direction:column; align-items:center; justify-content:center;
	}
	.verdict-score{font-size:2.1rem;font-weight:900;color:#fff;line-height:1;font-family:'Outfit',sans-serif !important;}
	.verdict-score-label{font-size:0.65rem;color:#6b7280;text-transform:uppercase;letter-spacing:0.12em;margin-top:4px;}
	.verdict-details{flex-grow:1;}
	.verdict-title{
	font-size:1.7rem;font-weight:900;letter-spacing:-0.02em;
	margin-bottom:6px;text-transform:uppercase;font-family:'Outfit',sans-serif !important;
	}
	.verdict-desc{font-size:0.95rem;color:#9ca3af;line-height:1.5;margin-bottom:16px;}
	.verdict-metrics{
	display:flex;gap:20px;flex-wrap:wrap;
	border-top:1px solid #1f2937;padding-top:14px;
	}
	.metric-item{display:flex;flex-direction:column;}
	.metric-val{font-size:0.9rem;font-weight:700;color:#f3f4f6;}
	.metric-lbl{font-size:0.65rem;color:#4b5563;text-transform:uppercase;letter-spacing:0.05em;}
	.verdict-genuine-title {color:#10b981;text-shadow:0 0 20px rgba(16,185,129,0.35);}
	.verdict-sensationalized-title{color:#f59e0b;text-shadow:0 0 20px rgba(245,158,11,0.35);}
	.verdict-unverified-title {color:#fb923c;text-shadow:0 0 20px rgba(251,146,60,0.35);}
	.verdict-fabricated-title {color:#ef4444;text-shadow:0 0 20px rgba(239,68,68,0.35);}
	.source-card{
	background-color:#030712;border:1px solid #1f2937;
	border-radius:12px;padding:16px;margin-bottom:14px;
	}
	.source-card.source-credible{border-left:4px solid #10b981;}
	.source-card.source-debunk{border-left:4px solid #ef4444;}
	.source-header{
	display:flex;justify-content:space-between;align-items:center;
	margin-bottom:10px;gap:12px;flex-wrap:wrap;
	}
	.source-idx{
	font-size:0.75rem;font-weight:800;background-color:#111827;
	color:#9ca3af;padding:2px 7px;border-radius:4px;white-space:nowrap;
	}
	.source-header h4{margin:0;font-size:0.95rem;font-weight:700;color:#fff;flex-grow:1;line-height:1.35;}
	.badge{font-size:0.65rem;font-weight:800;padding:3px 9px;border-radius:20px;text-transform:uppercase;white-space:nowrap;}
	.source-body{font-size:0.85rem;color:#9ca3af;line-height:1.45;margin:0 0 12px 0;font-style:italic;}
	.source-footer{display:flex;justify-content:flex-end;}
	.source-link{font-size:0.75rem;color:#06b6d4;text-decoration:none;font-weight:700;}
	.source-link:hover{color:#3b82f6;text-decoration:underline;}
	.intel-card{background-color:#030712;border:1px solid #1f2937;border-radius:12px;padding:20px;}
	.intel-item{margin-bottom:18px;}
	.intel-item:last-child{margin-bottom:0;}
	.intel-item h5{
	margin:0 0 6px 0;font-size:0.8rem;color:#4b5563;
	text-transform:uppercase;letter-spacing:0.06em;
	}
	.intel-val{font-size:1.05rem;font-weight:600;color:#fff;}
	.verdict-banner-true{
	text-align:center;
	background:linear-gradient(135deg,rgba(16,185,129,0.08) 0%,rgba(16,185,129,0.18) 100%);
	border:2px solid #10b981;color:#10b981;font-size:1.3rem;font-weight:800;
	padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase;
	font-family:'Outfit',sans-serif !important;
	}
	.verdict-banner-false{
	text-align:center;
	background:linear-gradient(135deg,rgba(239,68,68,0.08) 0%,rgba(239,68,68,0.18) 100%);
	border:2px solid #ef4444;color:#ef4444;font-size:1.3rem;font-weight:800;
	padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase;
	font-family:'Outfit',sans-serif !important;
	}
	"""

	# ==========================================
	# 11. Gradio App
	# ==========================================
	# Assemble the whole UI inside a Blocks context bound to `app`, styled with the
	# custom `css` string defined above and the Base theme.
	with gr.Blocks(css=css, theme=gr.themes.Base(), title="True Fact Checker & Fake News Detector") as app:

	# Per-session state naming the active input tab. Starts as "image" and is
	# overwritten by the two tab .select listeners further down.
	tab_state = gr.State(value="image")

	# Page title and subtitle as raw HTML carrying the cyber-title / cyber-subtitle classes.
	gr.HTML("<div class='cyber-title'>⚖️ TRUE FACT CHECKER</div>")
	gr.HTML("<div class='cyber-subtitle'>Web-First Verification · BERT Neural Analysis · Live Fact-Check Consensus</div>")

	# Row whose first column (scale=1) holds the input tabs.
	with gr.Row():
	with gr.Column(scale=1):
	with gr.Tabs() as input_tabs:
	# Screenshot tab: image input, an OCR trigger button, and an editable
	# textbox that receives the extracted text.
	with gr.Tab("📸 Screenshot Scanner", id=0) as tab_img:
	gr.HTML("<div style='margin-bottom:10px;color:#9ca3af;font-size:0.9rem;'>"
	"Upload an article screenshot, social media post, or newspaper clipping.</div>")
	# type="numpy" hands the image to handlers as a numpy array; only
	# file upload and clipboard paste are offered as sources.
	img_input = gr.Image(
	type="numpy", sources=["upload","clipboard"],
	label="Drag screenshot here or paste from clipboard", height=240
	)
	ocr_scan_btn = gr.Button("🔎 Extract Screenshot Text", variant="secondary", elem_classes="cyber-btn")
	# interactive=True lets the user correct the OCR result before verification.
	ocr_output_box = gr.Textbox(
	label="OCR Extracted Text (Review/Edit before verifying)",
	lines=5,
	placeholder="OCR text appears here. Edit for accuracy if needed...",
	interactive=True
	)
	# Run handle_ocr_scan on the current image and write its return value
	# into the OCR textbox.
	ocr_scan_btn.click(handle_ocr_scan, inputs=img_input, outputs=ocr_output_box)

	# Text tab: a single multi-line textbox for pasted claims or articles.
	with gr.Tab("📝 Direct Text / Headline", id=1) as tab_txt:
	gr.HTML("<div style='margin-bottom:10px;color:#9ca3af;font-size:0.9rem;'>"
	"Paste a rumor headline, WhatsApp forward, news claim, or full article text.</div>")
	text_input = gr.Textbox(
	lines=8,
	placeholder="e.g. 'Breaking: Scientists discover miracle cure but government is hiding it...'",
	label="Paste news text or headline here"
	)

	# Record which tab the user selected; the value is later passed to
	# process_and_verdict through tab_state.
	tab_img.select(lambda: "image", outputs=tab_state)
	tab_txt.select(lambda: "text", outputs=tab_state)

	# Primary action button; its click listeners are registered at the end of the block.
	verify_btn = gr.Button(
	"⚡ Analyze & Verify Fact Consensus",
	variant="primary", elem_classes="cyber-btn", size="lg"
	)

	# Second column (scale=1): idle placeholder plus the results panel.
	with gr.Column(scale=1):
	# Idle-state card, visible on load; hidden by the second verify_btn.click
	# listener below.
	placeholder_card = gr.HTML(
	"""<div class='intel-card' style='text-align:center;padding:40px 20px;border-style:dashed;'>
	<div style='font-size:3rem;color:#1f2937;margin-bottom:15px;'>📡</div>
	<h4 style='color:#6b7280;font-family:Outfit,sans-serif;font-size:1.15rem;margin-bottom:8px;'>Telemetry Idle</h4>
	<p style='color:#4b5563;font-size:0.9rem;max-width:320px;margin:0 auto;'>
	Upload an image or paste a news claim, then click Analyze to begin verification.</p>
	</div>""",
	visible=True
	)

	# Results container, created hidden. It is the first output of
	# process_and_verdict — presumably that handler reveals it; confirm there.
	verdict_panel = gr.Column(visible=False)
	with verdict_panel:
	banner_output = gr.HTML()
	# Collapsed by default (open=False); also listed as an output of
	# process_and_verdict, so the handler can update it.
	with gr.Accordion("🔓 View Detailed Confidence & Telemetry", open=False) as confidence_accordion:
	verdict_html_output = gr.HTML()
	with gr.Tabs():
	with gr.Tab("🌐 Live Web Sources"):
	# NOTE: despite the name, this is a gr.HTML component, not Markdown.
	sources_markdown_output = gr.HTML()
	with gr.Tab("🧠 AI Intel"):
	ai_report_html_output = gr.HTML()

	# Main analysis listener: sends the typed text, the (possibly edited) OCR text,
	# the raw image and the active-tab name to process_and_verdict. The handler's
	# return values are mapped, in order, onto the seven components in `outputs`.
	# ocr_output_box is both an input and an output, so the handler may rewrite it.
	verify_btn.click(
	fn=process_and_verdict,
	inputs=[text_input, ocr_output_box, img_input, tab_state],
	outputs=[
	verdict_panel, banner_output, verdict_html_output,
	ai_report_html_output, sources_markdown_output,
	ocr_output_box, confidence_accordion
	]
	)
	# Second, independent listener on the same button: hides the idle placeholder card.
	verify_btn.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=placeholder_card)

	if __name__ == "__main__":
	    # Script entry point: start the Gradio server for `app`.
	    #
	    # A public share tunnel is only requested when NOT running on Hugging Face
	    # Spaces. Spaces sets the SPACE_ID environment variable and already serves
	    # the app publicly; Gradio does not support share=True there and only
	    # emits a warning before ignoring it. Local runs keep the original
	    # share=True behaviour.
	    running_on_spaces = bool(os.environ.get("SPACE_ID"))
	    app.launch(share=not running_on_spaces)