import gradio as gr
from transformers import pipeline
from PIL import Image, ImageEnhance, ImageOps
from duckduckgo_search import DDGS
import pytesseract
import numpy as np
import re
import os
import traceback
import time

# ==========================================
# 1. Load AI Models
# ==========================================
# Module-level handle to the Hugging Face text-classification pipeline.
# It stays None when loading fails, in which case classify_with_bert()
# falls back to the heuristic analyzer.
classifier = None
# String form of the exception raised while loading the model (None otherwise).
classifier_error = None

print("Loading BERT Fake News Classifier...")
try:
    # NOTE(review): the published model card for this checkpoint appears to list
    # LABEL_0 = Fake news and LABEL_1 = Real news — confirm that the mapping
    # used in classify_with_bert() agrees with it.
    classifier = pipeline(
        "text-classification",
        model="jy46604790/Fake-News-Bert-Detect",
        device=-1,
        truncation=True,
        max_length=512
    )
    print("BERT Classifier loaded.")
except Exception as e:
    # Best-effort load: remember why it failed and keep the module importable.
    classifier_error = str(e)
    print(f"BERT load failed: {e}")


# ==========================================
# 2. Improved Tone / Heuristic Analyzer
# ==========================================
def analyze_tone_fallback(text):
    """
    Score how "clickbait-like" a piece of text reads using fixed heuristics.

    Signals checked, in order: ALL-CAPS overuse, exclamation marks, "?!"/"!?"
    punctuation, hard-coded fake-news phrases, rumor-forwarding wording and
    anonymous-source wording. Each signal adds a fixed number of points.

    Returns a tuple ``(risk_score, label, engine_name)`` where ``risk_score``
    is capped at 100 (higher = more sensational) and ``engine_name`` lists
    the signals that fired.
    """
    tokens = text.split()
    if not tokens:
        # Nothing to analyze (empty or whitespace-only input).
        return 0, "🟢 Neutral / Standard", "Heuristic Engine"

    lowered = text.lower()
    total = 0
    findings = []

    # ALL-CAPS overuse (clickbait): only words longer than 3 characters count.
    shouted = sum(1 for tok in tokens if len(tok) > 3 and tok.isupper())
    if shouted / max(len(tokens), 1) > 0.20:
        total += 18
        findings.append("Excessive ALL-CAPS")

    # Sensational punctuation
    if text.count("!") >= 3:
        total += 12
        findings.append("Multiple exclamation marks")
    if re.search(r'\?\!|\!\?', text):
        total += 8
        findings.append("Sensationalist punctuation (?! or !?)")

    # Hard fake-specific phrases (almost never in real journalism)
    fake_phrases = (
        "you won't believe", "mind-blowing", "miracle cure",
        "they don't want you to know", "secret they're hiding",
        "doctors hate", "one weird trick", "illuminati", "plandemic",
        "scamdemic", "sheeple", "share before deleted", "banned video",
        "censored truth", "99% won't share", "forwarded as received",
        "going viral now", "wake up people", "deep state agenda",
        "government is hiding", "they are hiding", "what they don't tell you",
    )
    matched_phrases = [phrase for phrase in fake_phrases if phrase in lowered]
    if matched_phrases:
        # 18 points per phrase, capped at 45 for this signal.
        total += min(len(matched_phrases) * 18, 45)
        findings.append(f"Fake-specific phrases: {', '.join(matched_phrases[:3])}")

    # Rumor forwarding language: 15 points per distinct pattern that matches.
    rumor_patterns = (
        r'forwarded as received',
        r'circulating on (whatsapp|telegram|social media)',
        r'cannot be independently verified',
        r'unverified (claim|source|report)',
        r'we cannot confirm',
        r'rumou?r (has it|is spreading|is going around)',
    )
    rumor_count = sum(1 for pattern in rumor_patterns if re.search(pattern, lowered))
    if rumor_count:
        total += rumor_count * 15
        findings.append(f"Rumor-forwarding language ({rumor_count} pattern(s))")

    # Anonymous + strong claim combo: counted once no matter how many match.
    anonymous_patterns = (
        r'anonymous source(s)? (claim|say|report)',
        r'insider (reveals|claims|exposes)',
        r'unnamed official (says|claims)',
        r'unidentified group (claimed|said|announced)',
    )
    if any(re.search(pattern, lowered) for pattern in anonymous_patterns):
        total += 12
        findings.append("Anonymous source making strong claim")

    risk_score = min(total, 100)
    if risk_score >= 40:
        label = "🔴 High Risk: Sensationalized / Clickbait Style"
    elif risk_score >= 20:
        label = "🟡 Medium Risk: Slightly Sensational"
    else:
        label = "🟢 Low Risk: Standard Journalistic Style"

    summary = "; ".join(findings) if findings else "No significant fake signals"
    return risk_score, label, f"Heuristic Engine ({summary})"


# ==========================================
# 3. BERT Classification (corrected label mapping)
# ==========================================
def classify_with_bert(text):
    """
    Classify *text* with the loaded transformer pipeline.

    Returns ``(fake_probability, label, engine)`` where ``fake_probability``
    is a percentage in 0-100 rounded to one decimal place.

    Label mapping for jy46604790/Fake-News-Bert-Detect, per its model card:
      LABEL_0 = Fake news
      LABEL_1 = Real news

    Falls back to ``analyze_tone_fallback`` when the model is not loaded,
    when inference raises, or when the model returns an unexpected label.
    """
    if classifier is None:
        return analyze_tone_fallback(text)

    try:
        result = classifier(text, truncation=True, max_length=512)[0]
        label_raw = str(result["label"]).upper()   # "LABEL_0" or "LABEL_1"
        score = float(result["score"])              # confidence in that label
    except Exception as e:
        # Inference is best-effort; the heuristic keeps the app responsive.
        print(f"BERT inference failed: {e}")
        return analyze_tone_fallback(text)

    if label_raw == "LABEL_0":
        # Model thinks it's fake, with `score` confidence.
        fake_prob = round(score * 100, 1)
        label = "🔴 High Risk: Model flagged as Fake"
    elif label_raw == "LABEL_1":
        # Model thinks it's real, so the fake probability is the complement.
        fake_prob = round((1 - score) * 100, 1)
        label = "🟢 Low Risk: Model classifies as Real"
    else:
        # Unknown label: do not guess which class it means.
        print(f"BERT returned unexpected label: {label_raw}")
        return analyze_tone_fallback(text)

    return fake_prob, label, f"BERT Deep Neural Classifier ({score*100:.1f}% model confidence)"


# ==========================================
# 4. Outlet Spoof Detector
# ==========================================
def detect_spoofed_source_label(raw_text):
    """
    Detect text that name-drops a credible outlet without linking to it.

    The text is scanned for attribution phrases ("source: BBC",
    "Reuters confirms", ...). For each phrase that names a known outlet, the
    URLs in the text are checked: the claim is considered backed only when a
    URL's hostname is one of that outlet's domains (or a subdomain of it).

    Returns ``(True, outlet_name_title_cased)`` for the first unbacked claim,
    otherwise ``(False, "")``.
    """
    from urllib.parse import urlparse

    DECEPTIVE_PATTERNS = [
        r'(as reported|according|source|sourced|published|confirmed|breaking news from|exclusive from|via)\s*:?\s*(by\s+)?(reuters|bbc|ndtv|cnn|apnews|ap news|the hindu|indian express|bloomberg|aljazeera|times of india|washington post|new york times|the guardian|firstpost|thewire|snopes)',
        r'(reuters|bbc|ndtv|cnn|apnews)\s*(exclusive|breaking|confirms|confirmed|reports|reported)\b',
        r'\b(reuters|bbc|ndtv|cnn)\b.{0,15}(verif|confirm|report)',
    ]
    # Outlet name as it appears in text -> hostnames that genuinely belong to it.
    # Comparing hostnames (not substrings of the URL) means "nytimes.com" backs
    # "new york times", while "bbc-news.example.xyz" does not back "bbc".
    OUTLET_DOMAINS = {
        "reuters": ("reuters.com",),
        "bbc": ("bbc.com", "bbc.co.uk"),
        "ndtv": ("ndtv.com",),
        "cnn": ("cnn.com",),
        "apnews": ("apnews.com",),
        "ap news": ("apnews.com",),
        "the hindu": ("thehindu.com",),
        "indian express": ("indianexpress.com",),
        "bloomberg": ("bloomberg.com",),
        "aljazeera": ("aljazeera.com",),
        "times of india": ("timesofindia.indiatimes.com", "timesofindia.com"),
        "washington post": ("washingtonpost.com",),
        "new york times": ("nytimes.com",),
        "the guardian": ("theguardian.com",),
        "firstpost": ("firstpost.com",),
        "thewire": ("thewire.in",),
        "snopes": ("snopes.com",),
        "politifact": ("politifact.com",),
        "factcheck": ("factcheck.org",),
        "boomlive": ("boomlive.in",),
        "altnews": ("altnews.in",),
    }
    text_lower = raw_text.lower()

    # Collect the hostnames of every URL in the text once.
    linked_hosts = set()
    for url in re.findall(r'https?://\S+', raw_text):
        try:
            host = urlparse(url).hostname or ""
        except ValueError:
            # Malformed URL (e.g. broken IPv6 literal) cannot back any claim.
            continue
        # \S+ may swallow trailing sentence punctuation after a bare domain.
        host = host.rstrip(".,;:!?)]}>\"'")
        if host:
            linked_hosts.add(host)

    def _is_backed_by_link(outlet):
        return any(
            host == domain or host.endswith("." + domain)
            for host in linked_hosts
            for domain in OUTLET_DOMAINS[outlet]
        )

    for pattern in DECEPTIVE_PATTERNS:
        match = re.search(pattern, text_lower)
        if not match:
            continue
        matched_text = match.group(0)
        outlet_found = next((o for o in OUTLET_DOMAINS if o in matched_text), "")
        if not outlet_found:
            continue
        if not _is_backed_by_link(outlet_found):
            return True, outlet_found.title()
    return False, ""


# ==========================================
# 5. URL Verification
# ==========================================
# Registrable domains treated as trustworthy news / government / fact-check
# sources. Subdomains of these entries are trusted as well.
TRUSTED_DOMAINS = [
    "livelaw.in","barandbench.com","reuters.com","apnews.com",
    "bbc.com","bbc.co.uk","nytimes.com","washingtonpost.com",
    "theguardian.com","indianexpress.com","thehindu.com",
    "ndtv.com","bloomberg.com","economist.com","snopes.com",
    "politifact.com","factcheck.org","altnews.in","boomlive.in",
    "independent.co.uk","cnn.com","aljazeera.com",
    "timesofindia.indiatimes.com","thewire.in","firstpost.com",
    "pib.gov.in","mea.gov.in","mohfw.gov.in","hindustantimes.com",
    "scroll.in","theprint.in","news18.com","zeenews.india.com",
    "wionews.com","businesstoday.in","livemint.com","moneycontrol.com"
]

def is_trusted_domain(url):
    """
    Check whether *url* points at one of ``TRUSTED_DOMAINS``.

    Returns ``(is_trusted, domain)`` where ``domain`` is the lowercased
    hostname without port, credentials or a leading ``www.``. Unparseable
    input yields ``(False, "")``.
    """
    from urllib.parse import urlparse
    try:
        # .hostname drops userinfo and port and lowercases the host.
        domain = urlparse(url).hostname or ""
        # URLs pulled out of free text often carry trailing punctuation.
        domain = domain.rstrip(".,;:!?)]}>\"'")
        # Strip only a *leading* "www." — removing it anywhere would turn
        # "reutewww.rs.com" into "reuters.com".
        if domain.startswith("www."):
            domain = domain[4:]
    except (ValueError, TypeError, AttributeError):
        return False, ""

    for trusted in TRUSTED_DOMAINS:
        if domain == trusted or domain.endswith("." + trusted):
            return True, domain
    return False, domain

def fetch_url_title(url):
    """
    Download *url* and return the text of its first ``<title>`` element.

    HTML entities are decoded and runs of whitespace collapsed. Returns
    ``None`` when the request fails, the status is not 200, or no non-empty
    title is present. The result is plain text, NOT HTML-safe: callers must
    escape it before embedding it in markup.
    """
    try:
        import html
        import requests
        r = requests.get(url, timeout=5, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        if r.status_code == 200:
            # Allow attributes on the tag (e.g. <title data-rh="true">).
            m = re.search(r'<title\b[^>]*>(.*?)</title>', r.text, re.IGNORECASE | re.DOTALL)
            if m:
                # html.unescape handles every named/numeric entity, not just a few.
                title = " ".join(html.unescape(m.group(1)).split())
                if title:
                    return title
    except Exception as e:
        # Best-effort lookup: network errors or a missing `requests` package
        # must not break the verdict pipeline.
        print(f"URL title fetch error: {e}")
    return None


# ==========================================
# 6. OCR
# ==========================================
def normalize_english(text):
    """
    Reduce *text* to plain ASCII prose suitable for the classifier and search.

    Steps, applied in order: drop URLs / ``www.`` links / e-mail addresses,
    replace every character outside letters, digits, whitespace and
    ``. , ! ? ' " -`` with a space, then collapse whitespace and trim.
    Falsy input yields an empty string.
    """
    if not text:
        return ""

    substitutions = (
        (r'http\S+|www\.\S+|\S+@\S+', ' '),
        (r'[^a-zA-Z0-9\s.,!?\'"\-]', ' '),
        (r'\s+', ' '),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def process_image_to_text(image):
    """
    Run OCR on *image* and return the normalized text.

    Accepts a PIL image or anything ``np.uint8`` / ``Image.fromarray`` can
    convert. The image is converted to grayscale and contrast-boosted before
    being handed to Tesseract. ``None`` yields ``""``; any failure is logged
    and reported as an ``"OCR Extraction Failed: ..."`` string.
    """
    if image is None:
        return ""

    try:
        if isinstance(image, Image.Image):
            picture = image
        else:
            picture = Image.fromarray(np.uint8(image))
        grayscale = ImageOps.grayscale(picture)
        boosted = ImageEnhance.Contrast(grayscale).enhance(2.5)
        extracted = pytesseract.image_to_string(boosted, config=r'--oem 3 --psm 3')
        return normalize_english(extracted)
    except Exception as e:
        traceback.print_exc()
        return f"OCR Extraction Failed: {str(e)}"


# ==========================================
# 7. Web Fact-Check Engine (IMPROVED)
# ==========================================

# Fact-checking outlets. Entries are matched as substrings of the lowercased
# result link in verify_facts_online(), so an entry may be a bare domain or a
# "domain/path" prefix for outlets whose fact-check desk lives under a path.
FACT_CHECK_OUTLETS = [
    "snopes.com","politifact.com","factcheck.org","altnews.in",
    "boomlive.in","thequint.com","vishvasnews.com","logically.ai",
    "reuters.com/fact-check","apnews.com/hub/ap-fact-check",
    "bbc.com/news/reality_check","thehindu.com/specials/fact-check"
]

# Debunk phrases searched for in each result's lowercased title + snippet.
# How a hit is scored depends on the kind of source it came from
# (fact-checker, trusted news, or general web) — see verify_facts_online().
DEBUNK_KEYWORDS = [
    "fake news","hoax","debunked","fabricated","false claim","misleading",
    "misinformation","no such attack","no evidence","fact-check finds",
    "did not happen","not verified","unverified claim","rumour spreading",
    "rumor spreading","manipulated video","out of context","satire",
    "this is false","this claim is false","claim is misleading",
    "viral claim","incorrect claim"
]

# Phrases typical of conventional reporting; a hit in a result's lowercased
# title + snippet is treated as a sign that the story is being covered as real.
REAL_NEWS_SIGNALS = [
    "police said","official said","government said","confirmed by",
    "spokesperson said","press conference","fir registered","arrested",
    "minister said","court order","official statement","pib confirmed",
    "reported by","according to officials","sources confirmed",
    "eyewitnesses said","investigation reveals","charged with",
    "published report","breaking news confirmed","official release"
]

def build_search_query(text):
    """
    Derive a short web-search query from article text.

    URLs are removed, punctuation becomes whitespace, and the first seven
    words that are longer than two characters and not stop words are joined
    with single spaces (original casing kept).
    """
    stop_words = {
        "the","and","but","or","for","with","about","against","from","into",
        "through","during","before","after","above","below","under","over",
        "again","further","then","once","here","there","when","where","why",
        "how","all","any","both","each","few","more","most","other","some",
        "such","than","too","very","can","will","just","should","would",
        "these","those","this","that","in","on","at","to","of","by","an","a",
        "is","are","was","were","be","has","have","had","says","said","its",
        "it","as","up","do","he","she","they","we","our","their","his","her"
    }

    without_urls = re.sub(r'https?://\S+', '', text)
    plain = re.sub(r'[^\w\s]', ' ', without_urls)

    keywords = []
    for token in plain.split():
        if len(token) <= 2 or token.lower() in stop_words:
            continue
        keywords.append(token)
        if len(keywords) == 7:
            # Seven meaningful words keep the query tight.
            break
    return " ".join(keywords)


def ddg_search(query, max_results=6, retries=2):
    """
    Query DuckDuckGo text search, trying up to *retries* times.

    Returns the first non-empty list of results. An attempt that raises is
    logged and followed by a one-second pause; an attempt that returns
    nothing is retried immediately. Returns ``[]`` when every attempt fails.
    """
    for attempt_no in range(1, retries + 1):
        try:
            with DDGS() as client:
                found = list(client.text(query, max_results=max_results))
        except Exception as e:
            print(f"DDG attempt {attempt_no} failed: {e}")
            time.sleep(1)
            continue
        if found:
            return found
    return []


def _link_matches_outlet(link, outlet):
    """Return True when *link*'s host (and path prefix, if any) belongs to *outlet*.

    *outlet* is either ``"domain"`` or ``"domain/path-prefix"``.
    """
    from urllib.parse import urlparse
    try:
        parsed = urlparse(link)
        host = (parsed.hostname or "").lower()
        path = (parsed.path or "").lower()
        if host.startswith("www."):
            host = host[4:]
    except (ValueError, TypeError, AttributeError):
        return False

    domain, _, path_prefix = outlet.lower().partition("/")
    if host != domain and not host.endswith("." + domain):
        return False
    return not path_prefix or path.startswith("/" + path_prefix)


def verify_facts_online(text):
    """
    Multi-pass web fact-checking.

    Returns: (results_list, cards_html, hits_count, debunk_score, real_corroboration)

    Scoring per search result:
    - Fact-check site with debunk keywords        -> debunk_score += 2
    - Fact-check site without debunk keywords     -> real_corroboration += 1
    - Trusted news, >= 2 debunk keywords and no
      real-reporting signals                      -> debunk_score += 1
    - Trusted news otherwise                      -> real_corroboration += 1
    - Other site, >= 2 debunk keywords and no
      real-reporting signals                      -> debunk_score += 1
    - Other site with real-reporting signals      -> real_corroboration += 1
    - Anything else                               -> shown as related, not scored

    When the query is too short or nothing is found, an "UNVERIFIED" card is
    returned with all counters at zero.
    """
    import html

    query = build_search_query(text)
    if len(query.split()) < 2:
        no_result_html = _card("UNVERIFIED", "Headline Too Short",
            "The text is too short to run a meaningful web search.", "debunk", "#fb923c")
        return [], no_result_html, 0, 0, 0

    print(f"[Web Search] Query: '{query}'")

    # Pass 1: full query
    results = ddg_search(query, max_results=8)

    # Pass 2: shorter query fallback
    if not results:
        short_q = " ".join(query.split()[:4])
        print(f"[Web Search] Fallback query: '{short_q}'")
        results = ddg_search(short_q, max_results=5)

    if not results:
        no_result_html = _card("UNVERIFIED", "No Online Matches Found",
            "No mainstream news outlets or fact-check databases are reporting this claim. "
            "This may be a fabricated rumor, hyper-local event, or newly generated hoax. "
            "Treat with caution until a direct source is found.",
            "debunk", "#fb923c")
        return [], no_result_html, 0, 0, 0

    cards = []
    debunk_score = 0
    real_corroboration = 0

    for idx, res in enumerate(results, start=1):
        # `or` also covers keys that are present but None/empty.
        title = res.get('title') or 'Source'
        body = res.get('body') or ''
        link = res.get('href') or '#'
        combined = (title + " " + body).lower()

        # Compare parsed hostnames rather than substrings of the whole URL,
        # so "fakecnn.com" or "evil.xyz/?u=snopes.com" are not trusted.
        is_fact_check_site = any(_link_matches_outlet(link, fc) for fc in FACT_CHECK_OUTLETS)
        is_trusted_news = is_trusted_domain(link)[0]

        debunk_hits = sum(1 for kw in DEBUNK_KEYWORDS if kw in combined)
        real_hits = sum(1 for sig in REAL_NEWS_SIGNALS if sig in combined)

        if is_fact_check_site:
            if debunk_hits > 0:
                # Fact-checker found it FALSE
                debunk_score += 2
                status = _badge("🚨 FACT-CHECKER: FALSE", "danger")
                card_class = "source-debunk"
            else:
                # Fact-checker article exists but doesn't debunk it
                real_corroboration += 1
                status = _badge("✓ FACT-CHECK CORROBORATED", "success")
                card_class = "source-credible"

        elif is_trusted_news:
            if debunk_hits >= 2 and real_hits == 0:
                # Trusted news reporting it AS fake news
                debunk_score += 1
                status = _badge("⚠️ REPORTED AS MISINFORMATION", "warning")
                card_class = "source-debunk"
            else:
                # Trusted news covering the story normally
                real_corroboration += 1
                status = _badge("✓ TRUSTED SOURCE", "success")
                card_class = "source-credible"

        else:
            # General web result
            if debunk_hits >= 2 and real_hits == 0:
                debunk_score += 1
                status = _badge("🚨 DEBUNK SIGNALS", "danger")
                card_class = "source-debunk"
            elif real_hits > 0:
                real_corroboration += 1
                status = _badge("✓ NEWS CORROBORATION", "success")
                card_class = "source-credible"
            else:
                status = _badge("◉ RELATED RESULT", "neutral")
                card_class = "source-credible"

        # Truncate body for display
        display_body = body[:280] + ("..." if len(body) > 280 else "")

        # Search results are untrusted input: escape everything that goes
        # into markup and only emit http(s) links.
        safe_title = html.escape(title)
        safe_body = html.escape(display_body)
        if link.lower().startswith(("http://", "https://")):
            safe_link = html.escape(link, quote=True)
        else:
            safe_link = "#"

        cards.append(
            f"<div class='source-card {card_class}'>"
            f"<div class='source-header'>"
            f"<span class='source-idx'>#{idx}</span>"
            f"<h4>{safe_title}</h4>"
            f"{status}"
            f"</div>"
            f"<p class='source-body'>\"{safe_body}\"</p>"
            f"<div class='source-footer'>"
            f"<a href='{safe_link}' target='_blank' class='source-link'>🔗 View source</a>"
            f"</div></div>\n"
        )

    return results, "".join(cards), len(results), debunk_score, real_corroboration


def _badge(text, kind):
    colors = {
        "success": ("rgba(16,185,129,0.1)", "#10b981", "rgba(16,185,129,0.15)"),
        "danger":  ("rgba(239,68,68,0.1)",  "#ef4444", "rgba(239,68,68,0.15)"),
        "warning": ("rgba(245,158,11,0.1)", "#f59e0b", "rgba(245,158,11,0.15)"),
        "neutral": ("rgba(100,116,139,0.1)","#64748b", "rgba(100,116,139,0.15)"),
    }
    bg, color, border = colors.get(kind, colors["neutral"])
    return (f"<span class='badge' style='background:{bg};color:{color};"
            f"border:1px solid {border};'>{text}</span>")

def _card(idx_label, title, body_text, cls, color):
    return (
        f"<div class='source-card source-{cls}' style='border-left-color:{color};'>"
        f"<div class='source-header'>"
        f"<span class='source-idx' style='color:{color};'>{idx_label}</span>"
        f"<h4>{title}</h4></div>"
        f"<p class='source-body'>{body_text}</p>"
        f"</div>"
    )


# ==========================================
# 8. MAIN VERDICT ENGINE (Redesigned)
# ==========================================
def compute_verdict(is_url_verified, verified_domain_name,
                    hits_count, debunk_score, real_corroboration,
                    bert_fake_prob, is_label_spoofed, is_url_verified_flag):
    """
    Combine all signals into a final verdict.

    Returns ``(reliability_percent, title, css_class, color, description_html)``.

    Decision order (first match wins):
    1. Direct trusted URL present      -> 96, or 78 when bert_fake_prob >= 50
    2. debunk_score >= 3 / == 2 / == 1 -> base 6 / 15 / 32
    3. real_corroboration >= 3         -> 88 + count, capped at 95
       real_corroboration == 2         -> 80, or a fixed 68 when bert_fake_prob > 60
       real_corroboration == 1         -> 68 when bert_fake_prob < 40, else 50
    4. Web results but no clear signal -> 58 when bert_fake_prob < 35, else 38
    5. No web results                  -> 22 / 38 / 52 by bert_fake_prob band

    A 12-point "spoof penalty" is subtracted (with a per-branch floor) when an
    outlet is name-dropped without a verified URL.
    NOTE(review): the fixed 68 in the two-source branch does not apply the
    spoof penalty, unlike its sibling branches — confirm this is intended.
    """
    # (css class, accent colour) pairs shared by several verdicts.
    genuine = ("verdict-genuine-title", "#10b981")
    sensational = ("verdict-sensationalized-title", "#f59e0b")
    fabricated = ("verdict-fabricated-title", "#ef4444")
    unverified = ("verdict-unverified-title", "#fb923c")

    def verdict(score, title, theme, description):
        css_class, color = theme
        return score, title, css_class, color, description

    penalty = 12 if (is_label_spoofed and not is_url_verified_flag) else 0
    url_tip = "<br><small style='color:#6b7280;'>Tip: Paste the direct article URL if you have one.</small>"

    # ── Case 1: Direct trusted URL ──
    if is_url_verified:
        if bert_fake_prob < 50:
            return verdict(96, "🏆 VERIFIED GENUINE", genuine, (
                f"Directly linked to trusted domain <strong>{verified_domain_name}</strong> "
                f"and written in an objective style. High confidence this is genuine."
            ))
        return verdict(78, "📝 SENSATIONALIZED — CORE FACTS REAL", sensational, (
            f"Verified via <strong>{verified_domain_name}</strong> but writing style is sensational. "
            f"Core facts are likely authentic; specific details may be exaggerated."
        ))

    # ── Case 2: Fact-checker or multiple debunk signals ──
    if debunk_score >= 3:
        return verdict(max(6 - penalty, 3), "🚨 FABRICATED / DEBUNKED", fabricated, (
            "Multiple fact-checkers and credible sources have debunked this claim. "
            "This is almost certainly misinformation. Do NOT share."
        ))
    if debunk_score == 2:
        return verdict(max(15 - penalty, 5), "🚨 LIKELY FAKE / DEBUNKED", fabricated, (
            "Two or more credible sources flag this as false or misleading. "
            "Strong evidence this is misinformation."
        ))
    if debunk_score == 1:
        return verdict(max(32 - penalty, 10), "⚠️ SUSPICIOUS CLAIM", fabricated, (
            "At least one credible source contradicts or flags this claim. "
            "Treat with strong skepticism and verify from primary sources."
        ))

    # ── Case 3: Real corroboration found ──
    if real_corroboration >= 3:
        score = min(88 + real_corroboration - penalty, 95)
        return verdict(score, "🏆 VERIFIED GENUINE", genuine, (
            f"Found in <strong>{real_corroboration}</strong> credible/trusted sources with no debunking signals. "
            f"High confidence this is genuine news."
        ))
    if real_corroboration == 2:
        if bert_fake_prob > 60:
            return verdict(68, "📝 LIKELY REAL — VERIFY DETAILS", sensational, (
                "Found in 2 credible sources, but writing style raises some flags. "
                "Core story appears real; verify specific claims independently."
            ))
        return verdict(80 - penalty, "🏆 VERIFIED GENUINE", genuine, (
            "Found in 2 credible sources with objective writing style. "
            "High confidence this is genuine."
        ))
    if real_corroboration == 1:
        if bert_fake_prob < 40:
            return verdict(max(68 - penalty, 50), "📝 LIKELY REAL — NEEDS MORE SOURCES", sensational, (
                "Found in one credible source with objective writing. "
                "Likely genuine but seek additional confirmation."
            ))
        return verdict(max(50 - penalty, 30), "⚠️ UNVERIFIED — MIXED SIGNALS", unverified, (
            "Only one corroborating source found and writing style is questionable. "
            "Exercise caution and verify from a primary source."
        ))

    # ── Case 4: Web results exist but no clear real/fake signal ──
    if hits_count > 0:
        if bert_fake_prob < 35:
            return verdict(max(58 - penalty, 40), "⚠️ UNVERIFIED — PROBABLY REAL", unverified, (
                "Some web results found but from non-trusted domains. "
                "Writing style appears objective. Likely real but needs a primary source link."
            ))
        return verdict(max(38 - penalty, 20), "⚠️ UNVERIFIED — SUSPICIOUS", unverified, (
            "Some web results found but content is not clearly corroborated by trusted outlets. "
            "Sensational writing style detected. Verify before sharing."
        ))

    # ── Case 5: No web results — rely on BERT ──
    if bert_fake_prob >= 65:
        return verdict(max(22 - penalty, 8), "⚠️ UNVERIFIED — HIGH FAKE RISK", unverified, (
            "No online corroboration found AND the AI model flags this as likely fake. "
            "This is possibly a fabricated or circulating rumor. "
            + url_tip
        ))
    if bert_fake_prob >= 40:
        return verdict(max(38 - penalty, 20), "⚠️ UNVERIFIED — UNCERTAIN", unverified, (
            "No online corroboration found. Could be a very recent, hyper-local, or fabricated story. "
            "Seek a direct source before sharing. "
            + url_tip
        ))
    return verdict(max(52 - penalty, 35), "⚠️ UNVERIFIED — POSSIBLY REAL", unverified, (
        "No mainstream coverage found, but writing style appears legitimate. "
        "Could be a hyper-local or very recent story. "
        "Seek a primary source before sharing. "
        + url_tip
    ))


# ==========================================
# 9. Master Process Function
# ==========================================
def process_and_verdict(text_input, ocr_output, img_input, source_tab):
    """
    Run the full analysis pipeline for one submission and build the UI payload.

    Input selection: when *source_tab* is ``"image"`` the existing
    *ocr_output* is used, or OCR is run on *img_input* if that text is empty;
    otherwise *text_input* is analysed.

    Pipeline: trusted-URL check -> outlet spoof check -> text normalisation ->
    classifier -> web fact-check -> ``compute_verdict`` -> HTML rendering.

    Returns on success a 7-tuple:
    ``(gr.update(visible=True), banner_html, verdict_html, ai_report_html,
    web_markdown, updated_ocr, gr.update(open=False))``.

    NOTE(review): the two early error returns yield 6 values while the success
    path yields 7 — confirm both shapes against the event handler's outputs.
    """
    from html import escape, unescape

    updated_ocr = ocr_output

    if source_tab == "image":
        if not ocr_output or not ocr_output.strip():
            if img_input is None:
                return (
                    gr.update(visible=False),
                    "<div class='source-card source-debunk'><h4>⚠️ Input Error</h4>"
                    "<p>Please upload an image or extract OCR text first.</p></div>",
                    "", "", ocr_output, gr.update(open=False)
                )
            raw_text = process_image_to_text(img_input)
            updated_ocr = raw_text
        else:
            raw_text = ocr_output
    else:
        # A cleared textbox may deliver None; treat it as empty text.
        raw_text = text_input or ""

    # ── Extract URLs ──
    is_url_verified = False
    verified_domain_name = ""
    verified_url_card = ""

    for url in re.findall(r'(https?://\S+)', raw_text):
        is_trusted, domain = is_trusted_domain(url)
        if not is_trusted:
            continue

        # URL, domain and the fetched page title are untrusted input, so
        # everything that ends up in markup is escaped first.
        safe_url = escape(url, quote=True)
        safe_domain = escape(domain)
        safe_domain_title = escape(domain.title())
        page_title = fetch_url_title(url) or f"Verified Article on {domain.title()}"
        # unescape-then-escape normalises titles that still carry entities.
        safe_title = escape(unescape(page_title))

        is_url_verified = True
        verified_domain_name = safe_domain
        verified_url_card = (
            f"<div class='source-card source-credible' style='border-left-width:6px;'>"
            f"<div class='source-header'>"
            f"<span class='source-idx' style='background:#10b981;color:white;'>✓ DIRECT SOURCE</span>"
            f"<h4>{safe_title}</h4>"
            f"{_badge('✓ TRUSTED DOMAIN','success')}"
            f"</div>"
            f"<p class='source-body'>Direct link verified from trusted domain "
            f"<strong>{safe_domain}</strong>.</p>"
            f"<div class='source-footer'>"
            f"<a href='{safe_url}' target='_blank' class='source-link'>🔗 View on {safe_domain_title}</a>"
            f"</div></div>"
        )
        # The first trusted URL is enough.
        break

    # ── Spoof detection ──
    is_label_spoofed, spoofed_outlet = detect_spoofed_source_label(raw_text)
    show_spoof_warning = is_label_spoofed and not is_url_verified
    safe_outlet = escape(spoofed_outlet)
    spoofed_warning_card = ""
    if show_spoof_warning:
        spoofed_warning_card = (
            f"<div class='source-card source-debunk' style='border-left-color:#f59e0b;'>"
            f"<div class='source-header'>"
            f"<span class='source-idx' style='color:#f59e0b;'>⚠️ CLAIM CHECK</span>"
            f"<h4>Outlet Claimed Without Verifiable Link</h4>"
            f"{_badge('🚨 UNVERIFIED CLAIM','danger')}"
            f"</div>"
            f"<p class='source-body'>Content claims to be from <strong>{safe_outlet}</strong> "
            f"but no verified URL from that outlet was found. "
            f"This is a common credibility manipulation tactic.</p>"
            f"</div>"
        )

    # ── Clean text for analysis ──
    cleaned_text = normalize_english(raw_text)
    if not cleaned_text or len(cleaned_text) < 10:
        return (
            gr.update(visible=False),
            "<div class='source-card source-debunk'><h4>⚠️ Too Short</h4>"
            "<p>Please provide a full sentence or news headline (min 10 characters).</p></div>",
            "", "", updated_ocr, gr.update(open=False)
        )

    # ── BERT Classification ──
    bert_fake_prob, bert_label, bert_engine = classify_with_bert(cleaned_text)

    # ── Web Fact-Check ──
    search_results, web_markdown, hits_count, debunk_score, real_corroboration = verify_facts_online(cleaned_text)

    # Prepend cards
    if spoofed_warning_card:
        web_markdown = spoofed_warning_card + "\n" + web_markdown
    if is_url_verified:
        web_markdown = verified_url_card + "\n" + web_markdown

    # ── Compute final verdict ──
    reliability, verdict_title, verdict_class, verdict_color, verdict_desc = compute_verdict(
        is_url_verified, verified_domain_name,
        hits_count, debunk_score, real_corroboration,
        bert_fake_prob, is_label_spoofed, is_url_verified
    )

    # ── Build Verdict HTML ──
    # Template values are computed up front: nested quotes and backslashes
    # inside f-string expressions are a SyntaxError before Python 3.12.
    spoof_metric = ""
    outlet_claim_row = ""
    if show_spoof_warning:
        spoof_metric = ("<div class='metric-item'>"
                        "<span class='metric-val' style='color:#f59e0b;'>⚠️ CLAIMED</span>"
                        "<span class='metric-lbl'>OUTLET UNVERIFIED</span></div>")
        outlet_claim_row = (
            "<div class='intel-item'><h5>⚠️ Outlet Claim</h5>"
            "<div class='intel-val' style='color:#f59e0b;'>\""
            + safe_outlet
            + "\" claimed without verified URL.</div></div>"
        )

    ai_short_label = bert_label.split(":")[0]
    flag_suffix = "s" if debunk_score != 1 else ""
    prob_color = "#ef4444" if bert_fake_prob > 50 else "#10b981"
    text_preview = cleaned_text[:800] + ("..." if len(cleaned_text) > 800 else "")

    verdict_html = f"""
    <div class='verdict-dashboard'>
      <div class='verdict-score-wrapper'>
        <div class='verdict-circle-progress'
             style='background:conic-gradient({verdict_color} {reliability}%,#334155 {reliability}%);'>
          <div class='verdict-circle-inner'>
            <span class='verdict-score'>{reliability}%</span>
            <span class='verdict-score-label'>RELIABILITY</span>
          </div>
        </div>
      </div>
      <div class='verdict-details'>
        <div class='verdict-title {verdict_class}'>{verdict_title}</div>
        <p class='verdict-desc'>{verdict_desc}</p>
        <div class='verdict-metrics'>
          <div class='metric-item'>
            <span class='metric-val'>{ai_short_label}</span>
            <span class='metric-lbl'>AI CLASSIFICATION</span>
          </div>
          <div class='metric-item'>
            <span class='metric-val'>{real_corroboration} trusted</span>
            <span class='metric-lbl'>REAL SOURCES FOUND</span>
          </div>
          <div class='metric-item'>
            <span class='metric-val'>{debunk_score} flag{flag_suffix}</span>
            <span class='metric-lbl'>DEBUNK SIGNALS</span>
          </div>
          <div class='metric-item'>
            <span class='metric-val'>{hits_count} total</span>
            <span class='metric-lbl'>WEB RESULTS</span>
          </div>
          {spoof_metric}
        </div>
      </div>
    </div>
    """

    ai_report_html = f"""
    <div class='intel-card'>
      <div class='intel-item'>
        <h5>Classifier Engine</h5>
        <div class='intel-val' style='color:#06b6d4;'>{bert_engine}</div>
      </div>
      <div class='intel-item'>
        <h5>AI Classification</h5>
        <div class='intel-val'>{bert_label}</div>
      </div>
      <div class='intel-item'>
        <h5>Fake Probability (AI)</h5>
        <div class='intel-val' style='color:{prob_color};'>{bert_fake_prob}%</div>
      </div>
      <div class='intel-item'>
        <h5>Web Corroboration</h5>
        <div class='intel-val'>{real_corroboration} trusted source(s) &nbsp;|&nbsp;
          <span style='color:#ef4444;'>{debunk_score} debunk signal{flag_suffix}</span>
        </div>
      </div>
      {outlet_claim_row}
      <div class='intel-item'>
        <h5>Analyzed Text</h5>
        <div class='intel-val' style='font-weight:normal;font-size:0.88rem;font-family:monospace;
          background:#0b0f19;padding:12px;border-radius:8px;border:1px solid #1e293b;
          color:#cbd5e1;word-break:break-all;'>{text_preview}</div>
      </div>
    </div>
    """

    # Reliability of 60% or more is presented as "likely true".
    banner_html = (
        "<div class='verdict-banner-true'>🟢 LIKELY TRUE NEWS</div>"
        if reliability >= 60 else
        "<div class='verdict-banner-false'>🔴 LIKELY FALSE / UNVERIFIED</div>"
    )

    return (
        gr.update(visible=True),
        banner_html,
        verdict_html,
        ai_report_html,
        web_markdown,
        updated_ocr,
        gr.update(open=False)
    )


def handle_ocr_scan(img):
    """Click handler for the "Extract Screenshot Text" button.

    Returns a warning message when no image has been supplied; otherwise
    returns whatever ``process_image_to_text(img)`` produces.
    """
    return "⚠️ Image not uploaded yet." if img is None else process_image_to_text(img)


# ==========================================
# 10. Stylesheet
# ==========================================
# Custom stylesheet handed to gr.Blocks(css=...) further down in this file.
# It applies a dark palette to the page shell and defines the classes used by
# the app's hand-written HTML fragments:
#   cyber-*    title, subtitle and action buttons
#   verdict-*  score dashboard, verdict titles and the true/false banners
#   metric-*   small label/value pairs inside the verdict dashboard
#   source-*, badge   presumably the web-source cards built elsewhere in the
#                     file — verify against the code that renders them
#   intel-*    the report card and the idle placeholder card
# NOTE: the first rule @imports the Outfit and Inter fonts from
# fonts.googleapis.com, so the browser needs network access to render them.
# NOTE(review): selectors such as .block and .tab-nav presumably target
# Gradio's own generated markup — confirm against the installed Gradio version.
css = """
@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;800;900&family=Inter:wght@300;400;600;700&display=swap');

body { background-color: #0b0f19 !important; }
.gradio-container {
    background-color: #0b0f19 !important;
    font-family: 'Inter','Outfit',sans-serif !important;
    color: #f1f5f9 !important;
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 20px !important;
}
.cyber-title {
    text-align: center;
    background: linear-gradient(135deg,#06b6d4 0%,#10b981 50%,#3b82f6 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 3rem; font-weight: 900;
    margin-top: 10px; margin-bottom: 2px;
    letter-spacing: -0.04em;
    font-family: 'Outfit',sans-serif !important;
}
.cyber-subtitle {
    text-align: center; color: #94a3b8;
    font-size: 1.15rem; margin-bottom: 35px;
    font-weight: 400;
}
.block {
    background-color: #111827 !important;
    border: 1px solid #1f2937 !important;
    border-radius: 16px !important;
    box-shadow: 0 15px 35px -10px rgba(0,0,0,0.6) !important;
    overflow: hidden !important;
}
textarea, input[type="text"] {
    background-color: #030712 !important;
    border: 1px solid #1f2937 !important;
    color: #f1f5f9 !important;
    font-size: 0.95rem !important;
    border-radius: 8px !important;
}
textarea:focus, input[type="text"]:focus {
    border-color: #06b6d4 !important;
    box-shadow: 0 0 10px rgba(6,182,212,0.2) !important;
}
.tab-nav {
    border-bottom: 1px solid #1f2937 !important;
    background-color: #0b0f19 !important;
    padding: 8px 12px 0 12px !important;
}
.tab-nav button {
    color: #6b7280 !important; font-weight: 700 !important;
    font-size: 0.9rem !important; border: none !important;
    background: transparent !important; padding: 10px 18px !important;
    border-radius: 8px 8px 0 0 !important;
}
.tab-nav button.selected {
    color: #06b6d4 !important; background-color: #111827 !important;
    border: 1px solid #1f2937 !important;
    border-bottom: 1px solid #111827 !important;
}
.cyber-btn {
    background: linear-gradient(135deg,#111827 0%,#1f2937 100%) !important;
    border: 1px dashed #06b6d4 !important; color: #06b6d4 !important;
    font-weight: 800 !important; letter-spacing: 0.04em;
    text-transform: uppercase;
    border-radius: 12px !important;
    font-family: 'Outfit',sans-serif !important;
}
.cyber-btn:hover {
    background: linear-gradient(135deg,#06b6d4 0%,#3b82f6 100%) !important;
    color: #ffffff !important; border-style: solid !important;
    box-shadow: 0 0 20px rgba(6,182,212,0.5) !important;
}
.verdict-dashboard {
    display: flex; align-items: center;
    background: linear-gradient(135deg,#030712 0%,#111827 100%);
    border: 1px solid #1f2937; border-radius: 16px;
    padding: 24px; gap: 24px; margin-bottom: 25px;
}
@media(max-width:768px){.verdict-dashboard{flex-direction:column;text-align:center;}}
.verdict-score-wrapper{flex-shrink:0;}
.verdict-circle-progress {
    width:140px; height:140px; border-radius:50%;
    display:flex; align-items:center; justify-content:center;
    box-shadow:0 8px 16px rgba(0,0,0,0.4);
}
.verdict-circle-inner {
    width:114px; height:114px; background-color:#0b0f19; border-radius:50%;
    display:flex; flex-direction:column; align-items:center; justify-content:center;
}
.verdict-score{font-size:2.1rem;font-weight:900;color:#fff;line-height:1;font-family:'Outfit',sans-serif !important;}
.verdict-score-label{font-size:0.65rem;color:#6b7280;text-transform:uppercase;letter-spacing:0.12em;margin-top:4px;}
.verdict-details{flex-grow:1;}
.verdict-title{
    font-size:1.7rem;font-weight:900;letter-spacing:-0.02em;
    margin-bottom:6px;text-transform:uppercase;font-family:'Outfit',sans-serif !important;
}
.verdict-desc{font-size:0.95rem;color:#9ca3af;line-height:1.5;margin-bottom:16px;}
.verdict-metrics{
    display:flex;gap:20px;flex-wrap:wrap;
    border-top:1px solid #1f2937;padding-top:14px;
}
.metric-item{display:flex;flex-direction:column;}
.metric-val{font-size:0.9rem;font-weight:700;color:#f3f4f6;}
.metric-lbl{font-size:0.65rem;color:#4b5563;text-transform:uppercase;letter-spacing:0.05em;}
.verdict-genuine-title       {color:#10b981;text-shadow:0 0 20px rgba(16,185,129,0.35);}
.verdict-sensationalized-title{color:#f59e0b;text-shadow:0 0 20px rgba(245,158,11,0.35);}
.verdict-unverified-title    {color:#fb923c;text-shadow:0 0 20px rgba(251,146,60,0.35);}
.verdict-fabricated-title    {color:#ef4444;text-shadow:0 0 20px rgba(239,68,68,0.35);}
.source-card{
    background-color:#030712;border:1px solid #1f2937;
    border-radius:12px;padding:16px;margin-bottom:14px;
}
.source-card.source-credible{border-left:4px solid #10b981;}
.source-card.source-debunk{border-left:4px solid #ef4444;}
.source-header{
    display:flex;justify-content:space-between;align-items:center;
    margin-bottom:10px;gap:12px;flex-wrap:wrap;
}
.source-idx{
    font-size:0.75rem;font-weight:800;background-color:#111827;
    color:#9ca3af;padding:2px 7px;border-radius:4px;white-space:nowrap;
}
.source-header h4{margin:0;font-size:0.95rem;font-weight:700;color:#fff;flex-grow:1;line-height:1.35;}
.badge{font-size:0.65rem;font-weight:800;padding:3px 9px;border-radius:20px;text-transform:uppercase;white-space:nowrap;}
.source-body{font-size:0.85rem;color:#9ca3af;line-height:1.45;margin:0 0 12px 0;font-style:italic;}
.source-footer{display:flex;justify-content:flex-end;}
.source-link{font-size:0.75rem;color:#06b6d4;text-decoration:none;font-weight:700;}
.source-link:hover{color:#3b82f6;text-decoration:underline;}
.intel-card{background-color:#030712;border:1px solid #1f2937;border-radius:12px;padding:20px;}
.intel-item{margin-bottom:18px;}
.intel-item:last-child{margin-bottom:0;}
.intel-item h5{
    margin:0 0 6px 0;font-size:0.8rem;color:#4b5563;
    text-transform:uppercase;letter-spacing:0.06em;
}
.intel-val{font-size:1.05rem;font-weight:600;color:#fff;}
.verdict-banner-true{
    text-align:center;
    background:linear-gradient(135deg,rgba(16,185,129,0.08) 0%,rgba(16,185,129,0.18) 100%);
    border:2px solid #10b981;color:#10b981;font-size:1.3rem;font-weight:800;
    padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase;
    font-family:'Outfit',sans-serif !important;
}
.verdict-banner-false{
    text-align:center;
    background:linear-gradient(135deg,rgba(239,68,68,0.08) 0%,rgba(239,68,68,0.18) 100%);
    border:2px solid #ef4444;color:#ef4444;font-size:1.3rem;font-weight:800;
    padding:10px 16px;border-radius:8px;margin-bottom:15px;text-transform:uppercase;
    font-family:'Outfit',sans-serif !important;
}
"""

# ==========================================
# 11. Gradio App
# ==========================================
# Build the two-column UI: inputs (screenshot or pasted text) on the left,
# verdict output on the right. Components are laid out in the order they are
# created inside the nested `with` contexts, so statement order here matters.
with gr.Blocks(css=css, theme=gr.themes.Base(), title="True Fact Checker & Fake News Detector") as app:

    # Records which input tab is active ("image" or "text"); starts as "image"
    # and is rewritten by the tab select handlers below. It is forwarded to
    # process_and_verdict as its fourth input.
    tab_state = gr.State(value="image")

    gr.HTML("<div class='cyber-title'>⚖️ TRUE FACT CHECKER</div>")
    gr.HTML("<div class='cyber-subtitle'>Web-First Verification · BERT Neural Analysis · Live Fact-Check Consensus</div>")

    with gr.Row():
        # ---- Left column: input tabs and the main action button ----
        with gr.Column(scale=1):
            with gr.Tabs() as input_tabs:
                with gr.Tab("📸 Screenshot Scanner", id=0) as tab_img:
                    gr.HTML("<div style='margin-bottom:10px;color:#9ca3af;font-size:0.9rem;'>"
                            "Upload an article screenshot, social media post, or newspaper clipping.</div>")
                    # type="numpy": the image is requested as a NumPy array.
                    img_input = gr.Image(
                        type="numpy", sources=["upload","clipboard"],
                        label="Drag screenshot here or paste from clipboard", height=240
                    )
                    ocr_scan_btn = gr.Button("🔎 Extract Screenshot Text", variant="secondary", elem_classes="cyber-btn")
                    # Editable so the user can correct OCR output; this box is
                    # also an output of the verify handler, which can overwrite it.
                    ocr_output_box = gr.Textbox(
                        label="OCR Extracted Text (Review/Edit before verifying)",
                        lines=5,
                        placeholder="OCR text appears here. Edit for accuracy if needed...",
                        interactive=True
                    )
                    # Manual OCR pass: image in, extracted text (or warning) out.
                    ocr_scan_btn.click(handle_ocr_scan, inputs=img_input, outputs=ocr_output_box)

                with gr.Tab("📝 Direct Text / Headline", id=1) as tab_txt:
                    gr.HTML("<div style='margin-bottom:10px;color:#9ca3af;font-size:0.9rem;'>"
                            "Paste a rumor headline, WhatsApp forward, news claim, or full article text.</div>")
                    text_input = gr.Textbox(
                        lines=8,
                        placeholder="e.g. 'Breaking: Scientists discover miracle cure but government is hiding it...'",
                        label="Paste news text or headline here"
                    )

            # Keep tab_state in sync with whichever tab the user selects.
            tab_img.select(lambda: "image", outputs=tab_state)
            tab_txt.select(lambda: "text", outputs=tab_state)

            verify_btn = gr.Button(
                "⚡ Analyze & Verify Fact Consensus",
                variant="primary", elem_classes="cyber-btn", size="lg"
            )

        # ---- Right column: idle placeholder, then the verdict panel ----
        with gr.Column(scale=1):
            # Shown until the first click on verify_btn (hidden by the second
            # click handler registered below).
            placeholder_card = gr.HTML(
                """<div class='intel-card' style='text-align:center;padding:40px 20px;border-style:dashed;'>
                  <div style='font-size:3rem;color:#1f2937;margin-bottom:15px;'>📡</div>
                  <h4 style='color:#6b7280;font-family:Outfit,sans-serif;font-size:1.15rem;margin-bottom:8px;'>Telemetry Idle</h4>
                  <p style='color:#4b5563;font-size:0.9rem;max-width:320px;margin:0 auto;'>
                    Upload an image or paste a news claim, then click Analyze to begin verification.</p>
                </div>""",
                visible=True
            )

            # Starts hidden; it is the first output of the verify handler,
            # whose first return value is gr.update(visible=True).
            verdict_panel = gr.Column(visible=False)
            with verdict_panel:
                banner_output = gr.HTML()
                with gr.Accordion("🔓 View Detailed Confidence & Telemetry", open=False) as confidence_accordion:
                    verdict_html_output = gr.HTML()
                    with gr.Tabs():
                        with gr.Tab("🌐 Live Web Sources"):
                            # Despite the name, this is an HTML component.
                            sources_markdown_output = gr.HTML()
                        with gr.Tab("🧠 AI Intel"):
                            ai_report_html_output = gr.HTML()

    # Main analysis. The `outputs` order must match the 7-tuple returned by
    # process_and_verdict: panel visibility, banner, verdict HTML, AI report
    # HTML, web sources HTML, updated OCR text, accordion state.
    verify_btn.click(
        fn=process_and_verdict,
        inputs=[text_input, ocr_output_box, img_input, tab_state],
        outputs=[
            verdict_panel, banner_output, verdict_html_output,
            ai_report_html_output, sources_markdown_output,
            ocr_output_box, confidence_accordion
        ]
    )
    # Second, independent listener on the same button: hide the idle card.
    verify_btn.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=placeholder_card)

if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link for the app.
    app.launch(share=True)