Spaces:

abd8433
/

TRAK-Fake-Detection-Model

Running

File size: 6,688 Bytes

4fd1c52

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import requests
import os

# NewsAPI key, read from the environment; an empty string means "not set"
# and makes check_news_exists() skip the lookup.
NEWS_API_KEY = os.getenv("NEWS_API_KEY", "")

# The 15 domains that count as trusted outlets when they are compared
# against the URLs of articles returned by NewsAPI.
TRUSTED_NEWS_SOURCES = [
    # Wire services and UK outlets
    "reuters.com", "apnews.com", "bbc.com", "bbc.co.uk", "theguardian.com",
    # US newspapers and broadcasters, plus Al Jazeera
    "nytimes.com", "washingtonpost.com", "bloomberg.com", "cnn.com", "aljazeera.com",
    # Business press, magazines and NBC
    "forbes.com", "ft.com", "economist.com", "time.com", "nbcnews.com",
]

def _load_classifier(repo_id):
    """Fetch the tokenizer and sequence-classification model for ``repo_id``.

    The model is switched to eval mode before it is returned.
    Returns a ``(tokenizer, model)`` pair.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSequenceClassification.from_pretrained(repo_id)
    model.eval()
    return tokenizer, model


print("Loading TRAK models...")

# Model 1 - TRAK Fake Detection BERT (wrapped in a text-classification pipeline)
clf1 = pipeline("text-classification", model="abd8433/TRAK-fake-detection-bert")

# Model 2 - TRAK Fake Detection Distilroberta
tokenizer2, model2 = _load_classifier("abd8433/TRAK-fake-detection-Distilroberta")

# Model 3 - TRAK Fake Detection TinyBERT
tokenizer3, model3 = _load_classifier("abd8433/TRAK-fake-detection-tinybert")

# Model 4 - TRAK Fake Detection RoBERTa
tokenizer4, model4 = _load_classifier("abd8433/TRAK-fake-Detection-roberta")

# Model 5 - TRAK RoBERTa T Fake Detection
tokenizer5, model5 = _load_classifier("abd8433/TRAK-Roberta-t-fake-detection")

print("All TRAK models loaded!")


def get_fake_score_model1(text):
    """Return model 1's fake score for ``text`` as a percentage.

    The pipeline reports only its top label and that label's confidence.
    This code treats ``LABEL_0`` as the fake class: when it is the top
    label its confidence is used directly, otherwise the complement is.
    The result is rounded to two decimals.
    """
    prediction = clf1(text, truncation=True, max_length=512)[0]
    confidence = prediction["score"]
    fake_prob = confidence if prediction["label"] == "LABEL_0" else 1 - confidence
    return round(fake_prob * 100, 2)

def get_fake_score_model2(text):
    """Return model 2's fake score for ``text`` as a percentage.

    The text is tokenized (truncated to 512 tokens), run through ``model2``
    without gradient tracking, and the softmax probability at class index 1
    is returned, scaled to 0-100 and rounded to two decimals.
    """
    # A single sequence needs no padding. Padding it out to 512 tokens only
    # adds masked positions for the model to process, so it is left unpadded
    # here, the same way the other tokenizer-based scorers call their models.
    encoded = tokenizer2(text, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        logits = model2(**encoded).logits
    probs = F.softmax(logits, dim=1)[0]
    return round(float(probs[1]) * 100, 2)

def get_fake_score_model3(text):
    """Return model 3's fake score for ``text`` as a percentage.

    Uses the softmax probability at class index 1 of ``model3``'s output,
    scaled to 0-100 and rounded to two decimals. Input is truncated to
    512 tokens.
    """
    batch = tokenizer3(text, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        output = model3(**batch)
    class_probs = F.softmax(output.logits, dim=1)
    # Row 0 is the only input sequence; column 1 is the class scored as fake.
    return round(float(class_probs[0][1]) * 100, 2)

def get_fake_score_model4(text):
    """Return model 4's fake score for ``text`` as a percentage.

    The fake class is located through the model config: any ``id2label``
    entry whose label contains "fake" (case-insensitive) qualifies, and the
    last such entry wins. If none matches, class index 1 is used. The
    probability is scaled to 0-100 and rounded to two decimals.
    """
    batch = tokenizer4(text, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        output = model4(**batch)
    class_probs = F.softmax(output.logits, dim=1)[0]

    fake_candidates = [
        class_id
        for class_id, class_name in model4.config.id2label.items()
        if "fake" in class_name.lower()
    ]
    target = fake_candidates[-1] if fake_candidates else 1
    return round(float(class_probs[target]) * 100, 2)

def get_fake_score_model5(text):
    """Return model 5's fake score for ``text`` as a percentage.

    The fake class is located through the model config: any ``id2label``
    entry whose label contains "fake" (case-insensitive) qualifies, and the
    last such entry wins. If none matches, class index 1 is used. The
    probability is scaled to 0-100 and rounded to two decimals.
    """
    batch = tokenizer5(text, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        output = model5(**batch)
    class_probs = F.softmax(output.logits, dim=1)[0]

    fake_candidates = [
        class_id
        for class_id, class_name in model5.config.id2label.items()
        if "fake" in class_name.lower()
    ]
    target = fake_candidates[-1] if fake_candidates else 1
    return round(float(class_probs[target]) * 100, 2)


def _is_trusted_url(url):
    """Return True when ``url``'s host is a trusted domain or a subdomain of one."""
    from urllib.parse import urlparse

    if not isinstance(url, str):
        return False
    try:
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. broken IPv6 literals).
        return False
    # Compare the host itself rather than searching the whole URL: a plain
    # substring test lets "ft.com" match "microsoft.com" and lets any URL
    # whose path mentions a trusted domain pass as trusted.
    return any(
        host == domain or host.endswith("." + domain)
        for domain in TRUSTED_NEWS_SOURCES
    )


def _source_name(article):
    """Return the article's source name, or a placeholder when it is missing."""
    source = article.get("source")
    name = source.get("name") if isinstance(source, dict) else None
    return name or "Unknown source"


def check_news_exists(text):
    """Search NewsAPI for the opening of ``text`` and report what was found.

    The first 80 characters of ``text`` are used as the search query.

    Returns:
        A ``(found, info, is_trusted)`` tuple:
        - ``found``: True when NewsAPI returned at least one article.
        - ``info``: the source name of the matching article when found
          (the first trusted match, else the first result); otherwise a
          short reason string.
        - ``is_trusted``: True when a returned article is hosted on one of
          ``TRUSTED_NEWS_SOURCES`` (or a subdomain of one).

    This is a best-effort check: network and response-decoding failures
    are reported as ``(False, "News check failed", False)`` instead of
    being raised.
    """
    if not NEWS_API_KEY:
        return False, "API key not set", False

    query = text[:80] if isinstance(text, str) else ""
    if not query.strip():
        # Nothing to search for; skip the request entirely.
        return False, "Not found in news", False

    try:
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params={
                "q": query,
                "apiKey": NEWS_API_KEY,
                "pageSize": 5,
                "language": "en",
                "sortBy": "relevancy",
            },
            timeout=5,
        )
        data = response.json()
    except (requests.RequestException, ValueError):
        # Connection problems, timeouts, or a body that is not valid JSON.
        return False, "News check failed", False

    if not isinstance(data, dict):
        return False, "News check failed", False

    raw_articles = data.get("articles")
    if not isinstance(raw_articles, list):
        raw_articles = []
    articles = [item for item in raw_articles if isinstance(item, dict)]
    if not articles:
        return False, "Not found in news", False

    for article in articles:
        if _is_trusted_url(article.get("url")):
            return True, _source_name(article), True  # found in trusted source

    # Found in news, but none of the results come from a trusted domain.
    return True, _source_name(articles[0]), False


def detect(text):
    """Classify ``text`` as real, suspicious or fake news.

    Steps:
    1. Empty or whitespace-only input is rejected up front.
    2. The text is looked up via ``check_news_exists``. A hit in a trusted
       source short-circuits to a REAL verdict without running any model.
    3. Otherwise all five models score the text (0-100, higher = more
       likely fake). Their mean is the fake confidence, reduced by 30%
       when the text was found in any news source.
    4. The verdict combines the number of models scoring >= 50 ("votes")
       with the adjusted mean:
       - FAKE: at least 3 votes and mean >= 65
       - SUSPICIOUS: at least 3 votes and mean >= 45, or exactly 2 votes
         and mean >= 55
       - REAL: everything else

    Returns:
        A 5-tuple of strings:
        ``(verdict, real_confidence, fake_confidence, news_info, debug)``.
    """
    if not text or not text.strip():
        # Scoring an empty string would produce a verdict about nothing.
        return (
            "⚠️ NO INPUT",
            "N/A",
            "N/A",
            "No text provided",
            "Skipped: empty input",
        )

    # Do the news lookup first so the trusted-source override really does
    # skip model inference, as its status message says.
    exists_in_news, news_source, is_trusted = check_news_exists(text)

    # If found in TOP 15 trusted sources → force REAL
    if is_trusted:
        return (
            "✅ REAL",
            "100%",
            "0%",
            f"✅ Verified in trusted source: {news_source}",
            "Trusted source override applied — skipped model voting"
        )

    scorers = (
        get_fake_score_model1,
        get_fake_score_model2,
        get_fake_score_model3,
        get_fake_score_model4,
        get_fake_score_model5,
    )
    scores = [scorer(text) for scorer in scorers]

    # Round the raw mean before applying the discount: the order of the two
    # rounding steps determines the displayed value.
    avg_fake = round(sum(scores) / len(scores), 2)

    # If found in any news → reduce fake score by 30%
    if exists_in_news:
        avg_fake = max(0, avg_fake * 0.7)

    avg_fake = round(avg_fake, 2)
    avg_real = round(100 - avg_fake, 2)

    # Votes use each model's raw score, not the discounted average.
    votes_fake = sum(1 for score in scores if score >= 50)

    if votes_fake >= 3 and avg_fake >= 65:
        verdict = "❌ FAKE"
    elif votes_fake >= 3 and avg_fake >= 45:
        verdict = "⚠️ SUSPICIOUS"
    elif votes_fake == 2 and avg_fake >= 55:
        verdict = "⚠️ SUSPICIOUS"
    else:
        verdict = "✅ REAL"

    news_info = f"Found in: {news_source}" if exists_in_news else "Not found in real news sources"
    per_model = " ".join(f"M{number}:{score}" for number, score in enumerate(scores, start=1))
    debug = f"{per_model} Votes:{votes_fake}/5"

    return verdict, str(avg_real) + "%", str(avg_fake) + "%", news_info, debug


# Input: a multi-line box for the article text.
inputs = gr.Textbox(
    lines=10,
    placeholder="Paste news article here...",
    label="News Article",
)

# Outputs: one text box per element of the tuple returned by detect(),
# created in the same order as that tuple.
out1, out2, out3, out4, out5 = (
    gr.Textbox(label=caption)
    for caption in (
        "Verdict",
        "Real Confidence",
        "Fake Confidence",
        "News Verification",
        "Debug",
    )
)

demo = gr.Interface(
    fn=detect,
    inputs=inputs,
    outputs=[out1, out2, out3, out4, out5],
    title="TRAK Fake News Detector",
    description="Uses 5 TRAK AI models plus NewsAPI verification against top 15 trusted news sources.",
)

# Start the web app as soon as the module is executed.
demo.launch()