# abd8433's picture
# Create app.py
# 4fd1c52 verified
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import requests
import os
# NewsAPI credential, read from the environment; an empty string means the
# key is not configured.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")

# Domains of the news outlets that are treated as trusted when a story is
# looked up through NewsAPI.
TRUSTED_NEWS_SOURCES = [
    "reuters.com", "apnews.com", "bbc.com",
    "bbc.co.uk", "theguardian.com", "nytimes.com",
    "washingtonpost.com", "bloomberg.com", "cnn.com",
    "aljazeera.com", "forbes.com", "ft.com",
    "economist.com", "time.com", "nbcnews.com",
]
def _load_sequence_classifier(repo_id):
    """Load the tokenizer and classification model stored under *repo_id*.

    The tokenizer is fetched first, then the model, and ``eval()`` is called
    on the model before both are returned as ``(tokenizer, model)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSequenceClassification.from_pretrained(repo_id)
    model.eval()
    return tokenizer, model


print("Loading TRAK models...")

# Model 1 - TRAK Fake Detection BERT (used through a text-classification pipeline)
clf1 = pipeline("text-classification", model="abd8433/TRAK-fake-detection-bert")

# Model 2 - TRAK Fake Detection Distilroberta
tokenizer2, model2 = _load_sequence_classifier("abd8433/TRAK-fake-detection-Distilroberta")

# Model 3 - TRAK Fake Detection TinyBERT
tokenizer3, model3 = _load_sequence_classifier("abd8433/TRAK-fake-detection-tinybert")

# Model 4 - TRAK Fake Detection RoBERTa
tokenizer4, model4 = _load_sequence_classifier("abd8433/TRAK-fake-Detection-roberta")

# Model 5 - TRAK RoBERTa T Fake Detection
tokenizer5, model5 = _load_sequence_classifier("abd8433/TRAK-Roberta-t-fake-detection")

print("All TRAK models loaded!")
def get_fake_score_model1(text):
    """Return model 1's fake score for *text* as a percentage (two decimals).

    The pipeline yields a single label with its confidence. When the label is
    ``LABEL_0`` the confidence itself is used as the fake score; for any other
    label the complement of the confidence is used.
    """
    prediction = clf1(text, truncation=True, max_length=512)[0]
    confidence = prediction["score"]
    fake_probability = confidence if prediction["label"] == "LABEL_0" else 1 - confidence
    return round(fake_probability * 100, 2)
def get_fake_score_model2(text):
    """Return model 2's fake score for *text* as a percentage (two decimals).

    The text is truncated and padded to 512 tokens, run through ``model2``
    without gradient tracking, and the softmax probability at class index 1
    is reported.
    """
    batch = tokenizer2(
        text,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model2(**batch)
        class_probs = F.softmax(output["logits"], dim=1)[0]
    fake_probability = float(class_probs[1])
    return round(fake_probability * 100, 2)
def get_fake_score_model3(text):
    """Return model 3's fake score for *text* as a percentage (two decimals).

    The text is truncated to 512 tokens, run through ``model3`` without
    gradient tracking, and the softmax probability at class index 1 is
    reported.
    """
    batch = tokenizer3(
        text,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model3(**batch)
        class_probs = F.softmax(output.logits, dim=1)[0]
    fake_probability = float(class_probs[1])
    return round(fake_probability * 100, 2)
def get_fake_score_model4(text):
    """Return model 4's fake score for *text* as a percentage (two decimals).

    The class index to report is taken from the model config: the last
    ``id2label`` entry whose label contains "fake" (case-insensitive) is
    used, falling back to index 1 when no label matches.
    """
    batch = tokenizer4(
        text,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model4(**batch)
        class_probs = F.softmax(output.logits, dim=1)[0]

    fake_matches = [
        class_id
        for class_id, class_name in model4.config.id2label.items()
        if "fake" in class_name.lower()
    ]
    # Last match wins; index 1 is the default when nothing matches.
    fake_idx = fake_matches[-1] if fake_matches else 1
    return round(float(class_probs[fake_idx]) * 100, 2)
def get_fake_score_model5(text):
    """Return model 5's fake score for *text* as a percentage (two decimals).

    The class index to report is taken from the model config: the last
    ``id2label`` entry whose label contains "fake" (case-insensitive) is
    used, falling back to index 1 when no label matches.
    """
    batch = tokenizer5(
        text,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model5(**batch)
        class_probs = F.softmax(output.logits, dim=1)[0]

    fake_matches = [
        class_id
        for class_id, class_name in model5.config.id2label.items()
        if "fake" in class_name.lower()
    ]
    # Last match wins; index 1 is the default when nothing matches.
    fake_idx = fake_matches[-1] if fake_matches else 1
    return round(float(class_probs[fake_idx]) * 100, 2)
def _url_host_in_domains(url, domains):
    """Return True when *url*'s hostname equals, or is a subdomain of, one of *domains*."""
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import urlparse

    if not isinstance(url, str) or not url:
        return False
    try:
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. invalid IPv6 literals).
        return False
    if not host:
        return False
    return any(host == domain or host.endswith("." + domain) for domain in domains)


def _article_source_name(article):
    """Return the article's source name, or a placeholder when it is missing."""
    source = article.get("source") if isinstance(article, dict) else None
    name = source.get("name") if isinstance(source, dict) else None
    return name or "Unknown source"


def check_news_exists(text):
    """Look the start of *text* up on NewsAPI and report where it was found.

    The first 80 characters of *text* are sent as the search query to the
    NewsAPI ``/v2/everything`` endpoint (English, top 5 by relevancy).

    Returns:
        A ``(found, source_or_reason, trusted)`` tuple:

        * ``(True, <source name>, True)`` when a returned article's URL is
          hosted on one of ``TRUSTED_NEWS_SOURCES`` (or a subdomain of one).
        * ``(True, <first article's source name>, False)`` when articles were
          returned but none is hosted on a trusted domain.
        * ``(False, <reason>, False)`` when no key is configured, nothing was
          found, or the request/response could not be processed.

    Network, HTTP and JSON-decoding errors are reported through the return
    value rather than raised.
    """
    if not NEWS_API_KEY:
        return False, "API key not set", False

    query = (text or "")[:80]
    if not query.strip():
        # Nothing to search for; avoid a request that can only fail.
        return False, "Not found in news", False

    try:
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params={
                "q": query,
                "apiKey": NEWS_API_KEY,
                "pageSize": 5,
                "language": "en",
                "sortBy": "relevancy",
            },
            timeout=5,
        )
        # Surface HTTP errors (bad key, rate limit, ...) as a failed check
        # instead of silently reporting "not found".
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decoding failures on older requests versions.
        return False, "News check failed", False

    if not isinstance(data, dict):
        return False, "News check failed", False

    total = data.get("totalResults", 0)
    articles = data.get("articles")
    if not isinstance(total, int) or total <= 0:
        return False, "Not found in news", False
    if not isinstance(articles, list) or not articles:
        return False, "Not found in news", False

    # Prefer any hit hosted on a trusted domain. Matching is done on the
    # parsed hostname: a plain substring test would also accept e.g.
    # "microsoft.com" for "ft.com" or a trusted domain appearing in a path.
    for article in articles:
        if not isinstance(article, dict):
            continue
        if _url_host_in_domains(article.get("url"), TRUSTED_NEWS_SOURCES):
            return True, _article_source_name(article), True

    # Found in the news, but not on any trusted domain.
    return True, _article_source_name(articles[0]), False
def _verdict_from_votes(votes_fake, avg_fake):
    """Map the number of fake votes and the averaged fake score to a verdict label."""
    if votes_fake >= 3 and avg_fake >= 65:
        return "❌ FAKE"
    if votes_fake >= 3 and avg_fake >= 45:
        return "⚠️ SUSPICIOUS"
    if votes_fake == 2 and avg_fake >= 55:
        return "⚠️ SUSPICIOUS"
    return "✅ REAL"


def detect(text):
    """Classify *text* as real, suspicious or fake news.

    Steps:
        1. Look the text up via ``check_news_exists``. A hit in a trusted
           source short-circuits to a REAL verdict without running the models.
        2. Otherwise score the text with the five models and average the
           fake scores; the average is reduced by 30% when the story was
           found in any (non-trusted) news source.
        3. Combine the per-model votes (score >= 50 counts as a fake vote)
           with the averaged score to pick the verdict.

    Returns:
        A 5-tuple of strings, in the order of the UI outputs:
        ``(verdict, real_confidence, fake_confidence, news_info, debug)``.
    """
    if not text or not text.strip():
        # No content to analyse: skip the models and the NewsAPI request.
        return (
            "⚠️ NO INPUT",
            "N/A",
            "N/A",
            "No article text provided",
            "Skipped: empty input",
        )

    # Check the news first: when a trusted source carries the story the model
    # scores are not used, so there is no point computing them.
    exists_in_news, news_source, is_trusted = check_news_exists(text)

    # If found in TOP 15 trusted sources → force REAL
    if is_trusted:
        return (
            "✅ REAL",
            "100%",
            "0%",
            f"✅ Verified in trusted source: {news_source}",
            "Trusted source override applied — skipped model voting",
        )

    scorers = (
        get_fake_score_model1,
        get_fake_score_model2,
        get_fake_score_model3,
        get_fake_score_model4,
        get_fake_score_model5,
    )
    scores = [scorer(text) for scorer in scorers]
    avg_fake = round(sum(scores) / len(scores), 2)

    # If found in any news → reduce fake score by 30%
    if exists_in_news:
        avg_fake = round(avg_fake * 0.7, 2)
    avg_real = round(100 - avg_fake, 2)

    # Votes are based on the raw per-model scores, not the discounted average.
    votes_fake = sum(score >= 50 for score in scores)
    verdict = _verdict_from_votes(votes_fake, avg_fake)

    news_info = f"Found in: {news_source}" if exists_in_news else "Not found in real news sources"
    per_model = " ".join(f"M{number}:{score}" for number, score in enumerate(scores, start=1))
    debug = f"{per_model} Votes:{votes_fake}/5"
    return verdict, f"{avg_real}%", f"{avg_fake}%", news_info, debug
# Input widget: a ten-line text box for the article to analyse.
inputs = gr.Textbox(
    label="News Article",
    placeholder="Paste news article here...",
    lines=10,
)

# Output widgets, created in the same order as the values returned by detect().
out1, out2, out3, out4, out5 = [
    gr.Textbox(label=caption)
    for caption in (
        "Verdict",
        "Real Confidence",
        "Fake Confidence",
        "News Verification",
        "Debug",
    )
]

# Wire detect() to the widgets and start the app.
demo = gr.Interface(
    title="TRAK Fake News Detector",
    description="Uses 5 TRAK AI models plus NewsAPI verification against top 15 trusted news sources.",
    fn=detect,
    inputs=inputs,
    outputs=[out1, out2, out3, out4, out5],
)
demo.launch()