Spaces:

abd8433
/

TRAK-Fake-Detection-Model

Running

App Files Files Community

TRAK-Fake-Detection-Model / app.py

abd8433

Create app.py

4fd1c52 verified about 17 hours ago

raw

history blame contribute delete

6.69 kB

	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import torch.nn.functional as F
	import requests
	import os

	# NewsAPI credential read from the environment. It defaults to an empty
	# string, which check_news_exists() treats as "API key not set".
	NEWS_API_KEY = os.getenv("NEWS_API_KEY", "")

	# Domains that check_news_exists() treats as trusted when they appear in an
	# article URL returned by NewsAPI.
	TRUSTED_NEWS_SOURCES = [
	    # Wire services and broadcasters
	    "reuters.com", "apnews.com", "bbc.com", "bbc.co.uk",
	    # Newspapers
	    "theguardian.com", "nytimes.com", "washingtonpost.com",
	    # Business, TV and magazines
	    "bloomberg.com", "cnn.com", "aljazeera.com", "forbes.com",
	    "ft.com", "economist.com", "time.com", "nbcnews.com",
	]

	def _load_sequence_classifier(repo_id):
	    """Load the tokenizer and sequence-classification model for *repo_id*.

	    The tokenizer is loaded first, then the model, which is put into
	    evaluation mode before both are returned as ``(tokenizer, model)``.
	    """
	    tok = AutoTokenizer.from_pretrained(repo_id)
	    net = AutoModelForSequenceClassification.from_pretrained(repo_id)
	    net.eval()
	    return tok, net


	print("Loading TRAK models...")

	# Model 1 - TRAK Fake Detection BERT (used through a text-classification pipeline)
	clf1 = pipeline("text-classification", model="abd8433/TRAK-fake-detection-bert")

	# Model 2 - TRAK Fake Detection Distilroberta
	tokenizer2, model2 = _load_sequence_classifier("abd8433/TRAK-fake-detection-Distilroberta")

	# Model 3 - TRAK Fake Detection TinyBERT
	tokenizer3, model3 = _load_sequence_classifier("abd8433/TRAK-fake-detection-tinybert")

	# Model 4 - TRAK Fake Detection RoBERTa
	tokenizer4, model4 = _load_sequence_classifier("abd8433/TRAK-fake-Detection-roberta")

	# Model 5 - TRAK RoBERTa T Fake Detection
	tokenizer5, model5 = _load_sequence_classifier("abd8433/TRAK-Roberta-t-fake-detection")

	print("All TRAK models loaded!")


	def get_fake_score_model1(text):
	    """Return model 1's fake score for *text* as a percentage (0-100).

	    Only the first result returned by the ``clf1`` pipeline is used. When
	    its label is ``LABEL_0`` the reported score is taken as the fake
	    probability; for any other label the complement ``1 - score`` is used.
	    The result is rounded to two decimals.
	    """
	    prediction = clf1(text, truncation=True, max_length=512)[0]
	    confidence = prediction["score"]
	    fake_probability = confidence if prediction["label"] == "LABEL_0" else 1 - confidence
	    return round(fake_probability * 100, 2)

	def get_fake_score_model2(text):
	    """Return model 2's fake score for *text* as a percentage (0-100).

	    The text is tokenized (truncated to 512 tokens), run through ``model2``
	    without gradient tracking, and the softmax probability at class index 1
	    is returned, rounded to two decimals.

	    NOTE(review): index 1 is assumed to be the "fake" class; confirm
	    against the model's ``id2label`` mapping.
	    """
	    # No padding: a single sequence needs none. Padding every request to
	    # 512 tokens only adds inference cost, and the sibling scorers do not
	    # pad either. The attention mask should already have excluded the pad
	    # tokens, so the score is not expected to change beyond float noise.
	    encoded = tokenizer2(text, truncation=True, max_length=512, return_tensors="pt")
	    with torch.no_grad():
	        logits = model2(**encoded).logits
	    probs = F.softmax(logits, dim=1)[0]
	    return round(float(probs[1]) * 100, 2)

	def get_fake_score_model3(text):
	    """Return model 3's fake score for *text* as a percentage (0-100).

	    Tokenizes the text (truncated to 512 tokens), runs ``model3`` without
	    gradient tracking and returns the softmax probability at class index 1,
	    rounded to two decimals.

	    NOTE(review): index 1 is assumed to be the "fake" class; confirm
	    against the model's ``id2label`` mapping.
	    """
	    batch = tokenizer3(text, truncation=True, max_length=512, return_tensors="pt")
	    with torch.no_grad():
	        output = model3(**batch)
	    class_probs = F.softmax(output.logits, dim=1)[0]
	    fake_probability = float(class_probs[1])
	    return round(fake_probability * 100, 2)

	def get_fake_score_model4(text):
	    """Return model 4's fake score for *text* as a percentage (0-100).

	    The class index is looked up in ``model4.config.id2label``: the last
	    entry whose label contains "fake" (case-insensitive) is used, falling
	    back to index 1 when no label matches. The softmax probability at that
	    index is returned, rounded to two decimals.
	    """
	    batch = tokenizer4(text, truncation=True, max_length=512, return_tensors="pt")
	    with torch.no_grad():
	        class_probs = F.softmax(model4(**batch).logits, dim=1)[0]

	    matching_ids = [
	        class_id
	        for class_id, class_name in model4.config.id2label.items()
	        if "fake" in class_name.lower()
	    ]
	    # Last match wins; index 1 is the fallback when nothing matches.
	    fake_idx = matching_ids[-1] if matching_ids else 1
	    return round(float(class_probs[fake_idx]) * 100, 2)

	def get_fake_score_model5(text):
	    """Return model 5's fake score for *text* as a percentage (0-100).

	    Picks the class whose ``id2label`` entry contains "fake"
	    (case-insensitive; the last such entry wins, index 1 if none match) and
	    returns its softmax probability, rounded to two decimals.
	    """
	    batch = tokenizer5(text, truncation=True, max_length=512, return_tensors="pt")
	    with torch.no_grad():
	        raw_logits = model5(**batch).logits
	    class_probs = F.softmax(raw_logits, dim=1)[0]

	    label_map = model5.config.id2label
	    fake_column = 1  # fallback when no label mentions "fake"
	    for column in label_map:
	        if "fake" in label_map[column].lower():
	            fake_column = column

	    percentage = float(class_probs[fake_column]) * 100
	    return round(percentage, 2)


	def _article_hostname(article):
	    """Return the hostname of an article's ``url`` field, or "" if unusable."""
	    from urllib.parse import urlparse

	    url = article.get("url")
	    if not isinstance(url, str):
	        return ""
	    try:
	        return urlparse(url).hostname or ""
	    except ValueError:
	        # urlparse rejects some malformed URLs (e.g. broken IPv6 literals).
	        return ""


	def _article_source_name(article):
	    """Return the ``source.name`` field of *article*, or a placeholder."""
	    source = article.get("source")
	    name = source.get("name") if isinstance(source, dict) else None
	    return name or "Unknown source"


	def check_news_exists(text):
	    """Search NewsAPI for the article and report where it was found.

	    The first 80 characters of the stripped *text* form the search query.

	    Args:
	        text: The article text entered by the user.

	    Returns:
	        A ``(found, source, trusted)`` tuple:
	        - ``found``: True when NewsAPI returned at least one article.
	        - ``source``: the matching article's source name, or a short
	          status message when nothing was found or the lookup failed.
	        - ``trusted``: True only when a returned article is hosted on one
	          of ``TRUSTED_NEWS_SOURCES`` (or a subdomain of one).

	    The lookup is best-effort: network, HTTP and JSON errors are reported
	    as ``(False, "News check failed", False)`` instead of being raised.
	    """
	    if not NEWS_API_KEY:
	        return False, "API key not set", False

	    query = (text or "").strip()[:80]
	    if not query:
	        # Nothing to search for; skip the network round trip.
	        return False, "Not found in news", False

	    try:
	        response = requests.get(
	            "https://newsapi.org/v2/everything",
	            params={
	                "q": query,
	                "apiKey": NEWS_API_KEY,
	                "pageSize": 5,
	                "language": "en",
	                "sortBy": "relevancy",
	            },
	            timeout=5,
	        )
	        response.raise_for_status()
	        payload = response.json()
	    except (requests.RequestException, ValueError) as exc:
	        # Log the exception type only: the message can embed the request
	        # URL, and that URL carries the API key as a query parameter.
	        print(f"News check failed: {type(exc).__name__}")
	        return False, "News check failed", False

	    if not isinstance(payload, dict):
	        return False, "News check failed", False

	    raw_articles = payload.get("articles")
	    if not isinstance(raw_articles, list):
	        raw_articles = []
	    articles = [item for item in raw_articles if isinstance(item, dict)]

	    total = payload.get("totalResults")
	    if not articles or not isinstance(total, int) or total <= 0:
	        return False, "Not found in news", False

	    # Match on the parsed hostname rather than on a substring of the URL.
	    # A substring test would let "ft.com" match "microsoft.com", or match a
	    # trusted domain that merely appears in a path or query string.
	    for article in articles:
	        host = _article_hostname(article)
	        if any(host == domain or host.endswith("." + domain)
	               for domain in TRUSTED_NEWS_SOURCES):
	            return True, _article_source_name(article), True

	    # Found in the news, but not on any trusted domain.
	    return True, _article_source_name(articles[0]), False


	def detect(text):
	    """Classify a news article as real, suspicious or fake.

	    The article is first looked up with ``check_news_exists``. A hit on a
	    trusted source short-circuits to a REAL verdict without running the
	    models. Otherwise the five model scores are averaged, the average is
	    scaled by 0.7 when the story was found in any news source, and the
	    verdict is derived from that average together with the number of
	    models whose own score is at least 50.

	    Args:
	        text: The article text from the UI; may be empty or ``None``.

	    Returns:
	        A 5-tuple of strings: verdict, real confidence, fake confidence,
	        news-verification note, and a debug line.
	    """
	    # Guard against an empty submission: scoring a blank string would
	    # produce a verdict that means nothing.
	    if text is None or not text.strip():
	        return (
	            "⚠️ NO INPUT",
	            "-",
	            "-",
	            "No article text provided",
	            "Empty input — skipped news check and model voting",
	        )

	    # Look the story up before running the models: a trusted-source hit
	    # discards the model scores, so running five inferences first is waste.
	    exists_in_news, news_source, is_trusted = check_news_exists(text)

	    # If found in TOP 15 trusted sources → force REAL
	    # NOTE(review): this override trusts a search hit on the first 80
	    # characters of the text; confirm that is strong enough evidence.
	    if is_trusted:
	        return (
	            "✅ REAL",
	            "100%",
	            "0%",
	            f"✅ Verified in trusted source: {news_source}",
	            "Trusted source override applied — skipped model voting"
	        )

	    scores = [
	        get_fake_score_model1(text),
	        get_fake_score_model2(text),
	        get_fake_score_model3(text),
	        get_fake_score_model4(text),
	        get_fake_score_model5(text),
	    ]
	    avg_fake = round(sum(scores) / len(scores), 2)

	    # If found in any news → reduce fake score by 30%
	    if exists_in_news:
	        avg_fake = max(0, avg_fake * 0.7)

	    avg_fake = round(avg_fake, 2)
	    avg_real = round(100 - avg_fake, 2)

	    # Votes use each model's raw score, not the news-adjusted average.
	    votes_fake = sum(score >= 50 for score in scores)

	    if votes_fake >= 3 and avg_fake >= 65:
	        verdict = "❌ FAKE"
	    elif votes_fake >= 3 and avg_fake >= 45:
	        verdict = "⚠️ SUSPICIOUS"
	    elif votes_fake == 2 and avg_fake >= 55:
	        verdict = "⚠️ SUSPICIOUS"
	    else:
	        verdict = "✅ REAL"

	    news_info = f"Found in: {news_source}" if exists_in_news else "Not found in real news sources"
	    per_model = " ".join(f"M{rank}:{score}" for rank, score in enumerate(scores, start=1))
	    debug = f"{per_model} Votes:{votes_fake}/5"

	    return verdict, f"{avg_real}%", f"{avg_fake}%", news_info, debug


	# Input widget: a ten-line text area for the article body.
	inputs = gr.Textbox(
	    label="News Article",
	    placeholder="Paste news article here...",
	    lines=10,
	)

	# Output widgets, in the same order as the tuple returned by detect().
	out1, out2, out3, out4, out5 = (
	    gr.Textbox(label=caption)
	    for caption in (
	        "Verdict",
	        "Real Confidence",
	        "Fake Confidence",
	        "News Verification",
	        "Debug",
	    )
	)

	demo = gr.Interface(
	    title="TRAK Fake News Detector",
	    description="Uses 5 TRAK AI models plus NewsAPI verification against top 15 trusted news sources.",
	    fn=detect,
	    inputs=inputs,
	    outputs=[out1, out2, out3, out4, out5],
	)

	# Start the web app as soon as the module is executed.
	demo.launch()