Hugging Face Space (runtime status: Sleeping)
# --- Imports (stdlib first, then third-party) --------------------------------
import os

import gradio as gr
import matplotlib.pyplot as plt
import openai
import requests
import torch
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- Classifier setup --------------------------------------------------------
# Tiny BERT checkpoint fine-tuned for fake-news detection; downloaded from the
# Hugging Face hub on first run.
model_name = "mrm8488/bert-tiny-finetuned-fake-news-detection"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Running tally of verdicts; feeds the analytics pie chart.
verdict_counts = {"Authentic": 0, "Possibly Misinformation": 0}

# External service credentials, read from the environment (may be None).
FACT_CHECK_API_KEY = os.getenv("FACT_CHECK_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
def extract_text_from_url(url):
    """Fetch *url* and return the concatenated <p> text, capped at 3000 chars.

    On any failure (network, HTTP error, parse) a human-readable string
    starting with "Error" is returned instead of raising, so callers can
    branch on that prefix (see detect_misinformation).
    """
    try:
        response = requests.get(url, timeout=5)
        # BUG FIX: surface HTTP errors (404, 500, ...) explicitly; otherwise
        # the body of an error page would be scraped as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        text = " ".join(p.get_text() for p in paragraphs)
        # Cap article length so downstream models get a bounded input.
        return text.strip()[:3000]
    except Exception as e:
        return f"Error fetching URL: {e}"
def update_chart():
    """Build a pie chart of the current verdict distribution.

    Returns a matplotlib Figure for the gr.Plot output. Handles the initial
    state where no analyses have completed yet (all counts zero), which
    would otherwise make ax.pie divide by zero and draw NaN wedges.
    """
    labels = list(verdict_counts.keys())
    sizes = list(verdict_counts.values())
    fig, ax = plt.subplots()
    if sum(sizes) == 0:
        # Nothing analyzed yet -- show a placeholder instead of a broken pie.
        ax.text(0.5, 0.5, "No data yet", ha="center", va="center")
        ax.axis("off")
    else:
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    ax.set_title("Verdict Distribution")
    return fig
def fact_check_google_api(query, api_key):
    """Search the Google Fact Check Tools API for claims matching *query*.

    Returns a formatted multi-claim string, a "no results" message, or an
    error string; never raises.
    """
    if not api_key:
        return "API key not found. Please set FACT_CHECK_API_KEY in environment."
    url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
    params = {
        # The API expects a short claim query; the caller passes whole
        # articles (up to 3000 chars), so cap it to keep the request sane.
        "query": query[:500],
        "languageCode": "en-US",
        "key": api_key
    }
    try:
        # BUG FIX: a missing timeout could hang the Gradio worker forever.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            if "claims" in data:
                results = []
                for claim in data["claims"]:
                    text = claim.get("text", "No claim text")
                    # Each claim may carry several reviews; show the first.
                    review = claim.get("claimReview", [{}])[0]
                    rating = review.get("textualRating", "No rating")
                    publisher = review.get("publisher", {}).get("name", "Unknown")
                    results.append(f"Claim: {text}\nRating: {rating}\nSource: {publisher}")
                return "\n\n".join(results)
            else:
                return "No fact-checks found for this query."
        else:
            return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        return f"Error calling Fact Check API: {e}"
def gpt_fact_check(prompt):
    """Send *prompt* to OpenAI's chat API for a misinformation analysis.

    Returns the model's reply text, or an explanatory error string when the
    key is missing or the API call fails; never raises.
    """
    if not OPENAI_API_KEY:
        return "OpenAI API key not found. Please set OPENAI_API_KEY in environment."
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant for fact-checking news articles. Analyze the following content for misinformation, summarize the main claim, and explain your reasoning.",
    }
    user_message = {"role": "user", "content": prompt}
    try:
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",  # swap for "gpt-4" if the key has access
            messages=[system_message, user_message],
            max_tokens=300,
            temperature=0.2,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as e:
        return f"OpenAI API error: {e}"
def detect_misinformation(input_text, input_type):
    """Classify *input_text* (raw text or a URL) as authentic vs. misinformation.

    Returns a 6-tuple matching the Gradio outputs:
    (processed text, verdict label, authenticity %, chart figure,
     fact-check API results, GPT analysis).

    Side effect: increments the module-level verdict_counts tally.
    """
    if input_type == "URL":
        input_text = extract_text_from_url(input_text)
        # extract_text_from_url reports failures as "Error ..." strings.
        if input_text.startswith("Error"):
            return input_text, "Error", 0.0, update_chart(), "URL extraction failed.", ""
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # probs[0][1] is treated by this app as P(fake) -- see the verdict rule.
    fake_score = probs[0][1].item()
    verdict = "Possibly Misinformation" if fake_score > 0.5 else "Authentic"
    verdict_counts[verdict] += 1
    # BUG FIX: the UI labels this output "Authenticity Score (%)", but the
    # original code displayed the *fake* probability there (inverted).
    # Report 1 - P(fake) so the number matches its label.
    authenticity_pct = round((1 - fake_score) * 100, 2)
    fact_check_result = fact_check_google_api(input_text, FACT_CHECK_API_KEY)
    gpt_result = gpt_fact_check(input_text)
    return input_text[:1000], verdict, authenticity_pct, update_chart(), fact_check_result, gpt_result
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Misinformation Detection Dashboard")
    gr.Markdown("Paste article text or a URL. Choose input type and get a verdict.")
    with gr.Row():
        input_text = gr.Textbox(label="Enter Text or URL", lines=6, placeholder="Paste article text or URL here...")
        input_type = gr.Radio(["Auto Detect", "Text", "URL"], value="Auto Detect", label="Input Type")
    output_text = gr.Textbox(label="Processed Text", lines=6)
    verdict = gr.Label(label="Verdict")
    score = gr.Label(label="Authenticity Score (%)")
    chart = gr.Plot(label="Analytics Dashboard")
    fact_check = gr.Textbox(label="Fact Check Results", lines=6)
    gpt_fact = gr.Textbox(label="OpenAI GPT Analysis", lines=6)
    btn = gr.Button("Analyze")

    def handle_input(text, mode):
        """Resolve "Auto Detect" into Text/URL, then run the full pipeline."""
        if mode == "Auto Detect":
            # BUG FIX: strip surrounding whitespace so a pasted URL with a
            # stray leading space/newline is still recognized as a URL.
            candidate = text.strip()
            mode = "URL" if candidate.startswith(("http://", "https://")) else "Text"
        return detect_misinformation(text, mode)

    btn.click(
        fn=handle_input,
        inputs=[input_text, input_type],
        outputs=[output_text, verdict, score, chart, fact_check, gpt_fact]
    )

demo.launch()