import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import openai
import os
# Load the pretrained fake-news classifier and its tokenizer once at startup
# (bert-tiny checkpoint fine-tuned for fake-news detection).
model_name = "mrm8488/bert-tiny-finetuned-fake-news-detection"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Running verdict tallies — mutated by detect_misinformation, read by update_chart.
verdict_counts = {"Authentic": 0, "Possibly Misinformation": 0}
# API keys read from the environment; both features degrade to a message when unset.
FACT_CHECK_API_KEY = os.getenv("FACT_CHECK_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
def extract_text_from_url(url):
    """Fetch a web page and return its concatenated <p> text, capped at 3000 chars.

    On any failure (network error, timeout, HTTP error status) returns a string
    starting with "Error" — callers rely on that prefix to detect failure.
    """
    try:
        response = requests.get(url, timeout=5)
        # Treat 4xx/5xx as failures instead of scraping the error page's HTML.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        text = " ".join(p.get_text() for p in soup.find_all("p"))
        # Cap length so downstream model/API calls stay within sane limits.
        return text.strip()[:3000]
    except Exception as e:
        return f"Error fetching URL: {e}"
def update_chart():
    """Render the running verdict tallies as a pie-chart Figure for gr.Plot.

    Reads the module-level ``verdict_counts`` dict. When both counts are zero
    (e.g. the first request failed before a verdict was recorded), a pie of
    all-zero sizes would render NaN wedges — show a placeholder instead.
    """
    labels = list(verdict_counts.keys())
    sizes = list(verdict_counts.values())
    fig, ax = plt.subplots()
    if sum(sizes) > 0:
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    else:
        ax.text(0.5, 0.5, "No analyses yet", ha="center", va="center")
        ax.axis("off")
    ax.set_title("Verdict Distribution")
    return fig
def fact_check_google_api(query, api_key):
    """Query the Google Fact Check Tools API for claims matching *query*.

    Parameters:
        query: text to search for (the article text in this app).
        api_key: Google API key; when falsy, returns an explanatory message
            instead of calling the API.

    Returns a human-readable string: matched claims with rating and publisher,
    a "no fact-checks found" notice, or an error description. Never raises.
    """
    if not api_key:
        return "API key not found. Please set FACT_CHECK_API_KEY in environment."
    url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
    params = {
        "query": query,
        "languageCode": "en-US",
        "key": api_key,
    }
    try:
        # Timeout so a slow API cannot hang the Gradio request indefinitely.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}"
        data = response.json()
        if "claims" not in data:
            return "No fact-checks found for this query."
        results = []
        for claim in data["claims"]:
            text = claim.get("text", "No claim text")
            # claimReview is a list; the first entry carries rating/publisher.
            review = claim.get("claimReview", [{}])[0]
            rating = review.get("textualRating", "No rating")
            publisher = review.get("publisher", {}).get("name", "Unknown")
            results.append(f"Claim: {text}\nRating: {rating}\nSource: {publisher}")
        return "\n\n".join(results)
    except Exception as e:
        return f"Error calling Fact Check API: {e}"
def gpt_fact_check(prompt, model="gpt-3.5-turbo"):
    """Ask an OpenAI chat model to analyze *prompt* for misinformation.

    Parameters:
        prompt: article text (or claim) to analyze.
        model: chat model name; defaults to "gpt-3.5-turbo" (pass "gpt-4"
            if your key has access) — keyword default keeps callers unchanged.

    Returns the model's analysis text, or an explanatory error string when the
    OPENAI_API_KEY env var is unset or the API call fails. Never raises.
    """
    if not OPENAI_API_KEY:
        return "OpenAI API key not found. Please set OPENAI_API_KEY in environment."
    try:
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant for fact-checking news articles. Analyze the following content for misinformation, summarize the main claim, and explain your reasoning."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=300,
            # Low temperature for mostly-deterministic, factual-style output.
            temperature=0.2,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"OpenAI API error: {e}"
def detect_misinformation(input_text, input_type):
    """Classify text (or the page behind a URL) as authentic vs. possible misinformation.

    Parameters:
        input_text: raw article text, or a URL when input_type == "URL".
        input_type: "Text" or "URL".

    Returns a 6-tuple matching the Gradio outputs: (processed text preview,
    verdict label, authenticity score %, verdict pie Figure, fact-check text,
    GPT analysis text). Increments the module-level verdict_counts tally.
    """
    if input_type == "URL":
        input_text = extract_text_from_url(input_text)
        # extract_text_from_url signals failure with an "Error..." prefix.
        if input_text.startswith("Error"):
            return input_text, "Error", 0.0, update_chart(), "URL extraction failed.", ""
    # Guard empty input so we don't run the model/APIs on nothing.
    if not input_text or not input_text.strip():
        return "", "Error", 0.0, update_chart(), "No input text provided.", ""
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # Index 1 is assumed to be the "fake" class for this checkpoint — TODO confirm
    # against the model card.
    fake_prob = probs[0][1].item()
    verdict = "Possibly Misinformation" if fake_prob > 0.5 else "Authentic"
    verdict_counts[verdict] += 1
    # BUG FIX: the UI labels this value "Authenticity Score (%)", but the old
    # code returned the fake-class probability. Report 1 - fake_prob instead.
    authenticity = round((1 - fake_prob) * 100, 2)
    fact_check_result = fact_check_google_api(input_text, FACT_CHECK_API_KEY)
    gpt_result = gpt_fact_check(input_text)
    return input_text[:1000], verdict, authenticity, update_chart(), fact_check_result, gpt_result
# --- Gradio UI: layout, input-type auto-detection, and event wiring ---
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Misinformation Detection Dashboard")
    gr.Markdown("Paste article text or a URL. Choose input type and get a verdict.")
    with gr.Row():
        input_text = gr.Textbox(label="Enter Text or URL", lines=6, placeholder="Paste article text or URL here...")
        input_type = gr.Radio(["Auto Detect", "Text", "URL"], value="Auto Detect", label="Input Type")
    output_text = gr.Textbox(label="Processed Text", lines=6)
    verdict = gr.Label(label="Verdict")
    score = gr.Label(label="Authenticity Score (%)")
    chart = gr.Plot(label="Analytics Dashboard")
    fact_check = gr.Textbox(label="Fact Check Results", lines=6)
    gpt_fact = gr.Textbox(label="OpenAI GPT Analysis", lines=6)
    btn = gr.Button("Analyze")

    def handle_input(text, mode):
        """Resolve "Auto Detect" to Text/URL, then run the detection pipeline."""
        if mode == "Auto Detect":
            # Strip surrounding whitespace so a pasted URL with a stray leading
            # space or newline is still recognized as a URL.
            mode = "URL" if text.strip().startswith(("http://", "https://")) else "Text"
        return detect_misinformation(text, mode)

    # Output order must match detect_misinformation's 6-tuple return.
    btn.click(
        fn=handle_input,
        inputs=[input_text, input_type],
        outputs=[output_text, verdict, score, chart, fact_check, gpt_fact],
    )

demo.launch()