import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import openai
import os
# Load the pretrained fake-news classifier and its tokenizer once at startup
# (bert-tiny checkpoint fine-tuned for fake-news detection).
model_name = "mrm8488/bert-tiny-finetuned-fake-news-detection"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Running verdict tallies — mutated by detect_misinformation, read by update_chart.
verdict_counts = {"Authentic": 0, "Possibly Misinformation": 0}
# API keys read from the environment; both features degrade to a message when unset.
FACT_CHECK_API_KEY = os.getenv("FACT_CHECK_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
def extract_text_from_url(url):
    """Fetch a web page and return its concatenated <p> text, capped at 3000 chars.

    On any failure (network error, timeout, HTTP error status) returns a string
    starting with "Error" — callers rely on that prefix to detect failure.
    """
    try:
        response = requests.get(url, timeout=5)
        # Treat 4xx/5xx as failures instead of scraping the error page's HTML.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        text = " ".join(p.get_text() for p in soup.find_all("p"))
        # Cap length so downstream model/API calls stay within sane limits.
        return text.strip()[:3000]
    except Exception as e:
        return f"Error fetching URL: {e}"
def update_chart():
    """Render the running verdict tallies as a pie-chart Figure for gr.Plot.

    Reads the module-level ``verdict_counts`` dict. When both counts are zero
    (e.g. the first request failed before a verdict was recorded), a pie of
    all-zero sizes would render NaN wedges — show a placeholder instead.
    """
    labels = list(verdict_counts.keys())
    sizes = list(verdict_counts.values())
    fig, ax = plt.subplots()
    if sum(sizes) > 0:
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    else:
        ax.text(0.5, 0.5, "No analyses yet", ha="center", va="center")
        ax.axis("off")
    ax.set_title("Verdict Distribution")
    return fig
def fact_check_google_api(query, api_key):
    """Query the Google Fact Check Tools API for claims matching *query*.

    Parameters:
        query: text to search for (the article text in this app).
        api_key: Google API key; when falsy, returns an explanatory message
            instead of calling the API.

    Returns a human-readable string: matched claims with rating and publisher,
    a "no fact-checks found" notice, or an error description. Never raises.
    """
    if not api_key:
        return "API key not found. Please set FACT_CHECK_API_KEY in environment."
    url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
    params = {
        "query": query,
        "languageCode": "en-US",
        "key": api_key,
    }
    try:
        # Timeout so a slow API cannot hang the Gradio request indefinitely.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}"
        data = response.json()
        if "claims" not in data:
            return "No fact-checks found for this query."
        results = []
        for claim in data["claims"]:
            text = claim.get("text", "No claim text")
            # claimReview is a list; the first entry carries rating/publisher.
            review = claim.get("claimReview", [{}])[0]
            rating = review.get("textualRating", "No rating")
            publisher = review.get("publisher", {}).get("name", "Unknown")
            results.append(f"Claim: {text}\nRating: {rating}\nSource: {publisher}")
        return "\n\n".join(results)
    except Exception as e:
        return f"Error calling Fact Check API: {e}"
def gpt_fact_check(prompt, model="gpt-3.5-turbo"):
    """Ask an OpenAI chat model to analyze *prompt* for misinformation.

    Parameters:
        prompt: article text (or claim) to analyze.
        model: chat model name; defaults to "gpt-3.5-turbo" (pass "gpt-4"
            if your key has access) — keyword default keeps callers unchanged.

    Returns the model's analysis text, or an explanatory error string when the
    OPENAI_API_KEY env var is unset or the API call fails. Never raises.
    """
    if not OPENAI_API_KEY:
        return "OpenAI API key not found. Please set OPENAI_API_KEY in environment."
    try:
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant for fact-checking news articles. Analyze the following content for misinformation, summarize the main claim, and explain your reasoning."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=300,
            # Low temperature for mostly-deterministic, factual-style output.
            temperature=0.2,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"OpenAI API error: {e}"
def detect_misinformation(input_text, input_type):
    """Classify text (or the page behind a URL) as authentic vs. possible misinformation.

    Parameters:
        input_text: raw article text, or a URL when input_type == "URL".
        input_type: "Text" or "URL".

    Returns a 6-tuple matching the Gradio outputs: (processed text preview,
    verdict label, authenticity score %, verdict pie Figure, fact-check text,
    GPT analysis text). Increments the module-level verdict_counts tally.
    """
    if input_type == "URL":
        input_text = extract_text_from_url(input_text)
        # extract_text_from_url signals failure with an "Error..." prefix.
        if input_text.startswith("Error"):
            return input_text, "Error", 0.0, update_chart(), "URL extraction failed.", ""
    # Guard empty input so we don't run the model/APIs on nothing.
    if not input_text or not input_text.strip():
        return "", "Error", 0.0, update_chart(), "No input text provided.", ""
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # Index 1 is assumed to be the "fake" class for this checkpoint — TODO confirm
    # against the model card.
    fake_prob = probs[0][1].item()
    verdict = "Possibly Misinformation" if fake_prob > 0.5 else "Authentic"
    verdict_counts[verdict] += 1
    # BUG FIX: the UI labels this value "Authenticity Score (%)", but the old
    # code returned the fake-class probability. Report 1 - fake_prob instead.
    authenticity = round((1 - fake_prob) * 100, 2)
    fact_check_result = fact_check_google_api(input_text, FACT_CHECK_API_KEY)
    gpt_result = gpt_fact_check(input_text)
    return input_text[:1000], verdict, authenticity, update_chart(), fact_check_result, gpt_result
# --- Gradio UI: layout, input-type auto-detection, and event wiring ---
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Misinformation Detection Dashboard")
    gr.Markdown("Paste article text or a URL. Choose input type and get a verdict.")
    with gr.Row():
        input_text = gr.Textbox(label="Enter Text or URL", lines=6, placeholder="Paste article text or URL here...")
        input_type = gr.Radio(["Auto Detect", "Text", "URL"], value="Auto Detect", label="Input Type")
    output_text = gr.Textbox(label="Processed Text", lines=6)
    verdict = gr.Label(label="Verdict")
    score = gr.Label(label="Authenticity Score (%)")
    chart = gr.Plot(label="Analytics Dashboard")
    fact_check = gr.Textbox(label="Fact Check Results", lines=6)
    gpt_fact = gr.Textbox(label="OpenAI GPT Analysis", lines=6)
    btn = gr.Button("Analyze")

    def handle_input(text, mode):
        """Resolve "Auto Detect" to Text/URL, then run the detection pipeline."""
        if mode == "Auto Detect":
            # Strip surrounding whitespace so a pasted URL with a stray leading
            # space or newline is still recognized as a URL.
            mode = "URL" if text.strip().startswith(("http://", "https://")) else "Text"
        return detect_misinformation(text, mode)

    # Output order must match detect_misinformation's 6-tuple return.
    btn.click(
        fn=handle_input,
        inputs=[input_text, input_type],
        outputs=[output_text, verdict, score, chart, fact_check, gpt_fact],
    )

demo.launch()