Hugging Face Space (runtime status: Sleeping)
# --- Imports (stdlib first, then third-party) --------------------------------
import os

import gradio as gr
import matplotlib.pyplot as plt
import openai
import requests
import torch
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- Classifier setup --------------------------------------------------------
# Tiny BERT checkpoint fine-tuned for fake-news detection; downloaded from the
# Hugging Face hub on first run.
model_name = "mrm8488/bert-tiny-finetuned-fake-news-detection"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Running tally of verdicts; feeds the analytics pie chart.
verdict_counts = {"Authentic": 0, "Possibly Misinformation": 0}

# External service credentials, read from the environment (may be None).
FACT_CHECK_API_KEY = os.getenv("FACT_CHECK_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
def extract_text_from_url(url):
    """Fetch *url* and return the concatenated <p> text, capped at 3000 chars.

    On any failure (network, HTTP error, parse) a human-readable string
    starting with "Error" is returned instead of raising, so callers can
    branch on that prefix (see detect_misinformation).
    """
    try:
        response = requests.get(url, timeout=5)
        # BUG FIX: surface HTTP errors (404, 500, ...) explicitly; otherwise
        # the body of an error page would be scraped as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        text = " ".join(p.get_text() for p in paragraphs)
        # Cap article length so downstream models get a bounded input.
        return text.strip()[:3000]
    except Exception as e:
        return f"Error fetching URL: {e}"
def update_chart():
    """Build a pie chart of the current verdict distribution.

    Returns a matplotlib Figure for the gr.Plot output. Handles the initial
    state where no analyses have completed yet (all counts zero), which
    would otherwise make ax.pie divide by zero and draw NaN wedges.
    """
    labels = list(verdict_counts.keys())
    sizes = list(verdict_counts.values())
    fig, ax = plt.subplots()
    if sum(sizes) == 0:
        # Nothing analyzed yet -- show a placeholder instead of a broken pie.
        ax.text(0.5, 0.5, "No data yet", ha="center", va="center")
        ax.axis("off")
    else:
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    ax.set_title("Verdict Distribution")
    return fig
def fact_check_google_api(query, api_key):
    """Search the Google Fact Check Tools API for claims matching *query*.

    Returns a formatted multi-claim string, a "no results" message, or an
    error string; never raises.
    """
    if not api_key:
        return "API key not found. Please set FACT_CHECK_API_KEY in environment."
    url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
    params = {
        # The API expects a short claim query; the caller passes whole
        # articles (up to 3000 chars), so cap it to keep the request sane.
        "query": query[:500],
        "languageCode": "en-US",
        "key": api_key
    }
    try:
        # BUG FIX: a missing timeout could hang the Gradio worker forever.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            if "claims" in data:
                results = []
                for claim in data["claims"]:
                    text = claim.get("text", "No claim text")
                    # Each claim may carry several reviews; show the first.
                    review = claim.get("claimReview", [{}])[0]
                    rating = review.get("textualRating", "No rating")
                    publisher = review.get("publisher", {}).get("name", "Unknown")
                    results.append(f"Claim: {text}\nRating: {rating}\nSource: {publisher}")
                return "\n\n".join(results)
            else:
                return "No fact-checks found for this query."
        else:
            return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        return f"Error calling Fact Check API: {e}"
def gpt_fact_check(prompt):
    """Send *prompt* to OpenAI's chat API for a misinformation analysis.

    Returns the model's reply text, or an explanatory error string when the
    key is missing or the API call fails; never raises.
    """
    if not OPENAI_API_KEY:
        return "OpenAI API key not found. Please set OPENAI_API_KEY in environment."
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant for fact-checking news articles. Analyze the following content for misinformation, summarize the main claim, and explain your reasoning.",
    }
    user_message = {"role": "user", "content": prompt}
    try:
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",  # swap for "gpt-4" if the key has access
            messages=[system_message, user_message],
            max_tokens=300,
            temperature=0.2,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as e:
        return f"OpenAI API error: {e}"
def detect_misinformation(input_text, input_type):
    """Classify *input_text* (raw text or a URL) as authentic vs. misinformation.

    Returns a 6-tuple matching the Gradio outputs:
    (processed text, verdict label, authenticity %, chart figure,
     fact-check API results, GPT analysis).

    Side effect: increments the module-level verdict_counts tally.
    """
    if input_type == "URL":
        input_text = extract_text_from_url(input_text)
        # extract_text_from_url reports failures as "Error ..." strings.
        if input_text.startswith("Error"):
            return input_text, "Error", 0.0, update_chart(), "URL extraction failed.", ""
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # probs[0][1] is treated by this app as P(fake) -- see the verdict rule.
    fake_score = probs[0][1].item()
    verdict = "Possibly Misinformation" if fake_score > 0.5 else "Authentic"
    verdict_counts[verdict] += 1
    # BUG FIX: the UI labels this output "Authenticity Score (%)", but the
    # original code displayed the *fake* probability there (inverted).
    # Report 1 - P(fake) so the number matches its label.
    authenticity_pct = round((1 - fake_score) * 100, 2)
    fact_check_result = fact_check_google_api(input_text, FACT_CHECK_API_KEY)
    gpt_result = gpt_fact_check(input_text)
    return input_text[:1000], verdict, authenticity_pct, update_chart(), fact_check_result, gpt_result
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Misinformation Detection Dashboard")
    gr.Markdown("Paste article text or a URL. Choose input type and get a verdict.")
    with gr.Row():
        input_text = gr.Textbox(label="Enter Text or URL", lines=6, placeholder="Paste article text or URL here...")
        input_type = gr.Radio(["Auto Detect", "Text", "URL"], value="Auto Detect", label="Input Type")
    output_text = gr.Textbox(label="Processed Text", lines=6)
    verdict = gr.Label(label="Verdict")
    score = gr.Label(label="Authenticity Score (%)")
    chart = gr.Plot(label="Analytics Dashboard")
    fact_check = gr.Textbox(label="Fact Check Results", lines=6)
    gpt_fact = gr.Textbox(label="OpenAI GPT Analysis", lines=6)
    btn = gr.Button("Analyze")

    def handle_input(text, mode):
        """Resolve "Auto Detect" into Text/URL, then run the full pipeline."""
        if mode == "Auto Detect":
            # BUG FIX: strip surrounding whitespace so a pasted URL with a
            # stray leading space/newline is still recognized as a URL.
            candidate = text.strip()
            mode = "URL" if candidate.startswith(("http://", "https://")) else "Text"
        return detect_misinformation(text, mode)

    btn.click(
        fn=handle_input,
        inputs=[input_text, input_type],
        outputs=[output_text, verdict, score, chart, fact_check, gpt_fact]
    )

demo.launch()