# Facto - AI Fact-Checking System (Hugging Face Spaces app)
| import os | |
| import requests | |
| import time | |
| import csv | |
| import pandas as pd | |
| import kagglehub | |
| import gradio as gr | |
| from cerebras.cloud.sdk import Cerebras | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.schema import Document | |
| from langchain_community.document_loaders import TextLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| import numpy as np | |
# Initialize Cerebras API client
# Credentials come from environment variables (presumably HF Spaces secrets):
#   "Kc" -> Cerebras API key, "Nk" -> NewsAPI key.
# NOTE(review): os.getenv returns None when unset; neither value is validated here,
# so a missing secret only surfaces later as an auth failure — confirm intended.
Cerekey = os.getenv("Kc")
client = Cerebras(api_key= Cerekey)
Newskey = os.getenv("Nk")
def get_latest_news(query, limit=2):
    """Fetch up to `limit` recent articles matching `query` from NewsAPI.

    Args:
        query: free-text search string (URL-encoded via `params`).
        limit: maximum number of articles to return (default 2, as before).

    Returns:
        A list of (title, url, source_name) tuples; empty list on any
        network/HTTP/JSON failure — news is best-effort and must not
        break the fact-checking pipeline.
    """
    try:
        # `params` URL-encodes the query; the original f-string URL broke on
        # spaces/special characters. A timeout prevents a hung request from
        # stalling the whole app.
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params={"q": query, "apiKey": Newskey},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        return []
    return [
        (article["title"], article["url"], article["source"]["name"])
        for article in data.get("articles", [])[:limit]
    ]
def update_fact_checks_file(query):
    """Overwrite fact_checks.txt with the claim so the retriever can index it."""
    with open("fact_checks.txt", mode="w", encoding="utf-8") as handle:
        handle.write(query + "\n")
def create_faiss_retriever():
    """Build a FAISS similarity retriever over the current fact_checks.txt.

    Ensures the backing file exists, splits it into overlapping chunks,
    embeds them with MiniLM, and returns a top-4 similarity retriever.
    """
    # TextLoader raises if the file is missing, so create an empty one first.
    if not os.path.exists("fact_checks.txt"):
        open("fact_checks.txt", "w").close()

    splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
    chunks = splitter.split_documents(TextLoader("fact_checks.txt").load())

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    store = FAISS.from_documents(chunks, embeddings)
    return store.as_retriever(search_type="similarity", search_kwargs={"k": 4})
def clear_fact_checks_file():
    """Truncate fact_checks.txt so a stale claim cannot leak into later runs."""
    with open("fact_checks.txt", "w"):
        pass
def fact_check_with_llama3(query):
    """Fact-check a single claim with Llama 3.3 via the Cerebras API.

    Pipeline: write the claim to the scratch file, retrieve similar chunks
    with FAISS, fetch recent news, then stream a verdict from the model.

    Args:
        query: the claim text to verify.

    Returns:
        (result, sources): the model's verdict/explanation, and a newline-
        joined "title (source): url" list, or a fallback message when no
        news articles were found.
    """
    update_fact_checks_file(query)
    try:
        retriever = create_faiss_retriever()
        retrieved_docs = retriever.invoke(query)
        context_text = "\n".join(doc.page_content for doc in retrieved_docs)
        news = get_latest_news(query)
        prompt = f"""
Claim: {query}
Context: {context_text}
Based on the provided context, determine whether the claim is True, False, or Misleading. Provide a concise explanation and cite relevant sources. Don't mention any instance of your knowledge cut-off.
"""
        stream = client.chat.completions.create(
            messages=[{"role": "system", "content": prompt}],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=512,
            temperature=0.2,
            top_p=1
        )
        # Concatenate the streamed deltas into the final answer.
        result = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
    finally:
        # BUG FIX: originally the scratch file was only cleared on success,
        # so a retrieval/API failure left the stale claim for the next call.
        clear_fact_checks_file()
    sources = "\n".join(f"{title} ({source}): {url}" for title, url, source in news)
    return result, sources if sources else "No relevant sources found."
def map_politifact_label(label):
    """Translate a Politifact rating into the Facto verdict vocabulary.

    Ratings collapse to "True", "False", or "Misleading"; anything
    unrecognized maps to "Unknown". Matching is case-insensitive.
    """
    normalized = label.lower()
    if normalized in ("true", "mostly-true"):
        return "True"
    if normalized in ("false", "pants-fire", "barely-true"):
        return "False"
    if normalized == "half-true":
        return "Misleading"
    return "Unknown"
def evaluate_politifact(csv_file):
    """Run Facto over a Politifact CSV and write a comparison CSV.

    Expects columns `sources_quote` (the claim text) and `fact` (the
    Politifact rating). Each claim is fact-checked, timed, and scored
    against the mapped Politifact verdict.

    Args:
        csv_file: an uploaded-file object exposing `.name` (Gradio File).

    Returns:
        Path of the written results CSV ("fact_check_results.csv").
    """
    df = pd.read_csv(csv_file.name)
    results = []
    for _, row in df.iterrows():
        claim = row["sources_quote"]
        # Convert the Politifact label to the Facto vocabulary.
        actual_label = map_politifact_label(row["fact"])
        start_time = time.time()
        facto_result, sources = fact_check_with_llama3(claim)
        time_taken = time.time() - start_time
        # BUG FIX: the model returns a multi-sentence explanation, so exact
        # equality with a one-word label was always false (accuracy "0").
        # Use a containment check instead. NOTE(review): still heuristic —
        # e.g. "True" also occurs inside "not True"; a stricter verdict
        # parser would be more reliable.
        matched = actual_label.lower() in facto_result.lower()
        accuracy = "100" if matched else "0"
        results.append([claim, facto_result, actual_label, time_taken, accuracy])
    results_df = pd.DataFrame(
        results,
        columns=["Claim", "Facto Verdict", "Politifact Verdict", "Time Taken (s)", "Accuracy"],
    )
    output_csv = "fact_check_results.csv"
    results_df.to_csv(output_csv, index=False)
    return output_csv
def gradio_interface(csv_file):
    """Gradio callback: evaluate the uploaded Politifact CSV, return the results file path."""
    return evaluate_politifact(csv_file)
# Gradio UI: a single File-in / File-out interface — upload a Politifact CSV,
# download the fact-check comparison CSV produced by evaluate_politifact.
gui = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload Politifact CSV"),
    outputs=gr.File(label="Fact-Check Results CSV"),
    title="Facto - AI Fact-Checking System",
    description="Upload a CSV file with claims, and the system will verify them using Llama 3.3 and compare the results with Politifact."
)
# debug=True surfaces tracebacks in the UI/logs; launched unconditionally
# (no __main__ guard), which is the usual pattern for a Spaces entry script.
gui.launch(debug=True)