Spaces:

CereusTech
/

Facto_Eval

Sleeping

App Files Community

Facto_Eval / app.py

Afeezee

Update app.py

918517f verified about 1 year ago

raw

history blame contribute delete

4.34 kB

	import csv
	import os
	import re
	import time

	import gradio as gr
	import kagglehub
	import numpy as np
	import pandas as pd
	import requests
	from cerebras.cloud.sdk import Cerebras
	from langchain.schema import Document
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import TextLoader
	from langchain_community.vectorstores import FAISS
	from langchain_huggingface import HuggingFaceEmbeddings

	# Credentials come from environment variables: "Kc" is handed to the
	# Cerebras client, "Nk" is sent to NewsAPI as the apiKey. Either is None
	# when the variable is unset.
	Cerekey = os.environ.get("Kc")
	Newskey = os.environ.get("Nk")

	# Module-wide Cerebras client used for the chat-completion request.
	client = Cerebras(api_key=Cerekey)

	def get_latest_news(query, max_articles=2, timeout=10):
	    """Fetch a few news articles matching ``query`` from NewsAPI.

	    The query is passed through ``params`` so ``requests`` URL-encodes it.
	    Interpolating it into the URL by hand breaks on claims that contain
	    ``&``, ``#`` or ``?`` and lets a claim inject extra query parameters.

	    Args:
	        query: Free-text search string (here, the claim being checked).
	        max_articles: Upper bound on the number of articles returned.
	        timeout: Seconds to wait for NewsAPI before giving up.

	    Returns:
	        A list of ``(title, url, source_name)`` tuples, at most
	        ``max_articles`` long. The list is empty when the request fails,
	        the body is not a JSON object, or it carries no ``articles``.
	    """
	    try:
	        response = requests.get(
	            "https://newsapi.org/v2/everything",
	            params={"q": query, "apiKey": Newskey},
	            timeout=timeout,
	        )
	        data = response.json()
	    except (requests.RequestException, ValueError):
	        # News links are supplementary output; a network or decoding
	        # failure should not abort the fact-check that called us.
	        return []

	    if not isinstance(data, dict):
	        return []

	    articles = data.get("articles") or []
	    return [
	        (
	            article.get("title"),
	            article.get("url"),
	            # "source" may be missing or null; fall back to an empty mapping.
	            (article.get("source") or {}).get("name"),
	        )
	        for article in articles[:max_articles]
	    ]

	def update_fact_checks_file(query):
	    """Overwrite ``fact_checks.txt`` with ``query`` followed by a newline.

	    Any previous content of the file is discarded (mode ``"w"``).
	    """
	    with open("fact_checks.txt", mode="w", encoding="utf-8") as handle:
	        handle.write("{}\n".format(query))

	_embedding_model = None  # lazily built HuggingFaceEmbeddings, reused across calls


	def _get_embedding_model():
	    """Return the sentence-transformer embedder, building it on first use only."""
	    global _embedding_model
	    if _embedding_model is None:
	        _embedding_model = HuggingFaceEmbeddings(
	            model_name="sentence-transformers/all-MiniLM-L6-v2"
	        )
	    return _embedding_model


	def create_faiss_retriever():
	    """Build a FAISS similarity retriever over the contents of ``fact_checks.txt``.

	    The file is created empty if it does not exist, split into
	    200-character chunks with a 50-character overlap, embedded, and
	    indexed in an in-memory FAISS store.

	    Returns:
	        A retriever configured for similarity search with ``k=4``.
	    """
	    if not os.path.exists("fact_checks.txt"):
	        with open("fact_checks.txt", "w", encoding="utf-8"):
	            pass

	    # The file is written as UTF-8 elsewhere in this module, so read it the
	    # same way instead of relying on the platform's default encoding.
	    loader = TextLoader("fact_checks.txt", encoding="utf-8")
	    documents = loader.load()

	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
	    docs = text_splitter.split_documents(documents)

	    # NOTE(review): an empty file yields no chunks, and building a FAISS
	    # index from an empty list is expected to fail - confirm, and decide
	    # how callers should handle that case.
	    vector_store = FAISS.from_documents(docs, _get_embedding_model())

	    return vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

	def clear_fact_checks_file():
	    """Truncate ``fact_checks.txt`` to zero length, creating it if absent."""
	    with open("fact_checks.txt", "w"):
	        pass

	def fact_check_with_llama3(query):
	    """Ask the Cerebras-hosted ``llama-3.3-70b`` model to judge a claim.

	    The claim is written to ``fact_checks.txt``, indexed, and retrieved as
	    the prompt context. News links for the claim are fetched separately and
	    returned alongside the model's answer. The scratch file is always
	    cleared afterwards, including when retrieval or the model call raises.

	    Args:
	        query: The claim to verify.

	    Returns:
	        A ``(result, sources)`` tuple. ``result`` is the model's full text
	        answer. ``sources`` is one ``"title (source): url"`` line per
	        article, or ``"No relevant sources found."`` when there are none.
	    """
	    update_fact_checks_file(query)
	    try:
	        retriever = create_faiss_retriever()
	        retrieved_docs = retriever.invoke(query)
	        context_text = "\n".join(doc.page_content for doc in retrieved_docs)
	        news = get_latest_news(query)

	        # NOTE(review): the indexed file holds only the claim itself, so the
	        # context echoes the claim, and the fetched news never reaches the
	        # prompt. Confirm whether the headlines were meant to be included.
	        prompt = f"""
	Claim: {query}
	Context: {context_text}
	Based on the provided context, determine whether the claim is True, False, or Misleading. Provide a concise explanation and cite relevant sources. Don't mention any instance of your knowledge cut-off.
	"""

	        stream = client.chat.completions.create(
	            messages=[{"role": "system", "content": prompt}],
	            model="llama-3.3-70b",
	            stream=True,
	            max_completion_tokens=512,
	            temperature=0.2,
	            top_p=1,
	        )

	        # Skip chunks with no choices so indexing [0] cannot raise; a delta
	        # without content contributes an empty string.
	        result = "".join(
	            chunk.choices[0].delta.content or ""
	            for chunk in stream
	            if chunk.choices
	        )
	        sources = "\n".join(f"{title} ({source}): {url}" for title, url, source in news)
	    finally:
	        # Never leave a stale claim on disk for the next call to index.
	        clear_fact_checks_file()

	    return result, sources or "No relevant sources found."

	_POLITIFACT_TO_FACTO = {
	    "pants-fire": "False",
	    "false": "False",
	    "half-true": "Misleading",
	    "mostly-true": "True",
	    "barely-true": "False",
	    "true": "True",
	}


	def map_politifact_label(label):
	    """Translate a Politifact rating into Facto's three-way verdict.

	    Matching ignores case and surrounding whitespace.

	    Args:
	        label: The Politifact rating, e.g. ``"half-true"``.

	    Returns:
	        ``"True"``, ``"False"`` or ``"Misleading"`` for a recognised rating,
	        and ``"Unknown"`` for anything else, including non-string values
	        such as ``None`` or a NaN read from an empty CSV cell.
	    """
	    if not isinstance(label, str):
	        return "Unknown"
	    return _POLITIFACT_TO_FACTO.get(label.strip().lower(), "Unknown")

	_VERDICT_PATTERN = re.compile(r"\b(true|false|misleading)\b", re.IGNORECASE)


	def _extract_verdict(response_text):
	    """Return the first verdict word in the text ("True"/"False"/"Misleading"), or None."""
	    match = _VERDICT_PATTERN.search(response_text or "")
	    return match.group(1).capitalize() if match else None


	def evaluate_politifact(csv_file):
	    """Fact-check every claim in a Politifact CSV and write a results CSV.

	    Each row's ``sources_quote`` is checked with ``fact_check_with_llama3``
	    and its ``fact`` rating is mapped with ``map_politifact_label``.

	    The model answers with a free-text explanation, so comparing the whole
	    answer with a one-word label would almost never match. Accuracy is
	    instead scored on the first verdict word found in the answer. This is a
	    heuristic: an answer such as "not true" is read as "True".

	    Args:
	        csv_file: A filesystem path, or an object whose ``name`` attribute
	            is the path (such as an uploaded-file wrapper).

	    Returns:
	        The path of the written results file, ``"fact_check_results.csv"``.
	        Its "Accuracy" column is ``"100"`` when the extracted verdict
	        equals the mapped Politifact label and ``"0"`` otherwise, including
	        when no verdict word is found or the label maps to "Unknown".
	    """
	    csv_path = getattr(csv_file, "name", csv_file)
	    df = pd.read_csv(csv_path)
	    results = []

	    for claim, raw_label in zip(df["sources_quote"], df["fact"]):
	        # str() keeps a missing rating (NaN) from breaking the label
	        # mapper; "nan" is not a known rating and maps to "Unknown".
	        actual_label = map_politifact_label(str(raw_label))

	        start_time = time.perf_counter()
	        facto_result, _ = fact_check_with_llama3(claim)
	        time_taken = time.perf_counter() - start_time

	        accuracy = "100" if _extract_verdict(facto_result) == actual_label else "0"
	        results.append([claim, facto_result, actual_label, time_taken, accuracy])

	    results_df = pd.DataFrame(
	        results,
	        columns=["Claim", "Facto Verdict", "Politifact Verdict", "Time Taken (s)", "Accuracy"],
	    )
	    output_csv = "fact_check_results.csv"
	    results_df.to_csv(output_csv, index=False)

	    return output_csv

	def gradio_interface(csv_file):
	    """Gradio callback: evaluate the uploaded CSV and return the results-file path."""
	    return evaluate_politifact(csv_file)

	# Single-function Gradio app: one uploaded CSV in, one results CSV out.
	gui = gr.Interface(
	    title="Facto - AI Fact-Checking System",
	    description="Upload a CSV file with claims, and the system will verify them using Llama 3.3 and compare the results with Politifact.",
	    fn=gradio_interface,
	    inputs=gr.File(label="Upload Politifact CSV"),
	    outputs=gr.File(label="Fact-Check Results CSV"),
	)

	# Launched unconditionally when this module is executed or imported.
	gui.launch(debug=True)