Afeezee committed on
Commit
d074dd5
·
verified ·
1 Parent(s): 8b3875c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import time
4
+ import csv
5
+ import pandas as pd
6
+ import kagglehub
7
+ import gradio as gr
8
+ from cerebras.cloud.sdk import Cerebras
9
+ from langchain_huggingface import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain.schema import Document
12
+ from langchain_community.document_loaders import TextLoader
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ import numpy as np
15
+
16
+ # Initialize Cerebras API client
17
+ client = Cerebras(api_key= "csk-vy9xw6mmk38ytjwyn34cty6e99f5j3c3m28xdphrv6nxj363")
18
+
19
+ Newskey = "77b0ae0438aa43a1b52630d89ddef4be"
20
+
21
+ def get_latest_news(query):
22
+ url = f"https://newsapi.org/v2/everything?q={query}&apiKey={Newskey}"
23
+ response = requests.get(url)
24
+ data = response.json()
25
+ return [(article["title"], article["url"], article["source"]["name"]) for article in data.get("articles", [])[:2]]
26
+
27
+ def update_fact_checks_file(query):
28
+ with open("fact_checks.txt", "w", encoding="utf-8") as file:
29
+ file.write(f"{query}\n")
30
+
31
+ def create_faiss_retriever():
32
+ if not os.path.exists("fact_checks.txt"):
33
+ open("fact_checks.txt", "w").close()
34
+
35
+ loader = TextLoader("fact_checks.txt")
36
+ documents = loader.load()
37
+
38
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
39
+ docs = text_splitter.split_documents(documents)
40
+
41
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
42
+ vector_store = FAISS.from_documents(docs, embedding_model)
43
+
44
+ return vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})
45
+
46
+ def clear_fact_checks_file():
47
+ open("fact_checks.txt", "w").close()
48
+
49
+ def fact_check_with_llama3(query):
50
+ update_fact_checks_file(query)
51
+ retriever = create_faiss_retriever()
52
+ retrieved_docs = retriever.invoke(query)
53
+ retrieved_texts = [doc.page_content for doc in retrieved_docs]
54
+ news = get_latest_news(query)
55
+ context_text = "\n".join(retrieved_texts)
56
+
57
+ prompt = f"""
58
+ Claim: {query}
59
+ Context: {context_text}
60
+ Based on the provided context, determine whether the claim is True, False, or Misleading. Provide a concise explanation and cite relevant sources. Don't mention any instance of your knowledge cut-off.
61
+ """
62
+
63
+ stream = client.chat.completions.create(
64
+ messages=[{"role": "system", "content": prompt}],
65
+ model="llama-3.3-70b",
66
+ stream=True,
67
+ max_completion_tokens=512,
68
+ temperature=0.2,
69
+ top_p=1
70
+ )
71
+
72
+ result = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
73
+ sources = "\n".join([f"{title} ({source}): {url}" for title, url, source in news])
74
+ clear_fact_checks_file()
75
+ return result, sources if sources else "No relevant sources found."
76
+
77
+ def map_politifact_label(label):
78
+ label_mapping = {
79
+ "false": "False",
80
+ "half-true": "Misleading",
81
+ "mostly-true": "Misleading",
82
+ "barely-true": "Misleading",
83
+ "true": "True"
84
+ }
85
+ return label_mapping.get(label.lower(), "Unknown")
86
+
87
+ def evaluate_politifact(csv_file):
88
+ df = pd.read_csv(csv_file.name)
89
+ results = []
90
+
91
+ for index, row in df.iterrows():
92
+ claim = row["claim"]
93
+ actual_label = map_politifact_label(row["label"]) # Convert Politifact label to Facto equivalent
94
+ start_time = time.time()
95
+ facto_result, sources = fact_check_with_llama3(claim)
96
+ time_taken = time.time() - start_time
97
+ accuracy = "100% Accurate" if facto_result.lower() == actual_label.lower() else "Not Accurate"
98
+
99
+ results.append([claim, facto_result, actual_label, time_taken, sources, accuracy])
100
+
101
+ results_df = pd.DataFrame(results, columns=["Claim", "Facto Verdict", "Politifact Verdict", "Time Taken (s)", "Sources", "Accuracy"])
102
+ output_csv = "fact_check_results.csv"
103
+ results_df.to_csv(output_csv, index=False)
104
+
105
+ return output_csv
106
+
107
+ def gradio_interface(csv_file):
108
+ output_csv = evaluate_politifact(csv_file)
109
+ return output_csv
110
+
111
+ gui = gr.Interface(
112
+ fn=gradio_interface,
113
+ inputs=gr.File(label="Upload Politifact CSV"),
114
+ outputs=gr.File(label="Fact-Check Results CSV"),
115
+ title="Facto - AI Fact-Checking System",
116
+ description="Upload a CSV file with claims, and the system will verify them using Llama 3.3 and compare the results with Politifact."
117
+ )
118
+
119
+ gui.launch(debug=True)