Afeezee committed on
Commit
d074dd5
·
verified ·
1 Parent(s): 8b3875c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import time
4
+ import csv
5
+ import pandas as pd
6
+ import kagglehub
7
+ import gradio as gr
8
+ from cerebras.cloud.sdk import Cerebras
9
+ from langchain_huggingface import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain.schema import Document
12
+ from langchain_community.document_loaders import TextLoader
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ import numpy as np
15
+
16
+ # Initialize Cerebras API client
17
+ client = Cerebras(api_key= "csk-vy9xw6mmk38ytjwyn34cty6e99f5j3c3m28xdphrv6nxj363")
18
+
19
+ Newskey = "77b0ae0438aa43a1b52630d89ddef4be"
20
+
21
+ def get_latest_news(query):
22
+ url = f"https://newsapi.org/v2/everything?q={query}&apiKey={Newskey}"
23
+ response = requests.get(url)
24
+ data = response.json()
25
+ return [(article["title"], article["url"], article["source"]["name"]) for article in data.get("articles", [])[:2]]
26
+
27
+ def update_fact_checks_file(query):
28
+ with open("fact_checks.txt", "w", encoding="utf-8") as file:
29
+ file.write(f"{query}\n")
30
+
31
+ def create_faiss_retriever():
32
+ if not os.path.exists("fact_checks.txt"):
33
+ open("fact_checks.txt", "w").close()
34
+
35
+ loader = TextLoader("fact_checks.txt")
36
+ documents = loader.load()
37
+
38
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
39
+ docs = text_splitter.split_documents(documents)
40
+
41
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
42
+ vector_store = FAISS.from_documents(docs, embedding_model)
43
+
44
+ return vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})
45
+
46
+ def clear_fact_checks_file():
47
+ open("fact_checks.txt", "w").close()
48
+
49
+ def fact_check_with_llama3(query):
50
+ update_fact_checks_file(query)
51
+ retriever = create_faiss_retriever()
52
+ retrieved_docs = retriever.invoke(query)
53
+ retrieved_texts = [doc.page_content for doc in retrieved_docs]
54
+ news = get_latest_news(query)
55
+ context_text = "\n".join(retrieved_texts)
56
+
57
+ prompt = f"""
58
+ Claim: {query}
59
+ Context: {context_text}
60
+ Based on the provided context, determine whether the claim is True, False, or Misleading. Provide a concise explanation and cite relevant sources. Don't mention any instance of your knowledge cut-off.
61
+ """
62
+
63
+ stream = client.chat.completions.create(
64
+ messages=[{"role": "system", "content": prompt}],
65
+ model="llama-3.3-70b",
66
+ stream=True,
67
+ max_completion_tokens=512,
68
+ temperature=0.2,
69
+ top_p=1
70
+ )
71
+
72
+ result = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
73
+ sources = "\n".join([f"{title} ({source}): {url}" for title, url, source in news])
74
+ clear_fact_checks_file()
75
+ return result, sources if sources else "No relevant sources found."
76
+
77
+ def map_politifact_label(label):
78
+ label_mapping = {
79
+ "false": "False",
80
+ "half-true": "Misleading",
81
+ "mostly-true": "Misleading",
82
+ "barely-true": "Misleading",
83
+ "true": "True"
84
+ }
85
+ return label_mapping.get(label.lower(), "Unknown")
86
+
87
+ def evaluate_politifact(csv_file):
88
+ df = pd.read_csv(csv_file.name)
89
+ results = []
90
+
91
+ for index, row in df.iterrows():
92
+ claim = row["claim"]
93
+ actual_label = map_politifact_label(row["label"]) # Convert Politifact label to Facto equivalent
94
+ start_time = time.time()
95
+ facto_result, sources = fact_check_with_llama3(claim)
96
+ time_taken = time.time() - start_time
97
+ accuracy = "100% Accurate" if facto_result.lower() == actual_label.lower() else "Not Accurate"
98
+
99
+ results.append([claim, facto_result, actual_label, time_taken, sources, accuracy])
100
+
101
+ results_df = pd.DataFrame(results, columns=["Claim", "Facto Verdict", "Politifact Verdict", "Time Taken (s)", "Sources", "Accuracy"])
102
+ output_csv = "fact_check_results.csv"
103
+ results_df.to_csv(output_csv, index=False)
104
+
105
+ return output_csv
106
+
107
+ def gradio_interface(csv_file):
108
+ output_csv = evaluate_politifact(csv_file)
109
+ return output_csv
110
+
111
+ gui = gr.Interface(
112
+ fn=gradio_interface,
113
+ inputs=gr.File(label="Upload Politifact CSV"),
114
+ outputs=gr.File(label="Fact-Check Results CSV"),
115
+ title="Facto - AI Fact-Checking System",
116
+ description="Upload a CSV file with claims, and the system will verify them using Llama 3.3 and compare the results with Politifact."
117
+ )
118
+
119
+ gui.launch(debug=True)