Spaces:

nancyattri
/

TruthCheck

Sleeping

App Files Files Community

TruthCheck / app.py

nancyattri

path changes

283a488 3 days ago

raw

history blame contribute delete

5.99 kB

	from flask import Flask, render_template, request
	from sentence_transformers import SentenceTransformer, util, CrossEncoder # CrossEncoder add kiya
	from transformers import T5Tokenizer, T5ForConditionalGeneration # Transformers add kiya
	from serpapi import GoogleSearch
	import os
	import json
	import torch # Torch add kiya

	app = Flask(__name__)

	# === CONFIGURATION ===
	SERPAPI_KEY = "ebde36523cc9138dce4067eaf7a35f45f2c50ae61fb7074595182c96470aaa58"
	SEARCH_NUM_RESULTS = 5
	SIMILARITY_THRESHOLD = 0.7

	# === Load model and facts ===
	model = SentenceTransformer("all-MiniLM-L6-v2")
	# Purana model wahi rahega

	# Naya NLI model (Logical Judge)
	nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base')

	# Naya T5 model (Explanation Expert)
	t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
	t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")

	if os.path.exists("facts.json"):
	with open("facts.json", "r") as f:
	verified_facts = json.load(f)
	else:
	verified_facts = [
	"humans have 2 legs",
	"humans are mammals",
	"earth is a planet",
	"earth orbits the sun",
	"the sun rises in the east",
	"water boils at 100 degrees celsius",
	"india is a country",
	"the moon revolves around earth",
	"earth has one moon",
	"2 + 2 = 4",
	"the sky is blue",
	"oxygen is essential for humans",
	"Sun is a star",
	"Sun is Hot",

	]


	def search_google(query):
	params = {
	"q": query,
	"api_key": SERPAPI_KEY,
	"engine": "google",
	"num": SEARCH_NUM_RESULTS
	}
	search = GoogleSearch(params)
	results = search.get_dict()
	# Return a list of tuples: (text_to_encode, link)
	return [(res['title'] + " " + res.get('snippet', ''), res.get('link', '#'))
	for res in results.get("organic_results", [])]

	def generate_explanation(claim, evidence):
	input_text = f"explain fact: {claim} based on source: {evidence}"
	inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
	outputs = t5_model.generate(inputs, max_length=100, num_beams=4, early_stopping=True)
	return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

	def predict_fake_news(user_input):
	user_input_clean = user_input.lower().strip()
	best_match_url = "#" # Initialize URL

	# First, match against verified facts directly
	for fact in verified_facts:
	if util.cos_sim(
	model.encode(user_input_clean, convert_to_tensor=True),
	model.encode(fact, convert_to_tensor=True)
	)[0][0] > 0.9:
	# If matched with a fact, we don't have a URL, so return the default
	return "REAL (matched with verified fact)", 0.95, best_match_url

	# If not matched with facts, then try Google
	print("\n Searching Google for similar articles...")

	# Get results as (text, link) tuples
	results_with_links = search_google(user_input)

	# Extract only the text for embedding
	results = [text for text, link in results_with_links]

	if not results:
	print(" No search results found.")
	return "FAKE (no supporting articles found)", 0.0, best_match_url

	# Compare with search results
	input_embedding = model.encode(user_input, convert_to_tensor=True)
	result_embeddings = model.encode(results, convert_to_tensor=True)

	similarities = util.cos_sim(input_embedding, result_embeddings)[0]
	max_sim = float(similarities.max())

	# Find the index of the highest similarity score
	max_sim_index = similarities.argmax().item()

	# Get the URL corresponding to the best match
	# best_match_url = results_with_links[max_sim_index][1]


	# print(f"\n Highest Similarity Score: {max_sim:.2f}")
	# if max_sim >= SIMILARITY_THRESHOLD:
	# return "REAL (matched with trusted news online)", max_sim, best_match_url
	# else:
	# return "FAKE (not matched with trusted news)", max_sim, best_match_url
	# 1. Sabse pehle best match ka URL aur Text nikal lo
	best_match_url = results_with_links[max_sim_index][1]
	best_evidence = results_with_links[max_sim_index][0]

	# 2. NLI logic (Contradiction check karne ke liye)
	#
	nli_scores = nli_model.predict([(user_input, best_evidence)])
	label = nli_scores.argmax()

	# 3. Verdict decide karne ka naya tarika
	if label == 0:
	# Agar Google kuch aur keh raha hai aur aap kuch aur (Men vs Women)
	verdict = "FAKE (Logical Contradiction Detected)"
	max_sim = max_sim * 0.5 # Jhoot pakde jaane par score kam kar diya
	elif label == 1 or label ==2 and max_sim >= SIMILARITY_THRESHOLD:
	# Label 1 (Support) OR Label 2 (Neutral) dono ko REAL maano agar match accha hai # Agar dono ki baat ek hi hai aur similarity bhi high hai
	verdict = "REAL (Supported by News Sources)"
	else:
	# Agar match nahi ho raha ya neutral hai
	verdict = "FAKE (Inconsistent or Unverified Information(Low Evidence))"

	# 4. T5 Model se explanation banwayein
	explanation = generate_explanation(user_input, best_evidence)

	# 5. AB SABSE ZAROORI: Ab 4 cheezein return karni hain
	return verdict, max_sim, best_match_url, explanation
	@app.route("/")
	@app.route("/home")
	def home():
	return render_template("home.html")

	@app.route("/about")
	def about():
	return render_template("about.html")

	@app.route("/detector", methods=["GET", "POST"])
	def index():
	verdict, score, news_link, explanation = None, None, None, None
	user_input = ""

	if request.method == "POST":
	user_input = request.form["news"]
	# New: Unpack the returned link
	verdict, score, news_link, explanation = predict_fake_news(user_input)

	# New: Pass news_link to the template
	return render_template("index.html", verdict=verdict, score=score, user_input=user_input, news_link=news_link, explanation=explanation )


	if __name__ == "__main__":
	app.run(host="0.0.0.0", port=7860)