# TruthCheck / app.py
# Page-header residue captured with this file (avatar caption "nancyattri's picture",
# commit message "path changes", commit id 283a488).
from flask import Flask, render_template, request
from sentence_transformers import SentenceTransformer, util, CrossEncoder # CrossEncoder add kiya
from transformers import T5Tokenizer, T5ForConditionalGeneration # Transformers add kiya
from serpapi import GoogleSearch
import os
import json
import torch # Torch add kiya
# Flask application object; the route decorators in this module register on it.
app = Flask(import_name=__name__)
# === CONFIGURATION ===
# SerpAPI credential. The SERPAPI_KEY environment variable takes precedence.
# SECURITY NOTE(review): the fallback below is a secret committed to source
# control. It is kept only so existing deployments keep working; rotate the
# key and remove the literal once the environment variable is set everywhere.
SERPAPI_KEY = os.environ.get(
    "SERPAPI_KEY",
    "ebde36523cc9138dce4067eaf7a35f45f2c50ae61fb7074595182c96470aaa58",
)
# Number of search results requested per query (sent as the "num" parameter).
SEARCH_NUM_RESULTS = 5
# Minimum cosine similarity for a search result to count as supporting a claim.
SIMILARITY_THRESHOLD = 0.7
# === Load model and facts ===
# Sentence-embedding model (unchanged from earlier versions); used for the
# cosine-similarity comparisons.
model = SentenceTransformer("all-MiniLM-L6-v2")

# NLI cross-encoder, the "logical judge": scores a (claim, evidence) pair.
nli_model = CrossEncoder("cross-encoder/nli-distilroberta-base")

# T5 tokenizer/model pair, the "explanation expert": generates the explanation
# text. Both are loaded from the same checkpoint name.
_T5_CHECKPOINT = "t5-small"
t5_tokenizer = T5Tokenizer.from_pretrained(_T5_CHECKPOINT)
t5_model = T5ForConditionalGeneration.from_pretrained(_T5_CHECKPOINT)
# Known-true statements that claims are compared against before any web search.
# They are read from facts.json (resolved against the current working
# directory) when that file exists; otherwise the built-in list is used.
try:
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open("facts.json", "r", encoding="utf-8") as f:
        verified_facts = json.load(f)
except FileNotFoundError:
    verified_facts = [
        "humans have 2 legs",
        "humans are mammals",
        "earth is a planet",
        "earth orbits the sun",
        "the sun rises in the east",
        "water boils at 100 degrees celsius",
        "india is a country",
        "the moon revolves around earth",
        "earth has one moon",
        "2 + 2 = 4",
        "the sky is blue",
        "oxygen is essential for humans",
        "Sun is a star",
        "Sun is Hot",
    ]
def search_google(query):
    """Run a Google search through SerpAPI and return the organic results.

    Args:
        query: Text to search for.

    Returns:
        A list of ``(text, link)`` tuples, one per organic result. ``text`` is
        the result title and snippet joined by a single space; ``link`` is the
        result URL, or ``"#"`` when the result carries no link. The list is
        empty when the response has no ``"organic_results"`` entry.
    """
    params = {
        "q": query,
        "api_key": SERPAPI_KEY,
        "engine": "google",
        "num": SEARCH_NUM_RESULTS,
    }
    response = GoogleSearch(params).get_dict()

    # Every field is read with .get so that a result missing its title does
    # not raise KeyError and discard the remaining results.
    return [
        (res.get("title", "") + " " + res.get("snippet", ""), res.get("link", "#"))
        for res in response.get("organic_results", [])
    ]
def generate_explanation(claim, evidence):
    """Generate explanation text for a claim using the T5 model.

    Args:
        claim: The statement being checked.
        evidence: Source text the explanation should be based on.

    Returns:
        The first generated sequence, decoded to a string with special tokens
        removed.
    """
    prompt = f"explain fact: {claim} based on source: {evidence}"

    # The prompt is truncated to at most 512 tokens before generation.
    input_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Beam search with 4 beams, output capped at 100 tokens.
    generated = t5_model.generate(
        input_ids,
        max_length=100,
        num_beams=4,
        early_stopping=True,
    )

    first_sequence = generated[0]
    return t5_tokenizer.decode(first_sequence, skip_special_tokens=True)
def predict_fake_news(user_input):
    """Classify a claim as REAL or FAKE and produce a short explanation.

    The claim is first compared against ``verified_facts``. If none matches,
    Google results are fetched, the result most similar to the claim is chosen
    as evidence, an NLI model checks the claim against that evidence, and the
    T5 model generates an explanation.

    Args:
        user_input: The claim text to check.

    Returns:
        A 4-tuple ``(verdict, score, url, explanation)`` on every path:
        ``verdict`` is a human-readable label starting with ``REAL`` or
        ``FAKE``; ``score`` is a float (0.95 for a verified-fact match, 0.0
        when no search results exist, otherwise the best cosine similarity,
        halved when a contradiction is detected); ``url`` is the link of the
        best-matching result or ``"#"`` when there is none; ``explanation`` is
        a string describing the outcome.
    """
    # Indices of the NLI model's argmax output as this function interprets them.
    contradiction, entailment, neutral = 0, 1, 2

    user_input_clean = user_input.lower().strip()
    best_match_url = "#"  # placeholder link used when no article is available

    # 1. Direct match against the verified facts. The claim is encoded once,
    #    outside the loop, instead of once per fact.
    clean_embedding = model.encode(user_input_clean, convert_to_tensor=True)
    for fact in verified_facts:
        fact_embedding = model.encode(fact, convert_to_tensor=True)
        if util.cos_sim(clean_embedding, fact_embedding)[0][0] > 0.9:
            # No article is involved here, so the placeholder URL is returned.
            # A fourth element is included so callers can always unpack four
            # values.
            return (
                "REAL (matched with verified fact)",
                0.95,
                best_match_url,
                f"The claim closely matches the verified fact: {fact}",
            )

    # 2. No fact matched: fall back to a web search.
    print("\n Searching Google for similar articles...")
    results_with_links = search_google(user_input)
    # Only the text part of each (text, link) pair is embedded.
    results = [text for text, _link in results_with_links]
    if not results:
        print(" No search results found.")
        return (
            "FAKE (no supporting articles found)",
            0.0,
            best_match_url,
            "No search results were found to support this claim.",
        )

    # 3. Pick the search result most similar to the claim.
    input_embedding = model.encode(user_input, convert_to_tensor=True)
    result_embeddings = model.encode(results, convert_to_tensor=True)
    similarities = util.cos_sim(input_embedding, result_embeddings)[0]
    max_sim = float(similarities.max())
    max_sim_index = similarities.argmax().item()
    best_evidence, best_match_url = results_with_links[max_sim_index]

    # 4. NLI check of the claim against the best evidence. A single pair is
    #    scored, so the argmax over the scores is the predicted label index.
    nli_scores = nli_model.predict([(user_input, best_evidence)])
    label = int(nli_scores.argmax())

    # 5. Verdict.
    if label == contradiction:
        # The evidence contradicts the claim; penalise the score.
        verdict = "FAKE (Logical Contradiction Detected)"
        max_sim = max_sim * 0.5
    elif label in (entailment, neutral) and max_sim >= SIMILARITY_THRESHOLD:
        # Supporting or neutral evidence counts as REAL only when the match is
        # strong. The condition is grouped explicitly: without parentheses
        # `a or b and c` parses as `a or (b and c)`, which skipped the
        # similarity threshold for entailment.
        verdict = "REAL (Supported by News Sources)"
    else:
        # Similarity too low to treat the evidence as support.
        verdict = "FAKE (Inconsistent or Unverified Information(Low Evidence))"

    # 6. Explanation generated from the claim and the chosen evidence.
    explanation = generate_explanation(user_input, best_evidence)

    return verdict, max_sim, best_match_url, explanation
@app.route("/")
@app.route("/home")
def home():
    """Serve the landing page at both ``/`` and ``/home``."""
    page = render_template("home.html")
    return page
@app.route("/about")
def about():
    """Serve the about page."""
    page = render_template("about.html")
    return page
@app.route("/detector", methods=["GET", "POST"])
def index():
    """Render the detector page and, on POST, the verdict for the submitted claim.

    On GET, or when the submitted ``news`` field is missing or blank, the page
    is rendered with no verdict. Otherwise the claim is passed to
    ``predict_fake_news`` and its verdict, score, source link and explanation
    are handed to the template.
    """
    verdict = score = news_link = explanation = None
    user_input = ""

    if request.method == "POST":
        # .get avoids a 400 error when the form field is absent.
        user_input = request.form.get("news", "")

        # Skip the models and the external search for blank submissions.
        if user_input.strip():
            result = predict_fake_news(user_input)
            # Tolerate a result without the trailing explanation element so a
            # shorter tuple does not raise ValueError during unpacking.
            verdict, score, news_link, *extra = result
            explanation = extra[0] if extra else None

    return render_template(
        "index.html",
        verdict=verdict,
        score=score,
        user_input=user_input,
        news_link=news_link,
        explanation=explanation,
    )
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on every network
    # interface, port 7860.
    app.run(port=7860, host="0.0.0.0")