# Hugging Face Space: nancyattri/TruthCheck (status: Sleeping)
# File size: 5,987 Bytes

from flask import Flask, render_template, request
from sentence_transformers import SentenceTransformer, util, CrossEncoder # CrossEncoder add kiya
from transformers import T5Tokenizer, T5ForConditionalGeneration # Transformers add kiya
from serpapi import GoogleSearch
import os
import json
import torch # Torch add kiya

# Flask application object; the @app.route decorators in this module
# register their view functions on it.
app = Flask(__name__)

# === CONFIGURATION ===
# SerpAPI credential. Supply it through the SERPAPI_KEY environment variable;
# the literal below is kept only as a backward-compatible fallback.
# NOTE(security): the fallback key is committed in source and should be
# treated as compromised -- rotate it and delete the literal once the
# environment variable is configured in the deployment.
SERPAPI_KEY = (
    os.environ.get("SERPAPI_KEY")
    or "ebde36523cc9138dce4067eaf7a35f45f2c50ae61fb7074595182c96470aaa58"
)
# Value sent as the "num" parameter of each search request.
SEARCH_NUM_RESULTS = 5
# Minimum cosine similarity between a claim and its best search result for
# the result to count as supporting evidence.
SIMILARITY_THRESHOLD = 0.7

# === Load model and facts ===
# Sentence-embedding model used for the cosine-similarity comparisons below.
model = SentenceTransformer("all-MiniLM-L6-v2")
# The original embedding model is kept unchanged.

# New NLI model (the "logical judge") that scores a claim against evidence.
nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base')

# New T5 model (the "explanation expert") used to generate explanations.
t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")

# Built-in fact list used when no facts.json file sits in the working directory.
_DEFAULT_FACTS = [
    "humans have 2 legs",
    "humans are mammals",
    "earth is a planet",
    "earth orbits the sun",
    "the sun rises in the east",
    "water boils at 100 degrees celsius",
    "india is a country",
    "the moon revolves around earth",
    "earth has one moon",
    "2 + 2 = 4",
    "the sky is blue",
    "oxygen is essential for humans",
    "Sun is a star",
    "Sun is Hot",
]

# Prefer the facts stored in facts.json; otherwise fall back to the defaults.
if os.path.exists("facts.json"):
    # Decode explicitly as UTF-8 so non-ASCII facts load the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("facts.json", "r", encoding="utf-8") as f:
        verified_facts = json.load(f)
else:
    verified_facts = list(_DEFAULT_FACTS)


def search_google(query):
    """Query Google through SerpAPI and return the organic results.

    Args:
        query: Search string sent as the ``q`` parameter.

    Returns:
        A list of ``(text, link)`` tuples, one per organic result. ``text``
        is the result's title and snippet joined by a single space (either
        part may be empty); ``link`` is the result URL, or ``"#"`` when the
        result carries no link. The list is empty when the response has no
        ``organic_results`` entry.
    """
    params = {
        "q": query,
        "api_key": SERPAPI_KEY,
        "engine": "google",
        "num": SEARCH_NUM_RESULTS,
    }
    results = GoogleSearch(params).get_dict()
    return [
        (
            # Read "title" with .get() like the other fields so one result
            # lacking a title cannot raise KeyError and abort the request.
            res.get("title", "") + " " + res.get("snippet", ""),
            res.get("link", "#"),
        )
        for res in results.get("organic_results", [])
    ]

def generate_explanation(claim, evidence):
    """Produce a T5-generated explanation of ``claim`` given ``evidence``.

    Args:
        claim: The statement being checked.
        evidence: Source text the explanation should be based on.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    prompt = f"explain fact: {claim} based on source: {evidence}"
    # The prompt is truncated to at most 512 tokens before generation.
    token_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    # Beam search (4 beams) capped at 100 output tokens.
    generated = t5_model.generate(
        token_ids,
        max_length=100,
        num_beams=4,
        early_stopping=True,
    )
    first_sequence = generated[0]
    return t5_tokenizer.decode(first_sequence, skip_special_tokens=True)

def _verdict_from_nli(label, max_sim):
    """Turn an NLI label and a similarity score into ``(verdict, score)``."""
    if label == 0:
        # Label 0: the evidence contradicts the claim, so the score is
        # halved as a penalty.
        return "FAKE (Logical Contradiction Detected)", max_sim * 0.5
    # Labels 1 (support) and 2 (neutral) both count as REAL, but only when
    # the similarity is high enough. The explicit grouping matters: without
    # it, ``or``/``and`` precedence lets label 1 bypass the threshold.
    # NOTE(review): label order follows the original comments -- confirm
    # against the NLI model card.
    if label in (1, 2) and max_sim >= SIMILARITY_THRESHOLD:
        return "REAL (Supported by News Sources)", max_sim
    return "FAKE (Inconsistent or Unverified Information(Low Evidence))", max_sim


def predict_fake_news(user_input):
    """Classify a claim as REAL or FAKE.

    The claim is first compared with the verified fact list; if no fact is
    similar enough, Google results are fetched, the most similar result is
    checked against the claim with the NLI model, and T5 generates an
    explanation from that result.

    Args:
        user_input: The claim text submitted by the user.

    Returns:
        A ``(verdict, score, url, explanation)`` tuple on every path:
        ``verdict`` is the human-readable label, ``score`` a float,
        ``url`` the link of the best-matching search result (``"#"`` when
        there is none) and ``explanation`` a short explanatory string.
    """
    user_input_clean = user_input.lower().strip()
    best_match_url = "#"  # Default when no search result is involved.

    # First, match against the verified facts. The claim is embedded once and
    # the facts are encoded in one batch instead of once per loop iteration.
    facts = list(verified_facts)
    if facts:
        claim_embedding = model.encode(user_input_clean, convert_to_tensor=True)
        fact_embeddings = model.encode(facts, convert_to_tensor=True)
        fact_similarities = util.cos_sim(claim_embedding, fact_embeddings)[0]
        if float(fact_similarities.max()) > 0.9:
            # A verified fact has no source URL, so the default is returned.
            return (
                "REAL (matched with verified fact)",
                0.95,
                best_match_url,
                "The claim closely matches an entry in the verified facts list.",
            )

    # Not a known fact: fall back to a web search.
    print("\n Searching Google for similar articles...")
    results_with_links = search_google(user_input)
    results = [text for text, _ in results_with_links]

    if not results:
        print(" No search results found.")
        return (
            "FAKE  (no supporting articles found)",
            0.0,
            best_match_url,
            "No search results were found to compare the claim against.",
        )

    # Pick the search result most similar to the claim.
    input_embedding = model.encode(user_input, convert_to_tensor=True)
    result_embeddings = model.encode(results, convert_to_tensor=True)
    similarities = util.cos_sim(input_embedding, result_embeddings)[0]
    max_sim = float(similarities.max())
    max_sim_index = similarities.argmax().item()
    best_evidence, best_match_url = results_with_links[max_sim_index]

    # NLI check of the claim against the best evidence.
    nli_scores = nli_model.predict([(user_input, best_evidence)])
    label = int(nli_scores.argmax())
    verdict, max_sim = _verdict_from_nli(label, max_sim)

    explanation = generate_explanation(user_input, best_evidence)
    return verdict, max_sim, best_match_url, explanation
@app.route("/")
@app.route("/home")
def home():
    """Serve the home page for both ``/`` and ``/home``."""
    page = render_template("home.html")
    return page

@app.route("/about")
def about():
    """Serve the about page."""
    page = render_template("about.html")
    return page

@app.route("/detector", methods=["GET", "POST"])
def index():
    """Render the detector page and, on POST, classify the submitted claim.

    On GET, or when the submitted ``news`` field is missing or blank, the
    page is rendered with no verdict. Otherwise the claim is passed to
    ``predict_fake_news`` and its verdict, score, source link and
    explanation are handed to the template.
    """
    verdict = score = news_link = explanation = None
    user_input = ""

    if request.method == "POST":
        user_input = request.form.get("news", "")
        # Skip the search and model pipeline for blank submissions.
        if user_input.strip():
            # Tolerate a result without the trailing explanation element
            # rather than failing on a strict four-way unpack.
            verdict, score, news_link, *extra = predict_fake_news(user_input)
            explanation = extra[0] if extra else None

    return render_template(
        "index.html",
        verdict=verdict,
        score=score,
        user_input=user_input,
        news_link=news_link,
        explanation=explanation,
    )


if __name__ == "__main__":
    # When run directly as a script, serve on all interfaces at port 7860.
    app.run(port=7860, host="0.0.0.0")