# Scraped page header - Spaces: Sleeping; File size: 5,987 Bytes; c9dd308 283a488
from flask import Flask, render_template, request
from sentence_transformers import SentenceTransformer, util, CrossEncoder # CrossEncoder add kiya
from transformers import T5Tokenizer, T5ForConditionalGeneration # Transformers add kiya
from serpapi import GoogleSearch
import os
import json
import torch # Torch add kiya
# Flask application object; the @app.route decorators in this file register
# their view functions on it, and the __main__ guard starts it with app.run().
app = Flask(__name__)
# === CONFIGURATION ===
# SerpAPI credential used by search_google(). The SERPAPI_KEY environment
# variable takes precedence so deployments can supply their own key without
# editing the source; the literal below is kept only as a backward-compatible
# fallback.
# SECURITY NOTE(review): this key is committed in plain text - rotate it and
# drop the fallback once the environment variable is configured.
SERPAPI_KEY = os.environ.get(
    "SERPAPI_KEY",
    "ebde36523cc9138dce4067eaf7a35f45f2c50ae61fb7074595182c96470aaa58",
)
# Number of results requested from the search API per query.
SEARCH_NUM_RESULTS = 5
# Minimum cosine similarity used by predict_fake_news() when deciding whether
# a neutral NLI result still counts as supported.
SIMILARITY_THRESHOLD = 0.7
# === Load model and facts ===
# Sentence-embedding model; predict_fake_news() uses it to compute cosine
# similarity between the claim, the verified facts and the search results.
model = SentenceTransformer("all-MiniLM-L6-v2")
# The original embedding model above is kept unchanged.
# New NLI model (the "logical judge"): predict_fake_news() scores the
# (claim, best evidence) pair with it and branches on the arg-max label.
# NOTE(review): the code treats label 0 as contradiction, 1 as support and
# 2 as neutral - confirm this order against the model card.
nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base')
# New T5 model (the "explanation expert"): generate_explanation() uses this
# tokenizer/model pair to produce the explanation text.
t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
# Verified facts that predict_fake_news() checks a claim against before it
# falls back to a web search. They are read from facts.json in the current
# working directory when that file exists; otherwise the built-in list below
# is used.
if os.path.exists("facts.json"):
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's default locale encoding.
    with open("facts.json", "r", encoding="utf-8") as f:
        verified_facts = json.load(f)
else:
    verified_facts = [
        "humans have 2 legs",
        "humans are mammals",
        "earth is a planet",
        "earth orbits the sun",
        "the sun rises in the east",
        "water boils at 100 degrees celsius",
        "india is a country",
        "the moon revolves around earth",
        "earth has one moon",
        "2 + 2 = 4",
        "the sky is blue",
        "oxygen is essential for humans",
        "Sun is a star",
        "Sun is Hot",
    ]
def search_google(query):
    """Search Google through SerpAPI and return text/link pairs.

    Args:
        query: The search string sent to the API.

    Returns:
        A list of ``(text, link)`` tuples, one per organic result. ``text`` is
        the result title and snippet joined by a single space (the string that
        is later embedded); ``link`` is the result URL, or ``"#"`` when the
        result has none. The list is empty when the response contains no
        organic results.
    """
    params = {
        "q": query,
        "api_key": SERPAPI_KEY,
        "engine": "google",
        "num": SEARCH_NUM_RESULTS,
    }
    response = GoogleSearch(params).get_dict()

    # A response without organic results (or with a null value under that key)
    # is treated as "nothing found" instead of raising.
    organic_results = response.get("organic_results") or []

    pairs = []
    for res in organic_results:
        # Title is read defensively, like snippet and link, so one incomplete
        # result cannot abort the whole search with a KeyError.
        title = res.get("title") or ""
        snippet = res.get("snippet") or ""
        pairs.append((title + " " + snippet, res.get("link", "#")))
    return pairs
def generate_explanation(claim, evidence):
    """Generate a short explanation of ``claim`` from ``evidence`` with T5.

    Args:
        claim: The statement being checked.
        evidence: Source text the explanation should be based on.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        removed.
    """
    prompt = f"explain fact: {claim} based on source: {evidence}"

    # Tokenise to a PyTorch tensor, truncating the prompt at 512 tokens.
    token_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Beam search with 4 beams, output capped at 100 tokens.
    generated = t5_model.generate(
        token_ids,
        max_length=100,
        num_beams=4,
        early_stopping=True,
    )

    first_sequence = generated[0]
    return t5_tokenizer.decode(first_sequence, skip_special_tokens=True)
def predict_fake_news(user_input):
    """Classify a claim as REAL or FAKE and explain the verdict.

    The claim is first compared with the local ``verified_facts``. If none of
    them is similar enough, Google results are fetched, the result most
    similar to the claim is scored against it with the NLI model, and T5
    generates an explanation from that result.

    Args:
        user_input: The claim text to check.

    Returns:
        A 4-tuple ``(verdict, score, url, explanation)`` on every path:
        ``verdict`` is a human-readable label, ``score`` a float, ``url`` the
        link of the best-matching search result (``"#"`` when there is none)
        and ``explanation`` a string.
    """
    user_input_clean = user_input.lower().strip()
    best_match_url = "#"  # placeholder link when no online source is involved

    # 1. Match against the verified facts. The claim is encoded once and the
    #    facts in a single batch, instead of re-encoding the claim per fact.
    if verified_facts:
        claim_embedding = model.encode(user_input_clean, convert_to_tensor=True)
        fact_embeddings = model.encode(list(verified_facts), convert_to_tensor=True)
        best_fact_sim = float(util.cos_sim(claim_embedding, fact_embeddings).max())
        if best_fact_sim > 0.9:
            # A fact match has no URL, so the placeholder link is returned.
            return (
                "REAL (matched with verified fact)",
                0.95,
                best_match_url,
                "Matched a locally verified fact; no online source was consulted.",
            )

    # 2. No fact matched: fall back to a web search.
    print("\n Searching Google for similar articles...")
    results_with_links = search_google(user_input)
    # Only the text part of each (text, link) pair is embedded.
    results = [text for text, _ in results_with_links]

    if not results:
        print(" No search results found.")
        return (
            "FAKE (no supporting articles found)",
            0.0,
            best_match_url,
            "No search results were found for this claim, so it could not be verified.",
        )

    # 3. Pick the search result most similar to the claim.
    input_embedding = model.encode(user_input, convert_to_tensor=True)
    result_embeddings = model.encode(results, convert_to_tensor=True)
    similarities = util.cos_sim(input_embedding, result_embeddings)[0]
    max_sim = float(similarities.max())
    max_sim_index = similarities.argmax().item()
    best_evidence, best_match_url = results_with_links[max_sim_index]

    # 4. NLI check of the claim against the best evidence.
    #    NOTE(review): label 0 is treated as contradiction, 1 as support and
    #    2 as neutral - confirm this order against the model card.
    nli_scores = nli_model.predict([(user_input, best_evidence)])
    label = int(nli_scores.argmax())

    # 5. Decide the verdict.
    if label == 0:
        verdict = "FAKE (Logical Contradiction Detected)"
        max_sim = max_sim * 0.5  # halve the score when a contradiction is found
    elif label == 1 or (label == 2 and max_sim >= SIMILARITY_THRESHOLD):
        # Parentheses spell out how Python already evaluated the original
        # mixed or/and condition: support is accepted regardless of
        # similarity, neutral only at or above the threshold.
        # NOTE(review): the original comment suggests the threshold may have
        # been meant for both labels - confirm the intended rule.
        verdict = "REAL (Supported by News Sources)"
    else:
        verdict = "FAKE (Inconsistent or Unverified Information(Low Evidence))"

    # 6. Ask T5 for an explanation based on the best evidence.
    explanation = generate_explanation(user_input, best_evidence)

    return verdict, max_sim, best_match_url, explanation
@app.route("/")
@app.route("/home")
def home():
    """Render the home page; served at both ``/`` and ``/home``."""
    template_name = "home.html"
    return render_template(template_name)
@app.route("/about")
def about():
    """Render the about page."""
    template_name = "about.html"
    return render_template(template_name)
@app.route("/detector", methods=["GET", "POST"])
def index():
    """Render the detector page and, on POST, the verdict for the submitted text.

    On GET, or when the submitted ``news`` field is missing or blank, the page
    is rendered with no verdict. Otherwise the text is passed to
    ``predict_fake_news`` and its verdict, score, source link and explanation
    are handed to the ``index.html`` template.
    """
    verdict = score = news_link = explanation = None
    user_input = ""

    if request.method == "POST":
        # .get() keeps a request without the field from aborting with a
        # 400 error; it is handled like an empty submission.
        user_input = request.form.get("news", "")

        # Skip the search and the model passes when there is nothing to check.
        if user_input.strip():
            result = predict_fake_news(user_input)
            # Accept a result that carries no explanation element so a
            # 3-value result cannot crash the request on unpacking.
            verdict, score, news_link, *extra = result
            explanation = extra[0] if extra else None

    return render_template(
        "index.html",
        verdict=verdict,
        score=score,
        user_input=user_input,
        news_link=news_link,
        explanation=explanation,
    )
if __name__ == "__main__":
    # Start the Flask server on all interfaces, port 7860, when this file is
    # run as a script. (The stray trailing "|" after the call was removed; it
    # made the line a syntax error.)
    app.run(host="0.0.0.0", port=7860)