# Fake-news checker — Flask app (Hugging Face Spaces deployment).
# (Removed "Spaces: Sleeping" status text accidentally pasted from the Spaces UI.)
import json
import os

import torch
from flask import Flask, render_template, request
from sentence_transformers import CrossEncoder, SentenceTransformer, util
from serpapi import GoogleSearch
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Flask application object; view functions below are registered on it.
app = Flask(__name__)

# === CONFIGURATION ===
# SECURITY NOTE(review): this SerpAPI key is hard-coded and committed in
# source — move it to an environment variable and rotate the exposed key.
SERPAPI_KEY = "ebde36523cc9138dce4067eaf7a35f45f2c50ae61fb7074595182c96470aaa58"
SEARCH_NUM_RESULTS = 5       # Number of Google results requested per query.
SIMILARITY_THRESHOLD = 0.7   # Cosine-similarity cutoff for "supported" claims.

# === Load model and facts ===
# Sentence-embedding model: similarity between the claim, local facts,
# and search-result snippets.
model = SentenceTransformer("all-MiniLM-L6-v2")
# NLI cross-encoder acting as a "logical judge" — scores the claim against
# the best-matching article for contradiction / entailment / neutrality.
nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base')
# T5 model + tokenizer used to generate a short textual explanation.
t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
# Seed list of common-knowledge statements; overridden by facts.json when
# that file exists next to the app.
verified_facts = [
    "humans have 2 legs",
    "humans are mammals",
    "earth is a planet",
    "earth orbits the sun",
    "the sun rises in the east",
    "water boils at 100 degrees celsius",
    "india is a country",
    "the moon revolves around earth",
    "earth has one moon",
    "2 + 2 = 4",
    "the sky is blue",
    "oxygen is essential for humans",
    "Sun is a star",
    "Sun is Hot",
]
if os.path.exists("facts.json"):
    with open("facts.json", "r") as facts_file:
        verified_facts = json.load(facts_file)
def search_google(query):
    """Search Google for *query* via SerpAPI.

    Returns a list of ``(text, link)`` tuples where ``text`` is the result
    title concatenated with its snippet (the string that gets embedded) and
    ``link`` is the article URL, defaulting to ``'#'`` when absent. Returns
    an empty list when SerpAPI reports no organic results.
    """
    params = {
        "q": query,
        "api_key": SERPAPI_KEY,
        "engine": "google",
        "num": SEARCH_NUM_RESULTS,
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    # Use .get() for every field: SerpAPI omits 'snippet' (and, for some
    # result types, 'title') — direct indexing raised KeyError.
    return [
        (res.get('title', '') + " " + res.get('snippet', ''), res.get('link', '#'))
        for res in results.get("organic_results", [])
    ]
def generate_explanation(claim, evidence):
    """Generate a short natural-language explanation of *claim* with T5,
    conditioning on *evidence* (the best-matching article text)."""
    prompt = f"explain fact: {claim} based on source: {evidence}"
    token_ids = t5_tokenizer.encode(
        prompt, return_tensors="pt", max_length=512, truncation=True
    )
    generated = t5_model.generate(
        token_ids, max_length=100, num_beams=4, early_stopping=True
    )
    return t5_tokenizer.decode(generated[0], skip_special_tokens=True)
def predict_fake_news(user_input):
    """Classify *user_input* as REAL or FAKE.

    Pipeline:
      1. Direct semantic match against the local ``verified_facts`` list.
      2. Google search (SerpAPI) for supporting articles.
      3. NLI cross-encoder check of the best-matching article
         (contradiction vs. support).
      4. T5-generated explanation of the verdict.

    Returns a 4-tuple ``(verdict, score, source_url, explanation)``.
    BUG FIX: the original early returns yielded only 3 values while the
    caller unpacks 4, raising ValueError on a fact match or empty search.
    """
    user_input_clean = user_input.lower().strip()
    best_match_url = "#"  # Fallback URL when no web source is involved.

    # 1. Direct match against verified facts. Encode the claim once and
    #    reuse the embedding (the original re-encoded it for every fact).
    claim_embedding = model.encode(user_input_clean, convert_to_tensor=True)
    for fact in verified_facts:
        fact_embedding = model.encode(fact, convert_to_tensor=True)
        if util.cos_sim(claim_embedding, fact_embedding)[0][0] > 0.9:
            return (
                "REAL (matched with verified fact)",
                0.95,
                best_match_url,
                "This claim closely matches a locally verified fact.",
            )

    # 2. No fact matched — fall back to a Google search.
    print("\n Searching Google for similar articles...")
    results_with_links = search_google(user_input)  # list of (text, link)
    results = [text for text, _link in results_with_links]
    if not results:
        print(" No search results found.")
        return (
            "FAKE (no supporting articles found)",
            0.0,
            best_match_url,
            "No online articles were found for this claim.",
        )

    # Cosine similarity between the claim and every search result.
    input_embedding = model.encode(user_input, convert_to_tensor=True)
    result_embeddings = model.encode(results, convert_to_tensor=True)
    similarities = util.cos_sim(input_embedding, result_embeddings)[0]
    max_sim = float(similarities.max())
    max_sim_index = similarities.argmax().item()
    best_evidence, best_match_url = results_with_links[max_sim_index]

    # 3. NLI check on the best-matching article. Assumed label order for
    #    cross-encoder/nli-distilroberta-base:
    #    0 = contradiction, 1 = entailment, 2 = neutral — TODO confirm.
    nli_scores = nli_model.predict([(user_input, best_evidence)])
    label = nli_scores.argmax()

    if label == 0:
        # The top article logically contradicts the claim.
        verdict = "FAKE (Logical Contradiction Detected)"
        max_sim = max_sim * 0.5  # Penalize the score for the contradiction.
    elif (label == 1 or label == 2) and max_sim >= SIMILARITY_THRESHOLD:
        # BUG FIX: the original `label == 1 or label == 2 and …` parsed as
        # `label == 1 or (label == 2 and …)` due to operator precedence,
        # letting entailment through without any similarity check. Intent
        # per the original comment: entailment OR neutral, AND a good match.
        verdict = "REAL (Supported by News Sources)"
    else:
        # Neutral/entailed but weakly matched, or otherwise unverified.
        verdict = "FAKE (Inconsistent or Unverified Information(Low Evidence))"

    # 4. Generate a short explanation with T5.
    explanation = generate_explanation(user_input, best_evidence)
    return verdict, max_sim, best_match_url, explanation
# NOTE(review): the original had no @app.route decorator, so Flask never
# registered this view and every request 404'd. Path "/home" is assumed
# from the template name — confirm against the site's navigation links.
@app.route("/home")
def home():
    """Render the static home page."""
    return render_template("home.html")
# NOTE(review): the original had no @app.route decorator, so this view was
# never registered. Path "/about" assumed from the template name — confirm.
@app.route("/about")
def about():
    """Render the static about page."""
    return render_template("about.html")
# NOTE(review): the original had no @app.route decorator, so the app served
# nothing. POST must be allowed because the body reads request.form; path
# "/" assumed for the main checker page — confirm against templates.
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the checker page; on POST, classify the submitted claim.

    Passes verdict, score, the best-matching source link, and the generated
    explanation through to ``index.html`` (all ``None`` on a plain GET).
    """
    verdict, score, news_link, explanation = None, None, None, None
    user_input = ""
    if request.method == "POST":
        user_input = request.form["news"]
        # predict_fake_news returns (verdict, score, source_url, explanation).
        verdict, score, news_link, explanation = predict_fake_news(user_input)
    return render_template(
        "index.html",
        verdict=verdict,
        score=score,
        user_input=user_input,
        news_link=news_link,
        explanation=explanation,
    )
if __name__ == "__main__":
    # Bind on all interfaces; port 7860 is the conventional Hugging Face
    # Spaces port (matches the deployment hinted at the top of the file).
    app.run(host="0.0.0.0", port=7860)