File size: 5,987 Bytes
c9dd308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283a488
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from flask import Flask, render_template, request
from sentence_transformers import SentenceTransformer, util, CrossEncoder # CrossEncoder add kiya
from transformers import T5Tokenizer, T5ForConditionalGeneration # Transformers add kiya
from serpapi import GoogleSearch
import os
import json
import torch # Torch add kiya

app = Flask(__name__)

# === CONFIGURATION ===
# SerpAPI credential. The SERPAPI_KEY environment variable takes precedence so
# the secret does not have to live in source control.
# SECURITY(review): the literal fallback below is a credential committed to the
# repository. It is kept only so existing deployments keep working; rotate it
# and delete the fallback once the environment variable is configured.
SERPAPI_KEY = (
    os.environ.get("SERPAPI_KEY")
    or "ebde36523cc9138dce4067eaf7a35f45f2c50ae61fb7074595182c96470aaa58"
)
# Value sent as the "num" parameter of each search request.
SEARCH_NUM_RESULTS = 5
# Cosine-similarity cut-off used when deciding whether evidence supports a claim.
SIMILARITY_THRESHOLD = 0.7

# === Load models ===
# Checkpoint identifiers, kept together so they are easy to audit or swap.
_EMBEDDING_CHECKPOINT = "all-MiniLM-L6-v2"
_NLI_CHECKPOINT = 'cross-encoder/nli-distilroberta-base'
_T5_CHECKPOINT = "t5-small"

# Sentence-embedding model (unchanged from the earlier version of this app).
model = SentenceTransformer(_EMBEDDING_CHECKPOINT)

# NLI cross-encoder: acts as the "logical judge" for claim/evidence pairs.
nli_model = CrossEncoder(_NLI_CHECKPOINT)

# T5 tokenizer + model: used to generate the textual explanation.
t5_tokenizer = T5Tokenizer.from_pretrained(_T5_CHECKPOINT)
t5_model = T5ForConditionalGeneration.from_pretrained(_T5_CHECKPOINT)

# Verified facts are read from facts.json (resolved against the current
# working directory) when that file exists; otherwise the built-in defaults
# below are used. Opening directly and catching FileNotFoundError avoids the
# check-then-open race, and the explicit encoding keeps parsing independent of
# the platform's default locale.
try:
    with open("facts.json", "r", encoding="utf-8") as f:
        verified_facts = json.load(f)
except FileNotFoundError:
    verified_facts = [
        "humans have 2 legs",
        "humans are mammals",
        "earth is a planet",
        "earth orbits the sun",
        "the sun rises in the east",
        "water boils at 100 degrees celsius",
        "india is a country",
        "the moon revolves around earth",
        "earth has one moon",
        "2 + 2 = 4",
        "the sky is blue",
        "oxygen is essential for humans",
        "Sun is a star",
        "Sun is Hot",
    ]


def search_google(query):
    """Search Google through SerpAPI and return candidate evidence.

    Args:
        query: Search string sent as the ``q`` parameter.

    Returns:
        A list of ``(text, link)`` tuples, one per entry of the response's
        ``organic_results``. ``text`` is the result's title and snippet joined
        by a single space; ``link`` is ``"#"`` when the result has no link.
        The list is empty when the response has no ``organic_results`` key.
    """
    params = {
        "q": query,
        "api_key": SERPAPI_KEY,
        "engine": "google",
        "num": SEARCH_NUM_RESULTS,
    }
    response = GoogleSearch(params).get_dict()

    # Treat every field as optional: a result without a "title" used to raise
    # KeyError and abort the whole request, while snippet/link already had
    # fallbacks.
    return [
        (res.get("title", "") + " " + res.get("snippet", ""), res.get("link", "#"))
        for res in response.get("organic_results", [])
    ]

def generate_explanation(claim, evidence):
    """Produce a short T5-generated text from a claim and its evidence.

    Args:
        claim: The statement being checked.
        evidence: The source text the claim is compared against.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    prompt = f"explain fact: {claim} based on source: {evidence}"

    # The prompt is truncated to at most 512 tokens before generation.
    token_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Beam search over 4 beams, capped at 100 output tokens.
    generated = t5_model.generate(
        token_ids,
        max_length=100,
        num_beams=4,
        early_stopping=True,
    )

    first_sequence = generated[0]
    return t5_tokenizer.decode(first_sequence, skip_special_tokens=True)

def _verdict_from_nli(label, similarity):
    """Turn an NLI class index and a similarity score into ``(verdict, score)``.

    The class indices are interpreted as 0 = contradiction, 1 = entailment,
    2 = neutral (the label order published for the NLI cross-encoder in use).
    """
    if label == 0:
        # The evidence contradicts the claim: halve the score as a penalty.
        return "FAKE (Logical Contradiction Detected)", similarity * 0.5
    # Entailment and neutral both count as support, but only when the evidence
    # is also close enough to the claim. The parentheses-free original
    # (`a or b and c`) let entailment skip the threshold check.
    if label in (1, 2) and similarity >= SIMILARITY_THRESHOLD:
        return "REAL (Supported by News Sources)", similarity
    return "FAKE (Inconsistent or Unverified Information(Low Evidence))", similarity


def predict_fake_news(user_input):
    """Classify a claim as REAL or FAKE.

    The claim is first compared with the local ``verified_facts``. If none is
    close enough, Google results are fetched, the most similar result is
    picked as evidence, and an NLI model plus the similarity score decide the
    verdict.

    Args:
        user_input: The claim text entered by the user.

    Returns:
        A 4-tuple ``(verdict, score, url, explanation)`` on every path:
        ``verdict`` is a human-readable string, ``score`` a float, ``url`` the
        link of the best-matching search result (``"#"`` when there is none)
        and ``explanation`` a string describing the outcome.
    """
    user_input_clean = user_input.lower().strip()
    best_match_url = "#"  # No source link unless a search result is used.

    # --- Step 1: direct match against the verified facts -------------------
    fact_list = list(verified_facts)
    if fact_list:
        # Encode the claim once and all facts in one batch instead of
        # re-encoding the claim for every single fact.
        claim_embedding = model.encode(user_input_clean, convert_to_tensor=True)
        fact_embeddings = model.encode(fact_list, convert_to_tensor=True)
        fact_scores = util.cos_sim(claim_embedding, fact_embeddings)[0]
        if float(fact_scores.max()) > 0.9:
            matched_fact = fact_list[int(fact_scores.argmax())]
            # Callers unpack four values, so this early exit must supply an
            # explanation as well (it used to return only three values).
            return (
                "REAL (matched with verified fact)",
                0.95,
                best_match_url,
                f"The claim closely matches the verified fact: \"{matched_fact}\".",
            )

    # --- Step 2: fall back to a web search ---------------------------------
    print("\n Searching Google for similar articles...")
    results_with_links = search_google(user_input)
    results = [text for text, _ in results_with_links]

    if not results:
        print(" No search results found.")
        return (
            "FAKE  (no supporting articles found)",
            0.0,
            best_match_url,
            "No search results were found that could support this claim.",
        )

    # --- Step 3: pick the search result most similar to the claim ----------
    input_embedding = model.encode(user_input, convert_to_tensor=True)
    result_embeddings = model.encode(results, convert_to_tensor=True)
    similarities = util.cos_sim(input_embedding, result_embeddings)[0]
    max_sim = float(similarities.max())
    max_sim_index = similarities.argmax().item()
    best_evidence, best_match_url = results_with_links[max_sim_index]

    # --- Step 4: NLI check of the claim against the best evidence ----------
    nli_scores = nli_model.predict([(user_input, best_evidence)])
    # Only one pair is scored, so the flattened argmax is the class index.
    label = int(nli_scores.argmax())
    verdict, score = _verdict_from_nli(label, max_sim)

    # --- Step 5: generate the explanation text -----------------------------
    explanation = generate_explanation(user_input, best_evidence)

    return verdict, score, best_match_url, explanation
@app.route("/")
@app.route("/home")
def home():
    """Serve the landing page; registered for both ``/`` and ``/home``."""
    landing_page = render_template("home.html")
    return landing_page

@app.route("/about")
def about():
    """Serve the "about" page."""
    about_page = render_template("about.html")
    return about_page

@app.route("/detector", methods=["GET", "POST"])
def index():
    """Render the detector page and, on POST, the verdict for the submitted text.

    On GET, or when the submitted text is blank, the page is rendered with no
    verdict. On POST with text in the ``news`` form field, the text is passed
    to ``predict_fake_news`` and its four results are handed to the template.
    """
    verdict = score = news_link = explanation = None
    user_input = ""

    if request.method == "POST":
        user_input = request.form["news"]
        # A blank or whitespace-only submission has nothing to verify; skip
        # the embedding, search and NLI work instead of labelling it FAKE.
        if user_input.strip():
            verdict, score, news_link, explanation = predict_fake_news(user_input)

    return render_template(
        "index.html",
        verdict=verdict,
        score=score,
        user_input=user_input,
        news_link=news_link,
        explanation=explanation,
    )


# Start Flask's built-in server only when this file is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    # host="0.0.0.0" listens on all network interfaces; the port is fixed at 7860.
    app.run(host="0.0.0.0", port=7860)