mdAmin313 committed on
Commit
1b1b755
·
1 Parent(s): 681fdba

Initial commit: AI misinformation detector

Browse files
Files changed (2) hide show
  1. .env +4 -0
  2. app.py +253 -197
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ GNEWS_API_KEY = "c41717a7b25455cd0937016c539e72d5"
2
+ NEWSORG_API_KEY ="9067f24c056541fd937a455293d9ace3"
3
+ OPENAI_API_KEY = ""
4
+ GEMINI_API_KEY = "AIzaSyBmzG18sh5yMNdDGonfquo5B7-HEkMewro"
app.py CHANGED
@@ -1,58 +1,50 @@
1
  import os
 
2
  import re
3
- import requests
4
  from typing import List, Dict, Any, Optional
5
- from fastapi import FastAPI
6
- from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
8
- import gradio as gr
 
9
  from bs4 import BeautifulSoup
10
 
11
- # ---------------- ENV ----------------
12
- GNEWS_API_KEY = "c41717a7b25455cd0937016c539e72d5"
13
- NEWSORG_API_KEY = "9067f24c056541fd937a455293d9ace3"
14
- GEMINI_API_KEY = "AIzaSyBmzG18sh5yMNdDGonfquo5B7-HEkMewro"
15
- GEMINI_CX = "727386fd4ef37425d"
16
-
17
- # ---------------- Lazy-load models ----------------
18
- SENTE_MODEL = None
19
- ZS_PIPE = None
20
-
21
- def get_sentence_model():
22
- global SENTE_MODEL
23
- if SENTE_MODEL is None:
24
- from sentence_transformers import SentenceTransformer
25
- SENTE_MODEL = SentenceTransformer("all-mpnet-base-v2")
26
- return SENTE_MODEL
27
-
28
- def get_zs_pipe():
29
- global ZS_PIPE
30
- if ZS_PIPE is None:
31
- from transformers import pipeline
32
- ZS_PIPE = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
33
- return ZS_PIPE
34
-
35
- # ---------------- FastAPI ----------------
36
- app = FastAPI()
37
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
38
 
 
 
 
 
 
 
 
 
 
39
  class VerifyRequest(BaseModel):
40
  text: str
41
- mode: Optional[str] = "fast"
42
-
43
- # ---------------- Helpers ----------------
44
- TRUSTED_DOMAINS = {
45
- "bbc.co.uk","bbc.com","cnn.com","nytimes.com","reuters.com","apnews.com",
46
- "theguardian.com","npr.org","washingtonpost.com","wsj.com","usatoday.com",
47
- "bloomberg.com","aljazeera.com","msnbc.com","cnbc.com","foxnews.com"
48
- }
49
- UNWANTED_KEYWORDS = [
50
- "movie","film","trailer","episode","comic","manga","fan","fandom","imdb",
51
- "review","tv series","fiction","novel","fantasy","screenplay","actor","actress"
52
- ]
53
 
 
54
  def safe_headers():
55
- return {"User-Agent": "misinfo-tool/1.0"}
56
 
57
  def domain_from_url(url: str) -> Optional[str]:
58
  if not url: return None
@@ -68,6 +60,98 @@ def domain_from_url(url: str) -> Optional[str]:
68
  return None
69
  return None
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def is_unwanted_snippet(snippet: str) -> bool:
72
  if not snippet: return False
73
  s = snippet.lower()
@@ -81,23 +165,28 @@ def filter_sources(sources: List[Dict[str,str]]) -> List[Dict[str,str]]:
81
  seen.add(url)
82
  domain = domain_from_url(url)
83
  s["domain"] = domain or ""
84
- if domain in TRUSTED_DOMAINS:
85
- kept.append(s)
86
- continue
87
- if domain and any(d in domain for d in ["imdb.com","youtube.com","wikipedia.org","fandom.com","comicbook.com"]):
88
- continue
89
- if is_unwanted_snippet(s.get("snippet","")) or is_unwanted_snippet(s.get("title","")):
90
- continue
91
  kept.append(s)
92
  return kept
93
 
94
- def summarize_text(text: str, max_len=300) -> str:
95
- sentences = re.split(r'(?<=[.!?]) +', text.strip())
96
- summary = sentences[0] if sentences else text
97
- if len(summary) > max_len:
98
- summary = summary[:max_len].rsplit(' ',1)[0] + "..."
99
- return summary
 
 
 
 
 
 
 
100
 
 
101
  def summarize_evidence(sources: List[Dict[str,str]], max_chars=800) -> str:
102
  if not sources:
103
  return "No credible news sources found."
@@ -112,170 +201,137 @@ def summarize_evidence(sources: List[Dict[str,str]], max_chars=800) -> str:
112
  return res[:max_chars].rsplit(" ",1)[0] + "..."
113
  return res
114
 
 
115
  def fuse_scores(fast_conf: float, deep_outcome: Optional[str], evidence_count: int) -> Dict[str,Any]:
116
  base = fast_conf*0.5 + min(evidence_count/5.0,1.0)*0.5
117
  if deep_outcome and deep_outcome.lower() in ["false","misleading"]:
118
  base *= 0.7
119
- score = int(round(max(0,min(1,base))*100))
120
- color = "green" if score>=70 else "yellow" if score>=40 else "red"
121
  return {"score":score, "color":color}
122
 
123
- # ---------------- Zero-shot classify ----------------
124
- def classify_text_type(text: str) -> Dict[str, Any]:
125
- labels = ["news","rumor","fact","opinion","satire","unverifiable"]
126
- try:
127
- pipe = get_zs_pipe()
128
- res = pipe(text, labels, multi_label=False, truncation=True)
129
- label = res["labels"][0]
130
- score = float(res["scores"][0])
131
- return {"type": label, "score": round(score,3), "scores": dict(zip(res["labels"], res["scores"]))}
132
- except Exception:
133
- t = text.lower()
134
- if any(k in t for k in ["according to","reported","breaking","news","announced"]):
135
- return {"type":"news","score":0.65,"scores":{}}
136
- if any(k in t for k in ["i think","in my opinion","i believe","should"]):
137
- return {"type":"opinion","score":0.7,"scores":{}}
138
- if any(k in t for k in ["joke","satire","not real","parody"]):
139
- return {"type":"satire","score":0.7,"scores":{}}
140
- if any(k in t for k in ["study shows","research","published","peer-reviewed"]):
141
- return {"type":"fact","score":0.6,"scores":{}}
142
- return {"type":"rumor","score":0.45,"scores":{}}
143
-
144
- # ---------------- Search functions ----------------
145
- def fetch_gnews(query: str, max_results=6) -> List[Dict[str,str]]:
146
- if not GNEWS_API_KEY: return []
147
- try:
148
- url = "https://gnews.io/api/v4/search"
149
- params = {"q": query, "token": GNEWS_API_KEY, "max": max_results, "lang":"en"}
150
- r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
151
- r.raise_for_status()
152
- js = r.json()
153
- return [{"title": a.get("title"), "url": a.get("url"), "source": a.get("source",{}).get("name"), "snippet": a.get("description")} for a in js.get("articles", [])[:max_results]]
154
- except: return []
155
 
156
- def fetch_newsapi(query: str, max_results=6) -> List[Dict[str,str]]:
157
- if not NEWSORG_API_KEY: return []
158
  try:
159
- url = "https://newsapi.org/v2/everything"
160
- params = {"q": query, "pageSize": max_results, "apiKey": NEWSORG_API_KEY, "language":"en"}
161
  r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
162
  r.raise_for_status()
163
  js = r.json()
164
- return [{"title": a.get("title"), "url": a.get("url"), "source": a.get("source",{}).get("name"), "snippet": a.get("description")} for a in js.get("articles", [])[:max_results]]
165
- except: return []
166
-
167
- def google_dork_search(query: str, max_results=6) -> List[Dict[str,str]]:
168
- """Uses Google Custom Search API (Gemini)"""
169
- if not GEMINI_API_KEY or not GEMINI_CX: return []
170
- try:
171
- url = "https://www.googleapis.com/customsearch/v1"
172
- params = {"key": GEMINI_API_KEY, "cx": GEMINI_CX, "q": query, "num": max_results}
173
- r = requests.get(url, params=params, timeout=6)
174
- r.raise_for_status()
175
- js = r.json()
176
- items = js.get("items", [])
177
- return [{"title": i.get("title"), "url": i.get("link"), "snippet": i.get("snippet"), "source": None} for i in items]
178
- except: return []
179
 
180
- def duckduckgo_search(query: str, max_results=8) -> List[Dict[str,str]]:
181
- try:
182
- url = "https://html.duckduckgo.com/html/"
183
- r = requests.post(url, data={"q": query}, headers=safe_headers(), timeout=6)
184
- r.raise_for_status()
185
- soup = BeautifulSoup(r.text, "html.parser")
186
  results = []
187
- for res in soup.select(".result__a")[:max_results]:
188
- title = res.get_text()
189
- href = res.get("href")
190
- snippet_node = res.find_parent().select_one(".result__snippet")
191
- snippet = snippet_node.get_text() if snippet_node else ""
192
- results.append({"title": title, "url": href, "source":None, "snippet": snippet})
193
- return results
194
- except: return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- # ---------------- Main verification ----------------
197
- def verify_claim(text: str, mode: str="fast") -> Dict[str, Any]:
198
- claim = text.strip()
 
 
 
 
 
 
199
  text_type_res = classify_text_type(claim)
 
 
 
200
  user_summary = summarize_text(claim)
201
 
202
- # Step 1: Search all sources
203
- all_raw = fetch_gnews(user_summary) + fetch_newsapi(user_summary) + google_dork_search(user_summary) + duckduckgo_search(user_summary)
 
 
 
204
  filtered = filter_sources(all_raw)
 
 
205
  evidence_summary = summarize_evidence(filtered)
206
 
207
- # Step 2: Fast classification
208
- fast_label, fast_conf = "Unverifiable", 0.4
209
- try:
210
- pipe = get_zs_pipe()
211
- cls = pipe(claim, ["True","False","Misleading","Unverifiable"], multi_label=False, truncation=True)
212
- fast_label = cls["labels"][0]
213
- fast_conf = float(cls["scores"][0])
214
- except: pass
215
-
216
- # Step 3: Deep reasoning (placeholder)
217
- # Step 3: Deep reasoning
218
- deep_result = None
219
- if mode.lower() in ["deep","hybrid"]:
220
- if GEMINI_CLIENT:
221
  try:
222
- prompt = f"""
223
- Verify the following claim: "{claim}".
224
- Provide a JSON object with keys:
225
- outcome (True/False/Unverifiable),
226
- explanation,
227
- comparison (list of claim-evidence pairs),
228
- takeaways (list of advice).
229
- """
230
- response = GEMINI_CLIENT.models.generate_content(
231
- model="gemini-2.5-flash",
232
- contents=prompt
233
- )
234
- # Parse Gemini response as JSON
235
- import json
236
- deep_result = json.loads(response.text)
237
- except Exception as e:
238
- deep_result = {
239
- "outcome":"Unverifiable",
240
- "explanation": f"Gemini API error: {str(e)}",
241
- "takeaways":["Search credible sources","Cross-check claims"]
242
- }
243
- else:
244
- deep_result = {
245
- "outcome":"Unverifiable",
246
- "explanation":"Demo mode: Deep reasoning not configured (API key missing).",
247
- "takeaways":["Search credible sources","Cross-check claims","Beware sensational headlines"]
248
- }
249
-
250
- # Step 4: Fuse score
251
  deep_outcome = deep_result.get("outcome") if deep_result else None
252
  fuse = fuse_scores(fast_conf, deep_outcome, len(filtered))
253
 
254
  return {
255
- "Claim": claim,
256
- "Text type": text_type_res["type"],
257
- "Text type scores": text_type_res.get("scores", {}),
258
- "User summary": user_summary,
259
- "Fast classification": f"{fast_label} ({fast_conf:.2f})",
260
- "Evidence count raw": len(all_raw),
261
- "Evidence count filtered": len(filtered),
262
- "Evidence summary": evidence_summary,
263
- "Deep result": deep_result or "N/A",
264
- "Credibility": fuse
 
 
265
  }
266
 
267
- # ---------------- FastAPI endpoint ----------------
268
- @app.post("/verify")
269
- async def verify_endpoint(payload: VerifyRequest):
270
- return verify_claim(payload.text, payload.mode)
271
-
272
- # ---------------- Gradio UI ----------------
273
- iface = gr.Interface(
274
- fn=verify_claim,
275
- inputs=[gr.Textbox(label="Claim", lines=4), gr.Dropdown(["fast","deep","hybrid"], label="Mode")],
276
- outputs=gr.JSON(label="Result"),
277
- title="Hybrid Misinformation Detector"
278
- )
279
-
280
- # Mount Gradio inside FastAPI
281
- app = gr.mount_gradio_app(app, iface, path="/") # UI at root
 
1
  import os
2
+ import json
3
  import re
 
4
  from typing import List, Dict, Any, Optional
5
+ from fastapi import FastAPI, HTTPException
6
+ from fastapi.responses import HTMLResponse
7
  from pydantic import BaseModel
8
+ from dotenv import load_dotenv
9
+ import requests
10
  from bs4 import BeautifulSoup
11
 
12
# ---------------- Environment ----------------
# Load .env BEFORE constructing any API clients: genai.Client() below reads
# GEMINI_API_KEY from the process environment, so the previous ordering
# (client first, load_dotenv() afterwards) left the key invisible whenever a
# .env file was the only source of configuration.
load_dotenv()
GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
NEWSORG_API_KEY = os.getenv("NEWSORG_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# NLP / AI — heavy models are optional; if import/download fails the app
# degrades to the keyword heuristics in classify_text_type / semantic_filter.
try:
    from sentence_transformers import SentenceTransformer, util
    SENTE_MODEL = SentenceTransformer("all-mpnet-base-v2")
except Exception:
    SENTE_MODEL = None

try:
    from transformers import pipeline
    ZS_PIPE = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception:
    ZS_PIPE = None

# Gemini AI — uses GEMINI_API_KEY from the environment (loaded above).
try:
    from google import genai
    GEMINI_CLIENT = genai.Client()
except Exception:
    GEMINI_CLIENT = None

app = FastAPI(title="Hybrid Misinformation Detector")
39
+
40
+ # ---------------- Models ----------------
41
class VerifyRequest(BaseModel):
    # Request payload for POST /verify.
    text: str  # the claim to check
    mode: Optional[str] = "fast"  # fast, deep, hybrid
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ # ---------------- Utilities ----------------
46
def safe_headers():
    """Return the HTTP headers identifying this tool to external services."""
    user_agent = "misinfo-tool/1.0 (+https://example.com)"
    return {"User-Agent": user_agent}
48
 
49
  def domain_from_url(url: str) -> Optional[str]:
50
  if not url: return None
 
60
  return None
61
  return None
62
 
63
# ---------------- Trusted / Blacklist ----------------
# Domains whose results filter_sources keeps (mainstream news + science outlets).
TRUSTED_DOMAINS = {
    "bbc.co.uk","bbc.com","cnn.com","nytimes.com","reuters.com","apnews.com",
    "theguardian.com","npr.org","washingtonpost.com","wsj.com","usatoday.com",
    "bloomberg.com","aljazeera.com","msnbc.com","cnbc.com","foxnews.com",
    "scientificamerican.com","nature.com","sciencedaily.com"
}

# Domains always discarded (entertainment / user-generated content sites).
BLACKLISTED_DOMAINS = {
    "imdb.com","youtube.com","wikipedia.org","fandom.com","comicbook.com",
    "rottentomatoes.com","hulu.com","netflix.com","ign.com","forbes.com"
}

# Substrings that mark a title/snippet as entertainment rather than news
# (consumed by is_unwanted_snippet; matched case-insensitively).
UNWANTED_KEYWORDS = [
    "movie","film","episode","tv show","trailer","comic","manga","fan","fandom",
    "review","fiction","novel","fantasy","screenplay","actor","actress"
]
80
+
81
+ # ---------------- NLP classify ----------------
82
def classify_text_type(text: str) -> Dict[str, Any]:
    """Label *text* as one of news/rumor/fact/opinion/satire/unverifiable.

    Uses the zero-shot model when it loaded successfully; otherwise (or if
    inference raises) falls back to keyword heuristics with fixed scores.
    """
    labels = ["news","rumor","fact","opinion","satire","unverifiable"]
    if ZS_PIPE:
        try:
            res = ZS_PIPE(text, labels, multi_label=False, truncation=True)
            best_label = res["labels"][0]
            best_score = float(res["scores"][0])
            return {
                "type": best_label,
                "score": round(best_score, 3),
                "scores": dict(zip(res["labels"], res["scores"])),
            }
        except Exception:
            pass

    lowered = text.lower()
    # Ordered keyword heuristics: first matching group wins.
    keyword_rules = [
        (("according to","reported","breaking","news","announced"), "news", 0.65),
        (("i think","in my opinion","i believe","should"), "opinion", 0.7),
        (("joke","satire","not real","parody"), "satire", 0.7),
        (("study shows","research","published","peer-reviewed"), "fact", 0.6),
    ]
    for keywords, label, score in keyword_rules:
        if any(kw in lowered for kw in keywords):
            return {"type": label, "score": score, "scores": {}}
    return {"type": "rumor", "score": 0.45, "scores": {}}
102
+
103
def summarize_text(text: str, max_len=300) -> str:
    """Return the first sentence of *text*, capped at *max_len* characters.

    Truncation happens on a word boundary and appends "..." as a marker.
    """
    pieces = re.split(r'(?<=[.!?]) +', text.strip())
    lead = pieces[0] if pieces else text
    if len(lead) <= max_len:
        return lead
    return lead[:max_len].rsplit(' ', 1)[0] + "..."
109
+
110
+ # ---------------- Search ----------------
111
def fetch_gnews(query: str, max_results=6) -> List[Dict[str,str]]:
    """Search the GNews API for *query*.

    Returns up to *max_results* dicts with keys title/url/source/snippet
    (any value may be None). Returns [] when no API key is configured or on
    any request failure — search errors must not abort the verification flow.
    """
    if not GNEWS_API_KEY:
        return []
    try:
        url = "https://gnews.io/api/v4/search"
        params = {"q": query, "token": GNEWS_API_KEY, "max": max_results, "lang":"en"}
        r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
        r.raise_for_status()
        js = r.json()
        return [{"title": a.get("title"), "url": a.get("url"), "source": a.get("source",{}).get("name"), "snippet": a.get("description")} for a in js.get("articles", [])[:max_results]]
    except Exception:
        # Best-effort: swallow network/JSON errors and report no results.
        return []
123
+
124
def fetch_newsapi(query: str, max_results=6) -> List[Dict[str,str]]:
    """Search NewsAPI.org's /v2/everything endpoint for *query*.

    Same contract as fetch_gnews: up to *max_results* dicts with keys
    title/url/source/snippet; [] without an API key or on any failure.
    """
    if not NEWSORG_API_KEY:
        return []
    try:
        url = "https://newsapi.org/v2/everything"
        params = {"q": query, "pageSize": max_results, "apiKey": NEWSORG_API_KEY, "language":"en"}
        r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
        r.raise_for_status()
        js = r.json()
        return [{"title": a.get("title"), "url": a.get("url"), "source": a.get("source",{}).get("name"), "snippet": a.get("description")} for a in js.get("articles", [])[:max_results]]
    except Exception:
        # Best-effort: swallow network/JSON errors and report no results.
        return []
136
+
137
def duckduckgo_search(query: str, max_results=8) -> List[Dict[str,str]]:
    """Scrape DuckDuckGo's HTML endpoint for *query* (no API key required).

    Parses result anchors (.result__a) plus the sibling snippet node and
    returns dicts with keys title/url/source/snippet ("source" is always
    None here). Returns [] on any network or parse failure.

    NOTE(review): HTML scraping is brittle — the CSS class names can change
    without notice; confirm selectors still match before relying on this.
    """
    try:
        url = "https://html.duckduckgo.com/html/"
        r = requests.post(url, data={"q": query}, headers=safe_headers(), timeout=6)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        results = []
        for res in soup.select(".result__a")[:max_results]:
            title = res.get_text()
            href = res.get("href")
            snippet_node = res.find_parent().select_one(".result__snippet")
            snippet = snippet_node.get_text() if snippet_node else ""
            results.append({"title": title, "url": href, "source":None, "snippet": snippet})
        return results
    except Exception:
        # Best-effort: swallow network/parse errors and report no results.
        return []
153
+
154
+ # ---------------- Filtering ----------------
155
  def is_unwanted_snippet(snippet: str) -> bool:
156
  if not snippet: return False
157
  s = snippet.lower()
 
165
  seen.add(url)
166
  domain = domain_from_url(url)
167
  s["domain"] = domain or ""
168
+ if not domain: continue
169
+ if domain in BLACKLISTED_DOMAINS: continue
170
+ if domain not in TRUSTED_DOMAINS: continue
171
+ if is_unwanted_snippet(s.get("snippet","")) or is_unwanted_snippet(s.get("title","")): continue
 
 
 
172
  kept.append(s)
173
  return kept
174
 
175
def semantic_filter(claim: str, sources: List[Dict[str,str]], threshold=0.3):
    """Keep only sources whose snippet is semantically similar to *claim*.

    Uses sentence-transformer cosine similarity; sources without a snippet
    are dropped. If the embedding model failed to load, all sources pass
    through unchanged.
    """
    if not SENTE_MODEL:
        return sources
    claim_vec = SENTE_MODEL.encode(claim, convert_to_tensor=True)
    kept = []
    for src in sources:
        snippet_text = src.get("snippet", "")
        if not snippet_text:
            continue
        snippet_vec = SENTE_MODEL.encode(snippet_text, convert_to_tensor=True)
        similarity = util.cos_sim(claim_vec, snippet_vec).item()
        if similarity >= threshold:
            kept.append(src)
    return kept
188
 
189
+ # ---------------- Evidence summary ----------------
190
  def summarize_evidence(sources: List[Dict[str,str]], max_chars=800) -> str:
191
  if not sources:
192
  return "No credible news sources found."
 
201
  return res[:max_chars].rsplit(" ",1)[0] + "..."
202
  return res
203
 
204
+ # ---------------- Fusion ----------------
205
  def fuse_scores(fast_conf: float, deep_outcome: Optional[str], evidence_count: int) -> Dict[str,Any]:
206
  base = fast_conf*0.5 + min(evidence_count/5.0,1.0)*0.5
207
  if deep_outcome and deep_outcome.lower() in ["false","misleading"]:
208
  base *= 0.7
209
+ score = int(round(max(0, min(1, base)) * 100))
210
+ color = "green" if score >= 70 else "yellow" if score >= 40 else "red"
211
  return {"score":score, "color":color}
212
 
213
+ # ---------------- Fact Check API (placeholder) ----------------
214
+ def factcheck_claim(claim: str) -> Dict[str, Any]:
215
+ """
216
+ Query Google Fact Check Tools API to check the claim.
217
+ Requires GEMINI_API_KEY or your provided key in `GEMINI_API_KEY`.
218
+ """
219
+ api_key = "AIzaSyB0A-MIHs8qkjYTWE-TnoLw46KplX-Ihjs" # your key
220
+ url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
221
+ params = {"query": claim, "key": api_key, "languageCode": "en", "pageSize": 5}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
 
 
223
  try:
 
 
224
  r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
225
  r.raise_for_status()
226
  js = r.json()
227
+ claims = js.get("claims", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
 
 
 
 
 
 
229
  results = []
230
+ for c in claims:
231
+ claimant = c.get("claimant", "Unknown")
232
+ text = c.get("text", "")
233
+ claimReview = c.get("claimReview", [])
234
+ for review in claimReview:
235
+ publisher = review.get("publisher", {}).get("name")
236
+ url = review.get("url")
237
+ title = review.get("title")
238
+ review_rating = review.get("textualRating")
239
+ results.append({
240
+ "claimant": claimant,
241
+ "text": text,
242
+ "publisher": publisher,
243
+ "url": url,
244
+ "title": title,
245
+ "rating": review_rating
246
+ })
247
+ outcome = "Unverified" if not results else results[0].get("rating", "Unverified")
248
+ return {"outcome": outcome, "source": results}
249
+ except Exception as e:
250
+ return {"outcome": "Error", "source": [], "error": str(e)}
251
+
252
 
253
+ # ---------------- API ----------------
254
@app.post("/verify")
async def verify(req: VerifyRequest):
    """Verify a claim and return a structured credibility report.

    Pipeline: classify text type -> summarize claim -> search news sources ->
    filter (domain + semantic) -> fast zero-shot verdict -> optional deep
    Gemini reasoning (mode deep/hybrid) -> fact-check lookup -> score fusion.
    Raises 400 on an empty claim.
    """
    claim = (req.text or "").strip()
    mode = (req.mode or "fast").lower()
    if not claim:
        raise HTTPException(status_code=400, detail="Empty claim")

    # Step 1: classify the kind of statement (news/rumor/opinion/...).
    text_type_res = classify_text_type(claim)
    stored_type = text_type_res["type"]

    # Step 2: compress the claim to its first sentence for search queries.
    user_summary = summarize_text(claim)

    # Step 3: search. NOTE(review): the site: operators are web-search dork
    # syntax — GNews/NewsAPI treat them as literal query text; confirm they
    # do not degrade those two searches.
    query = f"{user_summary} site:bbc.com OR site:cnn.com OR site:reuters.com OR site:apnews.com"
    all_raw = fetch_gnews(query) + fetch_newsapi(query) + duckduckgo_search(query)

    # Step 4: keep trusted-domain hits, then semantically relevant ones.
    filtered = filter_sources(all_raw)
    filtered = semantic_filter(claim, filtered)

    evidence_summary = summarize_evidence(filtered)

    # Step 5: fast zero-shot verdict; defaults stand if the model is absent.
    fast_label = "Unverifiable"; fast_conf = 0.4
    if ZS_PIPE:
        try:
            cls = ZS_PIPE(claim, ["True","False","Misleading","Unverifiable"], multi_label=False, truncation=True)
            fast_label = cls["labels"][0]
            fast_conf = float(cls["scores"][0])
        except Exception:
            pass

    # Step 6: deep reasoning via Gemini (only for deep/hybrid modes).
    # NOTE(review): json.loads assumes the model returns bare JSON; fenced
    # output would fall into the except branch — confirm acceptable.
    deep_result = None
    if mode in ["deep","hybrid"]:
        if GEMINI_CLIENT:
            try:
                prompt = f"""
                Verify the following claim: "{claim}".
                Provide JSON with keys: outcome, explanation, comparison (list), takeaways (list).
                """
                response = GEMINI_CLIENT.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=prompt
                )
                deep_result = json.loads(response.text)
            except Exception as e:
                deep_result = {"outcome":"Unverifiable","explanation":f"Gemini API error: {str(e)}","takeaways":["Check credible sources"]}
        else:
            deep_result = {"outcome":"Unverifiable","explanation":"Demo mode: API missing","takeaways":["Check credible sources"]}

    # Step 7: independent fact-check database lookup.
    factcheck = factcheck_claim(claim)

    # Step 8: fuse fast confidence, deep verdict, and evidence count.
    deep_outcome = deep_result.get("outcome") if deep_result else None
    fuse = fuse_scores(fast_conf, deep_outcome, len(filtered))

    return {
        "claim": claim,
        "text_type": stored_type,
        "text_type_scores": text_type_res.get("scores", {}),
        "user_summary": user_summary,
        "fast": {"label": fast_label, "confidence": round(fast_conf,3)},
        "evidence_count_raw": len(all_raw),
        "evidence_count_filtered": len(filtered),
        "evidence": filtered,
        "evidence_summary": evidence_summary,
        "deep": deep_result or {},
        "factcheck": factcheck,
        "credibility": fuse
    }
328
 
329
+ # ---------------- Frontend ----------------
330
@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the static frontend page.

    Returns the raw contents of static/index.html; responds 404 with a clear
    message (instead of an unhandled 500) when the file is missing.
    """
    try:
        with open("static/index.html", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="static/index.html not found")
334
+
335
# Local development entry point; PORT overrides the default 8000.
# NOTE(review): reload=True is a development setting — confirm it is
# disabled for production deployments.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT","8000")), reload=True)