Initial commit: AI misinformation detector
Browse files- __pycache__/app.cpython-313.pyc +0 -0
- __pycache__/gcp_clients.cpython-313.pyc +0 -0
- __pycache__/prompting.cpython-313.pyc +0 -0
- __pycache__/retriever.cpython-313.pyc +0 -0
- app.py +244 -32
- gcp_clients.py +0 -32
- prompting.py +0 -16
- retriever.py +0 -13
- static/index.html +242 -0
__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (2.57 kB). View file
|
|
|
__pycache__/gcp_clients.cpython-313.pyc
ADDED
|
Binary file (3.14 kB). View file
|
|
|
__pycache__/prompting.cpython-313.pyc
ADDED
|
Binary file (1.15 kB). View file
|
|
|
__pycache__/retriever.cpython-313.pyc
ADDED
|
Binary file (4.44 kB). View file
|
|
|
app.py
CHANGED
|
@@ -1,46 +1,258 @@
|
|
| 1 |
-
# app.py
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
| 3 |
from fastapi import FastAPI, HTTPException
|
|
|
|
| 4 |
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
import
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
speech = gcp_clients.SpeechClient(mock=MOCK)
|
| 17 |
-
vertex = gcp_clients.VertexClient(mock=MOCK)
|
| 18 |
-
retr = retriever.Retriever(mock=MOCK)
|
| 19 |
|
| 20 |
-
|
| 21 |
-
image_url: str = None
|
| 22 |
-
audio_url: str = None
|
| 23 |
-
text: str = None
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
raise HTTPException(status_code=400, detail="Provide at least one of image_url, audio_url, or text")
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
evidence['vision'] = vision.annotate_image(req.image_url)
|
| 33 |
-
if req.audio_url:
|
| 34 |
-
evidence['speech'] = speech.transcribe_audio(req.audio_url)
|
| 35 |
-
claim_text = req.text or evidence.get('vision', {}).get('ocr_text')
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
return {
|
| 42 |
-
"claim":
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
from typing import List, Dict, Any, Optional
|
| 5 |
from fastapi import FastAPI, HTTPException
|
| 6 |
+
from fastapi.responses import HTMLResponse
|
| 7 |
from pydantic import BaseModel
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
import requests
|
| 10 |
+
from bs4 import BeautifulSoup
|
| 11 |
|
| 12 |
+
# Optional heavy libs.
# Both models are loaded eagerly at import time. If the package is missing or
# the model cannot be loaded, the corresponding global is set to None and the
# failure is logged so the operator can see why the ML path is disabled.
import logging

_model_logger = logging.getLogger(__name__)

try:
    from sentence_transformers import SentenceTransformer, util
    SENTE_MODEL = SentenceTransformer("all-mpnet-base-v2")
except Exception:
    # Broad on purpose: a missing package, missing weights or a backend error
    # must all leave the app importable.
    _model_logger.warning(
        "sentence-transformers model unavailable; SENTE_MODEL disabled",
        exc_info=True,
    )
    SENTE_MODEL = None

try:
    from transformers import pipeline
    ZS_PIPE = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception:
    # Same best-effort policy as above; callers test `if ZS_PIPE:` before use.
    _model_logger.warning(
        "zero-shot pipeline unavailable; ZS_PIPE disabled",
        exc_info=True,
    )
    ZS_PIPE = None
|
| 24 |
|
| 25 |
+
# Load variables from a local .env file (if present) before reading them.
load_dotenv()

# API credentials read from the environment; each is None when unset.
GNEWS_API_KEY = os.environ.get("GNEWS_API_KEY")
NEWSORG_API_KEY = os.environ.get("NEWSORG_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")  # placeholder

# The ASGI application object that the route decorators below attach to.
app = FastAPI(title="Hybrid Misinformation Detector")
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
# ---------------- Models ----------------
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
class VerifyRequest(BaseModel):
    """JSON body accepted by POST /verify."""

    # The claim or headline to check.
    text: str
    # Pipeline mode: fast, deep, hybrid. Defaults to "fast".
    mode: Optional[str] = "fast"
|
|
|
|
| 38 |
|
| 39 |
+
def safe_headers():
    """Return a fresh dict of HTTP headers used for outbound search requests."""
    headers = {}
    headers["User-Agent"] = "misinfo-tool/1.0 (+https://example.com)"
    return headers
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
# ---------------- NLP classify ----------------
|
| 43 |
+
def classify_text_type(text: str) -> Dict[str, Any]:
    """Classify *text* as news, rumor, fact, opinion, satire or unverifiable.

    When the zero-shot pipeline (``ZS_PIPE``) is available it is used and the
    full label/score mapping is returned under ``"scores"``. If the pipeline
    is unavailable or raises, a keyword heuristic is used instead and
    ``"scores"`` is an empty dict.

    Returns:
        A dict with keys ``"type"`` (label), ``"score"`` (float) and
        ``"scores"`` (label -> score mapping, possibly empty).
    """
    labels = ["news", "rumor", "fact", "opinion", "satire", "unverifiable"]
    if ZS_PIPE:
        try:
            res = ZS_PIPE(text, labels, multi_label=False, truncation=True)
            top_score = float(res["scores"][0])
            return {
                "type": res["labels"][0],
                "score": round(top_score, 3),
                "scores": dict(zip(res["labels"], res["scores"])),
            }
        except Exception:
            # Best effort: fall through to the heuristic, but leave a trace
            # instead of hiding the failure.
            import logging

            logging.getLogger(__name__).warning(
                "zero-shot text-type classification failed; using keyword heuristic",
                exc_info=True,
            )

    lowered = text.lower()
    # Checked in order; the first rule with any matching cue wins.
    keyword_rules = (
        ("news", 0.65, ("according to", "reported", "breaking", "news", "announced")),
        ("opinion", 0.7, ("i think", "in my opinion", "i believe", "should")),
        ("satire", 0.7, ("joke", "satire", "not real", "parody")),
        ("fact", 0.6, ("study shows", "research", "published", "peer-reviewed")),
    )
    for label, score, cues in keyword_rules:
        if any(cue in lowered for cue in cues):
            return {"type": label, "score": score, "scores": {}}
    # No cue matched: the heuristic defaults to "rumor".
    return {"type": "rumor", "score": 0.45, "scores": {}}
|
| 63 |
+
|
| 64 |
+
# ---------------- Summarize ----------------
|
| 65 |
+
def summarize_text(text: str, max_len=300) -> str:
    """Return the first sentence of *text*, shortened to about *max_len* chars.

    A sentence ends at ``.``, ``!`` or ``?`` followed by any whitespace
    (spaces, tabs or newlines). If the first sentence is longer than
    *max_len*, it is cut at *max_len* characters, trimmed back to the last
    space inside that cut, and ``"..."`` is appended.
    """
    # maxsplit=1: only the first sentence is needed.
    summary = re.split(r"(?<=[.!?])\s+", text.strip(), maxsplit=1)[0]
    if len(summary) > max_len:
        summary = summary[:max_len].rsplit(" ", 1)[0] + "..."
    return summary
|
| 71 |
+
|
| 72 |
+
# ---------------- Search helpers ----------------
|
| 73 |
+
def fetch_gnews(query: str, max_results=6) -> List[Dict[str,str]]:
    """Search GNews for *query* and return up to *max_results* articles.

    Each result is a dict with keys ``title``, ``url``, ``source`` and
    ``snippet``. Returns an empty list when ``GNEWS_API_KEY`` is not set,
    when the request fails, or when the response is not the expected JSON.
    """
    if not GNEWS_API_KEY:
        return []
    try:
        url = "https://gnews.io/api/v4/search"
        params = {"q": query, "token": GNEWS_API_KEY, "max": max_results, "lang": "en"}
        r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
        r.raise_for_status()
        js = r.json()
    except (requests.RequestException, ValueError) as exc:
        import logging

        # Log only the exception type: the message can contain the request
        # URL, which carries the API token as a query parameter.
        logging.getLogger(__name__).warning("GNews lookup failed: %s", type(exc).__name__)
        return []

    articles = js.get("articles") if isinstance(js, dict) else None
    if not isinstance(articles, list):
        return []

    results = []
    for a in articles[:max_results]:
        if not isinstance(a, dict):
            continue
        # "source" may be present but null; one such article must not
        # discard the whole result list.
        source = a.get("source")
        results.append({
            "title": a.get("title"),
            "url": a.get("url"),
            "source": source.get("name") if isinstance(source, dict) else None,
            "snippet": a.get("description"),
        })
    return results
|
| 85 |
+
|
| 86 |
+
def fetch_newsapi(query: str, max_results=6) -> List[Dict[str,str]]:
    """Search NewsAPI for *query* and return up to *max_results* articles.

    Each result is a dict with keys ``title``, ``url``, ``source`` and
    ``snippet``. Returns an empty list when ``NEWSORG_API_KEY`` is not set,
    when the request fails, or when the response is not the expected JSON.
    """
    if not NEWSORG_API_KEY:
        return []
    try:
        url = "https://newsapi.org/v2/everything"
        params = {"q": query, "pageSize": max_results, "apiKey": NEWSORG_API_KEY, "language": "en"}
        r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
        r.raise_for_status()
        js = r.json()
    except (requests.RequestException, ValueError) as exc:
        import logging

        # Log only the exception type: the message can contain the request
        # URL, which carries the API key as a query parameter.
        logging.getLogger(__name__).warning("NewsAPI lookup failed: %s", type(exc).__name__)
        return []

    articles = js.get("articles") if isinstance(js, dict) else None
    if not isinstance(articles, list):
        return []

    results = []
    for a in articles[:max_results]:
        if not isinstance(a, dict):
            continue
        # "source" may be present but null; one such article must not
        # discard the whole result list.
        source = a.get("source")
        results.append({
            "title": a.get("title"),
            "url": a.get("url"),
            "source": source.get("name") if isinstance(source, dict) else None,
            "snippet": a.get("description"),
        })
    return results
|
| 98 |
+
|
| 99 |
+
def duckduckgo_search(query: str, max_results=8) -> List[Dict[str,str]]:
    """Scrape DuckDuckGo's HTML endpoint for *query*.

    Returns up to *max_results* dicts with keys ``title``, ``url``,
    ``source`` (always None) and ``snippet``. Any failure (network, HTTP
    status, parsing) yields an empty list.
    """
    from urllib.parse import parse_qs, urlsplit

    def _unwrap(href):
        # NOTE(review): result links appear to be wrapped as
        # //duckduckgo.com/l/?uddg=<percent-encoded target>; confirm against
        # the live markup. Anything that does not match is returned as-is.
        if not href:
            return href
        parts = urlsplit(href)
        host = parts.hostname or ""
        if host.endswith("duckduckgo.com"):
            target = parse_qs(parts.query).get("uddg")
            if target:
                return target[0]
        return href

    try:
        url = "https://html.duckduckgo.com/html/"
        r = requests.post(url, data={"q": query}, headers=safe_headers(), timeout=6)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        results = []
        for link in soup.select(".result__a")[:max_results]:
            # The immediate parent of the title link is presumably just the
            # heading element, which does not contain the snippet. Climb to
            # the enclosing ".result" container first and fall back to the
            # immediate parent when there is none.
            container = link.find_parent(class_="result") or link.find_parent()
            snippet_node = container.select_one(".result__snippet") if container else None
            results.append({
                "title": link.get_text(),
                "url": _unwrap(link.get("href")),
                "source": None,
                "snippet": snippet_node.get_text() if snippet_node else "",
            })
        return results
    except Exception:
        import logging

        # Scraping is best effort; keep the never-raise contract but log it.
        logging.getLogger(__name__).warning("DuckDuckGo search failed", exc_info=True)
        return []
|
| 115 |
+
|
| 116 |
+
# ---------------- Filtering ----------------
|
| 117 |
+
# Domains whose results are always kept by the source filter.
TRUSTED_DOMAINS = {
    "bbc.co.uk",
    "bbc.com",
    "cnn.com",
    "nytimes.com",
    "reuters.com",
    "apnews.com",
    "theguardian.com",
    "npr.org",
    "washingtonpost.com",
    "wsj.com",
    "usatoday.com",
    "bloomberg.com",
    "aljazeera.com",
    "msnbc.com",
    "cnbc.com",
    "foxnews.com",
}

# Entertainment/fiction cues; a title or snippet containing one is dropped.
UNWANTED_KEYWORDS = [
    "movie",
    "film",
    "trailer",
    "episode",
    "comic",
    "manga",
    "fan",
    "fandom",
    "imdb",
    "review",
    "tv series",
    "fiction",
    "novel",
    "fantasy",
    "screenplay",
    "actor",
    "actress",
]
|
| 127 |
+
|
| 128 |
+
# Second-level labels that act as part of the public suffix under two-letter
# country TLDs (e.g. "co.uk", "com.au"). A heuristic, not a full suffix list.
_TWO_LEVEL_SUFFIX_LABELS = frozenset({"ac", "co", "com", "edu", "gov", "net", "org"})


def domain_from_url(url: str) -> Optional[str]:
    """Return the lower-cased registrable domain of *url*, or None.

    The port, credentials and a leading ``www.`` are dropped, and subdomains
    are collapsed: ``https://edition.cnn.com/x`` -> ``cnn.com``. Hosts under
    a two-level country suffix keep three labels, so
    ``https://www.bbc.co.uk/news`` -> ``bbc.co.uk`` rather than ``co.uk``.

    Returns None for empty input, for text without a host, and for schemes
    other than http/https (protocol-relative ``//host/...`` is accepted).
    """
    if not url:
        return None
    from urllib.parse import urlsplit

    try:
        parsed = urlsplit(url.strip())
        host = parsed.hostname  # already lower-cased, without port/userinfo
    except (AttributeError, TypeError, ValueError):
        return None
    if not host or parsed.scheme not in ("http", "https", ""):
        return None

    if host.startswith("www."):
        host = host[4:]
    if not host:
        return None

    labels = host.split(".")
    if len(labels) > 2:
        has_two_level_suffix = (
            len(labels[-1]) == 2 and labels[-2] in _TWO_LEVEL_SUFFIX_LABELS
        )
        keep = 3 if has_two_level_suffix else 2
        host = ".".join(labels[-keep:])
    return host
|
| 141 |
+
|
| 142 |
+
def is_unwanted_snippet(snippet: str, keywords=None) -> bool:
    """Return True if *snippet* mentions any unwanted keyword as a whole word.

    Matching is case-insensitive and anchored on word boundaries, with an
    optional plural ending, so "actor" matches "actors" but not "reactor",
    and "fan" matches "fans" but not "infant".

    Args:
        snippet: Text to inspect; empty or None is never unwanted.
        keywords: Optional iterable of keywords. Defaults to the module-level
            ``UNWANTED_KEYWORDS``.
    """
    if not snippet:
        return False
    if keywords is None:
        keywords = UNWANTED_KEYWORDS
    alternatives = "|".join(re.escape(k.lower()) for k in keywords if k)
    if not alternatives:
        # An empty alternation would match at every word boundary.
        return False
    pattern = r"\b(?:" + alternatives + r")(?:s|es)?\b"
    return re.search(pattern, snippet.lower()) is not None
|
| 146 |
+
|
| 147 |
+
def filter_sources(sources: List[Dict[str,str]]) -> List[Dict[str,str]]:
    """Deduplicate *sources* by URL and drop non-news results.

    Returns new dicts (the input dicts are not modified), each a copy of the
    input with an added ``"domain"`` key. A source is:

    * skipped if it has no URL or its URL was already seen;
    * kept unconditionally if its domain is in ``TRUSTED_DOMAINS``;
    * dropped if its domain is (a subdomain of) a blocked site;
    * dropped if its title or snippet is flagged by ``is_unwanted_snippet``;
    * kept otherwise.
    """
    blocked_domains = ("imdb.com", "youtube.com", "wikipedia.org", "fandom.com", "comicbook.com")
    kept, seen = [], set()
    for s in sources:
        url = s.get("url") or ""
        if not url or url in seen:
            continue
        seen.add(url)

        domain = domain_from_url(url)
        # Copy so the caller's raw result dicts are left untouched.
        entry = dict(s)
        entry["domain"] = domain or ""

        if domain in TRUSTED_DOMAINS:
            kept.append(entry)
            continue
        # Exact or subdomain match only; a plain substring test would also
        # hit unrelated hosts that merely contain a blocked name.
        if domain and any(domain == d or domain.endswith("." + d) for d in blocked_domains):
            continue
        if is_unwanted_snippet(s.get("snippet", "")) or is_unwanted_snippet(s.get("title", "")):
            continue
        kept.append(entry)
    return kept
|
| 163 |
+
|
| 164 |
+
# ---------------- Evidence summary ----------------
|
| 165 |
+
def summarize_evidence(sources: List[Dict[str,str]], max_chars=800) -> str:
    """Render up to eight sources as one ``title (domain) — snippet`` line each.

    Returns a fixed message when *sources* is empty. Output longer than
    *max_chars* is cut at *max_chars*, trimmed back to the last space inside
    that cut, and suffixed with ``"..."``.
    """
    if not sources:
        return "No credible news sources found."
    parts = []
    for s in sources[:8]:
        title = s.get("title") or ""
        snippet = s.get("snippet") or ""
        # Prefer a precomputed domain; otherwise derive it from the URL.
        domain = s.get("domain") or domain_from_url(s.get("url", "")) or ""
        # The separator is an em dash; the previous character was mojibake.
        parts.append(f"{title} ({domain}) \u2014 {snippet}")
    res = "\n".join(parts)
    if len(res) > max_chars:
        return res[:max_chars].rsplit(" ", 1)[0] + "..."
    return res
|
| 178 |
+
|
| 179 |
+
# ---------------- Fusion ----------------
|
| 180 |
+
def fuse_scores(fast_conf: float, deep_outcome: Optional[str], evidence_count: int) -> Dict[str,Any]:
    """Blend classifier confidence and evidence volume into a 0-100 score.

    Half of the score comes from *fast_conf* and half from *evidence_count*
    (which saturates at five sources). A deep outcome of "false" or
    "misleading" (any case) scales the blend by 0.7. The colour is "green"
    from 70, "yellow" from 40, otherwise "red".
    """
    evidence_weight = min(evidence_count / 5.0, 1.0)
    blended = fast_conf * 0.5 + evidence_weight * 0.5

    penalised_outcomes = ["false", "misleading"]
    if deep_outcome and deep_outcome.lower() in penalised_outcomes:
        blended = blended * 0.7

    clamped = max(0, min(1, blended))
    score = int(round(clamped * 100))

    if score >= 70:
        color = "green"
    elif score >= 40:
        color = "yellow"
    else:
        color = "red"
    return {"score": score, "color": color}
|
| 187 |
+
|
| 188 |
+
# ---------------- API ----------------
|
| 189 |
+
@app.post("/verify")
async def verify(req: VerifyRequest):
    """Run the verification pipeline for one claim and return all stage outputs.

    Stages: classify the text type, summarise the claim, search three news
    sources, filter and summarise the evidence, make a fast zero-shot guess,
    optionally attach a (placeholder) deep result, then fuse everything into
    a credibility score.

    Raises:
        HTTPException: 400 when the claim is empty after stripping.
    """
    import asyncio
    import logging

    claim = (req.text or "").strip()
    mode = (req.mode or "fast").lower()
    if not claim:
        raise HTTPException(status_code=400, detail="Empty claim")

    # Step 1 classify
    text_type_res = classify_text_type(claim)
    stored_type = text_type_res["type"]

    # Step 2 summarize
    user_summary = summarize_text(claim)

    # Step 3 search
    # The search helpers use blocking HTTP calls; running them directly in
    # this coroutine would stall the event loop for every other request.
    # Worker threads keep the loop responsive and let the lookups overlap.
    query = user_summary
    gnews_hits, newsapi_hits, ddg_hits = await asyncio.gather(
        asyncio.to_thread(fetch_gnews, query),
        asyncio.to_thread(fetch_newsapi, query),
        asyncio.to_thread(duckduckgo_search, query),
    )
    all_raw = gnews_hits + newsapi_hits + ddg_hits

    # Step 4 filter
    filtered = filter_sources(all_raw)
    evidence_summary = summarize_evidence(filtered)

    # Step 5 fast guess
    fast_label = "Unverifiable"
    fast_conf = 0.4
    if ZS_PIPE:
        try:
            cls = ZS_PIPE(claim, ["True", "False", "Misleading", "Unverifiable"], multi_label=False, truncation=True)
            fast_label = cls["labels"][0]
            fast_conf = float(cls["scores"][0])
        except Exception:
            # Best effort: keep the defaults above, but record the failure.
            logging.getLogger(__name__).warning(
                "zero-shot veracity guess failed; using defaults", exc_info=True
            )

    # Step 6 deep (simplified fallback)
    deep_result = None
    if mode in ["deep", "hybrid"]:
        deep_result = {
            "outcome": "Unverifiable",
            "explanation": "Demo mode: Deep reasoning not configured (missing API key).",
            "comparison": [],
            "takeaways": ["Search credible sources", "Cross-check claims", "Beware sensational headlines"],
            "tip": "Look for multiple reputable outlets.",
        }

    # Step 7 fusion
    # NOTE(review): fast_conf is the confidence of whichever label ranked
    # first, so a confident "False" raises the credibility score just like a
    # confident "True". Confirm whether that is intended.
    deep_outcome = deep_result.get("outcome") if deep_result else None
    fuse = fuse_scores(fast_conf, deep_outcome, len(filtered))

    return {
        "claim": claim,
        "text_type": stored_type,
        "text_type_scores": text_type_res.get("scores", {}),
        "user_summary": user_summary,
        "fast": {"label": fast_label, "confidence": round(fast_conf, 3)},
        "evidence_count_raw": len(all_raw),
        "evidence_count_filtered": len(filtered),
        "evidence": filtered,
        "evidence_summary": evidence_summary,
        "deep": deep_result or {},
        "credibility": fuse,
    }
|
| 249 |
+
|
| 250 |
+
# ---------------- Frontend ----------------
|
| 251 |
+
@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the single-page frontend from ``static/index.html``.

    The path is resolved relative to this module rather than the process
    working directory, so the page loads wherever the server is started from.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_path = os.path.join(module_dir, "static", "index.html")
    with open(index_path, "r", encoding="utf-8") as f:
        return f.read()
|
| 255 |
+
|
| 256 |
+
if __name__ == "__main__":
    # Imported lazily: only needed when the module is run as a script.
    import uvicorn

    # PORT comes from the environment, defaulting to 8000.
    listen_port = int(os.getenv("PORT", "8000"))
    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, reload=True)
|
gcp_clients.py
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
|
| 3 |
-
MOCK = os.getenv("MOCK_MODE", "true").lower() in ("1", "true", "yes")
|
| 4 |
-
|
| 5 |
-
class VisionClient:
|
| 6 |
-
def __init__(self, mock=MOCK):
|
| 7 |
-
self.mock = mock
|
| 8 |
-
def annotate_image(self, image_uri):
|
| 9 |
-
if self.mock:
|
| 10 |
-
return {"labels": ["person", "phone"], "ocr_text": "Mock OCR: PM signs bill"}
|
| 11 |
-
return {}
|
| 12 |
-
|
| 13 |
-
class SpeechClient:
|
| 14 |
-
def __init__(self, mock=MOCK):
|
| 15 |
-
self.mock = mock
|
| 16 |
-
def transcribe_audio(self, audio_uri):
|
| 17 |
-
if self.mock:
|
| 18 |
-
return {"transcript": "Mock transcription of audio", "confidence": 0.95}
|
| 19 |
-
return {}
|
| 20 |
-
|
| 21 |
-
class VertexClient:
|
| 22 |
-
def __init__(self, mock=MOCK):
|
| 23 |
-
self.mock = mock
|
| 24 |
-
def predict(self, prompt):
|
| 25 |
-
if self.mock:
|
| 26 |
-
return {
|
| 27 |
-
"verdict": "likely_misinformation",
|
| 28 |
-
"confidence": 0.85,
|
| 29 |
-
"reasons": ["Single source", "Sensational language"],
|
| 30 |
-
"actions": ["Reverse image search", "Check fact-checkers"]
|
| 31 |
-
}
|
| 32 |
-
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompting.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
def build_vertex_prompt(evidence, search_results, claim_text):
|
| 2 |
-
return f"""
|
| 3 |
-
SYSTEM: You are Atlas, a misinformation detector assistant.
|
| 4 |
-
INPUT:
|
| 5 |
-
Evidence: {evidence}
|
| 6 |
-
Search results: {search_results}
|
| 7 |
-
Claim: {claim_text}
|
| 8 |
-
|
| 9 |
-
TASK:
|
| 10 |
-
1) Summarize the claim.
|
| 11 |
-
2) List supporting/contradicting evidence.
|
| 12 |
-
3) Give a verdict: likely_misinformation | likely_credible | uncertain.
|
| 13 |
-
4) Suggest 2-3 verification actions.
|
| 14 |
-
|
| 15 |
-
OUTPUT: JSON only.
|
| 16 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
retriever.py
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
MOCK = os.getenv("MOCK_MODE", "true").lower() in ("1", "true", "yes")
|
| 3 |
-
|
| 4 |
-
class Retriever:
|
| 5 |
-
def __init__(self, mock=MOCK):
|
| 6 |
-
self.mock = mock
|
| 7 |
-
def search_claim(self, claim_text):
|
| 8 |
-
if self.mock:
|
| 9 |
-
return [
|
| 10 |
-
{"title": "Mocked refuting article", "url": "https://example.com/refute", "snippet": "This article refutes the claim."},
|
| 11 |
-
{"title": "Mocked supporting article", "url": "https://example.com/support", "snippet": "This article supports the claim."}
|
| 12 |
-
]
|
| 13 |
-
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/index.html
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
+
<title>Misinformation Detective — Demo</title>
|
| 7 |
+
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--primary: #0066cc;
|
| 11 |
+
--bg: #f9fafc;
|
| 12 |
+
--card-bg: #fff;
|
| 13 |
+
--border: #ddd;
|
| 14 |
+
--text: #222;
|
| 15 |
+
--muted: #666;
|
| 16 |
+
}
|
| 17 |
+
body {
|
| 18 |
+
margin: 0;
|
| 19 |
+
font-family: system-ui, sans-serif;
|
| 20 |
+
background: var(--bg);
|
| 21 |
+
color: var(--text);
|
| 22 |
+
line-height: 1.5;
|
| 23 |
+
}
|
| 24 |
+
.container {
|
| 25 |
+
max-width: 800px;
|
| 26 |
+
margin: 0 auto;
|
| 27 |
+
padding: 1.5rem;
|
| 28 |
+
}
|
| 29 |
+
header {
|
| 30 |
+
text-align: center;
|
| 31 |
+
margin-bottom: 1.5rem;
|
| 32 |
+
}
|
| 33 |
+
header h1 {
|
| 34 |
+
margin: 0;
|
| 35 |
+
font-size: 1.8rem;
|
| 36 |
+
}
|
| 37 |
+
header .tagline {
|
| 38 |
+
color: var(--muted);
|
| 39 |
+
font-size: 0.95rem;
|
| 40 |
+
}
|
| 41 |
+
.card {
|
| 42 |
+
background: var(--card-bg);
|
| 43 |
+
border: 1px solid var(--border);
|
| 44 |
+
border-radius: 6px;
|
| 45 |
+
padding: 1rem;
|
| 46 |
+
margin: 1rem 0;
|
| 47 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 48 |
+
}
|
| 49 |
+
.card h3 {
|
| 50 |
+
margin-top: 0;
|
| 51 |
+
font-size: 1.1rem;
|
| 52 |
+
border-bottom: 1px solid var(--border);
|
| 53 |
+
padding-bottom: 0.25rem;
|
| 54 |
+
}
|
| 55 |
+
textarea {
|
| 56 |
+
width: 100%;
|
| 57 |
+
height: 80px;
|
| 58 |
+
padding: 0.5rem;
|
| 59 |
+
font-family: inherit;
|
| 60 |
+
font-size: 1rem;
|
| 61 |
+
border-radius: 4px;
|
| 62 |
+
border: 1px solid var(--border);
|
| 63 |
+
resize: vertical;
|
| 64 |
+
}
|
| 65 |
+
.controls {
|
| 66 |
+
display: flex;
|
| 67 |
+
justify-content: space-between;
|
| 68 |
+
align-items: center;
|
| 69 |
+
flex-wrap: wrap;
|
| 70 |
+
margin-top: 0.5rem;
|
| 71 |
+
}
|
| 72 |
+
.modes button {
|
| 73 |
+
margin-right: 0.5rem;
|
| 74 |
+
}
|
| 75 |
+
button {
|
| 76 |
+
cursor: pointer;
|
| 77 |
+
padding: 0.5rem 0.75rem;
|
| 78 |
+
border: 1px solid var(--border);
|
| 79 |
+
border-radius: 4px;
|
| 80 |
+
background: #f0f0f0;
|
| 81 |
+
font-size: 0.9rem;
|
| 82 |
+
}
|
| 83 |
+
button.primary {
|
| 84 |
+
background: var(--primary);
|
| 85 |
+
color: white;
|
| 86 |
+
border: none;
|
| 87 |
+
}
|
| 88 |
+
button.active {
|
| 89 |
+
background: var(--primary);
|
| 90 |
+
color: white;
|
| 91 |
+
}
|
| 92 |
+
.demo-buttons {
|
| 93 |
+
margin-top: 0.5rem;
|
| 94 |
+
}
|
| 95 |
+
.demo {
|
| 96 |
+
margin-right: 0.3rem;
|
| 97 |
+
margin-top: 0.3rem;
|
| 98 |
+
}
|
| 99 |
+
footer {
|
| 100 |
+
margin-top: 2rem;
|
| 101 |
+
text-align: center;
|
| 102 |
+
color: var(--muted);
|
| 103 |
+
font-size: 0.8rem;
|
| 104 |
+
}
|
| 105 |
+
pre {
|
| 106 |
+
white-space: pre-wrap;
|
| 107 |
+
word-wrap: break-word;
|
| 108 |
+
font-size: 0.85rem;
|
| 109 |
+
background: #f7f7f7;
|
| 110 |
+
padding: 0.5rem;
|
| 111 |
+
border-radius: 4px;
|
| 112 |
+
}
|
| 113 |
+
.loading {
|
| 114 |
+
text-align: center;
|
| 115 |
+
padding: 2rem;
|
| 116 |
+
font-weight: bold;
|
| 117 |
+
}
|
| 118 |
+
.error {
|
| 119 |
+
color: red;
|
| 120 |
+
font-weight: bold;
|
| 121 |
+
}
|
| 122 |
+
ul {
|
| 123 |
+
padding-left: 1.2rem;
|
| 124 |
+
}
|
| 125 |
+
li {
|
| 126 |
+
margin-bottom: 0.3rem;
|
| 127 |
+
}
|
| 128 |
+
</style>
|
| 129 |
+
</head>
|
| 130 |
+
<body>
|
| 131 |
+
<main class="container">
|
| 132 |
+
<header>
|
| 133 |
+
<h1>Misinformation Detective</h1>
|
| 134 |
+
<p class="tagline">Quick check + deep evidence-based explanation with highlighted comparisons.</p>
|
| 135 |
+
</header>
|
| 136 |
+
|
| 137 |
+
<section class="card input-card">
|
| 138 |
+
<label for="claim">Paste a claim or headline</label>
|
| 139 |
+
<textarea id="claim" placeholder="E.g. 'Alien is landed on earth'"></textarea>
|
| 140 |
+
|
| 141 |
+
<div class="controls">
|
| 142 |
+
<div class="modes">
|
| 143 |
+
<button id="mode-fast" class="mode active">Fast ⚡</button>
|
| 144 |
+
<button id="mode-deep" class="mode">Deep 🔍</button>
|
| 145 |
+
<button id="mode-hybrid" class="mode">Hybrid 🔀</button>
|
| 146 |
+
</div>
|
| 147 |
+
<div class="actions">
|
| 148 |
+
<button id="verify-btn" class="primary">Verify Claim</button>
|
| 149 |
+
<div class="demo-buttons">
|
| 150 |
+
<button class="demo" data-claim="Drinking lemon cures cancer">Health example</button>
|
| 151 |
+
<button class="demo" data-claim="Scientists confirm coffee extends life by 10 years">Study example</button>
|
| 152 |
+
<button class="demo" data-claim="Alien is landed on earth">UFO example</button>
|
| 153 |
+
</div>
|
| 154 |
+
</div>
|
| 155 |
+
</div>
|
| 156 |
+
</section>
|
| 157 |
+
|
| 158 |
+
<section id="pipeline" class="pipeline">
|
| 159 |
+
<!-- Cards injected dynamically -->
|
| 160 |
+
</section>
|
| 161 |
+
|
| 162 |
+
<footer>
|
| 163 |
+
<small>Designed for hackathon demo — resilient, educational, and transparent pipeline.</small>
|
| 164 |
+
</footer>
|
| 165 |
+
</main>
|
| 166 |
+
|
| 167 |
+
<script>
|
| 168 |
+
// Element handles shared by the rest of the script.
const claimInput = document.getElementById("claim");
const verifyBtn = document.getElementById("verify-btn");
const pipeline = document.getElementById("pipeline");
// Currently selected pipeline mode; sent with every /verify request.
let mode = "fast";

// Mode toggle: exactly one button carries "active"; the part of its id
// after "mode-" becomes the selected mode.
for (const modeButton of document.querySelectorAll(".mode")) {
  modeButton.addEventListener("click", function () {
    for (const other of document.querySelectorAll(".mode")) {
      other.classList.remove("active");
    }
    modeButton.classList.add("active");
    mode = modeButton.id.replace("mode-","");
  });
}

// Demo claims: clicking an example copies its data-claim into the textarea.
for (const demoButton of document.querySelectorAll(".demo")) {
  demoButton.addEventListener("click", function () {
    claimInput.value = demoButton.dataset.claim;
  });
}
|
| 188 |
+
|
| 189 |
+
/**
 * Append a result card to the pipeline section.
 *
 * @param {string} title   Plain-text heading; inserted as text, never parsed as HTML.
 * @param {string} content HTML markup for the card body. Callers are
 *                         responsible for escaping any untrusted text in it.
 */
function addCard(title, content) {
  const card = document.createElement("section");
  card.className = "card";

  const heading = document.createElement("h3");
  heading.textContent = title;

  const body = document.createElement("div");
  body.innerHTML = content;

  card.appendChild(heading);
  card.appendChild(body);
  pipeline.appendChild(card);
}
|
| 195 |
+
|
| 196 |
+
/**
 * Render claim/evidence comparison entries as an HTML list.
 *
 * @param {Array<{claim_span: string, evidence_span: string, relation: string}>} list
 * @returns {string} HTML markup; a placeholder when the list is empty or missing.
 */
function renderComparison(list) {
  if (!list || !list.length) return "<em>No comparisons available</em>";

  // Field values come from the server response, so escape them before they
  // are placed into markup.
  const entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
  const esc = value =>
    String(value == null ? "" : value).replace(/[&<>"']/g, ch => entities[ch]);

  return "<ul>" + list.map(c =>
    `<li><strong>${esc(c.claim_span)}</strong> \u2014 <q>${esc(c.evidence_span)}</q> <em>[${esc(c.relation)}]</em></li>`
  ).join("") + "</ul>";
}
|
| 202 |
+
|
| 203 |
+
// Submit the claim to /verify and render one card per pipeline stage.
verifyBtn.addEventListener("click", async () => {
  // The response contains user input and text scraped from the web, so
  // escape it before it is interpolated into markup.
  const entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
  const esc = value =>
    String(value == null ? "" : value).replace(/[&<>"']/g, ch => entities[ch]);

  const text = claimInput.value.trim();
  if (!text) return alert("Please enter a claim.");
  pipeline.innerHTML = "<p class='loading'>Checking claim...</p>";
  // Prevent overlapping requests while one is in flight.
  verifyBtn.disabled = true;

  try {
    const res = await fetch("/verify", {
      method: "POST",
      headers: {"Content-Type":"application/json"},
      body: JSON.stringify({text, mode})
    });
    const data = await res.json();

    // Error responses carry a "detail" field instead of pipeline data.
    if (!res.ok) {
      const detail = typeof data.detail === "string" ? data.detail : "Error verifying claim.";
      pipeline.innerHTML = `<p class='error'>${esc(detail)}</p>`;
      return;
    }

    pipeline.innerHTML = "";

    addCard("Step 1 \u2014 Classification", `<p>Type: <b>${esc(data.text_type)}</b></p>`);
    addCard("Step 2 \u2014 Summary", `<p>${esc(data.user_summary)}</p>`);
    addCard("Step 3 \u2014 Search Results", `<pre>${esc(data.evidence_summary)}</pre>`);
    addCard("Step 4 \u2014 Evidence Filtered", `<p>Found ${esc(data.evidence_count_filtered)} credible sources</p>`);
    addCard("Step 5 \u2014 Fast Label", `<p>${esc(data.fast.label)} (conf ${esc(data.fast.confidence)})</p>`);
    // The response may not include a similarity value; say so explicitly
    // instead of printing "undefined".
    const similarity = data.similarity == null ? "<em>Not available</em>" : esc(data.similarity);
    addCard("Step 6 \u2014 Similarity Score", `<p>${similarity}</p>`);

    if (data.deep && Object.keys(data.deep).length > 0) {
      const takeaways = Array.isArray(data.deep.takeaways) ? data.deep.takeaways : [];
      addCard("Step 7 \u2014 Deep AI Verdict",
        `<p><b>Outcome:</b> ${esc(data.deep.outcome)}</p>
         <p>${esc(data.deep.explanation)}</p>
         <h4>Comparison</h4>${renderComparison(data.deep.comparison)}
         <h4>Takeaways</h4><ul>${takeaways.map(t=>`<li>${esc(t)}</li>`).join("")}</ul>
         <p><i>Tip:</i> ${esc(data.deep.tip)}</p>`);
    }

    // Only known colour names are allowed into the style attribute.
    const allowedColors = ["green", "yellow", "red"];
    const color = allowedColors.includes(data.credibility.color) ? data.credibility.color : "inherit";
    addCard("Step 8 \u2014 Credibility Score",
      `<p>Score: <b style="color:${color}">${esc(data.credibility.score)}</b>/100</p>`);
  } catch (err) {
    console.error(err);
    pipeline.innerHTML = "<p class='error'>Error verifying claim.</p>";
  } finally {
    verifyBtn.disabled = false;
  }
});
|
| 240 |
+
</script>
|
| 241 |
+
</body>
|
| 242 |
+
</html>
|