Spaces:

mdAmin313
/

atlas

Paused

@@ -1,46 +1,258 @@
-# app.py
 import os
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-# Import internal modules
-import gcp_clients
-import retriever
-import prompting
-app = FastAPI(title="Atlas Engine – Misinformation Detector")
-MOCK = os.getenv("MOCK_MODE", "true").lower() in ("1", "true", "yes")
-vision = gcp_clients.VisionClient(mock=MOCK)
-speech = gcp_clients.SpeechClient(mock=MOCK)
-vertex = gcp_clients.VertexClient(mock=MOCK)
-retr = retriever.Retriever(mock=MOCK)
-class AnalyzeRequest(BaseModel):
-    image_url: str = None
-    audio_url: str = None
-    text: str = None
-@app.post("/analyze")
-async def analyze(req: AnalyzeRequest):
-    if not (req.image_url or req.audio_url or req.text):
-        raise HTTPException(status_code=400, detail="Provide at least one of image_url, audio_url, or text")
-    evidence = {}
-    if req.image_url:
-        evidence['vision'] = vision.annotate_image(req.image_url)
-    if req.audio_url:
-        evidence['speech'] = speech.transcribe_audio(req.audio_url)
-    claim_text = req.text or evidence.get('vision', {}).get('ocr_text')
-    search_results = retr.search_claim(claim_text or "")
-    prompt = prompting.build_vertex_prompt(evidence, search_results, claim_text or "")
-    vertex_response = vertex.predict(prompt)
     return {
-        "claim": claim_text,
-        "evidence": evidence,
-        "search_results": search_results,
-        "vertex_response": vertex_response
     }

 import os
+import json
+import re
+from typing import List, Dict, Any, Optional
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
+from dotenv import load_dotenv
+import requests
+from bs4 import BeautifulSoup
+# Optional heavy ML dependencies. Both models are constructed at import time;
+# if the import or the model construction raises, the handle is set to None
+# and code that uses it must check for None first.
+# NOTE(review): `except Exception` also hides genuine load errors - nothing is logged.
+try:
+    from sentence_transformers import SentenceTransformer, util
+    # Sentence-embedding model handle (None when unavailable).
+    SENTE_MODEL = SentenceTransformer("all-mpnet-base-v2")
+except Exception:
+    SENTE_MODEL = None
+try:
+    from transformers import pipeline
+    # Zero-shot classifier used by classify_text_type() and by the /verify fast label.
+    ZS_PIPE = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+except Exception:
+    ZS_PIPE = None
+# Called before the os.getenv() lookups below so values from a .env file are visible to them.
+load_dotenv()
+# API keys for the news search helpers; fetch_gnews()/fetch_newsapi() return []
+# when their key is unset.
+GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
+NEWSORG_API_KEY = os.getenv("NEWSORG_API_KEY")
+# Read here, but no code visible in this file consumes the two keys below.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")  # placeholder
+app = FastAPI(title="Hybrid Misinformation Detector")
+# ---------------- Models ----------------
+# Request body accepted by POST /verify.
+class VerifyRequest(BaseModel):
+    # Claim or headline to check; /verify answers HTTP 400 when it is blank after stripping.
+    text: str
+    # Pipeline mode. Only "deep" and "hybrid" add the deep-reasoning step in /verify;
+    # any other value is handled like "fast".
+    mode: Optional[str] = "fast"  # fast, deep, hybrid
+def safe_headers():
+    """Return a fresh dict of HTTP headers for outbound search requests."""
+    headers = {}
+    # Identify the tool instead of sending the HTTP library's default User-Agent.
+    headers["User-Agent"] = "misinfo-tool/1.0 (+https://example.com)"
+    return headers
+# ---------------- NLP classify ----------------
+def classify_text_type(text: str) -> Dict[str, Any]:
+    """Classify *text* as news, rumor, fact, opinion, satire or unverifiable.
+
+    Uses the zero-shot pipeline when it is loaded; if it is missing or raises,
+    falls back to ordered keyword rules where the first matching rule wins.
+
+    Returns:
+        A dict with "type" (label), "score" (confidence) and "scores"
+        (per-label scores from the model, or {} for the keyword fallback).
+    """
+    if ZS_PIPE:
+        candidate_labels = ["news","rumor","fact","opinion","satire","unverifiable"]
+        try:
+            outcome = ZS_PIPE(text, candidate_labels, multi_label=False, truncation=True)
+            ranked, weights = outcome["labels"], outcome["scores"]
+            return {"type": ranked[0], "score": round(float(weights[0]),3), "scores": dict(zip(ranked, weights))}
+        except Exception:
+            # Model failure is non-fatal: continue with the keyword rules below.
+            pass
+    # (label, fixed confidence, trigger phrases) - evaluated top to bottom.
+    keyword_rules = (
+        ("news", 0.65, ("according to","reported","breaking","news","announced")),
+        ("opinion", 0.7, ("i think","in my opinion","i believe","should")),
+        ("satire", 0.7, ("joke","satire","not real","parody")),
+        ("fact", 0.6, ("study shows","research","published","peer-reviewed")),
+    )
+    lowered = text.lower()
+    for kind, confidence, cues in keyword_rules:
+        for cue in cues:
+            if cue in lowered:
+                return {"type": kind, "score": confidence, "scores": {}}
+    # Nothing matched: default to a low-confidence "rumor".
+    return {"type":"rumor","score":0.45,"scores":{}}
+# ---------------- Summarize ----------------
+def summarize_text(text: str, max_len=300) -> str:
+    """Return the first sentence of *text*, shortened to at most *max_len* characters.
+
+    Sentences are split after '.', '!' or '?' followed by spaces. An over-long
+    first sentence is cut at *max_len*, trimmed back to the last space and
+    suffixed with "...".
+    """
+    pieces = re.split(r'(?<=[.!?]) +', text.strip())
+    lead = pieces[0] if pieces else text
+    if len(lead) <= max_len:
+        return lead
+    clipped = lead[:max_len]
+    return clipped.rsplit(' ',1)[0] + "..."
+# ---------------- Search helpers ----------------
+def fetch_gnews(query: str, max_results=6) -> List[Dict[str,str]]:
+    """Search the GNews API for articles matching *query*.
+
+    Best-effort: returns [] when no API key is configured, when the request
+    fails, or when the response is not the expected JSON shape.
+
+    Returns:
+        Up to *max_results* dicts with "title", "url", "source" and "snippet".
+    """
+    if not GNEWS_API_KEY:
+        return []
+    url = "https://gnews.io/api/v4/search"
+    params = {"q": query, "token": GNEWS_API_KEY, "max": max_results, "lang":"en"}
+    try:
+        r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
+        r.raise_for_status()
+        js = r.json()
+    except (requests.RequestException, ValueError):
+        # Network/HTTP error or a body that is not JSON.
+        return []
+    articles = js.get("articles") if isinstance(js, dict) else None
+    if not isinstance(articles, list):
+        return []
+    results = []
+    for a in articles[:max_results]:
+        if not isinstance(a, dict):
+            continue
+        # "source" may be missing or null; one such article must not discard the rest.
+        source = a.get("source")
+        results.append({
+            "title": a.get("title"),
+            "url": a.get("url"),
+            "source": source.get("name") if isinstance(source, dict) else None,
+            "snippet": a.get("description"),
+        })
+    return results
+def fetch_newsapi(query: str, max_results=6) -> List[Dict[str,str]]:
+    """Search the NewsAPI "everything" endpoint for articles matching *query*.
+
+    Best-effort: returns [] when no API key is configured, when the request
+    fails, or when the response is not the expected JSON shape.
+
+    Returns:
+        Up to *max_results* dicts with "title", "url", "source" and "snippet".
+    """
+    if not NEWSORG_API_KEY:
+        return []
+    url = "https://newsapi.org/v2/everything"
+    params = {"q": query, "pageSize": max_results, "apiKey": NEWSORG_API_KEY, "language":"en"}
+    try:
+        r = requests.get(url, params=params, headers=safe_headers(), timeout=6)
+        r.raise_for_status()
+        js = r.json()
+    except (requests.RequestException, ValueError):
+        # Network/HTTP error or a body that is not JSON.
+        return []
+    articles = js.get("articles") if isinstance(js, dict) else None
+    if not isinstance(articles, list):
+        return []
+    results = []
+    for a in articles[:max_results]:
+        if not isinstance(a, dict):
+            continue
+        # "source" may be missing or null; one such article must not discard the rest.
+        source = a.get("source")
+        results.append({
+            "title": a.get("title"),
+            "url": a.get("url"),
+            "source": source.get("name") if isinstance(source, dict) else None,
+            "snippet": a.get("description"),
+        })
+    return results
+def duckduckgo_search(query: str, max_results=8) -> List[Dict[str,str]]:
+    """Scrape DuckDuckGo's HTML endpoint for results matching *query*.
+
+    Best-effort: any network or parsing failure yields [].
+
+    Returns:
+        Up to *max_results* dicts with "title", "url", "source" (always None)
+        and "snippet" ("" when no snippet element is found).
+    """
+    from urllib.parse import parse_qs, urlparse
+    try:
+        url = "https://html.duckduckgo.com/html/"
+        r = requests.post(url, data={"q": query}, headers=safe_headers(), timeout=6)
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, "html.parser")
+        results = []
+        for link in soup.select(".result__a")[:max_results]:
+            href = link.get("href")
+            if href:
+                # NOTE(review): result links appear to be wrapped in a redirect of the
+                # form //duckduckgo.com/l/?uddg=<target>; unwrap it so the real site's
+                # domain can be derived later. Confirm against the live markup.
+                wrapped = parse_qs(urlparse(href).query).get("uddg")
+                if wrapped:
+                    href = wrapped[0]
+                elif href.startswith("//"):
+                    # Protocol-relative link: make it absolute.
+                    href = "https:" + href
+            # The immediate parent of the anchor is normally just its heading, which
+            # does not contain the snippet; look in the enclosing ".result" container
+            # and fall back to the immediate parent if there is none.
+            container = link.find_parent(class_="result") or link.find_parent()
+            snippet_node = container.select_one(".result__snippet") if container else None
+            snippet = snippet_node.get_text() if snippet_node else ""
+            results.append({"title": link.get_text(), "url": href, "source":None, "snippet": snippet})
+        return results
+    except Exception:
+        # Deliberate best-effort boundary: scraping must never fail the request.
+        return []
+# ---------------- Filtering ----------------
+# Domains whose results filter_sources() keeps unconditionally, skipping the
+# blocklist and unwanted-keyword checks.
+TRUSTED_DOMAINS = {
+    "bbc.co.uk","bbc.com","cnn.com","nytimes.com","reuters.com","apnews.com",
+    "theguardian.com","npr.org","washingtonpost.com","wsj.com","usatoday.com",
+    "bloomberg.com","aljazeera.com","msnbc.com","cnbc.com","foxnews.com"
+}
+# Lower-case keywords that is_unwanted_snippet() looks for; the entries are
+# entertainment/fiction terms (movies, comics, fandom, ...).
+UNWANTED_KEYWORDS = [
+    "movie","film","trailer","episode","comic","manga","fan","fandom","imdb",
+    "review","tv series","fiction","novel","fantasy","screenplay","actor","actress"
+]
+# Two-label public suffixes under which the registrable domain has three labels.
+# Not exhaustive - a small hand-picked list, since no public-suffix library is available here.
+_MULTI_LABEL_SUFFIXES = frozenset({
+    "co.uk","org.uk","ac.uk","gov.uk","com.au","net.au","org.au","co.nz",
+    "co.in","co.jp","co.za","com.br","com.mx","com.cn","com.hk","com.sg","com.tr",
+})
+def domain_from_url(url: str) -> Optional[str]:
+    """Return the lower-case registrable domain of an http(s) URL, or None.
+
+    "https://www.bbc.co.uk/news" -> "bbc.co.uk"; "https://edition.cnn.com/x" -> "cnn.com".
+    Userinfo, port, query string and a leading "www." are ignored. IPv4 and
+    bracketed IPv6 hosts are returned whole. Anything that is not a string
+    containing an http(s) URL yields None.
+    """
+    if not url or not isinstance(url, str):
+        return None
+    m = re.search(r"https?://([^/?#\s]+)", url, re.IGNORECASE)
+    if not m:
+        return None
+    host = m.group(1).lower().rsplit("@", 1)[-1]  # drop "user:pass@"
+    if host.startswith("["):
+        # Bracketed IPv6 literal: keep it whole, minus any port.
+        return host.split("]", 1)[0] + "]"
+    host = host.split(":", 1)[0].strip(".")  # drop ":port" and stray dots
+    if host.startswith("www."):
+        host = host[4:]
+    if not host:
+        return None
+    labels = host.split(".")
+    if all(label.isdigit() for label in labels):
+        # IPv4 address: there is no registrable domain to reduce to.
+        return host
+    # Keep three labels under suffixes such as "co.uk"; plain "last two labels"
+    # would turn bbc.co.uk into "co.uk" and never match TRUSTED_DOMAINS.
+    keep = 3 if ".".join(labels[-2:]) in _MULTI_LABEL_SUFFIXES else 2
+    return ".".join(labels[-keep:])
+def is_unwanted_snippet(snippet: str) -> bool:
+    """Return True when *snippet* contains one of UNWANTED_KEYWORDS as a whole word.
+
+    Matching is case-insensitive and tolerates a plural "s"/"es" suffix.
+    Whole-word matching avoids false hits such as "fan" in "infant", "actor"
+    in "factor" or "review" in "peer-reviewed"; the trade-off is that compound
+    words (e.g. "fanfiction") no longer match. Empty or None input is never unwanted.
+    """
+    if not snippet or not UNWANTED_KEYWORDS:
+        return False
+    alternatives = "|".join(re.escape(k) for k in UNWANTED_KEYWORDS)
+    pattern = r"\b(?:" + alternatives + r")(?:e?s)?\b"
+    return re.search(pattern, snippet.lower()) is not None
+def filter_sources(sources: List[Dict[str,str]]) -> List[Dict[str,str]]:
+    """Deduplicate *sources* by URL and drop entertainment/fiction results.
+
+    Each kept entry is a copy of the input dict with a "domain" key added
+    ("" when the domain cannot be derived); the input dicts are not modified.
+    Order of decisions per source: skip empty or repeated URLs, keep trusted
+    domains unconditionally, drop blocklisted domains, drop entries whose
+    title or snippet looks like entertainment content, keep the rest.
+    """
+    # Compared by equality against the reduced domain, so "myyoutube.com" is not blocked.
+    blocked_domains = {"imdb.com","youtube.com","wikipedia.org","fandom.com","comicbook.com"}
+    kept, seen = [], set()
+    for source in sources:
+        url = source.get("url") or ""
+        if not url or url in seen:
+            continue
+        seen.add(url)
+        domain = domain_from_url(url)
+        entry = {**source, "domain": domain or ""}
+        if domain in TRUSTED_DOMAINS:
+            kept.append(entry)
+            continue
+        if domain in blocked_domains:
+            continue
+        if is_unwanted_snippet(entry.get("snippet","")) or is_unwanted_snippet(entry.get("title","")):
+            continue
+        kept.append(entry)
+    return kept
+# ---------------- Evidence summary ----------------
+def summarize_evidence(sources: List[Dict[str,str]], max_chars=800) -> str:
+    """Render up to eight sources as "title (domain) — snippet" lines.
+
+    Returns a fixed message when *sources* is empty. Output longer than
+    *max_chars* is cut there, trimmed back to the last space and suffixed
+    with "...".
+    """
+    if not sources:
+        return "No credible news sources found."
+    def describe(entry):
+        # Missing or None fields render as empty strings.
+        heading = entry.get("title") or ""
+        excerpt = entry.get("snippet") or ""
+        site = entry.get("domain") or domain_from_url(entry.get("url","")) or ""
+        return f"{heading} ({site}) — {excerpt}"
+    text = "\n".join(describe(entry) for entry in sources[:8])
+    if len(text) <= max_chars:
+        return text
+    return text[:max_chars].rsplit(" ",1)[0] + "..."
+# ---------------- Fusion ----------------
+def fuse_scores(fast_conf: float, deep_outcome: Optional[str], evidence_count: int) -> Dict[str,Any]:
+    """Blend fast confidence and evidence volume into a 0-100 credibility score.
+
+    Half the weight comes from *fast_conf*, half from *evidence_count* (capped
+    at five sources). A deep outcome of "false" or "misleading" (any case)
+    scales the blend by 0.7.
+
+    Returns:
+        {"score": int, "color": "green" (>= 70) | "yellow" (>= 40) | "red"}.
+    """
+    evidence_weight = min(evidence_count/5.0,1.0)
+    blended = fast_conf*0.5 + evidence_weight*0.5
+    if deep_outcome and deep_outcome.lower() in ["false","misleading"]:
+        blended *= 0.7
+    clamped = max(0, min(1, blended))
+    score = int(round(clamped * 100))
+    if score >= 70:
+        color = "green"
+    elif score >= 40:
+        color = "yellow"
+    else:
+        color = "red"
+    return {"score":score, "color":color}
+# ---------------- API ----------------
+def _claim_evidence_similarity(claim: str, sources: List[Dict[str,str]]) -> Optional[float]:
+    """Return the highest cosine similarity between the claim and the evidence texts, or None."""
+    # SENTE_MODEL is None when sentence-transformers could not be loaded at import time.
+    if not SENTE_MODEL or not sources:
+        return None
+    texts = [f"{s.get('title') or ''} {s.get('snippet') or ''}".strip() for s in sources[:8]]
+    texts = [t for t in texts if t]
+    if not texts:
+        return None
+    try:
+        claim_vec = SENTE_MODEL.encode(claim, convert_to_tensor=True)
+        source_vecs = SENTE_MODEL.encode(texts, convert_to_tensor=True)
+        return round(float(util.cos_sim(claim_vec, source_vecs).max()), 3)
+    except Exception:
+        # Optional extra signal: never fail the request because of it.
+        return None
+def _run_verification(claim: str, mode: str) -> Dict[str, Any]:
+    """Run the blocking verification pipeline for an already validated claim."""
+    # Step 1 classify
+    text_type_res = classify_text_type(claim)
+    stored_type = text_type_res["type"]
+    # Step 2 summarize
+    user_summary = summarize_text(claim)
+    # Step 3 search
+    query = user_summary
+    all_raw = fetch_gnews(query) + fetch_newsapi(query) + duckduckgo_search(query)
+    # Step 4 filter
+    filtered = filter_sources(all_raw)
+    evidence_summary = summarize_evidence(filtered)
+    # Step 5 fast guess
+    fast_label = "Unverifiable"
+    fast_conf = 0.4
+    if ZS_PIPE:
+        try:
+            cls = ZS_PIPE(claim, ["True","False","Misleading","Unverifiable"], multi_label=False, truncation=True)
+            fast_label = cls["labels"][0]
+            fast_conf = float(cls["scores"][0])
+        except Exception:
+            # Keep the low-confidence default when the model fails.
+            pass
+    # Step 6 deep (simplified fallback)
+    deep_result = None
+    if mode in ["deep","hybrid"]:
+        deep_result = {
+            "outcome":"Unverifiable",
+            "explanation":"Demo mode: Deep reasoning not configured (missing API key).",
+            "comparison":[],
+            "takeaways":["Search credible sources","Cross-check claims","Beware sensational headlines"],
+            "tip":"Look for multiple reputable outlets."
+        }
+    # Step 7 fusion
+    # NOTE(review): fast_conf is the confidence of whichever label won, so a claim
+    # confidently labelled "False" still raises the credibility score - confirm intent.
+    deep_outcome = deep_result.get("outcome") if deep_result else None
+    fuse = fuse_scores(fast_conf, deep_outcome, len(filtered))
     return {
+        "claim": claim,
+        "text_type": stored_type,
+        "text_type_scores": text_type_res.get("scores", {}),
+        "user_summary": user_summary,
+        "fast": {"label": fast_label, "confidence": round(fast_conf,3)},
+        "evidence_count_raw": len(all_raw),
+        "evidence_count_filtered": len(filtered),
+        "evidence": filtered,
+        "evidence_summary": evidence_summary,
+        # Rendered by the frontend's similarity card; None when it cannot be computed.
+        "similarity": _claim_evidence_similarity(claim, filtered),
+        "deep": deep_result or {},
+        "credibility": fuse
     }
+@app.post("/verify")
+async def verify(req: VerifyRequest):
+    """Verify a claim and return classification, evidence and a credibility score.
+
+    Raises:
+        HTTPException: 400 when the claim is empty after stripping whitespace.
+    """
+    from fastapi.concurrency import run_in_threadpool
+    claim = (req.text or "").strip()
+    mode = (req.mode or "fast").lower()
+    if not claim:
+        raise HTTPException(status_code=400, detail="Empty claim")
+    # The pipeline does synchronous HTTP requests and model inference; running it
+    # in the threadpool keeps the event loop free to serve other requests.
+    return await run_in_threadpool(_run_verification, claim, mode)
+# ---------------- Frontend ----------------
+@app.get("/", response_class=HTMLResponse)
+def root():
+    """Serve the single-page frontend from static/index.html.
+
+    Raises:
+        HTTPException: 404 when the HTML file is missing.
+    """
+    # Resolve next to this module so the page is found regardless of the
+    # directory the server process was started from.
+    index_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static", "index.html")
+    try:
+        with open(index_path, "r", encoding="utf-8") as f:
+            return f.read()
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail="Frontend not found") from None
+# Script entry point: start uvicorn when the module is executed directly.
+if __name__ == "__main__":
+    import uvicorn
+    # Listens on all interfaces; the port comes from $PORT (default 8000).
+    # NOTE(review): reload=True is a development setting - confirm it is wanted in deployment.
+    uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT","8000")), reload=True)

gcp_clients.py DELETED Viewed

@@ -1,32 +0,0 @@
-import os
-MOCK = os.getenv("MOCK_MODE", "true").lower() in ("1", "true", "yes")
-class VisionClient:
-    def __init__(self, mock=MOCK):
-        self.mock = mock
-    def annotate_image(self, image_uri):
-        if self.mock:
-            return {"labels": ["person", "phone"], "ocr_text": "Mock OCR: PM signs bill"}
-        return {}
-class SpeechClient:
-    def __init__(self, mock=MOCK):
-        self.mock = mock
-    def transcribe_audio(self, audio_uri):
-        if self.mock:
-            return {"transcript": "Mock transcription of audio", "confidence": 0.95}
-        return {}
-class VertexClient:
-    def __init__(self, mock=MOCK):
-        self.mock = mock
-    def predict(self, prompt):
-        if self.mock:
-            return {
-                "verdict": "likely_misinformation",
-                "confidence": 0.85,
-                "reasons": ["Single source", "Sensational language"],
-                "actions": ["Reverse image search", "Check fact-checkers"]
-            }
-        return {}

prompting.py DELETED Viewed

@@ -1,16 +0,0 @@
-def build_vertex_prompt(evidence, search_results, claim_text):
-    return f"""
-SYSTEM: You are Atlas, a misinformation detector assistant.
-INPUT:
-Evidence: {evidence}
-Search results: {search_results}
-Claim: {claim_text}
-TASK:
-1) Summarize the claim.
-2) List supporting/contradicting evidence.
-3) Give a verdict: likely_misinformation | likely_credible | uncertain.
-4) Suggest 2-3 verification actions.
-OUTPUT: JSON only.
-"""

retriever.py DELETED Viewed

@@ -1,13 +0,0 @@
-import os
-MOCK = os.getenv("MOCK_MODE", "true").lower() in ("1", "true", "yes")
-class Retriever:
-    def __init__(self, mock=MOCK):
-        self.mock = mock
-    def search_claim(self, claim_text):
-        if self.mock:
-            return [
-                {"title": "Mocked refuting article", "url": "https://example.com/refute", "snippet": "This article refutes the claim."},
-                {"title": "Mocked supporting article", "url": "https://example.com/support", "snippet": "This article supports the claim."}
-            ]
-        return []

static/index.html ADDED Viewed

	@@ -0,0 +1,242 @@

+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width,initial-scale=1" />
+  <title>Misinformation Detective — Demo</title>
+  <style>
+    :root {
+      --primary: #0066cc;
+      --bg: #f9fafc;
+      --card-bg: #fff;
+      --border: #ddd;
+      --text: #222;
+      --muted: #666;
+    }
+    body {
+      margin: 0;
+      font-family: system-ui, sans-serif;
+      background: var(--bg);
+      color: var(--text);
+      line-height: 1.5;
+    }
+    .container {
+      max-width: 800px;
+      margin: 0 auto;
+      padding: 1.5rem;
+    }
+    header {
+      text-align: center;
+      margin-bottom: 1.5rem;
+    }
+    header h1 {
+      margin: 0;
+      font-size: 1.8rem;
+    }
+    header .tagline {
+      color: var(--muted);
+      font-size: 0.95rem;
+    }
+    .card {
+      background: var(--card-bg);
+      border: 1px solid var(--border);
+      border-radius: 6px;
+      padding: 1rem;
+      margin: 1rem 0;
+      box-shadow: 0 2px 4px rgba(0,0,0,0.05);
+    }
+    .card h3 {
+      margin-top: 0;
+      font-size: 1.1rem;
+      border-bottom: 1px solid var(--border);
+      padding-bottom: 0.25rem;
+    }
+    textarea {
+      /* Count padding and border inside the 100% width so the field does not overflow its card. */
+      box-sizing: border-box;
+      width: 100%;
+      height: 80px;
+      padding: 0.5rem;
+      font-family: inherit;
+      font-size: 1rem;
+      border-radius: 4px;
+      border: 1px solid var(--border);
+      resize: vertical;
+    }
+    .controls {
+      display: flex;
+      justify-content: space-between;
+      align-items: center;
+      flex-wrap: wrap;
+      margin-top: 0.5rem;
+    }
+    .modes button {
+      margin-right: 0.5rem;
+    }
+    button {
+      cursor: pointer;
+      padding: 0.5rem 0.75rem;
+      border: 1px solid var(--border);
+      border-radius: 4px;
+      background: #f0f0f0;
+      font-size: 0.9rem;
+    }
+    button.primary {
+      background: var(--primary);
+      color: white;
+      border: none;
+    }
+    button.active {
+      background: var(--primary);
+      color: white;
+    }
+    .demo-buttons {
+      margin-top: 0.5rem;
+    }
+    .demo {
+      margin-right: 0.3rem;
+      margin-top: 0.3rem;
+    }
+    footer {
+      margin-top: 2rem;
+      text-align: center;
+      color: var(--muted);
+      font-size: 0.8rem;
+    }
+    pre {
+      white-space: pre-wrap;
+      word-wrap: break-word;
+      font-size: 0.85rem;
+      background: #f7f7f7;
+      padding: 0.5rem;
+      border-radius: 4px;
+    }
+    .loading {
+      text-align: center;
+      padding: 2rem;
+      font-weight: bold;
+    }
+    .error {
+      color: red;
+      font-weight: bold;
+    }
+    ul {
+      padding-left: 1.2rem;
+    }
+    li {
+      margin-bottom: 0.3rem;
+    }
+  </style>
+</head>
+<body>
+  <main class="container">
+    <header>
+      <h1>Misinformation Detective</h1>
+      <p class="tagline">Quick check + deep evidence-based explanation with highlighted comparisons.</p>
+    </header>
+    <section class="card input-card">
+      <label for="claim">Paste a claim or headline</label>
+      <textarea id="claim" placeholder="E.g. 'Alien is landed on earth'"></textarea>
+      <div class="controls">
+        <div class="modes">
+          <button id="mode-fast" class="mode active">Fast ⚡</button>
+          <button id="mode-deep" class="mode">Deep 🔍</button>
+          <button id="mode-hybrid" class="mode">Hybrid 🔁</button>
+        </div>
+        <div class="actions">
+          <button id="verify-btn" class="primary">Verify Claim</button>
+          <div class="demo-buttons">
+            <button class="demo" data-claim="Drinking lemon cures cancer">Health example</button>
+            <button class="demo" data-claim="Scientists confirm coffee extends life by 10 years">Study example</button>
+            <button class="demo" data-claim="Alien is landed on earth">UFO example</button>
+          </div>
+        </div>
+      </div>
+    </section>
+    <section id="pipeline" class="pipeline">
+      <!-- Cards injected dynamically -->
+    </section>
+    <footer>
+      <small>Designed for hackathon demo — resilient, educational, and transparent pipeline.</small>
+    </footer>
+  </main>
+  <script>
+    // Handles to the claim textarea, the verify button and the results container.
+    const claimInput = document.getElementById("claim");
+    const verifyBtn = document.getElementById("verify-btn");
+    const pipeline = document.getElementById("pipeline");
+    // Currently selected mode; sent as "mode" in the /verify request body.
+    let mode = "fast";
+    // Mode toggle: highlight the clicked button and derive the mode from its id
+    // ("mode-deep" -> "deep").
+    document.querySelectorAll(".mode").forEach(btn => {
+      btn.addEventListener("click", () => {
+        document.querySelectorAll(".mode").forEach(b => b.classList.remove("active"));
+        btn.classList.add("active");
+        mode = btn.id.replace("mode-","");
+      });
+    });
+    // Demo claims: copy the button's data-claim text into the textarea
+    // (the user still has to press "Verify Claim").
+    document.querySelectorAll(".demo").forEach(btn => {
+      btn.addEventListener("click", () => {
+        claimInput.value = btn.dataset.claim;
+      });
+    });
+    // Append a result card to the pipeline container.
+    // Both `title` and `content` are inserted as HTML via innerHTML, so callers
+    // must escape any untrusted text before passing it in.
+    function addCard(title, content) {
+      const card = document.createElement("section");
+      card.className = "card";
+      card.innerHTML = `<h3>${title}</h3><div>${content}</div>`;
+      pipeline.appendChild(card);
+    }
+    // Render the deep-analysis comparison list as an HTML <ul>.
+    // Each item is expected to carry claim_span, evidence_span and relation.
+    // All three are HTML-escaped because they come from the server response.
+    // Returns a placeholder string when `list` is not a non-empty array.
+    function renderComparison(list) {
+      const entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
+      const esc = value => (value == null ? "" : String(value)).replace(/[&<>"']/g, ch => entities[ch]);
+      if (!Array.isArray(list) || !list.length) return "<em>No comparisons available</em>";
+      return "<ul>" + list.map(item => {
+        const c = item || {};
+        return `<li><strong>${esc(c.claim_span)}</strong> ↔ <q>${esc(c.evidence_span)}</q> <em>[${esc(c.relation)}]</em></li>`;
+      }).join("") + "</ul>";
+    }
+    // Verify button: POST the claim to /verify and render one card per pipeline step.
+    verifyBtn.addEventListener("click", async () => {
+      // Response fields can contain user input and text scraped from the web,
+      // so everything interpolated into markup is HTML-escaped first.
+      const entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
+      const esc = value => (value == null ? "" : String(value)).replace(/[&<>"']/g, ch => entities[ch]);
+      const text = claimInput.value.trim();
+      if (!text) return alert("Please enter a claim.");
+      pipeline.innerHTML = "<p class='loading'>Checking claim...</p>";
+      try {
+        const res = await fetch("/verify", {
+          method: "POST",
+          headers: {"Content-Type":"application/json"},
+          body: JSON.stringify({text, mode})
+        });
+        // An error response has no pipeline fields; fail explicitly instead of
+        // tripping over undefined properties further down.
+        if (!res.ok) throw new Error(`Verify request failed with HTTP ${res.status}`);
+        const data = await res.json();
+        const fast = data.fast || {};
+        const deep = data.deep || {};
+        const credibility = data.credibility || {};
+        pipeline.innerHTML = "";
+        addCard("Step 1 — Classification", `<p>Type: <b>${esc(data.text_type)}</b></p>`);
+        addCard("Step 2 — Summary", `<p>${esc(data.user_summary)}</p>`);
+        addCard("Step 3 — Search Results", `<pre>${esc(data.evidence_summary)}</pre>`);
+        addCard("Step 4 — Evidence Filtered", `<p>Found ${esc(data.evidence_count_filtered)} credible sources</p>`);
+        addCard("Step 5 — Fast Label", `<p>${esc(fast.label)} (conf ${esc(fast.confidence)})</p>`);
+        // The backend may omit the similarity score; show a placeholder rather than "undefined".
+        const similarity = data.similarity == null ? "Not available" : esc(data.similarity);
+        addCard("Step 6 — Similarity Score", `<p>${similarity}</p>`);
+        if (Object.keys(deep).length > 0) {
+          const takeaways = Array.isArray(deep.takeaways) ? deep.takeaways : [];
+          addCard("Step 7 — Deep AI Verdict",
+            `<p><b>Outcome:</b> ${esc(deep.outcome)}</p>
+             <p>${esc(deep.explanation)}</p>
+             <h4>Comparison</h4>${renderComparison(deep.comparison)}
+             <h4>Takeaways</h4><ul>${takeaways.map(t=>`<li>${esc(t)}</li>`).join("")}</ul>
+             <p><i>Tip:</i> ${esc(deep.tip)}</p>`);
+        }
+        // Only known colour names may reach the style attribute.
+        const color = ["green","yellow","red"].includes(credibility.color) ? credibility.color : "inherit";
+        addCard("Step 8 — Credibility Score",
+          `<p>Score: <b style="color:${color}">${esc(credibility.score)}</b>/100</p>`);
+      } catch (err) {
+        console.error(err);
+        pipeline.innerHTML = "<p class='error'>Error verifying claim.</p>";
+      }
+    });
+  </script>
+</body>
+</html>