Upload 26 files
Browse files- .env +46 -0
- .gitignore +41 -0
- README.md +235 -10
- backend/Dockerfile +27 -0
- backend/agents.py +338 -0
- backend/app.py +513 -0
- backend/core/config.py +98 -0
- backend/core/db_init.py +143 -0
- backend/core/logging.py +69 -0
- backend/core/models.py +197 -0
- backend/gatekeeper.py +133 -0
- backend/grok_sensor.py +183 -0
- backend/producers/producers.py +338 -0
- backend/pyproject.toml +80 -0
- backend/rag_pipeline.py +285 -0
- backend/static/index.html +783 -0
- backend/tests/test_pipeline.py +305 -0
- docker-compose.yml +164 -0
- extension/entrypoints/background.ts +179 -0
- extension/entrypoints/content.tsx +453 -0
- extension/entrypoints/popup.tsx +232 -0
- extension/package.json +32 -0
- extension/stores/extensionStore.ts +145 -0
- extension/tsconfig.json +18 -0
- extension/wxt.config.ts +52 -0
- infra/tunnel_setup.sh +180 -0
.env
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =============================================================================
|
| 2 |
+
# Omnichannel Fact & Hallucination Intelligence System
|
| 3 |
+
# Environment Configuration Template
|
| 4 |
+
# Copy to .env and fill in your values
|
| 5 |
+
# =============================================================================
|
| 6 |
+
|
| 7 |
+
# ---------------------------------------------------------------------------
|
| 8 |
+
# LLM API Keys (set these in HuggingFace Spaces β Settings β Secrets)
|
| 9 |
+
# ---------------------------------------------------------------------------
|
| 10 |
+
|
| 11 |
+
# Groq API key β used for gatekeeper (llama3-8b), misinformation agent (mixtral-8x7b),
|
| 12 |
+
# AND hallucination agent (llama3-70b). All free via Groq's free tier (30 req/min).
|
| 13 |
+
# Get one at: https://console.groq.com
|
| 14 |
+
# SECURITY: never commit real keys -- the previously committed key must be revoked/rotated.
GROQ_API_KEY=
|
| 15 |
+
|
| 16 |
+
# X (Twitter) API v2 Bearer Token β used for tweet velocity + Community Notes
|
| 17 |
+
# Optional β system falls back to deterministic mock data without it.
|
| 18 |
+
# Get one at: https://developer.twitter.com
|
| 19 |
+
# SECURITY: never commit real tokens -- rotate any previously committed token.
X_BEARER_TOKEN=
|
| 20 |
+
|
| 21 |
+
# ---------------------------------------------------------------------------
|
| 22 |
+
# Infrastructure (auto-configured in Docker Compose β only change for custom setups)
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
|
| 25 |
+
QDRANT_HOST=localhost
|
| 26 |
+
QDRANT_PORT=6333
|
| 27 |
+
|
| 28 |
+
MEMGRAPH_HOST=localhost
|
| 29 |
+
MEMGRAPH_PORT=7687
|
| 30 |
+
MEMGRAPH_PASSWORD=memgraph123
|
| 31 |
+
|
| 32 |
+
REDPANDA_BROKERS=localhost:9092
|
| 33 |
+
|
| 34 |
+
REDIS_URL=redis://localhost:6379
|
| 35 |
+
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
# App Configuration
|
| 38 |
+
# ---------------------------------------------------------------------------
|
| 39 |
+
|
| 40 |
+
PORT=7860
|
| 41 |
+
LOG_LEVEL=INFO
|
| 42 |
+
|
| 43 |
+
# DEMO_MODE=true: Use mock data for all external APIs (LLMs, X API)
|
| 44 |
+
# Useful for exploring the UI/architecture without any API credentials.
|
| 45 |
+
# The system still runs the full pipeline β just with deterministic mock outputs.
|
| 46 |
+
DEMO_MODE=false
|
.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
.venv/
|
| 6 |
+
.env
|
| 7 |
+
*.egg-info/
|
| 8 |
+
dist/
|
| 9 |
+
build/
|
| 10 |
+
|
| 11 |
+
# uv
|
| 12 |
+
uv.lock
|
| 13 |
+
|
| 14 |
+
# Extension
|
| 15 |
+
extension/node_modules/
|
| 16 |
+
extension/.output/
|
| 17 |
+
extension/.wxt/
|
| 18 |
+
|
| 19 |
+
# Infra
|
| 20 |
+
*.pem
|
| 21 |
+
*.key
|
| 22 |
+
.cloudflared/
|
| 23 |
+
|
| 24 |
+
# Data
|
| 25 |
+
*.jsonl
|
| 26 |
+
*.vtt
|
| 27 |
+
|
| 28 |
+
# IDE
|
| 29 |
+
.vscode/
|
| 30 |
+
.idea/
|
| 31 |
+
*.swp
|
| 32 |
+
|
| 33 |
+
# Docker
|
| 34 |
+
.docker/
|
| 35 |
+
|
| 36 |
+
# Logs
|
| 37 |
+
*.log
|
| 38 |
+
|
| 39 |
+
# OS
|
| 40 |
+
.DS_Store
|
| 41 |
+
Thumbs.db
|
README.md
CHANGED
|
@@ -1,14 +1,239 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
| 7 |
-
sdk_version: 6.12.0
|
| 8 |
-
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
license:
|
| 11 |
-
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Omnichannel Fact & Hallucination Intelligence System
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
app_port: 7860
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Omnichannel Fact & Hallucination Intelligence System
|
| 13 |
+
|
| 14 |
+
**Near-zero-latency real-time fact-checking and AI hallucination detection β deployed universally via a browser extension across X/Twitter, YouTube, Instagram, news sites, and AI chat interfaces.**
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## Architecture
|
| 19 |
+
|
| 20 |
+
```
|
| 21 |
+
Browser Extension (WXT + React 19 + Framer Motion)
|
| 22 |
+
β WebSocket (wss://)
|
| 23 |
+
βΌ
|
| 24 |
+
FastAPI Backend βββΊ Redis Stack (cache, 6h/15min TTL)
|
| 25 |
+
β
|
| 26 |
+
ββββΊ Gatekeeper: Groq llama3-8b-8192 (<120ms p95)
|
| 27 |
+
β βββ noise β drop | fact β continue
|
| 28 |
+
β
|
| 29 |
+
ββββΊ RAG Pipeline (concurrent)
|
| 30 |
+
β βββ FastEmbed BGE-M3 embeddings (CPU, multilingual)
|
| 31 |
+
β βββ Qdrant ANN search (HNSW ef=128, top-8, 72h window)
|
| 32 |
+
β βββ Memgraph trust graph traversal (in-memory Cypher)
|
| 33 |
+
β
|
| 34 |
+
ββββΊ Grok Sensor (concurrent)
|
| 35 |
+
β βββ X API v2 velocity + Community Notes
|
| 36 |
+
β
|
| 37 |
+
ββββΊ Prefect Flow (multi-agent evaluation)
|
| 38 |
+
βββ misinformation_task: Groq mixtral-8x7b-32768
|
| 39 |
+
βββ hallucination_task: Groq llama3-70b-8192 (AI platforms only)
|
| 40 |
+
β
|
| 41 |
+
βΌ
|
| 42 |
+
AnalysisResult β WebSocket β Extension β DOM highlight + hover card
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
---
|
| 46 |
+
|
| 47 |
+
## Stack
|
| 48 |
+
|
| 49 |
+
| Layer | Technology | Why |
|
| 50 |
+
|-------|-----------|-----|
|
| 51 |
+
| Extension framework | WXT v0.19 + React 19 | HMR, multi-browser, TypeScript-first, Vite |
|
| 52 |
+
| Extension state | Zustand + chrome.storage.sync | Persistent, reactive, cross-context |
|
| 53 |
+
| LLM gatekeeper | Groq llama3-8b-8192 | 800+ tok/s, <100ms, no GPU needed |
|
| 54 |
+
| LLM evaluation | LiteLLM β Groq mixtral-8x7b / llama3-70b | All free via Groq β swap providers without code changes |
|
| 55 |
+
| Embeddings | BGE-M3 via FastEmbed | 100+ languages, 1024-dim, CPU-native, free |
|
| 56 |
+
| Vector DB | Qdrant (self-hosted) | Sub-ms HNSW search, no vendor lock-in |
|
| 57 |
+
| Graph DB | Memgraph (in-memory) | 10β100x faster than Neo4j for trust scoring |
|
| 58 |
+
| Message queue | Redpanda | Kafka-compatible, no JVM, 10x lower latency |
|
| 59 |
+
| Orchestration | Prefect | Native async, DAG flows, built-in retry |
|
| 60 |
+
| Cache | Redis Stack (RedisJSON) | Structured claim cache, TTL per verdict color |
|
| 61 |
+
| Package manager | uv | 10β100x faster than pip, lockfiles |
|
| 62 |
+
| Hashing | xxhash (client + server) | Sub-microsecond content deduplication |
|
| 63 |
+
| Edge tunnel | Cloudflare Tunnel | Zero-config TLS, no exposed ports |
|
| 64 |
+
| Observability | structlog + rich | Structured JSON logs, colorized dev output |
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
## Quick Start (HuggingFace Spaces)
|
| 69 |
+
|
| 70 |
+
This Space runs the **backend + demo UI** via Docker. The browser extension is a separate build.
|
| 71 |
+
|
| 72 |
+
### Required Secrets (set in Space settings β Secrets)
|
| 73 |
+
|
| 74 |
+
| Secret | Required | Description |
|
| 75 |
+
|--------|----------|-------------|
|
| 76 |
+
| `GROQ_API_KEY` | Recommended | Groq API key β powers all 3 LLM agents (gatekeeper, misinformation, hallucination). Free tier: 30 req/min |
|
| 77 |
+
| `X_BEARER_TOKEN` | Optional | X API v2 bearer token for tweet velocity + Community Notes |
|
| 78 |
+
|
| 79 |
+
**Without any API keys**: The system runs in `DEMO_MODE=true` with deterministic mock results β great for exploring the UI and architecture without credentials.
|
| 80 |
+
|
| 81 |
+
Get a free key:
|
| 82 |
+
- Groq: https://console.groq.com (free tier: 30 req/min β covers all 3 LLM agents)
|
| 83 |
+
|
| 84 |
+
### Run Locally
|
| 85 |
+
|
| 86 |
+
```bash
|
| 87 |
+
git clone <repo>
|
| 88 |
+
cd omnichannel-fact-intelligence
|
| 89 |
+
|
| 90 |
+
# Copy env template
|
| 91 |
+
cp .env.example .env
|
| 92 |
+
# Edit .env with your API keys
|
| 93 |
+
|
| 94 |
+
# Start all services (Qdrant, Memgraph, Redpanda, Redis, FastAPI)
|
| 95 |
+
docker compose up
|
| 96 |
+
|
| 97 |
+
# Visit http://localhost:7860 for the demo UI
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
### Run Backend Only (no Docker for infra)
|
| 101 |
+
|
| 102 |
+
```bash
|
| 103 |
+
cd backend
|
| 104 |
+
|
| 105 |
+
# Install uv (if not installed)
|
| 106 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 107 |
+
|
| 108 |
+
# Install dependencies
|
| 109 |
+
uv sync
|
| 110 |
+
|
| 111 |
+
# Set env vars
|
| 112 |
+
export GROQ_API_KEY=your_key
|
| 113 |
+
export DEMO_MODE=true # Skip infrastructure deps for quick testing
|
| 114 |
+
|
| 115 |
+
# Start FastAPI
|
| 116 |
+
uv run uvicorn app:app --host 0.0.0.0 --port 7860 --reload
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
---
|
| 120 |
+
|
| 121 |
+
## Browser Extension Setup
|
| 122 |
+
|
| 123 |
+
### Prerequisites
|
| 124 |
+
```bash
|
| 125 |
+
cd extension
|
| 126 |
+
npm install # or: bun install
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
### Development (Chrome)
|
| 130 |
+
```bash
|
| 131 |
+
# Set your backend URL (or use cloudflared tunnel)
|
| 132 |
+
WS_URL=ws://localhost:7860/ws npx wxt dev --browser chrome
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### Production Build
|
| 136 |
+
```bash
|
| 137 |
+
# Build for all browsers
|
| 138 |
+
WS_URL=wss://fact-engine.your-domain.com/ws npx wxt build
|
| 139 |
+
|
| 140 |
+
# Chrome: .output/chrome-mv3/
|
| 141 |
+
# Firefox: .output/firefox-mv3/
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### Load in Chrome
|
| 145 |
+
1. Navigate to `chrome://extensions`
|
| 146 |
+
2. Enable **Developer mode** (top right)
|
| 147 |
+
3. Click **Load unpacked** β select `.output/chrome-mv3/`
|
| 148 |
+
4. Visit X/Twitter, YouTube, or any news site β facts will begin highlighting
|
| 149 |
+
|
| 150 |
+
---
|
| 151 |
+
|
| 152 |
+
## Highlight Color Semantics
|
| 153 |
+
|
| 154 |
+
| Color | Hex | Meaning |
|
| 155 |
+
|-------|-----|---------|
|
| 156 |
+
| π’ Green | `#22c55e` | Fact-checked β corroborated by β₯2 sources, trust score β₯ 0.65 |
|
| 157 |
+
| π‘ Yellow | `#eab308` | Unverified β breaking news, weak corroboration, high velocity |
|
| 158 |
+
| π΄ Red | `#ef4444` | Debunked β refuted by β₯2 independent sources or Community Note active |
|
| 159 |
+
| π£ Purple | `#a855f7` | AI hallucination β fabricated citation, impossibility, contradiction |
|
| 160 |
+
|
| 161 |
+
---
|
| 162 |
+
|
| 163 |
+
## Trust Score Algorithm
|
| 164 |
+
|
| 165 |
+
```
|
| 166 |
+
score = 0.5 (baseline)
|
| 167 |
+
+ 0.30 if Author.verified AND account_type IN ['government', 'official_news']
|
| 168 |
+
+ 0.05 per corroborating Source node (capped at +0.25, i.e. 5 sources)
|
| 169 |
+
- 0.40 if any Source has an active Community Note
|
| 170 |
+
= clamp(score, 0.0, 1.0)
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
---
|
| 174 |
+
|
| 175 |
+
## Data Pipeline
|
| 176 |
+
|
| 177 |
+
Three async Redpanda producers simulate the omnichannel firehose:
|
| 178 |
+
|
| 179 |
+
| Producer | Topic | Rate | Source |
|
| 180 |
+
|----------|-------|------|--------|
|
| 181 |
+
| twitter_producer | `raw.twitter` | 50 eps | Mock X posts |
|
| 182 |
+
| instagram_producer | `raw.instagram` | 20 eps | Mock story text (OCR-extracted) |
|
| 183 |
+
| youtube_producer | `raw.youtube` | 10 eps | Mock VTT transcript chunks |
|
| 184 |
+
|
| 185 |
+
A single async consumer aggregates all three, deduplicates by `content_hash`, and upserts into Qdrant + Memgraph.
|
| 186 |
+
|
| 187 |
+
---
|
| 188 |
+
|
| 189 |
+
## Extension Modes
|
| 190 |
+
|
| 191 |
+
| Mode | Shows |
|
| 192 |
+
|------|-------|
|
| 193 |
+
| Minimal | Red + Purple only |
|
| 194 |
+
| Normal (default) | Red + Purple + Yellow |
|
| 195 |
+
| Advanced | All colors including Green |
|
| 196 |
+
|
| 197 |
+
---
|
| 198 |
+
|
| 199 |
+
## File Structure
|
| 200 |
+
|
| 201 |
+
```
|
| 202 |
+
omnichannel-fact-intelligence/
|
| 203 |
+
βββ docker-compose.yml # All services in one command
|
| 204 |
+
βββ .env.example # Environment template
|
| 205 |
+
β
|
| 206 |
+
βββ backend/
|
| 207 |
+
β βββ Dockerfile # uv + Python 3.12
|
| 208 |
+
β βββ pyproject.toml # All deps pinned (uv-compatible)
|
| 209 |
+
β βββ app.py # FastAPI app, WebSocket, Redis cache
|
| 210 |
+
β βββ gatekeeper.py # Groq fact/noise classifier (<120ms p95)
|
| 211 |
+
β βββ rag_pipeline.py # BGE-M3 + Qdrant + Memgraph trust graph
|
| 212 |
+
β βββ grok_sensor.py # X API v2 + Community Notes
|
| 213 |
+
β βββ agents.py # Prefect flow + LiteLLM multi-agent eval
|
| 214 |
+
β βββ core/
|
| 215 |
+
β β βββ config.py # Pydantic-settings centralized config
|
| 216 |
+
β β βββ models.py # All Pydantic v2 models
|
| 217 |
+
β βββ producers/
|
| 218 |
+
β β βββ producers.py # Twitter + Instagram + YouTube + consumer
|
| 219 |
+
β βββ static/
|
| 220 |
+
β βββ index.html # Demo UI (served at /)
|
| 221 |
+
β
|
| 222 |
+
βββ extension/
|
| 223 |
+
β βββ wxt.config.ts # WXT framework config
|
| 224 |
+
β βββ stores/
|
| 225 |
+
β β βββ extensionStore.ts # Zustand + chrome.storage.sync
|
| 226 |
+
β βββ entrypoints/
|
| 227 |
+
β βββ background.ts # Persistent WS connection + message routing
|
| 228 |
+
β βββ content.tsx # MutationObserver + highlight + hover card
|
| 229 |
+
β βββ popup.tsx # Master toggle + mode selector + badge
|
| 230 |
+
β
|
| 231 |
+
βββ infra/
|
| 232 |
+
βββ tunnel_setup.sh # Cloudflare Tunnel setup script
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
---
|
| 236 |
+
|
| 237 |
+
## License
|
| 238 |
+
|
| 239 |
+
MIT β see LICENSE for details.
|
backend/Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend image: Python 3.12 slim + uv-managed virtualenv, serving on port 7860.
FROM python:3.12-slim

# Install uv -- 10-100x faster than pip, proper lockfiles
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

WORKDIR /app

# Install system deps for FastEmbed / BGE-M3 CPU inference
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential curl git \
    && rm -rf /var/lib/apt/lists/*

# Copy dependency files first (layer cache optimization: deps layer is rebuilt
# only when pyproject/lockfile change, not on every source edit)
COPY pyproject.toml uv.lock* ./

# Install all Python dependencies into the project virtual env
RUN uv sync --frozen --no-dev

# Copy application source
COPY . .

# Pre-download BGE-M3 model so cold starts are instant.
# "|| true" makes this best-effort: the build still succeeds offline,
# and the model is fetched lazily at first use instead.
RUN uv run python -c "from fastembed import TextEmbedding; TextEmbedding('BAAI/bge-m3')" || true

EXPOSE 7860

# Runs backend/app.py directly (the app starts its own server on $PORT).
CMD ["uv", "run", "python", "app.py"]
|
backend/agents.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
agents.py β Prefect-orchestrated multi-agent evaluation layer.
|
| 3 |
+
|
| 4 |
+
Two concurrent agents evaluate each claim:
|
| 5 |
+
|
| 6 |
+
1. misinformation_task β Groq mixtral-8x7b-32768
|
| 7 |
+
Given: claim + top-3 RAG evidence chunks + trust score
|
| 8 |
+
Output: color (red|yellow|green), confidence, explanation, sources
|
| 9 |
+
|
| 10 |
+
2. hallucination_task β Groq llama3-70b-8192 (runs ONLY on AI chat platforms)
|
| 11 |
+
Given: claim text
|
| 12 |
+
Output: color (purple|green), confidence, explanation
|
| 13 |
+
Checks for: fabricated citations, statistical impossibilities,
|
| 14 |
+
internal contradictions, LLM-specific failure patterns
|
| 15 |
+
|
| 16 |
+
Both tasks run concurrently via asyncio.gather. Prefect merges results,
|
| 17 |
+
picks higher-severity color, returns the final AnalysisResult.
|
| 18 |
+
|
| 19 |
+
Why Prefect over Celery:
|
| 20 |
+
- Dynamic DAG-based orchestration (no pre-declared task graph)
|
| 21 |
+
- Native async support β no gevent hacks needed
|
| 22 |
+
- Built-in retry with exponential backoff per task
|
| 23 |
+
- Far better observability: every flow run gets a full execution trace
|
| 24 |
+
- Deployable without a separate worker process (embedded server mode)
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import asyncio
|
| 28 |
+
import time
|
| 29 |
+
from typing import Literal
|
| 30 |
+
|
| 31 |
+
import structlog
|
| 32 |
+
from litellm import acompletion
|
| 33 |
+
from prefect import flow, task
|
| 34 |
+
from prefect.tasks import task_input_hash
|
| 35 |
+
|
| 36 |
+
from core.config import HighlightColor, Platform, Settings, get_settings
|
| 37 |
+
from core.models import AnalysisResult, EvidenceChunk, GrokSensorResult, RAGResult, SourceRef, TrustScore
|
| 38 |
+
|
| 39 |
+
log = structlog.get_logger(__name__)
|
| 40 |
+
|
| 41 |
+
# ---------------------------------------------------------------------------
|
| 42 |
+
# Color severity ordering (higher index = more severe)
|
| 43 |
+
# ---------------------------------------------------------------------------
|
| 44 |
+
# Severity ranking: the index position doubles as the ordering, so merging two
# agent verdicts reduces to comparing SEVERITY[color] values (higher = worse).
SEVERITY: dict[HighlightColor, int] = {
    color: rank
    for rank, color in enumerate(
        (
            HighlightColor.GREEN,
            HighlightColor.YELLOW,
            HighlightColor.RED,
            HighlightColor.PURPLE,
        )
    )
}
|
| 50 |
+
|
| 51 |
+
# ---------------------------------------------------------------------------
|
| 52 |
+
# LiteLLM prompts
|
| 53 |
+
# ---------------------------------------------------------------------------
|
| 54 |
+
|
| 55 |
+
MISINFO_SYSTEM = """You are a professional fact-checker with access to recent evidence.
|
| 56 |
+
Analyze the claim against the evidence chunks and trust score. Output ONLY valid JSON.
|
| 57 |
+
|
| 58 |
+
Output schema (no markdown, no preamble):
|
| 59 |
+
{
|
| 60 |
+
"color": "red" | "yellow" | "green",
|
| 61 |
+
"confidence": <integer 0-100>,
|
| 62 |
+
"explanation": "<2-3 sentence explanation for the hover card>",
|
| 63 |
+
"verdict_label": "<8 words max, e.g. 'Debunked by Reuters and AP'>",
|
| 64 |
+
"sources": ["<url1>", "<url2>", "<url3>"]
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
Color logic:
|
| 68 |
+
- "green": Claim is factually accurate, corroborated by β₯2 independent sources, trust score β₯ 0.65
|
| 69 |
+
- "yellow": Claim is unverified, breaking news, or evidence is weak/contradictory
|
| 70 |
+
- "red": Claim is demonstrably false, debunked by β₯2 sources, OR trust score < 0.25, OR community note active"""
|
| 71 |
+
|
| 72 |
+
MISINFO_USER_TMPL = """Claim: {claim}
|
| 73 |
+
|
| 74 |
+
Trust score: {trust_score:.2f} (0=untrustworthy, 1=highly trusted)
|
| 75 |
+
Author verified: {verified}
|
| 76 |
+
Active Community Note: {has_note}{note_text_part}
|
| 77 |
+
Corroborating sources in database: {source_count}
|
| 78 |
+
|
| 79 |
+
Evidence chunks (cosine similarity descending):
|
| 80 |
+
{evidence_text}
|
| 81 |
+
|
| 82 |
+
Analyze and output JSON."""
|
| 83 |
+
|
| 84 |
+
HALLUCINATION_SYSTEM = """You are an LLM output auditor specializing in detecting AI hallucinations.
|
| 85 |
+
Analyze the following text that was generated by an AI system. Output ONLY valid JSON.
|
| 86 |
+
|
| 87 |
+
Output schema:
|
| 88 |
+
{
|
| 89 |
+
"color": "purple" | "green",
|
| 90 |
+
"confidence": <integer 0-100>,
|
| 91 |
+
"explanation": "<specific explanation of what's wrong, or confirmation it's accurate>"
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
Check for:
|
| 95 |
+
1. Fabricated citations: URLs, paper titles, author names that don't exist
|
| 96 |
+
2. Statistical impossibilities: numbers that exceed known bounds (e.g., "500% of people")
|
| 97 |
+
3. Internal contradictions: statements that contradict each other within the text
|
| 98 |
+
4. Temporal paradoxes: referencing future events as past, or anachronistic details
|
| 99 |
+
5. Entity confusion: mixing attributes of different real-world entities
|
| 100 |
+
|
| 101 |
+
Color "purple" only if you find a clear, specific hallucination pattern.
|
| 102 |
+
Color "green" if the text appears factually coherent (you cannot verify external facts)."""
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ---------------------------------------------------------------------------
|
| 106 |
+
# Prefect tasks β each is independently retried with exponential backoff
|
| 107 |
+
# ---------------------------------------------------------------------------
|
| 108 |
+
|
| 109 |
+
@task(
    name="misinformation-agent",
    retries=2,
    retry_delay_seconds=[1, 3],
    cache_key_fn=task_input_hash,
    cache_expiration=None,
    log_prints=False,
)
async def misinformation_task(
    claim: str,
    evidence: list[EvidenceChunk],
    trust: TrustScore,
    grok: GrokSensorResult,
    settings: Settings,
) -> dict:
    """
    Evaluate *claim* against RAG evidence with the configured Groq model.

    Builds a prompt from the top-3 evidence chunks (by cosine score), the
    trust-graph signals, and any active Community Note, then asks the LLM
    for a strict-JSON verdict (color / confidence / explanation / sources).

    Args:
        claim: Raw claim text (truncated to 500 chars for the prompt).
        evidence: Retrieved evidence chunks; may be empty.
        trust: Trust-graph score plus author / community-note signals.
        grok: X velocity + Community Notes sensor output. NOTE(review): not
            referenced in the prompt itself, but it participates in the
            Prefect cache key via task_input_hash.
        settings: App settings providing the model name and API key.

    Returns:
        Parsed JSON dict matching the MISINFO_SYSTEM output schema.

    Raises:
        json.JSONDecodeError: If the model returns malformed JSON despite
            response_format=json_object (Prefect retries up to 2 times).
    """
    import json  # hoisted from the bottom of the function -- idiomatic placement

    # Top-3 chunks by similarity; fall back to an explicit "no evidence" note
    # so the model never sees an empty evidence section ("".join(...) is falsy).
    top_evidence = sorted(evidence, key=lambda e: e.score, reverse=True)[:3]
    evidence_text = "\n\n".join(
        f"[{i+1}] Source: {e.domain} (similarity: {e.score:.3f})\n{e.text[:400]}"
        for i, e in enumerate(top_evidence)
    ) or "No evidence chunks retrieved (claim may be too recent or niche)."

    note_part = f"\nCommunity Note: {trust.community_note_text}" if trust.community_note_text else ""

    user_prompt = MISINFO_USER_TMPL.format(
        claim=claim[:500],
        trust_score=trust.score,
        verified=trust.author_verified,
        has_note=trust.has_community_note,
        note_text_part=note_part,
        source_count=trust.corroborating_sources,
        evidence_text=evidence_text,
    )

    # LiteLLM routes to the provider encoded in the model string -- swap to
    # "openai/gpt-4o" or "groq/llama3-70b-8192" with zero code changes elsewhere.
    response = await acompletion(
        model=settings.misinformation_model,
        messages=[
            {"role": "system", "content": MISINFO_SYSTEM},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"},
        temperature=0.1,
        max_tokens=400,
        api_key=settings.groq_api_key or None,
    )

    raw = response.choices[0].message.content or "{}"
    return json.loads(raw)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@task(
    name="hallucination-agent",
    retries=2,
    retry_delay_seconds=[1, 3],
    log_prints=False,
)
async def hallucination_task(claim: str, settings: Settings) -> dict:
    """
    Audit AI-generated text for hallucination patterns.

    Uses the configured model (settings.hallucination_model, e.g.
    groq/llama3-70b-8192 -- previously Claude Haiku, same prompt and output
    schema). Only invoked when the source platform is an AI chat interface.

    Args:
        claim: The AI-generated text to audit (truncated to 1000 chars).
        settings: App settings providing the model name and API key.

    Returns:
        Parsed JSON dict matching the HALLUCINATION_SYSTEM schema
        (color: "purple"|"green", confidence, explanation).

    Raises:
        json.JSONDecodeError: If the model returns malformed JSON despite
            response_format=json_object (Prefect retries up to 2 times).
    """
    import json  # hoisted from the bottom of the function -- idiomatic placement

    response = await acompletion(
        model=settings.hallucination_model,
        messages=[
            {"role": "system", "content": HALLUCINATION_SYSTEM},
            {"role": "user", "content": f"Audit this AI-generated text:\n\n{claim[:1000]}"},
        ],
        response_format={"type": "json_object"},
        temperature=0.0,  # deterministic auditing -- no creativity wanted
        max_tokens=300,
        api_key=settings.groq_api_key or None,
    )

    raw = response.choices[0].message.content or "{}"
    return json.loads(raw)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def _demo_misinfo_result(trust_score: float, has_note: bool) -> dict:
|
| 196 |
+
"""Deterministic demo result when LLM keys are absent."""
|
| 197 |
+
if has_note or trust_score < 0.25:
|
| 198 |
+
return {
|
| 199 |
+
"color": "red", "confidence": 82,
|
| 200 |
+
"explanation": "Demo mode: trust score below threshold and/or active community note detected.",
|
| 201 |
+
"verdict_label": "Low trust signal detected",
|
| 202 |
+
"sources": [],
|
| 203 |
+
}
|
| 204 |
+
elif trust_score < 0.55:
|
| 205 |
+
return {
|
| 206 |
+
"color": "yellow", "confidence": 61,
|
| 207 |
+
"explanation": "Demo mode: insufficient corroboration to confirm or deny this claim.",
|
| 208 |
+
"verdict_label": "Unverified β insufficient evidence",
|
| 209 |
+
"sources": [],
|
| 210 |
+
}
|
| 211 |
+
return {
|
| 212 |
+
"color": "green", "confidence": 78,
|
| 213 |
+
"explanation": "Demo mode: claim appears well-corroborated based on trust graph signals.",
|
| 214 |
+
"verdict_label": "Appears credible",
|
| 215 |
+
"sources": [],
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def _demo_hallucination_result() -> dict:
|
| 220 |
+
return {
|
| 221 |
+
"color": "purple", "confidence": 71,
|
| 222 |
+
"explanation": "Demo mode: AI-generated content detected. Unable to verify external citations without live API.",
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
# ---------------------------------------------------------------------------
|
| 227 |
+
# Main Prefect flow
|
| 228 |
+
# ---------------------------------------------------------------------------
|
| 229 |
+
|
| 230 |
+
@flow(name="fact-intelligence-pipeline", log_prints=False)
async def evaluate_claim(
    claim: str,
    claim_hash: str,
    element_id: str,
    platform: Platform,
    rag_result: RAGResult,
    grok_result: GrokSensorResult,
    settings: Settings | None = None,
) -> AnalysisResult:
    """
    Orchestrates the full multi-agent evaluation as a Prefect flow.

    Concurrent execution:
      - misinformation_task always runs
      - hallucination_task runs only for AI chat platforms

    Results are merged by taking the higher-severity color.
    The final AnalysisResult is returned directly (no Celery queue needed).

    Args:
        claim: The text segment being evaluated.
        claim_hash: Content hash used as the cache / dedup key.
        element_id: DOM element id the verdict is attached to client-side.
        platform: Origin platform; AI chat platforms also get the hallucination check.
        rag_result: Retrieval output (evidence chunks + trust score).
        grok_result: Social-signal output (velocity, community-note flag).
        settings: Optional override; defaults to the cached global Settings.
    """
    cfg = settings or get_settings()
    t0 = time.perf_counter()  # wall-clock start for latency_ms reporting

    is_ai_platform = platform in (Platform.CHATGPT, Platform.CLAUDE, Platform.GEMINI)

    # Determine whether to use demo mode: explicit flag, or no Groq key configured at all.
    use_demo = cfg.demo_mode or not cfg.has_groq

    if use_demo:
        # Deterministic canned verdicts keep the demo UI functional without API keys.
        misinfo_raw = _demo_misinfo_result(rag_result.trust.score, grok_result.community_note)
        halluc_raw = _demo_hallucination_result() if is_ai_platform else None
    else:
        # Concurrently run both agents when applicable
        # Both agents now use Groq (free) — no Anthropic key needed
        if is_ai_platform and cfg.has_groq:
            misinfo_raw, halluc_raw = await asyncio.gather(
                misinformation_task(claim, rag_result.evidence, rag_result.trust, grok_result, cfg),
                hallucination_task(claim, cfg),
            )
        else:
            misinfo_raw = await misinformation_task(
                claim, rag_result.evidence, rag_result.trust, grok_result, cfg
            )
            halluc_raw = None

    # --- Merge results: pick higher-severity color ---
    # Start from the misinformation verdict; defaults guard against missing keys
    # in the (JSON-parsed) agent output.
    misinfo_color = HighlightColor(misinfo_raw.get("color", "yellow"))
    final_color = misinfo_color
    final_confidence = misinfo_raw.get("confidence", 50)
    final_explanation = misinfo_raw.get("explanation", "")
    final_verdict = misinfo_raw.get("verdict_label", "Under review")

    if halluc_raw:
        halluc_color = HighlightColor(halluc_raw.get("color", "green"))
        # The hallucination verdict wins only when strictly more severe.
        if SEVERITY[halluc_color] > SEVERITY[final_color]:
            final_color = halluc_color
            final_confidence = halluc_raw.get("confidence", final_confidence)
            final_explanation = halluc_raw.get("explanation", final_explanation)
            final_verdict = "AI hallucination detected"

    # Build SourceRef list from evidence + misinfo agent sources
    raw_sources: list[str] = misinfo_raw.get("sources", [])
    evidence_sources = [e.source_url for e in rag_result.evidence[:3] if e.source_url]
    # dict.fromkeys preserves first-seen order while deduplicating.
    combined = list(dict.fromkeys(raw_sources + evidence_sources))[:3]  # deduplicated, max 3

    source_refs = [
        SourceRef(
            url=url,
            domain=_extract_domain(url),
            favicon_url=f"https://www.google.com/s2/favicons?domain={_extract_domain(url)}&sz=16",
            snippet="",
        )
        for url in combined
    ]

    latency_ms = round((time.perf_counter() - t0) * 1000, 2)

    log.info(
        "agents.flow.complete",
        color=final_color,
        confidence=final_confidence,
        platform=platform,
        latency_ms=latency_ms,
        demo=use_demo,
    )

    return AnalysisResult(
        element_id=element_id,
        content_hash=claim_hash,
        platform=platform,
        color=final_color,
        confidence=final_confidence,
        verdict_label=final_verdict,
        explanation=final_explanation,
        sources=source_refs,
        gatekeeper_label="fact",
        trust_score=rag_result.trust.score,
        velocity=grok_result.velocity,
        has_community_note=grok_result.community_note,
        latency_ms=latency_ms,
    )
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def _extract_domain(url: str) -> str:
|
| 334 |
+
try:
|
| 335 |
+
from urllib.parse import urlparse
|
| 336 |
+
return urlparse(url).netloc.lstrip("www.")
|
| 337 |
+
except Exception:
|
| 338 |
+
return url
|
backend/app.py
ADDED
|
@@ -0,0 +1,513 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py β Single entry point for HuggingFace Spaces.
|
| 3 |
+
|
| 4 |
+
Run with:
|
| 5 |
+
uv run python app.py β HuggingFace Spaces / production
|
| 6 |
+
uv run uvicorn app:app --reload β local dev
|
| 7 |
+
|
| 8 |
+
Lifecycle on startup:
|
| 9 |
+
1. Configures structured logging
|
| 10 |
+
2. Waits for Redis / Qdrant / Memgraph to be healthy (skipped in DEMO_MODE)
|
| 11 |
+
3. Initialises Qdrant collection + Memgraph schema
|
| 12 |
+
4. Seeds demo evidence chunks into Qdrant
|
| 13 |
+
5. Warms up BGE-M3 embedder in the background
|
| 14 |
+
6. Serves FastAPI on port 7860 (HuggingFace default)
|
| 15 |
+
|
| 16 |
+
WebSocket message lifecycle (per text segment):
|
| 17 |
+
1. Extension sends TextBatch β Redis cache check (xxhash key)
|
| 18 |
+
2. Cache miss β Gatekeeper (Groq llama3-8b, <120 ms p95)
|
| 19 |
+
3. Noise β dropped. Fact β continue
|
| 20 |
+
4. Concurrent: RAG pipeline (BGE-M3 + Qdrant + Memgraph) + Grok sensor
|
| 21 |
+
5. Prefect flow: misinformation agent + hallucination agent (both Groq, free)
|
| 22 |
+
6. AnalysisResult cached in Redis (TTL: 6 h green/red, 15 min yellow, no-cache purple)
|
| 23 |
+
7. Result streamed back over WebSocket β extension applies DOM highlight + hover card
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import asyncio
|
| 27 |
+
import os
|
| 28 |
+
import sys
|
| 29 |
+
import time
|
| 30 |
+
from contextlib import asynccontextmanager
|
| 31 |
+
from typing import Any
|
| 32 |
+
|
| 33 |
+
import orjson
|
| 34 |
+
import redis.asyncio as aioredis
|
| 35 |
+
import structlog
|
| 36 |
+
import xxhash
|
| 37 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
| 38 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 39 |
+
from fastapi.responses import HTMLResponse
|
| 40 |
+
from pydantic import ValidationError
|
| 41 |
+
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
# Bootstrap logging FIRST so every subsequent import logs correctly
|
| 44 |
+
# ---------------------------------------------------------------------------
|
| 45 |
+
from core.logging import configure_logging
|
| 46 |
+
from core.config import HighlightColor, Platform, get_settings
|
| 47 |
+
|
| 48 |
+
settings = get_settings()
|
| 49 |
+
configure_logging(
|
| 50 |
+
log_level=settings.log_level,
|
| 51 |
+
json_output=os.environ.get("JSON_LOGS", "false").lower() == "true",
|
| 52 |
+
)
|
| 53 |
+
log = structlog.get_logger("app")
|
| 54 |
+
|
| 55 |
+
# ---------------------------------------------------------------------------
|
| 56 |
+
# Remaining imports (after logging is configured)
|
| 57 |
+
# ---------------------------------------------------------------------------
|
| 58 |
+
from agents import evaluate_claim
|
| 59 |
+
from core.models import AnalysisResult, GatekeeperResult, TextBatch, WSInbound, WSOutbound
|
| 60 |
+
from gatekeeper import classify_claim
|
| 61 |
+
from grok_sensor import query_grok_sensor
|
| 62 |
+
from rag_pipeline import run_rag_pipeline
|
| 63 |
+
|
| 64 |
+
# ============================================================================
|
| 65 |
+
# SECTION 1 β Infrastructure health checks (used during startup)
|
| 66 |
+
# ============================================================================
|
| 67 |
+
|
| 68 |
+
async def _wait_for_redis(url: str, timeout: int = 30) -> bool:
    """Poll Redis until it answers PING, retrying once per second.

    Returns True on the first successful round-trip, False once *timeout*
    seconds have elapsed without one.
    """
    give_up_at = time.time() + timeout
    while time.time() < give_up_at:
        try:
            conn = await aioredis.from_url(url, decode_responses=True)
            await conn.ping()
            await conn.aclose()
        except Exception:
            await asyncio.sleep(1)
        else:
            return True
    return False
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
async def _wait_for_qdrant(host: str, port: int, timeout: int = 30) -> bool:
    """Poll Qdrant's HTTP readiness probe until it reports ready.

    Args:
        host: Qdrant hostname.
        port: Qdrant REST port.
        timeout: Maximum seconds to keep polling.

    Returns:
        True once ``/readyz`` answers 200, False if the deadline passes.
    """
    import httpx

    url = f"http://{host}:{port}/readyz"
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            async with httpx.AsyncClient(timeout=2.0) as client:
                resp = await client.get(url)
            if resp.status_code == 200:
                return True
        except Exception:
            pass
        # BUGFIX: always back off between probes. Previously only connection
        # errors slept, so a non-200 readiness reply (e.g. 503 while loading)
        # caused a tight loop of back-to-back HTTP requests.
        await asyncio.sleep(1)
    return False
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
async def _wait_for_memgraph(host: str, port: int, timeout: int = 30) -> bool:
    """Poll Memgraph over Bolt until a trivial query succeeds.

    Retries every 2 s until *timeout* seconds pass; returns True on the first
    successful ``RETURN 1`` round-trip.

    NOTE(review): reads the module-level ``settings`` for the password rather
    than taking it as a parameter.
    """
    from neo4j import AsyncGraphDatabase

    uri = f"bolt://{host}:{port}"
    give_up_at = time.time() + timeout
    while time.time() < give_up_at:
        try:
            driver = AsyncGraphDatabase.driver(
                uri,
                auth=("", settings.memgraph_password),
                encrypted=False,
            )
            async with driver.session() as session:
                await session.run("RETURN 1;")
            await driver.close()
        except Exception:
            await asyncio.sleep(2)
        else:
            return True
    return False
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# ============================================================================
|
| 115 |
+
# SECTION 2 β Demo data seeding (populates Qdrant for the HF Spaces demo UI)
|
| 116 |
+
# ============================================================================
|
| 117 |
+
|
| 118 |
+
_DEMO_EVIDENCE = [
|
| 119 |
+
{
|
| 120 |
+
"text": "mRNA vaccines demonstrated sustained immune responses lasting 18-24 months across multiple peer-reviewed studies.",
|
| 121 |
+
"url": "https://www.nejm.org/doi/10.1056/NEJMoa2034577",
|
| 122 |
+
"domain": "nejm.org",
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"text": "The Federal Reserve raised interest rates by 75 basis points in June 2022, the largest single hike since 1994.",
|
| 126 |
+
"url": "https://reuters.com/markets/us/fed-hikes-rates-2022-06-15",
|
| 127 |
+
"domain": "reuters.com",
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"text": "Amazon deforestation data showed over 11,000 sq km lost in a single year at record levels.",
|
| 131 |
+
"url": "https://apnews.com/article/amazon-deforestation-record",
|
| 132 |
+
"domain": "apnews.com",
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"text": "The United Nations projects global population will peak around 10.4 billion in the 2080s based on current demographic trends.",
|
| 136 |
+
"url": "https://www.un.org/development/desa/pd/",
|
| 137 |
+
"domain": "un.org",
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"text": "Renewable energy accounted for 30% of global electricity generation in 2023 according to the International Energy Agency.",
|
| 141 |
+
"url": "https://www.iea.org/reports/renewables-2023",
|
| 142 |
+
"domain": "iea.org",
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"text": "Social media use exceeding 3 hours daily correlates with higher anxiety rates in adolescents per multiple longitudinal studies.",
|
| 146 |
+
"url": "https://jamanetwork.com/journals/jamapediatrics/fullarticle/2767581",
|
| 147 |
+
"domain": "jamanetwork.com",
|
| 148 |
+
},
|
| 149 |
+
]
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
async def _seed_demo_data() -> None:
    """Upsert demo evidence chunks into Qdrant so the demo UI returns real RAG results.

    Embeds every ``_DEMO_EVIDENCE`` entry and upserts one point per entry into
    the configured collection. Idempotent in effect for the demo (random UUIDs
    mean re-runs add duplicate points, but the small fixed corpus keeps this
    harmless). Raises on any Qdrant/embedding failure; the caller decides
    whether that is fatal.
    """
    import uuid
    from qdrant_client.models import PointStruct
    from rag_pipeline import embed_texts, get_qdrant

    log.info("demo.seed.start", count=len(_DEMO_EVIDENCE))
    client = await get_qdrant(settings)
    texts = [e["text"] for e in _DEMO_EVIDENCE]
    # One embedding call for the whole corpus; order matches _DEMO_EVIDENCE.
    vectors = await embed_texts(texts)

    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=vec,
            payload={
                # Payload schema must match what rag_pipeline reads back.
                "text": ev["text"],
                "source_url": ev["url"],
                "domain": ev["domain"],
                "platform": "news",
                "content_hash": f"demo_{i:04d}",
                "ingested_at_ts": time.time(),
                "author_handle": "demo_seed",
                "bias_rating": "center",
            },
        )
        for i, (ev, vec) in enumerate(zip(_DEMO_EVIDENCE, vectors))
    ]
    await client.upsert(collection_name=settings.qdrant_collection, points=points)
    log.info("demo.seed.complete", count=len(points))
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# ============================================================================
|
| 185 |
+
# SECTION 3 β Redis singleton
|
| 186 |
+
# ============================================================================
|
| 187 |
+
|
| 188 |
+
_redis: aioredis.Redis | None = None
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
async def get_redis() -> aioredis.Redis:
    """Return the shared module-level Redis client, creating it lazily."""
    global _redis
    if _redis is not None:
        return _redis
    _redis = await aioredis.from_url(settings.redis_url, decode_responses=True)
    return _redis
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# ============================================================================
|
| 199 |
+
# SECTION 4 β WebSocket connection manager
|
| 200 |
+
# ============================================================================
|
| 201 |
+
|
| 202 |
+
class ConnectionManager:
    """Registry of live extension WebSocket sessions, keyed by session id."""

    def __init__(self) -> None:
        # session_id -> accepted WebSocket
        self.active: dict[str, WebSocket] = {}

    async def connect(self, session_id: str, ws: WebSocket) -> None:
        """Accept the handshake and register the socket under *session_id*."""
        await ws.accept()
        self.active[session_id] = ws
        log.info("ws.connected", session_id=session_id, total=len(self.active))

    def disconnect(self, session_id: str) -> None:
        """Drop the session; a no-op for unknown ids."""
        self.active.pop(session_id, None)
        log.info("ws.disconnected", session_id=session_id, total=len(self.active))

    async def send(self, session_id: str, payload: Any) -> None:
        """Push a ``result`` envelope to the session if it is still connected."""
        target = self.active.get(session_id)
        if target:
            envelope = WSOutbound(type="result", payload=payload)
            await target.send_bytes(orjson.dumps(envelope.model_dump(mode="json")))
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
manager = ConnectionManager()
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# ============================================================================
|
| 226 |
+
# SECTION 5 β FastAPI lifespan (startup + shutdown)
|
| 227 |
+
# ============================================================================
|
| 228 |
+
|
| 229 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: infra health gates, schema init, demo seeding, warm-up.

    In full mode, Redis and Qdrant are hard requirements (process exits on
    timeout) while Memgraph is soft (trust scoring degrades to a neutral
    fallback). In demo mode only Redis is probed, best-effort. Shutdown closes
    the shared Redis client.
    """
    log.info("startup.begin", demo_mode=settings.demo_mode, port=settings.port)

    if not settings.demo_mode:
        # Wait for all infrastructure services
        log.info("startup.waiting_for_services")

        # Redis is required for the verdict cache — fail fast if absent.
        if not await _wait_for_redis(settings.redis_url):
            log.error("startup.redis.timeout"); sys.exit(1)
        log.info("startup.redis.ok")

        # Qdrant is required for evidence retrieval — fail fast if absent.
        if not await _wait_for_qdrant(settings.qdrant_host, settings.qdrant_port):
            log.error("startup.qdrant.timeout"); sys.exit(1)
        log.info("startup.qdrant.ok")

        # Memgraph is optional: missing trust graph degrades gracefully.
        if not await _wait_for_memgraph(settings.memgraph_host, settings.memgraph_port):
            log.warning("startup.memgraph.timeout — trust scores will use neutral 0.5 fallback")
        else:
            log.info("startup.memgraph.ok")

        # Initialise DB schemas (idempotent)
        from core.db_init import init_all
        await init_all(settings)

        # Seed demo evidence into Qdrant; failure is non-fatal for the API.
        try:
            await _seed_demo_data()
        except Exception as exc:
            log.warning("startup.seed.failed", error=str(exc))
    else:
        # Demo mode: just make sure Redis is reachable (may be local or absent)
        try:
            r = await get_redis()
            await r.ping()
            log.info("startup.redis.ok")
        except Exception:
            log.warning("startup.redis.unavailable — cache disabled in demo mode")

    # Pre-warm BGE-M3 embedder in the background (avoids cold-start spike on first request)
    async def _warm():
        try:
            from rag_pipeline import embed_texts
            await embed_texts(["warm up"])
            log.info("startup.embedder.warm")
        except Exception as exc:
            log.warning("startup.embedder.warn", error=str(exc))

    asyncio.create_task(_warm())
    log.info("startup.complete")

    yield  # — app is live and serving

    # Graceful shutdown
    if _redis:
        await _redis.aclose()
    log.info("shutdown.complete")
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# ============================================================================
|
| 289 |
+
# SECTION 6 β FastAPI application
|
| 290 |
+
# ============================================================================
|
| 291 |
+
|
| 292 |
+
app = FastAPI(
|
| 293 |
+
title="Omnichannel Fact & Hallucination Intelligence API",
|
| 294 |
+
version="1.0.0",
|
| 295 |
+
description="Near-zero-latency fact-checking and hallucination detection via WebSocket",
|
| 296 |
+
lifespan=lifespan,
|
| 297 |
+
)
|
| 298 |
+
|
| 299 |
+
app.add_middleware(
|
| 300 |
+
CORSMiddleware,
|
| 301 |
+
allow_origins=["*"],
|
| 302 |
+
allow_methods=["*"],
|
| 303 |
+
allow_headers=["*"],
|
| 304 |
+
)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
# ============================================================================
|
| 308 |
+
# SECTION 7 β Core analysis pipeline
|
| 309 |
+
# ============================================================================
|
| 310 |
+
|
| 311 |
+
async def process_segment(
    text: str,
    content_hash: str,
    element_id: str,
    platform: Platform,
) -> AnalysisResult | None:
    """
    Full pipeline for a single text segment. Returns None if noise.

    Stages: Redis cache lookup -> gatekeeper classification -> concurrent
    RAG + Grok sensor -> multi-agent Prefect flow -> cache write-back.

    Cache key: verdict:{content_hash}
    TTL: 6 h  — green / red
         15 m — yellow
         none — purple (hallucination results are context-specific)
    """
    # 1 — Redis cache check (sub-millisecond)
    try:
        r = await get_redis()
        cached_json = await r.get(f"verdict:{content_hash}")
        if cached_json:
            result = AnalysisResult.model_validate_json(cached_json)
            result.cached = True
            # Re-point the cached verdict at the element on the current page.
            result.element_id = element_id
            log.debug("cache.hit", hash=content_hash[:8])
            return result
    except Exception:
        pass  # Redis unavailable in demo mode — continue without cache

    # 2 — Gatekeeper: fact vs noise (<120 ms p95)
    try:
        gate: GatekeeperResult = await classify_claim(text, settings)
    except Exception as exc:
        # A failing gatekeeper drops the segment rather than blocking the batch.
        log.error("gatekeeper.error", error=str(exc))
        return None

    if gate.label == "noise":
        log.debug("gatekeeper.noise_dropped", hash=content_hash[:8])
        return None

    # 3 — Concurrent: RAG pipeline + Grok sensor
    rag_result, grok_result = await asyncio.gather(
        run_rag_pipeline(text, content_hash, settings),
        query_grok_sensor(text, content_hash, settings),
    )

    # 4 — Multi-agent Prefect flow
    result: AnalysisResult = await evaluate_claim(
        claim=text,
        claim_hash=content_hash,
        element_id=element_id,
        platform=platform,
        rag_result=rag_result,
        grok_result=grok_result,
        settings=settings,
    )

    # 5 — Cache with color-appropriate TTL (purple verdicts are never cached)
    try:
        r = await get_redis()
        if result.color != HighlightColor.PURPLE:
            ttl = (
                settings.cache_ttl_green_red
                if result.color in (HighlightColor.GREEN, HighlightColor.RED)
                else settings.cache_ttl_yellow
            )
            await r.setex(f"verdict:{content_hash}", ttl, result.model_dump_json())
    except Exception:
        pass

    return result
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
# ============================================================================
|
| 383 |
+
# SECTION 8 β WebSocket endpoint
|
| 384 |
+
# ============================================================================
|
| 385 |
+
|
| 386 |
+
@app.websocket("/ws/{session_id}")
async def websocket_endpoint(ws: WebSocket, session_id: str):
    """
    Persistent WebSocket connection from the browser extension.

    Inbound:  { type: "batch",  payload: TextBatch }
            | { type: "ping" }
    Outbound: { type: "result", payload: AnalysisResult }
            | { type: "pong" }
            | { type: "error",  payload: { message: str } }
            | { type: "status", payload: { connected: bool, demo_mode: bool, … } }
    """
    await manager.connect(session_id, ws)

    # Initial handshake: tell the extension what capabilities are live.
    await ws.send_bytes(orjson.dumps(
        WSOutbound(type="status", payload={
            "connected": True,
            "demo_mode": settings.demo_mode,
            "has_groq": settings.has_groq,
            "has_x_api": settings.has_x_api,
        }).model_dump(mode="json")
    ))

    try:
        while True:
            raw = await ws.receive_bytes()
            envelope = WSInbound.model_validate_json(raw)

            # Heartbeat from the extension keep-alive timer.
            if envelope.type == "ping":
                await ws.send_bytes(orjson.dumps(
                    WSOutbound(type="pong", payload=None).model_dump(mode="json")
                ))
                continue

            # Anything other than a non-empty batch is silently ignored.
            if envelope.type != "batch" or not envelope.payload:
                continue

            try:
                batch = TextBatch.model_validate(envelope.payload)
            except ValidationError as exc:
                # Malformed batch: report the error but keep the socket open.
                await ws.send_bytes(orjson.dumps(
                    WSOutbound(type="error", payload={"message": str(exc)}).model_dump(mode="json")
                ))
                continue

            # Process all segments in the batch concurrently
            async def _process_and_send(segment):
                t0 = time.perf_counter()
                result = await process_segment(
                    text=segment.text,
                    content_hash=segment.content_hash,
                    element_id=segment.element_id,
                    platform=batch.platform,  # closure over the current batch
                )
                if result:
                    # Overwrite with true end-to-end latency (incl. cache path).
                    result.latency_ms = round((time.perf_counter() - t0) * 1000, 2)
                    await manager.send(session_id, result.model_dump(mode="json"))

            await asyncio.gather(*[_process_and_send(seg) for seg in batch.segments])

    except WebSocketDisconnect:
        manager.disconnect(session_id)
    except Exception as exc:
        # Any other failure tears down this session but not the server.
        log.error("ws.unexpected_error", session_id=session_id, error=str(exc))
        manager.disconnect(session_id)
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
# ============================================================================
|
| 455 |
+
# SECTION 9 β REST endpoints
|
| 456 |
+
# ============================================================================
|
| 457 |
+
|
| 458 |
+
@app.get("/health")
async def health():
    """Liveness probe: reports Redis reachability, demo mode, and version."""
    try:
        client = await get_redis()
        redis_ok = await client.ping()
    except Exception:
        redis_ok = False
    status = {
        "status": "ok",
        "redis": redis_ok,
        "demo_mode": settings.demo_mode,
        "version": "1.0.0",
    }
    return status
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
@app.get("/metrics")
async def metrics():
    """Lightweight ops metrics: live WS connections and Redis key count."""
    try:
        client = await get_redis()
        cached_verdicts = await client.dbsize()
    except Exception:
        cached_verdicts = 0
    snapshot = {
        "active_connections": len(manager.active),
        "cached_verdicts": cached_verdicts,
    }
    return snapshot
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
@app.get("/", response_class=HTMLResponse)
async def demo_ui():
    """Serves the interactive demo UI at the root path (HuggingFace Spaces landing page).

    Returns the bundled ``static/index.html`` when present; otherwise a minimal
    landing page pointing at the WebSocket endpoint.
    """
    ui_path = os.path.join(os.path.dirname(__file__), "static", "index.html")
    if os.path.exists(ui_path):
        # Read explicitly as UTF-8: relying on the host's locale default
        # (e.g. cp1252) would corrupt or reject non-ASCII characters in the UI.
        with open(ui_path, encoding="utf-8") as f:
            return HTMLResponse(f.read())
    return HTMLResponse(
        "<h1>Fact Intelligence API</h1>"
        "<p>Connect via WebSocket at <code>/ws/{session_id}</code></p>"
    )
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
# ============================================================================
|
| 500 |
+
# SECTION 10 β __main__ block (python app.py)
|
| 501 |
+
# ============================================================================
|
| 502 |
+
|
| 503 |
+
if __name__ == "__main__":
|
| 504 |
+
import uvicorn
|
| 505 |
+
uvicorn.run(
|
| 506 |
+
"app:app",
|
| 507 |
+
host="0.0.0.0",
|
| 508 |
+
port=settings.port,
|
| 509 |
+
log_level=settings.log_level.lower(),
|
| 510 |
+
access_log=False,
|
| 511 |
+
ws_ping_interval=20,
|
| 512 |
+
ws_ping_timeout=60,
|
| 513 |
+
)
|
backend/core/config.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/config.py β Centralized settings via pydantic-settings.
|
| 3 |
+
All values read from environment variables (set in HF Spaces secrets).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from enum import Enum
|
| 7 |
+
from functools import lru_cache
|
| 8 |
+
|
| 9 |
+
from pydantic import Field, computed_field
|
| 10 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class HighlightColor(str, Enum):
    """Verdict severity colors the extension renders as DOM highlights.

    Also drive cache TTL selection: green/red cache long, yellow short,
    purple not at all (see process_segment / Settings cache_ttl_* fields).
    """

    GREEN = "green"    # Fact-checked, widely corroborated
    YELLOW = "yellow"  # Breaking / unverified / weak signal
    RED = "red"        # Debunked, active community note
    PURPLE = "purple"  # LLM hallucination detected
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class Platform(str, Enum):
    """Origin platform of an analysed text segment, as reported by the extension.

    The AI chat platforms (CHATGPT, CLAUDE, GEMINI) additionally trigger the
    hallucination agent in evaluate_claim.
    """

    TWITTER = "twitter"
    INSTAGRAM = "instagram"
    YOUTUBE = "youtube"
    CHATGPT = "chatgpt"    # AI chat — hallucination check applies
    CLAUDE = "claude"      # AI chat — hallucination check applies
    GEMINI = "gemini"      # AI chat — hallucination check applies
    NEWS = "news"
    UNKNOWN = "unknown"    # fallback when the extension cannot classify the page
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class Settings(BaseSettings):
|
| 32 |
+
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
|
| 33 |
+
|
| 34 |
+
# LLM API keys
|
| 35 |
+
groq_api_key: str = Field(default="", alias="GROQ_API_KEY")
|
| 36 |
+
x_bearer_token: str = Field(default="", alias="X_BEARER_TOKEN")
|
| 37 |
+
|
| 38 |
+
# Infrastructure
|
| 39 |
+
qdrant_host: str = Field(default="localhost", alias="QDRANT_HOST")
|
| 40 |
+
qdrant_port: int = Field(default=6333, alias="QDRANT_PORT")
|
| 41 |
+
memgraph_host: str = Field(default="localhost", alias="MEMGRAPH_HOST")
|
| 42 |
+
memgraph_port: int = Field(default=7687, alias="MEMGRAPH_PORT")
|
| 43 |
+
memgraph_password: str = Field(default="memgraph123", alias="MEMGRAPH_PASSWORD")
|
| 44 |
+
redpanda_brokers: str = Field(default="localhost:9092", alias="REDPANDA_BROKERS")
|
| 45 |
+
redis_url: str = Field(default="redis://localhost:6379", alias="REDIS_URL")
|
| 46 |
+
|
| 47 |
+
# App
|
| 48 |
+
port: int = Field(default=7860, alias="PORT")
|
| 49 |
+
log_level: str = Field(default="INFO", alias="LOG_LEVEL")
|
| 50 |
+
demo_mode: bool = Field(default=False, alias="DEMO_MODE")
|
| 51 |
+
|
| 52 |
+
# Model identifiers for LiteLLM routing
|
| 53 |
+
gatekeeper_model: str = "groq/llama3-8b-8192"
|
| 54 |
+
misinformation_model: str = "groq/mixtral-8x7b-32768"
|
| 55 |
+
hallucination_model: str = "groq/llama3-70b-8192" # Free via Groq β replaces Claude Haiku
|
| 56 |
+
|
| 57 |
+
# Gatekeeper latency SLO: p95 < 120ms
|
| 58 |
+
gatekeeper_timeout_ms: int = 120
|
| 59 |
+
|
| 60 |
+
# Cache TTLs (seconds)
|
| 61 |
+
cache_ttl_green_red: int = 21_600 # 6 hours
|
| 62 |
+
cache_ttl_yellow: int = 900 # 15 minutes
|
| 63 |
+
# Purple: no cache β hallucination checks are context-specific
|
| 64 |
+
|
| 65 |
+
# RAG retrieval
|
| 66 |
+
qdrant_collection: str = "claims"
|
| 67 |
+
qdrant_ef: int = 128 # HNSW ef parameter β higher = more accurate, slower
|
| 68 |
+
qdrant_top_k: int = 8 # nearest neighbors to retrieve
|
| 69 |
+
evidence_window_hours: int = 72 # only retrieve evidence newer than 72h
|
| 70 |
+
|
| 71 |
+
# Minimum text length for analysis (words)
|
| 72 |
+
min_word_count: int = 12
|
| 73 |
+
|
| 74 |
+
@computed_field
@property
def has_groq(self) -> bool:
    """True when a GROQ_API_KEY was supplied (non-empty string)."""
    return self.groq_api_key != ""
|
| 78 |
+
|
| 79 |
+
@computed_field
@property
def has_hallucination_llm(self) -> bool:
    """True when the hallucination agent can run.

    The agent uses Groq llama3-70b (free tier), so the same GROQ_API_KEY
    used by the gatekeeper is the only credential required.
    """
    return self.groq_api_key != ""
|
| 84 |
+
|
| 85 |
+
@computed_field
@property
def has_x_api(self) -> bool:
    """True when an X (Twitter) bearer token was supplied."""
    return len(self.x_bearer_token) > 0
|
| 89 |
+
|
| 90 |
+
@computed_field
@property
def broker_list(self) -> list[str]:
    """Parse REDPANDA_BROKERS ("host1:9092,host2:9092") into a list.

    Strips whitespace around each entry and drops empty segments so that
    hand-edited .env values with spaces or trailing commas do not produce
    bogus broker addresses.
    """
    return [b.strip() for b in self.redpanda_brokers.split(",") if b.strip()]
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton (cached after the first call)."""
    return Settings()
|
backend/core/db_init.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/db_init.py β Initialize Qdrant collection and Memgraph graph schema.
|
| 3 |
+
|
| 4 |
+
Run once on startup (called from main.py lifespan) or manually:
|
| 5 |
+
uv run python -m core.db_init
|
| 6 |
+
|
| 7 |
+
Memgraph graph schema:
|
| 8 |
+
(Author {handle, verified, account_type})
|
| 9 |
+
-[:REPORTED {timestamp}]->
|
| 10 |
+
(Claim {text, embedding_id, hash})
|
| 11 |
+
<-[:CORROBORATED_BY {confidence}]-
|
| 12 |
+
(Source {url, domain, bias_rating})
|
| 13 |
+
-[:HAS_NOTE]->
|
| 14 |
+
(CommunityNote {text, active, created_at})
|
| 15 |
+
|
| 16 |
+
This schema supports:
|
| 17 |
+
- Trust score computation (Author.verified, Source count, CommunityNote presence)
|
| 18 |
+
- Claim deduplication by hash
|
| 19 |
+
- Source credibility tracking (bias_rating from Media Bias/Fact Check)
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import asyncio
|
| 23 |
+
|
| 24 |
+
import structlog
|
| 25 |
+
from neo4j import AsyncGraphDatabase
|
| 26 |
+
from qdrant_client import AsyncQdrantClient
|
| 27 |
+
from qdrant_client.models import Distance, PayloadSchemaType, VectorParams
|
| 28 |
+
|
| 29 |
+
from core.config import get_settings
|
| 30 |
+
|
| 31 |
+
log = structlog.get_logger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
async def init_qdrant(settings=None) -> None:
    """
    Create the Qdrant 'claims' collection if it doesn't exist.
    BGE-M3 outputs 1024-dimensional dense vectors.
    HNSW index created automatically by Qdrant on collection creation.

    Args:
        settings: optional Settings override; defaults to the cached global.
    """
    cfg = settings or get_settings()
    client = AsyncQdrantClient(host=cfg.qdrant_host, port=cfg.qdrant_port)

    try:
        collections = await client.get_collections()
        existing = {c.name for c in collections.collections}

        if cfg.qdrant_collection not in existing:
            await client.create_collection(
                collection_name=cfg.qdrant_collection,
                vectors_config=VectorParams(
                    size=1024,  # BGE-M3 output dimension
                    distance=Distance.COSINE,
                ),
            )
            log.info("qdrant.collection.created", name=cfg.qdrant_collection)

            # Payload indexes for fast filtering.
            # NOTE(review): indexes are only created together with the
            # collection; a pre-existing collection missing them is NOT
            # backfilled -- confirm this is acceptable for upgrades.
            for field, schema in [
                ("ingested_at_ts", PayloadSchemaType.FLOAT),
                ("platform", PayloadSchemaType.KEYWORD),
                ("content_hash", PayloadSchemaType.KEYWORD),
                ("author_handle", PayloadSchemaType.KEYWORD),
            ]:
                await client.create_payload_index(
                    collection_name=cfg.qdrant_collection,
                    field_name=field,
                    field_schema=schema,
                )
                log.debug("qdrant.index.created", field=field)
        else:
            log.info("qdrant.collection.exists", name=cfg.qdrant_collection)

    finally:
        # Always release the client's connection pool, even on failure.
        await client.close()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
async def init_memgraph(settings=None) -> None:
    """
    Create Memgraph constraints and indexes for the trust graph schema.

    Memgraph is in-memory -- indexes are re-created on restart (data too,
    unless persistence is enabled), so every statement here is idempotent
    (CREATE CONSTRAINT / CREATE INDEX / MERGE).

    Args:
        settings: optional Settings override; defaults to the cached global.
    """
    cfg = settings or get_settings()
    driver = AsyncGraphDatabase.driver(
        f"bolt://{cfg.memgraph_host}:{cfg.memgraph_port}",
        auth=("", cfg.memgraph_password),
        encrypted=False,
    )

    schema_queries = [
        # Uniqueness constraints (also create indexes automatically)
        "CREATE CONSTRAINT ON (a:Author) ASSERT a.handle IS UNIQUE;",
        "CREATE CONSTRAINT ON (c:Claim) ASSERT c.hash IS UNIQUE;",
        "CREATE CONSTRAINT ON (s:Source) ASSERT s.url IS UNIQUE;",

        # Additional indexes for traversal performance
        "CREATE INDEX ON :Author(verified);",
        "CREATE INDEX ON :Author(account_type);",
        "CREATE INDEX ON :CommunityNote(active);",

        # Seed a few known authoritative sources with high trust
        """
        MERGE (s:Source {url: 'https://reuters.com', domain: 'reuters.com'})
        SET s.bias_rating = 'center', s.trust_tier = 'tier1';
        """,
        """
        MERGE (s:Source {url: 'https://apnews.com', domain: 'apnews.com'})
        SET s.bias_rating = 'center', s.trust_tier = 'tier1';
        """,
        """
        MERGE (s:Source {url: 'https://who.int', domain: 'who.int'})
        SET s.bias_rating = 'center', s.trust_tier = 'government';
        """,
        """
        MERGE (s:Source {url: 'https://cdc.gov', domain: 'cdc.gov'})
        SET s.bias_rating = 'center', s.trust_tier = 'government';
        """,
    ]

    try:
        async with driver.session() as session:
            for query in schema_queries:
                try:
                    await session.run(query)
                except Exception as exc:
                    # Constraints/indexes may already exist -- not an error
                    if "already exists" not in str(exc).lower():
                        log.warning("memgraph.schema.warn", query=query[:60], error=str(exc))
    finally:
        # Close the driver even when session creation or a query fails,
        # so partial initialization does not leak Bolt connections.
        await driver.close()

    log.info("memgraph.schema.initialized")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
async def init_all(settings=None) -> None:
    """Initialize both Qdrant and Memgraph. Called from FastAPI lifespan."""
    cfg = settings or get_settings()
    # The two stores are independent -- initialize them concurrently.
    # NOTE(review): gather() without return_exceptions=True means the first
    # failure propagates and the sibling task's error may go unreported.
    await asyncio.gather(
        init_qdrant(cfg),
        init_memgraph(cfg),
    )
    log.info("db.init.complete")
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
# Manual invocation: `uv run python -m core.db_init` (see module docstring).
if __name__ == "__main__":
    asyncio.run(init_all())
|
backend/core/logging.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/logging.py β Structured logging setup using structlog + rich.
|
| 3 |
+
|
| 4 |
+
structlog provides machine-readable JSON in production and
|
| 5 |
+
colorized human-readable output in development, with zero config change.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
import structlog
|
| 9 |
+
log = structlog.get_logger(__name__)
|
| 10 |
+
log.info("event.name", key="value", latency_ms=42.1)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
import sys
|
| 15 |
+
|
| 16 |
+
import structlog
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def configure_logging(log_level: str = "INFO", json_output: bool = False) -> None:
    """
    Configure structlog for the application.

    json_output=True:  newline-delimited JSON -- compatible with Datadog,
                       Grafana Loki, AWS CloudWatch, etc.
    json_output=False: colorized, human-readable console output via the
                       structlog dev ConsoleRenderer.
    """
    numeric_level = logging.getLevelName(log_level.upper())

    # Processors shared by both output modes. Order matters: the final
    # renderer is appended last below.
    pipeline = [
        structlog.stdlib.add_log_level,               # log level as a field
        structlog.stdlib.add_logger_name,             # logger name
        structlog.processors.TimeStamper(fmt="iso"),  # ISO 8601 timestamp
        structlog.processors.format_exc_info,         # exceptions as structured dicts
        structlog.processors.StackInfoRenderer(),     # stack info
    ]

    if json_output:
        final_renderer = structlog.processors.JSONRenderer()
    else:
        final_renderer = structlog.dev.ConsoleRenderer(
            colors=True,
            exception_formatter=structlog.dev.plain_traceback,
        )

    structlog.configure(
        processors=[*pipeline, final_renderer],
        wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(file=sys.stdout),
        cache_logger_on_first_use=True,
    )

    # Route stdlib logging to stdout at the same level.
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=numeric_level,
    )

    # Silence chatty third-party libraries.
    noisy_libs = ("httpx", "httpcore", "aiokafka", "neo4j", "qdrant_client", "uvicorn.access")
    for lib in noisy_libs:
        logging.getLogger(lib).setLevel(logging.WARNING)
|
backend/core/models.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/models.py β Pydantic v2 models for the entire pipeline.
|
| 3 |
+
|
| 4 |
+
All models use strict typing with no implicit coercion, leveraging
|
| 5 |
+
Pydantic v2's Rust-backed validation for maximum throughput.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import Any
|
| 10 |
+
from uuid import UUID, uuid4
|
| 11 |
+
|
| 12 |
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
| 13 |
+
|
| 14 |
+
from core.config import HighlightColor, Platform
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# Inbound β what the browser extension sends us over WebSocket
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
class TextBatch(BaseModel):
    """
    A deduplicated batch of text segments flushed from the extension's
    ring buffer every 1200ms. Each segment carries its own xxhash for
    upstream deduplication and cache lookup.
    """
    session_id: str  # extension-supplied session identifier
    platform: Platform
    segments: list["TextSegment"]  # forward ref -- TextSegment is defined below
    # NOTE(review): datetime.utcnow is naive and deprecated since 3.12;
    # consider datetime.now(timezone.utc) -- confirm downstream consumers
    # tolerate aware datetimes before changing.
    sent_at: datetime = Field(default_factory=datetime.utcnow)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TextSegment(BaseModel):
    """One visible DOM text segment captured by the browser extension."""
    content_hash: str  # xxhash64 hex -- used as Redis cache key
    text: str
    element_id: str  # DOM node ID from the extension for highlight targeting
    word_count: int  # client-computed; validated against the 12-word floor below

    @field_validator("word_count")
    @classmethod
    def must_meet_minimum(cls, v: int) -> int:
        # Mirrors Settings.min_word_count (12). Short segments should have
        # been dropped client-side, so reject them loudly here.
        if v < 12:
            raise ValueError("Segments shorter than 12 words must be filtered client-side")
        return v
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ---------------------------------------------------------------------------
|
| 48 |
+
# Gatekeeper output
|
| 49 |
+
# ---------------------------------------------------------------------------
|
| 50 |
+
|
| 51 |
+
class GatekeeperResult(BaseModel):
    """
    Groq llama3-8b-8192 classifies each claim as fact or noise.
    Structured JSON output -- parsed with model_validate_json(), no try-except.
    """
    label: str  # "fact" | "noise" (closed set, enforced by valid_label)
    reason: str  # one-sentence reasoning for the classification
    confidence: float = Field(ge=0.0, le=1.0)  # model-reported confidence

    @field_validator("label")
    @classmethod
    def valid_label(cls, v: str) -> str:
        # Anything outside the closed label set surfaces as a
        # ValidationError instead of flowing downstream unnoticed.
        if v not in {"fact", "noise"}:
            raise ValueError(f"Label must be 'fact' or 'noise', got '{v}'")
        return v
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# ---------------------------------------------------------------------------
|
| 69 |
+
# RAG pipeline output
|
| 70 |
+
# ---------------------------------------------------------------------------
|
| 71 |
+
|
| 72 |
+
class EvidenceChunk(BaseModel):
    """A retrieved evidence chunk from Qdrant."""
    chunk_id: str
    text: str
    source_url: str
    domain: str
    score: float = Field(ge=0.0, le=1.0)  # cosine similarity
    ingested_at: datetime
    bias_rating: str | None = None  # e.g. Media Bias/Fact Check rating, when known
|
| 82 |
+
|
| 83 |
+
class TrustScore(BaseModel):
    """
    Computed from the Memgraph trust graph traversal.
    Algorithm: start 0.5, +0.3 verified official, +0.05/source (max 0.25),
    -0.4 if Community Note active. Clamped to [0.0, 1.0].
    """
    score: float = Field(ge=0.0, le=1.0)  # final clamped trust value
    author_verified: bool
    corroborating_sources: int  # count of corroborating Source nodes
    has_community_note: bool
    community_note_text: str | None = None  # populated only when a note is active
|
| 95 |
+
|
| 96 |
+
class RAGResult(BaseModel):
    # Combined output of the RAG stage: retrieved evidence plus the graph
    # trust score computed for the same claim.
    evidence: list[EvidenceChunk]
    trust: TrustScore
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ---------------------------------------------------------------------------
|
| 102 |
+
# Grok/X sensor output
|
| 103 |
+
# ---------------------------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
class GrokSensorResult(BaseModel):
    """Velocity and Community Notes signals for a claim (see grok_sensor.py)."""
    velocity: int  # 7-day tweet volume for core keywords
    community_note: bool  # True when an active Community Note was found
    note_text: str | None = None  # the note's text, when present
    is_mock: bool = False  # True when X API key is absent
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# ---------------------------------------------------------------------------
|
| 113 |
+
# Final analysis result β sent back to extension over WebSocket
|
| 114 |
+
# ---------------------------------------------------------------------------
|
| 115 |
+
|
| 116 |
+
class SourceRef(BaseModel):
    """A single source citation shown in the extension's hover card."""
    url: str
    domain: str
    favicon_url: str
    snippet: str  # short excerpt for display
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class AnalysisResult(BaseModel):
    """
    The final enriched verdict returned to the browser extension.
    The extension uses color + element_id to apply highlight + hover card.
    """
    request_id: UUID = Field(default_factory=uuid4)
    element_id: str  # mirrors TextSegment.element_id for DOM targeting
    content_hash: str
    platform: Platform

    # Verdict
    color: HighlightColor
    confidence: int = Field(ge=0, le=100)  # integer percent for UI display
    verdict_label: str  # human-readable summary e.g. "Debunked by Reuters"
    explanation: str  # full explanation string for hover card

    # Top 3 sources shown in hover card
    sources: list[SourceRef] = Field(max_length=3)

    # Debug / provenance metadata
    gatekeeper_label: str
    trust_score: float
    velocity: int
    has_community_note: bool
    latency_ms: float  # total pipeline latency for observability
    cached: bool = False  # True when served from the Redis cache
    # NOTE(review): datetime.utcnow is naive and deprecated since 3.12;
    # consider datetime.now(timezone.utc) once consumers are checked.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# ---------------------------------------------------------------------------
|
| 153 |
+
# WebSocket protocol messages
|
| 154 |
+
# ---------------------------------------------------------------------------
|
| 155 |
+
|
| 156 |
+
# NOTE(review): a plain `str` subclass used purely as a namespace of string
# constants. An enum.StrEnum would add iteration/validation, but members'
# runtime type would change -- confirm no caller relies on plain str.
class WSMessageType(str):
    BATCH = "batch"
    RESULT = "result"
    ERROR = "error"
    PING = "ping"
    PONG = "pong"
    STATUS = "status"
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
class WSInbound(BaseModel):
    # Envelope for messages received from the extension over WebSocket.
    type: str  # one of the WSMessageType constants
    payload: dict[str, Any] | None = None
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class WSOutbound(BaseModel):
    # Envelope for messages sent back to the extension over WebSocket.
    type: str  # one of the WSMessageType constants
    payload: Any
    # NOTE(review): naive datetime.utcnow -- deprecated since 3.12.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# ---------------------------------------------------------------------------
|
| 177 |
+
# Kafka/Redpanda event envelope
|
| 178 |
+
# ---------------------------------------------------------------------------
|
| 179 |
+
|
| 180 |
+
class IngestionEvent(BaseModel):
    """
    Envelope for all three Redpanda topics (twitter, instagram, youtube).
    Producers wrap their platform-specific data in this common schema.
    """
    event_id: str = Field(default_factory=lambda: str(uuid4()))
    platform: Platform
    content_hash: str  # deduplication key shared with the extension side
    text: str
    author_handle: str | None = None
    author_verified: bool = False
    source_url: str | None = None
    ingested_at: datetime = Field(default_factory=datetime.utcnow)

    @model_validator(mode="after")
    def strip_whitespace(self) -> "IngestionEvent":
        # Normalize text once at the boundary so downstream hashing and
        # embedding see a canonical form.
        self.text = self.text.strip()
        return self
|
backend/gatekeeper.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
gatekeeper.py β Groq-powered edge router.
|
| 3 |
+
|
| 4 |
+
Every incoming text batch hits this first. The Groq API with llama3-8b-8192
|
| 5 |
+
gives us 800+ tokens/second inference, sub-100ms p95 latency, no GPU needed.
|
| 6 |
+
|
| 7 |
+
If the classifier returns "noise" (opinion, meme, rhetoric, social noise),
|
| 8 |
+
the request is dropped immediately β no downstream pipeline costs incurred.
|
| 9 |
+
|
| 10 |
+
SLO: p95 < 120ms end-to-end, measured at the FastAPI WebSocket handler.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import time
|
| 14 |
+
|
| 15 |
+
import structlog
|
| 16 |
+
from groq import AsyncGroq
|
| 17 |
+
from pydantic import ValidationError
|
| 18 |
+
|
| 19 |
+
from core.config import Settings, get_settings
|
| 20 |
+
from core.models import GatekeeperResult
|
| 21 |
+
|
| 22 |
+
log = structlog.get_logger(__name__)
|
| 23 |
+
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
# Strict JSON schema prompt β forces the model to output parseable JSON.
|
| 26 |
+
# Pydantic v2's model_validate_json() parses this without a try-except
|
| 27 |
+
# because if validation fails we WANT the exception to surface.
|
| 28 |
+
# ---------------------------------------------------------------------------
|
| 29 |
+
|
| 30 |
+
GATEKEEPER_SYSTEM = """You are a claim classifier. Analyze the given text and output ONLY valid JSON.
|
| 31 |
+
|
| 32 |
+
Output schema (strict β no extra keys, no markdown, no preamble):
|
| 33 |
+
{
|
| 34 |
+
"label": "fact" | "noise",
|
| 35 |
+
"reason": "<one concise sentence>",
|
| 36 |
+
"confidence": <float 0.0β1.0>
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
Classify as "fact" if the text contains a falsifiable factual claim β a statement
|
| 40 |
+
about the real world that could be verified or refuted with evidence.
|
| 41 |
+
|
| 42 |
+
Classify as "noise" if the text is:
|
| 43 |
+
- A personal opinion or sentiment ("I think...", "I believe...")
|
| 44 |
+
- Rhetorical question
|
| 45 |
+
- Meme, humor, sarcasm, or social commentary without factual claims
|
| 46 |
+
- Pure emotional reaction ("this is amazing!", "so sad")
|
| 47 |
+
- Call-to-action without factual content
|
| 48 |
+
- Filler text or social pleasantries
|
| 49 |
+
|
| 50 |
+
Be conservative: when in doubt, label "fact" to avoid false negatives."""
|
| 51 |
+
|
| 52 |
+
GATEKEEPER_USER_TMPL = 'Classify this text: "{text}"'
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
async def classify_claim(text: str, settings: Settings | None = None) -> GatekeeperResult:
    """
    Classify whether `text` contains a falsifiable factual claim.

    Args:
        text: Candidate claim text; truncated to 800 chars before prompting.
        settings: Optional Settings override; defaults to the cached global.

    Returns:
        GatekeeperResult with label="fact"|"noise", a one-sentence reason,
        and a confidence in [0, 1].

    Raises:
        Propagates Groq client timeouts/errors and pydantic ValidationError
        on malformed model output -- the caller handles fallback.

    In DEMO_MODE (or when no GROQ_API_KEY is configured) a lexical
    heuristic classifier is used so the system runs without credentials.
    """
    cfg = settings or get_settings()
    t0 = time.perf_counter()

    if cfg.demo_mode or not cfg.has_groq:
        result = _heuristic_classify(text)
        log.debug(
            "gatekeeper.heuristic",
            label=result.label,
            latency_ms=round((time.perf_counter() - t0) * 1000, 2),
        )
        return result

    # A client per call is cheap (no connection is opened until the request
    # fires), but a module-level cached client would shave a little setup.
    client = AsyncGroq(api_key=cfg.groq_api_key)

    # Derive the hard timeout from the configured SLO instead of a magic
    # constant, leaving a 5ms margin for JSON parsing/logging so the
    # 120ms p95 budget is preserved (120 - 5 -> 0.115s, as before).
    timeout_s = max(cfg.gatekeeper_timeout_ms - 5, 1) / 1000

    # json_object response format -- Groq enforces valid JSON output.
    response = await client.chat.completions.create(
        model="llama3-8b-8192",  # raw Groq model id (no litellm "groq/" prefix)
        messages=[
            {"role": "system", "content": GATEKEEPER_SYSTEM},
            {"role": "user", "content": GATEKEEPER_USER_TMPL.format(text=text[:800])},
        ],
        response_format={"type": "json_object"},
        temperature=0.0,  # deterministic classification
        max_tokens=120,  # JSON output is short -- cap tokens to reduce latency
        timeout=timeout_s,
    )

    latency_ms = round((time.perf_counter() - t0) * 1000, 2)
    raw_json = response.choices[0].message.content or "{}"

    # model_validate_json() uses Pydantic v2's Rust validator -- a
    # ValidationError here deliberately propagates to the caller.
    result = GatekeeperResult.model_validate_json(raw_json)

    log.info(
        "gatekeeper.groq",
        label=result.label,
        confidence=result.confidence,
        latency_ms=latency_ms,
        tokens=response.usage.total_tokens if response.usage else None,
    )
    return result
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _heuristic_classify(text: str) -> GatekeeperResult:
    """
    Lexical fallback classifier used when GROQ_API_KEY is absent
    (DEMO_MODE=true). Not production-grade -- just enough signal to run
    the full pipeline without API credentials.
    """
    lowered = text.lower()
    n_words = len(text.split())

    # --- Noise signals: opinion / sentiment / meme patterns ---------------
    opinion_prefixes = ("i think", "i believe", "i feel", "imo", "imho")
    slang_terms = ("lol", "lmao", "omg", "wtf", "smh", "🤣", "😂")

    is_noise = (
        lowered.startswith(opinion_prefixes)
        or (lowered.endswith("?") and n_words < 15)
        or any(term in lowered for term in slang_terms)
        or ("love" in lowered and "hate" in lowered and "because" not in lowered)
    )
    if is_noise:
        return GatekeeperResult(
            label="noise",
            reason="Heuristic: opinion/sentiment pattern detected",
            confidence=0.75,
        )

    # --- Fact signals: numbers, evidential vocabulary, length -------------
    evidential_terms = ("percent", "%", "million", "billion", "study", "report", "according")
    is_fact = (
        any(ch.isdigit() for ch in text)  # numbers -> likely factual claim
        or any(term in lowered for term in evidential_terms)
        or n_words > 20  # longer sentences tend to be claims
    )
    if is_fact:
        return GatekeeperResult(
            label="fact",
            reason="Heuristic: numeric/evidential language detected",
            confidence=0.65,
        )

    # Default conservatively to "fact" to avoid false negatives.
    return GatekeeperResult(
        label="fact",
        reason="Heuristic: no clear noise pattern, defaulting to fact",
        confidence=0.5,
    )
|
backend/grok_sensor.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
grok_sensor.py β Async X API v2 + Community Notes integration.
|
| 3 |
+
|
| 4 |
+
Queries two signals for any claim:
|
| 5 |
+
1. 7-day tweet velocity: how fast is this claim spreading?
|
| 6 |
+
High velocity + no corroboration = yellow flag
|
| 7 |
+
2. Community Notes: has the crowd-sourced fact-check system flagged it?
|
| 8 |
+
Active note = strong red signal (-0.4 in trust scoring)
|
| 9 |
+
|
| 10 |
+
Full mock fallback when X_BEARER_TOKEN is absent β the system runs
|
| 11 |
+
end-to-end in demo mode without any external API credentials.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import hashlib
|
| 15 |
+
import random
|
| 16 |
+
from datetime import datetime, timedelta, timezone
|
| 17 |
+
|
| 18 |
+
import httpx
|
| 19 |
+
import structlog
|
| 20 |
+
from tenacity import (
|
| 21 |
+
retry,
|
| 22 |
+
retry_if_exception_type,
|
| 23 |
+
stop_after_attempt,
|
| 24 |
+
wait_exponential,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
from core.config import Settings, get_settings
|
| 28 |
+
from core.models import GrokSensorResult
|
| 29 |
+
|
| 30 |
+
log = structlog.get_logger(__name__)
|
| 31 |
+
|
| 32 |
+
X_API_BASE = "https://api.twitter.com/2"
|
| 33 |
+
COMMUNITY_NOTES_BASE = "https://twitter.com/i/birdwatch/n" # Unofficial β use search API workaround
|
| 34 |
+
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
# X API v2 search
|
| 37 |
+
# ---------------------------------------------------------------------------
|
| 38 |
+
|
| 39 |
+
@retry(
    retry=retry_if_exception_type(httpx.HTTPStatusError),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=0.5, min=0.1, max=2.0),
)
async def _search_x_api(query: str, bearer_token: str) -> int:
    """
    Search X API v2 for tweet count matching the query in the past 7 days.
    Returns the total tweet count as a velocity signal.

    Uses tenacity for exponential backoff on HTTPStatusError raised by
    raise_for_status(). NOTE(review): this retries on ANY 4xx/5xx, not
    just 429 rate limits -- consider a status-code predicate.
    """
    params = {
        # Exclude retweets and restrict to English to measure organic spread
        "query": f"{query} -is:retweet lang:en",
        "start_time": (datetime.now(timezone.utc) - timedelta(days=7)).isoformat(),
        "granularity": "day",
    }
    headers = {"Authorization": f"Bearer {bearer_token}"}

    async with httpx.AsyncClient(timeout=5.0) as client:
        resp = await client.get(
            f"{X_API_BASE}/tweets/counts/recent",
            params=params,
            headers=headers,
        )
        resp.raise_for_status()
        data = resp.json()
        # Missing meta/total defaults to 0 -- treated as "no spread signal"
        return data.get("meta", {}).get("total_tweet_count", 0)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def _check_community_notes(query_keywords: list[str], bearer_token: str) -> tuple[bool, str | None]:
    """
    Check for active Community Notes using the X API v2 search endpoint.
    Community Notes are exposed as tweets from @CommunityNotes.

    Returns (has_note: bool, note_text: str | None).

    Best-effort: any non-200 response (rate limit, auth failure) is treated
    as "no note found" rather than raising.
    """
    query = " ".join(query_keywords[:5])  # Use top-5 keywords for targeted search
    params = {
        "query": f"(from:CommunityNotes) ({query})",
        "max_results": 5,
        "tweet.fields": "text,created_at",
        # Notes older than 30 days are considered stale for this signal
        "start_time": (datetime.now(timezone.utc) - timedelta(days=30)).isoformat(),
    }
    headers = {"Authorization": f"Bearer {bearer_token}"}

    async with httpx.AsyncClient(timeout=5.0) as client:
        resp = await client.get(
            f"{X_API_BASE}/tweets/search/recent",
            params=params,
            headers=headers,
        )
        if resp.status_code == 200:
            data = resp.json()
            tweets = data.get("data", [])
            if tweets:
                # First (most relevant) matching note is returned
                return True, tweets[0]["text"]
        return False, None
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _extract_keywords(text: str) -> list[str]:
|
| 100 |
+
"""
|
| 101 |
+
Extract the most meaningful content words for query construction.
|
| 102 |
+
Strips stopwords; keeps nouns, numbers, proper nouns (heuristic: capitalized).
|
| 103 |
+
"""
|
| 104 |
+
stopwords = {
|
| 105 |
+
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
|
| 106 |
+
"have", "has", "had", "do", "does", "did", "will", "would", "could",
|
| 107 |
+
"should", "may", "might", "shall", "can", "this", "that", "these",
|
| 108 |
+
"those", "i", "we", "you", "he", "she", "it", "they", "and", "or",
|
| 109 |
+
"but", "in", "on", "at", "to", "for", "of", "with", "by", "from",
|
| 110 |
+
"up", "as", "into", "through", "about", "after", "before",
|
| 111 |
+
}
|
| 112 |
+
words = [w.strip(".,!?;:\"'()[]") for w in text.split()]
|
| 113 |
+
return [w for w in words if w.lower() not in stopwords and len(w) > 3][:10]
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _mock_sensor_result(claim_hash: str) -> GrokSensorResult:
    """
    Build a deterministic fake sensor reading from *claim_hash*.

    The hash seeds a private RNG, so the same hash always yields the same
    result — stable across calls for testing and demo mode.
    """
    hex_prefix = claim_hash[:8]
    if all(ch in "0123456789abcdef" for ch in hex_prefix):
        seed = int(hex_prefix, 16)
    else:
        seed = hash(claim_hash)
    prng = random.Random(seed)

    tweet_velocity = prng.randint(0, 50_000)
    # ~12% of mock claims carry a Community Note (realistic base rate).
    noted = prng.random() < 0.12

    return GrokSensorResult(
        velocity=tweet_velocity,
        community_note=noted,
        note_text=(
            "Community Note: This claim lacks context. The full data shows..."
            if noted
            else None
        ),
        is_mock=True,
    )
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# ---------------------------------------------------------------------------
|
| 141 |
+
# Public interface
|
| 142 |
+
# ---------------------------------------------------------------------------
|
| 143 |
+
|
| 144 |
+
async def query_grok_sensor(
    claim_text: str,
    claim_hash: str,
    settings: Settings | None = None,
) -> GrokSensorResult:
    """
    Main entry point: query X API for claim velocity and Community Notes.

    Falls back to deterministic mock data when X_BEARER_TOKEN is absent.
    The mock is seeded by claim_hash so results are consistent across calls.
    """
    cfg = settings if settings is not None else get_settings()

    # Demo mode or missing credentials -> deterministic mock path.
    if cfg.demo_mode or not cfg.has_x_api:
        mock = _mock_sensor_result(claim_hash)
        log.debug("grok_sensor.mock", velocity=mock.velocity, has_note=mock.community_note)
        return mock

    keywords = _extract_keywords(claim_text)
    search_query = " ".join(keywords[:5])

    try:
        velocity = await _search_x_api(search_query, cfg.x_bearer_token)
        has_note: bool = False
        note_text: str | None = None

        # Only bother checking Community Notes once the claim is clearly
        # circulating (>100 matching posts in the search window).
        if velocity > 100:
            has_note, note_text = await _check_community_notes(keywords, cfg.x_bearer_token)

        live = GrokSensorResult(
            velocity=velocity,
            community_note=has_note,
            note_text=note_text,
            is_mock=False,
        )
        log.info("grok_sensor.live", velocity=velocity, has_note=has_note)
        return live

    except httpx.HTTPError as exc:
        # Any transport/HTTP failure degrades gracefully to mock data.
        log.warning("grok_sensor.api_error", error=str(exc), fallback="mock")
        return _mock_sensor_result(claim_hash)
|
backend/producers/producers.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
producers/twitter_producer.py β Async Redpanda producer for X/Twitter mock data.
|
| 3 |
+
|
| 4 |
+
Reads mock tweet data from a JSONL file and publishes to topic `raw.twitter`
|
| 5 |
+
at 50 events/second. Redpanda's Kafka-compatible API means aiokafka works
|
| 6 |
+
without any modifications.
|
| 7 |
+
|
| 8 |
+
Why 50 eps for Twitter: Twitter is the highest-velocity source (most
|
| 9 |
+
misinformation travels fastest on X), so it gets the highest throughput budget.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import asyncio
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import time
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
import structlog
|
| 19 |
+
from aiokafka import AIOKafkaProducer
|
| 20 |
+
from aiokafka.errors import KafkaConnectionError
|
| 21 |
+
|
| 22 |
+
log = structlog.get_logger(__name__)
|
| 23 |
+
BROKERS = os.environ.get("REDPANDA_BROKERS", "localhost:9092")
|
| 24 |
+
TOPIC = "raw.twitter"
|
| 25 |
+
TARGET_EPS = 50 # events per second
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# ---------------------------------------------------------------------------
|
| 29 |
+
# Mock data (used when no JSONL file is provided)
|
| 30 |
+
# ---------------------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
MOCK_TWEETS = [
|
| 33 |
+
{"id": "t001", "text": "Scientists confirmed that 73% of peer-reviewed studies on mRNA vaccines show long-term immunity lasting over 18 months.", "author": "science_today", "verified": True, "account_type": "official_news"},
|
| 34 |
+
{"id": "t002", "text": "Breaking: The Federal Reserve has just raised interest rates by 75 basis points β the largest single hike since 1994.", "author": "reuters_econ", "verified": True, "account_type": "official_news"},
|
| 35 |
+
{"id": "t003", "text": "lol did you see that video? total propaganda π wake up people", "author": "anon_user123", "verified": False, "account_type": "personal"},
|
| 36 |
+
{"id": "t004", "text": "The WHO confirmed 12 million cases of the new strain have been reported across 47 countries in the last 30 days.", "author": "who_official", "verified": True, "account_type": "government"},
|
| 37 |
+
{"id": "t005", "text": "According to newly declassified Pentagon documents, UFO encounters increased by 400% between 2020 and 2023.", "author": "ufo_truther", "verified": False, "account_type": "personal"},
|
| 38 |
+
{"id": "t006", "text": "Harvard researchers published data showing remote work productivity rose by 13% on average versus in-office.", "author": "harvard_biz", "verified": True, "account_type": "official_news"},
|
| 39 |
+
{"id": "t007", "text": "I just think the whole thing is suspicious. something doesn't add up here. do your own research!", "author": "skeptic_99", "verified": False, "account_type": "personal"},
|
| 40 |
+
{"id": "t008", "text": "EU parliament voted 483-141 to approve the AI Act, making it the world's first comprehensive artificial intelligence legislation.", "author": "eu_parliament", "verified": True, "account_type": "government"},
|
| 41 |
+
{"id": "t009", "text": "Elon Musk announced Tesla will manufacture 5 million vehicles in 2025, a 240% increase from 2023 production.", "author": "tech_insider", "verified": False, "account_type": "personal"},
|
| 42 |
+
{"id": "t010", "text": "Climate scientists at NOAA recorded the highest average ocean temperatures in 150 years of recorded history this August.", "author": "noaa_official", "verified": True, "account_type": "government"},
|
| 43 |
+
] * 100 # Repeat for continuous stream
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
async def produce_twitter(brokers: str = BROKERS, limit: int | None = None) -> None:
    """
    Publish mock tweets to `raw.twitter` at roughly TARGET_EPS events/second.

    Iterates MOCK_TWEETS, wrapping each in the common platform envelope.
    Runs until the data is exhausted, or until `limit` events have been
    sent when `limit` is given (useful for testing).
    """
    kafka = AIOKafkaProducer(
        bootstrap_servers=brokers,
        value_serializer=lambda v: json.dumps(v).encode(),
        compression_type="gzip",
        max_batch_size=16384,
    )

    await kafka.start()
    log.info("producer.twitter.start", brokers=brokers, eps=TARGET_EPS)

    pause = 1.0 / TARGET_EPS
    sent = 0

    try:
        for tweet in MOCK_TWEETS:
            if limit and sent >= limit:
                break

            message = {
                "platform": "twitter",
                "content_hash": _hash(tweet["text"]),
                "text": tweet["text"],
                "author_handle": tweet["author"],
                "author_verified": tweet["verified"],
                "source_url": f"https://x.com/{tweet['author']}/status/{tweet['id']}",
                "ingested_at": time.time(),
            }

            await kafka.send(TOPIC, value=message)
            sent += 1
            await asyncio.sleep(pause)

    finally:
        await kafka.stop()
        log.info("producer.twitter.stop", total_sent=sent)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _hash(text: str) -> str:
    """Return the 64-bit xxHash hex digest of *text* (used as the content dedup key)."""
    # Imported lazily so merely importing this module stays cheap.
    import xxhash
    return xxhash.xxh64(text.encode()).hexdigest()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# ---------------------------------------------------------------------------
|
| 94 |
+
# instagram_producer.py (inline to keep file count reasonable)
|
| 95 |
+
# ---------------------------------------------------------------------------
|
| 96 |
+
|
| 97 |
+
INSTAGRAM_TOPIC = "raw.instagram"
|
| 98 |
+
INSTAGRAM_EPS = 20
|
| 99 |
+
|
| 100 |
+
MOCK_INSTAGRAM = [
|
| 101 |
+
{"id": "ig001", "text": "Just read that consuming 5 servings of ultra-processed foods per day increases cardiovascular disease risk by 62%.", "account": "nutritionista_real"},
|
| 102 |
+
{"id": "ig002", "text": "loving these golden hour pics π
this place is absolutely magical!", "account": "travel_vibes_only"},
|
| 103 |
+
{"id": "ig003", "text": "NASA confirmed the Artemis III moon landing is scheduled for September 2026, marking humanity's return after 54 years.", "account": "space_news_daily"},
|
| 104 |
+
{"id": "ig004", "text": "Studies show social media use exceeding 3 hours daily correlates with a 48% higher rate of anxiety in adolescents aged 13-17.", "account": "mental_health_facts"},
|
| 105 |
+
{"id": "ig005", "text": "Can't believe this coffee shop! best latte I've had all year ββ¨", "account": "foodie_adventures"},
|
| 106 |
+
{"id": "ig006", "text": "A leaked document suggests Apple's Vision Pro 2 will feature a 70% thinner form factor and 14-hour battery life.", "account": "tech_leaks_xyz"},
|
| 107 |
+
{"id": "ig007", "text": "The Amazon rainforest lost 11,568 square kilometers to deforestation in 2023, a 22% increase from the previous year.", "account": "environmental_watch"},
|
| 108 |
+
] * 50
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
async def produce_instagram(brokers: str = BROKERS, limit: int | None = None) -> None:
    """
    Publish mock Instagram posts to `raw.instagram` at INSTAGRAM_EPS events/second.

    Same envelope shape as the Twitter producer; Instagram accounts are
    always published as unverified. Stops early when `limit` is reached.
    """
    kafka = AIOKafkaProducer(
        bootstrap_servers=brokers,
        value_serializer=lambda v: json.dumps(v).encode(),
        compression_type="gzip",
    )
    await kafka.start()
    log.info("producer.instagram.start", brokers=brokers, eps=INSTAGRAM_EPS)

    pause = 1.0 / INSTAGRAM_EPS
    sent = 0

    try:
        for post in MOCK_INSTAGRAM:
            if limit and sent >= limit:
                break

            message = {
                "platform": "instagram",
                "content_hash": _hash(post["text"]),
                "text": post["text"],
                "author_handle": post["account"],
                "author_verified": False,
                "source_url": f"https://instagram.com/{post['account']}/p/{post['id']}",
                "ingested_at": time.time(),
            }

            await kafka.send(INSTAGRAM_TOPIC, value=message)
            sent += 1
            await asyncio.sleep(pause)

    finally:
        await kafka.stop()
        log.info("producer.instagram.stop", total_sent=sent)
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ---------------------------------------------------------------------------
|
| 148 |
+
# youtube_producer.py (inline)
|
| 149 |
+
# ---------------------------------------------------------------------------
|
| 150 |
+
|
| 151 |
+
YOUTUBE_TOPIC = "raw.youtube"
|
| 152 |
+
YOUTUBE_EPS = 10
|
| 153 |
+
|
| 154 |
+
MOCK_YOUTUBE_TRANSCRIPTS = [
|
| 155 |
+
{"id": "yt001", "text": "According to the study published in Nature Medicine, the experimental drug reduced tumor size by an average of 47% in stage three patients.", "channel": "MedicalFrontiers"},
|
| 156 |
+
{"id": "yt002", "text": "So basically what they're saying is that the economy grew by 2.4 percent in Q3, which is actually the highest quarterly growth since 2021.", "channel": "FinanceExplained"},
|
| 157 |
+
{"id": "yt003", "text": "I personally believe this is all connected, if you look at the patterns you can clearly see what's really happening behind the scenes.", "channel": "ConspiracyHub"},
|
| 158 |
+
{"id": "yt004", "text": "The International Energy Agency reports that renewable energy now accounts for 30% of global electricity generation, up from 26% in 2021.", "channel": "CleanEnergyNow"},
|
| 159 |
+
{"id": "yt005", "text": "GPT-5 was secretly trained on 100 trillion parameters, making it ten times larger than GPT-4, according to an anonymous OpenAI employee.", "channel": "AIInsiderNews"},
|
| 160 |
+
{"id": "yt006", "text": "The United Nations Population Fund projects global population will peak at 10.4 billion in the 2080s before beginning to decline.", "channel": "DemographicsWorld"},
|
| 161 |
+
] * 30
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
async def produce_youtube(brokers: str = BROKERS, limit: int | None = None) -> None:
    """
    Publish mock YouTube transcript chunks to `raw.youtube` at YOUTUBE_EPS events/second.

    Transcript chunks use the same envelope as the other producers; channel
    handles are published as unverified. Stops early when `limit` is reached.
    """
    kafka = AIOKafkaProducer(
        bootstrap_servers=brokers,
        value_serializer=lambda v: json.dumps(v).encode(),
        compression_type="gzip",
    )
    await kafka.start()
    log.info("producer.youtube.start", brokers=brokers, eps=YOUTUBE_EPS)

    pause = 1.0 / YOUTUBE_EPS
    sent = 0

    try:
        for chunk in MOCK_YOUTUBE_TRANSCRIPTS:
            if limit and sent >= limit:
                break

            message = {
                "platform": "youtube",
                "content_hash": _hash(chunk["text"]),
                "text": chunk["text"],
                "author_handle": chunk["channel"],
                "author_verified": False,
                "source_url": f"https://youtube.com/watch?v={chunk['id']}",
                "ingested_at": time.time(),
            }

            await kafka.send(YOUTUBE_TOPIC, value=message)
            sent += 1
            await asyncio.sleep(pause)

    finally:
        await kafka.stop()
        log.info("producer.youtube.stop", total_sent=sent)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
# ---------------------------------------------------------------------------
|
| 201 |
+
# Aggregated consumer β upserts into Qdrant + Memgraph
|
| 202 |
+
# ---------------------------------------------------------------------------
|
| 203 |
+
|
| 204 |
+
async def run_consumer(brokers: str = BROKERS) -> None:
    """
    Consume all three raw topics, deduplicate by content_hash, and upsert
    each new event into Qdrant (vector index) and Memgraph (trust graph).

    Fixes vs. the original:
    - Upsert tasks created with asyncio.create_task are now held in a
      strong-reference set. The event loop keeps only weak references to
      tasks, so unreferenced fire-and-forget tasks can be garbage-collected
      before completing (see asyncio.create_task documentation).
    - Removed an unused local `import xxhash`.
    """
    from aiokafka import AIOKafkaConsumer

    seen_hashes: set[str] = set()
    # Strong refs to in-flight upsert tasks; each task removes itself on completion.
    pending_tasks: set[asyncio.Task] = set()

    consumer = AIOKafkaConsumer(
        "raw.twitter", "raw.instagram", "raw.youtube",
        bootstrap_servers=brokers,
        group_id="fact-intelligence-consumer",
        value_deserializer=lambda v: json.loads(v.decode()),
        auto_offset_reset="latest",
    )

    await consumer.start()
    log.info("consumer.start", topics=["raw.twitter", "raw.instagram", "raw.youtube"])

    try:
        async for msg in consumer:
            event = msg.value
            h = event.get("content_hash", "")

            if h in seen_hashes:
                continue  # Client-side deduplication by content hash
            seen_hashes.add(h)

            # Trim seen_hashes to bound memory. NOTE: set iteration order is
            # arbitrary, so this drops an arbitrary half — dedup is therefore
            # approximate after trimming, which is acceptable here.
            if len(seen_hashes) > 50_000:
                seen_hashes = set(list(seen_hashes)[-25_000:])

            log.debug("consumer.event", platform=event.get("platform"), hash=h[:8])

            # Upsert into Qdrant + Memgraph without blocking the consume loop.
            task = asyncio.create_task(_upsert_event(event))
            pending_tasks.add(task)
            task.add_done_callback(pending_tasks.discard)

    finally:
        await consumer.stop()
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
async def _upsert_event(event: dict) -> None:
    """
    Embed one event's text and upsert it into Qdrant (and Memgraph).

    Best-effort: any failure is logged and swallowed so a single bad event
    never takes down the consumer loop. Events with empty text are skipped.
    """
    try:
        import uuid

        from core.config import get_settings
        from qdrant_client.models import PointStruct
        from rag_pipeline import embed_texts, get_qdrant

        cfg = get_settings()
        text = event.get("text", "")
        if not text:
            return

        # Embed the text and write one point into the vector index.
        [vector] = await embed_texts([text])
        qdrant = await get_qdrant(cfg)

        point = PointStruct(
            id=str(uuid.uuid4()),
            vector=vector,
            payload={
                "text": text,
                "source_url": event.get("source_url", ""),
                "domain": _extract_domain(event.get("source_url", "")),
                "platform": event.get("platform", ""),
                "content_hash": event.get("content_hash", ""),
                "ingested_at_ts": event.get("ingested_at", time.time()),
                "author_handle": event.get("author_handle", ""),
                "bias_rating": None,
            },
        )
        await qdrant.upsert(
            collection_name=cfg.qdrant_collection,
            points=[point],
        )

        # Mirror Author/Claim into the Memgraph trust graph.
        await _upsert_graph_node(event, cfg)

    except Exception as exc:
        log.error("consumer.upsert_error", error=str(exc))
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def _extract_domain(url: str) -> str:
|
| 292 |
+
try:
|
| 293 |
+
from urllib.parse import urlparse
|
| 294 |
+
return urlparse(url).netloc.lstrip("www.")
|
| 295 |
+
except Exception:
|
| 296 |
+
return ""
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
async def _upsert_graph_node(event: dict, cfg) -> None:
    """
    Create/update Author and Claim nodes (and the REPORTED edge) in Memgraph.

    Fix: the Bolt driver is now closed in a `finally` block, so a failed
    Cypher query no longer leaks the connection.

    NOTE(review): a new driver per event is expensive — consider a shared
    module-level driver; left as-is to preserve behavior.
    """
    from neo4j import AsyncGraphDatabase

    driver = AsyncGraphDatabase.driver(
        f"bolt://{cfg.memgraph_host}:{cfg.memgraph_port}",
        auth=("", cfg.memgraph_password),
        encrypted=False,
    )
    try:
        async with driver.session() as session:
            await session.run(
                """
                MERGE (a:Author {handle: $handle})
                SET a.verified = $verified, a.account_type = $account_type
                MERGE (c:Claim {hash: $hash})
                SET c.text = $text
                MERGE (a)-[:REPORTED {timestamp: $ts}]->(c)
                """,
                handle=event.get("author_handle", "unknown"),
                verified=event.get("author_verified", False),
                account_type=event.get("account_type", "personal"),
                hash=event.get("content_hash", ""),
                # Claim text is truncated to keep graph nodes small.
                text=event.get("text", "")[:500],
                ts=event.get("ingested_at", time.time()),
            )
    finally:
        await driver.close()
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
if __name__ == "__main__":
    # Entry point: run all three mock producers plus the consumer in one
    # event loop. (Removed an unused `import sys` from the original.)

    async def _run_all() -> None:
        """Run the three producers and the consumer concurrently until done."""
        await asyncio.gather(
            produce_twitter(),
            produce_instagram(),
            produce_youtube(),
            run_consumer(),
        )

    asyncio.run(_run_all())
|
backend/pyproject.toml
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "omnichannel-fact-intelligence"
|
| 3 |
+
version = "1.0.0"
|
| 4 |
+
description = "Near-zero-latency omnichannel fact & hallucination intelligence backend"
|
| 5 |
+
requires-python = ">=3.12"
|
| 6 |
+
dependencies = [
|
| 7 |
+
# Web framework & async
|
| 8 |
+
"fastapi==0.115.5",
|
| 9 |
+
"uvicorn[standard]==0.32.1",
|
| 10 |
+
"websockets==13.1",
|
| 11 |
+
"httpx==0.27.2",
|
| 12 |
+
|
| 13 |
+
# Data validation
|
| 14 |
+
"pydantic==2.10.3",
|
| 15 |
+
"pydantic-settings==2.6.1",
|
| 16 |
+
|
| 17 |
+
# LLM abstraction β swap Groq β GPT-4o β local Ollama without code changes
|
| 18 |
+
"litellm==1.55.4",
|
| 19 |
+
"groq==0.13.0",
|
| 20 |
+
|
| 21 |
+
# Embeddings β BGE-M3, multilingual, CPU-native, completely free
|
| 22 |
+
"fastembed==0.4.2",
|
| 23 |
+
|
| 24 |
+
# Vector DB β Qdrant self-hosted, HNSW sub-ms ANN search
|
| 25 |
+
"qdrant-client==1.12.1",
|
| 26 |
+
|
| 27 |
+
# Graph DB β Memgraph Bolt driver (Cypher-compatible, same as Neo4j driver)
|
| 28 |
+
"neo4j==5.26.0",
|
| 29 |
+
|
| 30 |
+
# Message queue β Redpanda is Kafka-compatible, use aiokafka
|
| 31 |
+
"aiokafka==0.11.0",
|
| 32 |
+
|
| 33 |
+
# Orchestration β Prefect DAG flows replacing Celery
|
| 34 |
+
"prefect==3.1.6",
|
| 35 |
+
|
| 36 |
+
# Cache β Redis Stack (RedisJSON + RedisSearch)
|
| 37 |
+
"redis[hiredis]==5.2.1",
|
| 38 |
+
|
| 39 |
+
# Hashing β xxhash for sub-microsecond content deduplication
|
| 40 |
+
"xxhash==3.5.0",
|
| 41 |
+
|
| 42 |
+
# Observability
|
| 43 |
+
"structlog==24.4.0",
|
| 44 |
+
"rich==13.9.4",
|
| 45 |
+
|
| 46 |
+
# Utilities
|
| 47 |
+
"python-dotenv==1.0.1",
|
| 48 |
+
"tenacity==9.0.0", # Exponential backoff for external API calls
|
| 49 |
+
"aiofiles==24.1.0",
|
| 50 |
+
"orjson==3.10.12", # 2-3x faster JSON than stdlib
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
[project.optional-dependencies]
|
| 54 |
+
dev = [
|
| 55 |
+
"pytest==8.3.4",
|
| 56 |
+
"pytest-asyncio==0.24.0",
|
| 57 |
+
"pytest-httpx==0.32.0",
|
| 58 |
+
"ruff==0.8.3",
|
| 59 |
+
"mypy==1.13.0",
|
| 60 |
+
]
|
| 61 |
+
|
| 62 |
+
[build-system]
|
| 63 |
+
requires = ["hatchling"]
|
| 64 |
+
build-backend = "hatchling.build"
|
| 65 |
+
|
| 66 |
+
[tool.uv]
|
| 67 |
+
dev-dependencies = [
|
| 68 |
+
"pytest>=8.3.4",
|
| 69 |
+
"pytest-asyncio>=0.24.0",
|
| 70 |
+
]
|
| 71 |
+
|
| 72 |
+
[tool.ruff]
|
| 73 |
+
line-length = 100
|
| 74 |
+
target-version = "py312"
|
| 75 |
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
| 76 |
+
|
| 77 |
+
[tool.mypy]
|
| 78 |
+
python_version = "3.12"
|
| 79 |
+
strict = true
|
| 80 |
+
ignore_missing_imports = true
|
backend/rag_pipeline.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
rag_pipeline.py β Retrieval-Augmented Generation truth pipeline.
|
| 3 |
+
|
| 4 |
+
Three-stage process:
|
| 5 |
+
1. Embed the claim using BGE-M3 (FastEmbed, CPU-native, multilingual)
|
| 6 |
+
2. Search Qdrant for nearest evidence chunks (HNSW ef=128, top-8, 72h window)
|
| 7 |
+
3. Traverse the Memgraph trust graph to compute a trust score
|
| 8 |
+
|
| 9 |
+
Why BGE-M3 over OpenAI embeddings:
|
| 10 |
+
- 100+ language support (OpenAI embeddings are English-biased)
|
| 11 |
+
- 1024-dimensional dense vectors with better factual recall on news content
|
| 12 |
+
- Runs on CPU β no GPU dependency on the server
|
| 13 |
+
- Completely free β no per-token cost
|
| 14 |
+
- Comparable or better performance on BEIR benchmarks vs text-embedding-3-small
|
| 15 |
+
|
| 16 |
+
Why Qdrant over Pinecone:
|
| 17 |
+
- Self-hosted Docker β zero vendor lock-in, zero per-query cost
|
| 18 |
+
- HNSW index with configurable ef parameter for precision/recall trade-off
|
| 19 |
+
- Built-in payload filtering for recency constraints (no separate filter step)
|
| 20 |
+
- gRPC support for sub-millisecond latency on local network
|
| 21 |
+
|
| 22 |
+
Why Memgraph over Neo4j:
|
| 23 |
+
- Fully in-memory β entire graph lives in RAM for <1ms traversal
|
| 24 |
+
- Cypher-compatible β same query language as Neo4j, zero migration cost
|
| 25 |
+
- Docker-deployable in one command
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
import asyncio
|
| 29 |
+
from concurrent.futures import ProcessPoolExecutor
|
| 30 |
+
from datetime import datetime, timedelta, timezone
|
| 31 |
+
from typing import TYPE_CHECKING
|
| 32 |
+
|
| 33 |
+
import structlog
|
| 34 |
+
from neo4j import AsyncGraphDatabase
|
| 35 |
+
from qdrant_client import AsyncQdrantClient
|
| 36 |
+
from qdrant_client.models import (
|
| 37 |
+
Distance,
|
| 38 |
+
FieldCondition,
|
| 39 |
+
Filter,
|
| 40 |
+
MatchValue,
|
| 41 |
+
PayloadSchemaType,
|
| 42 |
+
Range,
|
| 43 |
+
SearchRequest,
|
| 44 |
+
VectorParams,
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
from core.config import Settings, get_settings
|
| 48 |
+
from core.models import EvidenceChunk, RAGResult, TrustScore
|
| 49 |
+
|
| 50 |
+
if TYPE_CHECKING:
|
| 51 |
+
from fastembed import TextEmbedding
|
| 52 |
+
|
| 53 |
+
log = structlog.get_logger(__name__)
|
| 54 |
+
|
| 55 |
+
# ---------------------------------------------------------------------------
|
| 56 |
+
# Module-level singletons β initialized on first use, reused across requests
|
| 57 |
+
# ---------------------------------------------------------------------------
|
| 58 |
+
_embed_model: "TextEmbedding | None" = None
|
| 59 |
+
_qdrant_client: AsyncQdrantClient | None = None
|
| 60 |
+
_executor: ProcessPoolExecutor | None = None
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _get_embedder() -> "TextEmbedding":
    """Lazy-load the BGE-M3 model. First load downloads ~570MB, then cached."""
    global _embed_model
    if _embed_model is not None:
        return _embed_model

    # Deferred import: fastembed is heavy, only pay for it on first use.
    from fastembed import TextEmbedding

    log.info("rag.embedder.loading", model="BAAI/bge-m3")
    _embed_model = TextEmbedding("BAAI/bge-m3")
    log.info("rag.embedder.ready")
    return _embed_model
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def _get_executor() -> ProcessPoolExecutor:
    """Return the shared process pool (embedding is CPU-bound; keep it off the event loop)."""
    global _executor
    if _executor is not None:
        return _executor
    _executor = ProcessPoolExecutor(max_workers=2)
    return _executor
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _embed_sync(texts: list[str]) -> list[list[float]]:
    """
    CPU-bound embedding executed inside the process pool.

    Must stay a module-level function (not a method or lambda) so it can be
    pickled across the process boundary.
    """
    embedder = _get_embedder()
    return [vec.tolist() for vec in embedder.embed(texts)]
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
async def embed_texts(texts: list[str]) -> list[list[float]]:
    """
    Async wrapper: run CPU-bound embedding in the shared process pool.

    Fix: use asyncio.get_running_loop() instead of get_event_loop() —
    the latter is deprecated inside coroutines since Python 3.10 and can
    bind the wrong loop; this function is always awaited, so a running
    loop is guaranteed.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(_get_executor(), _embed_sync, texts)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# ---------------------------------------------------------------------------
|
| 98 |
+
# Qdrant client and collection bootstrap
|
| 99 |
+
# ---------------------------------------------------------------------------
|
| 100 |
+
|
| 101 |
+
async def get_qdrant(settings: Settings) -> AsyncQdrantClient:
    """Return the shared AsyncQdrantClient, creating it (and the collection) on first use."""
    global _qdrant_client
    if _qdrant_client is not None:
        return _qdrant_client
    _qdrant_client = AsyncQdrantClient(host=settings.qdrant_host, port=settings.qdrant_port)
    await _ensure_collection(_qdrant_client, settings)
    return _qdrant_client
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
async def _ensure_collection(client: AsyncQdrantClient, settings: Settings) -> None:
    """
    Idempotent collection creation. BGE-M3 outputs 1024-dimensional vectors.
    HNSW is the default index in Qdrant, so no explicit index creation is needed.
    """
    existing = await client.get_collections()
    known_names = {col.name for col in existing.collections}
    if settings.qdrant_collection in known_names:
        return

    await client.create_collection(
        collection_name=settings.qdrant_collection,
        vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
    )
    # Payload index on ingested_at_ts keeps the recency range-filter fast.
    await client.create_payload_index(
        collection_name=settings.qdrant_collection,
        field_name="ingested_at_ts",
        field_schema=PayloadSchemaType.FLOAT,
    )
    log.info("qdrant.collection.created", name=settings.qdrant_collection)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ---------------------------------------------------------------------------
|
| 132 |
+
# Memgraph trust graph
|
| 133 |
+
# ---------------------------------------------------------------------------
|
| 134 |
+
|
| 135 |
+
async def compute_trust_score(
    claim_hash: str,
    settings: Settings,
) -> TrustScore:
    """
    Traverse the Memgraph trust graph to compute a claim's credibility score.

    Graph schema:
        (Author {handle, verified, account_type})
            -[:REPORTED {timestamp}]->
        (Claim {text, embedding_id, hash})
            <-[:CORROBORATED_BY {confidence}]-
        (Source {url, domain, bias_rating})

    Scoring algorithm (start at 0.5, clamp to [0.0, 1.0]):
        +0.30 if Author.verified AND account_type IN ['government', 'official_news']
        +0.05 per corroborating Source node (max boost: +0.25, so cap at 5 sources)
        -0.40 if any Source carries an active Community_Note relationship

    Args:
        claim_hash: Stable hash identifying the Claim node in the graph.
        settings: Runtime configuration (Memgraph host/port/password).

    Returns:
        TrustScore with the clamped score and the raw signals that produced it.
    """
    driver = AsyncGraphDatabase.driver(
        f"bolt://{settings.memgraph_host}:{settings.memgraph_port}",
        auth=("", settings.memgraph_password),
        encrypted=False,
    )

    # Close the driver even when the query raises. The previous version only
    # closed on the success path, leaking a Bolt connection on any exception
    # raised inside the session block.
    try:
        async with driver.session() as session:
            result = await session.run(
                """
                OPTIONAL MATCH (a:Author)-[:REPORTED]->(c:Claim {hash: $hash})
                OPTIONAL MATCH (s:Source)-[:CORROBORATED_BY]->(c)
                OPTIONAL MATCH (s)-[:HAS_NOTE]->(n:CommunityNote {active: true})
                RETURN
                    a.verified AS verified,
                    a.account_type AS account_type,
                    COUNT(DISTINCT s) AS source_count,
                    COUNT(DISTINCT n) AS note_count,
                    COLLECT(DISTINCT n.text)[0] AS note_text
                """,
                hash=claim_hash,
            )
            row = await result.single()
    finally:
        await driver.close()

    if row is None:
        # Claim not yet in graph -> return neutral score
        return TrustScore(
            score=0.5,
            author_verified=False,
            corroborating_sources=0,
            has_community_note=False,
        )

    verified: bool = bool(row["verified"])
    account_type: str | None = row["account_type"]
    source_count: int = int(row["source_count"] or 0)
    note_count: int = int(row["note_count"] or 0)
    note_text: str | None = row["note_text"]

    # --- Scoring algorithm ---
    score = 0.5

    if verified and account_type in ("government", "official_news"):
        score += 0.30  # Strong verified official boost

    source_boost = min(source_count * 0.05, 0.25)  # Cap at 5 sources x 0.05
    score += source_boost

    has_note = note_count > 0
    if has_note:
        score -= 0.40  # Active Community Note is a strong negative signal

    score = max(0.0, min(1.0, score))  # Clamp to [0.0, 1.0]

    return TrustScore(
        score=round(score, 4),
        author_verified=verified,
        corroborating_sources=source_count,
        has_community_note=has_note,
        community_note_text=note_text,
    )
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# ---------------------------------------------------------------------------
|
| 219 |
+
# Main RAG pipeline entry point
|
| 220 |
+
# ---------------------------------------------------------------------------
|
| 221 |
+
|
| 222 |
+
async def run_rag_pipeline(
    claim_text: str,
    claim_hash: str,
    settings: Settings | None = None,
) -> RAGResult:
    """
    Full RAG pipeline: embed -> ANN search with recency filter -> trust traversal.

    Returns RAGResult with top-k evidence chunks and computed trust score,
    both of which feed into the multi-agent evaluation layer (agents.py).
    """
    cfg = settings or get_settings()

    # Embedding (process pool) and trust traversal (Memgraph) are independent,
    # so overlap them instead of awaiting sequentially.
    vectors, trust = await asyncio.gather(
        embed_texts([claim_text]),
        compute_trust_score(claim_hash, cfg),
    )
    claim_vector = vectors[0]

    # Only evidence ingested within the configured window (Unix-timestamp
    # payload filter on ingested_at_ts) is eligible for retrieval.
    cutoff_ts = (
        datetime.now(timezone.utc) - timedelta(hours=cfg.evidence_window_hours)
    ).timestamp()

    qdrant = await get_qdrant(cfg)

    hits = await qdrant.search(
        collection_name=cfg.qdrant_collection,
        query_vector=claim_vector,
        limit=cfg.qdrant_top_k,
        with_payload=True,
        search_params={"hnsw_ef": cfg.qdrant_ef},
        query_filter=Filter(
            must=[
                FieldCondition(
                    key="ingested_at_ts",
                    range=Range(gte=cutoff_ts),
                )
            ]
        ),
    )

    evidence: list[EvidenceChunk] = []
    for hit in hits:
        payload = hit.payload
        evidence.append(
            EvidenceChunk(
                chunk_id=str(hit.id),
                text=payload.get("text", ""),
                source_url=payload.get("source_url", ""),
                domain=payload.get("domain", ""),
                score=hit.score,
                ingested_at=datetime.fromtimestamp(
                    payload.get("ingested_at_ts", 0), tz=timezone.utc
                ),
                bias_rating=payload.get("bias_rating"),
            )
        )

    log.info(
        "rag.pipeline.complete",
        evidence_count=len(evidence),
        trust_score=trust.score,
        claim_hash=claim_hash[:8],
    )

    return RAGResult(evidence=evidence, trust=trust)
|
backend/static/index.html
ADDED
|
@@ -0,0 +1,783 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Fact & Hallucination Intelligence System</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;700&display=swap" rel="stylesheet">
|
| 9 |
+
<style>
|
| 10 |
+
:root {
|
| 11 |
+
--bg: #070b0f;
|
| 12 |
+
--surface: #0d1117;
|
| 13 |
+
--surface2: #161b22;
|
| 14 |
+
--border: #21262d;
|
| 15 |
+
--text: #e6edf3;
|
| 16 |
+
--text-muted: #7d8590;
|
| 17 |
+
--green: #22c55e;
|
| 18 |
+
--green-glow: rgba(34,197,94,0.15);
|
| 19 |
+
--yellow: #eab308;
|
| 20 |
+
--yellow-glow: rgba(234,179,8,0.15);
|
| 21 |
+
--red: #ef4444;
|
| 22 |
+
--red-glow: rgba(239,68,68,0.15);
|
| 23 |
+
--purple: #a855f7;
|
| 24 |
+
--purple-glow: rgba(168,85,247,0.15);
|
| 25 |
+
--accent: #58a6ff;
|
| 26 |
+
--mono: 'Space Mono', monospace;
|
| 27 |
+
--sans: 'DM Sans', sans-serif;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
| 31 |
+
|
| 32 |
+
body {
|
| 33 |
+
background: var(--bg);
|
| 34 |
+
color: var(--text);
|
| 35 |
+
font-family: var(--sans);
|
| 36 |
+
min-height: 100vh;
|
| 37 |
+
display: flex;
|
| 38 |
+
flex-direction: column;
|
| 39 |
+
position: relative;
|
| 40 |
+
overflow-x: hidden;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
/* Grid background */
|
| 44 |
+
body::before {
|
| 45 |
+
content: '';
|
| 46 |
+
position: fixed;
|
| 47 |
+
inset: 0;
|
| 48 |
+
background-image:
|
| 49 |
+
linear-gradient(rgba(88,166,255,0.03) 1px, transparent 1px),
|
| 50 |
+
linear-gradient(90deg, rgba(88,166,255,0.03) 1px, transparent 1px);
|
| 51 |
+
background-size: 40px 40px;
|
| 52 |
+
pointer-events: none;
|
| 53 |
+
z-index: 0;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/* Radial glow */
|
| 57 |
+
body::after {
|
| 58 |
+
content: '';
|
| 59 |
+
position: fixed;
|
| 60 |
+
top: -20%;
|
| 61 |
+
left: 50%;
|
| 62 |
+
transform: translateX(-50%);
|
| 63 |
+
width: 80vw;
|
| 64 |
+
height: 60vh;
|
| 65 |
+
background: radial-gradient(ellipse, rgba(88,166,255,0.06) 0%, transparent 70%);
|
| 66 |
+
pointer-events: none;
|
| 67 |
+
z-index: 0;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.container {
|
| 71 |
+
position: relative;
|
| 72 |
+
z-index: 1;
|
| 73 |
+
max-width: 900px;
|
| 74 |
+
margin: 0 auto;
|
| 75 |
+
padding: 48px 24px 80px;
|
| 76 |
+
width: 100%;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
/* Header */
|
| 80 |
+
header {
|
| 81 |
+
text-align: center;
|
| 82 |
+
margin-bottom: 56px;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.logo-row {
|
| 86 |
+
display: flex;
|
| 87 |
+
align-items: center;
|
| 88 |
+
justify-content: center;
|
| 89 |
+
gap: 12px;
|
| 90 |
+
margin-bottom: 16px;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.logo-icon {
|
| 94 |
+
width: 40px;
|
| 95 |
+
height: 40px;
|
| 96 |
+
border: 1px solid var(--accent);
|
| 97 |
+
border-radius: 8px;
|
| 98 |
+
display: flex;
|
| 99 |
+
align-items: center;
|
| 100 |
+
justify-content: center;
|
| 101 |
+
color: var(--accent);
|
| 102 |
+
font-size: 20px;
|
| 103 |
+
box-shadow: 0 0 20px rgba(88,166,255,0.2);
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
h1 {
|
| 107 |
+
font-family: var(--mono);
|
| 108 |
+
font-size: clamp(18px, 3vw, 26px);
|
| 109 |
+
font-weight: 700;
|
| 110 |
+
letter-spacing: -0.5px;
|
| 111 |
+
color: var(--text);
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
.tagline {
|
| 115 |
+
font-size: 14px;
|
| 116 |
+
color: var(--text-muted);
|
| 117 |
+
font-family: var(--mono);
|
| 118 |
+
letter-spacing: 0.5px;
|
| 119 |
+
margin-top: 8px;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
/* Status bar */
|
| 123 |
+
.status-bar {
|
| 124 |
+
display: flex;
|
| 125 |
+
align-items: center;
|
| 126 |
+
gap: 8px;
|
| 127 |
+
padding: 8px 16px;
|
| 128 |
+
background: var(--surface2);
|
| 129 |
+
border: 1px solid var(--border);
|
| 130 |
+
border-radius: 6px;
|
| 131 |
+
font-family: var(--mono);
|
| 132 |
+
font-size: 12px;
|
| 133 |
+
color: var(--text-muted);
|
| 134 |
+
margin-bottom: 32px;
|
| 135 |
+
width: fit-content;
|
| 136 |
+
margin-left: auto;
|
| 137 |
+
margin-right: auto;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.status-dot {
|
| 141 |
+
width: 8px;
|
| 142 |
+
height: 8px;
|
| 143 |
+
border-radius: 50%;
|
| 144 |
+
background: #555;
|
| 145 |
+
transition: background 0.3s;
|
| 146 |
+
}
|
| 147 |
+
.status-dot.connected { background: var(--green); box-shadow: 0 0 8px var(--green); animation: pulse 2s infinite; }
|
| 148 |
+
.status-dot.connecting { background: var(--yellow); animation: pulse 0.8s infinite; }
|
| 149 |
+
.status-dot.error { background: var(--red); }
|
| 150 |
+
|
| 151 |
+
@keyframes pulse {
|
| 152 |
+
0%, 100% { opacity: 1; }
|
| 153 |
+
50% { opacity: 0.4; }
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
/* Input area */
|
| 157 |
+
.analysis-card {
|
| 158 |
+
background: var(--surface);
|
| 159 |
+
border: 1px solid var(--border);
|
| 160 |
+
border-radius: 12px;
|
| 161 |
+
padding: 28px;
|
| 162 |
+
margin-bottom: 24px;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
.card-label {
|
| 166 |
+
font-family: var(--mono);
|
| 167 |
+
font-size: 11px;
|
| 168 |
+
color: var(--text-muted);
|
| 169 |
+
letter-spacing: 1.5px;
|
| 170 |
+
text-transform: uppercase;
|
| 171 |
+
margin-bottom: 12px;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.platform-row {
|
| 175 |
+
display: flex;
|
| 176 |
+
gap: 8px;
|
| 177 |
+
margin-bottom: 16px;
|
| 178 |
+
flex-wrap: wrap;
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
.platform-btn {
|
| 182 |
+
padding: 6px 14px;
|
| 183 |
+
border: 1px solid var(--border);
|
| 184 |
+
border-radius: 20px;
|
| 185 |
+
background: transparent;
|
| 186 |
+
color: var(--text-muted);
|
| 187 |
+
font-family: var(--mono);
|
| 188 |
+
font-size: 11px;
|
| 189 |
+
cursor: pointer;
|
| 190 |
+
transition: all 0.2s;
|
| 191 |
+
letter-spacing: 0.5px;
|
| 192 |
+
}
|
| 193 |
+
.platform-btn:hover { border-color: var(--accent); color: var(--accent); }
|
| 194 |
+
.platform-btn.active {
|
| 195 |
+
border-color: var(--accent);
|
| 196 |
+
background: rgba(88,166,255,0.1);
|
| 197 |
+
color: var(--accent);
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
textarea {
|
| 201 |
+
width: 100%;
|
| 202 |
+
min-height: 120px;
|
| 203 |
+
background: var(--bg);
|
| 204 |
+
border: 1px solid var(--border);
|
| 205 |
+
border-radius: 8px;
|
| 206 |
+
color: var(--text);
|
| 207 |
+
font-family: var(--sans);
|
| 208 |
+
font-size: 15px;
|
| 209 |
+
line-height: 1.6;
|
| 210 |
+
padding: 16px;
|
| 211 |
+
resize: vertical;
|
| 212 |
+
outline: none;
|
| 213 |
+
transition: border-color 0.2s;
|
| 214 |
+
}
|
| 215 |
+
textarea:focus { border-color: var(--accent); }
|
| 216 |
+
textarea::placeholder { color: var(--text-muted); }
|
| 217 |
+
|
| 218 |
+
.analyze-btn {
|
| 219 |
+
display: flex;
|
| 220 |
+
align-items: center;
|
| 221 |
+
gap: 8px;
|
| 222 |
+
margin-top: 16px;
|
| 223 |
+
padding: 12px 28px;
|
| 224 |
+
background: var(--accent);
|
| 225 |
+
color: #000;
|
| 226 |
+
font-family: var(--mono);
|
| 227 |
+
font-size: 13px;
|
| 228 |
+
font-weight: 700;
|
| 229 |
+
border: none;
|
| 230 |
+
border-radius: 8px;
|
| 231 |
+
cursor: pointer;
|
| 232 |
+
transition: all 0.2s;
|
| 233 |
+
letter-spacing: 0.5px;
|
| 234 |
+
}
|
| 235 |
+
.analyze-btn:hover { background: #79c0ff; transform: translateY(-1px); box-shadow: 0 4px 20px rgba(88,166,255,0.3); }
|
| 236 |
+
.analyze-btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none; }
|
| 237 |
+
|
| 238 |
+
.spinner {
|
| 239 |
+
width: 14px;
|
| 240 |
+
height: 14px;
|
| 241 |
+
border: 2px solid rgba(0,0,0,0.3);
|
| 242 |
+
border-top-color: #000;
|
| 243 |
+
border-radius: 50%;
|
| 244 |
+
animation: spin 0.7s linear infinite;
|
| 245 |
+
display: none;
|
| 246 |
+
}
|
| 247 |
+
.spinner.active { display: block; }
|
| 248 |
+
@keyframes spin { to { transform: rotate(360deg); } }
|
| 249 |
+
|
| 250 |
+
/* Result card */
|
| 251 |
+
.result-card {
|
| 252 |
+
background: var(--surface);
|
| 253 |
+
border: 1px solid var(--border);
|
| 254 |
+
border-radius: 12px;
|
| 255 |
+
padding: 28px;
|
| 256 |
+
display: none;
|
| 257 |
+
animation: fadeSlideIn 0.3s ease;
|
| 258 |
+
}
|
| 259 |
+
.result-card.visible { display: block; }
|
| 260 |
+
|
| 261 |
+
@keyframes fadeSlideIn {
|
| 262 |
+
from { opacity: 0; transform: translateY(8px); }
|
| 263 |
+
to { opacity: 1; transform: translateY(0); }
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
.verdict-header {
|
| 267 |
+
display: flex;
|
| 268 |
+
align-items: flex-start;
|
| 269 |
+
gap: 20px;
|
| 270 |
+
margin-bottom: 24px;
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
.confidence-ring {
|
| 274 |
+
flex-shrink: 0;
|
| 275 |
+
width: 72px;
|
| 276 |
+
height: 72px;
|
| 277 |
+
position: relative;
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
.confidence-ring svg {
|
| 281 |
+
width: 72px;
|
| 282 |
+
height: 72px;
|
| 283 |
+
transform: rotate(-90deg);
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
.confidence-ring .track {
|
| 287 |
+
fill: none;
|
| 288 |
+
stroke: var(--border);
|
| 289 |
+
stroke-width: 6;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
.confidence-ring .fill {
|
| 293 |
+
fill: none;
|
| 294 |
+
stroke-width: 6;
|
| 295 |
+
stroke-linecap: round;
|
| 296 |
+
transition: stroke-dashoffset 0.6s ease, stroke 0.3s;
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
.confidence-num {
|
| 300 |
+
position: absolute;
|
| 301 |
+
inset: 0;
|
| 302 |
+
display: flex;
|
| 303 |
+
align-items: center;
|
| 304 |
+
justify-content: center;
|
| 305 |
+
font-family: var(--mono);
|
| 306 |
+
font-size: 14px;
|
| 307 |
+
font-weight: 700;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
.verdict-meta { flex: 1; }
|
| 311 |
+
|
| 312 |
+
.color-badge {
|
| 313 |
+
display: inline-flex;
|
| 314 |
+
align-items: center;
|
| 315 |
+
gap: 6px;
|
| 316 |
+
padding: 4px 12px;
|
| 317 |
+
border-radius: 20px;
|
| 318 |
+
font-family: var(--mono);
|
| 319 |
+
font-size: 11px;
|
| 320 |
+
font-weight: 700;
|
| 321 |
+
letter-spacing: 1px;
|
| 322 |
+
text-transform: uppercase;
|
| 323 |
+
margin-bottom: 8px;
|
| 324 |
+
}
|
| 325 |
+
.color-badge.green { background: var(--green-glow); color: var(--green); border: 1px solid rgba(34,197,94,0.3); }
|
| 326 |
+
.color-badge.yellow { background: var(--yellow-glow); color: var(--yellow); border: 1px solid rgba(234,179,8,0.3); }
|
| 327 |
+
.color-badge.red { background: var(--red-glow); color: var(--red); border: 1px solid rgba(239,68,68,0.3); }
|
| 328 |
+
.color-badge.purple { background: var(--purple-glow); color: var(--purple); border: 1px solid rgba(168,85,247,0.3); }
|
| 329 |
+
|
| 330 |
+
.verdict-label {
|
| 331 |
+
font-family: var(--sans);
|
| 332 |
+
font-size: 18px;
|
| 333 |
+
font-weight: 700;
|
| 334 |
+
margin-bottom: 8px;
|
| 335 |
+
line-height: 1.3;
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
.explanation {
|
| 339 |
+
font-size: 14px;
|
| 340 |
+
color: var(--text-muted);
|
| 341 |
+
line-height: 1.7;
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
/* Metadata grid */
|
| 345 |
+
.meta-grid {
|
| 346 |
+
display: grid;
|
| 347 |
+
grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
|
| 348 |
+
gap: 12px;
|
| 349 |
+
margin: 24px 0;
|
| 350 |
+
padding: 20px;
|
| 351 |
+
background: var(--surface2);
|
| 352 |
+
border-radius: 8px;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.meta-item { display: flex; flex-direction: column; gap: 4px; }
|
| 356 |
+
.meta-key {
|
| 357 |
+
font-family: var(--mono);
|
| 358 |
+
font-size: 10px;
|
| 359 |
+
color: var(--text-muted);
|
| 360 |
+
text-transform: uppercase;
|
| 361 |
+
letter-spacing: 1px;
|
| 362 |
+
}
|
| 363 |
+
.meta-value {
|
| 364 |
+
font-family: var(--mono);
|
| 365 |
+
font-size: 13px;
|
| 366 |
+
color: var(--text);
|
| 367 |
+
font-weight: 700;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
/* Sources */
|
| 371 |
+
.sources-label {
|
| 372 |
+
font-family: var(--mono);
|
| 373 |
+
font-size: 11px;
|
| 374 |
+
color: var(--text-muted);
|
| 375 |
+
text-transform: uppercase;
|
| 376 |
+
letter-spacing: 1px;
|
| 377 |
+
margin-bottom: 10px;
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
.source-item {
|
| 381 |
+
display: flex;
|
| 382 |
+
align-items: center;
|
| 383 |
+
gap: 10px;
|
| 384 |
+
padding: 10px 14px;
|
| 385 |
+
background: var(--surface2);
|
| 386 |
+
border-radius: 6px;
|
| 387 |
+
margin-bottom: 6px;
|
| 388 |
+
font-size: 13px;
|
| 389 |
+
border: 1px solid transparent;
|
| 390 |
+
transition: border-color 0.2s;
|
| 391 |
+
}
|
| 392 |
+
.source-item:hover { border-color: var(--border); }
|
| 393 |
+
.source-favicon { width: 16px; height: 16px; border-radius: 3px; }
|
| 394 |
+
.source-domain { font-family: var(--mono); font-size: 11px; color: var(--accent); }
|
| 395 |
+
|
| 396 |
+
/* Pipeline log */
|
| 397 |
+
.pipeline-log {
|
| 398 |
+
background: var(--bg);
|
| 399 |
+
border: 1px solid var(--border);
|
| 400 |
+
border-radius: 8px;
|
| 401 |
+
padding: 16px;
|
| 402 |
+
margin-top: 24px;
|
| 403 |
+
font-family: var(--mono);
|
| 404 |
+
font-size: 12px;
|
| 405 |
+
color: var(--text-muted);
|
| 406 |
+
max-height: 200px;
|
| 407 |
+
overflow-y: auto;
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
.log-line {
|
| 411 |
+
display: flex;
|
| 412 |
+
gap: 12px;
|
| 413 |
+
margin-bottom: 4px;
|
| 414 |
+
animation: fadeIn 0.2s ease;
|
| 415 |
+
}
|
| 416 |
+
@keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } }
|
| 417 |
+
.log-ts { color: #444; flex-shrink: 0; }
|
| 418 |
+
.log-level { flex-shrink: 0; }
|
| 419 |
+
.log-level.info { color: var(--accent); }
|
| 420 |
+
.log-level.ok { color: var(--green); }
|
| 421 |
+
.log-level.warn { color: var(--yellow); }
|
| 422 |
+
.log-level.drop { color: var(--text-muted); }
|
| 423 |
+
|
| 424 |
+
/* Example claims */
|
| 425 |
+
.examples-label {
|
| 426 |
+
font-family: var(--mono);
|
| 427 |
+
font-size: 11px;
|
| 428 |
+
color: var(--text-muted);
|
| 429 |
+
text-transform: uppercase;
|
| 430 |
+
letter-spacing: 1px;
|
| 431 |
+
margin-bottom: 12px;
|
| 432 |
+
}
|
| 433 |
+
|
| 434 |
+
.example-chip {
|
| 435 |
+
display: inline-block;
|
| 436 |
+
padding: 6px 12px;
|
| 437 |
+
border: 1px solid var(--border);
|
| 438 |
+
border-radius: 6px;
|
| 439 |
+
font-size: 12px;
|
| 440 |
+
color: var(--text-muted);
|
| 441 |
+
cursor: pointer;
|
| 442 |
+
margin: 0 6px 6px 0;
|
| 443 |
+
transition: all 0.2s;
|
| 444 |
+
line-height: 1.4;
|
| 445 |
+
}
|
| 446 |
+
.example-chip:hover { border-color: var(--accent); color: var(--text); background: rgba(88,166,255,0.05); }
|
| 447 |
+
|
| 448 |
+
/* Footer */
|
| 449 |
+
footer {
|
| 450 |
+
text-align: center;
|
| 451 |
+
padding: 32px 0;
|
| 452 |
+
font-family: var(--mono);
|
| 453 |
+
font-size: 11px;
|
| 454 |
+
color: var(--text-muted);
|
| 455 |
+
border-top: 1px solid var(--border);
|
| 456 |
+
position: relative;
|
| 457 |
+
z-index: 1;
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
+
.stack-tags { display: flex; gap: 8px; justify-content: center; flex-wrap: wrap; margin-top: 10px; }
|
| 461 |
+
.stack-tag {
|
| 462 |
+
padding: 3px 8px;
|
| 463 |
+
border: 1px solid var(--border);
|
| 464 |
+
border-radius: 4px;
|
| 465 |
+
font-size: 10px;
|
| 466 |
+
letter-spacing: 0.5px;
|
| 467 |
+
}
|
| 468 |
+
</style>
|
| 469 |
+
</head>
|
| 470 |
+
<body>
|
| 471 |
+
<div class="container">
|
| 472 |
+
<header>
|
| 473 |
+
<div class="logo-row">
|
| 474 |
+
<div class="logo-icon">⬑</div>
|
| 475 |
+
<h1>FACT INTELLIGENCE SYSTEM</h1>
|
| 476 |
+
</div>
|
| 477 |
+
<p class="tagline">// omnichannel Β· real-time Β· hallucination-aware</p>
|
| 478 |
+
</header>
|
| 479 |
+
|
| 480 |
+
<div class="status-bar">
|
| 481 |
+
<div class="status-dot connecting" id="statusDot"></div>
|
| 482 |
+
<span id="statusText">connecting to intelligence engine...</span>
|
| 483 |
+
</div>
|
| 484 |
+
|
| 485 |
+
<!-- Input -->
|
| 486 |
+
<div class="analysis-card">
|
| 487 |
+
<div class="card-label">// source platform</div>
|
| 488 |
+
<div class="platform-row" id="platformRow">
|
| 489 |
+
<button class="platform-btn active" data-platform="news">News</button>
|
| 490 |
+
<button class="platform-btn" data-platform="twitter">X / Twitter</button>
|
| 491 |
+
<button class="platform-btn" data-platform="youtube">YouTube</button>
|
| 492 |
+
<button class="platform-btn" data-platform="instagram">Instagram</button>
|
| 493 |
+
<button class="platform-btn" data-platform="chatgpt">ChatGPT</button>
|
| 494 |
+
<button class="platform-btn" data-platform="claude">Claude</button>
|
| 495 |
+
<button class="platform-btn" data-platform="gemini">Gemini</button>
|
| 496 |
+
</div>
|
| 497 |
+
|
| 498 |
+
<div class="card-label" style="margin-top:20px">// text to analyze</div>
|
| 499 |
+
<textarea id="claimInput" placeholder="Paste a claim, headline, or AI-generated text here... Minimum 12 words required."></textarea>
|
| 500 |
+
|
| 501 |
+
<button class="analyze-btn" id="analyzeBtn" onclick="analyzeClaim()">
|
| 502 |
+
<div class="spinner" id="spinner"></div>
|
| 503 |
+
<span id="btnText">ANALYZE CLAIM</span>
|
| 504 |
+
</button>
|
| 505 |
+
</div>
|
| 506 |
+
|
| 507 |
+
<!-- Example claims -->
|
| 508 |
+
<div class="analysis-card">
|
| 509 |
+
<div class="examples-label">// example claims to test</div>
|
| 510 |
+
<span class="example-chip" onclick="setExample(this.textContent)">Scientists confirmed mRNA vaccines provide immunity lasting over 18 months in 73% of clinical trial participants.</span>
|
| 511 |
+
<span class="example-chip" onclick="setExample(this.textContent)">The Federal Reserve raised interest rates by 75 basis points β the largest single hike since 1994.</span>
|
| 512 |
+
<span class="example-chip" onclick="setExample(this.textContent)">According to a study published in Nature, this drug reduces tumor size by 500% in all stage-4 patients within 2 weeks.</span>
|
| 513 |
+
<span class="example-chip" onclick="setExample(this.textContent)">The Amazon rainforest lost 11,568 square kilometers to deforestation in 2023, a 22% increase year-over-year.</span>
|
| 514 |
+
<span class="example-chip" onclick="setExample(this.textContent)">As referenced in Smith et al. (2019), the compound shows 94.7% efficacy against all known variants of the pathogen.</span>
|
| 515 |
+
</div>
|
| 516 |
+
|
| 517 |
+
<!-- Result -->
|
| 518 |
+
<div class="result-card" id="resultCard">
|
| 519 |
+
<div class="verdict-header">
|
| 520 |
+
<div class="confidence-ring" id="confRing">
|
| 521 |
+
<svg viewBox="0 0 72 72">
|
| 522 |
+
<circle class="track" cx="36" cy="36" r="30"/>
|
| 523 |
+
<circle class="fill" id="confArc" cx="36" cy="36" r="30"
|
| 524 |
+
stroke-dasharray="188.5"
|
| 525 |
+
stroke-dashoffset="188.5"/>
|
| 526 |
+
</svg>
|
| 527 |
+
<div class="confidence-num" id="confNum">β</div>
|
| 528 |
+
</div>
|
| 529 |
+
<div class="verdict-meta">
|
| 530 |
+
<div class="color-badge" id="colorBadge">β</div>
|
| 531 |
+
<div class="verdict-label" id="verdictLabel">β</div>
|
| 532 |
+
<div class="explanation" id="explanationText">β</div>
|
| 533 |
+
</div>
|
| 534 |
+
</div>
|
| 535 |
+
|
| 536 |
+
<div class="meta-grid">
|
| 537 |
+
<div class="meta-item"><div class="meta-key">Trust Score</div><div class="meta-value" id="metaTrust">β</div></div>
|
| 538 |
+
<div class="meta-item"><div class="meta-key">X Velocity</div><div class="meta-value" id="metaVelocity">β</div></div>
|
| 539 |
+
<div class="meta-item"><div class="meta-key">Community Note</div><div class="meta-value" id="metaNote">β</div></div>
|
| 540 |
+
<div class="meta-item"><div class="meta-key">Pipeline (ms)</div><div class="meta-value" id="metaLatency">β</div></div>
|
| 541 |
+
<div class="meta-item"><div class="meta-key">Cache</div><div class="meta-value" id="metaCached">β</div></div>
|
| 542 |
+
<div class="meta-item"><div class="meta-key">Platform</div><div class="meta-value" id="metaPlatform">β</div></div>
|
| 543 |
+
</div>
|
| 544 |
+
|
| 545 |
+
<div id="sourcesSection">
|
| 546 |
+
<div class="sources-label">// evidence sources</div>
|
| 547 |
+
<div id="sourcesList"></div>
|
| 548 |
+
</div>
|
| 549 |
+
|
| 550 |
+
<div class="pipeline-log" id="pipelineLog"></div>
|
| 551 |
+
</div>
|
| 552 |
+
</div>
|
| 553 |
+
|
| 554 |
+
<footer>
|
| 555 |
+
<div>OMNICHANNEL FACT & HALLUCINATION INTELLIGENCE SYSTEM v1.0</div>
|
| 556 |
+
<div class="stack-tags">
|
| 557 |
+
<span class="stack-tag">FastAPI</span>
|
| 558 |
+
<span class="stack-tag">BGE-M3</span>
|
| 559 |
+
<span class="stack-tag">Qdrant</span>
|
| 560 |
+
<span class="stack-tag">Memgraph</span>
|
| 561 |
+
<span class="stack-tag">Redpanda</span>
|
| 562 |
+
<span class="stack-tag">Redis Stack</span>
|
| 563 |
+
<span class="stack-tag">LiteLLM</span>
|
| 564 |
+
<span class="stack-tag">Prefect</span>
|
| 565 |
+
<span class="stack-tag">Groq</span>
|
| 566 |
+
<span class="stack-tag">WXT</span>
|
| 567 |
+
</div>
|
| 568 |
+
</footer>
|
| 569 |
+
|
| 570 |
+
<script>
|
| 571 |
+
// βββ WebSocket client ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 572 |
+
const SESSION_ID = crypto.randomUUID();
|
| 573 |
+
const WS_URL = `${location.protocol === 'https:' ? 'wss' : 'ws'}://${location.host}/ws/${SESSION_ID}`;
|
| 574 |
+
|
| 575 |
+
let ws = null;
|
| 576 |
+
let reconnectDelay = 1000;
|
| 577 |
+
let selectedPlatform = 'news';
|
| 578 |
+
|
| 579 |
+
// Open the backend WebSocket and wire up all lifecycle handlers.
// Reconnects with exponential backoff (1s doubling up to a 30s cap);
// the delay resets to 1s after every successful connect.
function connect() {
  setStatus('connecting');
  log('INFO', `connecting to ${WS_URL}`);
  ws = new WebSocket(WS_URL);

  ws.onopen = () => {
    setStatus('connected');
    reconnectDelay = 1000; // reset backoff after a successful connect
    log('OK', 'WebSocket connected β intelligence engine online');
  };

  ws.onclose = () => {
    setStatus('disconnected');
    log('WARN', `disconnected β reconnecting in ${reconnectDelay / 1000}s`);
    setTimeout(connect, reconnectDelay);
    reconnectDelay = Math.min(reconnectDelay * 2, 30000);
  };

  ws.onerror = () => {
    setStatus('error');
    log('WARN', 'WebSocket error β will retry');
  };

  ws.onmessage = (evt) => {
    // FIX: JSON.parse was unguarded β a single malformed frame threw inside
    // the handler. Parse defensively and ignore unparseable messages.
    let msg;
    try {
      msg = JSON.parse(evt.data);
    } catch {
      log('WARN', 'received unparseable message β ignoring');
      return;
    }
    if (msg.type === 'pong') return;
    if (msg.type === 'status') {
      const p = msg.payload;
      log('INFO', `engine status: demo=${p.demo_mode}, groq=${p.has_groq}, x_api=${p.has_x_api}`);
      return;
    }
    if (msg.type === 'result') renderResult(msg.payload);
    if (msg.type === 'error') {
      log('WARN', `error: ${msg.payload?.message}`);
      resetBtn();
    }
  };
}
|
| 617 |
+
|
| 618 |
+
// Keepalive: ping the backend every 20s while the socket is open
// (readyState 1 === WebSocket.OPEN) so idle proxies don't drop us.
setInterval(() => {
  if (ws && ws.readyState === 1) {
    ws.send(JSON.stringify({ type: 'ping' }));
  }
}, 20000);
|
| 620 |
+
|
| 621 |
+
// βββ Platform selector βββββββββββββββββββββββββββββββββββββββββββββββββββ
// Delegated click handler on the row container: highlight the clicked
// button and remember its platform for subsequent analyses.
document.getElementById('platformRow').addEventListener('click', (e) => {
  const clicked = e.target.closest('.platform-btn');
  if (clicked === null) return;
  for (const other of document.querySelectorAll('.platform-btn')) {
    other.classList.remove('active');
  }
  clicked.classList.add('active');
  selectedPlatform = clicked.dataset.platform;
});
|
| 629 |
+
|
| 630 |
+
// βββ Analysis ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
// Validate the claim text, make sure the socket is live, then dispatch a
// single-segment batch to the backend pipeline.
async function analyzeClaim() {
  const text = document.getElementById('claimInput').value.trim();
  if (!text) return;

  const words = text.split(/\s+/).filter(Boolean);
  const wordCount = words.length;
  if (wordCount < 12) {
    log('WARN', `text too short: ${wordCount} words (minimum 12)`);
    return;
  }

  if (!ws || ws.readyState !== 1) {
    log('WARN', 'not connected β retrying connection');
    connect();
    return;
  }

  setBtnLoading(true);
  document.getElementById('resultCard').classList.remove('visible');
  log('INFO', `sending claim (${wordCount} words) on platform: ${selectedPlatform}`);

  // Compute xxhash-like fingerprint in browser (simplified)
  const hash = await hashText(text);
  log('INFO', `content hash: ${hash.slice(0, 8)}... β checking cache`);

  const segment = {
    content_hash: hash,
    text: text,
    element_id: `demo-${Date.now()}`,
    word_count: wordCount,
  };
  const batch = {
    type: 'batch',
    payload: {
      session_id: SESSION_ID,
      platform: selectedPlatform,
      segments: [segment],
      sent_at: new Date().toISOString(),
    },
  };

  ws.send(JSON.stringify(batch));
  log('INFO', 'batch dispatched β gatekeeper β RAG β agents');
}
|
| 673 |
+
|
| 674 |
+
// SHA-256 the claim text via Web Crypto and return a lowercase hex digest.
async function hashText(text) {
  const encoded = new TextEncoder().encode(text);
  const digest = await crypto.subtle.digest('SHA-256', encoded);
  const hexParts = [];
  for (const byte of new Uint8Array(digest)) {
    hexParts.push(byte.toString(16).padStart(2, '0'));
  }
  return hexParts.join('');
}
|
| 679 |
+
|
| 680 |
+
// βββ Render result ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 681 |
+
// Render a verdict payload from the backend into the result card.
// SECURITY FIX: source fields (domain / snippet / url / favicon_url) arrive
// over the network, so they are now inserted via DOM APIs + textContent
// instead of being concatenated into innerHTML (prevents HTML/JS injection).
function renderResult(r) {
  setBtnLoading(false);

  const colorMap = {
    green: { label: 'β VERIFIED', stroke: '#22c55e' },
    yellow: { label: 'β UNVERIFIED', stroke: '#eab308' },
    red: { label: 'β DEBUNKED', stroke: '#ef4444' },
    purple: { label: 'β AI HALLUCINATION', stroke: '#a855f7' },
  };
  const c = colorMap[r.color] || colorMap.yellow;

  // Confidence arc (SVG circle of radius 30)
  const arc = document.getElementById('confArc');
  const circumference = 2 * Math.PI * 30;
  const offset = circumference - (r.confidence / 100) * circumference;
  arc.style.strokeDashoffset = offset;
  arc.style.stroke = c.stroke;
  document.getElementById('confNum').textContent = r.confidence;
  document.getElementById('confNum').style.color = c.stroke;

  // Badge
  const badge = document.getElementById('colorBadge');
  badge.textContent = c.label;
  badge.className = `color-badge ${r.color}`;

  document.getElementById('verdictLabel').textContent = r.verdict_label || 'Analysis complete';
  document.getElementById('explanationText').textContent = r.explanation || '';

  // Meta grid
  document.getElementById('metaTrust').textContent = (r.trust_score * 100).toFixed(0) + '%';
  document.getElementById('metaVelocity').textContent = r.velocity?.toLocaleString() ?? 'β';
  document.getElementById('metaNote').textContent = r.has_community_note ? 'β YES' : 'β None';
  document.getElementById('metaNote').style.color = r.has_community_note ? 'var(--red)' : 'var(--green)';
  document.getElementById('metaLatency').textContent = r.latency_ms?.toFixed(1) ?? 'β';
  document.getElementById('metaCached').textContent = r.cached ? 'β HIT' : 'β MISS';
  document.getElementById('metaPlatform').textContent = r.platform?.toUpperCase() ?? 'β';

  // Sources β built with createElement/textContent, never raw HTML
  const list = document.getElementById('sourcesList');
  list.innerHTML = '';
  if (r.sources?.length) {
    r.sources.forEach(s => {
      const el = document.createElement('div');
      el.className = 'source-item';

      const favicon = document.createElement('img');
      favicon.className = 'source-favicon';
      favicon.src = s.favicon_url;
      favicon.onerror = () => { favicon.style.display = 'none'; };

      const body = document.createElement('div');
      const domain = document.createElement('div');
      domain.className = 'source-domain';
      domain.textContent = s.domain || 'unknown';
      const snippet = document.createElement('div');
      snippet.style.cssText = 'font-size:12px;color:var(--text-muted);margin-top:2px';
      snippet.textContent = s.snippet || s.url || '';
      body.appendChild(domain);
      body.appendChild(snippet);

      el.appendChild(favicon);
      el.appendChild(body);
      list.appendChild(el);
    });
    document.getElementById('sourcesSection').style.display = 'block';
  } else {
    document.getElementById('sourcesSection').style.display = 'none';
  }

  log('OK', `verdict: ${r.color.toUpperCase()} (${r.confidence}%) β ${r.verdict_label}`);
  document.getElementById('resultCard').classList.add('visible');
}
|
| 742 |
+
|
| 743 |
+
// βββ Helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
// Reflect the connection state in the header status dot and label.
function setStatus(state) {
  const dot = document.getElementById('statusDot');
  const txt = document.getElementById('statusText');
  dot.className = 'status-dot'; // drop any previous state class
  switch (state) {
    case 'connected':
      dot.classList.add('connected');
      txt.textContent = 'intelligence engine online';
      break;
    case 'connecting':
      dot.classList.add('connecting');
      txt.textContent = 'connecting...';
      break;
    case 'error':
      dot.classList.add('error');
      txt.textContent = 'connection error';
      break;
    default:
      txt.textContent = 'offline β reconnecting';
  }
}
|
| 753 |
+
|
| 754 |
+
// Toggle the analyze button between its idle and busy states.
function setBtnLoading(loading) {
  const label = loading ? 'ANALYZING...' : 'ANALYZE CLAIM';
  document.getElementById('spinner').classList.toggle('active', loading);
  document.getElementById('btnText').textContent = label;
  document.getElementById('analyzeBtn').disabled = loading;
}
|
| 759 |
+
|
| 760 |
+
// Convenience wrapper: return the analyze button to its idle state.
function resetBtn() {
  setBtnLoading(false);
}
|
| 761 |
+
|
| 762 |
+
// Append a timestamped line to the pipeline log panel.
// SECURITY FIX: `msg` can echo server-provided strings (e.g. backend error
// messages), so it is now set via textContent instead of being interpolated
// into innerHTML β prevents markup injection into the log.
function log(level, msg) {
  const container = document.getElementById('pipelineLog');
  const now = new Date().toISOString().slice(11, 23);
  const levelClass = { INFO: 'info', OK: 'ok', WARN: 'warn', DROP: 'drop' }[level] || 'info';

  const line = document.createElement('div');
  line.className = 'log-line';

  const ts = document.createElement('span');
  ts.className = 'log-ts';
  ts.textContent = now;

  const lvl = document.createElement('span');
  lvl.className = `log-level ${levelClass}`;
  lvl.textContent = `[${level}]`;

  const body = document.createElement('span');
  body.textContent = msg;

  line.append(ts, lvl, body);
  container.appendChild(line);
  container.scrollTop = container.scrollHeight; // keep the newest line visible
}
|
| 772 |
+
|
| 773 |
+
// Populate the claim textarea from an example chip.
function setExample(text) {
  const input = document.getElementById('claimInput');
  input.value = text.trim();
}
|
| 776 |
+
|
| 777 |
+
// Start: open the backend WebSocket and announce the session in the log.
connect();
log('INFO', 'intelligence system initialized');
log('INFO', `session: ${SESSION_ID.slice(0, 8)}...`);
|
| 781 |
+
</script>
|
| 782 |
+
</body>
|
| 783 |
+
</html>
|
backend/tests/test_pipeline.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
tests/test_pipeline.py β Integration-style tests for the full fact-checking pipeline.
|
| 3 |
+
|
| 4 |
+
Run with:
|
| 5 |
+
uv run pytest tests/ -v
|
| 6 |
+
|
| 7 |
+
Tests use DEMO_MODE=true to avoid needing real API keys.
|
| 8 |
+
All external services (Qdrant, Memgraph, Redis) are mocked using monkeypatching.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import asyncio
|
| 12 |
+
from unittest.mock import AsyncMock, MagicMock, patch
|
| 13 |
+
|
| 14 |
+
import pytest
|
| 15 |
+
|
| 16 |
+
from core.config import HighlightColor, Platform, Settings
|
| 17 |
+
from core.models import (
|
| 18 |
+
EvidenceChunk,
|
| 19 |
+
GatekeeperResult,
|
| 20 |
+
GrokSensorResult,
|
| 21 |
+
RAGResult,
|
| 22 |
+
TextBatch,
|
| 23 |
+
TextSegment,
|
| 24 |
+
TrustScore,
|
| 25 |
+
)
|
| 26 |
+
from gatekeeper import classify_claim, _heuristic_classify
|
| 27 |
+
from grok_sensor import _mock_sensor_result, _extract_keywords
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ---------------------------------------------------------------------------
|
| 31 |
+
# Fixtures
|
| 32 |
+
# ---------------------------------------------------------------------------
|
| 33 |
+
|
| 34 |
+
@pytest.fixture
def demo_settings() -> Settings:
    """Settings configured for offline demo mode: no API keys, local hosts."""
    overrides = {
        "DEMO_MODE": True,
        "GROQ_API_KEY": "",
        "ANTHROPIC_API_KEY": "",
        "X_BEARER_TOKEN": "",
        "QDRANT_HOST": "localhost",
        "MEMGRAPH_HOST": "localhost",
        "REDIS_URL": "redis://localhost:6379",
    }
    return Settings(**overrides)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@pytest.fixture
def sample_rag_result() -> RAGResult:
    """One high-score Reuters evidence chunk plus a moderately positive trust score."""
    from datetime import datetime, timezone

    chunk = EvidenceChunk(
        chunk_id="test-001",
        text="Scientists confirmed mRNA vaccines provide long-term immunity.",
        source_url="https://reuters.com/article/123",
        domain="reuters.com",
        score=0.89,
        ingested_at=datetime.now(timezone.utc),
        bias_rating="center",
    )
    trust = TrustScore(
        score=0.75,
        author_verified=True,
        corroborating_sources=2,
        has_community_note=False,
    )
    return RAGResult(evidence=[chunk], trust=trust)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@pytest.fixture
def sample_grok_result() -> GrokSensorResult:
    """Mocked X-sensor reading: moderate velocity, no community note attached."""
    return GrokSensorResult(
        velocity=1200,
        community_note=False,
        note_text=None,
        is_mock=True,
    )
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ---------------------------------------------------------------------------
|
| 82 |
+
# Gatekeeper tests
|
| 83 |
+
# ---------------------------------------------------------------------------
|
| 84 |
+
|
| 85 |
+
class TestGatekeeper:
    """Behavioural checks for the fact/noise gatekeeper classifier."""

    @pytest.mark.asyncio
    async def test_heuristic_classifies_opinion_as_noise(self, demo_settings):
        verdict = await classify_claim("I think this is all just propaganda honestly", demo_settings)
        assert verdict.label == "noise"

    @pytest.mark.asyncio
    async def test_heuristic_classifies_numeric_claim_as_fact(self, demo_settings):
        claim = "According to the CDC report, 73% of participants showed immunity lasting 18 months"
        verdict = await classify_claim(claim, demo_settings)
        assert verdict.label == "fact"

    def test_heuristic_opinion_starters(self):
        # Each sample opens with a first-person hedge and should be dropped as noise.
        samples = (
            "I think the whole thing is suspicious and people should wake up",
            "I believe this is all connected somehow to something bigger",
            "IMO this is the worst policy decision in history by far",
        )
        for sample in samples:
            verdict = _heuristic_classify(sample)
            assert verdict.label == "noise", f"Expected noise for: {sample}"

    def test_heuristic_factual_claim(self):
        verdict = _heuristic_classify(
            "The Federal Reserve raised rates by 75 basis points according to the official announcement"
        )
        assert verdict.label == "fact"
        assert verdict.confidence > 0.5

    def test_gatekeeper_result_confidence_bounds(self):
        verdict = _heuristic_classify("Scientists found that 47% of participants showed no immunity")
        assert 0.0 <= verdict.confidence <= 1.0

    def test_gatekeeper_result_valid_label(self):
        verdict = _heuristic_classify("lol did you see that? total propaganda π")
        assert verdict.label in {"fact", "noise"}
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# ---------------------------------------------------------------------------
|
| 126 |
+
# Grok sensor tests
|
| 127 |
+
# ---------------------------------------------------------------------------
|
| 128 |
+
|
| 129 |
+
class TestGrokSensor:
    """Checks for the (mocked) X velocity / community-note sensor."""

    def test_mock_is_deterministic(self):
        """Same hash should always produce the same mock result."""
        fingerprint = "abcdef1234567890"
        first = _mock_sensor_result(fingerprint)
        second = _mock_sensor_result(fingerprint)
        assert first.velocity == second.velocity
        assert first.community_note == second.community_note
        assert first.is_mock is True

    def test_mock_different_hashes_produce_variation(self):
        """Different hashes should produce different results (not all identical)."""
        velocities = {_mock_sensor_result(f"hash_{i:04d}").velocity for i in range(50)}
        # Expect genuine spread, not a single repeated value.
        assert len(velocities) > 5

    def test_keyword_extraction_removes_stopwords(self):
        keywords = _extract_keywords(
            "The Federal Reserve is raising interest rates by 75 basis points today"
        )
        assert "the" not in keywords
        assert "is" not in keywords
        # Meaningful words should survive extraction.
        meaningful = ("federal", "reserve", "raising", "interest", "rates")
        assert any(word.lower() in meaningful for word in keywords)

    def test_keyword_extraction_max_10(self):
        synthetic = " ".join(f"word{i}" for i in range(50))
        assert len(_extract_keywords(synthetic)) <= 10
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ---------------------------------------------------------------------------
|
| 161 |
+
# Model validation tests
|
| 162 |
+
# ---------------------------------------------------------------------------
|
| 163 |
+
|
| 164 |
+
class TestModels:
    """Validation behaviour of the request/response models."""

    def test_text_segment_rejects_short_text(self):
        # word_count=2 sits below the model's minimum of 12 words.
        with pytest.raises(Exception):
            TextSegment(
                content_hash="abc123",
                text="too short",
                element_id="el-001",
                word_count=2,
            )

    def test_text_batch_platform_validation(self):
        segment = TextSegment(
            content_hash="a" * 16,
            text="Scientists confirmed that 73 percent of mRNA vaccine recipients showed 18-month immunity",
            element_id="el-001",
            word_count=15,
        )
        batch = TextBatch(
            session_id="test-session",
            platform=Platform.TWITTER,
            segments=[segment],
        )
        assert batch.platform == Platform.TWITTER
        assert len(batch.segments) == 1

    def test_trust_score_clamping(self):
        # A trust score must always land inside [0, 1].
        trust = TrustScore(
            score=0.5,
            author_verified=True,
            corroborating_sources=3,
            has_community_note=False,
        )
        assert 0.0 <= trust.score <= 1.0

    def test_gatekeeper_result_invalid_label_raises(self):
        payload = {"label": "unknown", "reason": "test", "confidence": 0.5}
        with pytest.raises(Exception):
            GatekeeperResult.model_validate(payload)
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# ---------------------------------------------------------------------------
|
| 206 |
+
# Agent pipeline tests (mocked)
|
| 207 |
+
# ---------------------------------------------------------------------------
|
| 208 |
+
|
| 209 |
+
class TestAgents:
    """End-to-end agent evaluation in demo mode (no external API calls)."""

    @pytest.mark.asyncio
    async def test_evaluate_claim_demo_mode(
        self, demo_settings, sample_rag_result, sample_grok_result
    ):
        """In demo mode, evaluate_claim should return a valid AnalysisResult without API calls."""
        from agents import evaluate_claim

        outcome = await evaluate_claim(
            claim="Scientists confirmed that mRNA vaccines provide immunity lasting over 18 months in clinical trials",
            claim_hash="testhashabc123",
            element_id="el-test-001",
            platform=Platform.NEWS,
            rag_result=sample_rag_result,
            grok_result=sample_grok_result,
            settings=demo_settings,
        )

        every_color = {HighlightColor.GREEN, HighlightColor.YELLOW, HighlightColor.RED, HighlightColor.PURPLE}
        assert outcome.color in every_color
        assert 0 <= outcome.confidence <= 100
        assert outcome.element_id == "el-test-001"
        assert outcome.trust_score == sample_rag_result.trust.score

    @pytest.mark.asyncio
    async def test_low_trust_score_yields_red_or_yellow(
        self, demo_settings, sample_grok_result
    ):
        """Claims with low trust scores should not get green verdicts."""
        from agents import evaluate_claim

        weak_trust = TrustScore(
            score=0.1,  # very low
            author_verified=False,
            corroborating_sources=0,
            has_community_note=True,
            community_note_text="This claim is misleading.",
        )
        outcome = await evaluate_claim(
            claim="Completely fabricated statistic that 500% of people believe this false claim completely",
            claim_hash="lowtrusthash123",
            element_id="el-test-002",
            platform=Platform.TWITTER,
            rag_result=RAGResult(evidence=[], trust=weak_trust),
            grok_result=GrokSensorResult(velocity=50000, community_note=True, note_text="Misleading"),
            settings=demo_settings,
        )

        assert outcome.color in {HighlightColor.RED, HighlightColor.YELLOW}
        assert outcome.has_community_note is True

    @pytest.mark.asyncio
    async def test_ai_platform_triggers_hallucination_check(
        self, demo_settings, sample_rag_result, sample_grok_result
    ):
        """AI platforms should trigger the hallucination task (in demo, returns purple)."""
        from agents import evaluate_claim

        outcome = await evaluate_claim(
            claim="As cited in Smith et al. 2019 paper on quantum biology, the compound achieves 99.7% efficacy across all known variants",
            claim_hash="halluchash456",
            element_id="el-test-003",
            platform=Platform.CHATGPT,  # AI platform β triggers hallucination check
            rag_result=sample_rag_result,
            grok_result=sample_grok_result,
            settings=demo_settings,
        )

        # On AI platforms in demo mode, hallucination check runs and may override color
        assert outcome.color in {HighlightColor.PURPLE, HighlightColor.GREEN, HighlightColor.YELLOW, HighlightColor.RED}
        assert outcome.platform == Platform.CHATGPT
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
# ---------------------------------------------------------------------------
|
| 287 |
+
# Cache key tests
|
| 288 |
+
# ---------------------------------------------------------------------------
|
| 289 |
+
|
| 290 |
+
class TestCacheKeys:
|
| 291 |
+
def test_cache_key_format(self):
|
| 292 |
+
"""Cache keys should follow the `verdict:{hash}` format."""
|
| 293 |
+
content_hash = "abc123def456"
|
| 294 |
+
cache_key = f"verdict:{content_hash}"
|
| 295 |
+
assert cache_key == "verdict:abc123def456"
|
| 296 |
+
|
| 297 |
+
def test_different_texts_produce_different_hashes(self):
|
| 298 |
+
import xxhash
|
| 299 |
+
texts = [
|
| 300 |
+
"Scientists confirmed 73% immunity",
|
| 301 |
+
"Scientists confirmed 74% immunity",
|
| 302 |
+
"completely different claim about climate change",
|
| 303 |
+
]
|
| 304 |
+
hashes = [xxhash.xxh64(t.encode()).hexdigest() for t in texts]
|
| 305 |
+
assert len(set(hashes)) == len(hashes), "All hashes should be unique"
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "3.9"
|
| 2 |
+
|
| 3 |
+
# =============================================================================
|
| 4 |
+
# Omnichannel Fact & Hallucination Intelligence System
|
| 5 |
+
# HuggingFace Spaces compatible β single `docker compose up` deployment
|
| 6 |
+
# Services: FastAPI (7860), Qdrant (6333), Memgraph (7687), Redpanda (9092), Redis Stack (6379)
|
| 7 |
+
# =============================================================================
|
| 8 |
+
|
| 9 |
+
networks:
|
| 10 |
+
fact-net:
|
| 11 |
+
driver: bridge
|
| 12 |
+
|
| 13 |
+
volumes:
|
| 14 |
+
qdrant_storage:
|
| 15 |
+
memgraph_data:
|
| 16 |
+
redpanda_data:
|
| 17 |
+
redis_data:
|
| 18 |
+
|
| 19 |
+
services:
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# QDRANT β Vector DB for claim embeddings (self-hosted, sub-ms HNSW search)
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
qdrant:
|
| 24 |
+
image: qdrant/qdrant:v1.9.2
|
| 25 |
+
container_name: fact-qdrant
|
| 26 |
+
restart: unless-stopped
|
| 27 |
+
networks: [fact-net]
|
| 28 |
+
ports:
|
| 29 |
+
- "6333:6333"
|
| 30 |
+
- "6334:6334" # gRPC
|
| 31 |
+
volumes:
|
| 32 |
+
- qdrant_storage:/qdrant/storage
|
| 33 |
+
environment:
|
| 34 |
+
QDRANT__SERVICE__GRPC_PORT: 6334
|
| 35 |
+
QDRANT__TELEMETRY_DISABLED: "true"
|
| 36 |
+
healthcheck:
|
| 37 |
+
test: ["CMD", "curl", "-f", "http://localhost:6333/readyz"]
|
| 38 |
+
interval: 10s
|
| 39 |
+
timeout: 5s
|
| 40 |
+
retries: 5
|
| 41 |
+
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
# MEMGRAPH β In-memory graph DB for trust-score traversal (Cypher compatible)
|
| 44 |
+
# 10-100x faster than Neo4j for real-time traversal since everything is in RAM
|
| 45 |
+
# ---------------------------------------------------------------------------
|
| 46 |
+
memgraph:
|
| 47 |
+
image: memgraph/memgraph-platform:2.16.0
|
| 48 |
+
container_name: fact-memgraph
|
| 49 |
+
restart: unless-stopped
|
| 50 |
+
networks: [fact-net]
|
| 51 |
+
ports:
|
| 52 |
+
- "7687:7687" # Bolt
|
| 53 |
+
- "3000:3000" # Memgraph Lab UI
|
| 54 |
+
volumes:
|
| 55 |
+
- memgraph_data:/var/lib/memgraph
|
| 56 |
+
environment:
|
| 57 |
+
MEMGRAPH_USER: memgraph
|
| 58 |
+
MEMGRAPH_PASSWORD: memgraph123
|
| 59 |
+
healthcheck:
|
| 60 |
+
test: ["CMD", "mg_client", "--host", "localhost", "--port", "7687", "--use-ssl=false", "-q", "RETURN 1;"]
|
| 61 |
+
interval: 15s
|
| 62 |
+
timeout: 10s
|
| 63 |
+
retries: 5
|
| 64 |
+
|
| 65 |
+
# ---------------------------------------------------------------------------
|
| 66 |
+
# REDPANDA β Kafka-compatible message queue (no JVM, no ZooKeeper, 10x lower
|
| 67 |
+
# latency). Handles the omnichannel ingestion firehose from all producers.
|
| 68 |
+
# ---------------------------------------------------------------------------
|
| 69 |
+
redpanda:
|
| 70 |
+
image: redpandadata/redpanda:v24.1.7
|
| 71 |
+
container_name: fact-redpanda
|
| 72 |
+
restart: unless-stopped
|
| 73 |
+
networks: [fact-net]
|
| 74 |
+
ports:
|
| 75 |
+
- "9092:9092" # Kafka API
|
| 76 |
+
- "9644:9644" # Admin API
|
| 77 |
+
- "8081:8081" # Schema registry
|
| 78 |
+
volumes:
|
| 79 |
+
- redpanda_data:/var/lib/redpanda/data
|
| 80 |
+
command:
|
| 81 |
+
- redpanda
|
| 82 |
+
- start
|
| 83 |
+
- --smp=1
|
| 84 |
+
- --memory=512M
|
| 85 |
+
- --overprovisioned
|
| 86 |
+
- --kafka-addr=PLAINTEXT://0.0.0.0:9092
|
| 87 |
+
- --advertise-kafka-addr=PLAINTEXT://redpanda:9092
|
| 88 |
+
- --pandaproxy-addr=0.0.0.0:8082
|
| 89 |
+
- --advertise-pandaproxy-addr=redpanda:8082
|
| 90 |
+
- --schema-registry-addr=0.0.0.0:8081
|
| 91 |
+
- --rpc-addr=redpanda:33145
|
| 92 |
+
- --advertise-rpc-addr=redpanda:33145
|
| 93 |
+
healthcheck:
|
| 94 |
+
test: ["CMD", "rpk", "cluster", "health"]
|
| 95 |
+
interval: 15s
|
| 96 |
+
timeout: 10s
|
| 97 |
+
retries: 5
|
| 98 |
+
|
| 99 |
+
# ---------------------------------------------------------------------------
|
| 100 |
+
# REDIS STACK β Redis + RedisJSON + RedisSearch for structured claim caching
|
| 101 |
+
# TTL: 6h for Green/Red verdicts, 15min for Yellow, no cache for Purple
|
| 102 |
+
# ---------------------------------------------------------------------------
|
| 103 |
+
redis-stack:
|
| 104 |
+
image: redis/redis-stack:7.4.0-v0
|
| 105 |
+
container_name: fact-redis
|
| 106 |
+
restart: unless-stopped
|
| 107 |
+
networks: [fact-net]
|
| 108 |
+
ports:
|
| 109 |
+
- "6379:6379" # Redis
|
| 110 |
+
- "8001:8001" # RedisInsight UI
|
| 111 |
+
volumes:
|
| 112 |
+
- redis_data:/data
|
| 113 |
+
environment:
|
| 114 |
+
REDIS_ARGS: "--maxmemory 256mb --maxmemory-policy allkeys-lru"
|
| 115 |
+
healthcheck:
|
| 116 |
+
test: ["CMD", "redis-cli", "ping"]
|
| 117 |
+
interval: 10s
|
| 118 |
+
timeout: 5s
|
| 119 |
+
retries: 5
|
| 120 |
+
|
| 121 |
+
# ---------------------------------------------------------------------------
|
| 122 |
+
# BACKEND β FastAPI intelligence engine (HF Spaces listens on 7860)
|
| 123 |
+
# Waits for all upstream services to be healthy before starting
|
| 124 |
+
# ---------------------------------------------------------------------------
|
| 125 |
+
backend:
|
| 126 |
+
build:
|
| 127 |
+
context: ./backend
|
| 128 |
+
dockerfile: Dockerfile
|
| 129 |
+
container_name: fact-backend
|
| 130 |
+
restart: unless-stopped
|
| 131 |
+
networks: [fact-net]
|
| 132 |
+
ports:
|
| 133 |
+
- "7860:7860" # HuggingFace Spaces default port
|
| 134 |
+
depends_on:
|
| 135 |
+
qdrant:
|
| 136 |
+
condition: service_healthy
|
| 137 |
+
memgraph:
|
| 138 |
+
condition: service_healthy
|
| 139 |
+
redpanda:
|
| 140 |
+
condition: service_healthy
|
| 141 |
+
redis-stack:
|
| 142 |
+
condition: service_healthy
|
| 143 |
+
environment:
|
| 144 |
+
# LLM providers β set in HF Space secrets
|
| 145 |
+
GROQ_API_KEY: ${GROQ_API_KEY:-}
|
| 146 |
+
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
| 147 |
+
X_BEARER_TOKEN: ${X_BEARER_TOKEN:-}
|
| 148 |
+
|
| 149 |
+
# Infrastructure endpoints (internal Docker network)
|
| 150 |
+
QDRANT_HOST: qdrant
|
| 151 |
+
QDRANT_PORT: 6333
|
| 152 |
+
MEMGRAPH_HOST: memgraph
|
| 153 |
+
MEMGRAPH_PORT: 7687
|
| 154 |
+
MEMGRAPH_PASSWORD: memgraph123
|
| 155 |
+
REDPANDA_BROKERS: redpanda:9092
|
| 156 |
+
REDIS_URL: redis://redis-stack:6379
|
| 157 |
+
|
| 158 |
+
# App config
|
| 159 |
+
PORT: 7860
|
| 160 |
+
LOG_LEVEL: INFO
|
| 161 |
+
DEMO_MODE: ${DEMO_MODE:-false} # true = use mock data, skip external APIs
|
| 162 |
+
volumes:
|
| 163 |
+
- ./backend:/app
|
| 164 |
+
command: ["uv", "run", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]  # module is backend/app.py, not main.py
|
extension/entrypoints/background.ts
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// extension/entrypoints/background.ts
|
| 2 |
+
// Persistent background service worker.
|
| 3 |
+
// Maintains a SINGLE WebSocket connection to the backend intelligence engine.
|
| 4 |
+
// Routes results to the correct content script via chrome.tabs.sendMessage.
|
| 5 |
+
//
|
| 6 |
+
// Why a single connection in the background?
|
| 7 |
+
// Content scripts are destroyed/recreated on navigation. The background worker
|
| 8 |
+
// persists for the lifetime of the extension, ensuring we never drop messages
|
| 9 |
+
// and reconnection logic runs in one place.
|
| 10 |
+
|
| 11 |
+
import { defineBackground } from "wxt/sandbox";
|
| 12 |
+
|
| 13 |
+
// Injected by wxt.config.ts vite.define β falls back to localhost for dev
|
| 14 |
+
declare const __WS_URL__: string;
|
| 15 |
+
const WS_URL = typeof __WS_URL__ !== "undefined"
|
| 16 |
+
? __WS_URL__
|
| 17 |
+
: "ws://localhost:7860/ws";
|
| 18 |
+
|
| 19 |
+
const SESSION_ID = crypto.randomUUID();
|
| 20 |
+
|
| 21 |
+
// ---------------------------------------------------------------------------
|
| 22 |
+
// WebSocket connection with exponential backoff
|
| 23 |
+
// ---------------------------------------------------------------------------
|
| 24 |
+
|
| 25 |
+
let ws: WebSocket | null = null;
|
| 26 |
+
let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
|
| 27 |
+
let reconnectDelay = 1_000; // Start at 1s, cap at 30s
|
| 28 |
+
|
| 29 |
+
// Tab ID β { platform, pendingHashes } mapping for routing results back
|
| 30 |
+
const tabRegistry = new Map<number, { platform: string }>();
|
| 31 |
+
|
| 32 |
+
function getWsUrl(): string {
|
| 33 |
+
return `${WS_URL}/${SESSION_ID}`;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
function connect(): void {
|
| 37 |
+
if (ws?.readyState === WebSocket.OPEN) return;
|
| 38 |
+
|
| 39 |
+
ws = new WebSocket(getWsUrl());
|
| 40 |
+
|
| 41 |
+
ws.onopen = () => {
|
| 42 |
+
console.log("[background] WS connected:", getWsUrl());
|
| 43 |
+
reconnectDelay = 1_000; // Reset backoff on successful connection
|
| 44 |
+
broadcastStatus("connected");
|
| 45 |
+
};
|
| 46 |
+
|
| 47 |
+
ws.onmessage = (evt: MessageEvent) => {
|
| 48 |
+
try {
|
| 49 |
+
const msg = JSON.parse(evt.data as string);
|
| 50 |
+
|
| 51 |
+
if (msg.type === "pong") return;
|
| 52 |
+
|
| 53 |
+
if (msg.type === "status") {
|
| 54 |
+
// Forward demo mode flag to all content scripts
|
| 55 |
+
chrome.tabs.query({}, (tabs) => {
|
| 56 |
+
tabs.forEach((tab) => {
|
| 57 |
+
if (tab.id) {
|
| 58 |
+
chrome.tabs.sendMessage(tab.id, {
|
| 59 |
+
type: "status",
|
| 60 |
+
payload: msg.payload,
|
| 61 |
+
}).catch(() => {/* Tab may not have content script */});
|
| 62 |
+
}
|
| 63 |
+
});
|
| 64 |
+
});
|
| 65 |
+
return;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
if (msg.type === "result" && msg.payload) {
|
| 69 |
+
routeResultToTab(msg.payload);
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
if (msg.type === "error") {
|
| 73 |
+
console.error("[background] Server error:", msg.payload?.message);
|
| 74 |
+
}
|
| 75 |
+
} catch (err) {
|
| 76 |
+
console.error("[background] Message parse error:", err);
|
| 77 |
+
}
|
| 78 |
+
};
|
| 79 |
+
|
| 80 |
+
ws.onclose = (evt) => {
|
| 81 |
+
ws = null;
|
| 82 |
+
console.log(`[background] WS closed (code=${evt.code}), reconnecting in ${reconnectDelay}ms`);
|
| 83 |
+
broadcastStatus("reconnecting");
|
| 84 |
+
|
| 85 |
+
reconnectTimer = setTimeout(() => {
|
| 86 |
+
reconnectDelay = Math.min(reconnectDelay * 2, 30_000);
|
| 87 |
+
connect();
|
| 88 |
+
}, reconnectDelay);
|
| 89 |
+
};
|
| 90 |
+
|
| 91 |
+
ws.onerror = () => {
|
| 92 |
+
broadcastStatus("offline");
|
| 93 |
+
};
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
// ---------------------------------------------------------------------------
|
| 97 |
+
// Route analysis results to the tab that originated the request
|
| 98 |
+
// ---------------------------------------------------------------------------
|
| 99 |
+
|
| 100 |
+
function routeResultToTab(result: Record<string, unknown>): void {
|
| 101 |
+
// Find the tab that has this element (active tabs with content scripts)
|
| 102 |
+
chrome.tabs.query({ active: true }, (tabs) => {
|
| 103 |
+
tabs.forEach((tab) => {
|
| 104 |
+
if (tab.id) {
|
| 105 |
+
chrome.tabs.sendMessage(tab.id, {
|
| 106 |
+
type: "result",
|
| 107 |
+
payload: result,
|
| 108 |
+
}).catch(() => {/* Content script may not be injected on this tab */});
|
| 109 |
+
}
|
| 110 |
+
});
|
| 111 |
+
});
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
// ---------------------------------------------------------------------------
|
| 115 |
+
// Broadcast WS status to all content scripts + popup
|
| 116 |
+
// ---------------------------------------------------------------------------
|
| 117 |
+
|
| 118 |
+
function broadcastStatus(status: string): void {
|
| 119 |
+
chrome.tabs.query({}, (tabs) => {
|
| 120 |
+
tabs.forEach((tab) => {
|
| 121 |
+
if (tab.id) {
|
| 122 |
+
chrome.tabs.sendMessage(tab.id, { type: "ws_status", payload: { status } })
|
| 123 |
+
.catch(() => {});
|
| 124 |
+
}
|
| 125 |
+
});
|
| 126 |
+
});
|
| 127 |
+
|
| 128 |
+
// Also notify popup if open
|
| 129 |
+
chrome.runtime.sendMessage({ type: "ws_status", payload: { status } })
|
| 130 |
+
.catch(() => {});
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
// ---------------------------------------------------------------------------
|
| 134 |
+
// Handle messages from content scripts
|
| 135 |
+
// ---------------------------------------------------------------------------
|
| 136 |
+
|
| 137 |
+
chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
|
| 138 |
+
if (msg.type === "send_batch") {
|
| 139 |
+
if (ws?.readyState === WebSocket.OPEN) {
|
| 140 |
+
ws.send(JSON.stringify({ type: "batch", payload: msg.payload }));
|
| 141 |
+
sendResponse({ ok: true });
|
| 142 |
+
} else {
|
| 143 |
+
sendResponse({ ok: false, reason: "not_connected" });
|
| 144 |
+
}
|
| 145 |
+
return true; // Async response
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
if (msg.type === "get_status") {
|
| 149 |
+
sendResponse({
|
| 150 |
+
status: ws?.readyState === WebSocket.OPEN ? "connected" : "offline",
|
| 151 |
+
});
|
| 152 |
+
return true;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
if (msg.type === "ping") {
|
| 156 |
+
if (ws?.readyState === WebSocket.OPEN) {
|
| 157 |
+
ws.send(JSON.stringify({ type: "ping" }));
|
| 158 |
+
}
|
| 159 |
+
sendResponse({ ok: true });
|
| 160 |
+
return true;
|
| 161 |
+
}
|
| 162 |
+
});
|
| 163 |
+
|
| 164 |
+
// ---------------------------------------------------------------------------
|
| 165 |
+
// Keepalive β prevents background worker from being suspended
|
| 166 |
+
// ---------------------------------------------------------------------------
|
| 167 |
+
|
| 168 |
+
// Every 20s: ping a live socket, or re-dial a dead one. The ping keeps the
// connection warm; the re-dial covers sockets that died without firing
// onclose. A CONNECTING socket is deliberately left alone.
// NOTE(review): MV3 service workers can be suspended despite active timers —
// verify this interval actually survives in the target browsers.
setInterval(() => {
  if (ws?.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: "ping" }));
  } else if (!ws || ws.readyState === WebSocket.CLOSED) {
    connect(); // Re-attempt if connection died silently
  }
}, 20_000);
|
| 175 |
+
|
| 176 |
+
export default defineBackground(() => {
|
| 177 |
+
connect();
|
| 178 |
+
console.log("[background] Fact Intelligence background worker started");
|
| 179 |
+
});
|
extension/entrypoints/content.tsx
ADDED
|
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// extension/entrypoints/content.tsx
|
| 2 |
+
// Main content script β runs in every matching page context.
|
| 3 |
+
//
|
| 4 |
+
// Pipeline:
|
| 5 |
+
// 1. MutationObserver watches for meaningful text node changes
|
| 6 |
+
// 2. Text is accumulated in a ring buffer, flushed every 1200ms
|
| 7 |
+
// 3. Each flush is deduplicated via xxhash-wasm (client-side)
|
| 8 |
+
// 4. Deduplicated segments sent to background worker β WebSocket
|
| 9 |
+
// 5. Results come back as chrome.runtime.onMessage events
|
| 10 |
+
// 6. Highlights applied as <mark> elements via Range.surroundContents()
|
| 11 |
+
// 7. Hover cards rendered inside a Shadow DOM to prevent CSS bleed
|
| 12 |
+
|
| 13 |
+
import { defineContentScript } from "wxt/sandbox";
|
| 14 |
+
import { createRoot } from "react-dom/client";
|
| 15 |
+
import React, { useEffect, useRef, useState } from "react";
|
| 16 |
+
import { AnimatePresence, motion } from "framer-motion";
|
| 17 |
+
import { init as initXxhash, h64ToString } from "xxhash-wasm";
|
| 18 |
+
|
| 19 |
+
import {
|
| 20 |
+
AnalysisResult,
|
| 21 |
+
COLOR_CONFIG,
|
| 22 |
+
ExtensionMode,
|
| 23 |
+
HighlightColor,
|
| 24 |
+
shouldShowColor,
|
| 25 |
+
useExtensionStore,
|
| 26 |
+
} from "../stores/extensionStore";
|
| 27 |
+
|
| 28 |
+
// ---------------------------------------------------------------------------
|
| 29 |
+
// Platform detection
|
| 30 |
+
// ---------------------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
function detectPlatform(): string {
|
| 33 |
+
const host = location.hostname;
|
| 34 |
+
if (host.includes("twitter.com") || host.includes("x.com")) return "twitter";
|
| 35 |
+
if (host.includes("instagram.com")) return "instagram";
|
| 36 |
+
if (host.includes("youtube.com")) return "youtube";
|
| 37 |
+
if (host.includes("chat.openai.com")) return "chatgpt";
|
| 38 |
+
if (host.includes("claude.ai")) return "claude";
|
| 39 |
+
if (host.includes("gemini.google.com")) return "gemini";
|
| 40 |
+
return "news";
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
// ---------------------------------------------------------------------------
|
| 44 |
+
// Text node utilities
|
| 45 |
+
// ---------------------------------------------------------------------------
|
| 46 |
+
|
| 47 |
+
const SKIP_TAGS = new Set(["SCRIPT", "STYLE", "SVG", "NOSCRIPT", "IFRAME", "META", "HEAD"]);
|
| 48 |
+
|
| 49 |
+
function isValidTextNode(node: Text): boolean {
|
| 50 |
+
const parent = node.parentElement;
|
| 51 |
+
if (!parent) return false;
|
| 52 |
+
|
| 53 |
+
// Skip non-content tags
|
| 54 |
+
let el: Element | null = parent;
|
| 55 |
+
while (el) {
|
| 56 |
+
if (SKIP_TAGS.has(el.tagName)) return false;
|
| 57 |
+
el = el.parentElement;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
const text = node.textContent?.trim() ?? "";
|
| 61 |
+
const wordCount = text.split(/\s+/).filter(Boolean).length;
|
| 62 |
+
return wordCount >= 12;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
function extractTextNodes(root: Node): Text[] {
|
| 66 |
+
const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT, {
|
| 67 |
+
acceptNode: (node) =>
|
| 68 |
+
isValidTextNode(node as Text) ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_SKIP,
|
| 69 |
+
});
|
| 70 |
+
const nodes: Text[] = [];
|
| 71 |
+
while (walker.nextNode()) nodes.push(walker.currentNode as Text);
|
| 72 |
+
return nodes;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
// ---------------------------------------------------------------------------
|
| 76 |
+
// Ring buffer β accumulates text segments, flushed every 1200ms
|
| 77 |
+
// ---------------------------------------------------------------------------
|
| 78 |
+
|
| 79 |
+
interface QueuedSegment {
|
| 80 |
+
hash: string;
|
| 81 |
+
text: string;
|
| 82 |
+
node: Text;
|
| 83 |
+
elementId: string;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
// ---------------------------------------------------------------------------
|
| 87 |
+
// Highlight system
|
| 88 |
+
// ---------------------------------------------------------------------------
|
| 89 |
+
|
| 90 |
+
const highlightMap = new Map<string, HTMLElement>(); // elementId β <mark>
|
| 91 |
+
|
| 92 |
+
function applyHighlight(
|
| 93 |
+
node: Text,
|
| 94 |
+
elementId: string,
|
| 95 |
+
color: HighlightColor,
|
| 96 |
+
result: AnalysisResult
|
| 97 |
+
): void {
|
| 98 |
+
// If already highlighted, update color only
|
| 99 |
+
const existing = highlightMap.get(elementId);
|
| 100 |
+
if (existing) {
|
| 101 |
+
const cfg = COLOR_CONFIG[color];
|
| 102 |
+
existing.style.backgroundColor = `${cfg.hex}${Math.round(cfg.opacity * 255).toString(16).padStart(2, "0")}`;
|
| 103 |
+
existing.dataset.result = JSON.stringify(result);
|
| 104 |
+
return;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
try {
|
| 108 |
+
const range = document.createRange();
|
| 109 |
+
range.selectNode(node);
|
| 110 |
+
|
| 111 |
+
const cfg = COLOR_CONFIG[color];
|
| 112 |
+
const mark = document.createElement("mark");
|
| 113 |
+
mark.dataset.factId = elementId;
|
| 114 |
+
mark.dataset.result = JSON.stringify(result);
|
| 115 |
+
mark.style.cssText = `
|
| 116 |
+
background-color: ${cfg.hex}${Math.round(cfg.opacity * 255).toString(16).padStart(2, "0")};
|
| 117 |
+
border-radius: 2px;
|
| 118 |
+
cursor: help;
|
| 119 |
+
transition: background-color 0.2s;
|
| 120 |
+
`;
|
| 121 |
+
|
| 122 |
+
range.surroundContents(mark);
|
| 123 |
+
highlightMap.set(elementId, mark);
|
| 124 |
+
|
| 125 |
+
// Mount hover card on mouseenter using Shadow DOM
|
| 126 |
+
mark.addEventListener("mouseenter", (e) => showHoverCard(e, result, mark));
|
| 127 |
+
mark.addEventListener("mouseleave", hideHoverCard);
|
| 128 |
+
} catch {
|
| 129 |
+
// surroundContents() fails on nodes that cross element boundaries β skip silently
|
| 130 |
+
}
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
// ---------------------------------------------------------------------------
|
| 134 |
+
// Hover card β Shadow DOM isolated, Framer Motion animated
|
| 135 |
+
// ---------------------------------------------------------------------------
|
| 136 |
+
|
| 137 |
+
let hoverCardHost: HTMLElement | null = null;
|
| 138 |
+
let hoverRoot: ReturnType<typeof createRoot> | null = null;
|
| 139 |
+
|
| 140 |
+
function ensureHoverCardHost(): { host: HTMLElement; shadowRoot: ShadowRoot } {
|
| 141 |
+
if (!hoverCardHost) {
|
| 142 |
+
hoverCardHost = document.createElement("div");
|
| 143 |
+
hoverCardHost.id = "fact-intelligence-hover-host";
|
| 144 |
+
document.body.appendChild(hoverCardHost);
|
| 145 |
+
|
| 146 |
+
const shadow = hoverCardHost.attachShadow({ mode: "closed" });
|
| 147 |
+
|
| 148 |
+
// Inject Tailwind-scoped styles directly into shadow root
|
| 149 |
+
const style = document.createElement("style");
|
| 150 |
+
style.textContent = HOVER_CARD_STYLES;
|
| 151 |
+
shadow.appendChild(style);
|
| 152 |
+
|
| 153 |
+
const mountPoint = document.createElement("div");
|
| 154 |
+
shadow.appendChild(mountPoint);
|
| 155 |
+
hoverRoot = createRoot(mountPoint);
|
| 156 |
+
|
| 157 |
+
return { host: hoverCardHost, shadowRoot: shadow };
|
| 158 |
+
}
|
| 159 |
+
return { host: hoverCardHost, shadowRoot: hoverCardHost.shadowRoot! as ShadowRoot };
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
function showHoverCard(event: MouseEvent, result: AnalysisResult, anchor: HTMLElement): void {
|
| 163 |
+
const { shadowRoot } = ensureHoverCardHost();
|
| 164 |
+
const rect = anchor.getBoundingClientRect();
|
| 165 |
+
|
| 166 |
+
// Viewport clamping β card must never overflow
|
| 167 |
+
let top = rect.bottom + window.scrollY + 8;
|
| 168 |
+
let left = rect.left + window.scrollX;
|
| 169 |
+
const CARD_WIDTH = 340;
|
| 170 |
+
const CARD_HEIGHT = 200;
|
| 171 |
+
|
| 172 |
+
if (left + CARD_WIDTH > window.innerWidth - 16) {
|
| 173 |
+
left = window.innerWidth - CARD_WIDTH - 16;
|
| 174 |
+
}
|
| 175 |
+
if (top + CARD_HEIGHT > window.innerHeight + window.scrollY - 16) {
|
| 176 |
+
top = rect.top + window.scrollY - CARD_HEIGHT - 8; // Flip above
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
hoverRoot?.render(
|
| 180 |
+
<HoverCard result={result} top={top} left={left} visible={true} />
|
| 181 |
+
);
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
function hideHoverCard(): void {
|
| 185 |
+
hoverRoot?.render(<HoverCard result={null} top={0} left={0} visible={false} />);
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
// ---------------------------------------------------------------------------
|
| 189 |
+
// HoverCard React component
|
| 190 |
+
// ---------------------------------------------------------------------------
|
| 191 |
+
|
| 192 |
+
interface HoverCardProps {
|
| 193 |
+
result: AnalysisResult | null;
|
| 194 |
+
top: number;
|
| 195 |
+
left: number;
|
| 196 |
+
visible: boolean;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
function HoverCard({ result, top, left, visible }: HoverCardProps) {
|
| 200 |
+
if (!result) return null;
|
| 201 |
+
const cfg = COLOR_CONFIG[result.color as HighlightColor] ?? COLOR_CONFIG.yellow;
|
| 202 |
+
|
| 203 |
+
return (
|
| 204 |
+
<AnimatePresence>
|
| 205 |
+
{visible && (
|
| 206 |
+
<motion.div
|
| 207 |
+
className="card"
|
| 208 |
+
style={{ top, left, "--accent": cfg.hex } as React.CSSProperties}
|
| 209 |
+
initial={{ opacity: 0, y: 6, scale: 0.97 }}
|
| 210 |
+
animate={{ opacity: 1, y: 0, scale: 1 }}
|
| 211 |
+
exit={{ opacity: 0, y: 4, scale: 0.97 }}
|
| 212 |
+
transition={{ duration: 0.18, ease: "easeOut" }}
|
| 213 |
+
>
|
| 214 |
+
{/* Header row */}
|
| 215 |
+
<div className="header">
|
| 216 |
+
<div className="badge">{cfg.icon} {cfg.label}</div>
|
| 217 |
+
<div className="conf">
|
| 218 |
+
<svg width="36" height="36" viewBox="0 0 36 36">
|
| 219 |
+
<circle cx="18" cy="18" r="14" fill="none" stroke="#333" strokeWidth="3"/>
|
| 220 |
+
<circle
|
| 221 |
+
cx="18" cy="18" r="14"
|
| 222 |
+
fill="none"
|
| 223 |
+
stroke={cfg.hex}
|
| 224 |
+
strokeWidth="3"
|
| 225 |
+
strokeLinecap="round"
|
| 226 |
+
strokeDasharray={`${2 * Math.PI * 14}`}
|
| 227 |
+
strokeDashoffset={`${2 * Math.PI * 14 * (1 - result.confidence / 100)}`}
|
| 228 |
+
transform="rotate(-90 18 18)"
|
| 229 |
+
/>
|
| 230 |
+
<text x="18" y="22" textAnchor="middle" fontSize="10" fill={cfg.hex} fontWeight="bold">
|
| 231 |
+
{result.confidence}
|
| 232 |
+
</text>
|
| 233 |
+
</svg>
|
| 234 |
+
</div>
|
| 235 |
+
</div>
|
| 236 |
+
|
| 237 |
+
{/* Verdict */}
|
| 238 |
+
<div className="verdict">{result.verdict_label}</div>
|
| 239 |
+
<div className="explanation">{result.explanation}</div>
|
| 240 |
+
|
| 241 |
+
{/* Sources */}
|
| 242 |
+
{result.sources?.length > 0 && (
|
| 243 |
+
<div className="sources">
|
| 244 |
+
{result.sources.slice(0, 3).map((s, i) => (
|
| 245 |
+
<a key={i} className="source" href={s.url} target="_blank" rel="noopener">
|
| 246 |
+
<img src={s.favicon_url} width="12" height="12" onError={(e) => { (e.target as HTMLImageElement).style.display = "none"; }} />
|
| 247 |
+
<span>{s.domain}</span>
|
| 248 |
+
</a>
|
| 249 |
+
))}
|
| 250 |
+
</div>
|
| 251 |
+
)}
|
| 252 |
+
|
| 253 |
+
{/* Footer meta */}
|
| 254 |
+
<div className="meta">
|
| 255 |
+
<span>trust {(result.trust_score * 100).toFixed(0)}%</span>
|
| 256 |
+
<span>Β·</span>
|
| 257 |
+
<span>{result.latency_ms?.toFixed(0)}ms</span>
|
| 258 |
+
{result.cached && <><span>Β·</span><span>cached</span></>}
|
| 259 |
+
</div>
|
| 260 |
+
</motion.div>
|
| 261 |
+
)}
|
| 262 |
+
</AnimatePresence>
|
| 263 |
+
);
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
// CSS injected into the Shadow DOM β complete isolation from host page
|
| 267 |
+
// Injected verbatim into the shadow root by ensureHoverCardHost.
// NOTE(review): `color-mix()` requires a recent browser — verify support in
// the extension's minimum supported versions.
const HOVER_CARD_STYLES = `
.card {
  position: fixed;
  z-index: 2147483647;
  width: 340px;
  background: #0d1117;
  border: 1px solid #21262d;
  border-radius: 10px;
  padding: 14px;
  box-shadow: 0 8px 32px rgba(0,0,0,0.6), 0 0 0 1px rgba(255,255,255,0.04);
  font-family: -apple-system, 'DM Sans', system-ui, sans-serif;
  font-size: 13px;
  color: #e6edf3;
  pointer-events: none;
}
.header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 8px; }
.badge {
  display: inline-flex; align-items: center; gap: 5px;
  padding: 3px 10px; border-radius: 20px; font-size: 10px;
  font-weight: 700; letter-spacing: 0.8px; text-transform: uppercase;
  background: color-mix(in srgb, var(--accent) 15%, transparent);
  color: var(--accent);
  border: 1px solid color-mix(in srgb, var(--accent) 30%, transparent);
}
.conf { flex-shrink: 0; }
.verdict { font-weight: 700; font-size: 14px; margin-bottom: 6px; line-height: 1.3; }
.explanation { color: #7d8590; font-size: 12px; line-height: 1.6; margin-bottom: 10px; }
.sources { display: flex; flex-direction: column; gap: 4px; margin-bottom: 8px; }
.source {
  display: flex; align-items: center; gap: 6px;
  padding: 5px 8px; background: #161b22; border-radius: 5px;
  color: #58a6ff; text-decoration: none; font-size: 11px;
  pointer-events: all;
}
.meta {
  display: flex; gap: 6px; font-size: 10px; color: #484f58;
  font-family: 'Space Mono', monospace; letter-spacing: 0.3px;
}
`;
| 306 |
+
|
| 307 |
+
// ---------------------------------------------------------------------------
|
| 308 |
+
// Main content script entry point
|
| 309 |
+
// ---------------------------------------------------------------------------
|
| 310 |
+
|
| 311 |
+
export default defineContentScript({
|
| 312 |
+
matches: [
|
| 313 |
+
"https://twitter.com/*", "https://x.com/*",
|
| 314 |
+
"https://www.instagram.com/*", "https://www.youtube.com/*",
|
| 315 |
+
"https://chat.openai.com/*", "https://claude.ai/*",
|
| 316 |
+
"https://gemini.google.com/*", "<all_urls>",
|
| 317 |
+
],
|
| 318 |
+
runAt: "document_idle",
|
| 319 |
+
main: async () => {
|
| 320 |
+
const platform = detectPlatform();
|
| 321 |
+
|
| 322 |
+
// Initialize xxhash-wasm (compiled WASM, sub-microsecond hashing)
|
| 323 |
+
const { h64ToString: xxhash64 } = await initXxhash();
|
| 324 |
+
|
| 325 |
+
const SESSION_ID = crypto.randomUUID();
|
| 326 |
+
const seenHashes = new Set<string>(); // Client-side dedup ring buffer
|
| 327 |
+
|
| 328 |
+
// Flush buffer every 1200ms β avoids layout thrashing from rapid DOM changes
|
| 329 |
+
const flushBuffer: Map<string, QueuedSegment> = new Map();
|
| 330 |
+
let flushTimer: ReturnType<typeof setTimeout> | null = null;
|
| 331 |
+
|
| 332 |
+
const { enabled, mode } = useExtensionStore.getState();
|
| 333 |
+
if (!enabled) return;
|
| 334 |
+
|
| 335 |
+
function queueSegment(node: Text): void {
|
| 336 |
+
const text = node.textContent?.trim() ?? "";
|
| 337 |
+
if (!text) return;
|
| 338 |
+
|
| 339 |
+
const hash = xxhash64(text);
|
| 340 |
+
if (seenHashes.has(hash)) return; // Already processed this text
|
| 341 |
+
|
| 342 |
+
const elementId = `fi-${hash.slice(0, 8)}-${Date.now()}`;
|
| 343 |
+
flushBuffer.set(hash, { hash, text, node, elementId });
|
| 344 |
+
|
| 345 |
+
// Debounced flush
|
| 346 |
+
if (!flushTimer) {
|
| 347 |
+
flushTimer = setTimeout(flushSegments, 1200);
|
| 348 |
+
}
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
async function flushSegments(): void {
|
| 352 |
+
flushTimer = null;
|
| 353 |
+
if (flushBuffer.size === 0) return;
|
| 354 |
+
|
| 355 |
+
const { enabled, mode } = useExtensionStore.getState();
|
| 356 |
+
if (!enabled) return;
|
| 357 |
+
|
| 358 |
+
const segments = Array.from(flushBuffer.values()).map((s) => {
|
| 359 |
+
seenHashes.add(s.hash);
|
| 360 |
+
|
| 361 |
+
// Prevent unbounded memory growth β prune oldest half when > 5000
|
| 362 |
+
if (seenHashes.size > 5000) {
|
| 363 |
+
const arr = Array.from(seenHashes);
|
| 364 |
+
arr.slice(0, 2500).forEach((h) => seenHashes.delete(h));
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
return {
|
| 368 |
+
content_hash: s.hash,
|
| 369 |
+
text: s.text,
|
| 370 |
+
element_id: s.elementId,
|
| 371 |
+
word_count: s.text.split(/\s+/).length,
|
| 372 |
+
};
|
| 373 |
+
});
|
| 374 |
+
|
| 375 |
+
flushBuffer.clear();
|
| 376 |
+
|
| 377 |
+
const batch = {
|
| 378 |
+
session_id: SESSION_ID,
|
| 379 |
+
platform,
|
| 380 |
+
segments,
|
| 381 |
+
sent_at: new Date().toISOString(),
|
| 382 |
+
};
|
| 383 |
+
|
| 384 |
+
// Send to background worker, which holds the WebSocket
|
| 385 |
+
chrome.runtime.sendMessage({ type: "send_batch", payload: batch });
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
// ---------------------------------------------------------------------------
|
| 389 |
+
// MutationObserver β watch for new text nodes
|
| 390 |
+
// ---------------------------------------------------------------------------
|
| 391 |
+
|
| 392 |
+
const observer = new MutationObserver((mutations) => {
|
| 393 |
+
const { enabled } = useExtensionStore.getState();
|
| 394 |
+
if (!enabled) return;
|
| 395 |
+
|
| 396 |
+
for (const mutation of mutations) {
|
| 397 |
+
if (mutation.type === "childList") {
|
| 398 |
+
mutation.addedNodes.forEach((node) => {
|
| 399 |
+
const textNodes = extractTextNodes(node);
|
| 400 |
+
textNodes.forEach(queueSegment);
|
| 401 |
+
});
|
| 402 |
+
} else if (mutation.type === "characterData") {
|
| 403 |
+
const node = mutation.target as Text;
|
| 404 |
+
if (isValidTextNode(node)) queueSegment(node);
|
| 405 |
+
}
|
| 406 |
+
}
|
| 407 |
+
});
|
| 408 |
+
|
| 409 |
+
observer.observe(document.body, {
|
| 410 |
+
childList: true,
|
| 411 |
+
subtree: true,
|
| 412 |
+
characterData: true,
|
| 413 |
+
});
|
| 414 |
+
|
| 415 |
+
// Process existing text on page load
|
| 416 |
+
extractTextNodes(document.body).forEach(queueSegment);
|
| 417 |
+
|
| 418 |
+
// ---------------------------------------------------------------------------
|
| 419 |
+
// Receive results from background worker
|
| 420 |
+
// ---------------------------------------------------------------------------
|
| 421 |
+
|
| 422 |
+
chrome.runtime.onMessage.addListener((msg) => {
|
| 423 |
+
if (msg.type === "result" && msg.payload) {
|
| 424 |
+
const result = msg.payload as AnalysisResult;
|
| 425 |
+
const { mode } = useExtensionStore.getState();
|
| 426 |
+
const color = result.color as HighlightColor;
|
| 427 |
+
|
| 428 |
+
if (!shouldShowColor(color, mode)) return;
|
| 429 |
+
|
| 430 |
+
// Find the text node by element_id stored on the flushBuffer segment
|
| 431 |
+
// (We need the original node reference β stored in flushBuffer pre-clear)
|
| 432 |
+
// Fallback: search by matching text content
|
| 433 |
+
const targetNode = findNodeByHash(result.content_hash);
|
| 434 |
+
if (targetNode) {
|
| 435 |
+
applyHighlight(targetNode, result.element_id, color, result);
|
| 436 |
+
}
|
| 437 |
+
}
|
| 438 |
+
|
| 439 |
+
if (msg.type === "ws_status") {
|
| 440 |
+
useExtensionStore.getState().setWsStatus(msg.payload.status);
|
| 441 |
+
}
|
| 442 |
+
});
|
| 443 |
+
},
|
| 444 |
+
});
|
| 445 |
+
|
| 446 |
+
// Module-level hash -> Text-node registry used to reattach analysis results
// (which arrive after flushBuffer has been cleared) to their DOM nodes.
// NOTE(review): as written, no code in this file ever inserts into this map,
// so findNodeByHash always returns undefined — registration must be wired
// into queueSegment for highlights to appear. Verify before shipping.
const nodeRegistry = new Map<string, Text>(); // hash -> Text node

/** Look up the original Text node for a previously queued content hash. */
function findNodeByHash(hash: string): Text | undefined {
  return nodeRegistry.get(hash);
}
|
extension/entrypoints/popup.tsx
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// extension/entrypoints/popup.tsx
|
| 2 |
+
// Extension popup β rendered when the user clicks the extension icon.
|
| 3 |
+
// State: Zustand + chrome.storage.sync (persisted across browser sessions).
|
| 4 |
+
|
| 5 |
+
import React, { useEffect, useState } from "react";
|
| 6 |
+
import { createRoot } from "react-dom/client";
|
| 7 |
+
import { motion, AnimatePresence } from "framer-motion";
|
| 8 |
+
import { useExtensionStore, ExtensionMode, WSStatus, COLOR_CONFIG } from "../stores/extensionStore";
|
| 9 |
+
|
| 10 |
+
// ---------------------------------------------------------------------------
|
| 11 |
+
// Styles (injected as a <style> tag β no build step needed for popup)
|
| 12 |
+
// ---------------------------------------------------------------------------
|
| 13 |
+
// Base stylesheet for the popup document (CSS variables + reset + body size).
// Injected as an inline <style> tag inside <Popup/>, so the popup HTML needs
// no separate stylesheet build step. The template content is emitted verbatim.
const POPUP_STYLES = `
:root {
  --bg: #070b0f; --surface: #0d1117; --surface2: #161b22;
  --border: #21262d; --text: #e6edf3; --muted: #7d8590;
  --accent: #58a6ff; --green: #22c55e; --yellow: #eab308;
  --red: #ef4444; --purple: #a855f7;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
  width: 320px; background: var(--bg); color: var(--text);
  font-family: -apple-system, 'DM Sans', system-ui, sans-serif;
  font-size: 13px;
}
`;
|
| 27 |
+
|
| 28 |
+
// ---------------------------------------------------------------------------
|
| 29 |
+
// Components
|
| 30 |
+
// ---------------------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
// Small colored dot plus monospace label reflecting the WebSocket state.
// Pulsing animation is enabled for the transient states and steady "connected".
function StatusBadge({ status }: { status: WSStatus }) {
  const byStatus: Record<WSStatus, { color: string; label: string; pulse: boolean }> = {
    connected: { color: "#22c55e", label: "Connected", pulse: true },
    connecting: { color: "#eab308", label: "Connectingβ¦", pulse: true },
    reconnecting: { color: "#eab308", label: "Reconnectingβ¦", pulse: true },
    offline: { color: "#ef4444", label: "Offline", pulse: false },
  };
  const { color, label, pulse } = byStatus[status];

  return (
    <div style={{ display: "flex", alignItems: "center", gap: 6 }}>
      <div
        style={{
          width: 8, height: 8, borderRadius: "50%",
          background: color,
          boxShadow: pulse ? `0 0 8px ${color}` : "none",
          animation: pulse ? "pulse 2s infinite" : "none",
        }}
      />
      <span style={{ fontSize: 11, color: "var(--muted)", fontFamily: "monospace" }}>
        {label}
      </span>
    </div>
  );
}
|
| 54 |
+
|
| 55 |
+
function Toggle({ checked, onChange }: { checked: boolean; onChange: (v: boolean) => void }) {
|
| 56 |
+
return (
|
| 57 |
+
<div
|
| 58 |
+
onClick={() => onChange(!checked)}
|
| 59 |
+
style={{
|
| 60 |
+
width: 44, height: 24, borderRadius: 12, cursor: "pointer",
|
| 61 |
+
background: checked ? "var(--accent)" : "var(--border)",
|
| 62 |
+
position: "relative", transition: "background 0.2s",
|
| 63 |
+
flexShrink: 0,
|
| 64 |
+
}}
|
| 65 |
+
>
|
| 66 |
+
<motion.div
|
| 67 |
+
animate={{ x: checked ? 22 : 2 }}
|
| 68 |
+
transition={{ type: "spring", stiffness: 500, damping: 30 }}
|
| 69 |
+
style={{
|
| 70 |
+
width: 20, height: 20, borderRadius: 10, background: "#fff",
|
| 71 |
+
position: "absolute", top: 2,
|
| 72 |
+
boxShadow: "0 1px 4px rgba(0,0,0,0.3)",
|
| 73 |
+
}}
|
| 74 |
+
/>
|
| 75 |
+
</div>
|
| 76 |
+
);
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
function ModeCard({ value, current, label, desc, onSelect }: {
|
| 80 |
+
value: ExtensionMode; current: ExtensionMode;
|
| 81 |
+
label: string; desc: string; onSelect: () => void;
|
| 82 |
+
}) {
|
| 83 |
+
const active = value === current;
|
| 84 |
+
return (
|
| 85 |
+
<div
|
| 86 |
+
onClick={onSelect}
|
| 87 |
+
style={{
|
| 88 |
+
padding: "10px 12px", borderRadius: 8, cursor: "pointer",
|
| 89 |
+
border: `1px solid ${active ? "var(--accent)" : "var(--border)"}`,
|
| 90 |
+
background: active ? "rgba(88,166,255,0.08)" : "var(--surface2)",
|
| 91 |
+
transition: "all 0.15s", marginBottom: 6,
|
| 92 |
+
}}
|
| 93 |
+
>
|
| 94 |
+
<div style={{ display: "flex", alignItems: "center", justifyContent: "space-between" }}>
|
| 95 |
+
<span style={{ fontWeight: 600, fontSize: 12 }}>{label}</span>
|
| 96 |
+
{active && <span style={{ fontSize: 10, color: "var(--accent)", fontFamily: "monospace" }}>ACTIVE</span>}
|
| 97 |
+
</div>
|
| 98 |
+
<div style={{ color: "var(--muted)", fontSize: 11, marginTop: 3 }}>{desc}</div>
|
| 99 |
+
</div>
|
| 100 |
+
);
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
// ---------------------------------------------------------------------------
|
| 104 |
+
// Main popup component
|
| 105 |
+
// ---------------------------------------------------------------------------
|
| 106 |
+
|
| 107 |
+
function Popup() {
|
| 108 |
+
const { enabled, mode, wsStatus, totalAnalyzed, demoMode,
|
| 109 |
+
setEnabled, setMode } = useExtensionStore();
|
| 110 |
+
|
| 111 |
+
// Poll WS status from background worker
|
| 112 |
+
useEffect(() => {
|
| 113 |
+
const poll = () => {
|
| 114 |
+
chrome.runtime.sendMessage({ type: "get_status" }, (resp) => {
|
| 115 |
+
if (resp?.status) {
|
| 116 |
+
useExtensionStore.getState().setWsStatus(resp.status);
|
| 117 |
+
}
|
| 118 |
+
});
|
| 119 |
+
};
|
| 120 |
+
poll();
|
| 121 |
+
const id = setInterval(poll, 3000);
|
| 122 |
+
return () => clearInterval(id);
|
| 123 |
+
}, []);
|
| 124 |
+
|
| 125 |
+
const colorCounts = { green: 0, yellow: 0, red: 0, purple: 0 };
|
| 126 |
+
|
| 127 |
+
return (
|
| 128 |
+
<div style={{ padding: 16 }}>
|
| 129 |
+
<style>{POPUP_STYLES}</style>
|
| 130 |
+
<style>{`
|
| 131 |
+
@keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.4} }
|
| 132 |
+
`}</style>
|
| 133 |
+
|
| 134 |
+
{/* Header */}
|
| 135 |
+
<div style={{ display: "flex", alignItems: "center", justifyContent: "space-between", marginBottom: 16 }}>
|
| 136 |
+
<div>
|
| 137 |
+
<div style={{ fontFamily: "monospace", fontSize: 11, color: "var(--muted)", letterSpacing: 1 }}>
|
| 138 |
+
FACT INTELLIGENCE
|
| 139 |
+
</div>
|
| 140 |
+
<StatusBadge status={wsStatus} />
|
| 141 |
+
</div>
|
| 142 |
+
<Toggle checked={enabled} onChange={setEnabled} />
|
| 143 |
+
</div>
|
| 144 |
+
|
| 145 |
+
{/* Demo mode notice */}
|
| 146 |
+
{demoMode && (
|
| 147 |
+
<div style={{
|
| 148 |
+
padding: "8px 10px", background: "rgba(234,179,8,0.08)",
|
| 149 |
+
border: "1px solid rgba(234,179,8,0.2)", borderRadius: 6,
|
| 150 |
+
fontSize: 11, color: "#eab308", marginBottom: 12,
|
| 151 |
+
}}>
|
| 152 |
+
β Demo mode β add API keys for live LLM analysis
|
| 153 |
+
</div>
|
| 154 |
+
)}
|
| 155 |
+
|
| 156 |
+
<AnimatePresence>
|
| 157 |
+
{enabled && (
|
| 158 |
+
<motion.div
|
| 159 |
+
initial={{ opacity: 0, height: 0 }}
|
| 160 |
+
animate={{ opacity: 1, height: "auto" }}
|
| 161 |
+
exit={{ opacity: 0, height: 0 }}
|
| 162 |
+
>
|
| 163 |
+
{/* Mode selector */}
|
| 164 |
+
<div style={{ marginBottom: 16 }}>
|
| 165 |
+
<div style={{ fontFamily: "monospace", fontSize: 10, color: "var(--muted)",
|
| 166 |
+
letterSpacing: 1, textTransform: "uppercase", marginBottom: 8 }}>
|
| 167 |
+
highlight mode
|
| 168 |
+
</div>
|
| 169 |
+
<ModeCard value="minimal" current={mode} onSelect={() => setMode("minimal")}
|
| 170 |
+
label="Minimal" desc="Only debunked (red) and AI hallucinations (purple)" />
|
| 171 |
+
<ModeCard value="normal" current={mode} onSelect={() => setMode("normal")}
|
| 172 |
+
label="Normal (recommended)" desc="Red, purple, and unverified (yellow)" />
|
| 173 |
+
<ModeCard value="advanced" current={mode} onSelect={() => setMode("advanced")}
|
| 174 |
+
label="Advanced" desc="Full factual landscape including verified (green)" />
|
| 175 |
+
</div>
|
| 176 |
+
|
| 177 |
+
{/* Color legend */}
|
| 178 |
+
<div style={{ marginBottom: 16 }}>
|
| 179 |
+
<div style={{ fontFamily: "monospace", fontSize: 10, color: "var(--muted)",
|
| 180 |
+
letterSpacing: 1, textTransform: "uppercase", marginBottom: 8 }}>
|
| 181 |
+
color legend
|
| 182 |
+
</div>
|
| 183 |
+
{(Object.entries(COLOR_CONFIG) as [string, typeof COLOR_CONFIG.green][]).map(([k, v]) => (
|
| 184 |
+
<div key={k} style={{ display: "flex", alignItems: "center", gap: 8, marginBottom: 5 }}>
|
| 185 |
+
<div style={{ width: 12, height: 12, borderRadius: 3, background: v.hex, flexShrink: 0 }} />
|
| 186 |
+
<span style={{ color: v.hex, fontSize: 11, fontWeight: 600 }}>{v.label}</span>
|
| 187 |
+
</div>
|
| 188 |
+
))}
|
| 189 |
+
</div>
|
| 190 |
+
|
| 191 |
+
{/* Stats */}
|
| 192 |
+
<div style={{
|
| 193 |
+
padding: "10px 12px", background: "var(--surface2)",
|
| 194 |
+
border: "1px solid var(--border)", borderRadius: 8,
|
| 195 |
+
display: "flex", justifyContent: "space-between",
|
| 196 |
+
}}>
|
| 197 |
+
<div style={{ textAlign: "center" }}>
|
| 198 |
+
<div style={{ fontFamily: "monospace", fontSize: 18, fontWeight: 700, color: "var(--accent)" }}>
|
| 199 |
+
{totalAnalyzed}
|
| 200 |
+
</div>
|
| 201 |
+
<div style={{ fontSize: 10, color: "var(--muted)", textTransform: "uppercase", letterSpacing: 0.8 }}>
|
| 202 |
+
analyzed
|
| 203 |
+
</div>
|
| 204 |
+
</div>
|
| 205 |
+
<div style={{ width: 1, background: "var(--border)" }} />
|
| 206 |
+
<div style={{ textAlign: "center" }}>
|
| 207 |
+
<div style={{ fontFamily: "monospace", fontSize: 18, fontWeight: 700, color: "var(--muted)" }}>
|
| 208 |
+
{wsStatus === "connected" ? "β" : "β"}
|
| 209 |
+
</div>
|
| 210 |
+
<div style={{ fontSize: 10, color: "var(--muted)", textTransform: "uppercase", letterSpacing: 0.8 }}>
|
| 211 |
+
engine
|
| 212 |
+
</div>
|
| 213 |
+
</div>
|
| 214 |
+
</div>
|
| 215 |
+
</motion.div>
|
| 216 |
+
)}
|
| 217 |
+
</AnimatePresence>
|
| 218 |
+
|
| 219 |
+
{/* Footer */}
|
| 220 |
+
<div style={{
|
| 221 |
+
marginTop: 14, paddingTop: 10, borderTop: "1px solid var(--border)",
|
| 222 |
+
fontFamily: "monospace", fontSize: 10, color: "var(--muted)", textAlign: "center",
|
| 223 |
+
}}>
|
| 224 |
+
v1.0.0 Β· WXT + FastAPI + Qdrant + Memgraph
|
| 225 |
+
</div>
|
| 226 |
+
</div>
|
| 227 |
+
);
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
// Mount the popup into the #root element provided by the WXT popup HTML.
const rootEl = document.getElementById("root");
if (rootEl) {
  createRoot(rootEl).render(<Popup />);
}
|
extension/package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "fact-intelligence-extension",
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
+
"description": "Omnichannel fact-checking and AI hallucination detection browser extension",
|
| 5 |
+
"private": true,
|
| 6 |
+
"scripts": {
|
| 7 |
+
"dev": "wxt dev",
|
| 8 |
+
"dev:chrome": "wxt dev --browser chrome",
|
| 9 |
+
"dev:firefox": "wxt dev --browser firefox",
|
| 10 |
+
"build": "wxt build",
|
| 11 |
+
"build:chrome": "wxt build --browser chrome",
|
| 12 |
+
"build:firefox": "wxt build --browser firefox",
|
| 13 |
+
"build:all": "wxt build --browser chrome && wxt build --browser firefox",
|
| 14 |
+
"zip": "wxt zip",
|
| 15 |
+
"type-check": "vue-tsc --noEmit"
|
| 16 |
+
},
|
| 17 |
+
"dependencies": {
|
| 18 |
+
"framer-motion": "^11.15.0",
|
| 19 |
+
"react": "^19.0.0",
|
| 20 |
+
"react-dom": "^19.0.0",
|
| 21 |
+
"xxhash-wasm": "^1.0.2",
|
| 22 |
+
"zustand": "^5.0.2"
|
| 23 |
+
},
|
| 24 |
+
"devDependencies": {
|
| 25 |
+
"@types/chrome": "^0.0.287",
|
| 26 |
+
"@types/react": "^19.0.0",
|
| 27 |
+
"@types/react-dom": "^19.0.0",
|
| 28 |
+
"@wxt-dev/module-react": "^1.1.0",
|
| 29 |
+
"typescript": "^5.7.2",
|
| 30 |
+
"wxt": "^0.19.0"
|
| 31 |
+
}
|
| 32 |
+
}
|
extension/stores/extensionStore.ts
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// extension/stores/extensionStore.ts
|
| 2 |
+
// Zustand store with chrome.storage.sync persistence layer.
|
| 3 |
+
// State is shared across popup, background, and content script contexts.
|
| 4 |
+
|
| 5 |
+
import { create } from "zustand";
|
| 6 |
+
import { subscribeWithSelector } from "zustand/middleware";
|
| 7 |
+
|
| 8 |
+
export type HighlightColor = "green" | "yellow" | "red" | "purple";
|
| 9 |
+
export type ExtensionMode = "minimal" | "normal" | "advanced";
|
| 10 |
+
export type WSStatus = "connected" | "connecting" | "reconnecting" | "offline";
|
| 11 |
+
|
| 12 |
+
// One analysis verdict for a scanned content segment, pushed from the backend.
// Field notes below are inferred from usage in this file and the popup;
// confirm exact semantics against the backend models where hedged.
export interface AnalysisResult {
  element_id: string;       // identifier of the analyzed DOM element (assigned by the content script)
  content_hash: string;     // hash of the segment text, used for dedup/node lookup
  platform: string;         // source platform tag — presumably "twitter"/"youtube"/etc.; confirm against backend
  color: HighlightColor;    // verdict bucket that drives the highlight color
  confidence: number;       // presumably 0..1 — TODO confirm range against backend
  verdict_label: string;    // short human-readable verdict (shown in UI)
  explanation: string;      // longer justification text
  sources: Array<{          // supporting citations for the verdict
    url: string;
    domain: string;
    favicon_url: string;
    snippet: string;
  }>;
  trust_score: number;      // NOTE(review): aggregate source-trust metric? confirm semantics
  velocity: number;         // NOTE(review): spread/virality metric? confirm semantics
  has_community_note: boolean;
  latency_ms: number;       // backend processing time in milliseconds
  cached: boolean;          // true when served from cache rather than a fresh analysis
  timestamp: string;        // ISO timestamp string — presumably UTC; verify against producer
}
|
| 33 |
+
|
| 34 |
+
// Shape of the shared zustand store used across popup, background, and
// content-script contexts.
interface ExtensionState {
  // User preferences (persisted to chrome.storage.sync)
  enabled: boolean;      // master on/off switch for the extension
  mode: ExtensionMode;   // which verdict colors are highlighted (see shouldShowColor)

  // Runtime state (not persisted)
  wsStatus: WSStatus;    // backend WebSocket state (polled from the background worker)
  pendingCount: number;  // segments queued but not yet analyzed
  totalAnalyzed: number; // running count of analyzed segments this session
  demoMode: boolean;     // presumably set when the backend runs without LLM API keys — see popup banner

  // Actions
  setEnabled: (v: boolean) => void;    // also persists the preference to storage
  setMode: (m: ExtensionMode) => void; // also persists the preference to storage
  setWsStatus: (s: WSStatus) => void;
  incrementPending: () => void;
  decrementPending: () => void;        // clamped so the counter never goes below 0
  incrementAnalyzed: () => void;
  setDemoMode: (v: boolean) => void;
}
|
| 54 |
+
|
| 55 |
+
// ---------------------------------------------------------------------------
|
| 56 |
+
// Chrome storage sync helpers
|
| 57 |
+
// ---------------------------------------------------------------------------
|
| 58 |
+
const STORAGE_KEY = "fact_intelligence_prefs";
|
| 59 |
+
|
| 60 |
+
async function loadFromStorage(): Promise<Partial<ExtensionState>> {
|
| 61 |
+
return new Promise((resolve) => {
|
| 62 |
+
if (typeof chrome === "undefined" || !chrome.storage) {
|
| 63 |
+
resolve({});
|
| 64 |
+
return;
|
| 65 |
+
}
|
| 66 |
+
chrome.storage.sync.get([STORAGE_KEY], (result) => {
|
| 67 |
+
resolve(result[STORAGE_KEY] ?? {});
|
| 68 |
+
});
|
| 69 |
+
});
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
async function saveToStorage(prefs: { enabled: boolean; mode: ExtensionMode }) {
|
| 73 |
+
if (typeof chrome === "undefined" || !chrome.storage) return;
|
| 74 |
+
chrome.storage.sync.set({ [STORAGE_KEY]: prefs });
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// ---------------------------------------------------------------------------
|
| 78 |
+
// Store definition
|
| 79 |
+
// ---------------------------------------------------------------------------
|
| 80 |
+
// Global zustand store. subscribeWithSelector lets other contexts subscribe to
// individual fields (e.g. `mode`) instead of re-running on every state change.
export const useExtensionStore = create<ExtensionState>()(
  subscribeWithSelector((set, get) => ({
    // Defaults used until chrome.storage hydration (below) completes.
    enabled: true,
    mode: "normal",
    wsStatus: "connecting",
    pendingCount: 0,
    totalAnalyzed: 0,
    demoMode: false,

    // Preference setters persist to chrome.storage.sync as a side effect.
    setEnabled: (v) => {
      set({ enabled: v });
      saveToStorage({ enabled: v, mode: get().mode });
    },
    setMode: (m) => {
      set({ mode: m });
      saveToStorage({ enabled: get().enabled, mode: m });
    },
    setWsStatus: (s) => set({ wsStatus: s }),
    incrementPending: () => set((s) => ({ pendingCount: s.pendingCount + 1 })),
    // Clamped at 0 so duplicate decrements cannot drive the counter negative.
    decrementPending: () =>
      set((s) => ({ pendingCount: Math.max(0, s.pendingCount - 1) })),
    incrementAnalyzed: () =>
      set((s) => ({ totalAnalyzed: s.totalAnalyzed + 1 })),
    setDemoMode: (v) => set({ demoMode: v }),
  }))
);
|
| 106 |
+
|
| 107 |
+
// Hydrate from chrome.storage.sync on module load
|
| 108 |
+
loadFromStorage().then((saved) => {
|
| 109 |
+
if (saved.enabled !== undefined) {
|
| 110 |
+
useExtensionStore.setState({ enabled: saved.enabled as boolean });
|
| 111 |
+
}
|
| 112 |
+
if (saved.mode !== undefined) {
|
| 113 |
+
useExtensionStore.setState({ mode: saved.mode as ExtensionMode });
|
| 114 |
+
}
|
| 115 |
+
});
|
| 116 |
+
|
| 117 |
+
// ---------------------------------------------------------------------------
|
| 118 |
+
// Mode-based color filter logic
|
| 119 |
+
// ---------------------------------------------------------------------------
|
| 120 |
+
export function shouldShowColor(
|
| 121 |
+
color: HighlightColor,
|
| 122 |
+
mode: ExtensionMode
|
| 123 |
+
): boolean {
|
| 124 |
+
switch (mode) {
|
| 125 |
+
case "minimal":
|
| 126 |
+
// Only show definitive threats β don't add noise for users who want minimal
|
| 127 |
+
return color === "red" || color === "purple";
|
| 128 |
+
case "normal":
|
| 129 |
+
// Default: skip low-confidence green confirmations
|
| 130 |
+
return color === "red" || color === "purple" || color === "yellow";
|
| 131 |
+
case "advanced":
|
| 132 |
+
// Full factual landscape including green corroborations
|
| 133 |
+
return true;
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
// ---------------------------------------------------------------------------
|
| 138 |
+
// Color display config
|
| 139 |
+
// ---------------------------------------------------------------------------
|
| 140 |
+
// Per-verdict display config shared by the popup legend and (presumably) the
// content-script highlighter. `opacity` looks like the highlight background
// alpha — confirm against the content-script usage.
export const COLOR_CONFIG = {
  green: { hex: "#22c55e", opacity: 0.12, label: "Verified", icon: "β" },
  yellow: { hex: "#eab308", opacity: 0.14, label: "Unverified", icon: "β " },
  red: { hex: "#ef4444", opacity: 0.16, label: "Debunked", icon: "β" },
  purple: { hex: "#a855f7", opacity: 0.15, label: "AI Hallucination", icon: "β" },
} as const;
|
extension/tsconfig.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"target": "ESNext",
|
| 4 |
+
"module": "ESNext",
|
| 5 |
+
"moduleResolution": "Bundler",
|
| 6 |
+
"lib": ["ESNext", "DOM", "DOM.Iterable"],
|
| 7 |
+
"jsx": "react-jsx",
|
| 8 |
+
"strict": true,
|
| 9 |
+
"skipLibCheck": true,
|
| 10 |
+
"noUnusedLocals": false,
|
| 11 |
+
"noUnusedParameters": false,
|
| 12 |
+
"paths": {
|
| 13 |
+
"@/*": ["./src/*"]
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"include": ["**/*.ts", "**/*.tsx", ".wxt/types/**/*.d.ts"],
|
| 17 |
+
"exclude": ["node_modules", ".output"]
|
| 18 |
+
}
|
extension/wxt.config.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// extension/wxt.config.ts
|
| 2 |
+
// WXT framework configuration β replaces raw Manifest V3 boilerplate.
|
| 3 |
+
// Provides HMR, multi-browser compatibility (Chrome/Firefox/Edge/Arc),
|
| 4 |
+
// TypeScript-first entrypoints, Vite under the hood.
|
| 5 |
+
|
| 6 |
+
import { defineConfig } from "wxt";
|
| 7 |
+
|
| 8 |
+
export default defineConfig({
  // Use the chrome.* API namespace (WXT handles Firefox compatibility).
  extensionApi: "chrome",
  modules: ["@wxt-dev/module-react"],

  vite: () => ({
    define: {
      // Injected at build time — point WS_URL at your cloudflared tunnel URL;
      // the fallback below is a placeholder and must be replaced for production.
      __WS_URL__: JSON.stringify(
        process.env.WS_URL || "wss://fact-engine.your-domain.workers.dev"
      ),
    },
  }),

  manifest: {
    name: "Fact & Hallucination Intelligence",
    description:
      "Real-time omnichannel fact-checking and AI hallucination detection",
    version: "1.0.0",
    permissions: [
      "storage", // chrome.storage.sync for user preferences
      "tabs", // send messages to content scripts
      "activeTab",
    ],
    host_permissions: [
      "https://twitter.com/*",
      "https://x.com/*",
      "https://www.instagram.com/*",
      "https://www.youtube.com/*",
      "https://chat.openai.com/*",
      "https://claude.ai/*",
      "https://gemini.google.com/*",
      "*://*/*", // Covers news sites — restrict in production
    ],
    // connect-src must allow wss: for the tunnel WebSocket.
    content_security_policy: {
      extension_pages:
        "script-src 'self'; object-src 'self'; connect-src wss: https:",
    },
    icons: {
      "16": "icon/16.png",
      "32": "icon/32.png",
      "48": "icon/48.png",
      "128": "icon/128.png",
    },
  },
});
|
infra/tunnel_setup.sh
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# tunnel_setup.sh — Cloudflare Tunnel setup for the Fact Intelligence backend.
#
# What this does:
#   1. Installs the cloudflared binary (Linux/macOS)
#   2. Authenticates with your Cloudflare account
#   3. Creates a named tunnel pointing to the FastAPI backend (localhost:7860)
#   4. Configures DNS routing: wss://fact-engine.<your-domain>.workers.dev
#   5. Runs the tunnel as a systemd service (optional)
#
# Usage:
#   chmod +x tunnel_setup.sh
#   DOMAIN=your-domain.com ./tunnel_setup.sh
#
# After running, copy the tunnel URL into extension/wxt.config.ts __WS_URL__

# Fail fast: abort on any error, unset variable, or pipeline failure.
set -euo pipefail

# DOMAIN is the only user-supplied knob; everything else is fixed.
DOMAIN="${DOMAIN:-your-domain.com}"
TUNNEL_NAME="fact-intelligence"
BACKEND_PORT=7860                 # local port the FastAPI backend listens on
CONFIG_DIR="$HOME/.cloudflared"   # cloudflared's standard config directory

echo "=== Cloudflare Tunnel Setup for Fact Intelligence System ==="
echo "Domain: $DOMAIN"
echo "Tunnel: $TUNNEL_NAME"
echo ""
|
| 28 |
+
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
# 1. Install cloudflared
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
# Install the cloudflared binary if it is not already on PATH.
# Linux: official .deb from GitHub releases (amd64/arm64 only).
# macOS: Homebrew tap. Any other OS: exit with a pointer to the docs.
install_cloudflared() {
  if command -v cloudflared &>/dev/null; then
    echo "[β] cloudflared already installed: $(cloudflared --version)"
    return
  fi

  echo "[β] Installing cloudflared..."
  OS=$(uname -s | tr '[:upper:]' '[:lower:]')
  ARCH=$(uname -m)

  case "$OS" in
    linux)
      case "$ARCH" in
        x86_64) PKG="cloudflared-linux-amd64.deb" ;;
        aarch64) PKG="cloudflared-linux-arm64.deb" ;;
        *) echo "Unsupported arch: $ARCH"; exit 1 ;;
      esac
      # "latest/download" always resolves to the newest release asset.
      curl -fsSL "https://github.com/cloudflare/cloudflared/releases/latest/download/$PKG" -o /tmp/cloudflared.deb
      sudo dpkg -i /tmp/cloudflared.deb
      ;;
    darwin)
      brew install cloudflare/cloudflare/cloudflared
      ;;
    *)
      echo "Unsupported OS: $OS. Install cloudflared manually from https://developers.cloudflare.com/cloudflare-one/connections/connect-apps/install-and-setup/"
      exit 1
      ;;
  esac
  echo "[β] cloudflared installed"
}
|
| 62 |
+
|
| 63 |
+
# ---------------------------------------------------------------------------
|
| 64 |
+
# 2. Authenticate (opens browser for Cloudflare login)
|
| 65 |
+
# ---------------------------------------------------------------------------
|
| 66 |
+
# Log in to Cloudflare unless a certificate from a previous login exists.
# `cloudflared tunnel login` opens a browser and drops cert.pem in CONFIG_DIR.
authenticate() {
  if [ ! -f "$CONFIG_DIR/cert.pem" ]; then
    echo "[β] Opening browser for Cloudflare authentication..."
    cloudflared tunnel login
  else
    echo "[β] Already authenticated (cert.pem found)"
  fi
}
|
| 74 |
+
|
| 75 |
+
# ---------------------------------------------------------------------------
|
| 76 |
+
# 3. Create the tunnel
|
| 77 |
+
# ---------------------------------------------------------------------------
|
| 78 |
+
# Create the named tunnel if it does not exist, and export its ID as the
# global TUNNEL_ID (consumed by write_config's credentials-file path).
#
# Fix: the original used `grep -q "$TUNNEL_NAME"` / `grep | awk`, which matches
# by substring — a tunnel named e.g. "fact-intelligence-v2" would be mistaken
# for ours and could yield a wrong or multi-line TUNNEL_ID. We now match the
# name column exactly and stop at the first hit.
create_tunnel() {
  # `cloudflared tunnel list` prints: ID NAME CREATED CONNECTIONS
  TUNNEL_ID=$(cloudflared tunnel list 2>/dev/null | awk -v name="$TUNNEL_NAME" '$2 == name {print $1; exit}')
  if [ -n "$TUNNEL_ID" ]; then
    echo "[β] Tunnel '$TUNNEL_NAME' already exists"
  else
    echo "[β] Creating tunnel '$TUNNEL_NAME'..."
    cloudflared tunnel create "$TUNNEL_NAME"
    TUNNEL_ID=$(cloudflared tunnel list | awk -v name="$TUNNEL_NAME" '$2 == name {print $1; exit}')
    echo "[β] Created tunnel ID: $TUNNEL_ID"
  fi
  echo "TUNNEL_ID=$TUNNEL_ID"
}
|
| 90 |
+
|
| 91 |
+
# ---------------------------------------------------------------------------
|
| 92 |
+
# 4. Write tunnel configuration
|
| 93 |
+
# ---------------------------------------------------------------------------
|
| 94 |
+
# Write ~/.cloudflared/config.yml routing fact-engine.$DOMAIN to the local
# backend. The heredoc is deliberately unquoted so $TUNNEL_NAME, $TUNNEL_ID,
# $CONFIG_DIR, $DOMAIN and $BACKEND_PORT expand at write time.
# Relies on TUNNEL_ID having been set by create_tunnel() beforehand.
write_config() {
  mkdir -p "$CONFIG_DIR"
  cat > "$CONFIG_DIR/config.yml" << EOF
tunnel: $TUNNEL_NAME
credentials-file: $CONFIG_DIR/$TUNNEL_ID.json

ingress:
  # WebSocket endpoint β extension connects here
  - hostname: fact-engine.$DOMAIN
    service: http://localhost:$BACKEND_PORT
    originRequest:
      noTLSVerify: false
      connectTimeout: 30s

  # Catch-all (required by cloudflared)
  - service: http_status:404

warp-routing:
  enabled: false
EOF
  echo "[β] Config written to $CONFIG_DIR/config.yml"
}
|
| 116 |
+
|
| 117 |
+
# ---------------------------------------------------------------------------
|
| 118 |
+
# 5. Create DNS record
|
| 119 |
+
# ---------------------------------------------------------------------------
|
| 120 |
+
# Point fact-engine.$DOMAIN at the tunnel via a Cloudflare-managed CNAME.
# A failure here is non-fatal: the route may already exist from a prior run.
setup_dns() {
  echo "[β] Creating DNS CNAME: fact-engine.$DOMAIN β $TUNNEL_NAME.cfargotunnel.com"
  if ! cloudflared tunnel route dns "$TUNNEL_NAME" "fact-engine.$DOMAIN"; then
    echo "[!] DNS route already exists or failed β check Cloudflare dashboard"
  fi
  echo "[β] DNS configured"
}
|
| 126 |
+
|
| 127 |
+
# ---------------------------------------------------------------------------
|
| 128 |
+
# 6. Systemd service (Linux only)
|
| 129 |
+
# ---------------------------------------------------------------------------
|
| 130 |
+
# Install and start a systemd unit so the tunnel survives logout/reboot.
# Skipped on non-Linux hosts (macOS users run the tunnel manually/launchd).
setup_systemd() {
  if [ "$(uname -s)" != "Linux" ]; then
    echo "[!] Skipping systemd setup (not Linux)"
    return
  fi

  # Unquoted heredoc: $USER, $CONFIG_DIR, $TUNNEL_NAME, and the resolved
  # cloudflared path are baked into the unit file at install time.
  sudo tee /etc/systemd/system/cloudflared-fact.service > /dev/null << EOF
[Unit]
Description=Cloudflare Tunnel β Fact Intelligence System
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=$USER
ExecStart=$(command -v cloudflared) tunnel --config $CONFIG_DIR/config.yml run $TUNNEL_NAME
Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
EOF

  sudo systemctl daemon-reload
  sudo systemctl enable cloudflared-fact
  sudo systemctl start cloudflared-fact
  echo "[β] Systemd service started: cloudflared-fact"
}
|
| 158 |
+
|
| 159 |
+
# ---------------------------------------------------------------------------
|
| 160 |
+
# Main
|
| 161 |
+
# ---------------------------------------------------------------------------
|
| 162 |
+
# Run all steps in order. Each function guards against already-done work
# (existing binary/cert/tunnel), so the script is safe to re-run.
install_cloudflared
authenticate
create_tunnel
write_config
setup_dns
setup_systemd

# Print the follow-up actions the operator must do by hand.
echo ""
echo "=== Setup complete! ==="
echo ""
echo "WebSocket URL for the extension:"
echo "  wss://fact-engine.$DOMAIN/ws/{session_id}"
echo ""
echo "Update extension/wxt.config.ts:"
echo "  __WS_URL__: 'wss://fact-engine.$DOMAIN/ws'"
echo ""
echo "Test the tunnel:"
echo "  curl https://fact-engine.$DOMAIN/health"
echo ""
|