diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..ab03544417c367773abdafc824f77b6ca41f1d8b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,47 @@ +# Python +__pycache__/ +**/__pycache__/ +*.py[cod] +*.pyo +.pytest_cache/ +.cache/ +venv/ +.venv/ +*.egg-info/ + +# Environment & secrets +.env +.env.* +serviceAccountKey.json + +# Local data / logs +data/history.json +inputs/*.log +inputs/__pycache__/ + +# ML training artefacts (keep ml/models/ — needed at runtime) +ml/data/ +ml/*.log + +# Frontend source (only dist goes to Firebase Hosting, not Cloud Run) +frontend/ + +# Git / editor +.git/ +.gitignore +.gitattributes +.vscode/ +.idea/ + +# Docs +docs/ +*.md +README* + +# Tests +tests/ +pytest.ini + +# Docker itself +Dockerfile +.dockerignore diff --git a/.firebase/hosting.ZnJvbnRlbmQvZGlzdA.cache b/.firebase/hosting.ZnJvbnRlbmQvZGlzdA.cache new file mode 100644 index 0000000000000000000000000000000000000000..f1707171287c211290a9c789a2869017a9def9de --- /dev/null +++ b/.firebase/hosting.ZnJvbnRlbmQvZGlzdA.cache @@ -0,0 +1,5 @@ +vite.svg,1771983804434,d3bbbc44b3ea71906a72bf2ec1a4716903e2e3d9f85a5007205a65d1f12e2923 +index.html,1771983804629,b6c877b7fe830ae6270dfb77cd1d205222591249325fa88601f51f6e2ed57653 +logo.svg,1771983804434,c1ca19989c26d83c632b01609dc4514e16bef7418284c6df88b29ac34ca035ec +assets/index-DE8XF5VL.css,1771983804629,941148112bdd25f98beea529b6ad97209f2f777e70671d0f5b96f919c8472699 +assets/index-BCcoqzYM.js,1771983804629,60632c706af44a3486a56a8364e32bdce3c7a8cb388f69de2fe9c21876d55942 diff --git a/.firebaserc b/.firebaserc index b49ad35cde53275e3206456c669028493de73153..3a1630a35bb5d76be364b46be4a1952eccdbe691 100644 --- a/.firebaserc +++ b/.firebaserc @@ -1,5 +1,15 @@ { - "projects": {}, - "targets": {}, + "projects": { + "default": "philverify" + }, + "targets": { + "philverify": { + "hosting": { + "philverify": [ + "philverify" + ] + } + } + }, "etags": {} -} +} \ No newline at end of file diff 
--git a/.gcloudignore b/.gcloudignore new file mode 100644 index 0000000000000000000000000000000000000000..d5be0d03bd49f8b9288dbbdb4baa75a64033e0da --- /dev/null +++ b/.gcloudignore @@ -0,0 +1,50 @@ +# .gcloudignore — Cloud Build source upload exclusions +# gcloud builds submit uses this before creating the source tarball. +# Patterns follow .gitignore syntax. + +# ── Heavy runtimes / caches ─────────────────────────────────────────────────── +venv/ +.venv/ +__pycache__/ +**/__pycache__/ +*.py[cod] +.cache/ +.pytest_cache/ + +# ── Secrets (never upload) ──────────────────────────────────────────────────── +.env +.env.* +serviceAccountKey.json +*.json.key + +# ── ML artefacts (large — Docker downloads from HuggingFace at build time) ─── +ml/models/ +ml/data/raw/ +ml/data/processed/ +ml/data/combined/ + +# ── Frontend source & deps (built separately, not needed in Cloud Run) ─────── +frontend/node_modules/ +frontend/dist/ + +# ── Dataset pipeline scripts (not needed at runtime) ───────────────────────── +ml/data_sources/ +ml/train_*.py +ml/dataset_builder.py +ml/combined_dataset.py +ml/_smoke_test.py + +# ── Tests & docs ────────────────────────────────────────────────────────────── +tests/ +docs/ + +# ── OS / editor ─────────────────────────────────────────────────────────────── +.DS_Store +.vscode/ +.idea/ +*.swp + +# ── Git ─────────────────────────────────────────────────────────────────────── +.git/ +.gitignore +.gitattributes diff --git a/.gitignore b/.gitignore index 3a83b1361117997dd67908f60494f5480e2b7124..bee7023779d4f8a8c19d3fe8d085124f9e50920b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,10 +22,20 @@ build/ # OS .DS_Store -# ML models (too large for git) +# ML models (too large for git — use DVC or download separately) ml/models/*.pkl ml/models/*.bin ml/models/*.pt +ml/models/*.safetensors +ml/models/xlmr_model/ serviceAccountKey.json *.json.key docs/*.json + +# Dataset pipeline — raw downloads & processed parquet (regenerate via dataset_builder.py) 
+ml/data/raw/ +ml/data/processed/ +ml/_smoke_test.py + +# Local history persistence (user data — do not commit) +data/history.json diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..756b6b93e156fb79e7d718469e6cce883ca46aab --- /dev/null +++ b/Dockerfile @@ -0,0 +1,67 @@ +# ── PhilVerify API — Cloud Run Dockerfile ───────────────────────────────────── +# Build: docker build -t philverify-api . +# Run: docker run -p 8080:8080 --env-file .env philverify-api + +FROM python:3.12-slim + +# ── System dependencies ─────────────────────────────────────────────────────── +# tesseract: OCR for image verification +# ffmpeg: audio decoding for Whisper (video/audio input) +RUN apt-get update && apt-get install -y --no-install-recommends \ + tesseract-ocr \ + tesseract-ocr-fil \ + tesseract-ocr-eng \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + curl \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# ── Python dependencies ─────────────────────────────────────────────────────── +# Upgrade pip + add setuptools (required by openai-whisper's setup.py on 3.12-slim) +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \ + pip install --no-cache-dir -r requirements.txt + +# Download spaCy English model (small, ~12 MB) +RUN python -m spacy download en_core_web_sm || true + +# Download NLTK data used by the NLP pipeline +RUN python -c "import nltk; nltk.download('punkt', quiet=True); nltk.download('stopwords', quiet=True); nltk.download('punkt_tab', quiet=True)" || true + +# ── Application code ────────────────────────────────────────────────────────── +COPY . . + +# Remove local secrets — Cloud Run uses its own service account (ADC) +# The serviceAccountKey.json is NOT needed inside the container. 
+RUN rm -f serviceAccountKey.json .env + +# Pre-download Whisper base model so cold starts are faster +RUN python -c "import whisper; whisper.load_model('base')" || true + +# Pre-download HuggingFace transformer models used by the NLP pipeline so that +# cold starts don't hit the network — these would otherwise be fetched on the +# first /verify request and cause a Firebase Hosting 502 timeout (~1.2 GB total). +RUN python -c "\ +from transformers import pipeline; \ +print('Downloading twitter-roberta-base-sentiment...'); \ +pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-sentiment-latest'); \ +print('Downloading emotion-english-distilroberta...'); \ +pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base'); \ +print('Downloading distilbart-cnn-6-6 (claim extractor)...'); \ +pipeline('summarization', model='sshleifer/distilbart-cnn-6-6'); \ +print('All HuggingFace models cached.'); \ +" || true + +# ── Runtime ─────────────────────────────────────────────────────────────────── +# Cloud Run sets PORT automatically; default to 8080 for local runs. +ENV PORT=8080 +ENV APP_ENV=production +ENV DEBUG=false + +EXPOSE 8080 + +# Use exec form so signals (SIGTERM) reach uvicorn directly +CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT} --workers 1 --timeout-keep-alive 75"] diff --git a/api/routes/history.py b/api/routes/history.py index 944c5aa6671deda2afdd612724275a399dce6532..e78fb5a10b6af0007c249abb5ab8a6f2396d4081 100644 --- a/api/routes/history.py +++ b/api/routes/history.py @@ -1,21 +1,101 @@ """ PhilVerify — History Route GET /history — Returns past verification logs with pagination. + +Persistence tier order (best to worst): + 1. Firestore — requires Cloud Firestore API to be enabled in GCP console + 2. Local JSON file — data/history.json, survives server restarts, no setup needed + 3. 
In-memory list — last resort, resets on every restart """ +import json import logging -from fastapi import APIRouter, Query +import threading +from pathlib import Path +from fastapi import APIRouter, Query, HTTPException from api.schemas import HistoryResponse, HistoryEntry, Verdict logger = logging.getLogger(__name__) router = APIRouter(prefix="/history", tags=["History"]) -# In-memory store for development. Will be replaced by DB queries in Phase 7. +# ── Local JSON file store ───────────────────────────────────────────────────── +# Survives server restarts. Used when Firestore is unavailable (e.g. API disabled). +_HISTORY_FILE = Path(__file__).parent.parent.parent / "data" / "history.json" +_HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True) +_file_lock = threading.Lock() # Guard concurrent writes + + +def _load_history_file() -> list[dict]: + """Read all records from the local JSON history file.""" + try: + if _HISTORY_FILE.exists(): + return json.loads(_HISTORY_FILE.read_text(encoding="utf-8")) + except Exception as e: + logger.warning("Could not read history file: %s", e) + return [] + + +def _append_history_file(entry: dict) -> None: + """Atomically append one entry to the local JSON history file.""" + with _file_lock: + records = _load_history_file() + records.append(entry) + try: + _HISTORY_FILE.write_text( + json.dumps(records, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + except Exception as e: + logger.warning("Could not write history file: %s", e) + + +# In-memory fallback (last resort — loses data on restart) _HISTORY: list[dict] = [] def record_verification(entry: dict) -> None: - """Called by the scoring engine to persist each verification result.""" + """ + Called by the scoring engine after every verification. + Writes to the local JSON file so history persists even without Firestore. + Also keeps the in-memory list in sync for the current process lifetime. 
+ """ _HISTORY.append(entry) + _append_history_file(entry) + + +@router.get( + "/{entry_id}", + summary="Get single verification by ID", + description="Returns the full raw record for a single verification, including layer scores, entities, sentiment.", +) +async def get_history_entry(entry_id: str) -> dict: + logger.info("GET /history/%s", entry_id) + + # Tier 1: Firestore + try: + from firebase_client import get_firestore + db = get_firestore() + if db: + doc = db.collection("verifications").document(entry_id).get() + if doc.exists: + return doc.to_dict() + except Exception as e: + logger.debug("Firestore detail unavailable (%s) — trying local file", e) + + # Tier 2: Local JSON file + try: + records = _load_history_file() + for r in records: + if r.get("id") == entry_id: + return r + except Exception: + pass + + # Tier 3: In-memory + for r in _HISTORY: + if r.get("id") == entry_id: + return r + + raise HTTPException(status_code=404, detail="Verification not found") @router.get( @@ -31,7 +111,7 @@ async def get_history( ) -> HistoryResponse: logger.info("GET /history | page=%d limit=%d", page, limit) - # Try Firestore first + # ── Tier 1: Firestore ───────────────────────────────────────────────────── try: from firebase_client import get_verifications, get_verification_count vf = verdict_filter.value if verdict_filter else None @@ -55,9 +135,34 @@ async def get_history( ], ) except Exception as e: - logger.debug("Firestore history read failed (%s) — using in-memory store", e) + logger.debug("Firestore history unavailable (%s) — trying local file", e) + + # ── Tier 2: Local JSON file ─────────────────────────────────────────────── + # Load from file rather than in-memory list so data survives restarts. 
+ file_entries = list(reversed(_load_history_file())) + if file_entries: + if verdict_filter: + file_entries = [e for e in file_entries if e.get("verdict") == verdict_filter.value] + total = len(file_entries) + start = (page - 1) * limit + paginated = file_entries[start : start + limit] + return HistoryResponse( + total=total, + entries=[ + HistoryEntry( + id=e["id"], + timestamp=e["timestamp"], + input_type=e.get("input_type", "text"), + text_preview=e.get("text_preview", "")[:120], + verdict=Verdict(e["verdict"]), + confidence=e["confidence"], + final_score=e["final_score"], + ) + for e in paginated + ], + ) - # In-memory fallback + # ── Tier 3: In-memory (last resort — resets on restart) ─────────────────── entries = list(reversed(_HISTORY)) if verdict_filter: entries = [e for e in entries if e.get("verdict") == verdict_filter.value] diff --git a/api/routes/preview.py b/api/routes/preview.py new file mode 100644 index 0000000000000000000000000000000000000000..f9dd49ebe918594360802bdab62ea863807ed9ac --- /dev/null +++ b/api/routes/preview.py @@ -0,0 +1,179 @@ +""" +PhilVerify — URL Preview Route +GET /preview?url= + +Fetches Open Graph / meta tags from the given URL and returns a lightweight +article card payload: title, description, image, site name, favicon, and domain. +Used by the frontend to show a "link unfurl" preview before/after verification. +""" +import logging +import re +from urllib.parse import urlparse + +from fastapi import APIRouter, Query, HTTPException +from pydantic import BaseModel +from typing import Optional + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/preview", tags=["Preview"]) + + +class URLPreview(BaseModel): + title: Optional[str] = None + description: Optional[str] = None + image: Optional[str] = None + site_name: Optional[str] = None + favicon: Optional[str] = None + domain: Optional[str] = None + + +def _slug_to_title(url: str) -> Optional[str]: + """Convert URL path slug to a readable title. + e.g. 
'remulla-chides-bulacan-guv-for-alleged-road-abuse-dont-act-like-a-king' → + 'Remulla Chides Bulacan Guv For Alleged Road Abuse Dont Act Like A King' + """ + parsed = urlparse(url) + segments = [s for s in parsed.path.split("/") if s and not s.isdigit() and len(s) > 4] + if segments: + slug = segments[-1] + # Remove common file extensions + slug = re.sub(r'\.(html?|php|aspx?)$', '', slug, flags=re.IGNORECASE) + # Strip UTM / query artifacts that leaked into path + slug = slug.split('?')[0] + return ' '.join(w.capitalize() for w in slug.replace('-', ' ').replace('_', ' ').split()) + return None + + +def _extract_preview(html: str, base_url: str, original_url: str = "") -> URLPreview: + """Parse OG / meta tags from raw HTML.""" + from bs4 import BeautifulSoup + + parsed_base = urlparse(base_url) + domain = parsed_base.netloc.replace("www.", "") + origin = f"{parsed_base.scheme}://{parsed_base.netloc}" + + # Parse head first for speed, then fall back to full doc if needed + head_end = html.find("") + head_html = html[:head_end + 7] if head_end != -1 else html[:8000] + soup_head = BeautifulSoup(head_html, "lxml") + # Also keep full soup for body-level og: tags some CDNs inject + soup_full = BeautifulSoup(html[:60_000], "lxml") if head_end == -1 or head_end > 60_000 else soup_head + + def meta(soup, prop=None, name=None): + if prop: + el = soup.find("meta", property=prop) or soup.find("meta", attrs={"property": prop}) + else: + el = soup.find("meta", attrs={"name": name}) + return (el.get("content") or "").strip() if el else None + + def m(prop=None, name=None): + return meta(soup_head, prop=prop, name=name) or meta(soup_full, prop=prop, name=name) + + title = ( + m(prop="og:title") + or m(name="twitter:title") + or (soup_head.title.get_text(strip=True) if soup_head.title else None) + or _slug_to_title(original_url or base_url) + ) + description = ( + m(prop="og:description") + or m(name="twitter:description") + or m(name="description") + ) + image = ( + 
m(prop="og:image") + or m(name="twitter:image") + or m(name="twitter:image:src") + ) + site_name = m(prop="og:site_name") or domain + + # Resolve relative image URLs + if image and image.startswith("//"): + image = f"{parsed_base.scheme}:{image}" + elif image and image.startswith("/"): + image = f"{origin}{image}" + + # Favicon: try link[rel=icon], fallback to /favicon.ico + favicon = None + icon_el = ( + soup_head.find("link", rel="icon") + or soup_head.find("link", rel="shortcut icon") + or soup_head.find("link", rel=lambda v: v and "icon" in v) + ) + if icon_el and icon_el.get("href"): + href = icon_el["href"].strip() + if href.startswith("//"): + favicon = f"{parsed_base.scheme}:{href}" + elif href.startswith("/"): + favicon = f"{origin}{href}" + else: + favicon = href + else: + favicon = f"{origin}/favicon.ico" + + return URLPreview( + title=title or None, + description=description or None, + image=image or None, + site_name=site_name or None, + favicon=favicon, + domain=domain, + ) + + +_BOT_TITLES = { + "just a moment", "attention required", "access denied", "please wait", + "checking your browser", "ddos-guard", "enable javascript", "403 forbidden", + "404 not found", "503 service unavailable", +} + + +@router.get("", response_model=URLPreview, summary="Fetch article preview (OG meta)") +async def get_preview(url: str = Query(..., description="Article URL to preview")) -> URLPreview: + try: + import httpx + except ImportError: + raise HTTPException(status_code=500, detail="httpx not installed") + + headers = { + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/122.0.0.0 Safari/537.36" + ), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + } + + parsed = urlparse(url) + domain = parsed.netloc.replace("www.", "") + origin = f"{parsed.scheme}://{parsed.netloc}" + slug_title = _slug_to_title(url) + + try: + async with 
httpx.AsyncClient(timeout=10, follow_redirects=True) as client: + resp = await client.get(url, headers=headers) + if resp.status_code >= 400: + logger.warning("Preview fetch returned %d for %s", resp.status_code, url) + return URLPreview( + domain=domain, + site_name=domain, + title=slug_title, + favicon=f"{origin}/favicon.ico", + ) + preview = _extract_preview(resp.text, str(resp.url), original_url=url) + # If OG parsing returned no title, or got a bot-challenge page title, fall back to slug + if not preview.title or preview.title.lower().strip() in _BOT_TITLES: + preview.title = slug_title + # Don't keep description/image from a bot-challenge page + preview.description = None + preview.image = None + return preview + except Exception as exc: + logger.warning("Preview fetch failed for %s: %s", url, exc) + return URLPreview( + domain=domain, + site_name=domain, + title=slug_title, + favicon=f"{origin}/favicon.ico", + ) diff --git a/api/routes/trends.py b/api/routes/trends.py index 43022ce7153d49c87bb35894011f729c9a05c9f7..315c9824f8b95476e6ec12ac57439104ad502f74 100644 --- a/api/routes/trends.py +++ b/api/routes/trends.py @@ -10,8 +10,33 @@ from api.schemas import TrendsResponse, TrendingEntity, TrendingTopic, Verdict logger = logging.getLogger(__name__) router = APIRouter(prefix="/trends", tags=["Trends"]) -# Reads from the same in-memory store as history (Phase 7 → DB aggregation). -from api.routes.history import _HISTORY + +def _load_all_history() -> list[dict]: + """ + Return all history records from the best available source: + 1. Firestore 2. Local JSON file 3. 
In-memory list (fallback) + """ + # Tier 1: Firestore + try: + from firebase_client import get_all_verifications_sync + records = get_all_verifications_sync() + if records: + return records + except Exception: + pass + + # Tier 2: Local JSON file (persists across restarts) + try: + from api.routes.history import _load_history_file + records = _load_history_file() + if records: + return records + except Exception: + pass + + # Tier 3: In-memory (empty after restart, but keeps current session data) + from api.routes.history import _HISTORY + return list(_HISTORY) @router.get( @@ -26,13 +51,15 @@ async def get_trends( ) -> TrendsResponse: logger.info("GET /trends | days=%d", days) + all_history = _load_all_history() + entity_counter: Counter = Counter() entity_type_map: dict[str, str] = {} entity_fake_counter: Counter = Counter() topic_counter: Counter = Counter() topic_verdict_map: dict[str, list[str]] = {} - for entry in _HISTORY: + for entry in all_history: is_fake = entry.get("verdict") in (Verdict.LIKELY_FAKE.value, Verdict.UNVERIFIED.value) entities = entry.get("entities", {}) @@ -81,4 +108,37 @@ async def get_trends( for topic, count in topic_counter.most_common(limit) ] - return TrendsResponse(top_entities=top_entities, top_topics=top_topics) + # ── Verdict distribution totals ─────────────────────────────────────────────── + verdict_dist: dict[str, int] = {"Credible": 0, "Unverified": 0, "Likely Fake": 0} + day_map: dict[str, dict[str, int]] = {} # date → {Credible, Unverified, Likely Fake} + + for entry in all_history: + v = entry.get("verdict", "Unverified") + if v in verdict_dist: + verdict_dist[v] += 1 + + ts = entry.get("timestamp", "") + date_key = ts[:10] if ts else "" # YYYY-MM-DD prefix + if date_key: + bucket = day_map.setdefault(date_key, {"Credible": 0, "Unverified": 0, "Likely Fake": 0}) + if v in bucket: + bucket[v] += 1 + + from api.schemas import VerdictDayPoint + verdict_by_day = [ + VerdictDayPoint( + date=d, + 
credible=day_map[d]["Credible"], + unverified=day_map[d]["Unverified"], + fake=day_map[d]["Likely Fake"], + ) + for d in sorted(day_map.keys()) + ] + + return TrendsResponse( + top_entities=top_entities, + top_topics=top_topics, + verdict_distribution=verdict_dist, + verdict_by_day=verdict_by_day, + ) + diff --git a/api/routes/verify.py b/api/routes/verify.py index 6d0a5a1ba712fadab12a7806baff32b65a0d9d0b..b602d79aa54188737cdeba75145b7ab214a7219d 100644 --- a/api/routes/verify.py +++ b/api/routes/verify.py @@ -67,6 +67,10 @@ async def verify_url(body: URLVerifyRequest) -> VerificationResponse: return result except HTTPException: raise + except ValueError as exc: + # Expected user-facing errors (e.g. robots.txt block, bad URL) + logger.warning("verify/url rejected: %s", exc) + raise HTTPException(status_code=422, detail=str(exc)) from exc except Exception as exc: logger.exception("verify/url error: %s", exc) raise HTTPException(status_code=500, detail=f"URL verification failed: {exc}") from exc diff --git a/api/schemas.py b/api/schemas.py index 7eb541f7daf7c79e046f249a098b62c810944aa2..9d6f881a725a9eed0d4cd13414da82fbd98bf1a7 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -138,9 +138,24 @@ class TrendingTopic(BaseModel): dominant_verdict: Verdict +class VerdictDayPoint(BaseModel): + date: str # YYYY-MM-DD + credible: int = 0 + unverified: int = 0 + fake: int = 0 + + class TrendsResponse(BaseModel): top_entities: list[TrendingEntity] top_topics: list[TrendingTopic] + verdict_distribution: dict[str, int] = Field( + default_factory=dict, + description="Counts per verdict: Credible, Unverified, Likely Fake", + ) + verdict_by_day: list[VerdictDayPoint] = Field( + default_factory=list, + description="Day-by-day verdict counts for the area chart (last N days)", + ) # ── Error ───────────────────────────────────────────────────────────────────── diff --git a/deploy.sh b/deploy.sh new file mode 100644 index 
0000000000000000000000000000000000000000..10e7f6a568ea80f84cf18aeccb3b65c722abcc4e --- /dev/null +++ b/deploy.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# ── PhilVerify — Firebase + Cloud Run Deployment Script ─────────────────────── +# Usage: +# chmod +x deploy.sh +# ./deploy.sh YOUR_GCP_PROJECT_ID +# +# Prerequisites: +# brew install google-cloud-sdk firebase-cli +# gcloud auth login +# gcloud auth configure-docker +# firebase login + +set -euo pipefail + +PROJECT_ID="${1:-}" +REGION="asia-southeast1" +SERVICE_NAME="philverify-api" +IMAGE="gcr.io/${PROJECT_ID}/${SERVICE_NAME}" + +if [[ -z "$PROJECT_ID" ]]; then + echo "Usage: ./deploy.sh YOUR_GCP_PROJECT_ID" + exit 1 +fi + +echo "▶ Project: $PROJECT_ID | Region: $REGION | Service: $SERVICE_NAME" + +# ── 1. Set GCP project ──────────────────────────────────────────────────────── +gcloud config set project "$PROJECT_ID" + +# ── 2. Build + push Docker image to GCR ────────────────────────────────────── +echo "" +echo "▶ Building & pushing Docker image (this takes ~10 min first time)…" +gcloud builds submit \ + --tag "$IMAGE" \ + --timeout=30m \ + . + +# ── 3. Deploy to Cloud Run ──────────────────────────────────────────────────── +echo "" +echo "▶ Deploying to Cloud Run…" +gcloud run deploy "$SERVICE_NAME" \ + --image "$IMAGE" \ + --region "$REGION" \ + --platform managed \ + --allow-unauthenticated \ + --memory 4Gi \ + --cpu 2 \ + --concurrency 10 \ + --timeout 300 \ + --min-instances 1 \ + --max-instances 3 \ + --set-env-vars "APP_ENV=production,DEBUG=false,LOG_LEVEL=INFO" \ + --set-env-vars "ALLOWED_ORIGINS=https://${PROJECT_ID}.web.app,https://${PROJECT_ID}.firebaseapp.com" + # Add secrets like NEWS_API_KEY via: + # --update-secrets NEWS_API_KEY=philverify-news-api-key:latest + +# ── 4. Link Firebase project ────────────────────────────────────────────────── +echo "" +echo "▶ Setting Firebase project…" +firebase use "$PROJECT_ID" + +# ── 5. 
Build React frontend ─────────────────────────────────────────────────── +echo "" +echo "▶ Building React frontend…" +cd frontend +npm ci +npm run build +cd .. + +# ── 6. Deploy to Firebase Hosting ──────────────────────────────────────────── +echo "" +echo "▶ Deploying to Firebase Hosting…" +firebase deploy --only hosting,firestore + +echo "" +echo "✅ Deploy complete!" +echo " Frontend: https://${PROJECT_ID}.web.app" +echo " API: https://${PROJECT_ID}.web.app/api/health" diff --git a/evidence/domain_credibility.py b/evidence/domain_credibility.py new file mode 100644 index 0000000000000000000000000000000000000000..c8a368f2edfa39a26dff4064c5f8b7417ed88a02 --- /dev/null +++ b/evidence/domain_credibility.py @@ -0,0 +1,150 @@ +""" +PhilVerify — Domain Credibility Module (Phase 5) +Wraps domain_credibility.json to provide structured tier lookups +for evidence source URLs and news article domains. + +Tiers: + Tier 1 (CREDIBLE) — Established PH news orgs (Rappler, Inquirer, GMA, etc.) + Tier 2 (SATIRE_OPINION) — Satire, opinion blogs, entertainment + Tier 3 (SUSPICIOUS) — Unknown / newly registered / low authority + Tier 4 (KNOWN_FAKE) — Vera Files blacklisted fake news sites +""" +import json +import logging +import re +from dataclasses import dataclass +from enum import IntEnum +from pathlib import Path +from urllib.parse import urlparse +import functools + +logger = logging.getLogger(__name__) + +_DB_PATH = Path(__file__).parent.parent / "domain_credibility.json" + +# Score adjustments per tier (applied in scoring engine) +TIER_SCORE_ADJUSTMENT: dict[int, float] = { + 1: +20.0, # Established PH news — credibility boost + 2: -5.0, # Satire/opinion — mild penalty + 3: -10.0, # Unknown — moderate penalty + 4: -35.0, # Known fake — heavy penalty +} + +TIER_LABELS: dict[int, str] = { + 1: "Credible", + 2: "Satire/Opinion", + 3: "Suspicious", + 4: "Known Fake", +} + + +class DomainTier(IntEnum): + CREDIBLE = 1 + SATIRE_OPINION = 2 + SUSPICIOUS = 3 + KNOWN_FAKE = 4 + + 
+@dataclass +class DomainResult: + domain: str + tier: DomainTier + tier_label: str + score_adjustment: float + matched_entry: str | None = None # Which entry in the JSON matched + + +@functools.lru_cache(maxsize=1) +def _load_db() -> dict: + """Load and cache the domain_credibility.json file.""" + try: + data = json.loads(_DB_PATH.read_text()) + total = sum(len(v.get("domains", [])) for v in data.values()) + logger.info("domain_credibility.json loaded — %d domains across %d tiers", total, len(data)) + return data + except Exception as e: + logger.error("Failed to load domain_credibility.json: %s", e) + return {} + + +def extract_domain(url_or_domain: str) -> str: + """ + Normalize a URL or raw domain string to a bare hostname. + + Examples: + "https://www.rappler.com/news/..." → "rappler.com" + "www.gmanetwork.com" → "gmanetwork.com" + "inquirer.net" → "inquirer.net" + """ + if not url_or_domain: + return "" + raw = url_or_domain.strip().lower() + # Add scheme if missing so urlparse works correctly + if not raw.startswith(("http://", "https://")): + raw = "https://" + raw + try: + hostname = urlparse(raw).hostname or "" + # Strip leading www. + hostname = re.sub(r"^www\.", "", hostname) + return hostname + except Exception: + # Last resort — strip www. manually + return re.sub(r"^www\.", "", raw.split("/")[0]) + + +def lookup_domain(url_or_domain: str) -> DomainResult: + """ + Classify a domain/URL against the credibility tier database. + + Args: + url_or_domain: Full URL or bare domain name. + + Returns: + DomainResult — Tier 3 (Suspicious) by default for unknown domains. 
+ """ + domain = extract_domain(url_or_domain) + if not domain: + return _make_result("", DomainTier.SUSPICIOUS, None) + + db = _load_db() + + for tier_key, tier_data in db.items(): + tier_num = int(tier_key[-1]) # "tier1" → 1 + for entry in tier_data.get("domains", []): + # Match exact domain or subdomain of listed domain + if domain == entry or domain.endswith(f".{entry}"): + return _make_result(domain, DomainTier(tier_num), entry) + + # Not found → Tier 3 (Suspicious/Unknown) + logger.debug("Domain '%s' not in credibility DB — defaulting to Tier 3 (Suspicious)", domain) + return _make_result(domain, DomainTier.SUSPICIOUS, None) + + +def _make_result(domain: str, tier: DomainTier, matched_entry: str | None) -> DomainResult: + return DomainResult( + domain=domain, + tier=tier, + tier_label=TIER_LABELS[tier.value], + score_adjustment=TIER_SCORE_ADJUSTMENT[tier.value], + matched_entry=matched_entry, + ) + + +def get_tier_score(url_or_domain: str) -> float: + """ + Convenience: return just the score adjustment for a domain. + Positive = credibility boost, negative = penalty. + """ + return lookup_domain(url_or_domain).score_adjustment + + +def is_blacklisted(url_or_domain: str) -> bool: + """Return True if the domain is a known fake news / blacklisted site.""" + return lookup_domain(url_or_domain).tier == DomainTier.KNOWN_FAKE + + +def describe_tier(tier: DomainTier) -> str: + """Human-readable tier description for API responses.""" + db = _load_db() + key = f"tier{tier.value}" + return db.get(key, {}).get("description", TIER_LABELS[tier.value]) diff --git a/evidence/news_fetcher.py b/evidence/news_fetcher.py index 39120759509111a7801636e1d9f573878b865e09..d9da5ee635f98ff1b0b76a9f75f010c1c52b45b9 100644 --- a/evidence/news_fetcher.py +++ b/evidence/news_fetcher.py @@ -1,20 +1,46 @@ """ PhilVerify — Evidence Retrieval Module -Fetches related articles from NewsAPI, computes cosine similarity, -and produces an evidence score for Layer 2 of the scoring engine. 
+Fetches related articles from two sources and merges the results: + 1. Google News RSS (gl=PH) — free, no API key, PH-indexed, primary source + 2. NewsAPI /everything — broader English coverage, requires API key + +Google News RSS is always attempted first since it covers local PH outlets +(GMA, Inquirer, Rappler, CNN Philippines, PhilStar, etc.) far better than +NewsAPI's free tier index. """ +import asyncio import logging import hashlib +import xml.etree.ElementTree as ET +import urllib.parse from dataclasses import dataclass, field from pathlib import Path import json logger = logging.getLogger(__name__) -# Simple file-based cache to respect NewsAPI 100 req/day free tier limit +# ── Cache ───────────────────────────────────────────────────────────────────── +# Shared cache for both sources. NewsAPI free tier = 100 req/day. +# Google News RSS has no hard limit but we cache anyway to stay polite. _CACHE_DIR = Path(__file__).parent.parent / ".cache" / "newsapi" _CACHE_DIR.mkdir(parents=True, exist_ok=True) +# ── Philippine news domains (used to boost Google News RSS results) ─────────── +_PH_DOMAINS = { + "rappler.com", "inquirer.net", "gmanetwork.com", "philstar.com", + "manilatimes.net", "mb.com.ph", "abs-cbn.com", "cnnphilippines.com", + "pna.gov.ph", "sunstar.com.ph", "businessmirror.com.ph", + "businessworld.com.ph", "malaya.com.ph", "marikina.gov.ph", + "verafiles.org", "pcij.org", "interaksyon.philstar.com", +} + +# NewsAPI domains filter — restricts results to PH outlets when API key is set +_NEWSAPI_PH_DOMAINS = ",".join([ + "rappler.com", "inquirer.net", "gmanetwork.com", "philstar.com", + "manilatimes.net", "mb.com.ph", "abs-cbn.com", "cnnphilippines.com", + "pna.gov.ph", "sunstar.com.ph", "businessmirror.com.ph", +]) + @dataclass class ArticleResult: @@ -36,8 +62,8 @@ class EvidenceResult: claim_used: str = "" -def _cache_key(claim: str) -> str: - return hashlib.md5(claim.lower().strip().encode()).hexdigest() +def _cache_key(prefix: str, claim: str) -> 
str: + return f"{prefix}_{hashlib.md5(claim.lower().strip().encode()).hexdigest()}" def _load_cache(key: str) -> list[dict] | None: @@ -52,41 +78,230 @@ def _load_cache(key: str) -> list[dict] | None: def _save_cache(key: str, data: list[dict]) -> None: path = _CACHE_DIR / f"{key}.json" - path.write_text(json.dumps(data)) + try: + path.write_text(json.dumps(data)) + except Exception: + pass + + +def _extract_domain(url: str) -> str: + """Return bare domain from a URL string.""" + try: + from urllib.parse import urlparse + host = urlparse(url).hostname or "" + return host.removeprefix("www.") + except Exception: + return "" + + +def _is_ph_article(article: dict) -> bool: + """ + Return True if the article appears to be from a Philippine outlet. + Checks the source name since Google News RSS links are redirect URLs. + """ + src = (article.get("source", {}) or {}).get("name", "").lower() + url = article.get("url", "").lower() + # Direct domain match on URL (works for NewsAPI results) + if _extract_domain(url) in _PH_DOMAINS: + return True + # Source-name match (works for Google News RSS redirect URLs) + _PH_SOURCE_KEYWORDS = { + "rappler", "inquirer", "gma", "abs-cbn", "cnn philippines", + "philstar", "manila times", "manila bulletin", "sunstar", + "businessworld", "business mirror", "malaya", "philippine news agency", + "pna", "vera files", "pcij", "interaksyon", + } + return any(kw in src for kw in _PH_SOURCE_KEYWORDS) + + +def _build_query(claim: str, entities: list[str] | None) -> str: + """Build a concise search query from entities or the first words of the claim.""" + if entities: + return " ".join(entities[:3]) + words = claim.split() + return " ".join(words[:6]) + + +# ── Google News RSS ─────────────────────────────────────────────────────────── + +def _fetch_gnews_rss(query: str, max_results: int = 5) -> list[dict]: + """ + Fetch articles from Google News RSS scoped to the Philippines. 
+ Returns a list of dicts in the same shape as NewsAPI articles so the + rest of the pipeline can treat both sources uniformly. + No API key required. + """ + encoded = urllib.parse.quote(query) + url = ( + f"https://news.google.com/rss/search" + f"?q={encoded}&gl=PH&hl=en-PH&ceid=PH:en" + ) + try: + import requests as req_lib + resp = req_lib.get(url, headers={"User-Agent": "PhilVerify/1.0"}, timeout=10) + resp.raise_for_status() + raw = resp.content + root = ET.fromstring(raw) + channel = root.find("channel") + if channel is None: + return [] + + articles: list[dict] = [] + for item in channel.findall("item")[:max_results]: + title_el = item.find("title") + link_el = item.find("link") + desc_el = item.find("description") + pub_el = item.find("pubDate") + src_el = item.find("source") + + title = title_el.text if title_el is not None else "" + link = link_el.text if link_el is not None else "" + description = desc_el.text if desc_el is not None else "" + pub_date = pub_el.text if pub_el is not None else "" + src_name = src_el.text if src_el is not None else _extract_domain(link) + + # Google News titles often include "- Source" suffix — strip it + if src_name and title.endswith(f" - {src_name}"): + title = title[: -(len(src_name) + 3)].strip() + articles.append({ + "title": title, + "url": link, + "description": description or title, + "publishedAt": pub_date, + "source": {"name": src_name}, + "_gnews": True, # Tag so we can log the origin + }) -async def fetch_evidence(claim: str, api_key: str, max_results: int = 5) -> list[dict]: - """Fetch top articles from NewsAPI for the given claim. 
Cached.""" - key = _cache_key(claim) - cached = _load_cache(key) - if cached is not None: - logger.info("NewsAPI cache hit for claim hash %s", key[:8]) - return cached + logger.info( + "Google News RSS (PH) returned %d articles for query '%s...'", + len(articles), query[:40], + ) + return articles - if not api_key: - logger.warning("NEWS_API_KEY not set — returning empty evidence") + except Exception as exc: + logger.warning("Google News RSS fetch failed: %s", exc) return [] + +# ── NewsAPI ─────────────────────────────────────────────────────────────────── + +def _fetch_newsapi(query: str, api_key: str, max_results: int = 5) -> list[dict]: + """ + Fetch from NewsAPI /everything, restricted to PH domains. + Falls back to global search if the PH-domains query returns < 2 results. + """ try: from newsapi import NewsApiClient client = NewsApiClient(api_key=api_key) - # Use first 100 chars of claim as query - query = claim[:100] + + # Try Philippine outlets first resp = client.get_everything( q=query, + domains=_NEWSAPI_PH_DOMAINS, language="en", sort_by="relevancy", page_size=max_results, ) articles = resp.get("articles", []) - _save_cache(key, articles) - logger.info("NewsAPI returned %d articles for query '%s...'", len(articles), query[:30]) + + # If PH domains yield nothing useful, fall back to global + if len(articles) < 2: + logger.debug("NewsAPI PH-domains sparse (%d) — retrying global", len(articles)) + resp = client.get_everything( + q=query, + language="en", + sort_by="relevancy", + page_size=max_results, + ) + articles = resp.get("articles", []) + + logger.info( + "NewsAPI returned %d articles for query '%s...'", + len(articles), query[:40], + ) return articles - except Exception as e: - logger.warning("NewsAPI fetch error: %s", e) + except Exception as exc: + logger.warning("NewsAPI fetch error: %s", exc) return [] +# ── Public API ──────────────────────────────────────────────────────────────── + +async def fetch_evidence( + claim: str, + api_key: str, + 
entities: list[str] = None, + max_results: int = 5, +) -> list[dict]: + """ + Fetch the most relevant articles for a claim by merging: + 1. Google News RSS (PH-scoped) — always attempted, no key needed + 2. NewsAPI — only when NEWS_API_KEY is configured + + Results are deduplicated by domain and capped at max_results. + PH-domain articles are surfaced first so scoring reflects local coverage. + """ + query = _build_query(claim, entities) + + # ── Google News RSS (check cache) ───────────────────────────────────────── + gnews_key = _cache_key("gnews", query) + gnews_articles = _load_cache(gnews_key) + if gnews_articles is None: + # Run blocking RSS fetch in a thread so we don't block the event loop + gnews_articles = await asyncio.get_event_loop().run_in_executor( + None, _fetch_gnews_rss, query, max_results + ) + _save_cache(gnews_key, gnews_articles) + else: + logger.info("Google News RSS cache hit for query hash %s", gnews_key[-8:]) + + # ── NewsAPI (check cache) ───────────────────────────────────────────────── + newsapi_articles: list[dict] = [] + if api_key: + newsapi_key = _cache_key("newsapi", query) + newsapi_articles = _load_cache(newsapi_key) + if newsapi_articles is None: + newsapi_articles = await asyncio.get_event_loop().run_in_executor( + None, _fetch_newsapi, query, api_key, max_results + ) + _save_cache(newsapi_key, newsapi_articles) + else: + logger.info("NewsAPI cache hit for query hash %s", newsapi_key[-8:]) + + # ── Merge: PH articles first, then global, deduplicated by domain ───────── + seen_domains: set[str] = set() + merged: list[dict] = [] + + def _add(articles: list[dict]) -> None: + for art in articles: + url = art.get("url", "") + domain = _extract_domain(url) + # For Google News redirect URLs, deduplicate by source name instead + dedup_key = domain if domain and "google.com" not in domain \ + else (art.get("source", {}) or {}).get("name", url) + if dedup_key and dedup_key in seen_domains: + continue + if dedup_key: + 
seen_domains.add(dedup_key) + merged.append(art) + + # PH-source Google News articles go first + ph_gnews = [a for a in gnews_articles if _is_ph_article(a)] + other_gnews = [a for a in gnews_articles if not _is_ph_article(a)] + + _add(ph_gnews) + _add(newsapi_articles) + _add(other_gnews) # non-PH Google News last + + result = merged[:max_results] + logger.info( + "Evidence merged: %d PH-gnews + %d newsapi + %d other → %d final", + len(ph_gnews), len(newsapi_articles), len(other_gnews), len(result), + ) + return result + + def compute_similarity(claim: str, article_text: str) -> float: """ Compute cosine similarity between claim and article using sentence-transformers. diff --git a/evidence/similarity.py b/evidence/similarity.py new file mode 100644 index 0000000000000000000000000000000000000000..6f5c323e519113aae1860b78274dbfe8ca1ae3af --- /dev/null +++ b/evidence/similarity.py @@ -0,0 +1,80 @@ +""" +PhilVerify — Similarity Module (Phase 5) +Computes semantic similarity between a claim and evidence article text. +Primary: sentence-transformers/all-MiniLM-L6-v2 (cosine similarity) +Fallback: Jaccard word-overlap similarity +""" +import logging +import functools + +logger = logging.getLogger(__name__) + +# Lazy-load the model at first use — avoids blocking app startup +@functools.lru_cache(maxsize=1) +def _get_model(): + """Load sentence-transformer model once and cache it.""" + try: + from sentence_transformers import SentenceTransformer + model = SentenceTransformer("all-MiniLM-L6-v2") + logger.info("sentence-transformers model loaded: all-MiniLM-L6-v2") + return model + except Exception as e: + logger.warning("sentence-transformers unavailable (%s) — Jaccard fallback active", e) + return None + + +def compute_similarity(claim: str, article_text: str) -> float: + """ + Compute semantic similarity between a fact-check claim and an article. + + Args: + claim: The extracted falsifiable claim sentence. + article_text: Title + description of a retrieved news article. 
+ + Returns: + Float in [0.0, 1.0] — higher means more semantically related. + """ + if not claim or not article_text: + return 0.0 + + model = _get_model() + if model is not None: + try: + from sentence_transformers import util + emb_claim = model.encode(claim, convert_to_tensor=True) + emb_article = model.encode(article_text[:512], convert_to_tensor=True) + score = float(util.cos_sim(emb_claim, emb_article)[0][0]) + return round(max(0.0, min(1.0, score)), 4) + except Exception as e: + logger.warning("Embedding similarity failed (%s) — falling back to Jaccard", e) + + # Jaccard token-overlap fallback + return _jaccard_similarity(claim, article_text) + + +def _jaccard_similarity(a: str, b: str) -> float: + """Simple set-based Jaccard similarity on word tokens.""" + tokens_a = set(a.lower().split()) + tokens_b = set(b.lower().split()) + if not tokens_a or not tokens_b: + return 0.0 + intersection = tokens_a & tokens_b + union = tokens_a | tokens_b + return round(len(intersection) / len(union), 4) + + +def rank_articles_by_similarity(claim: str, articles: list[dict]) -> list[dict]: + """ + Annotate and sort a list of NewsAPI article dicts by similarity to the claim. + + Each article dict gets a `similarity` key added. + Returns articles sorted descending by similarity. + """ + scored = [] + for article in articles: + article_text = f"{article.get('title', '')} {article.get('description', '')}" + sim = compute_similarity(claim, article_text) + scored.append({**article, "similarity": sim}) + + scored.sort(key=lambda x: x["similarity"], reverse=True) + return scored diff --git a/evidence/stance_detector.py b/evidence/stance_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..53c86131de22440d248f9c65347adfcad4b85201 --- /dev/null +++ b/evidence/stance_detector.py @@ -0,0 +1,194 @@ +""" +PhilVerify — Stance Detection Module (Phase 5) +Classifies the relationship between a claim and a retrieved evidence article. 
+ +Stance labels: + Supports — article content supports the claim + Refutes — article content contradicts / debunks the claim + Not Enough Info — article is related but not conclusive either way + +Strategy (rule-based hybrid — no heavy model dependency): + 1. Keyword scan of title + description for refutation/support signals + 2. Similarity threshold guard — low similarity → NEI + 3. Factuality keywords override similarity-based detection +""" +import logging +import re +from dataclasses import dataclass +from enum import Enum + +logger = logging.getLogger(__name__) + + +class Stance(str, Enum): + SUPPORTS = "Supports" + REFUTES = "Refutes" + NOT_ENOUGH_INFO = "Not Enough Info" + + +# ── Keyword Lists ───────────────────────────────────────────────────────────── +# Ordered: check REFUTATION first (stronger signal), then SUPPORT +_REFUTATION_KEYWORDS = [ + # Fact-check verdicts + r"\bfact.?check\b", r"\bfalse\b", r"\bfake\b", r"\bhoax\b", + r"\bdebunked\b", r"\bmisinformation\b", r"\bdisinformation\b", + r"\bnot true\b", r"\bno evidence\b", r"\bunverified\b", + r"\bcorrection\b", r"\bretract\b", r"\bwrong\b", r"\bdenied\b", + r"\bscam\b", r"\bsatire\b", + # Filipino equivalents + r"\bkasinungalingan\b", r"\bhindi totoo\b", r"\bpeke\b", +] + +_SUPPORT_KEYWORDS = [ + r"\bconfirmed\b", r"\bverified\b", r"\bofficial\b", r"\bproven\b", + r"\btrue\b", r"\blegitimate\b", r"\baccurate\b", r"\bauthorized\b", + r"\breal\b", r"\bgenuine\b", + # Filipino equivalents + r"\btotoo\b", r"\bkumpirmado\b", r"\bopisyal\b", +] + +# Articles from these PH fact-check domains always → Refutes regardless of content +_FACTCHECK_DOMAINS = { + "vera-files.org", "verafiles.org", "factcheck.afp.com", + "rappler.com/newsbreak/fact-check", "cnn.ph/fact-check", +} + +# Similarity threshold: below this → NEI even with support keywords +_SIMILARITY_NEI_THRESHOLD = 0.15 +# Similarity above this + support keywords → Supports +_SIMILARITY_SUPPORT_THRESHOLD = 0.35 + + +@dataclass +class StanceResult: + 
stance: Stance + confidence: float # 0.0–1.0 — how confident we are in this label + matched_keywords: list[str] + reason: str + + +def detect_stance( + claim: str, + article_title: str, + article_description: str, + article_url: str = "", + similarity: float = 0.0, +) -> StanceResult: + """ + Detect the stance of an article relative to the claim. + + Args: + claim: The extracted falsifiable claim. + article_title: NewsAPI article title. + article_description: NewsAPI article description. + article_url: Article URL (used for fact-check domain detection). + similarity: Pre-computed cosine similarity score (0–1). + + Returns: + StanceResult with stance label, confidence, and reason. + """ + # Combine article text for keyword search + article_text = f"{article_title} {article_description}".lower() + + # ── Rule 0: Known fact-check domain → always Refutes ────────────────────── + if article_url: + for fc_domain in _FACTCHECK_DOMAINS: + if fc_domain in article_url.lower(): + return StanceResult( + stance=Stance.REFUTES, + confidence=0.90, + matched_keywords=[fc_domain], + reason="Known Philippine fact-check domain", + ) + + # ── Rule 1: Similarity floor — too low to make any claim ────────────────── + if similarity < _SIMILARITY_NEI_THRESHOLD: + return StanceResult( + stance=Stance.NOT_ENOUGH_INFO, + confidence=0.80, + matched_keywords=[], + reason=f"Low similarity ({similarity:.2f}) — article not related to claim", + ) + + # ── Rule 2: Scan for refutation keywords ────────────────────────────────── + refutation_hits = _scan_keywords(article_text, _REFUTATION_KEYWORDS) + if refutation_hits: + confidence = min(0.95, 0.65 + len(refutation_hits) * 0.10) + return StanceResult( + stance=Stance.REFUTES, + confidence=round(confidence, 2), + matched_keywords=refutation_hits, + reason=f"Refutation signal detected: {', '.join(refutation_hits[:3])}", + ) + + # ── Rule 3: Scan for support keywords + similarity threshold ────────────── + support_hits = _scan_keywords(article_text, 
_SUPPORT_KEYWORDS) + if support_hits and similarity >= _SIMILARITY_SUPPORT_THRESHOLD: + confidence = min(0.90, 0.50 + len(support_hits) * 0.10 + similarity * 0.20) + return StanceResult( + stance=Stance.SUPPORTS, + confidence=round(confidence, 2), + matched_keywords=support_hits, + reason=f"Support signal + similarity {similarity:.2f}: {', '.join(support_hits[:3])}", + ) + + # ── Default: Not Enough Info ─────────────────────────────────────────────── + return StanceResult( + stance=Stance.NOT_ENOUGH_INFO, + confidence=0.70, + matched_keywords=[], + reason="No conclusive support or refutation signals found", + ) + + +def _scan_keywords(text: str, patterns: list[str]) -> list[str]: + """Return list of matched keyword patterns found in text.""" + hits = [] + for pattern in patterns: + match = re.search(pattern, text, re.IGNORECASE) + if match: + hits.append(match.group(0)) + return hits + + +def compute_evidence_score( + stances: list[StanceResult], + similarities: list[float], +) -> tuple[float, str]: + """ + Aggregate multiple article stances into a single evidence score (0–100) + and an overall Layer 2 verdict. 
+ + Scoring: + - Start at neutral 50 + - Each Supports article: +10 × similarity bonus + - Each Refutes article: -15 penalty (stronger signal) + - NEI articles: no effect + + Returns: + (evidence_score, verdict_label) + """ + if not stances: + return 50.0, "Unverified" + + score = 50.0 + supporting = [s for s in stances if s.stance == Stance.SUPPORTS] + refuting = [s for s in stances if s.stance == Stance.REFUTES] + + for i, stance in enumerate(stances): + sim = similarities[i] if i < len(similarities) else 0.5 + if stance.stance == Stance.SUPPORTS: + score += 10.0 * (0.5 + sim) + elif stance.stance == Stance.REFUTES: + score -= 15.0 * stance.confidence + + score = round(max(0.0, min(100.0, score)), 1) + + if len(refuting) > len(supporting): + verdict = "Likely Fake" + elif len(supporting) >= 2 and score >= 60: + verdict = "Credible" + else: + verdict = "Unverified" + + return score, verdict diff --git a/extension/background.js b/extension/background.js new file mode 100644 index 0000000000000000000000000000000000000000..0245665ed09c885a6b768bf6751b30524eeeece0 --- /dev/null +++ b/extension/background.js @@ -0,0 +1,171 @@ +/** + * PhilVerify — Background Service Worker (Manifest V3) + * + * Responsibilities: + * - Proxy API calls to the PhilVerify FastAPI backend + * - File-based cache via chrome.storage.local (24-hour TTL, max 50 entries) + * - Maintain personal verification history + * - Respond to messages from content.js and popup.js + * + * Message types handled: + * VERIFY_TEXT { text } → VerificationResponse + * VERIFY_URL { url } → VerificationResponse + * GET_HISTORY {} → { history: HistoryEntry[] } + * GET_SETTINGS {} → { apiBase, autoScan } + * SAVE_SETTINGS { apiBase, autoScan } → {} + */ + +const CACHE_TTL_MS = 24 * 60 * 60 * 1000 // 24 hours +const MAX_HISTORY = 50 + +// ── Default settings ────────────────────────────────────────────────────────── +const DEFAULT_SETTINGS = { + apiBase: 'http://localhost:8000', + autoScan: true, // Automatically scan 
Facebook feed posts +} + +// ── Utilities ───────────────────────────────────────────────────────────────── +/** Validate that a string is a safe http/https URL */ +function isHttpUrl(str) { + if (!str || typeof str !== 'string') return false + try { + const u = new URL(str) + return u.protocol === 'http:' || u.protocol === 'https:' + } catch { return false } +} +async function sha256prefix(text, len = 16) { + const buf = await crypto.subtle.digest( + 'SHA-256', + new TextEncoder().encode(text.trim().toLowerCase()), + ) + return Array.from(new Uint8Array(buf)) + .map(b => b.toString(16).padStart(2, '0')) + .join('') + .slice(0, len) +} + +async function getSettings() { + const stored = await chrome.storage.local.get('settings') + return { ...DEFAULT_SETTINGS, ...(stored.settings ?? {}) } +} + +// ── Cache helpers ───────────────────────────────────────────────────────────── + +async function getCached(key) { + const stored = await chrome.storage.local.get(key) + const entry = stored[key] + if (!entry) return null + if (Date.now() - entry.timestamp > CACHE_TTL_MS) { + await chrome.storage.local.remove(key) + return null + } + return entry.result +} + +async function setCached(key, result, preview) { + await chrome.storage.local.set({ + [key]: { result, timestamp: Date.now() }, + }) + + // Prepend to history list + const { history = [] } = await chrome.storage.local.get('history') + const entry = { + id: key, + timestamp: new Date().toISOString(), + text_preview: preview.slice(0, 80), + verdict: result.verdict, + final_score: result.final_score, + } + const updated = [entry, ...history.filter(h => h.id !== key)].slice(0, MAX_HISTORY) + await chrome.storage.local.set({ history: updated }) +} + +// ── API calls ───────────────────────────────────────────────────────────────── + +async function verifyText(text) { + const key = 'txt_' + await sha256prefix(text) + const hit = await getCached(key) + if (hit) return { ...hit, _fromCache: true } + + const { apiBase } = await 
getSettings() + const res = await fetch(`${apiBase}/verify/text`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text }), + }) + if (!res.ok) { + const body = await res.json().catch(() => ({})) + throw new Error(body.detail ?? `API error ${res.status}`) + } + const result = await res.json() + await setCached(key, result, text) + return result +} + +async function verifyUrl(url) { + const key = 'url_' + await sha256prefix(url) + const hit = await getCached(key) + if (hit) return { ...hit, _fromCache: true } + + const { apiBase } = await getSettings() + const res = await fetch(`${apiBase}/verify/url`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ url }), + }) + if (!res.ok) { + const body = await res.json().catch(() => ({})) + throw new Error(body.detail ?? `API error ${res.status}`) + } + const result = await res.json() + await setCached(key, result, url) + return result +} + +// ── Message handler ─────────────────────────────────────────────────────────── + +chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => { + switch (msg.type) { + + case 'VERIFY_TEXT': + verifyText(msg.text) + .then(r => sendResponse({ ok: true, result: r })) + .catch(e => sendResponse({ ok: false, error: e.message })) + return true // keep message channel open for async response + + case 'VERIFY_URL': + if (!isHttpUrl(msg.url)) { + sendResponse({ ok: false, error: 'Invalid URL: only http/https allowed' }) + return false + } + verifyUrl(msg.url) + .then(r => sendResponse({ ok: true, result: r })) + .catch(e => sendResponse({ ok: false, error: e.message })) + return true + + case 'GET_HISTORY': + chrome.storage.local.get('history') + .then(({ history = [] }) => sendResponse({ history })) + return true + + case 'GET_SETTINGS': + getSettings().then(s => sendResponse(s)) + return true + + case 'SAVE_SETTINGS': { + const incoming = msg.settings ?? 
{} + // Validate apiBase is a safe URL before persisting + if (incoming.apiBase && !isHttpUrl(incoming.apiBase)) { + sendResponse({ ok: false, error: 'Invalid API URL: only http/https allowed' }) + return false + } + chrome.storage.local + .set({ settings: incoming }) + .then(() => sendResponse({ ok: true })) + return true + } + + default: + break + } +}) diff --git a/extension/content.css b/extension/content.css new file mode 100644 index 0000000000000000000000000000000000000000..256dacc7d0c2cc9d9d8368c5356a5ba938139bc5 --- /dev/null +++ b/extension/content.css @@ -0,0 +1,190 @@ +/** + * PhilVerify — Content Script Styles + * Badge overlay injected into Facebook feed posts. + * All selectors are namespaced under .pv-* to avoid collisions. + */ + +/* ── Badge wrapper ───────────────────────────────────────────────────────── */ +.pv-badge-wrap { + display: block; + margin: 6px 12px 2px; +} + +.pv-badge { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 4px 10px; + border-radius: 3px; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif; + font-size: 11px; + font-weight: 600; + letter-spacing: 0.04em; + cursor: pointer; + touch-action: manipulation; + -webkit-tap-highlight-color: transparent; +} + +.pv-badge:focus-visible { + outline: 2px solid #06b6d4; + outline-offset: 2px; +} + +/* ── Loading state ───────────────────────────────────────────────────────── */ +.pv-badge--loading { + color: #a89f94; + border: 1px solid rgba(168, 159, 148, 0.2); + background: rgba(168, 159, 148, 0.06); + cursor: default; +} + +.pv-spinner { + display: inline-block; + width: 10px; + height: 10px; + border: 2px solid rgba(168, 159, 148, 0.3); + border-top-color: #a89f94; + border-radius: 50%; + animation: pv-spin 0.7s linear infinite; +} + +@media (prefers-reduced-motion: reduce) { + .pv-spinner { animation: none; } +} + +@keyframes pv-spin { + to { transform: rotate(360deg); } +} + +/* ── Error state 
─────────────────────────────────────────────────────────── */ +.pv-badge--error { + color: #78716c; + border: 1px solid rgba(120, 113, 108, 0.2); + background: transparent; + cursor: default; + font-size: 10px; +} + +/* ── Detail panel ────────────────────────────────────────────────────────── */ +.pv-detail { + display: block; + margin: 4px 0 6px; + padding: 10px 12px; + background: #141414; + border: 1px solid rgba(245, 240, 232, 0.1); + border-radius: 4px; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif; + font-size: 11px; + color: #f5f0e8; + max-width: 400px; + box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5); +} + +.pv-detail-header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 8px; + padding-bottom: 6px; + border-bottom: 1px solid rgba(245, 240, 232, 0.07); +} + +.pv-logo { + font-weight: 800; + font-size: 12px; + letter-spacing: 0.12em; + color: #f5f0e8; +} + +.pv-close { + background: none; + border: none; + cursor: pointer; + color: #5c554e; + font-size: 12px; + padding: 2px 4px; + border-radius: 2px; + touch-action: manipulation; +} +.pv-close:hover { color: #f5f0e8; } +.pv-close:focus-visible { outline: 2px solid #06b6d4; } + +.pv-row { + display: flex; + justify-content: space-between; + align-items: center; + padding: 4px 0; + border-bottom: 1px solid rgba(245, 240, 232, 0.05); +} + +.pv-label { + font-size: 9px; + font-weight: 700; + letter-spacing: 0.12em; + color: #5c554e; + text-transform: uppercase; +} + +.pv-val { + font-size: 11px; + font-weight: 600; + color: #a89f94; +} + +.pv-signals { + padding: 6px 0 4px; + border-bottom: 1px solid rgba(245, 240, 232, 0.05); +} + +.pv-tags { + display: flex; + flex-wrap: wrap; + gap: 4px; + margin-top: 4px; +} + +.pv-tag { + padding: 2px 6px; + background: rgba(220, 38, 38, 0.12); + color: #f87171; + border: 1px solid rgba(220, 38, 38, 0.25); + border-radius: 2px; + font-size: 9px; + letter-spacing: 0.04em; + font-weight: 600; +} + 
+.pv-source { + padding: 6px 0 4px; + border-bottom: 1px solid rgba(245, 240, 232, 0.05); +} + +.pv-source-link { + display: block; + margin-top: 4px; + color: #06b6d4; + font-size: 10px; + text-decoration: none; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.pv-source-link:hover { text-decoration: underline; } + +.pv-open-full { + display: block; + margin-top: 8px; + text-align: center; + color: #dc2626; + font-size: 10px; + font-weight: 700; + letter-spacing: 0.08em; + text-decoration: none; + text-transform: uppercase; + padding: 5px; + border: 1px solid rgba(220, 38, 38, 0.3); + border-radius: 2px; +} +.pv-open-full:hover { + background: rgba(220, 38, 38, 0.08); +} diff --git a/extension/content.js b/extension/content.js new file mode 100644 index 0000000000000000000000000000000000000000..a2f56cfb4b3ef66ffe62de659e97e21a54963e80 --- /dev/null +++ b/extension/content.js @@ -0,0 +1,390 @@ +/** + * PhilVerify — Content Script (Facebook feed scanner) + * + * Watches the Facebook feed via MutationObserver. + * For each new post that appears: + * 1. Extracts the post text or shared URL + * 2. Sends to background.js for verification (with cache) + * 3. Injects a credibility badge overlay onto the post card + * + * Badge click → opens an inline detail panel with verdict, score, and top source. + * + * Uses `data-philverify` attribute to mark already-processed posts. + */ + +;(function philverifyContentScript() { + 'use strict' + + // ── Config ──────────────────────────────────────────────────────────────── + + /** Minimum text length to send for verification (avoids verifying 1-word posts) */ + const MIN_TEXT_LENGTH = 40 + + /** + * Facebook feed post selectors — ordered by reliability. + * Facebook's class names are obfuscated; structural role/data attributes are + * more stable across renames. 
+ */ + const POST_SELECTORS = [ + '[data-pagelet^="FeedUnit"]', + '[data-pagelet^="GroupsFeedUnit"]', + '[role="article"]', + '[data-testid="post_message"]', + ] + + const VERDICT_COLORS = { + 'Credible': '#16a34a', + 'Unverified': '#d97706', + 'Likely Fake': '#dc2626', + } + const VERDICT_LABELS = { + 'Credible': '✓ Credible', + 'Unverified': '? Unverified', + 'Likely Fake': '✗ Likely Fake', + } + + // ── Utilities ───────────────────────────────────────────────────────────── + + /** Escape HTML special chars to prevent XSS in innerHTML templates */ + function safeText(str) { + if (str == null) return '' + return String(str) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, ''') + } + + /** Allow only http/https URLs; return '#' for anything else */ + function safeUrl(url) { + if (!url) return '#' + try { + const u = new URL(url) + return (u.protocol === 'http:' || u.protocol === 'https:') ? u.href : '#' + } catch { return '#' } + } + + function extractPostText(post) { + // Try common post message containers + const msgSelectors = [ + '[data-ad-preview="message"]', + '[data-testid="post_message"]', + '[dir="auto"] > div > div > div > span', + 'div[style*="text-align"] span', + ] + for (const sel of msgSelectors) { + const el = post.querySelector(sel) + if (el?.innerText?.trim().length >= MIN_TEXT_LENGTH) { + return el.innerText.trim().slice(0, 2000) + } + } + // Fallback: gather all text spans ≥ MIN_TEXT_LENGTH chars + const spans = Array.from(post.querySelectorAll('span')) + for (const span of spans) { + const t = span.innerText?.trim() + if (t && t.length >= MIN_TEXT_LENGTH && !t.startsWith('http')) return t.slice(0, 2000) + } + return null + } + + function extractPostUrl(post) { + // Shared article links + const linkSelectors = [ + 'a[href*="l.facebook.com/l.php"]', // Facebook link wrapper + 'a[target="_blank"][href^="https"]', // Direct external links + 'a[aria-label][href*="facebook.com/watch"]', // Videos + ] + for (const sel 
of linkSelectors) { + const el = post.querySelector(sel) + if (el?.href) { + try { + const u = new URL(el.href) + const dest = u.searchParams.get('u') // Unwrap l.facebook.com redirect + return dest || el.href + } catch { + return el.href + } + } + } + return null + } + + function genPostId(post) { + // Use aria-label prefix + UUID for stable, unique ID + // Avoids offsetTop which forces a synchronous layout read + const label = (post.getAttribute('aria-label') ?? '').replace(/\W/g, '').slice(0, 20) + return 'pv_' + label + crypto.randomUUID().replace(/-/g, '').slice(0, 12) + } + + // ── Badge rendering ─────────────────────────────────────────────────────── + + function createBadge(verdict, score, result) { + const color = VERDICT_COLORS[verdict] ?? '#5c554e' + const label = VERDICT_LABELS[verdict] ?? verdict + + const wrap = document.createElement('div') + wrap.className = 'pv-badge' + wrap.setAttribute('role', 'status') + wrap.setAttribute('aria-label', `PhilVerify: ${label} — ${Math.round(score)}% credibility score`) + wrap.style.cssText = ` + display: inline-flex; + align-items: center; + gap: 6px; + padding: 4px 10px; + border-radius: 3px; + border: 1px solid ${color}4d; + background: ${color}14; + cursor: pointer; + font-family: system-ui, sans-serif; + font-size: 11px; + font-weight: 600; + letter-spacing: 0.04em; + color: ${color}; + touch-action: manipulation; + -webkit-tap-highlight-color: transparent; + position: relative; + z-index: 10; + ` + + const dot = document.createElement('span') + dot.style.cssText = ` + width: 7px; height: 7px; + border-radius: 50%; + background: ${color}; + flex-shrink: 0; + ` + + const text = document.createElement('span') + text.textContent = `${label} ${Math.round(score)}%` + + const cacheTag = result._fromCache + ? 
(() => { const t = document.createElement('span'); t.textContent = '·cached'; t.style.cssText = `opacity:0.5;font-size:9px;`; return t })() + : null + + wrap.appendChild(dot) + wrap.appendChild(text) + if (cacheTag) wrap.appendChild(cacheTag) + + // Click → toggle detail panel + wrap.addEventListener('click', (e) => { + e.stopPropagation() + toggleDetailPanel(wrap, result) + }) + + return wrap + } + + function toggleDetailPanel(badge, result) { + const existing = badge.parentElement?.querySelector('.pv-detail') + if (existing) { existing.remove(); return } + + const panel = document.createElement('div') + panel.className = 'pv-detail' + panel.setAttribute('role', 'dialog') + panel.setAttribute('aria-label', 'PhilVerify fact-check details') + + const color = VERDICT_COLORS[result.verdict] ?? '#5c554e' + const topSource = result.layer2?.sources?.[0] + + panel.innerHTML = ` +
+ + +
+
+ VERDICT + ${safeText(result.verdict)} +
+
+ SCORE + ${Math.round(result.final_score)}% +
+
+ LANGUAGE + ${safeText(result.language ?? '—')} +
+ ${result.layer1?.triggered_features?.length ? ` +
+ SIGNALS +
+ ${result.layer1.triggered_features.slice(0, 3).map(f => + `${safeText(f)}` + ).join('')} +
+
` : ''} + ${topSource ? ` +
+ TOP SOURCE + + ${safeText(topSource.title?.slice(0, 60) ?? topSource.source_name ?? 'View source')} ↗ + +
` : ''} + + Open full analysis ↗ + + ` + + panel.querySelector('.pv-close').addEventListener('click', (e) => { + e.stopPropagation() + panel.remove() + }) + + badge.insertAdjacentElement('afterend', panel) + } + + function injectBadgeIntoPost(post, result) { + // Find a stable injection point near the post actions bar + const actionBar = post.querySelector('[data-testid="UFI2ReactionsCount/root"]') + ?? post.querySelector('[aria-label*="reaction"]') + ?? post.querySelector('[role="toolbar"]') + ?? post + + const container = document.createElement('div') + container.className = 'pv-badge-wrap' + const badge = createBadge(result.verdict, result.final_score, result) + container.appendChild(badge) + + // Insert before the action bar, or append inside the post + if (actionBar && actionBar !== post) { + actionBar.insertAdjacentElement('beforebegin', container) + } else { + post.appendChild(container) + } + } + + // ── Loading state ───────────────────────────────────────────────────────── + + function injectLoadingBadge(post) { + const container = document.createElement('div') + container.className = 'pv-badge-wrap pv-loading' + container.setAttribute('aria-label', 'PhilVerify: verifying…') + container.innerHTML = ` +
+ + Verifying… +
+ ` + post.appendChild(container) + return container + } + + // ── Post processing ─────────────────────────────────────────────────────── + + async function processPost(post) { + if (post.dataset.philverify) return // already processed + const id = genPostId(post) + post.dataset.philverify = id + + const text = extractPostText(post) + const url = extractPostUrl(post) + + if (!text && !url) return // nothing to verify + + const loader = injectLoadingBadge(post) + + try { + const response = await new Promise((resolve, reject) => { + const msg = url + ? { type: 'VERIFY_URL', url } + : { type: 'VERIFY_TEXT', text } + chrome.runtime.sendMessage(msg, (resp) => { + if (chrome.runtime.lastError) reject(new Error(chrome.runtime.lastError.message)) + else if (!resp?.ok) reject(new Error(resp?.error ?? 'Unknown error')) + else resolve(resp.result) + }) + }) + + loader.remove() + injectBadgeIntoPost(post, response) + } catch (err) { + loader.remove() + // Show a muted error indicator — don't block reading + const errBadge = document.createElement('div') + errBadge.className = 'pv-badge-wrap' + const errInner = document.createElement('div') + errInner.className = 'pv-badge pv-badge--error' + errInner.title = err.message // .title setter is XSS-safe + errInner.textContent = '⚠ PhilVerify offline' + errBadge.appendChild(errInner) + post.appendChild(errBadge) + } + } + + // ── MutationObserver ────────────────────────────────────────────────────── + + const pendingPosts = new Set() + let rafScheduled = false + + function flushPosts() { + rafScheduled = false + for (const post of pendingPosts) processPost(post) + pendingPosts.clear() + } + + function scheduleProcess(post) { + pendingPosts.add(post) + if (!rafScheduled) { + rafScheduled = true + requestAnimationFrame(flushPosts) + } + } + + function findPosts(root) { + for (const sel of POST_SELECTORS) { + const found = root.querySelectorAll(sel) + if (found.length) return found + } + return [] + } + + const observer = new 
MutationObserver((mutations) => { + for (const mutation of mutations) { + for (const node of mutation.addedNodes) { + if (node.nodeType !== 1) continue // element nodes only + // Check if the node itself matches + for (const sel of POST_SELECTORS) { + if (node.matches?.(sel)) { scheduleProcess(node); break } + } + // Check descendants + const posts = findPosts(node) + for (const post of posts) scheduleProcess(post) + } + } + }) + + // ── Initialization ──────────────────────────────────────────────────────── + + async function init() { + // Check autoScan setting before activating + const response = await new Promise(resolve => { + chrome.runtime.sendMessage({ type: 'GET_SETTINGS' }, resolve) + }).catch(() => ({ autoScan: true })) + + if (!response?.autoScan) return + + // Process any posts already in the DOM + const existing = findPosts(document.body) + for (const post of existing) scheduleProcess(post) + + // Watch for new posts (Facebook is a SPA — feed dynamically loads more) + observer.observe(document.body, { childList: true, subtree: true }) + } + + init() + + // React to autoScan toggle without requiring page reload + chrome.storage.onChanged.addListener((changes, area) => { + if (area !== 'local' || !changes.settings) return + const autoScan = changes.settings.newValue?.autoScan + if (autoScan === false) { + observer.disconnect() + } else if (autoScan === true) { + observer.observe(document.body, { childList: true, subtree: true }) + // Process any posts that appeared while scanning was paused + const existing = findPosts(document.body) + for (const post of existing) scheduleProcess(post) + } + }) + +})() diff --git a/extension/generate_icons.py b/extension/generate_icons.py new file mode 100644 index 0000000000000000000000000000000000000000..0e05e8fd0509c377da34b5e8dbe84fc5e473fd23 --- /dev/null +++ b/extension/generate_icons.py @@ -0,0 +1,61 @@ +""" +Generate PhilVerify extension icons (16×16, 32×32, 48×48, 128×128 PNG). 
+Requires Pillow: pip install Pillow +Run from the extension/ directory: python generate_icons.py +""" +import os +from PIL import Image, ImageDraw, ImageFont + +SIZES = [16, 32, 48, 128] +OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'icons') +os.makedirs(OUTPUT_DIR, exist_ok=True) + +BG_COLOR = (13, 13, 13, 255) # --bg-base +RED_COLOR = (220, 38, 38, 255) # --accent-red +TEXT_COLOR = (245, 240, 232, 255) # --text-primary + + +def make_icon(size: int) -> Image.Image: + img = Image.new('RGBA', (size, size), BG_COLOR) + draw = ImageDraw.Draw(img) + + # Red left-edge accent bar (3px scaled) + bar_width = max(2, size // 10) + draw.rectangle([0, 0, bar_width - 1, size - 1], fill=RED_COLOR) + + # 'PV' text label — only draw text on larger icons where it looks clean + font_size = max(6, int(size * 0.38)) + font = None + for path in [ + '/System/Library/Fonts/Helvetica.ttc', + '/System/Library/Fonts/SFNSDisplay.ttf', + '/System/Library/Fonts/ArialHB.ttc', + ]: + try: + font = ImageFont.truetype(path, font_size) + break + except OSError: + continue + if font is None: + font = ImageFont.load_default() + + if size >= 32: + text = 'PV' + try: + bbox = draw.textbbox((0, 0), text, font=font) + tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1] + tx = bar_width + (size - bar_width - tw) // 2 + ty = (size - th) // 2 - bbox[1] + draw.text((tx, ty), text, fill=TEXT_COLOR, font=font) + except Exception: + pass # Skip text on render error — icon still has the red bar + + return img + + +for sz in SIZES: + icon_path = os.path.join(OUTPUT_DIR, f'icon{sz}.png') + make_icon(sz).save(icon_path, 'PNG') + print(f'✓ icons/icon{sz}.png') + +print('Icons generated in extension/icons/') diff --git a/extension/icons/icon128.png b/extension/icons/icon128.png new file mode 100644 index 0000000000000000000000000000000000000000..2b3ac92f32d72c5b015b61ef5a961b21c0d06bea Binary files /dev/null and b/extension/icons/icon128.png differ diff --git a/extension/icons/icon16.png 
b/extension/icons/icon16.png new file mode 100644 index 0000000000000000000000000000000000000000..4a4de134f9cb36c26470259cfeb2a4f505709298 Binary files /dev/null and b/extension/icons/icon16.png differ diff --git a/extension/icons/icon32.png b/extension/icons/icon32.png new file mode 100644 index 0000000000000000000000000000000000000000..f4d0272f67248b6b04f4f538eeaf231336d58bcc Binary files /dev/null and b/extension/icons/icon32.png differ diff --git a/extension/icons/icon48.png b/extension/icons/icon48.png new file mode 100644 index 0000000000000000000000000000000000000000..e02374910b7b403773b2e50d7e1aeb9df268b261 Binary files /dev/null and b/extension/icons/icon48.png differ diff --git a/extension/manifest.json b/extension/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..84fdda35064596033ec2f93060041d61ebdfbe13 --- /dev/null +++ b/extension/manifest.json @@ -0,0 +1,55 @@ +{ + "manifest_version": 3, + "name": "PhilVerify", + "version": "1.0.0", + "description": "AI-powered fact-checking for Philippine news and social media. 
Detects misinformation on Facebook in real time.", + + "permissions": [ + "storage", + "activeTab", + "scripting" + ], + + "host_permissions": [ + "https://www.facebook.com/*", + "https://facebook.com/*", + "http://localhost:8000/*", + "https://api.philverify.com/*" + ], + + "background": { + "service_worker": "background.js", + "type": "module" + }, + + "content_scripts": [ + { + "matches": ["https://www.facebook.com/*", "https://facebook.com/*"], + "js": ["content.js"], + "css": ["content.css"], + "run_at": "document_idle" + } + ], + + "action": { + "default_popup": "popup.html", + "default_title": "PhilVerify — Fact Check", + "default_icon": { + "16": "icons/icon16.png", + "32": "icons/icon32.png", + "48": "icons/icon48.png", + "128": "icons/icon128.png" + } + }, + + "icons": { + "16": "icons/icon16.png", + "32": "icons/icon32.png", + "48": "icons/icon48.png", + "128": "icons/icon128.png" + }, + + "content_security_policy": { + "extension_pages": "script-src 'self'; object-src 'self'" + } +} diff --git a/extension/popup.html b/extension/popup.html new file mode 100644 index 0000000000000000000000000000000000000000..ab5e31deefbf180aa0dad12ed3fc5c19bec15772 --- /dev/null +++ b/extension/popup.html @@ -0,0 +1,446 @@ + + + + + + PhilVerify + + + + + + + + +
+ + + +
+ + +
+

Loading current URL…

+ + +
+
+ + +
+
+
No verifications yet — use the Verify tab or browse Facebook.
+
+
+ + +
+
+ + + + Default: http://localhost:8000 — change for production deployment. + +
+
+ + +
+ +
+
+ + + + diff --git a/extension/popup.js b/extension/popup.js new file mode 100644 index 0000000000000000000000000000000000000000..5fc307d4757d455b356ad2bee68e8607a2b594c0 --- /dev/null +++ b/extension/popup.js @@ -0,0 +1,238 @@ +/** + * PhilVerify — Popup Script + * Controls the extension popup: verify tab, history tab, settings tab. + */ +'use strict' + +// ── Constants ───────────────────────────────────────────────────────────────── + +const VERDICT_COLORS = { + 'Credible': '#16a34a', + 'Unverified': '#d97706', + 'Likely Fake': '#dc2626', +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── +/** Escape HTML special chars to prevent XSS in innerHTML templates */ +function safeText(str) { + if (str == null) return '' + return String(str) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, ''') +} + +/** Allow only http/https URLs; return '#' for anything else */ +function safeUrl(url) { + if (!url) return '#' + try { + const u = new URL(url) + return (u.protocol === 'http:' || u.protocol === 'https:') ? u.href : '#' + } catch { return '#' } +} +function msg(obj) { + return new Promise(resolve => { + chrome.runtime.sendMessage(obj, resolve) + }) +} + +function timeAgo(iso) { + const diff = Date.now() - new Date(iso).getTime() + if (diff < 60_000) return 'just now' + if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m ago` + if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h ago` + return `${Math.floor(diff / 86_400_000)}d ago` +} + +function isUrl(s) { + try { new URL(s); return s.startsWith('http'); } catch { return false } +} + +// ── Render helpers ──────────────────────────────────────────────────────────── + +function renderResult(result, container) { + const color = VERDICT_COLORS[result.verdict] ?? '#5c554e' + const topSource = result.layer2?.sources?.[0] + + container.innerHTML = ` +
+
${safeText(result.verdict)}
+
${Math.round(result.final_score)}% credibility${result._fromCache ? ' (cached)' : ''}
+
+ Language + ${safeText(result.language ?? '—')} +
+
+ Confidence + ${result.confidence?.toFixed(1)}% +
+ ${result.layer1?.triggered_features?.length ? ` +
+ Signals + ${result.layer1.triggered_features.slice(0, 3).map(safeText).join(', ')} +
` : ''} + ${topSource ? ` + ` : ''} + + Open Full Dashboard ↗ + +
+ ` +} + +function renderHistory(entries, container) { + if (!entries.length) { + container.innerHTML = '
No verifications yet.
' + return + } + container.innerHTML = ` + + ` +} + +// ── Tab switching ───────────────────────────────────────────────────────────── + +document.querySelectorAll('.tab').forEach(tab => { + tab.addEventListener('click', () => { + document.querySelectorAll('.tab').forEach(t => { + t.classList.remove('active') + t.setAttribute('aria-selected', 'false') + }) + document.querySelectorAll('.panel').forEach(p => p.classList.remove('active')) + tab.classList.add('active') + tab.setAttribute('aria-selected', 'true') + document.getElementById(`panel-${tab.dataset.tab}`)?.classList.add('active') + if (tab.dataset.tab === 'history') loadHistory() + if (tab.dataset.tab === 'settings') loadSettings() + }) +}) + +// ── Verify tab ──────────────────────────────────────────────────────────────── + +const verifyInput = document.getElementById('verify-input') +const btnVerify = document.getElementById('btn-verify') +const verifyResult = document.getElementById('verify-result') +const currentUrlEl = document.getElementById('current-url') + +// Auto-populate input with current tab URL if it's a news article +chrome.tabs.query({ active: true, currentWindow: true }, ([tab]) => { + const url = tab?.url ?? '' + if (url && !url.startsWith('chrome') && !url.includes('facebook.com')) { + currentUrlEl.textContent = url + currentUrlEl.title = url + verifyInput.value = url + } else { + currentUrlEl.textContent = 'facebook.com — use text input below' + } +}) + +btnVerify.addEventListener('click', async () => { + const raw = verifyInput.value.trim() + if (!raw) return + + btnVerify.disabled = true + btnVerify.setAttribute('aria-busy', 'true') + btnVerify.textContent = 'Verifying…' + verifyResult.innerHTML = ` +
+
Analyzing claim… +
` + + const type = isUrl(raw) ? 'VERIFY_URL' : 'VERIFY_TEXT' + const payload = type === 'VERIFY_URL' ? { type, url: raw } : { type, text: raw } + const resp = await msg(payload) + + btnVerify.disabled = false + btnVerify.setAttribute('aria-busy', 'false') + btnVerify.textContent = 'Verify Claim' + + if (resp?.ok) { + renderResult(resp.result, verifyResult) + } else { + verifyResult.innerHTML = ` + ` + } +}) + +// Allow Enter (single line) to trigger verify when text area is focused on Ctrl+Enter +verifyInput.addEventListener('keydown', e => { + if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) { + e.preventDefault() + btnVerify.click() + } +}) + +// ── History tab ─────────────────────────────────────────────────────────────── + +async function loadHistory() { + const container = document.getElementById('history-container') + container.innerHTML = '

Loading…
' + const resp = await msg({ type: 'GET_HISTORY' }) + renderHistory(resp?.history ?? [], container) +} + +// ── Settings tab ────────────────────────────────────────────────────────────── + +async function loadSettings() { + const resp = await msg({ type: 'GET_SETTINGS' }) + if (!resp) return + document.getElementById('api-base').value = resp.apiBase ?? 'http://localhost:8000' + document.getElementById('auto-scan').checked = resp.autoScan ?? true +} + +document.getElementById('btn-save').addEventListener('click', async () => { + const settings = { + apiBase: document.getElementById('api-base').value.trim() || 'http://localhost:8000', + autoScan: document.getElementById('auto-scan').checked, + } + await msg({ type: 'SAVE_SETTINGS', settings }) + + const flash = document.getElementById('saved-flash') + flash.textContent = 'Saved ✓' + setTimeout(() => { flash.textContent = '' }, 2000) +}) + +// ── API status check ────────────────────────────────────────────────────────── + +async function checkApiStatus() { + const dot = document.getElementById('api-status-dot') + const label = document.getElementById('api-status-label') + try { + const { apiBase } = await msg({ type: 'GET_SETTINGS' }) + const res = await fetch(`${apiBase ?? 
'http://localhost:8000'}/health`, { signal: AbortSignal.timeout(3000) }) + if (res.ok) { + dot.style.background = 'var(--credible)' + label.style.color = 'var(--credible)' + label.textContent = 'ONLINE' + } else { + throw new Error(`${res.status}`) + } + } catch { + dot.style.background = 'var(--fake)' + label.style.color = 'var(--fake)' + label.textContent = 'OFFLINE' + } +} + +checkApiStatus() diff --git a/firebase.json b/firebase.json index ce186b3592e5d861c12238bbd803e14876a635ac..38eaceec3020f6e4d8799320b2dd9d7a9c04b26c 100644 --- a/firebase.json +++ b/firebase.json @@ -4,7 +4,6 @@ "indexes": "firestore.indexes.json" }, "hosting": { - "site": "philverify", "public": "frontend/dist", "ignore": [ "firebase.json", diff --git a/firebase_client.py b/firebase_client.py index 9c76cbcdf627865ecba950e8bdf49b1ac9d6aa64..bf5531926a02019da451cf1625eb5d84031e46da 100644 --- a/firebase_client.py +++ b/firebase_client.py @@ -42,8 +42,8 @@ def get_firestore(): cred = credentials.Certificate(str(_SERVICEACCOUNT_PATH)) firebase_admin.initialize_app(cred) logger.info("Firebase initialized via service account key") - elif os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): - # Cloud Run / GCE default credentials + elif os.getenv("GOOGLE_APPLICATION_CREDENTIALS") or os.getenv("K_SERVICE"): + # Cloud Run (K_SERVICE is always set) or explicit ADC path cred = credentials.ApplicationDefault() firebase_admin.initialize_app(cred) logger.info("Firebase initialized via Application Default Credentials") @@ -92,12 +92,13 @@ async def get_verifications( if db is None: return [] try: + from google.cloud.firestore_v1.base_query import FieldFilter query = ( db.collection("verifications") .order_by("timestamp", direction="DESCENDING") ) if verdict_filter: - query = query.where("verdict", "==", verdict_filter) + query = query.where(filter=FieldFilter("verdict", "==", verdict_filter)) docs = query.limit(limit + offset).stream() results = [doc.to_dict() for doc in docs] return results[offset : offset + 
limit] @@ -106,15 +107,34 @@ async def get_verifications( return [] +def get_all_verifications_sync() -> list[dict]: + """Synchronously fetch ALL verification records from Firestore (used by trends aggregation).""" + db = get_firestore() + if db is None: + return [] + try: + docs = ( + db.collection("verifications") + .order_by("timestamp", direction="DESCENDING") + .limit(10_000) # hard cap — more than enough for trends analysis + .stream() + ) + return [doc.to_dict() for doc in docs] + except Exception as e: + logger.error("Firestore get_all_verifications_sync error: %s", e) + return [] + + async def get_verification_count(verdict_filter: str | None = None) -> int: """Return total count of verifications (with optional verdict filter).""" db = get_firestore() if db is None: return 0 try: + from google.cloud.firestore_v1.base_query import FieldFilter query = db.collection("verifications") if verdict_filter: - query = query.where("verdict", "==", verdict_filter) + query = query.where(filter=FieldFilter("verdict", "==", verdict_filter)) # Use aggregation query (Firestore native count) result = query.count().get() return result[0][0].value diff --git a/firestore.indexes.json b/firestore.indexes.json index 0e6de7cb808cd5b72ac8580bf2ba440e48a50d74..2953612b926dd49228d250465668ec60dc488917 100644 --- a/firestore.indexes.json +++ b/firestore.indexes.json @@ -1,51 +1,13 @@ { - // Example (Standard Edition): - // - // "indexes": [ - // { - // "collectionGroup": "widgets", - // "queryScope": "COLLECTION", - // "fields": [ - // { "fieldPath": "foo", "arrayConfig": "CONTAINS" }, - // { "fieldPath": "bar", "mode": "DESCENDING" } - // ] - // }, - // - // "fieldOverrides": [ - // { - // "collectionGroup": "widgets", - // "fieldPath": "baz", - // "indexes": [ - // { "order": "ASCENDING", "queryScope": "COLLECTION" } - // ] - // }, - // ] - // ] - // - // Example (Enterprise Edition): - // - // "indexes": [ - // { - // "collectionGroup": "reviews", - // "queryScope": 
"COLLECTION_GROUP", - // "apiScope": "MONGODB_COMPATIBLE_API", - // "density": "DENSE", - // "multikey": false, - // "fields": [ - // { "fieldPath": "baz", "mode": "ASCENDING" } - // ] - // }, - // { - // "collectionGroup": "items", - // "queryScope": "COLLECTION_GROUP", - // "apiScope": "MONGODB_COMPATIBLE_API", - // "density": "SPARSE_ANY", - // "multikey": true, - // "fields": [ - // { "fieldPath": "baz", "mode": "ASCENDING" } - // ] - // }, - // ] - "indexes": [], + "indexes": [ + { + "collectionGroup": "verifications", + "queryScope": "COLLECTION", + "fields": [ + { "fieldPath": "verdict", "order": "ASCENDING" }, + { "fieldPath": "timestamp", "order": "DESCENDING" } + ] + } + ], "fieldOverrides": [] -} \ No newline at end of file +} diff --git a/frontend/index.html b/frontend/index.html index c20fbd3a70cc5c113cd0498fbd28c49abfe0bf7d..22ab6eed327e47d1545e26a807b1c8e293c4cbf8 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -2,9 +2,13 @@ - + - frontend + + + + + PhilVerify — Philippine Fake News Detector
diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 5d864e82a8269361d44f7e5a19c67ee6a2373e81..a69d3318ef31f49a9025e7722d39e80f9a8eb3ca 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -27,6 +27,7 @@ "eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-refresh": "^0.4.24", "globals": "^16.5.0", + "typescript": "^5.9.3", "vite": "^7.3.1" } }, @@ -4595,6 +4596,20 @@ "node": ">= 0.8.0" } }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, "node_modules/undici-types": { "version": "7.18.2", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index 4cc753e736ae58b075cd3ef450db433e13612745..63abc110ffe054eae1c772ffd3df4cffe6c0e90e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -7,7 +7,8 @@ "dev": "vite", "build": "vite build", "lint": "eslint .", - "preview": "vite preview" + "preview": "vite preview", + "typecheck": "tsc --noEmit" }, "dependencies": { "@tailwindcss/vite": "^4.2.1", @@ -29,6 +30,7 @@ "eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-refresh": "^0.4.24", "globals": "^16.5.0", + "typescript": "^5.9.3", "vite": "^7.3.1" } } diff --git a/frontend/public/logo.svg b/frontend/public/logo.svg new file mode 100644 index 0000000000000000000000000000000000000000..99e7fb2e9f564d34a4d5671d6fe28dd926483a8c --- /dev/null +++ b/frontend/public/logo.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 2e57eb30ce9b5cfeb5c868b94b0384443643ed0f..9a1ff18dd3fadb15e10f13561532172150c57010 100644 --- 
a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -4,18 +4,49 @@ import VerifyPage from './pages/VerifyPage.jsx' import HistoryPage from './pages/HistoryPage.jsx' import TrendsPage from './pages/TrendsPage.jsx' +/** Shared horizontal constraint — all pages + navbar use this */ +export const PAGE_MAX_W = 960 +export const PAGE_STYLE = { + maxWidth: PAGE_MAX_W, + width: '100%', + margin: '0 auto', + padding: '0 24px', +} + export default function App() { return ( + {/* web-design-guidelines: skip link for keyboard/screen-reader users */} + + Skip to content +
-
+
} /> } /> } /> -
+
) diff --git a/frontend/src/api.js b/frontend/src/api.js index 9324cf8cb727bf9bb2ce81ca698b1bb50eb02abf..23ab6946b399ae7ee7fbe8487fe7608f23c619c9 100644 --- a/frontend/src/api.js +++ b/frontend/src/api.js @@ -1,6 +1,16 @@ /** PhilVerify API client — proxied through Vite to http://localhost:8000 */ const BASE = '/api' +function _detailToString(detail, status) { + if (!detail) return `HTTP ${status}` + if (typeof detail === 'string') return detail + if (Array.isArray(detail)) { + // FastAPI validation errors: [{loc, msg, type}, ...] + return detail.map(d => d.msg || JSON.stringify(d)).join('; ') + } + return JSON.stringify(detail) +} + async function post(path, body) { const res = await fetch(`${BASE}${path}`, { method: 'POST', @@ -9,7 +19,9 @@ async function post(path, body) { }) if (!res.ok) { const err = await res.json().catch(() => ({})) - throw new Error(err.detail || `HTTP ${res.status}`) + const e = new Error(_detailToString(err.detail, res.status)) + e.isBackendError = true // backend responded — not a connection failure + throw e } return res.json() } @@ -18,7 +30,7 @@ async function postForm(path, formData) { const res = await fetch(`${BASE}${path}`, { method: 'POST', body: formData }) if (!res.ok) { const err = await res.json().catch(() => ({})) - throw new Error(err.detail || `HTTP ${res.status}`) + throw new Error(_detailToString(err.detail, res.status)) } return res.json() } @@ -26,8 +38,13 @@ async function postForm(path, formData) { async function get(path, params = {}) { const qs = new URLSearchParams(params).toString() const res = await fetch(`${BASE}${path}${qs ? '?' + qs : ''}`) - if (!res.ok) throw new Error(`HTTP ${res.status}`) - return res.json() + if (!res.ok) { + const err = await res.json().catch(() => ({})) + throw new Error(_detailToString(err.detail, res.status)) + } + return res.json().catch(() => { + throw new Error('API returned an unexpected response — the server may be starting up. 
Please try again.') + }) } export const api = { @@ -36,6 +53,8 @@ export const api = { verifyImage: (file) => { const f = new FormData(); f.append('file', file); return postForm('/verify/image', f) }, verifyVideo: (file) => { const f = new FormData(); f.append('file', file); return postForm('/verify/video', f) }, history: (params) => get('/history', params), + historyDetail: (id) => get(`/history/${id}`), trends: () => get('/trends'), health: () => get('/health'), + preview: (url) => get('/preview', { url }), } diff --git a/frontend/src/api.ts b/frontend/src/api.ts new file mode 100644 index 0000000000000000000000000000000000000000..56d2d00b731a945710bf60cefcea8e884850c8b0 --- /dev/null +++ b/frontend/src/api.ts @@ -0,0 +1,84 @@ +/** + * PhilVerify API client — proxied through Vite to http://localhost:8000 + * Typed via src/types.ts which mirrors api/schemas.py + */ +import type { + VerificationResponse, + HistoryParams, + HistoryResponse, + TrendsResponse, + HealthResponse, + ApiError as ApiErrorType, +} from './types' +import { ApiError } from './types' + +const BASE = '/api' + +// ── Internal fetch helpers ───────────────────────────────────────────────────── + +async function post(path: string, body: unknown): Promise { + const res = await fetch(`${BASE}${path}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }) + if (!res.ok) { + const err = await res.json().catch(() => ({})) as { detail?: string } + throw new ApiError(err.detail ?? `HTTP ${res.status}`, true) + } + return res.json() as Promise +} + +async function postForm(path: string, formData: FormData): Promise { + const res = await fetch(`${BASE}${path}`, { method: 'POST', body: formData }) + if (!res.ok) { + const err = await res.json().catch(() => ({})) as { detail?: string } + throw new ApiError(err.detail ?? 
`HTTP ${res.status}`, true) + } + return res.json() as Promise +} + +async function get(path: string, params: Record = {}): Promise { + const defined = Object.fromEntries( + Object.entries(params).filter(([, v]) => v !== undefined), + ) as Record + const qs = new URLSearchParams(defined).toString() + const res = await fetch(`${BASE}${path}${qs ? '?' + qs : ''}`) + if (!res.ok) throw new ApiError(`HTTP ${res.status}`) + return res.json() as Promise +} + +// ── Public API surface ───────────────────────────────────────────────────────── + +export const api = { + verifyText: (text: string): Promise => + post('/verify/text', { text }), + + verifyUrl: (url: string): Promise => + post('/verify/url', { url }), + + verifyImage: (file: File): Promise => { + const f = new FormData() + f.append('file', file) + return postForm('/verify/image', f) + }, + + verifyVideo: (file: File): Promise => { + const f = new FormData() + f.append('file', file) + return postForm('/verify/video', f) + }, + + history: (params?: HistoryParams): Promise => + get('/history', params as Record), + + trends: (): Promise => + get('/trends'), + + health: (): Promise => + get('/health'), +} as const + +// Re-export error class for consumers +export { ApiError } from './types' +export type { ApiErrorType } diff --git a/frontend/src/components/Navbar.jsx b/frontend/src/components/Navbar.jsx index 6b7bf41c773febfdc54f1993798b93afaa749f19..7cef1092374b8892ab3fa13ee2a08061c400ea46 100644 --- a/frontend/src/components/Navbar.jsx +++ b/frontend/src/components/Navbar.jsx @@ -1,5 +1,6 @@ -import { NavLink } from 'react-router-dom' +import { NavLink, Link } from 'react-router-dom' import { Radar, Clock, TrendingUp, ShieldCheck } from 'lucide-react' +import { PAGE_STYLE } from '../App.jsx' const NAV_LINKS = [ { to: '/', icon: ShieldCheck, label: 'Verify' }, @@ -9,55 +10,69 @@ const NAV_LINKS = [ export default function Navbar() { return ( - /* semantic
— web-design-guidelines: semantic HTML */
- {/* Logo */} -
-
+ {/* Inner content aligned to same width as page content */} +
+ {/* Logo — Link to home */} + +
) diff --git a/frontend/src/components/SkeletonCard.jsx b/frontend/src/components/SkeletonCard.jsx new file mode 100644 index 0000000000000000000000000000000000000000..8b9b4e0f29e4e82ab179f82b63ef45c1a8cd9a60 --- /dev/null +++ b/frontend/src/components/SkeletonCard.jsx @@ -0,0 +1,46 @@ +/** + * SkeletonCard — Phase 8: Loading state skeleton screens + * Used while the verification API call is in-flight. + * web-design-guidelines: content-jumping — reserve space for async content. + * web-design-guidelines: prefers-reduced-motion — skip animation if user prefers. + */ +export default function SkeletonCard({ lines = 3, height = null, className = '' }) { + return ( + + ) +} + +function SkeletonBar({ style = {} }) { + return ( +
+
+
/**
 * WordHighlighter — Phase 8: Suspicious Word Highlighter
 * Highlights suspicious / clickbait trigger words in the claim text.
 * Uses triggered_features from Layer 1 as hint words.
 *
 * architect-review: pure presentational, no side-effects.
 * web-design-guidelines: screen-reader friendly.
 */

// Common suspicious/misinformation signal words to highlight.
// NOTE: the /g flag makes these regexes stateful (lastIndex persists across
// calls); getHighlightedSegments resets lastIndex before every scan.
const SUSPICIOUS_PATTERNS = [
  // English signals
  /\b(shocking|exposed|revealed|secret|hoax|fake|false|confirmed|breaking|urgent|emergency|exclusive|banned|cover[\s-]?up|conspiracy|miracle|crisis|scandal|leaked|hidden|truth|they don't want you to know)\b/gi,
  // Filipino signals
  /\b(grabe|nakakagulat|totoo|peke|huwag maniwala|nagsisinungaling|lihim|inilabas|natuklasan|katotohanan|panlilinlang|kahirap-hirap|itinatago)\b/gi,
]

/**
 * Split `text` into contiguous segments, marking spans that match a static
 * suspicious pattern or one of the dynamic trigger words.
 *
 * @param {string} text - claim text to scan
 * @param {string[]} [triggerWords] - extra hint words from Layer 1 (triggered_features)
 * @returns {Array<{text: string, highlighted: boolean, word?: string}>}
 *   ordered segments covering the whole input; `[]` for empty input, and a
 *   single un-highlighted segment when nothing matches
 */
function getHighlightedSegments(text, triggerWords = []) {
  if (!text) return []

  // Combined pattern list: static patterns + one regex built from trigger words.
  const allPatterns = [...SUSPICIOUS_PATTERNS]

  // FIX: drop empty/blank trigger words before building the dynamic regex.
  // An empty alternative yields `\b()\b`, which matches zero-length strings;
  // a zero-length /g match never advances lastIndex, so the exec loop below
  // would previously spin forever.
  const words = triggerWords.filter(w => typeof w === 'string' && w.trim() !== '')
  if (words.length > 0) {
    const escaped = words.map(w => w.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    allPatterns.push(new RegExp(`\\b(${escaped.join('|')})\\b`, 'gi'))
  }

  // Collect every match interval across all patterns.
  const matches = []
  for (const pattern of allPatterns) {
    pattern.lastIndex = 0 // /g regexes carry state between calls
    let m
    while ((m = pattern.exec(text)) !== null) {
      if (m[0].length === 0) {
        // Defensive guard: never loop forever on a zero-length match.
        pattern.lastIndex++
        continue
      }
      matches.push({ start: m.index, end: m.index + m[0].length, word: m[0] })
    }
  }

  if (matches.length === 0) return [{ text, highlighted: false }]

  // Sort by start and merge overlapping intervals so segments never overlap.
  matches.sort((a, b) => a.start - b.start)
  const merged = []
  for (const m of matches) {
    const last = merged[merged.length - 1]
    if (last && m.start <= last.end) {
      last.end = Math.max(last.end, m.end)
    } else {
      merged.push({ ...m })
    }
  }

  // Walk the merged intervals, emitting plain/highlighted segments in order.
  const segments = []
  let cursor = 0
  for (const { start, end, word } of merged) {
    if (cursor < start) segments.push({ text: text.slice(cursor, start), highlighted: false })
    segments.push({ text: text.slice(start, end), highlighted: true, word })
    cursor = end
  }
  if (cursor < text.length) segments.push({ text: text.slice(cursor), highlighted: false })

  return segments
}

+ {text} +

+ ) + } + + return ( +
+ {hitCount > 0 && ( +

+ ⚠ {hitCount} suspicious signal{hitCount !== 1 ? 's' : ''} detected +

+ )} +

+ {segments.map((seg, i) => + seg.highlighted ? ( + + {seg.text} + + ) : ( + {seg.text} + ) + )} +

+
/**
 * Subscribe to the 20 most recent verifications in real-time.
 * @param {Function} callback - called with array of docs on each update
 * @param {Function} [onError] - called with Error when Firestore is unreachable (e.g. ad blocker)
 * @returns unsubscribe function
 */
export function subscribeToHistory(callback, onError) {
  const recentQuery = query(
    collection(db, 'verifications'),
    orderBy('timestamp', 'desc'),
    limit(20)
  )

  // Map Firestore snapshot docs into plain objects before handing to caller.
  const handleSnapshot = (snap) => {
    const docs = snap.docs.map((d) => ({ id: d.id, ...d.data() }))
    callback(docs)
  }

  // Firestore blocked (ERR_BLOCKED_BY_CLIENT from ad blockers) or
  // permission denied — fail fast and let caller fall back to REST.
  const handleError = (error) => {
    console.warn('[PhilVerify] Firestore unavailable:', error.code || error.message)
    if (onError) onError(error)
  }

  return onSnapshot(recentQuery, handleSnapshot, handleError)
}
'../utils/format.js' +import { PAGE_STYLE } from '../App.jsx' +import { api } from '../api' import VerdictBadge from '../components/VerdictBadge.jsx' -import { Clock, RefreshCw } from 'lucide-react' +import SkeletonCard from '../components/SkeletonCard.jsx' +import { Clock, RefreshCw, WifiOff, ChevronUp, ChevronDown, ChevronsUpDown, X, Loader2, FileText, Globe, ImageIcon, Video } from 'lucide-react' + + +/* ── Sort icon helper ─────────────────────────────────── */ +function SortIcon({ field, current, dir }) { + if (current !== field) return