Spaces:

devrup404
/

SignalMod

Running

App Files Files Community

Ruperth committed 3 days ago

Commit

7ba2f95

1 Parent(s): 0ac8b84

feat: persist every prediction in supabase and expose history endpoint

Browse files

Files changed (4) hide show

src/api/routes/predict.py +44 -5
src/db/__init__.py +0 -0
src/db/supabase_client.py +117 -0
supabase/predictions_setup.sql +49 -0

src/api/routes/predict.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import time
-from fastapi import APIRouter, HTTPException
 from src.api.schemas import (
     BatchPredictRequest,
@@ -12,13 +12,23 @@ from src.api.schemas import (
 )
 from src.api.services import predict_single, to_predict_response
 from src.api.state import get_state
-from src.api.youtube import CommentsFetchError, fetch_comments
 router = APIRouter(tags=["Prediction"])
 @router.post("/predict", response_model=PredictResponse)
 async def predict(request: PredictRequest):
-    return predict_single(request.text, request.threshold)
 @router.post("/predict-batch", response_model=BatchPredictResponse)
@@ -28,7 +38,15 @@ async def predict_batch(request: BatchPredictRequest):
     for text in request.texts:
         if not text.strip():
             continue
-        results.append(predict_single(text.strip(), request.threshold))
     total_ms = round((time.perf_counter() - t0) * 1000, 2)
     toxic_count = sum(1 for r in results if r.is_toxic)
     return BatchPredictResponse(
@@ -51,6 +69,8 @@ async def predict_video(request: VideoRequest):
     if not comments:
         raise HTTPException(status_code=404, detail="No comments found for this video")
     t0 = time.perf_counter()
     results: list[PredictResponse] = []
     service = get_state()["service"]
@@ -61,7 +81,17 @@ async def predict_video(request: VideoRequest):
         if not text.strip():
             continue
         raw = service.predict(text)
-        results.append(to_predict_response(text, raw, 0.0, request.threshold))
     total_ms = round((time.perf_counter() - t0) * 1000, 2)
     toxic_count = sum(1 for r in results if r.is_toxic)
@@ -75,3 +105,12 @@ async def predict_video(request: VideoRequest):
         results=results,
         source=source,
     )

 import time
+from fastapi import APIRouter, HTTPException, Query
 from src.api.schemas import (
     BatchPredictRequest,
 )
 from src.api.services import predict_single, to_predict_response
 from src.api.state import get_state
+from src.api.youtube import CommentsFetchError, extract_video_id, fetch_comments
+from src.db.supabase_client import list_predictions, save_prediction
 router = APIRouter(tags=["Prediction"])
 @router.post("/predict", response_model=PredictResponse)
 async def predict(request: PredictRequest):
+    """Score one text and record the outcome before returning it.
+
+    The value produced by ``predict_single`` is returned to the caller
+    unchanged; ``save_prediction`` is called with it first, tagged with the
+    ``"api_direct"`` source and the request's threshold.
+    """
+    text, threshold = request.text, request.threshold
+    prediction = predict_single(text, threshold)
+    # NOTE(review): save_prediction is called without ``await`` inside an
+    # async handler — confirm it cannot stall the event loop under load.
+    save_prediction(
+        text=text,
+        result=prediction,
+        source="api_direct",
+        latency_ms=prediction.latency_ms,
+        threshold=threshold,
+    )
+    return prediction
 @router.post("/predict-batch", response_model=BatchPredictResponse)
     for text in request.texts:
         if not text.strip():
             continue
+        single = predict_single(text.strip(), request.threshold)
+        results.append(single)
+        save_prediction(
+            text=text.strip(),
+            result=single,
+            source="api_direct",
+            threshold=request.threshold,
+            latency_ms=single.latency_ms,
+        )
     total_ms = round((time.perf_counter() - t0) * 1000, 2)
     toxic_count = sum(1 for r in results if r.is_toxic)
     return BatchPredictResponse(
     if not comments:
         raise HTTPException(status_code=404, detail="No comments found for this video")
+    video_id = extract_video_id(request.url)
     t0 = time.perf_counter()
     results: list[PredictResponse] = []
     service = get_state()["service"]
         if not text.strip():
             continue
         raw = service.predict(text)
+        response = to_predict_response(text, raw, 0.0, request.threshold)
+        results.append(response)
+        save_prediction(
+            text=text,
+            result=response,
+            source="video_fetch",
+            video_id=video_id,
+            video_url=request.url,
+            threshold=request.threshold,
+            latency_ms=response.latency_ms,
+        )
     total_ms = round((time.perf_counter() - t0) * 1000, 2)
     toxic_count = sum(1 for r in results if r.is_toxic)
         results=results,
         source=source,
     )
+@router.get("/predictions")
+async def get_predictions(
+    video_id: str | None = Query(default=None),
+    limit: int = Query(default=50, ge=1, le=200),
+):
+    """Return stored prediction rows, optionally filtered by video.
+
+    ``video_id`` is forwarded as the filter and ``limit`` (validated to
+    1-200, default 50) as the row cap; whatever ``list_predictions``
+    returns becomes the response body.
+    """
+    return list_predictions(video_id=video_id, limit=limit)

src/db/__init__.py ADDED Viewed

File without changes

src/db/supabase_client.py ADDED Viewed

	@@ -0,0 +1,117 @@

+"""Optional Supabase persistence for predictions.
+The API works fine without credentials — all functions degrade
+gracefully when ``SUPABASE_URL`` / ``SUPABASE_KEY`` are missing.
+"""
+from __future__ import annotations
+import os
+from functools import lru_cache
+from typing import Any
+from src.utils.logger import get_logger
+logger = get_logger(__name__)
+# Import the Supabase SDK when it is installed. If the import fails, both
+# names are bound to None so this module can still be imported; get_client
+# checks ``create_client is None`` before trying to build a client.
+try:
+    from supabase import Client, create_client
+except ImportError:  # pragma: no cover - dep listed in pyproject
+    Client = None  # type: ignore[assignment,misc]
+    create_client = None  # type: ignore[assignment]
+# Table that save_prediction inserts into and list_predictions reads from.
+_TABLE = "predictions"
+@lru_cache(maxsize=1)
+def get_client() -> "Client | None":
+    """Build the Supabase client once and hand back the same object afterwards.
+
+    ``None`` is returned when ``SUPABASE_URL`` or ``SUPABASE_KEY`` is unset
+    or blank, when the ``supabase`` package could not be imported, or when
+    ``create_client`` raises. Because of ``lru_cache``, the outcome of the
+    first call (including ``None``) is reused on every later call until
+    ``get_client.cache_clear()`` is invoked.
+    """
+    supabase_url = os.getenv("SUPABASE_URL", "").strip()
+    supabase_key = os.getenv("SUPABASE_KEY", "").strip()
+    # Both settings are required; a missing one quietly disables persistence.
+    if not (supabase_url and supabase_key):
+        return None
+    if create_client is None:
+        logger.warning("supabase package not available; persistence disabled")
+        return None
+    try:
+        handle = create_client(supabase_url, supabase_key)
+        logger.info("Supabase client initialized")
+        return handle
+    except Exception as exc:  # pragma: no cover - network/config errors
+        logger.warning("Failed to initialize Supabase client: %s", exc)
+        return None
+def save_prediction(
+    text: str,
+    result: Any,
+    source: str,
+    video_id: str | None = None,
+    video_url: str | None = None,
+    threshold: float | None = None,
+    latency_ms: float | None = None,
+) -> None:
+    """Insert one prediction row; do nothing when no client is available.
+
+    ``result`` is read in one of three ways: through ``model_dump()`` when
+    the object has that method, directly when it is a ``dict``, and
+    otherwise attribute by attribute (``probability``, ``is_toxic``,
+    ``labels``, ``model_used``, ``latency_ms``). An explicit ``latency_ms``
+    argument takes precedence over the value carried by ``result``.
+
+    Any exception raised while building or inserting the row is logged as
+    a warning and swallowed, so callers never see a persistence failure.
+    """
+    db = get_client()
+    if db is None:
+        return
+    try:
+        if hasattr(result, "model_dump"):
+            fields = result.model_dump()
+        elif isinstance(result, dict):
+            fields = result
+        else:
+            # Arbitrary object: read each known attribute with its default.
+            attr_defaults = (
+                ("probability", None),
+                ("is_toxic", None),
+                ("labels", []),
+                ("model_used", ""),
+                ("latency_ms", None),
+            )
+            fields = {
+                name: getattr(result, name, fallback)
+                for name, fallback in attr_defaults
+            }
+        # Prefer the caller-supplied latency; fall back to the result's own.
+        if latency_ms is not None:
+            recorded_latency = latency_ms
+        else:
+            recorded_latency = fields.get("latency_ms")
+        row = {
+            "text": text,
+            "video_id": video_id,
+            "video_url": video_url,
+            "probability": fields.get("probability"),
+            "is_toxic": fields.get("is_toxic"),
+            "labels": fields.get("labels", []) or [],
+            "model_used": fields.get("model_used", ""),
+            "threshold": threshold,
+            "latency_ms": recorded_latency,
+            "source": source,
+        }
+        db.table(_TABLE).insert(row).execute()
+    except Exception as exc:
+        logger.warning("save_prediction failed (non-critical): %s", exc)
+def list_predictions(
+    video_id: str | None = None,
+    limit: int = 50,
+) -> list[dict]:
+    """Fetch the most recent prediction rows, newest ``created_at`` first.
+
+    A truthy ``video_id`` restricts the result to that video. ``limit`` is
+    clamped to the range 1-200. An empty list is returned when no client
+    is available or when the query raises (the error is logged as a
+    warning).
+    """
+    db = get_client()
+    if db is None:
+        return []
+    try:
+        stmt = db.table(_TABLE).select("*").order("created_at", desc=True)
+        if video_id:
+            stmt = stmt.eq("video_id", video_id)
+        row_cap = max(1, min(limit, 200))
+        result = stmt.limit(row_cap).execute()
+        # A missing or empty ``data`` attribute is reported as "no rows".
+        rows = getattr(result, "data", None)
+        return list(rows) if rows else []
+    except Exception as exc:
+        logger.warning("list_predictions failed: %s", exc)
+        return []

supabase/predictions_setup.sql ADDED Viewed

	@@ -0,0 +1,49 @@

+-- =====================================================================
+-- SignalMod — Predictions table setup
+-- Run this in Supabase SQL Editor:
+--   Dashboard → SQL Editor → New query → paste this → Run
+--
+-- The create/drop statements below are guarded with IF [NOT] EXISTS.
+-- =====================================================================
+-- 1. Table
+--    One row per prediction. `id` and `created_at` are filled in by the
+--    database; the remaining columns match the keys of the row built by
+--    save_prediction in src/db/supabase_client.py.
+create table if not exists public.predictions (
+    id          bigserial primary key,
+    created_at  timestamptz not null default now(),
+    text        text not null,
+    video_id    text,
+    video_url   text,
+    probability double precision,
+    is_toxic    boolean,
+    labels      text[] default '{}',
+    model_used  text,
+    threshold   double precision,
+    latency_ms  double precision,
+    source      text          -- "api_direct" | "video_fetch" | "user_comment"
+);
+-- 2. Indexes for the queries the API will run
+--    list_predictions orders by created_at descending and optionally
+--    filters on video_id; there is one index for each.
+create index if not exists predictions_created_at_idx
+    on public.predictions (created_at desc);
+create index if not exists predictions_video_id_idx
+    on public.predictions (video_id);
+-- 3. Row Level Security: allow anonymous insert + select
+--    (we are using the publishable key from the frontend / backend with no auth)
+--    NOTE(review): both policies below are unconditional (`true`), so any
+--    client acting as `anon` can insert rows and read every row, including
+--    the stored `text`. Confirm this exposure is acceptable.
+alter table public.predictions enable row level security;
+-- Each policy is dropped first so the `create policy` that follows does not
+-- collide with a policy left by an earlier run.
+drop policy if exists "anon_insert" on public.predictions;
+create policy "anon_insert"
+    on public.predictions
+    for insert
+    to anon
+    with check (true);
+drop policy if exists "anon_select" on public.predictions;
+create policy "anon_select"
+    on public.predictions
+    for select
+    to anon
+    using (true);
+-- 4. Sanity check (run separately if you want)
+-- select count(*) from public.predictions;