Spaces:

lanczos
/

aesthetic-annotators

Sleeping

App Files Files Community

lanczos committed on Apr 24

Commit

b8cd5c3

verified ·

1 Parent(s): 1e767b9

deploy: labeling server

Browse files

Files changed (6) hide show

labeling/static/app.js +167 -61
labeling/static/index.html +2 -2
labeling/static/style.css +29 -3
spaces/space_entry.py +5 -0
src/aamcq/annotation/api.py +227 -3
src/aamcq/annotation/db.py +94 -9

labeling/static/app.js CHANGED Viewed

@@ -2,11 +2,11 @@
 const AXES = ["art_style", "color", "art_medium", "lighting"];
 const TOKEN_STORAGE_KEY = "aamcq_token";
 const THEME_STORAGE_KEY = "aamcq_theme";
-const ROUNDS_KEY = "aamcq_rounds_done";
 const PASSWORD_SESSION_KEY = "aamcq_access_password";
 const FIRST_SESSION_CAP = 20;
-const REPEAT_SESSION_CAP = 10;
 function setTheme(theme) {
   document.documentElement.setAttribute("data-theme", theme);
@@ -44,28 +44,16 @@ async function fetchJSON(path, init) {
   return resp.json();
 }
-// Tokens come from three places, in order:
-//   1. ?token=... in the URL (coordinator-issued personal link — old flow)
-//   2. localStorage (returning visitor)
-//   3. POST /api/register (fresh anonymous session; only works when the
-//      server was launched with --anonymous-register)
-function nextSessionCap() {
-  const rounds = parseInt(localStorage.getItem(ROUNDS_KEY) || "0", 10);
-  return rounds === 0 ? FIRST_SESSION_CAP : REPEAT_SESSION_CAP;
 }
-async function attemptRegister(cap, password) {
-  const params = new URLSearchParams({ cap: String(cap) });
-  if (password) params.set("password", password);
-  return fetch(`/api/register?${params.toString()}`, { method: "POST" });
-}
-async function registerFresh(cap) {
-  // Try with whatever password we have cached (could be empty).
   let password = sessionStorage.getItem(PASSWORD_SESSION_KEY) || "";
-  let resp = await attemptRegister(cap, password);
-  // 403 means server wants a password — prompt (and re-prompt on mismatch).
   while (resp.status === 403) {
     sessionStorage.removeItem(PASSWORD_SESSION_KEY);
     const entered = window.prompt(
@@ -73,22 +61,21 @@ async function registerFresh(cap) {
         ? "Wrong access password. Try again:"
         : "Enter the access password to start labeling:"
     );
-    if (entered == null) {
-      throw new Error("Access password required.");
-    }
     password = entered;
-    resp = await attemptRegister(cap, password);
   }
   if (!resp.ok) {
-    throw new Error(
-      "No ?token= in URL and anonymous registration is disabled on this server."
-    );
   }
   if (password) sessionStorage.setItem(PASSWORD_SESSION_KEY, password);
-  const { token } = await resp.json();
-  localStorage.setItem(TOKEN_STORAGE_KEY, token);
-  return token;
 }
 async function ensureToken() {
@@ -99,7 +86,8 @@ async function ensureToken() {
   }
   const stored = localStorage.getItem(TOKEN_STORAGE_KEY);
   if (stored) return stored;
-  return registerFresh(nextSessionCap());
 }
 function renderProfileCard(idx, profile) {
@@ -141,29 +129,14 @@ async function loadNext(token) {
   const submit = document.getElementById("submit");
   const err = document.getElementById("error");
   err.textContent = "";
   if (data.done) {
-    const labeled = data.labeled ?? 0;
-    const msg =
-      data.reason === "cap_reached"
-        ? `All done — you labeled ${labeled} items. Thank you!`
-        : `All items are fully labeled (you contributed ${labeled}). Thank you!`;
-    card.innerHTML = `<p class='done'>${msg}</p>`;
-    if (data.reason === "cap_reached") {
-      const btn = document.createElement("button");
-      btn.id = "new-session";
-      btn.textContent = `Start a new session (+${REPEAT_SESSION_CAP} more)`;
-      btn.addEventListener("click", () => {
-        const rounds = parseInt(localStorage.getItem(ROUNDS_KEY) || "0", 10);
-        localStorage.setItem(ROUNDS_KEY, String(rounds + 1));
-        localStorage.removeItem(TOKEN_STORAGE_KEY);
-        location.reload();
-      });
-      card.appendChild(btn);
-    }
     submit.disabled = true;
     updateProgress(data.labeled, data.cap);
     return;
   }
   currentItem = data;
   shownAt = performance.now();
   document.getElementById("stimulus").src = data.image_url;
@@ -172,18 +145,151 @@ async function loadNext(token) {
   const form = document.getElementById("options");
   form.innerHTML = "";
   const options = data.payload.options || [];
-  options.forEach((opt, i) => {
-    form.appendChild(renderProfileCard(i, opt));
-  });
   submit.disabled = true;
   form.querySelectorAll("input[type=radio]").forEach((el) => {
-    el.addEventListener("change", () => {
-      submit.disabled = false;
-    });
   });
   updateProgress(data.labeled, data.cap);
 }
 function updateProgress(labeled, cap) {
   const el = document.getElementById("progress");
   if (cap != null) {
@@ -214,11 +320,11 @@ async function submitLabel(token) {
 }
 async function recoverFromInvalidToken() {
-  // Server doesn't know this token (DB was reset, Space rebuild lost state,
-  // etc.). Wipe client state and start a fresh session.
   localStorage.removeItem(TOKEN_STORAGE_KEY);
-  localStorage.removeItem(ROUNDS_KEY);
-  return registerFresh(FIRST_SESSION_CAP);
 }
 async function main() {

 const AXES = ["art_style", "color", "art_medium", "lighting"];
 const TOKEN_STORAGE_KEY = "aamcq_token";
+const EMAIL_STORAGE_KEY = "aamcq_email";
 const THEME_STORAGE_KEY = "aamcq_theme";
 const PASSWORD_SESSION_KEY = "aamcq_access_password";
 const FIRST_SESSION_CAP = 20;
+const EXTRA_ROUND_CAP = 10;
 function setTheme(theme) {
   document.documentElement.setAttribute("data-theme", theme);
   return resp.json();
 }
+// POST to /api/register with `params` encoded as the query string.
+// `password` is appended as `password=` only when it is non-empty.
+// Resolves to the raw fetch Response (not parsed JSON) so the caller can
+// branch on the HTTP status itself.
+async function attemptRegister(params, password) {
+  const qs = new URLSearchParams(params);
+  if (password) qs.set("password", password); // leave the param out entirely when no password is known
+  return fetch(`/api/register?${qs.toString()}`, { method: "POST" });
 }
+async function registerWithParams(params) {
+  // Try with cached password (could be empty).
   let password = sessionStorage.getItem(PASSWORD_SESSION_KEY) || "";
+  let resp = await attemptRegister(params, password);
   while (resp.status === 403) {
     sessionStorage.removeItem(PASSWORD_SESSION_KEY);
     const entered = window.prompt(
         ? "Wrong access password. Try again:"
         : "Enter the access password to start labeling:"
     );
+    if (entered == null) throw new Error("Access password required.");
     password = entered;
+    resp = await attemptRegister(params, password);
+  }
+  if (resp.status === 429) {
+    throw new HttpError(429, await resp.text());
   }
   if (!resp.ok) {
+    const body = await resp.text();
+    throw new HttpError(resp.status, body);
   }
   if (password) sessionStorage.setItem(PASSWORD_SESSION_KEY, password);
+  const data = await resp.json();
+  localStorage.setItem(TOKEN_STORAGE_KEY, data.token);
+  return data;
 }
 async function ensureToken() {
   }
   const stored = localStorage.getItem(TOKEN_STORAGE_KEY);
   if (stored) return stored;
+  const { token } = await registerWithParams({ cap: String(FIRST_SESSION_CAP) });
+  return token;
 }
 function renderProfileCard(idx, profile) {
   const submit = document.getElementById("submit");
   const err = document.getElementById("error");
   err.textContent = "";
   if (data.done) {
+    await renderDonePage(token, data);
     submit.disabled = true;
     updateProgress(data.labeled, data.cap);
     return;
   }
   currentItem = data;
   shownAt = performance.now();
   document.getElementById("stimulus").src = data.image_url;
   const form = document.getElementById("options");
   form.innerHTML = "";
   const options = data.payload.options || [];
+  options.forEach((opt, i) => form.appendChild(renderProfileCard(i, opt)));
   submit.disabled = true;
   form.querySelectorAll("input[type=radio]").forEach((el) => {
+    el.addEventListener("change", () => { submit.disabled = false; });
   });
   updateProgress(data.labeled, data.cap);
 }
+async function renderDonePage(token, taskData) {
+  const card = document.getElementById("card");
+  const labeled = taskData.labeled ?? 0;
+  if (taskData.reason !== "cap_reached") {
+    // Pool is drained entirely. Thank and stop.
+    card.innerHTML = `<p class='done'>All items are fully labeled (you contributed ${labeled}). Thank you!</p>`;
+    return;
+  }
+  // cap_reached: fetch detailed session status to decide UI state
+  let status;
+  try {
+    status = await fetchJSON(`/api/session_status?token=${encodeURIComponent(token)}`);
+  } catch (e) {
+    card.innerHTML = `<p class='done'>Session complete (${labeled} labeled). Couldn't load status: ${e.message}</p>`;
+    return;
+  }
+  card.innerHTML = "";
+  const msg = document.createElement("p");
+  msg.className = "done";
+  card.appendChild(msg);
+  if (!status.acc_pass) {
+    // Fail state
+    msg.innerHTML =
+      `<strong>Low agreement rate detected.</strong><br>` +
+      `Your ${labeled}-item session doesn't meet the quality threshold ` +
+      `and can't be credited. Please try again more carefully.`;
+    msg.classList.add("fail");
+    const btn = document.createElement("button");
+    btn.id = "retry-session";
+    if (status.round_number === 1) {
+      btn.textContent = "Try again (careful labeling)";
+      btn.addEventListener("click", () => {
+        // Round-1 fail: wipe everything — no email on file yet.
+        localStorage.removeItem(TOKEN_STORAGE_KEY);
+        localStorage.removeItem(EMAIL_STORAGE_KEY);
+        location.reload();
+      });
+    } else {
+      btn.textContent = `Redo round ${status.round_number}`;
+      btn.addEventListener("click", async () => {
+        // Round 2+ fail: clear token but keep email, re-register same round.
+        localStorage.removeItem(TOKEN_STORAGE_KEY);
+        const email = status.email || localStorage.getItem(EMAIL_STORAGE_KEY);
+        if (!email) { location.reload(); return; }
+        try {
+          await registerWithParams({
+            cap: String(EXTRA_ROUND_CAP),
+            email,
+            round: String(status.round_number),
+          });
+          location.reload();
+        } catch (e) {
+          document.getElementById("error").textContent = `Retry failed: ${e.message}`;
+        }
+      });
+    }
+    card.appendChild(btn);
+    return;
+  }
+  // Pass state
+  if (status.round_number === 1 && !status.email) {
+    // Needs email submission
+    msg.innerHTML =
+      `<strong>Great job!</strong> Your round-1 session passed the quality check. ` +
+      `Submit your email to be entered in the lottery.`;
+    const form = document.createElement("form");
+    form.id = "email-form";
+    form.innerHTML =
+      `<input type="email" id="email-input" placeholder="your@email" required autocomplete="email" />` +
+      `<button type="submit">Submit email</button>`;
+    card.appendChild(form);
+    form.addEventListener("submit", async (ev) => {
+      ev.preventDefault();
+      const email = document.getElementById("email-input").value.trim();
+      if (!email) return;
+      try {
+        const resp = await fetchJSON("/api/submit_email", {
+          method: "POST",
+          headers: { "content-type": "application/json" },
+          body: JSON.stringify({ token, email }),
+        });
+        localStorage.setItem(EMAIL_STORAGE_KEY, email);
+        // Re-render with updated status
+        await renderDonePage(token, taskData);
+      } catch (e) {
+        document.getElementById("error").textContent = `Submit failed: ${e.message}`;
+      }
+    });
+    return;
+  }
+  // Already on email chain (round 1 email submitted OR round 2+)
+  const multiplier = status.multiplier || 1.0;
+  msg.innerHTML =
+    `<strong>Thanks, ${status.email}!</strong><br>` +
+    `Your lottery multiplier is now <strong>${multiplier.toFixed(2)}×</strong> ` +
+    `(${labeled}-item round ${status.round_number} passed).`;
+  if (status.can_extend) {
+    const extraLeft = 3 - status.round_number;
+    const nextMultiplier = Math.min(multiplier + 0.25, 1.5);
+    const info = document.createElement("p");
+    info.style.color = "var(--muted)";
+    info.textContent =
+      `Label ${EXTRA_ROUND_CAP} more to reach ${nextMultiplier.toFixed(2)}× ` +
+      `(${extraLeft} bonus round${extraLeft > 1 ? "s" : ""} remaining).`;
+    card.appendChild(info);
+    const btn = document.createElement("button");
+    btn.id = "extend-session";
+    btn.textContent = `Label ${EXTRA_ROUND_CAP} more (+0.25×)`;
+    btn.addEventListener("click", async () => {
+      try {
+        await registerWithParams({
+          cap: String(EXTRA_ROUND_CAP),
+          email: status.email,
+          round: String(status.round_number + 1),
+        });
+        location.reload();
+      } catch (e) {
+        document.getElementById("error").textContent = `Couldn't start round: ${e.message}`;
+      }
+    });
+    card.appendChild(btn);
+  } else {
+    const info = document.createElement("p");
+    info.style.color = "var(--muted)";
+    info.textContent = "Maximum lottery multiplier reached. Thanks for labeling!";
+    card.appendChild(info);
+  }
+}
 function updateProgress(labeled, cap) {
   const el = document.getElementById("progress");
   if (cap != null) {
 }
 async function recoverFromInvalidToken() {
+  // Server doesn't know this token. Wipe client state and start fresh:
+  // drop the stored token and email, then register a new first-round
+  // session (FIRST_SESSION_CAP items) and hand back its token.
   localStorage.removeItem(TOKEN_STORAGE_KEY);
+  localStorage.removeItem(EMAIL_STORAGE_KEY);
+  const { token } = await registerWithParams({ cap: String(FIRST_SESSION_CAP) });
+  return token;
 }
 async function main() {

labeling/static/index.html CHANGED Viewed

@@ -4,7 +4,7 @@
   <meta charset="utf-8" />
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <title>AestheticMCQ — Annotation</title>
-  <link rel="stylesheet" href="/style.css?v=8" />
   <script>
     // Apply saved theme before CSS paints to avoid a flash.
     (function () {
@@ -41,6 +41,6 @@
       <span id="error"></span>
     </footer>
   </main>
-  <script src="/app.js?v=11"></script>
 </body>
 </html>

   <meta charset="utf-8" />
   <meta name="viewport" content="width=device-width, initial-scale=1" />
   <title>AestheticMCQ — Annotation</title>
+  <link rel="stylesheet" href="/style.css?v=9" />
   <script>
     // Apply saved theme before CSS paints to avoid a flash.
     (function () {
       <span id="error"></span>
     </footer>
   </main>
+  <script src="/app.js?v=12"></script>
 </body>
 </html>

labeling/static/style.css CHANGED Viewed

@@ -179,9 +179,11 @@ button#submit:disabled {
 }
 #error { color: #e66; font-size: 0.9rem; }
-.done { text-align: center; font-size: 1.2rem; color: var(--muted); margin: 24px 0 12px; }
-button#new-session {
   display: block;
   margin: 12px auto 0;
   background: transparent;
@@ -192,4 +194,28 @@ button#new-session {
   font-size: 0.95rem;
   cursor: pointer;
 }
-button#new-session:hover { background: var(--card); }

 }
 #error { color: #e66; font-size: 0.9rem; }
+.done { text-align: center; font-size: 1.1rem; color: var(--fg); margin: 24px 0 12px; line-height: 1.5; }
+.done.fail { color: #d33; }
+:root[data-theme="light"] .done.fail { color: #c1272d; }
+#card button {
   display: block;
   margin: 12px auto 0;
   background: transparent;
   font-size: 0.95rem;
   cursor: pointer;
 }
+#card button:hover { background: var(--card); }
+#email-form {
+  display: flex;
+  gap: 8px;
+  justify-content: center;
+  margin: 12px auto;
+  max-width: 420px;
+}
+#email-form input[type=email] {
+  flex: 1;
+  padding: 8px 12px;
+  font-size: 1rem;
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  background: var(--card);
+  color: var(--fg);
+}
+#email-form button {
+  background: var(--accent);
+  color: #fff;
+  border: 0;
+  padding: 8px 16px;
+  margin: 0;
+}

spaces/space_entry.py CHANGED Viewed

@@ -15,6 +15,8 @@ Env vars:
   AAMCQ_LABELS_PER_ITEM      default: 3
   AAMCQ_BACKUP_INTERVAL      default: 60 (seconds)
   AAMCQ_ACCESS_PASSWORD      optional; if set, /api/register requires it
 """
 from __future__ import annotations
@@ -43,6 +45,7 @@ BACKUP_INTERVAL = int(os.environ.get("AAMCQ_BACKUP_INTERVAL", "60"))
 PER_ANNOTATOR_CAP = int(os.environ.get("AAMCQ_PER_ANNOTATOR_CAP", "20"))
 LABELS_PER_ITEM = int(os.environ.get("AAMCQ_LABELS_PER_ITEM", "3"))
 ACCESS_PASSWORD = os.environ.get("AAMCQ_ACCESS_PASSWORD") or None
 def _require_token() -> str:
@@ -162,11 +165,13 @@ def main() -> int:
         max_labels_per_item=LABELS_PER_ITEM,
         max_labels_per_annotator=PER_ANNOTATOR_CAP,
         access_password=ACCESS_PASSWORD,
     )
     if ACCESS_PASSWORD:
         print("access password gate: ON")
     else:
         print("access password gate: OFF (set AAMCQ_ACCESS_PASSWORD to enable)")
     @app.on_event("startup")
     async def _start_backup() -> None:

   AAMCQ_LABELS_PER_ITEM      default: 3
   AAMCQ_BACKUP_INTERVAL      default: 60 (seconds)
   AAMCQ_ACCESS_PASSWORD      optional; if set, /api/register requires it
+  AAMCQ_ACC_THRESHOLD        default: 0.40; min session acc vs correct_index
+                             required to unlock email submission
 """
 from __future__ import annotations
 PER_ANNOTATOR_CAP = int(os.environ.get("AAMCQ_PER_ANNOTATOR_CAP", "20"))
 LABELS_PER_ITEM = int(os.environ.get("AAMCQ_LABELS_PER_ITEM", "3"))
 ACCESS_PASSWORD = os.environ.get("AAMCQ_ACCESS_PASSWORD") or None
+ACC_THRESHOLD = float(os.environ.get("AAMCQ_ACC_THRESHOLD", "0.40"))
 def _require_token() -> str:
         max_labels_per_item=LABELS_PER_ITEM,
         max_labels_per_annotator=PER_ANNOTATOR_CAP,
         access_password=ACCESS_PASSWORD,
+        acc_threshold=ACC_THRESHOLD,
     )
     if ACCESS_PASSWORD:
         print("access password gate: ON")
     else:
         print("access password gate: OFF (set AAMCQ_ACCESS_PASSWORD to enable)")
+    print(f"acc_threshold: {ACC_THRESHOLD}")
     @app.on_event("startup")
     async def _start_backup() -> None:

src/aamcq/annotation/api.py CHANGED Viewed

@@ -4,10 +4,13 @@ from __future__ import annotations
 import hmac
 import os
 import sqlite3
 from pathlib import Path
-from fastapi import Depends, FastAPI, HTTPException, Query
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field, conint
@@ -15,6 +18,23 @@ from pydantic import BaseModel, Field, conint
 from aamcq.annotation import db as dbmod
 from aamcq.annotation.assignment import bootstrap_annotators
 REPO_ROOT = Path(__file__).resolve().parents[3]
 DEFAULT_DB = REPO_ROOT / "data" / "annotations.sqlite"
 DEFAULT_IMAGE_DIR = REPO_ROOT / "data" / "images"
@@ -34,6 +54,11 @@ def _sanitize_item(payload: dict) -> dict:
     return {k: v for k, v in payload.items() if k != "correct_index"}
 def create_app(
     db_path: str | os.PathLike[str] | None = None,
     image_dir: str | os.PathLike[str] | None = None,
@@ -43,6 +68,8 @@ def create_app(
     max_labels_per_item: int = 3,
     max_labels_per_annotator: int | None = None,
     access_password: str | None = None,
 ) -> FastAPI:
     """Labeling server.
@@ -64,6 +91,14 @@ def create_app(
     `access_password`: if set, `/api/register` requires a matching
     `?password=` query param (constant-time compared). Cheap anti-spam
     gate for public Spaces — existing tokens keep working regardless.
     """
     db_path = Path(db_path or DEFAULT_DB)
     image_dir = Path(image_dir or DEFAULT_IMAGE_DIR)
@@ -79,6 +114,9 @@ def create_app(
     app.state.max_labels_per_item = max_labels_per_item
     app.state.max_labels_per_annotator = max_labels_per_annotator
     app.state.access_password = access_password
     @app.middleware("http")
     async def _deny_framing(request, call_next):
@@ -107,6 +145,30 @@ def create_app(
         per = dbmod.get_annotator_cap(conn, annotator_id)
         return per if per is not None else app.state.max_labels_per_annotator
     def _next_task_payload(annotator_id: str, conn: sqlite3.Connection, n_done: int) -> dict:
         cap = _effective_cap(conn, annotator_id)
         if cap is not None and n_done >= cap:
@@ -128,8 +190,11 @@ def create_app(
     @app.post("/api/register")
     def api_register(
         cap: int | None = Query(default=None, ge=1, le=10000),
         password: str | None = Query(default=None, max_length=256),
         conn: sqlite3.Connection = Depends(get_conn),
     ):
         """Mint a fresh anonymous annotator. Only enabled when anonymous_register.
@@ -138,6 +203,13 @@ def create_app(
         server default (used by the frontend to give the first session a
         larger quota than subsequent ones).
         If `access_password` was set at startup, `?password=` must match
         (constant-time compared) or we return 403.
         """
@@ -147,6 +219,28 @@ def create_app(
         if expected:
             if not password or not hmac.compare_digest(password, expected):
                 raise HTTPException(status_code=403, detail="wrong access password")
         existing = {row["annotator_id"] for row in conn.execute(
             "SELECT annotator_id FROM annotators"
         )}
@@ -158,8 +252,20 @@ def create_app(
             n += 1
             if n > 8:
                 raise HTTPException(status_code=500, detail="could not mint unique id")
-        tokens = bootstrap_annotators(conn, [candidate], cap=cap)
-        return {"annotator_id": candidate, "token": tokens[candidate], "cap": cap}
     @app.get("/api/task")
     def api_task(
@@ -197,6 +303,124 @@ def create_app(
         )
         return {"ok": True}
     @app.get("/api/progress")
     def api_progress(
         token: str = Query(min_length=8, max_length=128),

 import hmac
 import os
+import re
 import sqlite3
+import time
+from collections import deque
 from pathlib import Path
+from fastapi import Depends, FastAPI, HTTPException, Query, Request
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field, conint
 from aamcq.annotation import db as dbmod
 from aamcq.annotation.assignment import bootstrap_annotators
+DEFAULT_ACC_THRESHOLD = 0.40
+MAX_LOTTERY_ROUND = 3  # round 1 base, +2 bonus rounds cap multiplier at 1.5x
+_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
+def _is_valid_email(email: str) -> bool:
+    """Return True when `email` is non-empty, at most 254 characters long,
+    and matches `_EMAIL_RE`. This is a shape check only; nothing here
+    verifies that the address exists."""
+    return bool(email) and len(email) <= 254 and bool(_EMAIL_RE.match(email))
+def _multiplier(passed_rounds: set[int]) -> float:
+    """Return the lottery multiplier for a set of passed round numbers.
+    Lottery multiplier = 1.0 + 0.25 * (extra passed rounds beyond round 1),
+    capped at 1.5 (i.e. rounds 1+2+3 all passed). Returns 0.0 when round 1
+    itself is not in `passed_rounds`, regardless of any later rounds."""
+    if 1 not in passed_rounds:
+        return 0.0  # no credit at all without a passed round 1
+    extras = len([r for r in passed_rounds if r > 1])  # count of rounds numbered above 1
+    return min(1.0 + 0.25 * extras, 1.5)
 REPO_ROOT = Path(__file__).resolve().parents[3]
 DEFAULT_DB = REPO_ROOT / "data" / "annotations.sqlite"
 DEFAULT_IMAGE_DIR = REPO_ROOT / "data" / "images"
     return {k: v for k, v in payload.items() if k != "correct_index"}
+class SubmitEmailPayload(BaseModel):
+    """JSON request body accepted by POST /api/submit_email."""
+    # Annotator token, bounded to 8..128 characters.
+    token: str = Field(min_length=8, max_length=128)
+    # Only the length is bounded here (3..254); the address format is
+    # checked by the handler via _is_valid_email.
+    email: str = Field(min_length=3, max_length=254)
 def create_app(
     db_path: str | os.PathLike[str] | None = None,
     image_dir: str | os.PathLike[str] | None = None,
     max_labels_per_item: int = 3,
     max_labels_per_annotator: int | None = None,
     access_password: str | None = None,
+    acc_threshold: float = DEFAULT_ACC_THRESHOLD,
+    register_rate_limit: tuple[int, int] | None = (5, 3600),
 ) -> FastAPI:
     """Labeling server.
     `access_password`: if set, `/api/register` requires a matching
     `?password=` query param (constant-time compared). Cheap anti-spam
     gate for public Spaces — existing tokens keep working regardless.
+    `acc_threshold`: session accuracy (vs `correct_index`) required for
+    email submission + lottery bonus credit. Defaults to 0.40. At
+    p_random=0.25 and n=20, false-positive rate is ~10%, so pair with
+    IP rate-limiting + UNIQUE-email-per-round checks.
+    `register_rate_limit`: (max_requests, window_seconds) tuple. Defaults
+    to (5, 3600) — per-IP rolling window. Pass None to disable.
     """
     db_path = Path(db_path or DEFAULT_DB)
     image_dir = Path(image_dir or DEFAULT_IMAGE_DIR)
     app.state.max_labels_per_item = max_labels_per_item
     app.state.max_labels_per_annotator = max_labels_per_annotator
     app.state.access_password = access_password
+    app.state.acc_threshold = float(acc_threshold)
+    app.state.register_rate_limit = register_rate_limit
+    app.state.register_hits: dict[str, deque[float]] = {}
     @app.middleware("http")
     async def _deny_framing(request, call_next):
         per = dbmod.get_annotator_cap(conn, annotator_id)
         return per if per is not None else app.state.max_labels_per_annotator
+    def _client_ip(request: Request) -> str:
+        """Return the client address string used as the rate-limit key.
+        Prefers the first X-Forwarded-For entry, then the direct peer host,
+        and falls back to the literal "unknown" when neither is available.
+        """
+        # HF Spaces sit behind a proxy — take the first entry of
+        # X-Forwarded-For if present, else the direct peer address.
+        # NOTE(review): the left-most X-Forwarded-For value can be supplied
+        # by the client itself if the fronting proxy appends to the header
+        # rather than overwriting it — confirm the proxy's behaviour before
+        # relying on this value for abuse control.
+        xff = request.headers.get("x-forwarded-for")
+        if xff:
+            return xff.split(",")[0].strip()
+        return request.client.host if request.client else "unknown"
+    def _check_rate_limit(ip: str) -> None:
+        """Enforce the rolling-window limit on register requests for `ip`.
+        Does nothing when `app.state.register_rate_limit` is None. Otherwise
+        discards this IP's hits older than the window, raises
+        HTTPException(429) if `max_req` hits remain, and records the
+        current request on success. Rejected requests are not recorded.
+        """
+        limit = app.state.register_rate_limit
+        if limit is None:
+            return
+        max_req, window = limit
+        now = time.time()
+        # One deque of hit timestamps per IP, oldest first.
+        # NOTE(review): only the calling IP's deque is pruned here, so keys
+        # for IPs that never return are not removed by this function —
+        # check whether anything else bounds `register_hits`.
+        q = app.state.register_hits.setdefault(ip, deque())
+        while q and q[0] < now - window:
+            q.popleft()
+        if len(q) >= max_req:
+            raise HTTPException(
+                status_code=429,
+                detail=f"too many register requests (limit {max_req}/{window}s)",
+            )
+        q.append(now)
     def _next_task_payload(annotator_id: str, conn: sqlite3.Connection, n_done: int) -> dict:
         cap = _effective_cap(conn, annotator_id)
         if cap is not None and n_done >= cap:
     @app.post("/api/register")
     def api_register(
+        request: Request,
         cap: int | None = Query(default=None, ge=1, le=10000),
         password: str | None = Query(default=None, max_length=256),
+        email: str | None = Query(default=None, max_length=254),
+        round: int = Query(default=1, ge=1, le=MAX_LOTTERY_ROUND),
         conn: sqlite3.Connection = Depends(get_conn),
     ):
         """Mint a fresh anonymous annotator. Only enabled when anonymous_register.
         server default (used by the frontend to give the first session a
         larger quota than subsequent ones).
+        `?email=` + `?round=N` attach this new annotator to an existing
+        email-based chain for lottery-multiplier rounds (round 2/3). For
+        round > 1 we verify the prior round was passed by an earlier
+        annotator with the same email; otherwise reject. round==1 can
+        include email but is rarer (email is normally attached later
+        via /api/submit_email once session acc is known).
         If `access_password` was set at startup, `?password=` must match
         (constant-time compared) or we return 403.
         """
         if expected:
             if not password or not hmac.compare_digest(password, expected):
                 raise HTTPException(status_code=403, detail="wrong access password")
+        _check_rate_limit(_client_ip(request))
+        if email is not None:
+            if not _is_valid_email(email):
+                raise HTTPException(status_code=400, detail="bad email format")
+            # round > 1 must chain off a PASSED prior round for this email.
+            if round > 1:
+                passed = dbmod.email_passed_rounds(
+                    conn, email, app.state.acc_threshold
+                )
+                if (round - 1) not in passed:
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"round {round} requires passed round {round-1} for this email",
+                    )
+                if round in passed:
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"round {round} already passed for this email",
+                    )
         existing = {row["annotator_id"] for row in conn.execute(
             "SELECT annotator_id FROM annotators"
         )}
             n += 1
             if n > 8:
                 raise HTTPException(status_code=500, detail="could not mint unique id")
+        # bootstrap_annotators wasn't set up for email/round; inline the
+        # same effect so we can pass those through.
+        token = dbmod.mint_token()
+        dbmod.insert_annotator(
+            conn, candidate, token, cap=cap, email=email, round_number=round
+        )
+        return {
+            "annotator_id": candidate,
+            "token": token,
+            "cap": cap,
+            "email": email,
+            "round_number": round,
+        }
     @app.get("/api/task")
     def api_task(
         )
         return {"ok": True}
+    @app.get("/api/session_status")
+    def api_session_status(
+        token: str = Query(min_length=8, max_length=128),
+        conn: sqlite3.Connection = Depends(get_conn),
+    ):
+        """Tell the frontend what the done-page should render.
+        Returns pass/fail flags — never the raw accuracy number — so
+        annotators can't binary-search the threshold by reloading.
+        Raises HTTPException(401) when no annotator row is found for the
+        resolved id. `acc_pass` can only be true once the cap is reached.
+        """
+        annotator_id = resolve_annotator(token, conn)
+        row = dbmod.get_annotator_row(conn, annotator_id)
+        if row is None:
+            raise HTTPException(status_code=401, detail="invalid token")
+        cap = _effective_cap(conn, annotator_id)
+        n_correct, n = dbmod.session_accuracy(conn, annotator_id)
+        cap_reached = cap is not None and n >= cap
+        # `n > 0` guards the division; an uncapped session never passes.
+        acc_pass = cap_reached and n > 0 and (n_correct / n) >= app.state.acc_threshold
+        # Lottery state: look at ALL annotators for this email (if any)
+        # to compute the current multiplier + whether more rounds are
+        # available.
+        email = row["email"]
+        multiplier = 0.0  # stays 0.0 when no email is attached
+        passed_rounds: set[int] = set()
+        if email:
+            passed_rounds = dbmod.email_passed_rounds(
+                conn, email, app.state.acc_threshold
+            )
+            # Include THIS session's pass optimistically so the UI shows
+            # the correct multiplier immediately after a passing round
+            # (row's cap may differ from the session_accuracy lookup if
+            # server cap default applied; use effective cap instead).
+            if acc_pass:
+                passed_rounds = passed_rounds | {int(row["round_number"])}
+            multiplier = _multiplier(passed_rounds)
+        # Another round is offered only after a pass, with an email on
+        # file, and while below the last lottery round.
+        can_extend = (
+            acc_pass
+            and email is not None
+            and int(row["round_number"]) < MAX_LOTTERY_ROUND
+        )
+        return {
+            "cap_reached": bool(cap_reached),
+            "n_labeled": int(n),
+            "cap": cap,
+            "acc_pass": bool(acc_pass),
+            "round_number": int(row["round_number"]),
+            "email": email,
+            "multiplier": multiplier,
+            "can_extend": bool(can_extend),
+            "next_round_cap": 10,  # hardcoded for now; NOTE(review): confirm the frontend actually reads this field
+        }
+    @app.post("/api/submit_email")
+    def api_submit_email(
+        payload: SubmitEmailPayload,
+        conn: sqlite3.Connection = Depends(get_conn),
+    ):
+        """Attach the email to a round-1 annotator whose session passed.
+        Guards:
+          - Annotator exists (valid token).
+          - round_number == 1 (email is introduced on round 1 only).
+          - Current email is NULL (can't overwrite once set).
+          - Session complete: n_labeled >= cap.
+          - acc >= threshold.
+          - Email hasn't already passed round 1 via another annotator
+            (prevents multiple lottery entries per email).
+        Failures are reported as HTTPException with status 400, 401, 403
+        or 409 depending on which guard rejected the request. On success
+        returns `ok`, the email, `round_number` 1 and the multiplier.
+        """
+        annotator_id = resolve_annotator(payload.token, conn)
+        row = dbmod.get_annotator_row(conn, annotator_id)
+        if row is None:
+            raise HTTPException(status_code=401, detail="invalid token")
+        if int(row["round_number"]) != 1:
+            raise HTTPException(
+                status_code=400, detail="email only submitted on round 1"
+            )
+        if row["email"]:
+            raise HTTPException(
+                status_code=409, detail="email already set for this annotator"
+            )
+        # NOTE(review): the email is used exactly as typed; whether
+        # differently-cased spellings count as the same address depends on
+        # the dbmod lookup — confirm.
+        if not _is_valid_email(payload.email):
+            raise HTTPException(status_code=400, detail="bad email format")
+        cap = _effective_cap(conn, annotator_id)
+        n_correct, n = dbmod.session_accuracy(conn, annotator_id)
+        # An uncapped annotator (cap is None) can never complete a session.
+        if cap is None or n < cap:
+            raise HTTPException(
+                status_code=400,
+                detail=f"session not complete ({n}/{cap})",
+            )
+        if n == 0 or (n_correct / n) < app.state.acc_threshold:
+            raise HTTPException(
+                status_code=403,
+                detail=f"accuracy below threshold ({app.state.acc_threshold})",
+            )
+        # Prevent double-credit: this email must not have a passed
+        # round 1 on another annotator.
+        # NOTE(review): this check and the write below are separate calls;
+        # whether two concurrent submissions of one email are serialized is
+        # not visible here — confirm.
+        if 1 in dbmod.email_passed_rounds(conn, payload.email, app.state.acc_threshold):
+            raise HTTPException(
+                status_code=409,
+                detail="email already credited for round 1",
+            )
+        dbmod.set_annotator_email(conn, annotator_id, payload.email)
+        # Re-query after the write; presumably this now includes the
+        # annotator just linked to the email.
+        passed_rounds = dbmod.email_passed_rounds(
+            conn, payload.email, app.state.acc_threshold
+        )
+        return {
+            "ok": True,
+            "email": payload.email,
+            "round_number": 1,
+            "multiplier": _multiplier(passed_rounds),
+        }
     @app.get("/api/progress")
     def api_progress(
         token: str = Query(min_length=8, max_length=128),

src/aamcq/annotation/db.py CHANGED Viewed

@@ -26,8 +26,11 @@ CREATE TABLE IF NOT EXISTS annotators (
     annotator_id TEXT PRIMARY KEY,
     token TEXT NOT NULL UNIQUE,
     created_at REAL NOT NULL,
-    cap INTEGER
 );
 CREATE TABLE IF NOT EXISTS assignments (
     item_id TEXT NOT NULL,
     annotator_id TEXT NOT NULL,
@@ -87,11 +90,19 @@ def connect(db_path: str | Path) -> sqlite3.Connection:
 def init_schema(conn: sqlite3.Connection) -> None:
     conn.executescript(SCHEMA)
-    # Migrate older DBs that predate the `cap` column on annotators.
-    try:
-        conn.execute("ALTER TABLE annotators ADD COLUMN cap INTEGER")
-    except sqlite3.OperationalError:
-        pass  # column already exists
 def mint_token() -> str:
@@ -110,11 +121,14 @@ def insert_annotator(
     annotator_id: str,
     token: str,
     cap: int | None = None,
 ) -> None:
     conn.execute(
-        "INSERT OR REPLACE INTO annotators(annotator_id, token, created_at, cap) "
-        "VALUES (?, ?, ?, ?)",
-        (annotator_id, token, time.time(), cap),
     )
@@ -222,6 +236,77 @@ def next_pooled_item(
     )
 def count_annotator_labels(conn: sqlite3.Connection, annotator_id: str) -> int:
     return int(conn.execute(
         "SELECT COUNT(*) AS n FROM labels WHERE annotator_id = ?",

     annotator_id TEXT PRIMARY KEY,
     token TEXT NOT NULL UNIQUE,
     created_at REAL NOT NULL,
+    cap INTEGER,
+    email TEXT,
+    round_number INTEGER NOT NULL DEFAULT 1
 );
+CREATE INDEX IF NOT EXISTS idx_annotators_email ON annotators(email);
 CREATE TABLE IF NOT EXISTS assignments (
     item_id TEXT NOT NULL,
     annotator_id TEXT NOT NULL,
 def init_schema(conn: sqlite3.Connection) -> None:
+    """Create the schema on `conn`, upgrading an older database first.
+
+    SCHEMA creates an index on annotators(email). On a database whose
+    `annotators` table predates that column, CREATE TABLE IF NOT EXISTS
+    is a no-op and the CREATE INDEX statement would fail, so the missing
+    columns are added *before* SCHEMA is executed.
+
+    Raises:
+        sqlite3.OperationalError: if a migration or SCHEMA statement
+            fails (errors are no longer swallowed).
+    """
+    # (column name, DDL that adds it) for every column introduced after
+    # the original `annotators` table definition.
+    migrations = (
+        ("cap", "ALTER TABLE annotators ADD COLUMN cap INTEGER"),
+        ("email", "ALTER TABLE annotators ADD COLUMN email TEXT"),
+        (
+            "round_number",
+            "ALTER TABLE annotators ADD COLUMN round_number INTEGER NOT NULL DEFAULT 1",
+        ),
+    )
+    # PRAGMA table_info yields one row per column (name is field 1) and
+    # no rows at all when the table does not exist yet.
+    existing = {
+        info[1] for info in conn.execute("PRAGMA table_info(annotators)")
+    }
+    if existing:
+        # Only add what is actually missing, so a genuine failure (e.g. a
+        # locked database) surfaces instead of being mistaken for
+        # "column already exists".
+        for column, ddl in migrations:
+            if column not in existing:
+                conn.execute(ddl)
+    conn.executescript(SCHEMA)
 def mint_token() -> str:
     annotator_id: str,
     token: str,
     cap: int | None = None,
+    email: str | None = None,
+    round_number: int = 1,
 ) -> None:
     conn.execute(
+        "INSERT OR REPLACE INTO annotators"
+        "(annotator_id, token, created_at, cap, email, round_number) "
+        "VALUES (?, ?, ?, ?, ?, ?)",
+        (annotator_id, token, time.time(), cap, email, round_number),
     )
     )
+def get_annotator_row(conn: sqlite3.Connection, annotator_id: str) -> sqlite3.Row | None:
+    """Look up one annotator by id.
+
+    Returns the matching row (annotator_id, token, cap, email,
+    round_number, created_at) or None when no annotator has that id.
+    """
+    query = (
+        "SELECT annotator_id, token, cap, email, round_number, created_at "
+        "FROM annotators WHERE annotator_id = ?"
+    )
+    cursor = conn.execute(query, (annotator_id,))
+    return cursor.fetchone()
+def session_accuracy(
+    conn: sqlite3.Connection, annotator_id: str
+) -> tuple[int, int]:
+    """Count this annotator's labels and how many of them are correct.
+
+    A label counts as correct when its chosen_index equals the integer
+    stored at '$.correct_index' in the item's payload_json.
+
+    Returns:
+        (n_correct, n_total); (0, 0) when the annotator has no labels.
+    """
+    cursor = conn.execute(
+        """
+        SELECT
+          COUNT(*) AS n,
+          SUM(CASE WHEN l.chosen_index =
+              CAST(json_extract(i.payload_json, '$.correct_index') AS INTEGER)
+              THEN 1 ELSE 0 END) AS n_correct
+        FROM labels l
+        JOIN items i USING(item_id)
+        WHERE l.annotator_id = ?
+        """,
+        (annotator_id,),
+    )
+    totals = cursor.fetchone()
+    # SUM over zero rows is NULL, hence the `or 0` fallbacks.
+    labelled = int(totals["n"] or 0)
+    correct = int(totals["n_correct"] or 0)
+    return correct, labelled
+def set_annotator_email(
+    conn: sqlite3.Connection, annotator_id: str, email: str
+) -> None:
+    """Store `email` on the annotator row identified by `annotator_id`.
+
+    Issues a single UPDATE; rows with other ids are untouched. The
+    statement is not committed here.
+    """
+    params = (email, annotator_id)
+    conn.execute("UPDATE annotators SET email = ? WHERE annotator_id = ?", params)
+def email_passed_rounds(
+    conn: sqlite3.Connection, email: str, acc_threshold: float, target_cap: int | None = None
+) -> set[int]:
+    """Collect the round numbers this email has passed.
+
+    A round counts as passed when at least one annotator registered
+    under `email` has (a) labelled at least `cap` items and (b) an
+    accuracy of at least `acc_threshold`.
+
+    `target_cap` overrides the per-annotator cap when given; with the
+    default None each annotator's own stored cap is used, and an
+    annotator without a cap never counts as finished.
+    """
+    candidates = conn.execute(
+        "SELECT annotator_id, cap, round_number FROM annotators "
+        "WHERE email = ?",
+        (email,),
+    ).fetchall()
+    passed: set[int] = set()
+    for entry in candidates:
+        required = entry["cap"] if target_cap is None else target_cap
+        n_correct, n = session_accuracy(conn, entry["annotator_id"])
+        finished = required is not None and n >= required
+        # n != 0 guards the division when the cap itself is 0.
+        if finished and n != 0 and (n_correct / n) >= acc_threshold:
+            passed.add(int(entry["round_number"]))
+    return passed
 def count_annotator_labels(conn: sqlite3.Connection, annotator_id: str) -> int:
     return int(conn.execute(
         "SELECT COUNT(*) AS n FROM labels WHERE annotator_id = ?",