lanczos committed on
Commit
b8cd5c3
·
verified ·
1 Parent(s): 1e767b9

deploy: labeling server

Browse files
labeling/static/app.js CHANGED
@@ -2,11 +2,11 @@
2
 
3
  const AXES = ["art_style", "color", "art_medium", "lighting"];
4
  const TOKEN_STORAGE_KEY = "aamcq_token";
 
5
  const THEME_STORAGE_KEY = "aamcq_theme";
6
- const ROUNDS_KEY = "aamcq_rounds_done";
7
  const PASSWORD_SESSION_KEY = "aamcq_access_password";
8
  const FIRST_SESSION_CAP = 20;
9
- const REPEAT_SESSION_CAP = 10;
10
 
11
  function setTheme(theme) {
12
  document.documentElement.setAttribute("data-theme", theme);
@@ -44,28 +44,16 @@ async function fetchJSON(path, init) {
44
  return resp.json();
45
  }
46
 
47
- // Tokens come from three places, in order:
48
- // 1. ?token=... in the URL (coordinator-issued personal link — old flow)
49
- // 2. localStorage (returning visitor)
50
- // 3. POST /api/register (fresh anonymous session; only works when the
51
- // server was launched with --anonymous-register)
52
- function nextSessionCap() {
53
- const rounds = parseInt(localStorage.getItem(ROUNDS_KEY) || "0", 10);
54
- return rounds === 0 ? FIRST_SESSION_CAP : REPEAT_SESSION_CAP;
55
  }
56
 
57
- async function attemptRegister(cap, password) {
58
- const params = new URLSearchParams({ cap: String(cap) });
59
- if (password) params.set("password", password);
60
- return fetch(`/api/register?${params.toString()}`, { method: "POST" });
61
- }
62
-
63
- async function registerFresh(cap) {
64
- // Try with whatever password we have cached (could be empty).
65
  let password = sessionStorage.getItem(PASSWORD_SESSION_KEY) || "";
66
- let resp = await attemptRegister(cap, password);
67
-
68
- // 403 means server wants a password — prompt (and re-prompt on mismatch).
69
  while (resp.status === 403) {
70
  sessionStorage.removeItem(PASSWORD_SESSION_KEY);
71
  const entered = window.prompt(
@@ -73,22 +61,21 @@ async function registerFresh(cap) {
73
  ? "Wrong access password. Try again:"
74
  : "Enter the access password to start labeling:"
75
  );
76
- if (entered == null) {
77
- throw new Error("Access password required.");
78
- }
79
  password = entered;
80
- resp = await attemptRegister(cap, password);
 
 
 
81
  }
82
-
83
  if (!resp.ok) {
84
- throw new Error(
85
- "No ?token= in URL and anonymous registration is disabled on this server."
86
- );
87
  }
88
  if (password) sessionStorage.setItem(PASSWORD_SESSION_KEY, password);
89
- const { token } = await resp.json();
90
- localStorage.setItem(TOKEN_STORAGE_KEY, token);
91
- return token;
92
  }
93
 
94
  async function ensureToken() {
@@ -99,7 +86,8 @@ async function ensureToken() {
99
  }
100
  const stored = localStorage.getItem(TOKEN_STORAGE_KEY);
101
  if (stored) return stored;
102
- return registerFresh(nextSessionCap());
 
103
  }
104
 
105
  function renderProfileCard(idx, profile) {
@@ -141,29 +129,14 @@ async function loadNext(token) {
141
  const submit = document.getElementById("submit");
142
  const err = document.getElementById("error");
143
  err.textContent = "";
 
144
  if (data.done) {
145
- const labeled = data.labeled ?? 0;
146
- const msg =
147
- data.reason === "cap_reached"
148
- ? `All done — you labeled ${labeled} items. Thank you!`
149
- : `All items are fully labeled (you contributed ${labeled}). Thank you!`;
150
- card.innerHTML = `<p class='done'>${msg}</p>`;
151
- if (data.reason === "cap_reached") {
152
- const btn = document.createElement("button");
153
- btn.id = "new-session";
154
- btn.textContent = `Start a new session (+${REPEAT_SESSION_CAP} more)`;
155
- btn.addEventListener("click", () => {
156
- const rounds = parseInt(localStorage.getItem(ROUNDS_KEY) || "0", 10);
157
- localStorage.setItem(ROUNDS_KEY, String(rounds + 1));
158
- localStorage.removeItem(TOKEN_STORAGE_KEY);
159
- location.reload();
160
- });
161
- card.appendChild(btn);
162
- }
163
  submit.disabled = true;
164
  updateProgress(data.labeled, data.cap);
165
  return;
166
  }
 
167
  currentItem = data;
168
  shownAt = performance.now();
169
  document.getElementById("stimulus").src = data.image_url;
@@ -172,18 +145,151 @@ async function loadNext(token) {
172
  const form = document.getElementById("options");
173
  form.innerHTML = "";
174
  const options = data.payload.options || [];
175
- options.forEach((opt, i) => {
176
- form.appendChild(renderProfileCard(i, opt));
177
- });
178
  submit.disabled = true;
179
  form.querySelectorAll("input[type=radio]").forEach((el) => {
180
- el.addEventListener("change", () => {
181
- submit.disabled = false;
182
- });
183
  });
184
  updateProgress(data.labeled, data.cap);
185
  }
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  function updateProgress(labeled, cap) {
188
  const el = document.getElementById("progress");
189
  if (cap != null) {
@@ -214,11 +320,11 @@ async function submitLabel(token) {
214
  }
215
 
216
  async function recoverFromInvalidToken() {
217
- // Server doesn't know this token (DB was reset, Space rebuild lost state,
218
- // etc.). Wipe client state and start a fresh session.
219
  localStorage.removeItem(TOKEN_STORAGE_KEY);
220
- localStorage.removeItem(ROUNDS_KEY);
221
- return registerFresh(FIRST_SESSION_CAP);
 
222
  }
223
 
224
  async function main() {
 
2
 
3
  const AXES = ["art_style", "color", "art_medium", "lighting"];
4
  const TOKEN_STORAGE_KEY = "aamcq_token";
5
+ const EMAIL_STORAGE_KEY = "aamcq_email";
6
  const THEME_STORAGE_KEY = "aamcq_theme";
 
7
  const PASSWORD_SESSION_KEY = "aamcq_access_password";
8
  const FIRST_SESSION_CAP = 20;
9
+ const EXTRA_ROUND_CAP = 10;
10
 
11
  function setTheme(theme) {
12
  document.documentElement.setAttribute("data-theme", theme);
 
44
  return resp.json();
45
  }
46
 
47
/**
 * POST to /api/register with the given query parameters.
 *
 * @param {Object<string, string>} params - Query parameters such as `cap`,
 *   `email` and `round`. Entries of a plain object whose value is `null` or
 *   `undefined` are left out of the query string.
 * @param {string} [password] - Access password; sent as `password` only when
 *   it is a non-empty string.
 * @returns {Promise<Response>} The raw fetch response. Status handling
 *   (403 / 429 / other failures) is left to the caller.
 */
async function attemptRegister(params, password) {
  const qs = new URLSearchParams(params);
  if (params !== null && typeof params === "object") {
    for (const [key, value] of Object.entries(params)) {
      // URLSearchParams would otherwise send the literal text "null" or
      // "undefined" for these, which the server would treat as real values.
      if (value == null) qs.delete(key);
    }
  }
  if (password) qs.set("password", password);
  return fetch(`/api/register?${qs.toString()}`, { method: "POST" });
}
52
 
53
+ async function registerWithParams(params) {
54
+ // Try with cached password (could be empty).
 
 
 
 
 
 
55
  let password = sessionStorage.getItem(PASSWORD_SESSION_KEY) || "";
56
+ let resp = await attemptRegister(params, password);
 
 
57
  while (resp.status === 403) {
58
  sessionStorage.removeItem(PASSWORD_SESSION_KEY);
59
  const entered = window.prompt(
 
61
  ? "Wrong access password. Try again:"
62
  : "Enter the access password to start labeling:"
63
  );
64
+ if (entered == null) throw new Error("Access password required.");
 
 
65
  password = entered;
66
+ resp = await attemptRegister(params, password);
67
+ }
68
+ if (resp.status === 429) {
69
+ throw new HttpError(429, await resp.text());
70
  }
 
71
  if (!resp.ok) {
72
+ const body = await resp.text();
73
+ throw new HttpError(resp.status, body);
 
74
  }
75
  if (password) sessionStorage.setItem(PASSWORD_SESSION_KEY, password);
76
+ const data = await resp.json();
77
+ localStorage.setItem(TOKEN_STORAGE_KEY, data.token);
78
+ return data;
79
  }
80
 
81
  async function ensureToken() {
 
86
  }
87
  const stored = localStorage.getItem(TOKEN_STORAGE_KEY);
88
  if (stored) return stored;
89
+ const { token } = await registerWithParams({ cap: String(FIRST_SESSION_CAP) });
90
+ return token;
91
  }
92
 
93
  function renderProfileCard(idx, profile) {
 
129
  const submit = document.getElementById("submit");
130
  const err = document.getElementById("error");
131
  err.textContent = "";
132
+
133
  if (data.done) {
134
+ await renderDonePage(token, data);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  submit.disabled = true;
136
  updateProgress(data.labeled, data.cap);
137
  return;
138
  }
139
+
140
  currentItem = data;
141
  shownAt = performance.now();
142
  document.getElementById("stimulus").src = data.image_url;
 
145
  const form = document.getElementById("options");
146
  form.innerHTML = "";
147
  const options = data.payload.options || [];
148
+ options.forEach((opt, i) => form.appendChild(renderProfileCard(i, opt)));
 
 
149
  submit.disabled = true;
150
  form.querySelectorAll("input[type=radio]").forEach((el) => {
151
+ el.addEventListener("change", () => { submit.disabled = false; });
 
 
152
  });
153
  updateProgress(data.labeled, data.cap);
154
  }
155
 
156
/**
 * Render the end-of-session card for an annotator whose task feed is done.
 *
 * When the pool is drained (`taskData.reason !== "cap_reached"`) only a
 * thank-you note is shown. Otherwise `/api/session_status` is queried and the
 * card shows one of:
 *   - fail: quality check not met -> button to retry the same round;
 *   - pass, round 1, no email on file -> email submission form;
 *   - pass with email on file -> current multiplier and, when the server
 *     reports `can_extend`, a button that registers the next round.
 *
 * @param {string} token - Annotator token of the finished session.
 * @param {{reason?: string, labeled?: number}} taskData - The "done" payload
 *   handed over by loadNext.
 * @returns {Promise<void>}
 */
async function renderDonePage(token, taskData) {
  // Emails and error messages originate from user input / server responses,
  // so everything interpolated into innerHTML goes through esc() first.
  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  const showError = (text) => {
    document.getElementById("error").textContent = text;
  };

  const card = document.getElementById("card");
  const labeled = taskData.labeled ?? 0;

  if (taskData.reason !== "cap_reached") {
    // Pool is drained entirely. Thank and stop.
    card.innerHTML = `<p class='done'>All items are fully labeled (you contributed ${esc(labeled)}). Thank you!</p>`;
    return;
  }

  // cap_reached: fetch detailed session status to decide UI state.
  let status;
  try {
    status = await fetchJSON(`/api/session_status?token=${encodeURIComponent(token)}`);
  } catch (e) {
    card.innerHTML = `<p class='done'>Session complete (${esc(labeled)} labeled). Couldn't load status: ${esc(e.message)}</p>`;
    return;
  }

  card.innerHTML = "";
  const msg = document.createElement("p");
  msg.className = "done";
  card.appendChild(msg);

  if (!status.acc_pass) {
    // Fail state
    msg.innerHTML =
      `<strong>Low agreement rate detected.</strong><br>` +
      `Your ${esc(labeled)}-item session doesn't meet the quality threshold ` +
      `and can't be credited. Please try again more carefully.`;
    msg.classList.add("fail");
    const btn = document.createElement("button");
    btn.id = "retry-session";
    if (status.round_number === 1) {
      btn.textContent = "Try again (careful labeling)";
      btn.addEventListener("click", () => {
        // Round-1 fail: wipe everything — no email on file yet.
        localStorage.removeItem(TOKEN_STORAGE_KEY);
        localStorage.removeItem(EMAIL_STORAGE_KEY);
        location.reload();
      });
    } else {
      btn.textContent = `Redo round ${status.round_number}`;
      btn.addEventListener("click", async () => {
        // Round 2+ fail: keep the email and re-register the same round.
        const email = status.email || localStorage.getItem(EMAIL_STORAGE_KEY);
        if (!email) {
          // Nothing to chain from: drop the token so the reload starts fresh.
          localStorage.removeItem(TOKEN_STORAGE_KEY);
          location.reload();
          return;
        }
        try {
          // The current token is only replaced once registration succeeds
          // (registerWithParams stores the new one), so a rejected retry
          // leaves the user on this page instead of losing the session.
          await registerWithParams({
            cap: String(EXTRA_ROUND_CAP),
            email,
            round: String(status.round_number),
          });
          location.reload();
        } catch (e) {
          showError(`Retry failed: ${e.message}`);
        }
      });
    }
    card.appendChild(btn);
    return;
  }

  // Pass state
  if (status.round_number === 1 && !status.email) {
    // Needs email submission
    msg.innerHTML =
      `<strong>Great job!</strong> Your round-1 session passed the quality check. ` +
      `Submit your email to be entered in the lottery.`;
    const form = document.createElement("form");
    form.id = "email-form";
    form.innerHTML =
      `<input type="email" id="email-input" placeholder="your@email" required autocomplete="email" />` +
      `<button type="submit">Submit email</button>`;
    card.appendChild(form);
    form.addEventListener("submit", async (ev) => {
      ev.preventDefault();
      const email = document.getElementById("email-input").value.trim();
      if (!email) return;
      try {
        await fetchJSON("/api/submit_email", {
          method: "POST",
          headers: { "content-type": "application/json" },
          body: JSON.stringify({ token, email }),
        });
        localStorage.setItem(EMAIL_STORAGE_KEY, email);
        // Re-render with updated status
        await renderDonePage(token, taskData);
      } catch (e) {
        showError(`Submit failed: ${e.message}`);
      }
    });
    return;
  }

  // Already on email chain (round 1 email submitted OR round 2+)
  const multiplier = status.multiplier || 1.0;
  msg.innerHTML =
    `<strong>Thanks, ${esc(status.email)}!</strong><br>` +
    `Your lottery multiplier is now <strong>${multiplier.toFixed(2)}×</strong> ` +
    `(${esc(labeled)}-item round ${esc(status.round_number)} passed).`;

  if (status.can_extend) {
    // 3 rounds total, +0.25 per bonus round, capped at 1.5 — same numbers the
    // server uses for MAX_LOTTERY_ROUND and its multiplier calculation.
    const extraLeft = 3 - status.round_number;
    const nextMultiplier = Math.min(multiplier + 0.25, 1.5);
    const info = document.createElement("p");
    info.style.color = "var(--muted)";
    info.textContent =
      `Label ${EXTRA_ROUND_CAP} more to reach ${nextMultiplier.toFixed(2)}× ` +
      `(${extraLeft} bonus round${extraLeft > 1 ? "s" : ""} remaining).`;
    card.appendChild(info);

    const btn = document.createElement("button");
    btn.id = "extend-session";
    btn.textContent = `Label ${EXTRA_ROUND_CAP} more (+0.25×)`;
    btn.addEventListener("click", async () => {
      try {
        await registerWithParams({
          cap: String(EXTRA_ROUND_CAP),
          email: status.email,
          round: String(status.round_number + 1),
        });
        location.reload();
      } catch (e) {
        showError(`Couldn't start round: ${e.message}`);
      }
    });
    card.appendChild(btn);
  } else {
    const info = document.createElement("p");
    info.style.color = "var(--muted)";
    info.textContent = "Maximum lottery multiplier reached. Thanks for labeling!";
    card.appendChild(info);
  }
}
292
+
293
  function updateProgress(labeled, cap) {
294
  const el = document.getElementById("progress");
295
  if (cap != null) {
 
320
  }
321
 
322
/**
 * Recover from a token the server no longer recognises: clear the stored
 * token and email, then register a brand-new first session.
 *
 * @returns {Promise<string>} Token of the freshly registered session.
 */
async function recoverFromInvalidToken() {
  // Forget everything tied to the rejected token before registering again.
  for (const staleKey of [TOKEN_STORAGE_KEY, EMAIL_STORAGE_KEY]) {
    localStorage.removeItem(staleKey);
  }
  const registration = await registerWithParams({ cap: `${FIRST_SESSION_CAP}` });
  return registration.token;
}
329
 
330
  async function main() {
labeling/static/index.html CHANGED
@@ -4,7 +4,7 @@
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1" />
6
  <title>AestheticMCQ — Annotation</title>
7
- <link rel="stylesheet" href="/style.css?v=8" />
8
  <script>
9
  // Apply saved theme before CSS paints to avoid a flash.
10
  (function () {
@@ -41,6 +41,6 @@
41
  <span id="error"></span>
42
  </footer>
43
  </main>
44
- <script src="/app.js?v=11"></script>
45
  </body>
46
  </html>
 
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1" />
6
  <title>AestheticMCQ — Annotation</title>
7
+ <link rel="stylesheet" href="/style.css?v=9" />
8
  <script>
9
  // Apply saved theme before CSS paints to avoid a flash.
10
  (function () {
 
41
  <span id="error"></span>
42
  </footer>
43
  </main>
44
+ <script src="/app.js?v=12"></script>
45
  </body>
46
  </html>
labeling/static/style.css CHANGED
@@ -179,9 +179,11 @@ button#submit:disabled {
179
  }
180
 
181
  #error { color: #e66; font-size: 0.9rem; }
182
- .done { text-align: center; font-size: 1.2rem; color: var(--muted); margin: 24px 0 12px; }
 
 
183
 
184
- button#new-session {
185
  display: block;
186
  margin: 12px auto 0;
187
  background: transparent;
@@ -192,4 +194,28 @@ button#new-session {
192
  font-size: 0.95rem;
193
  cursor: pointer;
194
  }
195
- button#new-session:hover { background: var(--card); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  }
180
 
181
  #error { color: #e66; font-size: 0.9rem; }
182
+ .done { text-align: center; font-size: 1.1rem; color: var(--fg); margin: 24px 0 12px; line-height: 1.5; }
183
+ .done.fail { color: #d33; }
184
+ :root[data-theme="light"] .done.fail { color: #c1272d; }
185
 
186
+ #card button {
187
  display: block;
188
  margin: 12px auto 0;
189
  background: transparent;
 
194
  font-size: 0.95rem;
195
  cursor: pointer;
196
  }
197
+ #card button:hover { background: var(--card); }
198
+
199
+ #email-form {
200
+ display: flex;
201
+ gap: 8px;
202
+ justify-content: center;
203
+ margin: 12px auto;
204
+ max-width: 420px;
205
+ }
206
+ #email-form input[type=email] {
207
+ flex: 1;
208
+ padding: 8px 12px;
209
+ font-size: 1rem;
210
+ border: 1px solid var(--border);
211
+ border-radius: 6px;
212
+ background: var(--card);
213
+ color: var(--fg);
214
+ }
215
+ #email-form button {
216
+ background: var(--accent);
217
+ color: #fff;
218
+ border: 0;
219
+ padding: 8px 16px;
220
+ margin: 0;
221
+ }
spaces/space_entry.py CHANGED
@@ -15,6 +15,8 @@ Env vars:
15
  AAMCQ_LABELS_PER_ITEM default: 3
16
  AAMCQ_BACKUP_INTERVAL default: 60 (seconds)
17
  AAMCQ_ACCESS_PASSWORD optional; if set, /api/register requires it
 
 
18
  """
19
 
20
  from __future__ import annotations
@@ -43,6 +45,7 @@ BACKUP_INTERVAL = int(os.environ.get("AAMCQ_BACKUP_INTERVAL", "60"))
43
  PER_ANNOTATOR_CAP = int(os.environ.get("AAMCQ_PER_ANNOTATOR_CAP", "20"))
44
  LABELS_PER_ITEM = int(os.environ.get("AAMCQ_LABELS_PER_ITEM", "3"))
45
  ACCESS_PASSWORD = os.environ.get("AAMCQ_ACCESS_PASSWORD") or None
 
46
 
47
 
48
  def _require_token() -> str:
@@ -162,11 +165,13 @@ def main() -> int:
162
  max_labels_per_item=LABELS_PER_ITEM,
163
  max_labels_per_annotator=PER_ANNOTATOR_CAP,
164
  access_password=ACCESS_PASSWORD,
 
165
  )
166
  if ACCESS_PASSWORD:
167
  print("access password gate: ON")
168
  else:
169
  print("access password gate: OFF (set AAMCQ_ACCESS_PASSWORD to enable)")
 
170
 
171
  @app.on_event("startup")
172
  async def _start_backup() -> None:
 
15
  AAMCQ_LABELS_PER_ITEM default: 3
16
  AAMCQ_BACKUP_INTERVAL default: 60 (seconds)
17
  AAMCQ_ACCESS_PASSWORD optional; if set, /api/register requires it
18
+ AAMCQ_ACC_THRESHOLD default: 0.40; min session acc vs correct_index
19
+ required to unlock email submission
20
  """
21
 
22
  from __future__ import annotations
 
45
  PER_ANNOTATOR_CAP = int(os.environ.get("AAMCQ_PER_ANNOTATOR_CAP", "20"))
46
  LABELS_PER_ITEM = int(os.environ.get("AAMCQ_LABELS_PER_ITEM", "3"))
47
  ACCESS_PASSWORD = os.environ.get("AAMCQ_ACCESS_PASSWORD") or None
48
+ ACC_THRESHOLD = float(os.environ.get("AAMCQ_ACC_THRESHOLD", "0.40"))
49
 
50
 
51
  def _require_token() -> str:
 
165
  max_labels_per_item=LABELS_PER_ITEM,
166
  max_labels_per_annotator=PER_ANNOTATOR_CAP,
167
  access_password=ACCESS_PASSWORD,
168
+ acc_threshold=ACC_THRESHOLD,
169
  )
170
  if ACCESS_PASSWORD:
171
  print("access password gate: ON")
172
  else:
173
  print("access password gate: OFF (set AAMCQ_ACCESS_PASSWORD to enable)")
174
+ print(f"acc_threshold: {ACC_THRESHOLD}")
175
 
176
  @app.on_event("startup")
177
  async def _start_backup() -> None:
src/aamcq/annotation/api.py CHANGED
@@ -4,10 +4,13 @@ from __future__ import annotations
4
 
5
  import hmac
6
  import os
 
7
  import sqlite3
 
 
8
  from pathlib import Path
9
 
10
- from fastapi import Depends, FastAPI, HTTPException, Query
11
  from fastapi.responses import FileResponse, JSONResponse
12
  from fastapi.staticfiles import StaticFiles
13
  from pydantic import BaseModel, Field, conint
@@ -15,6 +18,23 @@ from pydantic import BaseModel, Field, conint
15
  from aamcq.annotation import db as dbmod
16
  from aamcq.annotation.assignment import bootstrap_annotators
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  REPO_ROOT = Path(__file__).resolve().parents[3]
19
  DEFAULT_DB = REPO_ROOT / "data" / "annotations.sqlite"
20
  DEFAULT_IMAGE_DIR = REPO_ROOT / "data" / "images"
@@ -34,6 +54,11 @@ def _sanitize_item(payload: dict) -> dict:
34
  return {k: v for k, v in payload.items() if k != "correct_index"}
35
 
36
 
 
 
 
 
 
37
  def create_app(
38
  db_path: str | os.PathLike[str] | None = None,
39
  image_dir: str | os.PathLike[str] | None = None,
@@ -43,6 +68,8 @@ def create_app(
43
  max_labels_per_item: int = 3,
44
  max_labels_per_annotator: int | None = None,
45
  access_password: str | None = None,
 
 
46
  ) -> FastAPI:
47
  """Labeling server.
48
 
@@ -64,6 +91,14 @@ def create_app(
64
  `access_password`: if set, `/api/register` requires a matching
65
  `?password=` query param (constant-time compared). Cheap anti-spam
66
  gate for public Spaces — existing tokens keep working regardless.
 
 
 
 
 
 
 
 
67
  """
68
  db_path = Path(db_path or DEFAULT_DB)
69
  image_dir = Path(image_dir or DEFAULT_IMAGE_DIR)
@@ -79,6 +114,9 @@ def create_app(
79
  app.state.max_labels_per_item = max_labels_per_item
80
  app.state.max_labels_per_annotator = max_labels_per_annotator
81
  app.state.access_password = access_password
 
 
 
82
 
83
  @app.middleware("http")
84
  async def _deny_framing(request, call_next):
@@ -107,6 +145,30 @@ def create_app(
107
  per = dbmod.get_annotator_cap(conn, annotator_id)
108
  return per if per is not None else app.state.max_labels_per_annotator
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def _next_task_payload(annotator_id: str, conn: sqlite3.Connection, n_done: int) -> dict:
111
  cap = _effective_cap(conn, annotator_id)
112
  if cap is not None and n_done >= cap:
@@ -128,8 +190,11 @@ def create_app(
128
 
129
  @app.post("/api/register")
130
  def api_register(
 
131
  cap: int | None = Query(default=None, ge=1, le=10000),
132
  password: str | None = Query(default=None, max_length=256),
 
 
133
  conn: sqlite3.Connection = Depends(get_conn),
134
  ):
135
  """Mint a fresh anonymous annotator. Only enabled when anonymous_register.
@@ -138,6 +203,13 @@ def create_app(
138
  server default (used by the frontend to give the first session a
139
  larger quota than subsequent ones).
140
 
 
 
 
 
 
 
 
141
  If `access_password` was set at startup, `?password=` must match
142
  (constant-time compared) or we return 403.
143
  """
@@ -147,6 +219,28 @@ def create_app(
147
  if expected:
148
  if not password or not hmac.compare_digest(password, expected):
149
  raise HTTPException(status_code=403, detail="wrong access password")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  existing = {row["annotator_id"] for row in conn.execute(
151
  "SELECT annotator_id FROM annotators"
152
  )}
@@ -158,8 +252,20 @@ def create_app(
158
  n += 1
159
  if n > 8:
160
  raise HTTPException(status_code=500, detail="could not mint unique id")
161
- tokens = bootstrap_annotators(conn, [candidate], cap=cap)
162
- return {"annotator_id": candidate, "token": tokens[candidate], "cap": cap}
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  @app.get("/api/task")
165
  def api_task(
@@ -197,6 +303,124 @@ def create_app(
197
  )
198
  return {"ok": True}
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  @app.get("/api/progress")
201
  def api_progress(
202
  token: str = Query(min_length=8, max_length=128),
 
4
 
5
  import hmac
6
  import os
7
+ import re
8
  import sqlite3
9
+ import time
10
+ from collections import deque
11
  from pathlib import Path
12
 
13
+ from fastapi import Depends, FastAPI, HTTPException, Query, Request
14
  from fastapi.responses import FileResponse, JSONResponse
15
  from fastapi.staticfiles import StaticFiles
16
  from pydantic import BaseModel, Field, conint
 
18
  from aamcq.annotation import db as dbmod
19
  from aamcq.annotation.assignment import bootstrap_annotators
20
 
21
+ DEFAULT_ACC_THRESHOLD = 0.40
22
+ MAX_LOTTERY_ROUND = 3 # round 1 base, +2 bonus rounds cap multiplier at 1.5x
23
# Loose shape check only: something@something.something with no whitespace
# and no additional "@".
_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")


def _is_valid_email(email: str) -> bool:
    """Return True when `email` is non-empty, at most 254 characters long and
    matches `_EMAIL_RE` in its entirety."""
    # fullmatch, not match: with match() the pattern's "$" also matches just
    # before a trailing newline, so "a@b.co\n" would have been accepted.
    return (
        bool(email)
        and len(email) <= 254
        and _EMAIL_RE.fullmatch(email) is not None
    )
28
+
29
+
30
def _multiplier(passed_rounds: set[int]) -> float:
    """Compute the lottery multiplier for a set of passed round numbers.

    Without a passed round 1 the result is 0.0. Otherwise it is 1.0 plus
    0.25 for every passed round numbered above 1, never exceeding 1.5.
    """
    if 1 in passed_rounds:
        bonus_rounds = sum(1 for round_no in passed_rounds if round_no > 1)
        uncapped = 1.0 + 0.25 * bonus_rounds
        return min(uncapped, 1.5)
    return 0.0
37
+
38
  REPO_ROOT = Path(__file__).resolve().parents[3]
39
  DEFAULT_DB = REPO_ROOT / "data" / "annotations.sqlite"
40
  DEFAULT_IMAGE_DIR = REPO_ROOT / "data" / "images"
 
54
  return {k: v for k, v in payload.items() if k != "correct_index"}
55
 
56
 
57
+ class SubmitEmailPayload(BaseModel):
58
+ token: str = Field(min_length=8, max_length=128)
59
+ email: str = Field(min_length=3, max_length=254)
60
+
61
+
62
  def create_app(
63
  db_path: str | os.PathLike[str] | None = None,
64
  image_dir: str | os.PathLike[str] | None = None,
 
68
  max_labels_per_item: int = 3,
69
  max_labels_per_annotator: int | None = None,
70
  access_password: str | None = None,
71
+ acc_threshold: float = DEFAULT_ACC_THRESHOLD,
72
+ register_rate_limit: tuple[int, int] | None = (5, 3600),
73
  ) -> FastAPI:
74
  """Labeling server.
75
 
 
91
  `access_password`: if set, `/api/register` requires a matching
92
  `?password=` query param (constant-time compared). Cheap anti-spam
93
  gate for public Spaces — existing tokens keep working regardless.
94
+
95
+ `acc_threshold`: session accuracy (vs `correct_index`) required for
96
+ email submission + lottery bonus credit. Defaults to 0.40. At
97
+ p_random=0.25 and n=20, false-positive rate is ~10%, so pair with
98
+ IP rate-limiting + UNIQUE-email-per-round checks.
99
+
100
+ `register_rate_limit`: (max_requests, window_seconds) tuple. Defaults
101
+ to (5, 3600) — per-IP rolling window. Pass None to disable.
102
  """
103
  db_path = Path(db_path or DEFAULT_DB)
104
  image_dir = Path(image_dir or DEFAULT_IMAGE_DIR)
 
114
  app.state.max_labels_per_item = max_labels_per_item
115
  app.state.max_labels_per_annotator = max_labels_per_annotator
116
  app.state.access_password = access_password
117
+ app.state.acc_threshold = float(acc_threshold)
118
+ app.state.register_rate_limit = register_rate_limit
119
+ app.state.register_hits: dict[str, deque[float]] = {}
120
 
121
  @app.middleware("http")
122
  async def _deny_framing(request, call_next):
 
145
  per = dbmod.get_annotator_cap(conn, annotator_id)
146
  return per if per is not None else app.state.max_labels_per_annotator
147
 
148
def _client_ip(request: Request) -> str:
    """Pick the address used to key per-client rate limiting.

    Uses the first comma-separated entry of the `X-Forwarded-For` header when
    the header is present and non-empty; otherwise the direct peer's host, or
    the string "unknown" when the request has no client info.

    NOTE(review): the first X-Forwarded-For entry is whatever the caller sent
    unless the fronting proxy overwrites the header — confirm the deployment's
    proxy behaviour before relying on this for abuse control.
    """
    forwarded = request.headers.get("x-forwarded-for")
    if not forwarded:
        peer = request.client
        if peer:
            return peer.host
        return "unknown"
    first_hop, _, _ = forwarded.partition(",")
    return first_hop.strip()
155
+
156
def _check_rate_limit(ip: str) -> None:
    """Record a register attempt for `ip` and enforce the rolling-window limit.

    Reads `(max_requests, window_seconds)` from `app.state.register_rate_limit`;
    does nothing when it is None. Raises HTTPException(429) when `ip` already
    has `max_requests` hits inside the window, otherwise appends the current
    timestamp to that IP's deque in `app.state.register_hits`.
    """
    limit = app.state.register_rate_limit
    if limit is None:
        return
    max_req, window = limit
    now = time.time()
    cutoff = now - window
    hits = app.state.register_hits

    # Drop other IPs whose newest hit has aged out (or that hold no hits), so
    # the table does not keep one entry per address ever seen. Iterates a
    # snapshot and uses pop(..., None) so a concurrent removal is harmless.
    for seen_ip, stamps in list(hits.items()):
        if seen_ip != ip and (not stamps or stamps[-1] < cutoff):
            hits.pop(seen_ip, None)

    q = hits.setdefault(ip, deque())
    while q and q[0] < cutoff:
        q.popleft()
    if len(q) >= max_req:
        raise HTTPException(
            status_code=429,
            detail=f"too many register requests (limit {max_req}/{window}s)",
        )
    q.append(now)
171
+
172
  def _next_task_payload(annotator_id: str, conn: sqlite3.Connection, n_done: int) -> dict:
173
  cap = _effective_cap(conn, annotator_id)
174
  if cap is not None and n_done >= cap:
 
190
 
191
  @app.post("/api/register")
192
  def api_register(
193
+ request: Request,
194
  cap: int | None = Query(default=None, ge=1, le=10000),
195
  password: str | None = Query(default=None, max_length=256),
196
+ email: str | None = Query(default=None, max_length=254),
197
+ round: int = Query(default=1, ge=1, le=MAX_LOTTERY_ROUND),
198
  conn: sqlite3.Connection = Depends(get_conn),
199
  ):
200
  """Mint a fresh anonymous annotator. Only enabled when anonymous_register.
 
203
  server default (used by the frontend to give the first session a
204
  larger quota than subsequent ones).
205
 
206
+ `?email=` + `?round=N` attach this new annotator to an existing
207
+ email-based chain for lottery-multiplier rounds (round 2/3). For
208
+ round > 1 we verify the prior round was passed by an earlier
209
+ annotator with the same email; otherwise reject. round==1 can
210
+ include email but is rarer (email is normally attached later
211
+ via /api/submit_email once session acc is known).
212
+
213
  If `access_password` was set at startup, `?password=` must match
214
  (constant-time compared) or we return 403.
215
  """
 
219
  if expected:
220
  if not password or not hmac.compare_digest(password, expected):
221
  raise HTTPException(status_code=403, detail="wrong access password")
222
+
223
+ _check_rate_limit(_client_ip(request))
224
+
225
+ if email is not None:
226
+ if not _is_valid_email(email):
227
+ raise HTTPException(status_code=400, detail="bad email format")
228
+ # round > 1 must chain off a PASSED prior round for this email.
229
+ if round > 1:
230
+ passed = dbmod.email_passed_rounds(
231
+ conn, email, app.state.acc_threshold
232
+ )
233
+ if (round - 1) not in passed:
234
+ raise HTTPException(
235
+ status_code=400,
236
+ detail=f"round {round} requires passed round {round-1} for this email",
237
+ )
238
+ if round in passed:
239
+ raise HTTPException(
240
+ status_code=400,
241
+ detail=f"round {round} already passed for this email",
242
+ )
243
+
244
  existing = {row["annotator_id"] for row in conn.execute(
245
  "SELECT annotator_id FROM annotators"
246
  )}
 
252
  n += 1
253
  if n > 8:
254
  raise HTTPException(status_code=500, detail="could not mint unique id")
255
+
256
+ # bootstrap_annotators wasn't set up for email/round; inline the
257
+ # same effect so we can pass those through.
258
+ token = dbmod.mint_token()
259
+ dbmod.insert_annotator(
260
+ conn, candidate, token, cap=cap, email=email, round_number=round
261
+ )
262
+ return {
263
+ "annotator_id": candidate,
264
+ "token": token,
265
+ "cap": cap,
266
+ "email": email,
267
+ "round_number": round,
268
+ }
269
 
270
  @app.get("/api/task")
271
  def api_task(
 
303
  )
304
  return {"ok": True}
305
 
306
+ @app.get("/api/session_status")
307
+ def api_session_status(
308
+ token: str = Query(min_length=8, max_length=128),
309
+ conn: sqlite3.Connection = Depends(get_conn),
310
+ ):
311
+ """Tell the frontend what the done-page should render.
312
+
313
+ Returns pass/fail flags — never the raw accuracy number — so
314
+ annotators can't binary-search the threshold by reloading.
315
+ """
316
+ annotator_id = resolve_annotator(token, conn)
317
+ row = dbmod.get_annotator_row(conn, annotator_id)
318
+ if row is None:
319
+ raise HTTPException(status_code=401, detail="invalid token")
320
+ cap = _effective_cap(conn, annotator_id)
321
+ n_correct, n = dbmod.session_accuracy(conn, annotator_id)
322
+ cap_reached = cap is not None and n >= cap
323
+ acc_pass = cap_reached and n > 0 and (n_correct / n) >= app.state.acc_threshold
324
+
325
+ # Lottery state: look at ALL annotators for this email (if any)
326
+ # to compute the current multiplier + whether more rounds are
327
+ # available.
328
+ email = row["email"]
329
+ multiplier = 0.0
330
+ passed_rounds: set[int] = set()
331
+ if email:
332
+ passed_rounds = dbmod.email_passed_rounds(
333
+ conn, email, app.state.acc_threshold
334
+ )
335
+ # Include THIS session's pass optimistically so the UI shows
336
+ # the correct multiplier immediately after a passing round
337
+ # (row's cap may differ from the session_accuracy lookup if
338
+ # server cap default applied; use effective cap instead).
339
+ if acc_pass:
340
+ passed_rounds = passed_rounds | {int(row["round_number"])}
341
+ multiplier = _multiplier(passed_rounds)
342
+
343
+ can_extend = (
344
+ acc_pass
345
+ and email is not None
346
+ and int(row["round_number"]) < MAX_LOTTERY_ROUND
347
+ )
348
+
349
+ return {
350
+ "cap_reached": bool(cap_reached),
351
+ "n_labeled": int(n),
352
+ "cap": cap,
353
+ "acc_pass": bool(acc_pass),
354
+ "round_number": int(row["round_number"]),
355
+ "email": email,
356
+ "multiplier": multiplier,
357
+ "can_extend": bool(can_extend),
358
+ "next_round_cap": 10, # hardcoded for now; frontend uses this
359
+ }
360
+
361
+ @app.post("/api/submit_email")
362
+ def api_submit_email(
363
+ payload: SubmitEmailPayload,
364
+ conn: sqlite3.Connection = Depends(get_conn),
365
+ ):
366
+ """Attach the email to a round-1 annotator whose session passed.
367
+
368
+ Guards:
369
+ - Annotator exists (valid token).
370
+ - round_number == 1 (email is introduced on round 1 only).
371
+ - Current email is NULL (can't overwrite once set).
372
+ - Session complete: n_labeled >= cap.
373
+ - acc >= threshold.
374
+ - Email hasn't already passed round 1 via another annotator
375
+ (prevents multiple lottery entries per email).
376
+ """
377
+ annotator_id = resolve_annotator(payload.token, conn)
378
+ row = dbmod.get_annotator_row(conn, annotator_id)
379
+ if row is None:
380
+ raise HTTPException(status_code=401, detail="invalid token")
381
+ if int(row["round_number"]) != 1:
382
+ raise HTTPException(
383
+ status_code=400, detail="email only submitted on round 1"
384
+ )
385
+ if row["email"]:
386
+ raise HTTPException(
387
+ status_code=409, detail="email already set for this annotator"
388
+ )
389
+ if not _is_valid_email(payload.email):
390
+ raise HTTPException(status_code=400, detail="bad email format")
391
+
392
+ cap = _effective_cap(conn, annotator_id)
393
+ n_correct, n = dbmod.session_accuracy(conn, annotator_id)
394
+ if cap is None or n < cap:
395
+ raise HTTPException(
396
+ status_code=400,
397
+ detail=f"session not complete ({n}/{cap})",
398
+ )
399
+ if n == 0 or (n_correct / n) < app.state.acc_threshold:
400
+ raise HTTPException(
401
+ status_code=403,
402
+ detail=f"accuracy below threshold ({app.state.acc_threshold})",
403
+ )
404
+
405
+ # Prevent double-credit: this email must not have a passed
406
+ # round 1 on another annotator.
407
+ if 1 in dbmod.email_passed_rounds(conn, payload.email, app.state.acc_threshold):
408
+ raise HTTPException(
409
+ status_code=409,
410
+ detail="email already credited for round 1",
411
+ )
412
+
413
+ dbmod.set_annotator_email(conn, annotator_id, payload.email)
414
+ passed_rounds = dbmod.email_passed_rounds(
415
+ conn, payload.email, app.state.acc_threshold
416
+ )
417
+ return {
418
+ "ok": True,
419
+ "email": payload.email,
420
+ "round_number": 1,
421
+ "multiplier": _multiplier(passed_rounds),
422
+ }
423
+
424
  @app.get("/api/progress")
425
  def api_progress(
426
  token: str = Query(min_length=8, max_length=128),
src/aamcq/annotation/db.py CHANGED
@@ -26,8 +26,11 @@ CREATE TABLE IF NOT EXISTS annotators (
26
  annotator_id TEXT PRIMARY KEY,
27
  token TEXT NOT NULL UNIQUE,
28
  created_at REAL NOT NULL,
29
- cap INTEGER
 
 
30
  );
 
31
  CREATE TABLE IF NOT EXISTS assignments (
32
  item_id TEXT NOT NULL,
33
  annotator_id TEXT NOT NULL,
@@ -87,11 +90,19 @@ def connect(db_path: str | Path) -> sqlite3.Connection:
87
 
88
  def init_schema(conn: sqlite3.Connection) -> None:
89
  conn.executescript(SCHEMA)
90
- # Migrate older DBs that predate the `cap` column on annotators.
91
- try:
92
- conn.execute("ALTER TABLE annotators ADD COLUMN cap INTEGER")
93
- except sqlite3.OperationalError:
94
- pass # column already exists
 
 
 
 
 
 
 
 
95
 
96
 
97
  def mint_token() -> str:
@@ -110,11 +121,14 @@ def insert_annotator(
110
  annotator_id: str,
111
  token: str,
112
  cap: int | None = None,
 
 
113
  ) -> None:
114
  conn.execute(
115
- "INSERT OR REPLACE INTO annotators(annotator_id, token, created_at, cap) "
116
- "VALUES (?, ?, ?, ?)",
117
- (annotator_id, token, time.time(), cap),
 
118
  )
119
 
120
 
@@ -222,6 +236,77 @@ def next_pooled_item(
222
  )
223
 
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  def count_annotator_labels(conn: sqlite3.Connection, annotator_id: str) -> int:
226
  return int(conn.execute(
227
  "SELECT COUNT(*) AS n FROM labels WHERE annotator_id = ?",
 
26
  annotator_id TEXT PRIMARY KEY,
27
  token TEXT NOT NULL UNIQUE,
28
  created_at REAL NOT NULL,
29
+ cap INTEGER,
30
+ email TEXT,
31
+ round_number INTEGER NOT NULL DEFAULT 1
32
  );
33
+ CREATE INDEX IF NOT EXISTS idx_annotators_email ON annotators(email);
34
  CREATE TABLE IF NOT EXISTS assignments (
35
  item_id TEXT NOT NULL,
36
  annotator_id TEXT NOT NULL,
 
90
 
91
def init_schema(conn: sqlite3.Connection) -> None:
    """Create the schema on a fresh DB and migrate an older DB forward.

    An older database may already have an ``annotators`` table that
    predates the ``cap``, ``email`` and ``round_number`` columns. Those
    columns are added *before* ``SCHEMA`` is executed, because ``SCHEMA``
    creates an index on ``annotators(email)``; running it first against an
    un-migrated table fails with "no such column: email" and the
    migrations would never be reached.

    Args:
        conn: Open SQLite connection to initialise.

    Raises:
        sqlite3.OperationalError: If a migration statement or the schema
            script fails. Errors are propagated rather than swallowed, so
            a locked or corrupt database is not silently ignored.
    """
    # PRAGMA table_info yields one row per column (column name is field 1)
    # and no rows at all when the table does not exist yet. Positional
    # access works for both plain tuples and sqlite3.Row.
    existing = {row[1] for row in conn.execute("PRAGMA table_info(annotators)")}
    if existing:
        migrations = (
            ("cap", "ALTER TABLE annotators ADD COLUMN cap INTEGER"),
            ("email", "ALTER TABLE annotators ADD COLUMN email TEXT"),
            (
                "round_number",
                "ALTER TABLE annotators ADD COLUMN round_number INTEGER NOT NULL DEFAULT 1",
            ),
        )
        for column, ddl in migrations:
            # Only add what is missing, so re-running is a no-op.
            if column not in existing:
                conn.execute(ddl)
    # On a fresh DB this creates everything; on a migrated DB the
    # IF NOT EXISTS clauses make the table statements no-ops and the
    # email index can now be built.
    conn.executescript(SCHEMA)
106
 
107
 
108
  def mint_token() -> str:
 
121
  annotator_id: str,
122
  token: str,
123
  cap: int | None = None,
124
+ email: str | None = None,
125
+ round_number: int = 1,
126
  ) -> None:
127
  conn.execute(
128
+ "INSERT OR REPLACE INTO annotators"
129
+ "(annotator_id, token, created_at, cap, email, round_number) "
130
+ "VALUES (?, ?, ?, ?, ?, ?)",
131
+ (annotator_id, token, time.time(), cap, email, round_number),
132
  )
133
 
134
 
 
236
  )
237
 
238
 
239
def get_annotator_row(conn: sqlite3.Connection, annotator_id: str) -> sqlite3.Row | None:
    """Look up a single annotator record by its primary key.

    Returns the row (annotator_id, token, cap, email, round_number,
    created_at) or ``None`` when no annotator has that id.
    """
    query = (
        "SELECT annotator_id, token, cap, email, round_number, created_at "
        "FROM annotators WHERE annotator_id = ?"
    )
    cursor = conn.execute(query, (annotator_id,))
    return cursor.fetchone()
245
+
246
+
247
def session_accuracy(
    conn: sqlite3.Connection, annotator_id: str
) -> tuple[int, int]:
    """Count how many of an annotator's labels match the answer key.

    A label is correct when ``labels.chosen_index`` equals the integer
    stored at ``$.correct_index`` in the joined item's ``payload_json``.

    Returns:
        ``(n_correct, n_total)``; ``(0, 0)`` when the annotator has not
        labeled anything.
    """
    sql = """
        SELECT
            COUNT(*) AS n,
            SUM(CASE WHEN l.chosen_index =
                CAST(json_extract(i.payload_json, '$.correct_index') AS INTEGER)
                THEN 1 ELSE 0 END) AS n_correct
        FROM labels l
        JOIN items i USING(item_id)
        WHERE l.annotator_id = ?
        """
    totals = conn.execute(sql, (annotator_id,)).fetchone()
    # SUM() over zero rows is NULL, hence the `or 0` fallbacks.
    total = int(totals["n"] or 0)
    correct = int(totals["n_correct"] or 0)
    return correct, total
271
+
272
+
273
def set_annotator_email(
    conn: sqlite3.Connection, annotator_id: str, email: str
) -> None:
    """Store ``email`` on the annotator row identified by ``annotator_id``.

    Issues a single UPDATE; an unknown ``annotator_id`` changes nothing.
    """
    statement = "UPDATE annotators SET email = ? WHERE annotator_id = ?"
    params = (email, annotator_id)
    conn.execute(statement, params)
280
+
281
+
282
def email_passed_rounds(
    conn: sqlite3.Connection, email: str, acc_threshold: float, target_cap: int | None = None
) -> set[int]:
    """Return the round numbers this email has already passed.

    A round counts as passed when at least one annotator registered under
    ``email`` (a) labeled at least ``cap`` items and (b) reached an
    accuracy of ``acc_threshold`` or better.

    The email comparison is case-insensitive (ASCII), so that
    ``Foo@example.com`` and ``foo@example.com`` cannot be credited
    separately.

    Args:
        conn: Open SQLite connection; rows are read by column name.
        email: Address to look up.
        acc_threshold: Minimum ``n_correct / n`` ratio required to pass.
        target_cap: When given, use this cap for every annotator instead of
            each annotator's own stored ``cap``.

    Returns:
        Set of ``round_number`` values with at least one passing annotator.
    """
    rows = conn.execute(
        "SELECT annotator_id, cap, round_number FROM annotators "
        "WHERE email = ? COLLATE NOCASE",
        (email,),
    ).fetchall()
    passed: set[int] = set()
    for r in rows:
        cap = r["cap"] if target_cap is None else target_cap
        if cap is None:
            # No cap means the session can never be "complete"; skip
            # before paying for the accuracy query.
            continue
        n_correct, n = session_accuracy(conn, r["annotator_id"])
        # n == 0 guards the division below (possible when cap is 0).
        if n == 0 or n < cap:
            continue
        if (n_correct / n) >= acc_threshold:
            passed.add(int(r["round_number"]))
    return passed
308
+
309
+
310
  def count_annotator_labels(conn: sqlite3.Connection, annotator_id: str) -> int:
311
  return int(conn.execute(
312
  "SELECT COUNT(*) AS n FROM labels WHERE annotator_id = ?",