Spaces:

lanczos
/

aesthetic-annotators

Sleeping

App Files Files Community

lanczos committed on Apr 19

Commit

871ff87

verified ·

1 Parent(s): b991a77

deploy: labeling server

Browse files

Files changed (35) hide show

Dockerfile +29 -0
README.md +32 -3
configs/base_prompts.yaml +189 -0
configs/distractor_policy.yaml +22 -0
configs/generation.yaml +4 -0
configs/profile_vocab.json +34 -0
labeling/instructions.md +25 -0
labeling/static/app.js +162 -0
labeling/static/index.html +36 -0
labeling/static/style.css +146 -0
pyproject.toml +38 -0
spaces/DEPLOY.md +77 -0
spaces/Dockerfile +29 -0
spaces/README.md +39 -0
spaces/push_dataset.py +75 -0
spaces/push_space.py +74 -0
spaces/push_to_space.sh +21 -0
spaces/requirements.txt +6 -0
spaces/space_entry.py +155 -0
src/aamcq/__init__.py +3 -0
src/aamcq/annotation/__init__.py +0 -0
src/aamcq/annotation/api.py +205 -0
src/aamcq/annotation/assignment.py +72 -0
src/aamcq/annotation/db.py +256 -0
src/aamcq/distractors.py +187 -0
src/aamcq/generation/__init__.py +10 -0
src/aamcq/generation/base.py +38 -0
src/aamcq/generation/flux2_klein.py +74 -0
src/aamcq/generation/registry.py +18 -0
src/aamcq/instance_plan.py +170 -0
src/aamcq/profile.py +90 -0
src/aamcq/prompt_render.py +113 -0
src/aamcq/utils/__init__.py +0 -0
src/aamcq/utils/io.py +37 -0
src/aamcq/utils/seeding.py +16 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+FROM python:3.11-slim
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PYTHONPATH=/app/src
+WORKDIR /app
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+COPY spaces/requirements.txt /app/spaces/requirements.txt
+RUN pip install -r /app/spaces/requirements.txt
+COPY pyproject.toml /app/
+COPY src /app/src
+COPY labeling /app/labeling
+COPY configs /app/configs
+COPY spaces /app/spaces
+# HF Spaces mounts a writable /data directory when Persistent Storage is
+# enabled; fall back to an in-container path when running locally.
+ENV AAMCQ_DATA_DIR=/data
+RUN mkdir -p /data && chmod 777 /data
+EXPOSE 7860
+CMD ["python", "/app/spaces/space_entry.py"]

README.md CHANGED Viewed

@@ -1,10 +1,39 @@
 ---
 title: Aesthetic Annotators
-emoji: 🔥
-colorFrom: blue
 colorTo: pink
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Aesthetic Annotators
+emoji: 🎨
+colorFrom: purple
 colorTo: pink
 sdk: docker
+app_port: 7860
 pinned: false
 ---
+# Aesthetic Annotators
+Public-URL labeling server for the AestheticMCQ dataset. Any visitor is
+auto-issued an anonymous annotator id on first hit; each session labels up
+to `AAMCQ_PER_ANNOTATOR_CAP` items (default 20) pulled breadth-first from
+the pool so every item receives one label before any receives a second.
+## Configuration
+Space secrets:
+| name | required | default | notes |
+|---|---|---|---|
+| `HF_TOKEN` | yes | — | write scope on the companion dataset repo |
+| `AAMCQ_DATASET_REPO` | no | `lanczos/aesthetic-annotators` | source of images + mcq + label backups |
+| `AAMCQ_PER_ANNOTATOR_CAP` | no | `20` | items per session before "all done" |
+| `AAMCQ_LABELS_PER_ITEM` | no | `3` | target labels per item |
+| `AAMCQ_BACKUP_INTERVAL` | no | `60` | SQLite → dataset repo push interval (seconds) |
+## Data flow
+1. On boot, the Space pulls `images/*.png`, `mcq_unlabeled.jsonl`, and any
+   prior `labels/annotations.sqlite` from the dataset repo.
+2. Annotators land on the root URL → JS calls `POST /api/register` → server
+   mints a fresh `anon_*` id + token, cached in localStorage.
+3. `/api/task` hands out the least-labeled item the annotator hasn't seen.
+4. Every `AAMCQ_BACKUP_INTERVAL` seconds the server pushes the SQLite back
+   to `labels/annotations.sqlite` in the dataset repo, so Space
+   restarts/sleeps lose at most one backup interval's worth of labels.

configs/base_prompts.yaml ADDED Viewed

	@@ -0,0 +1,189 @@

+# Content-neutral base prompts rotated across MCQ items.
+#
+# Filter rules applied when curating this pool:
+#   - no embedded style words (epic / mystical / cinematic / gloomy / surreal / cozy / vibrant)
+#   - single subject, simple composition
+#   - realistic / non-fantastical content
+#   - no text-in-image requirement
+#   - content-orthogonal across categories
+#
+# Each prompt has a `quality`: `clean` means style-uncontaminated; `mild` means a
+# weak style word slipped through. build_plan() loads only `clean` entries.
+meta:
+  version: "1.0"
+recommended:
+  portrait: "a 20 year old woman studying"
+  landscape: "a tree in a field in front of a mountain"
+  animal: "a ginger cat looking out the window"
+  still_life: "sunflowers in a glass mason jar"
+categories:
+  portrait:
+    description: "Human subjects — skin tone, lighting, mood style signals strongest"
+    prompts:
+      - { text: "a 20 year old woman studying", quality: clean, recommended: true }
+      - { text: "a young man looking at his reflection in a mirror", quality: clean }
+      - { text: "a woman staring into the camera, scratching her head", quality: clean }
+      - { text: "a woman crossing a footbridge in a park", quality: clean }
+      - { text: "a man sitting enjoying a tree's shade", quality: clean }
+      - { text: "a boy walking down a forest in fall", quality: clean }
+      - { text: "a female climber in a bouldering gym", quality: clean }
+      - { text: "a cyclist on a road bicycle", quality: clean }
+      - { text: "a female cyclist on a road bicycle", quality: clean }
+      - { text: "a surfer girl holding a surf board", quality: clean }
+      - { text: "a man ordering food at a restaurant", quality: clean }
+      - { text: "a couple on a first date", quality: clean }
+      - { text: "a tennis player playing tennis at Roland Garros", quality: clean }
+      - { text: "a bodybuilder training hard with weights in the gym", quality: clean }
+      - { text: "a happy nepalese girl in a village", quality: clean }
+      - { text: "a boy sitting in a poor village street corner playing an acoustic guitar", quality: clean }
+      - { text: "a 20 year old woman wearing workout clothes", quality: clean }
+      - { text: "an elderly woman in a rocking chair", quality: clean }
+      - { text: "a woman pointing straight at viewer", quality: clean }
+      - { text: "two hands typing on a computer keyboard", quality: clean }
+      - { text: "a man waving goodbye while looking at camera", quality: clean }
+      - { text: "a young woman with red hair and green eyes", quality: clean }
+      - { text: "a ginger woman with short wavy hair and pale skin and freckles", quality: clean }
+      - { text: "an 18 year old girl, looking at viewer", quality: clean }
+      - { text: "young woman with freckled face, on the beach", quality: clean }
+      - { text: "a videographer in a busy place", quality: clean }
+      - { text: "a man and a woman eating dinner at home", quality: clean }
+      - { text: "high school students playing during the school break", quality: clean }
+      - { text: "a woman helping an old man", quality: clean }
+      - { text: "an indian man learning to ride a bicycle", quality: clean }
+      - { text: "a man in overalls strumming a guitar", quality: clean }
+      - { text: "a woman using a silk screen", quality: clean }
+      - { text: "a woman heat pressing a shirt", quality: clean }
+      - { text: "a cute woman with freckles and pale skin", quality: clean }
+      - { text: "a person with long hair wearing glasses, freckles, blue tshirt", quality: clean }
+      - { text: "a woman stepping in a puddle on a rainy day", quality: clean }
+      - { text: "father and daughter watching sunrise at the beach sitting on a bench", quality: clean }
+      - { text: "a man sitting by the ocean watching the sunset", quality: mild }
+      - { text: "a cool looking middle aged man with white hair and beard typing on his laptop in a dark room", quality: mild }
+  landscape:
+    description: "Natural scenes — lighting, color palette, season style signals strongest"
+    prompts:
+      - { text: "a tree in a field in front of a mountain", quality: clean, recommended: true }
+      - { text: "a flower blooming out of a crack on a boulder", quality: clean }
+      - { text: "a plant with green leaves and a single white flower", quality: clean }
+      - { text: "a flower at the top of a mountain", quality: clean }
+      - { text: "a white mushroom in the forest", quality: clean }
+      - { text: "a pond in a forest at night", quality: clean }
+      - { text: "a field of flowers in the middle of a forest", quality: clean }
+      - { text: "milky way on a clear night, with a forest as backdrop", quality: clean }
+      - { text: "a night sky with full moon", quality: clean }
+      - { text: "two blue flowers in front of a meadow", quality: clean }
+      - { text: "snow pea plants in a garden", quality: clean }
+      - { text: "pink flower in a pot on the window in the sunshine", quality: clean }
+      - { text: "a tiny frog on a leaf, tropical settings", quality: clean }
+      - { text: "a macro photo of a ladybug on a flower", quality: clean }
+      - { text: "macro close up photo of a snowflake", quality: clean }
+      - { text: "a red cardinal on a bird feeder", quality: clean }
+      - { text: "an old stone well in a field of grain with a farmhouse in the distance", quality: clean }
+      - { text: "a view of a tall hill with a big forest treeline as viewed from below", quality: clean }
+      - { text: "a cottage with a small garden in the front yard, in the forest", quality: clean }
+      - { text: "a white house with a garden", quality: clean }
+      - { text: "aerial view of a small tropical island with a beach and palm trees", quality: clean }
+      - { text: "aerial view of a small town on the bank of a river", quality: clean }
+      - { text: "large mossy tree lying in a spring forest", quality: clean }
+      - { text: "hiker in the forest walking in snow covered trees along a trail", quality: clean }
+      - { text: "a glass jar terrarium filled with plants", quality: clean }
+      - { text: "a glass jar terrarium filled with flowering plants", quality: clean }
+      - { text: "balcony with a lot of plants, with citrus tree in a bucket", quality: clean }
+      - { text: "picture of rays of light shining through the trees onto a meadow", quality: clean }
+      - { text: "a snowy mountain peak with a lone hiker standing at the top", quality: clean }
+      - { text: "a path of polished bricks leading into the sky, dusk", quality: mild }
+      - { text: "mountains, river and cottage at dusk", quality: mild }
+      - { text: "two horses running in a field in the foggy daytime", quality: mild }
+  animal:
+    description: "Animal subjects — fur texture, background atmosphere style signals clearest"
+    prompts:
+      - { text: "a ginger cat looking out the window", quality: clean, recommended: true }
+      - { text: "a golden retriever running through a water puddle", quality: clean }
+      - { text: "a cat on a rocking chair", quality: clean }
+      - { text: "photo of a fluffy white kitten", quality: clean }
+      - { text: "a cute kitten sitting on a couch", quality: clean }
+      - { text: "a happy puppy on a couch staring out a window", quality: clean }
+      - { text: "a Shiba Inu dog in a wicker basket of flowers", quality: clean }
+      - { text: "a cute hamster eating sunflower seeds", quality: clean }
+      - { text: "a cute hamster climbing a cage", quality: clean }
+      - { text: "a siamese cat with blue eyes", quality: clean }
+      - { text: "a cat playing in the grass", quality: clean }
+      - { text: "a cat playing with a ball", quality: clean }
+      - { text: "a cat jumping for a toy", quality: clean }
+      - { text: "a cat seated on a rock in the forest", quality: clean }
+      - { text: "an orange cat hugging a rock", quality: clean }
+      - { text: "two adorable cats, one black and white and one short haired orange tabby", quality: clean }
+      - { text: "a pitbull playing with a toy ball", quality: clean }
+      - { text: "a dog catching a ball in the air", quality: clean }
+      - { text: "a golden retriever jumping over a box", quality: clean }
+      - { text: "a cute husky wagging its tail", quality: clean }
+      - { text: "Australian Shepherd sitting on a mountain cliff edge", quality: clean }
+      - { text: "bernese mountain dog running in the grass, blue sky", quality: clean }
+      - { text: "a Corgi and a goldendoodle playing together on a large green couch", quality: clean }
+      - { text: "two beagles playing in the forest", quality: clean }
+      - { text: "a Yorkshire terrier at the botanical garden", quality: clean }
+      - { text: "Landseer Newfoundland dog sitting by a lake", quality: clean }
+      - { text: "a dog sitting on a beach", quality: clean }
+      - { text: "an otter poking its head out of water", quality: clean }
+      - { text: "a baby otter playing with a ball", quality: clean }
+      - { text: "a garden lizard sitting on a plank of wood", quality: clean }
+      - { text: "a kingfisher sitting on a pole", quality: clean }
+      - { text: "a white peacock in a lilac tree", quality: clean }
+      - { text: "honeybee collecting nectar from a bunch of marigolds", quality: clean }
+      - { text: "a close-up of a small bird perched on a flower", quality: clean }
+      - { text: "a robin on a large balcony with luscious green trees in spring", quality: clean }
+      - { text: "a jack russell eating a cabbage", quality: clean }
+      - { text: "a Brittany dog with a pheasant", quality: clean }
+      - { text: "a shar pei on the beach", quality: clean }
+      - { text: "a rabbit with a carrot in its hand", quality: clean }
+  still_life:
+    description: "Objects and food — color, material, lighting style signals most controlled"
+    prompts:
+      - { text: "sunflowers in a glass mason jar", quality: clean, recommended: true }
+      - { text: "a red apple sitting to the right of a peach", quality: clean }
+      - { text: "blueberries, raspberries, apples on a plate", quality: clean }
+      - { text: "blueberries, raspberries, watermelon in a dish", quality: clean }
+      - { text: "a plate of chocolate chip cookies", quality: clean }
+      - { text: "bacon and eggs on a plate", quality: clean }
+      - { text: "an omelet with strawberries on a plate with a coffee cup", quality: clean }
+      - { text: "a bowl of strawberries and sliced bananas in milk", quality: clean }
+      - { text: "a pizza with mortadella on top", quality: clean }
+      - { text: "a burger with cheese and salad", quality: clean }
+      - { text: "spaghetti and a pack of flour", quality: clean }
+      - { text: "a bowl of popcorn with nacho cheese seasoning", quality: clean }
+      - { text: "photo of an orange block of cheese", quality: clean }
+      - { text: "a slice of swiss cheese on a wooden cutting board", quality: clean }
+      - { text: "a rubber ducky next to a box of legos on a wooden floor", quality: clean }
+      - { text: "photo of a table with a teapot on it", quality: clean }
+      - { text: "a close up of a book on a coffee table", quality: clean }
+      - { text: "a chess board with Staunton pieces, initial position", quality: clean }
+      - { text: "a longsword on a wooden table", quality: clean }
+      - { text: "a poodle toy laying on a wooden floor, interior", quality: clean }
+      - { text: "a collection of small bits and bobs, flat lay", quality: clean }
+      - { text: "a red box on top of a blue box", quality: clean }
+      - { text: "a white champignon in the forest", quality: clean }
+  architecture:
+    description: "Buildings and interiors — atmosphere, light quality, texture style signals strong"
+    prompts:
+      - { text: "a park at the center of a city", quality: clean }
+      - { text: "a car parked on a leafy street", quality: clean }
+      - { text: "photo of a bicycle in venice", quality: clean }
+      - { text: "aerial view of a small town on the bank of a river", quality: clean }
+      - { text: "view down a road with skyscrapers each side", quality: clean }
+      - { text: "a photo of a messy kitchen", quality: clean }
+      - { text: "a small bathroom with a washing machine", quality: clean }
+      - { text: "a well furnished bedroom with two double beds", quality: clean }
+      - { text: "a photo of a cute bookstore with floor-to-ceiling windows", quality: clean }
+      - { text: "the university of edinburgh old college", quality: clean }
+      - { text: "picture of an airport with parallel runways", quality: clean }
+      - { text: "crowd in the street of NYC", quality: clean }
+      - { text: "a small hut on a beach in Thailand", quality: clean }
+      - { text: "a comfy designer chair standing near the window, a vintage lamp on a round coffee table", quality: clean }

configs/distractor_policy.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+# Mix of distractor strategies used by instance_plan.py.
+# Each value is the fraction of items assigned that strategy.
+# Must sum to 1.0 (validated at load time).
+strategy_mix:
+  one_axis_swap: 0.40
+  two_axis_swap: 0.30
+  axis_cluster: 0.15
+  random: 0.15
+# Per-axis weight when computing "closeness" for axis_cluster strategy.
+# Higher weight = bigger visible effect, so mismatches on these axes hurt
+# more. Weights are tie-breaking only; Hamming distance is the primary key.
+axis_weights:
+  art_style: 3.0
+  art_medium: 2.5
+  color: 2.0
+  lighting: 1.5
+# Gold-item injection rate for quality control. 10% of each annotator's
+# load is replaced with random-strategy items whose answer is unambiguous.
+gold_injection_rate: 0.10

configs/generation.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+backend_mix:
+  flux2_klein: 1.0
+image_size: [1024, 1024]

configs/profile_vocab.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "art_style": [
+    "Impressionism",
+    "Minimalism",
+    "Anime",
+    "Photorealism",
+    "Cubism",
+    "Art Deco"
+  ],
+  "color": [
+    "Warm Reds",
+    "Cool Blues",
+    "Earth Tones",
+    "Monochrome",
+    "Pastel Palette",
+    "Electric Neon"
+  ],
+  "art_medium": [
+    "Oil Painting",
+    "Watercolor",
+    "Ink Drawing",
+    "Digital Painting",
+    "Pixel Art",
+    "Pencil Sketch"
+  ],
+  "lighting": [
+    "Golden Hour",
+    "Moody Low-Key",
+    "Soft Overcast",
+    "Harsh Noon",
+    "Neon Glow",
+    "Candlelit"
+  ]
+}

labeling/instructions.md ADDED Viewed

	@@ -0,0 +1,25 @@

+# AestheticMCQ annotation — instructions
+You will see an image and four candidate **aesthetic profiles**, labelled A-D.
+Each profile is four axes:
+- **art_style**: Impressionism, Minimalism, Anime, Photorealism, Cubism, Art Deco
+- **color**: Warm Reds, Cool Blues, Earth Tones, Monochrome, Pastel Palette, Electric Neon
+- **art_medium**: Oil Painting, Watercolor, Ink Drawing, Digital Painting, Pixel Art, Pencil Sketch
+- **lighting**: Golden Hour, Moody Low-Key, Soft Overcast, Harsh Noon, Neon Glow, Candlelit
+Pick the single profile that **best** describes the aesthetic style of the image.
+## Rules of thumb
+- If two feel equally good, pick the *closer* one.
+- If none feels quite right, still pick the *closest* — do not skip.
+- Base your choice on **aesthetic style**, not on the subject matter.
+- Aim for ~15 seconds per item.
+## Privacy / data
+Your per-item time and choice are logged, along with your annotator ID. We do
+not log IP or identity. You can stop at any time and resume later from the
+same share URL or the same browser (which remembers your annotator token).

labeling/static/app.js ADDED Viewed

	@@ -0,0 +1,162 @@

+"use strict";
+const AXES = ["art_style", "color", "art_medium", "lighting"]; // profile keys rendered, in this order, on every option card
+const TOKEN_STORAGE_KEY = "aamcq_token"; // localStorage key under which the annotator token is kept
+async function fetchJSON(path, init) {
+  const resp = await fetch(path, init);
+  if (!resp.ok) {
+    const body = await resp.text();
+    throw new Error(`${resp.status}: ${body}`);
+  }
+  return resp.json();
+}
+// Tokens come from three places, in order:
+//   1. ?token=... in the URL (coordinator-issued personal link — old flow)
+//   2. localStorage (returning visitor)
+//   3. POST /api/register (fresh anonymous session; only works when the
+//      server was launched with --anonymous-register)
+async function ensureToken() {
+  const urlToken = new URL(window.location.href).searchParams.get("token");
+  if (urlToken) {
+    localStorage.setItem(TOKEN_STORAGE_KEY, urlToken);
+    return urlToken;
+  }
+  const stored = localStorage.getItem(TOKEN_STORAGE_KEY);
+  if (stored) return stored;
+  const resp = await fetch("/api/register", { method: "POST" });
+  if (!resp.ok) {
+    throw new Error(
+      "No ?token= in URL and anonymous registration is disabled on this server."
+    );
+  }
+  const { token } = await resp.json();
+  localStorage.setItem(TOKEN_STORAGE_KEY, token);
+  return token;
+}
+function renderProfileCard(idx, profile) {
+  const ul = document.createElement("ul");
+  ul.className = "profile";
+  for (const axis of AXES) {
+    const li = document.createElement("li");
+    const key = document.createElement("span");
+    key.className = "axis";
+    key.textContent = axis.replace("_", " ") + ": ";
+    const val = document.createElement("span");
+    val.className = "value";
+    val.textContent = profile[axis] ?? "?";
+    li.appendChild(key);
+    li.appendChild(val);
+    ul.appendChild(li);
+  }
+  const wrapper = document.createElement("label");
+  wrapper.className = "option";
+  const input = document.createElement("input");
+  input.type = "radio";
+  input.name = "choice";
+  input.value = String(idx);
+  wrapper.appendChild(input);
+  const badge = document.createElement("div");
+  badge.className = "badge";
+  badge.textContent = String.fromCharCode(65 + idx);
+  wrapper.appendChild(badge);
+  wrapper.appendChild(ul);
+  return wrapper;
+}
+let currentItem = null; // latest /api/task response that carried an item; read by submitLabel
+let shownAt = 0; // performance.now() reading taken when currentItem was displayed
+async function loadNext(token) {
+  const data = await fetchJSON(`/api/task?token=${encodeURIComponent(token)}`);
+  const card = document.getElementById("card");
+  const submit = document.getElementById("submit");
+  const err = document.getElementById("error");
+  err.textContent = "";
+  if (data.done) {
+    const labeled = data.labeled ?? 0;
+    const msg =
+      data.reason === "cap_reached"
+        ? `All done — you labeled ${labeled} items. Thank you!`
+        : `All items are fully labeled (you contributed ${labeled}). Thank you!`;
+    card.innerHTML = `<p class='done'>${msg}</p>`;
+    submit.disabled = true;
+    updateProgress(data.labeled, data.cap);
+    return;
+  }
+  currentItem = data;
+  shownAt = performance.now();
+  document.getElementById("stimulus").src = data.image_url;
+  document.getElementById("base-prompt").textContent =
+    data.payload.base_prompt ? `"${data.payload.base_prompt}"` : "";
+  const form = document.getElementById("options");
+  form.innerHTML = "";
+  const options = data.payload.options || [];
+  options.forEach((opt, i) => {
+    form.appendChild(renderProfileCard(i, opt));
+  });
+  submit.disabled = true;
+  form.querySelectorAll("input[type=radio]").forEach((el) => {
+    el.addEventListener("change", () => {
+      submit.disabled = false;
+    });
+  });
+  updateProgress(data.labeled, data.cap);
+}
+function updateProgress(labeled, cap) {
+  const el = document.getElementById("progress");
+  if (cap != null) {
+    el.textContent = `${labeled ?? 0} / ${cap} done`;
+  } else {
+    el.textContent = `${labeled ?? 0} labeled`;
+  }
+}
+// POST the currently selected option for the displayed item, then load the
+// next task. Does nothing when no option is checked or no item is loaded.
+// Failures are shown in #error instead of being thrown.
+async function submitLabel(token) {
+  const err = document.getElementById("error");
+  const submit = document.getElementById("submit");
+  err.textContent = "";
+  const chosen = document.querySelector("input[name=choice]:checked");
+  if (!chosen || !currentItem) return;
+  // Seconds since the item was shown (shownAt is set in loadNext).
+  const elapsed = (performance.now() - shownAt) / 1000;
+  // Keep the button disabled while the request is in flight so a double
+  // click cannot post the same label twice.
+  submit.disabled = true;
+  try {
+    await fetchJSON("/api/label", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        token,
+        item_id: currentItem.item_id,
+        chosen_index: Number(chosen.value),
+        seconds: elapsed,
+        confidence: null,
+      }),
+    });
+    await loadNext(token);
+  } catch (e) {
+    err.textContent = `Submit failed: ${e.message}`;
+    // Allow a retry, as before the in-flight guard was added.
+    submit.disabled = false;
+  }
+}
+async function main() {
+  let token;
+  try {
+    token = await ensureToken();
+  } catch (e) {
+    document.getElementById("error").textContent = e.message;
+    return;
+  }
+  document.getElementById("submit").addEventListener("click", () => submitLabel(token));
+  try {
+    await loadNext(token);
+  } catch (e) {
+    document.getElementById("error").textContent = `Load failed: ${e.message}`;
+  }
+}
+if (document.readyState === "loading") {
+  document.addEventListener("DOMContentLoaded", main);
+} else {
+  main();
+}

labeling/static/index.html ADDED Viewed

	@@ -0,0 +1,36 @@

+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>AestheticMCQ — Annotation</title>
+  <link rel="stylesheet" href="/style.css?v=4" />
+</head>
+<body>
+  <main>
+    <header>
+      <h1>AestheticMCQ</h1>
+      <div id="progress">loading…</div>
+    </header>
+    <section id="instructions">
+      <p>
+        Pick the aesthetic profile that best describes the image. If two feel
+        equally good, pick the closer one; if none feels right, still pick the
+        closest.
+      </p>
+    </section>
+    <section id="card">
+      <figure>
+        <img id="stimulus" alt="image to annotate" />
+        <figcaption id="base-prompt"></figcaption>
+      </figure>
+      <form id="options"></form>
+    </section>
+    <footer>
+      <button id="submit" disabled>Submit &amp; next</button>
+      <span id="error"></span>
+    </footer>
+  </main>
+  <script src="/app.js?v=5"></script>
+</body>
+</html>

labeling/static/style.css ADDED Viewed

	@@ -0,0 +1,146 @@

+:root {
+  --bg: #111;
+  --fg: #eee;
+  --muted: #888;
+  --accent: #4aa3ff;
+  --card: #1c1c1c;
+  --border: #2a2a2a;
+}
+* { box-sizing: border-box; }
+body {
+  margin: 0;
+  padding: 0;
+  background: var(--bg);
+  color: var(--fg);
+  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+  line-height: 1.4;
+}
+main {
+  max-width: 900px;
+  margin: 0 auto;
+  padding: 16px;
+}
+header {
+  display: flex;
+  align-items: baseline;
+  justify-content: space-between;
+  border-bottom: 1px solid var(--border);
+  padding-bottom: 8px;
+}
+header h1 { font-size: 1.2rem; margin: 0; }
+#progress { color: var(--muted); font-size: 0.9rem; }
+#instructions p {
+  color: var(--muted);
+  font-size: 0.9rem;
+  margin: 12px 0;
+}
+figure {
+  margin: 0;
+  text-align: center;
+}
+#stimulus {
+  max-width: 100%;
+  max-height: 60vh;
+  border-radius: 8px;
+  border: 1px solid var(--border);
+}
+#base-prompt {
+  color: var(--fg);
+  font-size: 1.05rem;
+  margin: 12px auto 0;
+  font-style: italic;
+  padding: 8px 14px;
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  display: block;
+  width: fit-content;
+  max-width: 90%;
+  text-align: center;
+}
+#options {
+  display: grid;
+  grid-template-columns: repeat(2, 1fr);
+  gap: 12px;
+  margin: 16px 0;
+}
+@media (max-width: 600px) {
+  #options { grid-template-columns: 1fr; }
+}
+.option {
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+  padding: 12px;
+  background: var(--card);
+  border: 2px solid var(--border);
+  border-radius: 8px;
+  cursor: pointer;
+  position: relative;
+}
+.option:has(input:checked) {
+  border-color: var(--accent);
+}
+.option input[type=radio] {
+  position: absolute;
+  opacity: 0;
+  pointer-events: none;
+}
+.badge {
+  display: inline-block;
+  width: 24px;
+  height: 24px;
+  line-height: 24px;
+  text-align: center;
+  border-radius: 12px;
+  background: var(--border);
+  font-weight: bold;
+  color: var(--fg);
+  font-size: 0.8rem;
+}
+.profile { list-style: none; margin: 0; padding: 0; }
+.profile li { font-size: 0.9rem; padding: 2px 0; }
+.axis { color: var(--muted); text-transform: capitalize; }
+.value { color: var(--fg); }
+footer {
+  display: flex;
+  align-items: center;
+  gap: 16px;
+  margin-top: 16px;
+}
+button#submit {
+  background: var(--accent);
+  color: #fff;
+  border: 0;
+  padding: 10px 20px;
+  border-radius: 6px;
+  font-size: 1rem;
+  cursor: pointer;
+}
+button#submit:disabled {
+  background: var(--border);
+  color: var(--muted);
+  cursor: not-allowed;
+}
+#error { color: #e66; font-size: 0.9rem; }
+.done { text-align: center; font-size: 1.2rem; color: var(--muted); }

pyproject.toml ADDED Viewed

	@@ -0,0 +1,38 @@

+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "aamcq"
+version = "0.1.0"
+description = "AestheticMCQ — human-labeled aesthetic similarity MCQ dataset"
+requires-python = ">=3.10"
+dependencies = [
+    "fastapi>=0.110",
+    "uvicorn[standard]>=0.27",
+    "pyyaml>=6.0",
+    "pydantic>=2.5",
+    "numpy>=1.24",
+]
+[project.optional-dependencies]
+test = ["pytest>=8.0", "httpx>=0.26"]
+generation = [
+    # Flux2KleinPipeline currently only ships in the diffusers git dev branch:
+    #   uv pip install git+https://github.com/huggingface/diffusers.git
+    "diffusers>=0.29",
+    "torch",
+    "transformers",
+    "accelerate>=0.30",
+    "pillow",
+    "sentencepiece",
+    "protobuf",
+]
+hf = ["datasets>=2.18", "huggingface_hub>=0.20", "pillow"]
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-q"

spaces/DEPLOY.md ADDED Viewed

	@@ -0,0 +1,77 @@

+# Deploying to HF Spaces
+All commands below assume `HF_TOKEN` is exported and has **write** scope on
+the `lanczos` namespace.
+## 1. Seed the dataset repo (images + mcq)
+```bash
+# Creates lanczos/aesthetic-annotators (private dataset) if missing,
+# uploads 810 PNGs + mcq_unlabeled.jsonl. Takes ~2 min for 1.3 GB.
+HF_TOKEN=$HF_TOKEN .venv/bin/python spaces/push_dataset.py \
+    --repo lanczos/aesthetic-annotators \
+    --images data/images_final \
+    --mcq data/mcq_unlabeled.jsonl
+```
+Verify at <https://huggingface.co/datasets/lanczos/aesthetic-annotators>.
+## 2. Create the Space
+```bash
+.venv/bin/huggingface-cli repo create aesthetic-annotators \
+    --type space --space_sdk docker \
+    --organization lanczos
+```
+Or in the web UI: New Space → name `aesthetic-annotators` → SDK: **Docker**.
+## 3. Set the Space secret
+In the Space's Settings → Variables and secrets, add a **secret**:
+| name | value |
+|---|---|
+| `HF_TOKEN` | same token (needs write scope on the dataset repo) |
+The Docker container will read `HF_TOKEN` from env to pull images on boot
+and push SQLite label backups every 60 s.
+## 4. Push the code to the Space
+The Space is a git repo. Add it as a remote, then run the bundled deploy
+script — it overlays `spaces/README.md` at repo root (HF reads its metadata
+from the root README frontmatter) on a temp branch and pushes that to
+`space/main`, so the GitHub root README stays untouched.
+```bash
+# One-time
+git remote add space https://huggingface.co/spaces/lanczos/aesthetic-annotators
+# Each deploy (HF prompts for credentials: user=lanczos, password=$HF_TOKEN)
+./spaces/push_to_space.sh
+```
+First build ~3 min; subsequent pushes ~1 min.
+## 5. Hand out the URL
+```
+https://lanczos-aesthetic-annotators.hf.space/
+```
+No `?token=` needed — first visit auto-registers. Labels persist across
+Space restarts because of the 60 s SQLite → dataset repo backup.
+## Reading labels back
+```bash
+# Download the latest SQLite backup and inspect
+huggingface-cli download lanczos/aesthetic-annotators \
+    labels/annotations.sqlite \
+    --repo-type dataset \
+    --local-dir ./backup
+sqlite3 backup/labels/annotations.sqlite \
+    "SELECT annotator_id, COUNT(*) FROM labels GROUP BY annotator_id"
+```

spaces/Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+FROM python:3.11-slim
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PYTHONPATH=/app/src
+WORKDIR /app
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+COPY spaces/requirements.txt /app/spaces/requirements.txt
+RUN pip install -r /app/spaces/requirements.txt
+COPY pyproject.toml /app/
+COPY src /app/src
+COPY labeling /app/labeling
+COPY configs /app/configs
+COPY spaces /app/spaces
+# HF Spaces mounts a writable /data directory when Persistent Storage is
+# enabled; fall back to an in-container path when running locally.
+ENV AAMCQ_DATA_DIR=/data
+RUN mkdir -p /data && chmod 777 /data
+EXPOSE 7860
+CMD ["python", "/app/spaces/space_entry.py"]

spaces/README.md ADDED Viewed

	@@ -0,0 +1,39 @@

+---
+title: Aesthetic Annotators
+emoji: 🎨
+colorFrom: purple
+colorTo: pink
+sdk: docker
+app_port: 7860
+pinned: false
+---
+# Aesthetic Annotators
+Public-URL labeling server for the AestheticMCQ dataset. Any visitor is
+auto-issued an anonymous annotator id on first hit; each session labels up
+to `AAMCQ_PER_ANNOTATOR_CAP` items (default 20) pulled breadth-first from
+the pool so every item receives one label before any receives a second.
+## Configuration
+Space secrets and variables (only `HF_TOKEN` needs to be a secret; the others are optional overrides):
+| name | required | default | notes |
+|---|---|---|---|
+| `HF_TOKEN` | yes | — | write scope on the companion dataset repo |
+| `AAMCQ_DATASET_REPO` | no | `lanczos/aesthetic-annotators` | source of images + mcq + label backups |
+| `AAMCQ_PER_ANNOTATOR_CAP` | no | `20` | items per session before "all done" |
+| `AAMCQ_LABELS_PER_ITEM` | no | `3` | target labels per item |
+| `AAMCQ_BACKUP_INTERVAL` | no | `60` | SQLite → dataset repo push interval (seconds) |
+## Data flow
+1. On boot, the Space pulls `images/*.png`, `mcq_unlabeled.jsonl`, and any
+   prior `labels/annotations.sqlite` from the dataset repo.
+2. Annotators land on the root URL → JS calls `POST /api/register` → server
+   mints a fresh `anon_*` id + token, cached in localStorage.
+3. `/api/task` hands out the least-labeled item the annotator hasn't seen.
+4. Every `AAMCQ_BACKUP_INTERVAL` seconds the server pushes the SQLite back
+   to `labels/annotations.sqlite` in the dataset repo, so Space
+   restarts/sleeps lose at most one backup interval's worth of labels.

spaces/push_dataset.py ADDED Viewed

	@@ -0,0 +1,75 @@

+"""One-shot upload of images + mcq_unlabeled.jsonl to the companion HF dataset repo.
+Creates the repo if it doesn't exist. Runs locally (needs HF_TOKEN in env with
+write scope). Safe to re-run; only changed files are re-uploaded.
+Usage:
+    HF_TOKEN=... .venv/bin/python spaces/push_dataset.py \
+        --repo lanczos/aesthetic-annotators \
+        --images data/images_final \
+        --mcq data/mcq_unlabeled.jsonl
+"""
+from __future__ import annotations
+import argparse
+import os
+from pathlib import Path
+from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
+REPO = Path(__file__).resolve().parents[1]
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--repo", default="lanczos/aesthetic-annotators")
+    ap.add_argument("--images", type=Path, default=REPO / "data" / "images_final")
+    ap.add_argument("--mcq", type=Path, default=REPO / "data" / "mcq_unlabeled.jsonl")
+    ap.add_argument("--private", action="store_true", default=True,
+                    help="create the repo as private (default)")
+    ap.add_argument("--public", action="store_true",
+                    help="override: create the repo as public")
+    args = ap.parse_args()
+    if args.public:
+        args.private = False
+    token = os.environ.get("HF_TOKEN")
+    if not token:
+        raise SystemExit("HF_TOKEN not set in env")
+    api = HfApi(token=token)
+    print(f"creating/confirming dataset repo {args.repo} (private={args.private}) ...")
+    create_repo(
+        repo_id=args.repo, repo_type="dataset",
+        private=args.private, exist_ok=True, token=token,
+    )
+    print(f"uploading {args.mcq.name} ...")
+    upload_file(
+        path_or_fileobj=str(args.mcq),
+        path_in_repo="mcq_unlabeled.jsonl",
+        repo_id=args.repo, repo_type="dataset",
+        token=token,
+        commit_message="update mcq_unlabeled.jsonl",
+    )
+    n_images = len(list(args.images.glob("*.png")))
+    print(f"uploading {n_images} images from {args.images}/ → images/ ...")
+    upload_folder(
+        folder_path=str(args.images),
+        path_in_repo="images",
+        repo_id=args.repo, repo_type="dataset",
+        token=token,
+        commit_message=f"upload {n_images} images",
+        allow_patterns=["*.png"],
+    )
+    print("done.")
+    print(f"  {args.repo} @ https://huggingface.co/datasets/{args.repo}")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

spaces/push_space.py ADDED Viewed

	@@ -0,0 +1,74 @@

+"""Push the Space files to the HF Space repo using huggingface_hub.
+Uses HfApi.upload_folder with a staging dir so we don't touch the repo's
+root README.md / Dockerfile (which would pollute the GitHub view). The
+Space-only overlay (`spaces/README.md` at root, `spaces/Dockerfile` at root)
+is materialized inside the staging dir only.
+"""
+from __future__ import annotations
+import argparse
+import os
+import shutil
+import tempfile
+from pathlib import Path
+from huggingface_hub import HfApi
+REPO = Path(__file__).resolve().parents[1]
+# What to ship to the Space, as (source_rel, dest_rel) pairs. Source paths are
+# relative to the repository root (REPO); destination paths are relative to
+# the staging directory that gets uploaded. Dirs are recursively copied; files
+# are copied verbatim. `spaces/README.md` and `spaces/Dockerfile` are shipped
+# twice: once at the Space root and once inside the `spaces` directory copy.
+SHIPMENT = [
+    ("spaces/README.md", "README.md"),
+    ("spaces/Dockerfile", "Dockerfile"),
+    ("pyproject.toml", "pyproject.toml"),
+    ("src", "src"),
+    ("labeling", "labeling"),
+    ("configs", "configs"),
+    ("spaces", "spaces"),
+]
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--repo", default="lanczos/aesthetic-annotators")
+    args = ap.parse_args()
+    token = os.environ.get("HF_TOKEN")
+    if not token:
+        raise SystemExit("HF_TOKEN not set")
+    with tempfile.TemporaryDirectory() as tmp:
+        staging = Path(tmp) / "space"
+        staging.mkdir()
+        for src_rel, dst_rel in SHIPMENT:
+            src = REPO / src_rel
+            dst = staging / dst_rel
+            if src.is_dir():
+                shutil.copytree(src, dst)
+            else:
+                dst.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(src, dst)
+        # Ignore Python cache / editor cruft inside src/
+        for bad in staging.rglob("__pycache__"):
+            if bad.is_dir():
+                shutil.rmtree(bad)
+        api = HfApi(token=token)
+        print(f"uploading to {args.repo} (space) ...")
+        api.upload_folder(
+            folder_path=str(staging),
+            repo_id=args.repo,
+            repo_type="space",
+            commit_message="deploy: labeling server",
+        )
+    print("done. Space will build now:")
+    print(f"  https://huggingface.co/spaces/{args.repo}")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

spaces/push_to_space.sh ADDED Viewed

	@@ -0,0 +1,21 @@

+#!/usr/bin/env bash
+# Push the current commit to the HF Space remote, with spaces/README.md
+# overlaid at repo root (HF Spaces reads its metadata from root README.md
+# frontmatter; the GitHub root README stays untouched).
+#
+# Prereq once: git remote add space https://huggingface.co/spaces/lanczos/aesthetic-annotators
+# When git prompts for credentials on push, user = `lanczos`, password = $HF_TOKEN.
+set -euo pipefail
+# Remember the starting branch so the EXIT trap can switch back to it.
+BRANCH=$(git rev-parse --abbrev-ref HEAD)
+# Throwaway branch name, made unique with the current epoch seconds.
+TEMP=space-deploy-$(date +%s)
+# On any exit (success or failure): return to the starting branch and delete
+# the temp branch. Both steps are best-effort and their output is discarded.
+trap 'git checkout "$BRANCH" >/dev/null 2>&1 || true; git branch -D "$TEMP" >/dev/null 2>&1 || true' EXIT
+git checkout -b "$TEMP"
+# Overlay the Space README on the temp branch only.
+cp spaces/README.md README.md
+git add README.md
+# --no-verify skips local git hooks for this throwaway commit.
+git commit --no-verify -m "deploy: use spaces/README.md as root"
+# Force-push the temp branch over the Space's main branch.
+git push -f space "$TEMP:main"
+echo "pushed to space/main (overlaid README from spaces/README.md)"

spaces/requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastapi>=0.110
+uvicorn[standard]>=0.27
+pyyaml>=6.0
+pydantic>=2.5
+numpy>=1.24
+huggingface_hub>=0.20

spaces/space_entry.py ADDED Viewed

	@@ -0,0 +1,155 @@

+"""Entry point for the AestheticMCQ labeling server on HF Spaces.
+On boot:
+  1. Pull images/*.png and mcq_unlabeled.jsonl from the companion dataset repo.
+  2. Pull labels/annotations.sqlite from the dataset repo if it exists.
+  3. Bootstrap the SQLite items table from the mcq file.
+  4. Launch the FastAPI app in pool-mode + anonymous-register configuration.
+  5. Every BACKUP_INTERVAL seconds, push the SQLite back to the dataset repo
+     so labels survive Space restarts / sleeps.
+Env vars:
+  HF_TOKEN                   required; write access to AAMCQ_DATASET_REPO
+  AAMCQ_DATASET_REPO         default: lanczos/aesthetic-annotators
+  AAMCQ_PER_ANNOTATOR_CAP    default: 20
+  AAMCQ_LABELS_PER_ITEM      default: 3
+  AAMCQ_BACKUP_INTERVAL      default: 60 (seconds)
+"""
+from __future__ import annotations
+import asyncio
+import json
+import os
+import shutil
+import time
+from pathlib import Path
+import uvicorn
+from huggingface_hub import hf_hub_download, snapshot_download, upload_file
+from aamcq.annotation import db as dbmod
+from aamcq.annotation.api import create_app
+# Dataset repo used for images, the MCQ file, and SQLite label backups.
+DATASET_REPO = os.environ.get("AAMCQ_DATASET_REPO", "lanczos/aesthetic-annotators")
+# Hub token; may be None here. _require_token() checks it when it is needed.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Local working directory; the paths below all live inside it.
+DATA_DIR = Path(os.environ.get("AAMCQ_DATA_DIR", "/data"))
+IMAGE_DIR = DATA_DIR / "images"
+DB_PATH = DATA_DIR / "annotations.sqlite"
+MCQ_PATH = DATA_DIR / "mcq_unlabeled.jsonl"
+# Integer tunables parsed at import time; a non-integer value raises ValueError.
+BACKUP_INTERVAL = int(os.environ.get("AAMCQ_BACKUP_INTERVAL", "60"))
+PER_ANNOTATOR_CAP = int(os.environ.get("AAMCQ_PER_ANNOTATOR_CAP", "20"))
+LABELS_PER_ITEM = int(os.environ.get("AAMCQ_LABELS_PER_ITEM", "3"))
+def _require_token() -> str:
+    if not HF_TOKEN:
+        raise SystemExit(
+            "HF_TOKEN is unset. Set it as a Space secret with write access to "
+            f"{DATASET_REPO}."
+        )
+    return HF_TOKEN
+def bootstrap_from_dataset() -> None:
+    """Pull images, mcq file, and any existing labels SQLite from the dataset repo."""
+    token = _require_token()
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    IMAGE_DIR.mkdir(parents=True, exist_ok=True)
+    print(f"pulling images + mcq from {DATASET_REPO} ...")
+    snapshot_download(
+        DATASET_REPO,
+        repo_type="dataset",
+        local_dir=str(DATA_DIR),
+        allow_patterns=["images/*.png", "mcq_unlabeled.jsonl"],
+        token=token,
+    )
+    # Best-effort: restore previous SQLite so Space restarts don't lose labels.
+    try:
+        local = hf_hub_download(
+            DATASET_REPO,
+            "labels/annotations.sqlite",
+            repo_type="dataset",
+            token=token,
+        )
+        shutil.copy2(local, DB_PATH)
+        print(f"restored labels SQLite from {DATASET_REPO}/labels/")
+    except Exception as e:  # 404 on first run is normal
+        print(f"no prior SQLite backup ({type(e).__name__}); starting fresh")
+def init_items_in_db() -> None:
+    """Load items from mcq_unlabeled.jsonl into the SQLite items table."""
+    if not MCQ_PATH.exists():
+        raise SystemExit(f"{MCQ_PATH} missing — dataset repo may be empty")
+    conn = dbmod.connect(DB_PATH)
+    dbmod.init_schema(conn)
+    existing = {row["item_id"] for row in conn.execute("SELECT item_id FROM items")}
+    added = 0
+    with open(MCQ_PATH) as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            row = json.loads(line)
+            if row["item_id"] in existing:
+                continue
+            dbmod.insert_item(conn, row["item_id"], row, is_gold=bool(row.get("is_gold")))
+            added += 1
+    conn.close()
+    print(f"items table: {len(existing)} existing + {added} new")
+async def periodic_backup() -> None:
+    """Push the SQLite file to the dataset repo whenever it's been written since last push."""
+    token = _require_token()
+    last_mtime = 0.0
+    while True:
+        await asyncio.sleep(BACKUP_INTERVAL)
+        try:
+            mtime = DB_PATH.stat().st_mtime
+        except FileNotFoundError:
+            continue
+        if mtime <= last_mtime:
+            continue
+        try:
+            upload_file(
+                path_or_fileobj=str(DB_PATH),
+                path_in_repo="labels/annotations.sqlite",
+                repo_id=DATASET_REPO,
+                repo_type="dataset",
+                token=token,
+                commit_message=f"backup @ {int(time.time())}",
+            )
+            last_mtime = mtime
+            print(f"pushed SQLite backup to {DATASET_REPO}/labels/")
+        except Exception as e:
+            print(f"backup upload failed: {type(e).__name__}: {e}")
+def main() -> int:
+    bootstrap_from_dataset()
+    init_items_in_db()
+    app = create_app(
+        db_path=DB_PATH,
+        image_dir=IMAGE_DIR,
+        pool_mode=True,
+        anonymous_register=True,
+        max_labels_per_item=LABELS_PER_ITEM,
+        max_labels_per_annotator=PER_ANNOTATOR_CAP,
+    )
+    @app.on_event("startup")
+    async def _start_backup() -> None:
+        asyncio.create_task(periodic_backup())
+    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

src/aamcq/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ """AestheticMCQ — human-labeled MCQ dataset for aesthetic similarity."""
2	+
3	+ __version__ = "0.1.0"

src/aamcq/annotation/__init__.py ADDED Viewed

File without changes

src/aamcq/annotation/api.py ADDED Viewed

	@@ -0,0 +1,205 @@

+"""FastAPI app serving the annotator web UI plus /api/register, /api/task, /api/label, /api/progress."""
+from __future__ import annotations
+import os
+import sqlite3
+from pathlib import Path
+from fastapi import Depends, FastAPI, HTTPException, Query
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel, Field, conint
+from aamcq.annotation import db as dbmod
+from aamcq.annotation.assignment import bootstrap_annotators
+REPO_ROOT = Path(__file__).resolve().parents[3]
+DEFAULT_DB = REPO_ROOT / "data" / "annotations.sqlite"
+DEFAULT_IMAGE_DIR = REPO_ROOT / "data" / "images"
+DEFAULT_STATIC_DIR = REPO_ROOT / "labeling" / "static"
+class LabelPayload(BaseModel):
+    """Request body accepted by ``POST /api/label``."""
+    # Annotator token; the endpoint resolves it to an annotator id.
+    token: str = Field(min_length=8, max_length=128)
+    # Id of the item being labeled.
+    item_id: str = Field(min_length=1, max_length=128)
+    # Index of the chosen option, restricted to 0..3
+    # (presumably four options per item; confirm against the MCQ schema).
+    chosen_index: conint(ge=0, le=3)  # type: ignore[valid-type]
+    # Optional time reported by the client, bounded to 0..3600.
+    seconds: float | None = Field(default=None, ge=0, le=3600)
+    # Optional self-reported confidence on a 1..5 scale.
+    confidence: int | None = Field(default=None, ge=1, le=5)
+def _sanitize_item(payload: dict) -> dict:
+    """Strip `correct_index` before sending to annotator."""
+    return {k: v for k, v in payload.items() if k != "correct_index"}
+def create_app(
+    db_path: str | os.PathLike[str] | None = None,
+    image_dir: str | os.PathLike[str] | None = None,
+    static_dir: str | os.PathLike[str] | None = None,
+    pool_mode: bool = False,
+    anonymous_register: bool = False,
+    max_labels_per_item: int = 3,
+    max_labels_per_annotator: int | None = None,
+) -> FastAPI:
+    """Labeling server.
+    `pool_mode=False` (default): annotators see only items pre-assigned to them
+    (round-robin). Requires bootstrap_annotators() + assign_items_round_robin()
+    before serving.
+    `pool_mode=True`: ignore pre-assignment; dispatch any item that still needs
+    labels and this annotator hasn't labeled. Items are handed out breadth-first
+    over existing-label-count — every item gets one label before anyone gets a
+    second. Unfinished work from one annotator is naturally picked up by the
+    next person who logs in. Cap each session with `max_labels_per_annotator`.
+    `anonymous_register=True`: `POST /api/register` mints a fresh annotator_id
+    + token on demand, so a single public URL can serve any number of
+    concurrent anonymous annotators (each browser session = one annotator).
+    Intended for public-URL crowdsourcing.
+    """
+    db_path = Path(db_path or DEFAULT_DB)
+    image_dir = Path(image_dir or DEFAULT_IMAGE_DIR)
+    static_dir = Path(static_dir or DEFAULT_STATIC_DIR)
+    app = FastAPI(title="AestheticMCQ Annotation")
+    conn = dbmod.connect(db_path)
+    dbmod.init_schema(conn)
+    app.state.conn = conn
+    app.state.image_dir = image_dir
+    app.state.pool_mode = pool_mode
+    app.state.anonymous_register = anonymous_register
+    app.state.max_labels_per_item = max_labels_per_item
+    app.state.max_labels_per_annotator = max_labels_per_annotator
+    def get_conn() -> sqlite3.Connection:
+        return app.state.conn
+    def resolve_annotator(
+        token: str,
+        conn: sqlite3.Connection = Depends(get_conn),
+    ) -> str:
+        annotator_id = dbmod.get_annotator_by_token(conn, token)
+        if not annotator_id:
+            raise HTTPException(status_code=401, detail="invalid token")
+        return annotator_id
+    def _next_task_payload(annotator_id: str, conn: sqlite3.Connection, n_done: int) -> dict:
+        cap = app.state.max_labels_per_annotator
+        if cap is not None and n_done >= cap:
+            return {"done": True, "reason": "cap_reached", "labeled": n_done, "cap": cap}
+        if app.state.pool_mode:
+            item = dbmod.next_pooled_item(conn, annotator_id, app.state.max_labels_per_item)
+        else:
+            item = dbmod.next_unlabeled_item(conn, annotator_id)
+        if item is None:
+            return {"done": True, "reason": "pool_empty", "labeled": n_done}
+        return {
+            "done": False,
+            "item_id": item.item_id,
+            "payload": _sanitize_item(item.payload),
+            "image_url": f"/images/{item.item_id}.png",
+            "labeled": n_done,
+            "cap": cap,
+        }
+    @app.post("/api/register")
+    def api_register(conn: sqlite3.Connection = Depends(get_conn)):
+        """Mint a fresh anonymous annotator. Only enabled when anonymous_register."""
+        if not app.state.anonymous_register:
+            raise HTTPException(status_code=404, detail="anonymous register disabled")
+        existing = {row["annotator_id"] for row in conn.execute(
+            "SELECT annotator_id FROM annotators"
+        )}
+        n = 0
+        while True:
+            candidate = f"anon_{dbmod.mint_token()[:10]}"
+            if candidate not in existing:
+                break
+            n += 1
+            if n > 8:
+                raise HTTPException(status_code=500, detail="could not mint unique id")
+        tokens = bootstrap_annotators(conn, [candidate])
+        return {"annotator_id": candidate, "token": tokens[candidate]}
+    @app.get("/api/task")
+    def api_task(
+        token: str = Query(min_length=8, max_length=128),
+        conn: sqlite3.Connection = Depends(get_conn),
+    ):
+        annotator_id = resolve_annotator(token, conn)
+        n_done = dbmod.count_annotator_labels(conn, annotator_id)
+        return _next_task_payload(annotator_id, conn, n_done)
+    @app.post("/api/label")
+    def api_label(
+        payload: LabelPayload,
+        conn: sqlite3.Connection = Depends(get_conn),
+    ):
+        annotator_id = resolve_annotator(payload.token, conn)
+        item_row = dbmod.get_item(conn, payload.item_id)
+        if item_row is None:
+            raise HTTPException(status_code=404, detail="unknown item_id")
+        if not app.state.pool_mode:
+            # Pre-assigned mode: require an assignment row.
+            assigned = conn.execute(
+                "SELECT 1 FROM assignments WHERE item_id = ? AND annotator_id = ? LIMIT 1",
+                (payload.item_id, annotator_id),
+            ).fetchone()
+            if assigned is None:
+                raise HTTPException(status_code=403, detail="item not assigned to annotator")
+        dbmod.record_label(
+            conn,
+            payload.item_id,
+            annotator_id,
+            int(payload.chosen_index),
+            payload.seconds,
+            payload.confidence,
+        )
+        return {"ok": True}
+    @app.get("/api/progress")
+    def api_progress(
+        token: str = Query(min_length=8, max_length=128),
+        conn: sqlite3.Connection = Depends(get_conn),
+    ):
+        annotator_id = resolve_annotator(token, conn)
+        n_done = dbmod.count_annotator_labels(conn, annotator_id)
+        if app.state.pool_mode:
+            cap = app.state.max_labels_per_annotator
+            return {
+                "labeled": n_done,
+                "assigned": cap if cap is not None else 0,
+            }
+        return dbmod.progress(conn, annotator_id)
+    @app.get("/images/{item_id}.png")
+    def serve_image(item_id: str):
+        # Defense against path traversal — allow only [A-Za-z0-9_-.] in item_id.
+        if not item_id or any(c not in _ALLOWED_ITEM_CHARS for c in item_id):
+            raise HTTPException(status_code=400, detail="bad item_id")
+        path = (app.state.image_dir / f"{item_id}.png").resolve()
+        if app.state.image_dir.resolve() not in path.parents:
+            raise HTTPException(status_code=400, detail="bad path")
+        if not path.exists():
+            raise HTTPException(status_code=404, detail="image missing")
+        return FileResponse(path, media_type="image/png")
+    @app.get("/healthz")
+    def healthz():
+        return {"ok": True}
+    if static_dir.exists():
+        app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")
+    else:
+        @app.get("/")
+        def root():
+            return JSONResponse({"detail": "static dir missing; /api/* still usable"})
+    return app
+# Characters accepted in an item_id by the image route: ASCII letters, digits,
+# underscore, hyphen, and dot. Path separators are not in the set.
+_ALLOWED_ITEM_CHARS = frozenset(
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-."
+)

src/aamcq/annotation/assignment.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""Assign MCQ items to annotators with gold-item injection + k-fold coverage."""
+from __future__ import annotations
+import random
+from dataclasses import dataclass
+from typing import Iterable
+from aamcq.annotation import db as dbmod
+@dataclass(frozen=True)
+class AssignmentPolicy:
+    """Immutable knobs for pre-assigning items to annotators."""
+    # Number of distinct annotators each item is assigned to.
+    labels_per_item: int = 3
+    # Gold items injected per annotator, as a fraction of that annotator's queue length.
+    gold_injection_rate: float = 0.10
+def assign_items_round_robin(
+    conn,
+    annotator_ids: list[str],
+    item_ids: list[str],
+    gold_item_ids: list[str],
+    policy: AssignmentPolicy,
+    rng: random.Random,
+) -> dict[str, list[str]]:
+    """Create assignments so each non-gold item gets `labels_per_item` distinct annotators.
+    Gold items are inserted randomly into each annotator's queue at the requested rate.
+    Returns a mapping {annotator_id: [item_id, ...] in assigned order}.
+    """
+    if not annotator_ids:
+        raise ValueError("need at least 1 annotator")
+    if policy.labels_per_item > len(annotator_ids):
+        raise ValueError(
+            f"labels_per_item={policy.labels_per_item} > annotators={len(annotator_ids)}"
+        )
+    queues: dict[str, list[str]] = {aid: [] for aid in annotator_ids}
+    shuffled_items = list(item_ids)
+    rng.shuffle(shuffled_items)
+    for item_id in shuffled_items:
+        chosen = rng.sample(annotator_ids, policy.labels_per_item)
+        for aid in chosen:
+            queues[aid].append(item_id)
+    if gold_item_ids and policy.gold_injection_rate > 0:
+        for aid, queue in queues.items():
+            n_gold = max(1, int(round(len(queue) * policy.gold_injection_rate)))
+            gold_pick = rng.choices(gold_item_ids, k=n_gold)
+            # interleave golds at random positions
+            for gold_id in gold_pick:
+                pos = rng.randrange(len(queue) + 1)
+                queue.insert(pos, gold_id)
+    for aid, queue in queues.items():
+        for item_id in queue:
+            dbmod.insert_assignment(conn, item_id, aid)
+    return queues
+def bootstrap_annotators(
+    conn, annotator_ids: Iterable[str]
+) -> dict[str, str]:
+    """Create annotator rows with freshly minted tokens. Returns {annotator_id: token}."""
+    tokens: dict[str, str] = {}
+    for aid in annotator_ids:
+        token = dbmod.mint_token()
+        dbmod.insert_annotator(conn, aid, token)
+        tokens[aid] = token
+    return tokens

src/aamcq/annotation/db.py ADDED Viewed

	@@ -0,0 +1,256 @@

+"""SQLite schema + DAO for the annotation backend.
+Single-writer model: each (item, annotator) pair holds at most one label, and a
+resubmission overwrites the earlier one; in pre-assigned mode, assignments are
+created up front. We use stdlib sqlite3 rather than SQLAlchemy to keep the
+install footprint small.
+"""
+from __future__ import annotations
+import json
+import secrets
+import sqlite3
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterator
+# DDL for the whole annotation database. Every statement uses IF NOT EXISTS,
+# so the script can be run again on an existing database. `assignments` and
+# `labels` are both keyed on (item_id, annotator_id) and reference `items` and
+# `annotators` through foreign keys.
+SCHEMA = """
+CREATE TABLE IF NOT EXISTS items (
+    item_id TEXT PRIMARY KEY,
+    payload_json TEXT NOT NULL,
+    is_gold INTEGER NOT NULL DEFAULT 0
+);
+CREATE TABLE IF NOT EXISTS annotators (
+    annotator_id TEXT PRIMARY KEY,
+    token TEXT NOT NULL UNIQUE,
+    created_at REAL NOT NULL
+);
+CREATE TABLE IF NOT EXISTS assignments (
+    item_id TEXT NOT NULL,
+    annotator_id TEXT NOT NULL,
+    assigned_at REAL NOT NULL,
+    PRIMARY KEY (item_id, annotator_id),
+    FOREIGN KEY (item_id) REFERENCES items(item_id),
+    FOREIGN KEY (annotator_id) REFERENCES annotators(annotator_id)
+);
+CREATE TABLE IF NOT EXISTS labels (
+    item_id TEXT NOT NULL,
+    annotator_id TEXT NOT NULL,
+    chosen_index INTEGER NOT NULL,
+    seconds REAL,
+    confidence INTEGER,
+    submitted_at REAL NOT NULL,
+    PRIMARY KEY (item_id, annotator_id),
+    FOREIGN KEY (item_id) REFERENCES items(item_id),
+    FOREIGN KEY (annotator_id) REFERENCES annotators(annotator_id)
+);
+CREATE INDEX IF NOT EXISTS idx_assignments_annotator
+    ON assignments(annotator_id);
+CREATE INDEX IF NOT EXISTS idx_labels_annotator
+    ON labels(annotator_id);
+"""
+@dataclass(frozen=True)
+class ItemRow:
+    """One row of the `items` table with its JSON payload already decoded."""
+    item_id: str
+    # Decoded from the payload_json column.
+    payload: dict
+    # Stored as an integer in SQLite; exposed as a bool.
+    is_gold: bool
+@dataclass(frozen=True)
+class LabelRow:
+    """One row of the `labels` table: a single annotator's answer for a single item."""
+    item_id: str
+    annotator_id: str
+    chosen_index: int
+    # Nullable in the schema.
+    seconds: float | None
+    # Nullable in the schema.
+    confidence: int | None
+    # Written from time.time() when the label is recorded.
+    submitted_at: float
+def connect(db_path: str | Path) -> sqlite3.Connection:
+    db_path = Path(db_path)
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    conn = sqlite3.connect(db_path, check_same_thread=False, isolation_level=None)
+    conn.row_factory = sqlite3.Row
+    conn.execute("PRAGMA foreign_keys = ON")
+    conn.execute("PRAGMA journal_mode = WAL")
+    return conn
+def init_schema(conn: sqlite3.Connection) -> None:
+    """Run the SCHEMA script on `conn`, creating any tables and indexes that are missing."""
+    conn.executescript(SCHEMA)
+def mint_token() -> str:
+    """Return a new random URL-safe token built from 16 random bytes."""
+    return secrets.token_urlsafe(16)
+def insert_item(conn: sqlite3.Connection, item_id: str, payload: dict, is_gold: bool = False) -> None:
+    conn.execute(
+        "INSERT OR REPLACE INTO items(item_id, payload_json, is_gold) VALUES (?, ?, ?)",
+        (item_id, json.dumps(payload, sort_keys=True), int(is_gold)),
+    )
+def insert_annotator(conn: sqlite3.Connection, annotator_id: str, token: str) -> None:
+    conn.execute(
+        "INSERT OR REPLACE INTO annotators(annotator_id, token, created_at) VALUES (?, ?, ?)",
+        (annotator_id, token, time.time()),
+    )
+def insert_assignment(conn: sqlite3.Connection, item_id: str, annotator_id: str) -> None:
+    conn.execute(
+        "INSERT OR IGNORE INTO assignments(item_id, annotator_id, assigned_at) "
+        "VALUES (?, ?, ?)",
+        (item_id, annotator_id, time.time()),
+    )
+def get_annotator_by_token(conn: sqlite3.Connection, token: str) -> str | None:
+    row = conn.execute(
+        "SELECT annotator_id FROM annotators WHERE token = ?", (token,)
+    ).fetchone()
+    return row["annotator_id"] if row else None
+def get_item(conn: sqlite3.Connection, item_id: str) -> ItemRow | None:
+    row = conn.execute(
+        "SELECT item_id, payload_json, is_gold FROM items WHERE item_id = ?",
+        (item_id,),
+    ).fetchone()
+    if not row:
+        return None
+    return ItemRow(
+        item_id=row["item_id"],
+        payload=json.loads(row["payload_json"]),
+        is_gold=bool(row["is_gold"]),
+    )
+def next_unlabeled_item(
+    conn: sqlite3.Connection, annotator_id: str
+) -> ItemRow | None:
+    """Pre-assigned dispatch: hand out the annotator's next un-labeled assignment."""
+    row = conn.execute(
+        """
+        SELECT items.item_id, items.payload_json, items.is_gold
+        FROM assignments
+        JOIN items ON items.item_id = assignments.item_id
+        LEFT JOIN labels
+            ON labels.item_id = assignments.item_id
+           AND labels.annotator_id = assignments.annotator_id
+        WHERE assignments.annotator_id = ?
+          AND labels.item_id IS NULL
+        ORDER BY assignments.assigned_at ASC
+        LIMIT 1
+        """,
+        (annotator_id,),
+    ).fetchone()
+    if not row:
+        return None
+    return ItemRow(
+        item_id=row["item_id"],
+        payload=json.loads(row["payload_json"]),
+        is_gold=bool(row["is_gold"]),
+    )
+def next_pooled_item(
+    conn: sqlite3.Connection,
+    annotator_id: str,
+    max_labels_per_item: int,
+) -> ItemRow | None:
+    """Pull-based dispatch: pick any item needing more labels that this
+    annotator hasn't seen yet. Breadth-first over coverage so every item gets
+    at least one label before anyone gets a second."""
+    row = conn.execute(
+        """
+        SELECT items.item_id, items.payload_json, items.is_gold,
+               COALESCE(counts.n, 0) AS n_labels
+        FROM items
+        LEFT JOIN (
+            SELECT item_id, COUNT(*) AS n
+            FROM labels
+            GROUP BY item_id
+        ) AS counts ON counts.item_id = items.item_id
+        LEFT JOIN labels mine
+            ON mine.item_id = items.item_id AND mine.annotator_id = ?
+        WHERE mine.item_id IS NULL
+          AND COALESCE(counts.n, 0) < ?
+        ORDER BY n_labels ASC, items.item_id ASC
+        LIMIT 1
+        """,
+        (annotator_id, max_labels_per_item),
+    ).fetchone()
+    if not row:
+        return None
+    return ItemRow(
+        item_id=row["item_id"],
+        payload=json.loads(row["payload_json"]),
+        is_gold=bool(row["is_gold"]),
+    )
+def count_annotator_labels(conn: sqlite3.Connection, annotator_id: str) -> int:
+    return int(conn.execute(
+        "SELECT COUNT(*) AS n FROM labels WHERE annotator_id = ?",
+        (annotator_id,),
+    ).fetchone()["n"])
+def record_label(
+    conn: sqlite3.Connection,
+    item_id: str,
+    annotator_id: str,
+    chosen_index: int,
+    seconds: float | None,
+    confidence: int | None,
+) -> None:
+    conn.execute(
+        """
+        INSERT OR REPLACE INTO labels
+            (item_id, annotator_id, chosen_index, seconds, confidence, submitted_at)
+        VALUES (?, ?, ?, ?, ?, ?)
+        """,
+        (item_id, annotator_id, chosen_index, seconds, confidence, time.time()),
+    )
+def progress(conn: sqlite3.Connection, annotator_id: str) -> dict[str, int]:
+    assigned = conn.execute(
+        "SELECT COUNT(*) AS n FROM assignments WHERE annotator_id = ?",
+        (annotator_id,),
+    ).fetchone()["n"]
+    labeled = conn.execute(
+        "SELECT COUNT(*) AS n FROM labels WHERE annotator_id = ?",
+        (annotator_id,),
+    ).fetchone()["n"]
+    return {"assigned": int(assigned), "labeled": int(labeled)}
+def iter_labels(conn: sqlite3.Connection) -> Iterator[LabelRow]:
+    for row in conn.execute(
+        "SELECT item_id, annotator_id, chosen_index, seconds, confidence, submitted_at FROM labels"
+    ):
+        yield LabelRow(
+            item_id=row["item_id"],
+            annotator_id=row["annotator_id"],
+            chosen_index=int(row["chosen_index"]),
+            seconds=row["seconds"],
+            confidence=row["confidence"],
+            submitted_at=float(row["submitted_at"]),
+        )
+@contextmanager
+def open_db(db_path: str | Path):
+    conn = connect(db_path)
+    try:
+        init_schema(conn)
+        yield conn
+    finally:
+        conn.close()

src/aamcq/distractors.py ADDED Viewed

	@@ -0,0 +1,187 @@

+"""Distractor sampling policies for MCQ items."""
+from __future__ import annotations
+from typing import Literal
+import numpy as np
+from aamcq.profile import AXES, VisualProfile, enumerate_profiles
+# Names of the supported distractor sampling policies; `sample_distractors`
+# dispatches on exactly these four strings.
+Strategy = Literal["one_axis_swap", "two_axis_swap", "axis_cluster", "random"]
+# Runtime tuple listing the same names as the `Strategy` literal, in the same
+# order (usable for membership checks, which a `Literal` type is not).
+STRATEGIES: tuple[Strategy, ...] = (
+    "one_axis_swap",
+    "two_axis_swap",
+    "axis_cluster",
+    "random",
+)
+# Per-axis cost used by `_axis_cluster` when no `axis_weights` is supplied.
+# A candidate's cost is the sum of the weights of the axes it changes and the
+# lowest-cost candidates are picked, so low-weight axes are changed first.
+DEFAULT_AXIS_WEIGHTS: dict[str, float] = {
+    "art_style": 3.0,
+    "art_medium": 2.5,
+    "color": 2.0,
+    "lighting": 1.5,
+}
+def _mutate_axes(
+    gt: VisualProfile,
+    axes_to_mutate: tuple[str, ...],
+    vocab: dict[str, list[str]],
+    rng: np.random.Generator,
+) -> VisualProfile:
+    new_values = gt.to_dict()
+    for axis in axes_to_mutate:
+        options = [v for v in vocab[axis] if v != getattr(gt, axis)]
+        if not options:
+            continue
+        new_values[axis] = str(rng.choice(options))
+    return VisualProfile.from_dict(new_values)
+def _one_axis_swap(
+    gt: VisualProfile, vocab: dict[str, list[str]], rng: np.random.Generator
+) -> list[VisualProfile]:
+    axes = list(AXES)
+    rng.shuffle(axes)
+    picks: list[VisualProfile] = []
+    seen: set[tuple[str, ...]] = {gt.to_tuple()}
+    for axis in axes:
+        options = [v for v in vocab[axis] if v != getattr(gt, axis)]
+        rng.shuffle(options)
+        for value in options:
+            candidate = VisualProfile.from_dict({**gt.to_dict(), axis: value})
+            key = candidate.to_tuple()
+            if key not in seen:
+                picks.append(candidate)
+                seen.add(key)
+                break
+        if len(picks) == 3:
+            return picks
+    # Fallback (should only trigger for degenerate vocabs).
+    return _ensure_three(picks, gt, vocab, rng)
+def _two_axis_swap(
+    gt: VisualProfile, vocab: dict[str, list[str]], rng: np.random.Generator
+) -> list[VisualProfile]:
+    """Return 3 distractors at exact Hamming distance 2 from gt.
+    With a 5-axis vocab (min 3 values per axis), Hamming-2 neighbors always
+    exist in sufficient number, so the random-sample loop converges reliably.
+    We enumerate the full Hamming-2 set as a fallback to avoid the generic
+    `_ensure_three` path leaking mixed Hamming distances.
+    """
+    axes = list(AXES)
+    picks: list[VisualProfile] = []
+    seen: set[tuple[str, ...]] = {gt.to_tuple()}
+    attempts = 0
+    while len(picks) < 3 and attempts < 128:
+        attempts += 1
+        pair_idx = rng.choice(len(axes), size=2, replace=False)
+        pair = (axes[int(pair_idx[0])], axes[int(pair_idx[1])])
+        candidate = _mutate_axes(gt, pair, vocab, rng)
+        key = candidate.to_tuple()
+        if key not in seen:
+            picks.append(candidate)
+            seen.add(key)
+    if len(picks) < 3:
+        pool = [
+            p for p in enumerate_profiles(vocab)
+            if p.hamming(gt) == 2 and p.to_tuple() not in seen
+        ]
+        rng.shuffle(pool)
+        picks.extend(pool[: 3 - len(picks)])
+    if len(picks) < 3:
+        raise RuntimeError("not enough Hamming-2 neighbors; vocab too small")
+    return picks[:3]
+def _axis_cluster(
+    gt: VisualProfile,
+    vocab: dict[str, list[str]],
+    rng: np.random.Generator,
+    axis_weights: dict[str, float] | None = None,
+) -> list[VisualProfile]:
+    weights = axis_weights or DEFAULT_AXIS_WEIGHTS
+    # Candidates at Hamming distance 1 or 2.
+    scored: list[tuple[float, int, VisualProfile]] = []
+    # Tie-break seed drawn from rng so ordering is reproducible.
+    noise_seed = int(rng.integers(0, 2**31 - 1))
+    noise_rng = np.random.default_rng(noise_seed)
+    for candidate in enumerate_profiles(vocab):
+        if candidate == gt:
+            continue
+        diffs = candidate.differs_on(gt)
+        if not (1 <= len(diffs) <= 2):
+            continue
+        cost = sum(weights.get(axis, 1.0) for axis in diffs)
+        jitter = float(noise_rng.random()) * 1e-3
+        scored.append((cost + jitter, len(diffs), candidate))
+    scored.sort(key=lambda t: (t[0], t[1]))
+    picks = [c for _, _, c in scored[:3]]
+    return _ensure_three(picks, gt, vocab, rng)
+def _random(
+    gt: VisualProfile, vocab: dict[str, list[str]], rng: np.random.Generator
+) -> list[VisualProfile]:
+    all_profiles = [p for p in enumerate_profiles(vocab) if p != gt]
+    idx = rng.choice(len(all_profiles), size=3, replace=False)
+    return [all_profiles[int(i)] for i in idx]
+def _ensure_three(
+    picks: list[VisualProfile],
+    gt: VisualProfile,
+    vocab: dict[str, list[str]],
+    rng: np.random.Generator,
+) -> list[VisualProfile]:
+    seen = {gt.to_tuple(), *(p.to_tuple() for p in picks)}
+    if len(picks) >= 3:
+        return picks[:3]
+    pool = [p for p in enumerate_profiles(vocab) if p.to_tuple() not in seen]
+    rng.shuffle(pool)
+    picks.extend(pool[: 3 - len(picks)])
+    return picks[:3]
+def sample_distractors(
+    gt: VisualProfile,
+    strategy: Strategy,
+    vocab: dict[str, list[str]],
+    rng: np.random.Generator,
+    axis_weights: dict[str, float] | None = None,
+) -> list[VisualProfile]:
+    """Return exactly 3 distractor profiles; none equals gt or any other pick."""
+    if strategy == "one_axis_swap":
+        picks = _one_axis_swap(gt, vocab, rng)
+    elif strategy == "two_axis_swap":
+        picks = _two_axis_swap(gt, vocab, rng)
+    elif strategy == "axis_cluster":
+        picks = _axis_cluster(gt, vocab, rng, axis_weights)
+    elif strategy == "random":
+        picks = _random(gt, vocab, rng)
+    else:
+        raise ValueError(f"unknown strategy {strategy!r}")
+    if len(picks) != 3:
+        raise RuntimeError(f"distractor sampler returned {len(picks)} items, expected 3")
+    tuples = {p.to_tuple() for p in picks}
+    if gt.to_tuple() in tuples or len(tuples) != 3:
+        raise RuntimeError(f"duplicate or gt leakage: gt={gt}, picks={picks}")
+    return picks
+def build_options(
+    gt: VisualProfile,
+    distractors: list[VisualProfile],
+    rng: np.random.Generator,
+) -> tuple[list[VisualProfile], int]:
+    """Shuffle gt + 3 distractors, return (options, correct_index)."""
+    if len(distractors) != 3:
+        raise ValueError("expected 3 distractors")
+    combined = [gt, *distractors]
+    order = rng.permutation(4)
+    options = [combined[int(i)] for i in order]
+    correct_index = int(np.where(order == 0)[0][0])
+    return options, correct_index

src/aamcq/generation/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+"""Image-generation backends.
+Only `flux2_klein` is active for the current dataset; see
+`configs/generation.yaml` and `src/aamcq/prompt_render.py` for the pin.
+"""
+from aamcq.generation.base import GenerationBackend, GenerationResult
+from aamcq.generation.registry import BACKEND_REGISTRY, get_backend
+__all__ = ["GenerationBackend", "GenerationResult", "BACKEND_REGISTRY", "get_backend"]

src/aamcq/generation/base.py ADDED Viewed

	@@ -0,0 +1,38 @@

+"""Generation backend Protocol."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Protocol
+from PIL import Image
+@dataclass(frozen=True)
+class GenerationResult:
+    """Immutable record of one generated image and the settings used to produce it."""
+    # The generated image.
+    image: Image.Image
+    # Name of the backend that produced the image (e.g. "flux2_klein").
+    backend: str
+    # Model identifier the backend loaded its weights from.
+    model_id: str
+    # Prompt text passed to the backend.
+    prompt: str
+    # Negative prompt supplied by the caller, or None when there is none.
+    negative_prompt: str | None
+    # Seed used for the random generator.
+    seed: int
+    # Sampling settings the backend ran with.
+    num_inference_steps: int
+    guidance_scale: float
+    # Requested output size.
+    height: int
+    width: int
+class GenerationBackend(Protocol):
+    """Structural interface every image-generation backend must satisfy."""
+    # Backend identifier (e.g. the registry key).
+    name: str
+    # Prepare the backend for generation (e.g. load model weights).
+    def load(self) -> None: ...
+    # Generate one image; all arguments are keyword-only.
+    def generate(
+        self,
+        *,
+        prompt: str,
+        negative_prompt: str | None,
+        seed: int,
+        height: int,
+        width: int,
+    ) -> GenerationResult: ...

src/aamcq/generation/flux2_klein.py ADDED Viewed

	@@ -0,0 +1,74 @@

+"""FLUX.2-klein-9B backend (lazy-loaded, CPU offload for 24GB GPUs)."""
+from __future__ import annotations
+from typing import Any
+import torch
+from diffusers import Flux2KleinPipeline
+from aamcq.generation.base import GenerationResult
+from aamcq.prompt_render import MODEL_SPECS
+class Flux2KleinBackend:
+    """Single-GPU FLUX.2-klein backend with `enable_model_cpu_offload`.
+    Peak allocated ~19GB on an RTX A5000 24GB at 1024x1024 bf16, measured on
+    our smoke test. The pipeline is loaded on first `generate()` call and
+    reused thereafter.
+    """
+    name = "flux2_klein"
+    def __init__(self, *, cpu_offload: bool = True, torch_dtype: Any = torch.bfloat16) -> None:
+        self._pipe: Flux2KleinPipeline | None = None
+        self._cpu_offload = cpu_offload
+        self._dtype = torch_dtype
+        self._spec = MODEL_SPECS[self.name]
+        self.model_id: str = str(self._spec["model_id"])
+        self.num_inference_steps: int = int(self._spec["num_inference_steps"])  # type: ignore[arg-type]
+        self.guidance_scale: float = float(self._spec["guidance_scale"])  # type: ignore[arg-type]
+    def load(self) -> None:
+        if self._pipe is not None:
+            return
+        pipe = Flux2KleinPipeline.from_pretrained(self.model_id, torch_dtype=self._dtype)
+        if self._cpu_offload:
+            pipe.enable_model_cpu_offload()
+        else:
+            pipe.to("cuda")
+        self._pipe = pipe
+    def generate(
+        self,
+        *,
+        prompt: str,
+        negative_prompt: str | None,
+        seed: int,
+        height: int,
+        width: int,
+    ) -> GenerationResult:
+        self.load()
+        assert self._pipe is not None
+        gen = torch.Generator(device="cpu").manual_seed(int(seed))
+        img = self._pipe(
+            prompt=prompt,
+            height=height,
+            width=width,
+            num_inference_steps=self.num_inference_steps,
+            guidance_scale=self.guidance_scale,
+            generator=gen,
+        ).images[0]
+        return GenerationResult(
+            image=img,
+            backend=self.name,
+            model_id=self.model_id,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            seed=int(seed),
+            num_inference_steps=self.num_inference_steps,
+            guidance_scale=self.guidance_scale,
+            height=height,
+            width=width,
+        )

src/aamcq/generation/registry.py ADDED Viewed

	@@ -0,0 +1,18 @@

+"""Backend name -> class registry."""
+from __future__ import annotations
+from typing import Callable
+from aamcq.generation.base import GenerationBackend
+from aamcq.generation.flux2_klein import Flux2KleinBackend
+# Maps a backend name to the callable that constructs it; `get_backend`
+# forwards its keyword arguments to that callable.
+BACKEND_REGISTRY: dict[str, Callable[..., GenerationBackend]] = {
+    "flux2_klein": Flux2KleinBackend,
+}
+def get_backend(name: str, **kwargs) -> GenerationBackend:
+    if name not in BACKEND_REGISTRY:
+        raise ValueError(f"unknown backend {name!r}; expected one of {list(BACKEND_REGISTRY)}")
+    return BACKEND_REGISTRY[name](**kwargs)

src/aamcq/instance_plan.py ADDED Viewed

	@@ -0,0 +1,170 @@

+"""Deterministic plan of (profile, base_prompt, backend, seed, strategy) tuples.
+The plan is produced once and written to `data/plan.jsonl`. Image generation and
+MCQ construction both consume this file, so as long as the plan is stable the
+entire dataset is reproducible bit-for-bit.
+"""
+from __future__ import annotations
+import itertools
+from dataclasses import asdict, dataclass
+from pathlib import Path
+import numpy as np
+import yaml
+from aamcq.distractors import STRATEGIES, Strategy
+from aamcq.profile import VisualProfile, enumerate_profiles
+from aamcq.utils.seeding import item_seed
+# Categories that `_load_base_prompts` requires in base_prompts.yaml; `build_plan`
+# cycles through them in this order when assigning a base prompt to each item.
+BASE_PROMPT_CATEGORIES = ("portrait", "landscape", "animal", "still_life", "architecture")
+@dataclass(frozen=True)
+class PlanItem:
+    """One immutable row of the generation plan (see `build_plan`)."""
+    # Unique id of the form "ab_mcq_<index>_<source id>".
+    item_id: str
+    # Ground-truth profile as an axis -> value mapping.
+    gt_profile: dict[str, str]
+    # Scene text chosen for this item and the category it came from.
+    base_prompt: str
+    base_prompt_category: str
+    # Name of the generation backend assigned to this item.
+    backend: str
+    # Generation seed derived from the item id and the master seed.
+    seed: int
+    # Name of the distractor sampling strategy assigned to this item.
+    distractor_strategy: str
+    # Difficulty label; `build_plan` currently always sets "medium".
+    difficulty: str
+    def to_dict(self) -> dict:
+        """Return the item as a plain dict (via `dataclasses.asdict`)."""
+        return asdict(self)
+def _load_base_prompts(path: str | Path) -> dict[str, list[str]]:
+    """Load `clean`-quality prompts per category from base_prompts.yaml."""
+    with open(path) as f:
+        data = yaml.safe_load(f)
+    cats = data.get("categories", {})
+    recommended = data.get("recommended", {})
+    out: dict[str, list[str]] = {}
+    for category in BASE_PROMPT_CATEGORIES:
+        entries = cats.get(category, {}).get("prompts", [])
+        prompts = [e["text"] for e in entries if e.get("quality") == "clean"]
+        if not prompts and recommended.get(category):
+            prompts = [recommended[category]]
+        if not prompts:
+            raise ValueError(f"base_prompts.yaml missing category {category!r}")
+        out[category] = prompts
+    return out
+def _allocate_mix(
+    n: int, mix: dict[str, float], rng: np.random.Generator, what: str
+) -> list[str]:
+    if abs(sum(mix.values()) - 1.0) > 1e-6:
+        raise ValueError(f"{what} mix must sum to 1.0, got {sum(mix.values())}")
+    counts = {name: int(round(frac * n)) for name, frac in mix.items()}
+    drift = n - sum(counts.values())
+    names = list(mix.keys())
+    i = 0
+    while drift != 0:
+        name = names[i % len(names)]
+        if drift > 0:
+            counts[name] += 1
+            drift -= 1
+        elif counts[name] > 0:
+            counts[name] -= 1
+            drift += 1
+        i += 1
+    slots: list[str] = []
+    for name, count in counts.items():
+        slots.extend([name] * count)
+    rng.shuffle(slots)
+    return slots
+def stratified_sample_by_style(
+    vocab: dict[str, list[str]],
+    n_target: int,
+    rng: np.random.Generator,
+    small_pool_cap: int = 50,
+) -> list[VisualProfile]:
+    """Proportional stratified sample over art_style groups.
+    Pools smaller than `small_pool_cap` (e.g. Photorealism's 36 under the
+    current compat filter) are sampled in full so every profile in a
+    minority style appears at least once. Final size is approximately
+    `n_target` but may vary by a few due to rounding + small-pool expansion.
+    """
+    all_profiles = list(enumerate_profiles(vocab))
+    by_style: dict[str, list[VisualProfile]] = {}
+    for p in all_profiles:
+        by_style.setdefault(p.art_style, []).append(p)
+    total = len(all_profiles)
+    sampled: list[VisualProfile] = []
+    for pool in by_style.values():
+        if len(pool) < small_pool_cap:
+            want = len(pool)
+        else:
+            want = min(round(len(pool) / total * n_target), len(pool))
+        idx = rng.choice(len(pool), size=want, replace=False)
+        sampled.extend(pool[int(i)] for i in idx)
+    rng.shuffle(sampled)
+    return sampled
+def build_plan(
+    vocab: dict[str, list[str]],
+    base_prompts_path: str | Path,
+    distractor_policy: dict,
+    generation_mix: dict[str, float],
+    n_random: int,
+    master_seed: int = 202,
+    stratified: bool = False,
+) -> list[PlanItem]:
+    """Build the deterministic list of plan items for a given `master_seed`.
+    Args:
+        vocab: Axis -> allowed values mapping used to enumerate profiles.
+        base_prompts_path: YAML file read by `_load_base_prompts`.
+        distractor_policy: Must contain a "strategy_mix" mapping of strategy
+            name -> fraction.
+        generation_mix: Backend name -> fraction.
+        n_random: Number of profiles to sample (approximate target when
+            `stratified` is true). Values <= 0 yield an empty plan.
+        master_seed: Seed for the plan-level RNG and the per-item seeds.
+        stratified: Use `stratified_sample_by_style` instead of a uniform
+            sample without replacement.
+    Returns:
+        One `PlanItem` per sampled profile.
+    Raises:
+        ValueError: a strategy name in the mix is not in `STRATEGIES`, or a
+            mix does not sum to 1.0 (raised by `_allocate_mix`).
+    NOTE(review): the uniform path samples without replacement, so `n_random`
+    presumably must not exceed the number of enumerated profiles; confirm.
+    """
+    # All plan-level randomness flows through this single generator, so the
+    # order of the draws below determines the output.
+    rng = np.random.default_rng(master_seed)
+    # Loaded before the early return, so a bad prompts file fails even for an empty plan.
+    base_prompts = _load_base_prompts(base_prompts_path)
+    if n_random <= 0:
+        return []
+    for name in distractor_policy["strategy_mix"]:
+        if name not in STRATEGIES:
+            raise ValueError(f"unknown strategy {name!r}; expected {STRATEGIES}")
+    if stratified:
+        profiles = stratified_sample_by_style(vocab, n_random, rng)
+    else:
+        all_profiles = list(enumerate_profiles(vocab))
+        idx = rng.choice(len(all_profiles), size=n_random, replace=False)
+        profiles = [all_profiles[int(i)] for i in idx]
+    # The stratified sampler may return a size other than `n_random`.
+    n_items = len(profiles)
+    sources: list[tuple[str, VisualProfile]] = [
+        (f"rnd_{k:04d}", p) for k, p in enumerate(profiles)
+    ]
+    strategies = _allocate_mix(n_items, distractor_policy["strategy_mix"], rng, "strategy")
+    backends = _allocate_mix(n_items, generation_mix, rng, "backend")
+    # Categories are assigned round-robin, starting at a random offset.
+    prompt_cycle = itertools.cycle(BASE_PROMPT_CATEGORIES)
+    for _ in range(int(rng.integers(0, len(BASE_PROMPT_CATEGORIES)))):
+        next(prompt_cycle)
+    plan: list[PlanItem] = []
+    for (iid, profile), strat, backend in zip(sources, strategies, backends):
+        category = next(prompt_cycle)
+        pool = base_prompts[category]
+        # The prompt choice uses its own per-item RNG derived from the source
+        # id, independent of the plan-level `rng`.
+        prompt_rng = np.random.default_rng(item_seed(iid, master_seed, "prompt"))
+        base_prompt = str(pool[int(prompt_rng.integers(0, len(pool)))])
+        item_id = f"ab_mcq_{len(plan):05d}_{iid}"
+        plan.append(
+            PlanItem(
+                item_id=item_id,
+                gt_profile=profile.to_dict(),
+                base_prompt=base_prompt,
+                base_prompt_category=category,
+                backend=backend,
+                seed=item_seed(item_id, master_seed, "gen"),
+                distractor_strategy=strat,
+                difficulty="medium",
+            )
+        )
+    return plan

src/aamcq/profile.py ADDED Viewed

	@@ -0,0 +1,90 @@

+"""VisualProfile — 4-axis aesthetic profile: art_style × color × art_medium × lighting."""
+from __future__ import annotations
+import itertools
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterator
+# Canonical axis order; it matches the field order of `VisualProfile` and is
+# the order in which `enumerate_profiles` builds the cartesian product.
+AXES: tuple[str, ...] = ("art_style", "color", "art_medium", "lighting")
+# Two directory levels above this module's package directory.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+# Vocabulary file used by `load_vocab` when no path is given.
+DEFAULT_VOCAB_PATH = REPO_ROOT / "configs" / "profile_vocab.json"
+@dataclass(frozen=True)
+class VisualProfile:
+    """4-axis aesthetic profile."""
+    art_style: str
+    color: str
+    art_medium: str
+    lighting: str
+    def to_dict(self) -> dict[str, str]:
+        return {axis: getattr(self, axis) for axis in AXES}
+    def to_tuple(self) -> tuple[str, str, str, str]:
+        return (self.art_style, self.color, self.art_medium, self.lighting)
+    @classmethod
+    def from_dict(cls, data: dict[str, str]) -> "VisualProfile":
+        return cls(**{axis: data[axis] for axis in AXES})
+    def validate(self, vocab: dict[str, list[str]]) -> list[str]:
+        errors: list[str] = []
+        for axis in AXES:
+            value = getattr(self, axis)
+            if value not in vocab.get(axis, []):
+                errors.append(f"{axis}={value!r} not in {vocab.get(axis)}")
+        return errors
+    def differs_on(self, other: "VisualProfile") -> list[str]:
+        return [axis for axis in AXES if getattr(self, axis) != getattr(other, axis)]
+    def hamming(self, other: "VisualProfile") -> int:
+        return len(self.differs_on(other))
+def load_vocab(path: str | Path | None = None) -> dict[str, list[str]]:
+    if path is None:
+        path = DEFAULT_VOCAB_PATH
+    with open(path) as f:
+        vocab = json.load(f)
+    missing = [axis for axis in AXES if axis not in vocab]
+    if missing:
+        raise ValueError(f"vocab missing axes: {missing}")
+    return vocab
+# Style × medium compatibility rules. Combinations where the medium would
+# override the style's signature are filtered out of profile enumeration.
+# Each key is an art_style and its value is the set of art_medium values
+# allowed for it; styles absent from this map accept every medium (see
+# `is_compatible`).
+_STYLE_MEDIUM_ALLOWLIST: dict[str, set[str]] = {
+    "Photorealism": {"Digital Painting"},
+    "Anime": {"Digital Painting", "Pixel Art", "Watercolor", "Ink Drawing"},
+}
+def is_compatible(profile: VisualProfile) -> bool:
+    allowed = _STYLE_MEDIUM_ALLOWLIST.get(profile.art_style)
+    if allowed is not None and profile.art_medium not in allowed:
+        return False
+    return True
+def enumerate_profiles(
+    vocab: dict[str, list[str]],
+    compat_filter: bool = True,
+) -> Iterator[VisualProfile]:
+    """Yield profile combinations in a fixed order.
+    With `compat_filter=True` (default) the `_STYLE_MEDIUM_ALLOWLIST` rules are
+    applied so only renderable combinations are yielded.
+    """
+    for values in itertools.product(*(vocab[axis] for axis in AXES)):
+        profile = VisualProfile(*values)
+        if compat_filter and not is_compatible(profile):
+            continue
+        yield profile

src/aamcq/prompt_render.py ADDED Viewed

	@@ -0,0 +1,113 @@

+"""Prompt rendering for the FLUX.2-klein backend."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Literal
+from aamcq.profile import VisualProfile
+# Template structure follows the BFL klein prompting guide: scene front-loaded,
+# natural-language prose, trailing "Key: value." markers. Lighting is embedded
+# mid-sentence as an environmental modifier — klein responds to it more
+# strongly there than as a trailing key.
+#   docs.bfl.ml/guides/prompting_guide_flux2_klein
+# Keyed by backend name; the placeholders are filled from `_format_kwargs`.
+PROMPT_TEMPLATES: dict[str, str] = {
+    "flux2_klein": (
+        "{scene}, lit by {lighting_phrase}, rendered as {medium_phrase} "
+        "in {style_phrase}. Color palette: {color_lc}."
+    ),
+}
+# Medium-to-phrase map: bare vocab words like "Ink Drawing" or "Pixel Art"
+# aren't idiomatic article-bearing noun phrases, so we standardize the wording
+# here to keep the rendered prompt grammatical.
+# `_format_kwargs` raises ValueError for an art_medium without an entry here.
+ART_MEDIUM_PHRASES: dict[str, str] = {
+    "Oil Painting": "an oil painting",
+    "Watercolor": "a watercolor painting",
+    "Ink Drawing": "an ink drawing",
+    "Digital Painting": "a digital painting",
+    "Pixel Art": "a pixel-art illustration",
+    "Pencil Sketch": "a pencil sketch",
+}
+# Style-to-phrase map. Most styles are rendered as "{X} style"; Minimalism and
+# Art Deco use expanded phrases because the bare word doesn't activate klein's
+# flat-design / decorative-geometric signals.
+# `_format_kwargs` raises ValueError for an art_style without an entry here.
+ART_STYLE_PHRASES: dict[str, str] = {
+    "Impressionism": "Impressionism style",
+    "Anime": "Anime style",
+    "Photorealism": "Photorealism style",
+    "Cubism": "Cubism style",
+    "Minimalism": "flat minimalist style with sparse composition",
+    "Art Deco": "Art Deco style with bold geometric shapes, clean symmetry, and ornamental lines",
+}
+# Lighting-to-phrase map, spanning a (temperature × hardness × key × source)
+# grid so the 6 values are maximally distinguishable in the rendered image.
+# `_format_kwargs` raises ValueError for a lighting value without an entry here.
+LIGHTING_PHRASES: dict[str, str] = {
+    "Golden Hour": "golden hour sunset light",
+    "Moody Low-Key": "moody low-key dramatic light",
+    "Soft Overcast": "soft diffused overcast daylight",
+    "Harsh Noon": "harsh direct noon sunlight",
+    "Neon Glow": "pink and cyan neon glow",
+    "Candlelit": "warm dim amber glow",
+}
+# Negative prompt per backend; None means the backend gets no negative prompt.
+NEGATIVE_PROMPTS: dict[str, str | None] = {
+    "flux2_klein": None,
+}
+# Per-backend model pin: the model id plus the sampling settings that the
+# backend class reads at construction time.
+MODEL_SPECS: dict[str, dict[str, object]] = {
+    "flux2_klein": {
+        "model_id": "black-forest-labs/FLUX.2-klein-9B",
+        "num_inference_steps": 4,
+        "guidance_scale": 1.0,
+    },
+}
+# Backend names accepted by `render`.
+Backend = Literal["flux2_klein"]
+@dataclass(frozen=True)
+class RenderedPrompt:
+    """Immutable result of `render`: the final prompt text for one backend."""
+    # Backend name the prompt was rendered for.
+    backend: str
+    # Fully formatted and post-processed prompt text.
+    prompt: str
+    # Negative prompt for the backend, or None when it has none.
+    negative_prompt: str | None
+def _format_kwargs(profile: VisualProfile, base_prompt: str) -> dict[str, str]:
+    if profile.art_medium not in ART_MEDIUM_PHRASES:
+        raise ValueError(f"no ART_MEDIUM_PHRASES entry for {profile.art_medium!r}")
+    if profile.lighting not in LIGHTING_PHRASES:
+        raise ValueError(f"no LIGHTING_PHRASES entry for {profile.lighting!r}")
+    if profile.art_style not in ART_STYLE_PHRASES:
+        raise ValueError(f"no ART_STYLE_PHRASES entry for {profile.art_style!r}")
+    return {
+        "scene": base_prompt,
+        "medium_phrase": ART_MEDIUM_PHRASES[profile.art_medium],
+        "lighting_phrase": LIGHTING_PHRASES[profile.lighting],
+        "style_phrase": ART_STYLE_PHRASES[profile.art_style],
+        "color_lc": profile.color.lower(),
+    }
+def render(profile: VisualProfile, base_prompt: str, backend: Backend = "flux2_klein") -> RenderedPrompt:
+    if backend not in PROMPT_TEMPLATES:
+        raise ValueError(f"unknown backend {backend!r}; expected one of {list(PROMPT_TEMPLATES)}")
+    prompt = PROMPT_TEMPLATES[backend].format(**_format_kwargs(profile, base_prompt))
+    prompt = _post_process(prompt)
+    return RenderedPrompt(
+        backend=backend,
+        prompt=prompt,
+        negative_prompt=NEGATIVE_PROMPTS[backend],
+    )
+def _post_process(prompt: str) -> str:
+    return (
+        " ".join(prompt.split())
+        .replace(" ,", ",")
+        .replace(",,", ",")
+        .rstrip(", ")
+    )

src/aamcq/utils/__init__.py ADDED Viewed

File without changes

src/aamcq/utils/io.py ADDED Viewed

	@@ -0,0 +1,37 @@

+"""Small IO helpers used by the pipeline."""
+from __future__ import annotations
+import json
+import os
+import tempfile
+from pathlib import Path
+from typing import Iterable, Iterator
+def atomic_write_text(path: str | Path, text: str) -> None:
+    path = Path(path)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with tempfile.NamedTemporaryFile(
+        mode="w", dir=path.parent, delete=False, suffix=".tmp"
+    ) as tmp:
+        tmp.write(text)
+        tmp.flush()
+        os.fsync(tmp.fileno())
+        tmp_path = tmp.name
+    os.replace(tmp_path, path)
+def write_jsonl(path: str | Path, rows: Iterable[dict]) -> None:
+    lines = "\n".join(json.dumps(row, sort_keys=True, ensure_ascii=False) for row in rows)
+    if lines:
+        lines += "\n"
+    atomic_write_text(path, lines)
+def read_jsonl(path: str | Path) -> Iterator[dict]:
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                yield json.loads(line)

src/aamcq/utils/seeding.py ADDED Viewed

	@@ -0,0 +1,16 @@

+"""Deterministic seed derivation."""
+from __future__ import annotations
+import hashlib
+def item_seed(item_id: str, master_seed: int = 0, purpose: str = "") -> int:
+    """Derive a 32-bit seed from (item_id, master_seed, purpose).
+    Stable across runs and across machines — we rely on SHA-256, not Python's
+    string hash randomization.
+    """
+    blob = f"{master_seed}|{item_id}|{purpose}".encode("utf-8")
+    digest = hashlib.sha256(blob).digest()
+    return int.from_bytes(digest[:4], "big", signed=False)