Spaces:

lspcloud
/

prolific-preferences-dynamic

Sleeping

App Files Files Community

ehejin committed on Apr 23

Commit

45b2cda

1 Parent(s): b2d734b

synced w/ personalized user study

Browse files

Files changed (6) hide show

src/app.py +99 -99
src/config.py +7 -3
src/data.py +180 -19
src/model.py +4 -8
src/upload.py +7 -0
study_config.yaml +9 -8

src/app.py CHANGED Viewed

@@ -34,45 +34,69 @@ def _init_submodule() -> None:
             raise RuntimeError("GH_TOKEN secret is not set.")
         import shutil
-        # Aggressively remove any partial/corrupt lsp directory
         if _LSP_PATH.exists():
             shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
-        # Also nuke any leftover .git/modules/lsp entry
         git_modules = _BASE / ".git" / "modules" / "lsp"
         if git_modules.exists():
             shutil.rmtree(str(git_modules), ignore_errors=True)
-        clone_url = f"https://ehejin:{token}@github.com/batu-el/lsp.git"
         for attempt in range(1, 4):
-            print(f"[SUBMODULE] clone attempt {attempt}/3 ...")
-            # Remove any partial clone from previous attempt
-            if _LSP_PATH.exists():
-                shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
-            result = subprocess.run(
-                [
-                    "git", "clone",
-                    "--branch", "0412_train",
-                    "--depth", "1",
-                    clone_url,
-                    str(_LSP_PATH),
-                ],
-                capture_output=True, text=True,
-            )
-            print(f"[SUBMODULE] returncode: {result.returncode}")
-            if result.stderr:
-                # Scrub token from log
-                print(f"[SUBMODULE] stderr: {result.stderr.replace(token, '***')}")
-            if result.returncode == 0 and (_LSP_PATH / "src" / "prompts").exists():
-                print("[SUBMODULE] clone succeeded.")
                 break
-            print(f"[SUBMODULE] attempt {attempt} failed, retrying...")
         else:
-            raise RuntimeError(
-                f"Failed to clone lsp after 3 attempts. "
-                f"Last stderr: {result.stderr.replace(token, '***')}"
-            )
     lsp_src = str(_LSP_PATH / "src")
     if lsp_src not in sys.path:
@@ -83,6 +107,23 @@ def _init_submodule() -> None:
 _init_submodule()
 # ---------------------------------------------------------------------------
 # 2. App imports (only after submodule is initialised)
 # ---------------------------------------------------------------------------
@@ -108,15 +149,14 @@ from src.ui.screens_preference import screen_pair_intro
 # 3. Admin dashboard — visit ?admin=1
 # ---------------------------------------------------------------------------
 def _screen_admin(cfg: dict) -> None:
-    """
-    Coverage dashboard — visit ?admin=1 to see this.
-    Always scans the HF repo directly — ignores local completions cache
-    so the count reflects real accepted submissions only.
-    """
     from src.data import (
-        _load_pool, _pool_path, _data_dir,
-        _load_reservations, _expire_reservations,
     )
     st.markdown("## 📊 Study Coverage Dashboard")
     st.caption(
@@ -126,79 +166,43 @@ def _screen_admin(cfg: dict) -> None:
     )
     if st.button("🔄 Refresh", type="primary"):
         st.rerun()
-    hf_token    = cfg.get("hf_token", "")
-    output_repo = cfg.get("output_dataset_repo", "")
     for cat_cfg in cfg["categories"]:
-        cat  = cat_cfg["name"]
-        pool = _load_pool(str(_pool_path(cat, cfg)))
         total = len(pool)
-        # ── Scan HF directly (no cache) ──────────────────────────────────────
-        hf_counts = {str(i): 0 for i in range(total)}
-        n_json    = 0
-        if hf_token and output_repo:
-            try:
-                from huggingface_hub import HfApi
-                api        = HfApi(token=hf_token)
-                files      = list(api.list_repo_files(repo_id=output_repo, repo_type="dataset"))
-                json_files = [f for f in files if f.startswith("json/") and f.endswith(".json")]
-                n_json     = len(json_files)
-                # Build a pair_id → pool_index lookup for fallback matching
-                id_to_index = {}
-                for i, p in enumerate(pool):
-                    pid = p.get("pair_id") or p.get("item_id", "")
-                    if pid:
-                        id_to_index[pid] = i
-                for filepath in json_files:
-                    try:
-                        content = api.hf_hub_download(
-                            repo_id=output_repo,
-                            filename=filepath,
-                            repo_type="dataset",
-                            token=hf_token,
-                        )
-                        with open(content) as f:
-                            submission = json.load(f)
-                        for item in submission.get("items", []):
-                            if item.get("category") != cat:
-                                continue
-                            # Use _pool_index if present (new submissions),
-                            # fall back to pair_id/item_id matching (old submissions)
-                            idx = item.get("_pool_index")
-                            if idx is None:
-                                pid = item.get("pair_id") or item.get("item_id", "")
-                                idx = id_to_index.get(pid)
-                            if idx is not None:
-                                hf_counts[str(idx)] = hf_counts.get(str(idx), 0) + 1
-                    except Exception as e:
-                        st.warning(f"Could not parse {filepath}: {e}")
-            except Exception as e:
-                st.error(f"Could not scan HF repo: {e}")
-        # ── Reservations (active in-progress users) ───────────────────────────
-        reservations = _load_reservations(cfg)
-        _expire_reservations(reservations)
         reserved_uncovered = sum(
-            1 for k, v in reservations.items()
-            if hf_counts.get(k, 0) == 0
         )
-        covered         = sum(1 for v in hf_counts.values() if v >= 1)
-        uncovered       = total - covered
-        truly_uncovered = uncovered - reserved_uncovered
         st.markdown(f"### {cat.capitalize()}")
-        st.caption(f"{n_json} submission file(s) in HF repo")
         col1, col2, col3, col4 = st.columns(4)
         col1.metric("Total items",     total)
         col2.metric("Covered ✅",      covered)
         col3.metric("In progress 🔄",  reserved_uncovered,
-                    help="Reserved by active users — will complete soon")
         col4.metric("Still needed ⚠️", truly_uncovered,
                     delta=f"-{truly_uncovered}" if truly_uncovered > 0 else None,
                     delta_color="inverse")
@@ -206,15 +210,11 @@ def _screen_admin(cfg: dict) -> None:
         if truly_uncovered == 0 and reserved_uncovered == 0:
             st.success(f"✅ All {total} items covered!")
         elif truly_uncovered == 0:
-            st.info(
-                f"🔄 {reserved_uncovered} item(s) in progress — "
-                f"waiting for active participants to finish."
-            )
         else:
             st.warning(
                 f"⚠️ {truly_uncovered} item(s) still need a participant. "
-                f"Send more Prolific slots or wait for in-progress "
-                f"reservations to expire (up to 80 min)."
             )
         st.markdown("---")

             raise RuntimeError("GH_TOKEN secret is not set.")
         import shutil
+        import tarfile
+        import urllib.request
+        import time as _time
+        # Clean any stale state
         if _LSP_PATH.exists():
             shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
         git_modules = _BASE / ".git" / "modules" / "lsp"
         if git_modules.exists():
             shutil.rmtree(str(git_modules), ignore_errors=True)
+        # GitHub serves a tarball of any branch/tag/SHA at this URL.
+        # Pinned to a specific commit SHA so future lsp changes don't break us.
+        branch      = "a71506e3b1fa74fa3427f8ab674fa68420ca42da"
+        tarball_url = f"https://api.github.com/repos/batu-el/lsp/tarball/{branch}"
+        tmp_tar     = Path("/tmp/lsp.tar.gz")
+        tmp_extract = Path("/tmp/lsp_extract")
         for attempt in range(1, 4):
+            print(f"[SUBMODULE] tarball download attempt {attempt}/3 ...")
+            try:
+                req = urllib.request.Request(
+                    tarball_url,
+                    headers={
+                        "Authorization": f"Bearer {token}",
+                        "Accept":        "application/vnd.github+json",
+                        "User-Agent":    "prolific-preferences",
+                    },
+                )
+                with urllib.request.urlopen(req, timeout=60) as resp:
+                    tmp_tar.write_bytes(resp.read())
+                print(f"[SUBMODULE] downloaded {tmp_tar.stat().st_size} bytes")
+                # Extract
+                if tmp_extract.exists():
+                    shutil.rmtree(str(tmp_extract), ignore_errors=True)
+                tmp_extract.mkdir(parents=True)
+                with tarfile.open(str(tmp_tar)) as tar:
+                    tar.extractall(str(tmp_extract))
+                # GitHub tarballs have a top-level dir like batu-el-lsp-abc123/
+                subdirs = [d for d in tmp_extract.iterdir() if d.is_dir()]
+                if not subdirs:
+                    raise RuntimeError("tarball had no top-level directory")
+                top = subdirs[0]
+                # Verify the prompts dir is present
+                if not (top / "src" / "prompts").exists():
+                    raise RuntimeError(f"src/prompts not found in extracted tarball at {top}")
+                # Move extracted contents to /app/lsp
+                shutil.copytree(str(top), str(_LSP_PATH))
+                tmp_tar.unlink(missing_ok=True)
+                shutil.rmtree(str(tmp_extract), ignore_errors=True)
+                print("[SUBMODULE] ready.")
                 break
+            except Exception as e:
+                msg = str(e).replace(token, "***") if token else str(e)
+                print(f"[SUBMODULE] attempt {attempt} failed: {msg}")
+                _time.sleep(3)
         else:
+            raise RuntimeError(f"Failed to download lsp tarball after 3 attempts.")
     lsp_src = str(_LSP_PATH / "src")
     if lsp_src not in sys.path:
 _init_submodule()
+# One-time cleanup of locally cached assignment state, run at import time
+# directly after _init_submodule().
+# Wipe stale local state ONLY on the first container load (not on every Streamlit rerun).
+# We use a marker file — once created, subsequent imports skip the wipe.
+# NOTE(review): the original author states that completions stay durable in HF
+# and are re-scanned after the wipe; that re-scan is not visible in this block —
+# confirm before extending the pattern list below.
+_data_root = _BASE / "data"
+_data_root.mkdir(parents=True, exist_ok=True)
+# The marker lives inside the data directory itself, so it only suppresses the
+# wipe for as long as that directory survives.
+_wipe_marker = _data_root / ".startup_wiped"
+if not _wipe_marker.exists():
+    # Only these three filename patterns are removed; anything else in the
+    # data directory is left untouched.
+    for pattern in ("reservations.json", "local_completions_*.json", "completion_cache_*.json"):
+        for f in _data_root.glob(pattern):
+            # Best-effort: a file that cannot be deleted is logged and skipped
+            # rather than aborting startup.
+            try:
+                f.unlink()
+                print(f"[STARTUP] Wiped stale file: {f.name}")
+            except Exception as e:
+                print(f"[STARTUP] Could not wipe {f.name}: {e}")
+    # The marker is written only after the loop above has finished.
+    _wipe_marker.touch()
+    print("[STARTUP] Marked container as wiped")
 # ---------------------------------------------------------------------------
 # 2. App imports (only after submodule is initialised)
 # ---------------------------------------------------------------------------
 # 3. Admin dashboard — visit ?admin=1
 # ---------------------------------------------------------------------------
 def _screen_admin(cfg: dict) -> None:
+    """Coverage dashboard — visit ?admin=1 to see this."""
     from src.data import (
+        _get_accepted_counts, _load_pool, _pool_path,
+        _load_reservations, _save_reservations,
+        _expire_reservations, _release_returned_reservations,
+        _reservation_lock_path,
     )
+    from filelock import FileLock
     st.markdown("## 📊 Study Coverage Dashboard")
     st.caption(
     )
     if st.button("🔄 Refresh", type="primary"):
+        # Invalidate caches so we re-scan HF and re-poll Prolific
+        from src.data import _data_dir
+        for f in _data_dir(cfg).glob("completion_cache*"):
+            f.unlink()
+        prolific_cache = _data_dir(cfg) / "prolific_returned_cache.json"
+        if prolific_cache.exists():
+            prolific_cache.unlink()
         st.rerun()
+    # Release expired + returned/timed-out reservations before displaying
+    lock = FileLock(str(_reservation_lock_path(cfg)), timeout=10)
+    with lock:
+        reservations = _load_reservations(cfg)
+        _expire_reservations(reservations)
+        _release_returned_reservations(reservations, cfg)
+        _save_reservations(reservations, cfg)
     for cat_cfg in cfg["categories"]:
+        cat   = cat_cfg["name"]
+        pool  = _load_pool(str(_pool_path(cat, cfg)))
         total = len(pool)
+        counts = _get_accepted_counts(cat, cfg)
+        covered = sum(1 for v in counts.values() if v >= 1)
         reserved_uncovered = sum(
+            1 for k in reservations
+            if counts.get(k, 0) == 0
         )
+        truly_uncovered = total - covered - reserved_uncovered
         st.markdown(f"### {cat.capitalize()}")
         col1, col2, col3, col4 = st.columns(4)
         col1.metric("Total items",     total)
         col2.metric("Covered ✅",      covered)
         col3.metric("In progress 🔄",  reserved_uncovered,
+                    help="Reserved by active Prolific participants")
         col4.metric("Still needed ⚠️", truly_uncovered,
                     delta=f"-{truly_uncovered}" if truly_uncovered > 0 else None,
                     delta_color="inverse")
         if truly_uncovered == 0 and reserved_uncovered == 0:
             st.success(f"✅ All {total} items covered!")
         elif truly_uncovered == 0:
+            st.info(f"🔄 {reserved_uncovered} item(s) in progress.")
         else:
             st.warning(
                 f"⚠️ {truly_uncovered} item(s) still need a participant. "
+                f"Send more Prolific slots."
             )
         st.markdown("---")

src/config.py CHANGED Viewed

@@ -16,9 +16,13 @@ def load_config() -> dict:
         cfg = yaml.safe_load(f)
     # Secrets come only from env vars, never from yaml
-    cfg["hf_token"]       = os.getenv("HF_TOKEN", "")
-    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
-    cfg["debug_mode"]     = os.getenv("DEBUG_MODE", "false").lower() == "true"
     # Derived filesystem paths
     cfg["base_dir"]        = str(BASE_DIR)

         cfg = yaml.safe_load(f)
     # Secrets come only from env vars, never from yaml
+    cfg["hf_token"]           = os.getenv("HF_TOKEN", "")
+    cfg["tinker_api_key"]     = os.getenv("TINKER_API_KEY", "")
+    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
+    cfg["debug_mode"]         = os.getenv("DEBUG_MODE", "false").lower() == "true"
+    # prolific_study_id is read from the yaml (non-secret, study-specific)
+    cfg.setdefault("prolific_study_id", "")
     # Derived filesystem paths
     cfg["base_dir"]        = str(BASE_DIR)

src/data.py CHANGED Viewed

@@ -17,13 +17,19 @@ Rejected completions = JSON files moved to rejected/ by the admin.
 Reservations
 ------------
 When a user starts, their items are "reserved" in a local file for 80 min.
-Concurrent users (up to 5) each get a FileLock on the reservation file so they
 never receive the same items. Reservations expire automatically so abandoned
 sessions don't permanently block items.
 Dropout / rejection recovery
 -----------------------------
-- Dropout: reservation expires after 80 min → item re-enters the pool.
 - Rejection: admin moves json/{worker}/{id}.json → rejected/{worker}/{id}.json
   in the HF dataset repo. On next Space restart (or cache expiry) the item's
   accepted count drops to 0 and it gets re-assigned.
@@ -39,9 +45,10 @@ from filelock import FileLock
 from src.config import CATEGORY_TO_REPO
-POOL_SIZE            = 50    # items selected per (study_type, category)
-RESERVATION_TTL      = 60 * 80   # 80 min: 30 min expected + ~2.5x buffer
-COMPLETION_CACHE_TTL = 300        # re-scan HF repo every 5 minutes
 # ── Path helpers ──────────────────────────────────────────────────────────────
@@ -299,7 +306,6 @@ def record_completion(user_id: str, items: list, cfg: dict) -> None:
     Uses _pool_index stamped on each item at assignment time — no fuzzy matching.
     Called after successful HF upload AND by the simulation script.
     """
-    # Group by category using the stamped _pool_category and _pool_index
     by_category: dict = {}
     for item in items:
         cat = item.get("_pool_category") or item.get("category", "")
@@ -341,28 +347,176 @@ def record_completion(user_id: str, items: list, cfg: dict) -> None:
               f"(user {user_id[:8]})")
 # ── Core assignment ───────────────────────────────────────────────────────────
 def _assign_from_category(category: str, n: int, user_id: str, cfg: dict) -> list:
     """
     Assign n items using least-coverage-first strategy.
-    Priority order:
       1. Uncovered + unreserved         (count=0, not reserved)
-      2. Uncovered + reserved by other  (count=0, reserved — likely abandoned user)
       3. Covered   + unreserved         (count>0, not reserved)
       4. Covered   + reserved by other  (count>0, reserved)
-    This ensures abandoned users' items get picked up by subsequent users
-    rather than already-covered items being re-assigned.
     """
     pool            = _load_pool(str(_pool_path(category, cfg)))
     accepted_counts = _get_accepted_counts(category, cfg)
     lock            = FileLock(str(_reservation_lock_path(cfg)), timeout=10)
     with lock:
         reservations = _load_reservations(cfg)
         _expire_reservations(reservations)
         def is_reserved_by_other(i):
             r = reservations.get(str(i))
@@ -373,16 +527,24 @@ def _assign_from_category(category: str, n: int, user_id: str, cfg: dict) -> lis
             reserved = int(is_reserved_by_other(i))
             return (count, reserved)
-        # All indices sorted by (count, is_reserved_by_other)
-        all_indices = sorted(range(len(pool)), key=sort_key)
         selected_indices = all_indices[:n]
-        # Reserve selected items (overrides stale reservations from abandoned users)
-        expiry = time.time() + RESERVATION_TTL
-        for i in selected_indices:
-            reservations[str(i)] = {"user_id": user_id, "expiry": expiry}
-        _save_reservations(reservations, cfg)
     selected = []
     for i in selected_indices:
@@ -470,7 +632,6 @@ def assign_items(cfg: dict, user_id: str) -> list:
 def _make_item_slot(item: dict, study_type: str) -> dict:
     base = {
-        # Preserve pool index and category for record_completion in upload.py
         "_pool_index":    item.get("_pool_index"),
         "_pool_category": item.get("_pool_category", item.get("category", "")),
         "conversation": {

 Reservations
 ------------
 When a user starts, their items are "reserved" in a local file for 80 min.
+Concurrent users each get a FileLock on the reservation file so they
 never receive the same items. Reservations expire automatically so abandoned
 sessions don't permanently block items.
+Each reservation stores the user's prolific_pid so we can release their items
+immediately when Prolific reports them as RETURNED or TIMED-OUT — no need to
+wait for the 80-min TTL.
 Dropout / rejection recovery
 -----------------------------
+- Dropout (voluntary return): Prolific marks RETURNED, we query the API and
+  release the reservation on the next assignment.
+- Dropout (silent): reservation expires after 80 min → item re-enters pool.
 - Rejection: admin moves json/{worker}/{id}.json → rejected/{worker}/{id}.json
   in the HF dataset repo. On next Space restart (or cache expiry) the item's
   accepted count drops to 0 and it gets re-assigned.
 from src.config import CATEGORY_TO_REPO
+POOL_SIZE               = 50        # items selected per (study_type, category)
+RESERVATION_TTL         = 60 * 80   # 80 min: 30 min expected + ~2.5x buffer
+COMPLETION_CACHE_TTL    = 300       # re-scan HF repo every 5 minutes
+PROLIFIC_POLL_CACHE_TTL = 120       # re-poll Prolific every 2 minutes
 # ── Path helpers ──────────────────────────────────────────────────────────────
     Uses _pool_index stamped on each item at assignment time — no fuzzy matching.
     Called after successful HF upload AND by the simulation script.
     """
     by_category: dict = {}
     for item in items:
         cat = item.get("_pool_category") or item.get("category", "")
               f"(user {user_id[:8]})")
+# ── Prolific status polling ───────────────────────────────────────────────────
+def _prolific_returned_pids(cfg: dict) -> set:
+    """
+    Query Prolific for participants who have RETURNED or TIMED-OUT from the
+    active study. Returns a set of their PIDs. Cached for PROLIFIC_POLL_CACHE_TTL.
+    """
+    token    = cfg.get("prolific_api_token", "")
+    study_id = cfg.get("prolific_study_id", "")
+    if not token or not study_id:
+        return set()
+    cache_path = _data_dir(cfg) / "prolific_returned_cache.json"
+    now        = time.time()
+    if cache_path.exists():
+        try:
+            with open(cache_path) as f:
+                c = json.load(f)
+            if now - c.get("timestamp", 0) < PROLIFIC_POLL_CACHE_TTL:
+                return set(c.get("returned_pids", []))
+        except Exception:
+            pass
+    returned = set()
+    try:
+        import requests
+        url     = f"https://api.prolific.com/api/v1/studies/{study_id}/submissions/"
+        headers = {"Authorization": f"Token {token}"}
+        resp    = requests.get(url, headers=headers, timeout=10)
+        resp.raise_for_status()
+        for sub in resp.json().get("results", []):
+            status = sub.get("status", "")
+            if status in ("RETURNED", "TIMED-OUT", "TIMED_OUT"):
+                pid = sub.get("participant_id") or sub.get("participant", "")
+                if pid:
+                    returned.add(pid)
+        print(f"[PROLIFIC] Found {len(returned)} returned/timed-out participants")
+    except Exception as e:
+        print(f"[PROLIFIC] Could not query API: {e}")
+    try:
+        with open(cache_path, "w") as f:
+            json.dump({"timestamp": now, "returned_pids": list(returned)}, f)
+    except Exception:
+        pass
+    return returned
+def _release_returned_reservations(reservations: dict, cfg: dict) -> None:
+    """
+    Remove reservations held by Prolific participants who have RETURNED or
+    TIMED-OUT. Mutates the reservations dict in place.
+    """
+    returned_pids = _prolific_returned_pids(cfg)
+    if not returned_pids:
+        return
+    released = []
+    for idx, r in list(reservations.items()):
+        pid = r.get("prolific_pid", "")
+        if pid and pid in returned_pids:
+            released.append(idx)
+            del reservations[idx]
+    if released:
+        print(f"[ASSIGN] Released {len(released)} reservations from returned/timed-out participants: {released}")
+def all_items_covered(cfg: dict) -> bool:
+    """
+    Returns True if every item in every category has been accepted at least once.
+    Used for auto-pausing the Prolific study.
+    """
+    for cat_cfg in cfg["categories"]:
+        cat   = cat_cfg["name"]
+        pool  = _load_pool(str(_pool_path(cat, cfg)))
+        counts = _get_accepted_counts(cat, cfg)
+        for i in range(len(pool)):
+            if counts.get(str(i), 0) < 1:
+                return False
+    return True
+def pause_prolific_study(cfg: dict) -> bool:
+    """
+    Call Prolific's API to pause the study. Returns True on success.
+    Requires prolific_api_token (env PROLIFIC_API_TOKEN) and prolific_study_id.
+    Idempotent — safe to call multiple times (Prolific treats repeated pauses as no-ops).
+    """
+    token    = cfg.get("prolific_api_token", "")
+    study_id = cfg.get("prolific_study_id", "")
+    if not token or not study_id:
+        print("[PROLIFIC] Cannot auto-pause: no API token or study_id configured")
+        return False
+    # Idempotency marker so we don't spam the API on every completion after
+    # the first time all items are covered.
+    paused_marker = _data_dir(cfg) / ".prolific_paused"
+    if paused_marker.exists():
+        return True
+    try:
+        import requests
+        url     = f"https://api.prolific.com/api/v1/studies/{study_id}/transition/"
+        headers = {"Authorization": f"Token {token}", "Content-Type": "application/json"}
+        resp    = requests.post(url, headers=headers, json={"action": "PAUSE"}, timeout=10)
+        resp.raise_for_status()
+        paused_marker.touch()
+        print(f"[PROLIFIC] ✅ Study {study_id} paused automatically — all items covered.")
+        return True
+    except Exception as e:
+        print(f"[PROLIFIC] Could not auto-pause study: {e}")
+        return False
 # ── Core assignment ───────────────────────────────────────────────────────────
 def _assign_from_category(category: str, n: int, user_id: str, cfg: dict) -> list:
     """
     Assign n items using least-coverage-first strategy.
+    Priority order (via sort key):
       1. Uncovered + unreserved         (count=0, not reserved)
+      2. Uncovered + reserved by other  (count=0, reserved)
       3. Covered   + unreserved         (count>0, not reserved)
       4. Covered   + reserved by other  (count>0, reserved)
+    Reservations are ONLY created for participants who come via Prolific
+    (i.e. have a non-empty prolific_pid in the URL). Non-Prolific visitors
+    (testers, previewers, direct-URL visitors) still get items assigned so
+    they can run through the study, but they don't hold reservations.
+    Reservations from participants who have RETURNED/TIMED-OUT on Prolific
+    are released BEFORE the sort, so their items are treated as unreserved.
     """
     pool            = _load_pool(str(_pool_path(category, cfg)))
     accepted_counts = _get_accepted_counts(category, cfg)
     lock            = FileLock(str(_reservation_lock_path(cfg)), timeout=10)
+    # Capture prolific_pid early so we can decide whether to reserve.
+    # Read from query_params directly — session_state.study_state doesn't
+    # exist yet during init_state, which is what calls this function.
+    prolific_pid = ""
+    try:
+        params = st.query_params
+        prolific_pid = params.get("PROLIFIC_PID", "") or ""
+    except Exception:
+        pass
+    is_prolific = bool(prolific_pid)
     with lock:
         reservations = _load_reservations(cfg)
         _expire_reservations(reservations)
+        _release_returned_reservations(reservations, cfg)
+        # If this Prolific PID already has reservations (e.g. they refreshed
+        # the tab, got a new user_id, and came back), release the old ones
+        # before creating new ones. Prevents the same participant from
+        # accumulating multiple reservations.
+        if is_prolific:
+            stale = [
+                idx for idx, r in list(reservations.items())
+                if r.get("prolific_pid") == prolific_pid
+            ]
+            for idx in stale:
+                del reservations[idx]
+            if stale:
+                print(f"[ASSIGN] Released {len(stale)} prior reservations "
+                      f"for returning PID {prolific_pid}")
         def is_reserved_by_other(i):
             r = reservations.get(str(i))
             reserved = int(is_reserved_by_other(i))
             return (count, reserved)
+        all_indices      = sorted(range(len(pool)), key=sort_key)
         selected_indices = all_indices[:n]
+        # Only reserve if this is a Prolific participant — keeps the
+        # admin "in progress" count accurate and stops testers/bouncers
+        # from blocking items for real users.
+        if is_prolific:
+            expiry = time.time() + RESERVATION_TTL
+            for i in selected_indices:
+                reservations[str(i)] = {
+                    "user_id":      user_id,
+                    "prolific_pid": prolific_pid,
+                    "expiry":       expiry,
+                }
+            _save_reservations(reservations, cfg)
+            print(f"[ASSIGN] Reserved for Prolific PID {prolific_pid}")
+        else:
+            print(f"[ASSIGN] Non-Prolific visitor — no reservation created")
     selected = []
     for i in selected_indices:
 def _make_item_slot(item: dict, study_type: str) -> dict:
     base = {
         "_pool_index":    item.get("_pool_index"),
         "_pool_category": item.get("_pool_category", item.get("category", "")),
         "conversation": {

src/model.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Tinker inference client. Model name is read from study_config.yaml."""
 import re
 import streamlit as st
@@ -33,14 +33,14 @@ def _get_tinker_clients(model_name: str, sampler_path: str = ""):
 def call_model(messages: list, cfg: dict) -> str:
-    """
-    Send a message list to Tinker and return cleaned response text.
-    """
     model_name   = cfg["model_name"]
     sampler_path = cfg.get("sampler_path", "")
     print(f"[MODEL] model_name={model_name} sampler_path={sampler_path or '(base)'}")
     print(f"[MODEL] num_messages={len(messages)}")
     print(f"[MODEL] roles={[m['role'] for m in messages]}")
     try:
         from tinker_cookbook import renderers as tinker_renderers
@@ -62,15 +62,11 @@ def call_model(messages: list, cfg: dict) -> str:
         parsed_message, _ = renderer.parse_response(result.sequences[0].tokens)
         content = tinker_renderers.format_content_as_string(parsed_message["content"])
-        # Strip <think>…</think>
         content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
-        # Strip leaked control tokens
         content = re.sub(r"<\|[^|]*\|>", "", content).strip()
-        # Truncate degenerate repetition: any 40+ char span repeated 5+ times
         match = re.search(r"(.{40,}?)\1{4,}", content, flags=re.DOTALL)
         if match:
             content = content[: match.start() + len(match.group(1))].strip()
-        # Fallback if cleanup left nothing usable
         if not content or len(content.split()) < 3:
             raise ValueError("Model output cleanup yielded no usable content.")

+"""Tinker inference client. Supports both base models and fine-tuned checkpoints."""
 import re
 import streamlit as st
 def call_model(messages: list, cfg: dict) -> str:
+    """Send a message list to Tinker and return cleaned response text."""
     model_name   = cfg["model_name"]
     sampler_path = cfg.get("sampler_path", "")
     print(f"[MODEL] model_name={model_name} sampler_path={sampler_path or '(base)'}")
     print(f"[MODEL] num_messages={len(messages)}")
     print(f"[MODEL] roles={[m['role'] for m in messages]}")
+    if messages:
+        print(f"[MODEL] system_prompt[:150]={messages[0]['content'][:150]}")
     try:
         from tinker_cookbook import renderers as tinker_renderers
         parsed_message, _ = renderer.parse_response(result.sequences[0].tokens)
         content = tinker_renderers.format_content_as_string(parsed_message["content"])
         content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
         content = re.sub(r"<\|[^|]*\|>", "", content).strip()
         match = re.search(r"(.{40,}?)\1{4,}", content, flags=re.DOTALL)
         if match:
             content = content[: match.start() + len(match.group(1))].strip()
         if not content or len(content.split()) < 3:
             raise ValueError("Model output cleanup yielded no usable content.")

src/upload.py CHANGED Viewed

@@ -71,6 +71,13 @@ def save_and_upload(state: dict, cfg: dict) -> None:
         # Record completion locally — updates counts immediately without waiting
         # for an HF re-scan. Also invalidates the HF cache.
         record_completion(state.get("user_id", ""), state.get("items", []), cfg)
     # ── Write + upload CSV ────────────────────────────────────────────────────
     _save_and_upload_csv(state, cfg, hf_api, safe_worker, submission_id)

         # Record completion locally — updates counts immediately without waiting
         # for an HF re-scan. Also invalidates the HF cache.
         record_completion(state.get("user_id", ""), state.get("items", []), cfg)
+        # Auto-pause Prolific study if all items are now covered
+        try:
+            from src.data import all_items_covered, pause_prolific_study
+            if all_items_covered(cfg):
+                pause_prolific_study(cfg)
+        except Exception as e:
+            print(f"[SAVE] Auto-pause check failed: {e}")
     # ── Write + upload CSV ────────────────────────────────────────────────────
     _save_and_upload_csv(state, cfg, hf_api, safe_worker, submission_id)

study_config.yaml CHANGED Viewed

@@ -26,27 +26,28 @@ study_type: preference
 # The two counts must sum to pairs_per_user.
 categories:
   - name: movies
-    count: 5
 model_variants:
   - name: base
     model_name: "meta-llama/Llama-3.1-8B-Instruct"
-    sampler_path: "tinker://04f8057d-e987-56aa-ada6-d56cc1d1ae2d:train:0/sampler_weights/000150"
     prompt_variant:
-      personalization: false
       detailed_instruction: true
-    count: 5          # items using this variant for odd-numbered users
 # counts swap on alternating users:
 pair_selection_seed: 42         # Seed for reproducible 50-item pool selection per category
-pairs_per_user: 5               # Total items/pairs shown per participant
 # Chat constraints — both set to 3 so each participant has exactly 3 real exchanges.
 min_turns: 3                    # Minimum exchanges before "done" button is enabled
 max_turns: 3                    # Hard cap; input is disabled after this many exchanges
-# Prolific
-prolific_completion_code: "CBRCO395"
 # HuggingFace dataset repo where results (JSON + CSV) are uploaded
-output_dataset_repo: "ehejin/user_study-preference-base_DETAILED_checkpoint"

 # The two counts must sum to pairs_per_user.
 categories:
   - name: movies
+    count: 2
 model_variants:
   - name: base
     model_name: "meta-llama/Llama-3.1-8B-Instruct"
+    sampler_path: ""
     prompt_variant:
+      personalization: true
       detailed_instruction: true
+    count: 2          # items using this variant for odd-numbered users
 # counts swap on alternating users:
 pair_selection_seed: 42         # Seed for reproducible 50-item pool selection per category
+pairs_per_user: 2               # Total items/pairs shown per participant
 # Chat constraints — both set to 3 so each participant has exactly 3 real exchanges.
 min_turns: 3                    # Minimum exchanges before "done" button is enabled
 max_turns: 3                    # Hard cap; input is disabled after this many exchanges
+# Prolific
+prolific_completion_code: "CREJ69QR"
+prolific_study_id: "69cdb78670b55c986db0d736"
 # HuggingFace dataset repo where results (JSON + CSV) are uploaded
+output_dataset_repo: "ehejin/user_study-preference-personalized_BASE"