Spaces:

lspcloud
/

prolific-preferences-personalized

Sleeping

App Files Community

ehejin committed 18 days ago

Commit

37fe63c

1 Parent(s): c374677

prolific ready

Browse files

Files changed (7) hide show

scripts/check_prompt_format.py +74 -0
src/app.py +1 -1
src/data.py +15 -0
src/lsp_wrappers.py +79 -7
src/ui/components.py +37 -23
src/ui/screens_preference.py +12 -3
study_config.yaml +6 -6

scripts/check_prompt_format.py ADDED Viewed

	@@ -0,0 +1,74 @@

+"""
+Print the full seller system prompt that the user study sends to the model
+for a fake but realistic participant. No comparison, no training format,
+just the prompt.
+Usage:
+    cd /dfs/scratch1/echoi1/prolific_preferences
+    python3 scripts/check_prompt_format.py
+"""
+from src.lsp_wrappers import (
+    format_demographics,
+    build_seller_system_prompt_preference,
+)
+DEMOGRAPHICS = {
+    "age":                   "32",
+    "gender":                "Female",
+    "geographic_region":     "West",
+    "education_level":       "College graduate/some postgrad",
+    "race":                  "White",
+    "us_citizen":            "Yes",
+    "marital_status":        "Single",
+    "religion":              "Agnostic",
+    "religious_attendance":  "Never",
+    "political_affiliation": "Independent",
+    "income":                "$50,000-$75,000",
+    "political_views":       "Moderate",
+    "household_size":        "2",
+    "employment_status":     "Full-time employment",
+}
+BACKGROUND = {
+    "movies_criteria": (
+        "I look for strong character development, an interesting plot, "
+        "and good cinematography."
+    ),
+    "movies_enjoy": (
+        "I enjoy psychological thrillers and indie dramas."
+    ),
+    "movies_avoid": (
+        "I avoid slasher horror and broad slapstick comedies."
+    ),
+}
+PAIR = {
+    "pair_id":  "test-pair-001",
+    "category": "movies",
+    "product_a": {
+        "title":       "Eternal Sunshine of the Spotless Mind",
+        "description": ["A heartfelt sci-fi romance about memory and love."],
+        "features":    [],
+        "price":       "12.99",
+    },
+    "product_b": {
+        "title":       "The Hangover",
+        "description": ["A wild bachelor party comedy in Las Vegas."],
+        "features":    [],
+        "price":       "9.99",
+    },
+}
+cfg = {
+    "prompt_variant": {
+        "personalization": True,
+        "include_bio":     True,
+    },
+}
+demo_str   = format_demographics(DEMOGRAPHICS, background=BACKGROUND, include_bio=True)
+sys_prompt = build_seller_system_prompt_preference(PAIR, cfg, demo_str)
+print(sys_prompt)

src/app.py CHANGED Viewed

@@ -47,7 +47,7 @@ def _init_submodule() -> None:
         # GitHub serves a tarball of any branch/tag/SHA at this URL.
         # Pinned to a specific commit SHA so future lsp changes don't break us.
-        branch      = "a71506e3b1fa74fa3427f8ab674fa68420ca42da"
         tarball_url = f"https://api.github.com/repos/batu-el/lsp/tarball/{branch}"
         tmp_tar     = Path("/tmp/lsp.tar.gz")
         tmp_extract = Path("/tmp/lsp_extract")

         # GitHub serves a tarball of any branch/tag/SHA at this URL.
         # Pinned to a specific commit SHA so future lsp changes don't break us.
+        branch      = "74582acd911f81309ba8b22cef9286c2887dda18"
         tarball_url = f"https://api.github.com/repos/batu-el/lsp/tarball/{branch}"
         tmp_tar     = Path("/tmp/lsp.tar.gz")
         tmp_extract = Path("/tmp/lsp_extract")

src/data.py CHANGED Viewed

@@ -503,6 +503,21 @@ def _assign_from_category(category: str, n: int, user_id: str, cfg: dict) -> lis
         _expire_reservations(reservations)
         _release_returned_reservations(reservations, cfg)
         def is_reserved_by_other(i):
             r = reservations.get(str(i))
             return r is not None and r["user_id"] != user_id

         _expire_reservations(reservations)
         _release_returned_reservations(reservations, cfg)
+        # If this Prolific PID already has reservations (e.g. they refreshed
+        # the tab, got a new user_id, and came back), release the old ones
+        # before creating new ones. Prevents the same participant from
+        # accumulating multiple reservations.
+        if is_prolific:
+            stale = [
+                idx for idx, r in list(reservations.items())
+                if r.get("prolific_pid") == prolific_pid
+            ]
+            for idx in stale:
+                del reservations[idx]
+            if stale:
+                print(f"[ASSIGN] Released {len(stale)} prior reservations "
+                      f"for returning PID {prolific_pid}")
         def is_reserved_by_other(i):
             r = reservations.get(str(i))
             return r is not None and r["user_id"] != user_id

src/lsp_wrappers.py CHANGED Viewed

@@ -11,10 +11,82 @@ sys.path, so the imports succeed.
 """
-# ── Demographics (must match training code exactly) ───────────────────────────
-def format_demographics(demo: dict) -> str:
-    return ", ".join(f"{k}: {v}" for k, v in demo.items())
 # ── Product text helpers ──────────────────────────────────────────────────────
@@ -63,6 +135,7 @@ def pair_overview(pair: dict) -> str:
 # ── Seller system prompt builders ─────────────────────────────────────────────
 def build_seller_system_prompt_preference(
     pair: dict, cfg: dict, demographics_str: str
 ) -> str:
@@ -71,7 +144,6 @@ def build_seller_system_prompt_preference(
     a, b = pair["product_a"], pair["product_b"]
     result = get_seller_system_prompt(
         personalization=pv["personalization"],
-        detailed_instruction=pv["detailed_instruction"],
         title=a.get("title"),
         description=_desc_str(a),
         features=_feat_str(a),
@@ -82,8 +154,9 @@ def build_seller_system_prompt_preference(
         competitor_price=f"${b.get('price')}",
         demographics=demographics_str,
     )
-    print(f"[PROMPT] personalization={pv['personalization']}")   # ← ADD
-    print(f"[PROMPT] system_prompt[:300]: {result[:300]}")       # ← ADD
     return result
@@ -94,7 +167,6 @@ def build_seller_system_prompt_likelihood(
     pv = cfg["prompt_variant"]
     return get_seller_system_prompt(
         personalization=pv["personalization"],
-        detailed_instruction=pv["detailed_instruction"],
         title=product.get("title", ""),
         description=_desc_str(product),
         features=_feat_str(product),

 """
+# ── Demographics ──────────────────────────────────────────────────────────────
+#
+# Key sets and field labels mirror lsp/src/data.py exactly so that a checkpoint
+# trained on the lsp persona format sees identical strings at inference time.
+# DO NOT reorder, re-spell, or relabel anything in here without making the same
+# change in lsp/src/data.py first — the trained model is sensitive to the
+# literal format ("Demographics: ...", "Their own words about their movie tastes:").
+DEMOGRAPHIC_KEYS: tuple[str, ...] = (
+    "geographic_region",
+    "gender",
+    "age",
+    "education_level",
+    "race",
+    "us_citizen",
+    "marital_status",
+    "religion",
+    "religious_attendance",
+    "political_affiliation",
+    "income",
+    "political_views",
+    "household_size",
+    "employment_status",
+)
+# Maps (lsp BIO_KEY label) → (key in the user study's `background` dict).
+# Movies-only for now; if a future variant trains on groceries with bios
+# you'll need to extend this.
+BIO_KEY_LABEL_TO_BACKGROUND_KEY: dict[str, str] = {
+    "What matters to you when picking a movie": "movies_criteria",
+    "Description of movies you tend to enjoy":  "movies_enjoy",
+    "Description of movies you tend to avoid":  "movies_avoid",
+}
+def format_demographics(
+    demo: dict, background: dict | None = None, include_bio: bool = False
+) -> str:
+    """Render demographics (and optionally bio answers) the same way training does.
+    With include_bio=False (default), produces just the legacy single-line
+    "k: v, k: v, ..." string used by older user-study runs.
+    With include_bio=True, produces the multi-line block that matches
+    lsp/src/data.py:format_demographics:
+        Demographics: geographic_region: West, gender: Male, age: 30, ...
+        Their own words about their movie tastes:
+        - What matters to you when picking a movie: ...
+        - Description of movies you tend to enjoy: ...
+        - Description of movies you tend to avoid: ...
+    Empty/missing bio answers are silently skipped, matching training.
+    """
+    if not include_bio:
+        # Legacy path: single comma-joined line. Untouched for old checkpoints.
+        return ", ".join(f"{k}: {v}" for k, v in demo.items())
+    demo_pairs = ", ".join(
+        f"{k}: {demo[k]}" for k in DEMOGRAPHIC_KEYS if k in demo
+    )
+    bio_lines: list[str] = []
+    if background:
+        for label, bg_key in BIO_KEY_LABEL_TO_BACKGROUND_KEY.items():
+            value = background.get(bg_key)
+            if value not in (None, ""):
+                bio_lines.append(f"- {label}: {value}")
+    parts: list[str] = []
+    if demo_pairs:
+        parts.append(f"Demographics: {demo_pairs}")
+    if bio_lines:
+        parts.append("Their own words about their movie tastes:")
+        parts.extend(bio_lines)
+    return "\n".join(parts)
 # ── Product text helpers ──────────────────────────────────────────────────────
 # ── Seller system prompt builders ─────────────────────────────────────────────
 def build_seller_system_prompt_preference(
     pair: dict, cfg: dict, demographics_str: str
 ) -> str:
     a, b = pair["product_a"], pair["product_b"]
     result = get_seller_system_prompt(
         personalization=pv["personalization"],
         title=a.get("title"),
         description=_desc_str(a),
         features=_feat_str(a),
         competitor_price=f"${b.get('price')}",
         demographics=demographics_str,
     )
+    print(f"[PROMPT] personalization={pv['personalization']}, "
+          f"include_bio={pv.get('include_bio', False)}")
+    print(f"[PROMPT] system_prompt[:300]: {result[:300]}")
     return result
     pv = cfg["prompt_variant"]
     return get_seller_system_prompt(
         personalization=pv["personalization"],
         title=product.get("title", ""),
         description=_desc_str(product),
         features=_feat_str(product),

src/ui/components.py CHANGED Viewed

@@ -23,39 +23,40 @@ def inject_css() -> None:
 /* ── Product cards ───────────────────────────────────────────────────── */
 .product-card {
     border-radius: 10px; padding: 1rem 1.25rem; margin-bottom: 0.75rem;
 }
-.product-card-a      { border: 2px solid #2563eb; background: #eff6ff; }
-.product-card-b      { border: 2px solid #9333ea; background: #faf5ff; }
-.product-card-single { border: 2px solid #0891b2; background: #ecfeff; }
 .pc-header {
     display: flex; justify-content: space-between;
     align-items: flex-start; margin-bottom: 0.6rem; gap: 1rem;
 }
-.pc-title { font-size: 1.05rem; font-weight: 700; color: #1a1a2e; line-height: 1.35; flex: 1; }
-.pc-price { font-size: 1.2rem; font-weight: 800; white-space: nowrap; color: #16a34a; }
 .pc-label {
     display: inline-block; font-size: 0.8rem; font-weight: 700;
     padding: 0.2rem 0.6rem; border-radius: 99px; margin-bottom: 0.4rem;
 }
-.pc-label-a      { background: #dbeafe; color: #1e40af; }
-.pc-label-b      { background: #ede9fe; color: #6b21a8; }
-.pc-label-single { background: #cffafe; color: #155e75; }
 .pc-category-badge {
     display: inline-block; font-size: 0.7rem; font-weight: 600;
     padding: 0.12rem 0.5rem; border-radius: 99px; margin-left: 0.4rem;
-    background: #f1f5f9; color: #475569;
 }
 .pc-section { margin-top: 0.5rem; }
 .pc-section-title {
-    font-weight: 600; font-size: 0.82rem; color: #64748b;
     text-transform: uppercase; letter-spacing: 0.04em; margin-bottom: 0.3rem;
 }
-.pc-desc { font-size: 0.92rem; color: #334155; line-height: 1.6; }
-.pc-list { margin: 0; padding-left: 1.2rem; font-size: 0.92rem; color: #334155; line-height: 1.5; }
-.pc-list li { margin-bottom: 0.25rem; }
 /* ── VS divider ──────────────────────────────────────────────────────── */
 .vs-divider {
@@ -70,10 +71,29 @@ def inject_css() -> None:
 /* ── Chat bubbles ────────────────────────────────────────────────────── */
 .chat-wrap   { max-height: 480px; overflow-y: auto; margin-bottom: 1rem; padding-right: 4px; }
-.bubble      { padding: 0.65rem 0.9rem; border-radius: 12px; margin-bottom: 0.55rem; font-size: 0.93rem; line-height: 1.55; }
-.bubble-ai   { background: #eff6ff; border: 1px solid #93c5fd; margin-right: 8%; }
-.bubble-user { background: #f0fdf4; border: 1px solid #86efac; margin-left: 8%; text-align: right; }
-.bubble-meta { font-size: 0.73rem; color: #94a3b8; margin-bottom: 0.15rem; }
 /* ── Section headings on background page ────────────────────────────── */
 hr.section-divider { border: none; border-top: 2px solid #e2e8f0; margin: 1.5rem 0 1rem 0; }
@@ -239,12 +259,6 @@ def render_chat_history(turns: list, study_type: str) -> None:
             )
     html += "</div>"
     st.markdown(html, unsafe_allow_html=True)
-    st.components.v1.html("""
-        <script>
-        const chatWraps = window.parent.document.querySelectorAll('.chat-wrap');
-        chatWraps.forEach(el => el.scrollTop = el.scrollHeight);
-        </script>
-        """, height=0)
 # ── Rating / familiarity helpers ──────────────────────────────────────────────

 /* ── Product cards ───────────────────────────────────────────────────── */
 .product-card {
     border-radius: 10px; padding: 1rem 1.25rem; margin-bottom: 0.75rem;
+    color: #1a1a2e !important;  /* force dark text regardless of theme */
 }
+.product-card-a      { border: 2px solid #2563eb; background: #eff6ff !important; }
+.product-card-b      { border: 2px solid #9333ea; background: #faf5ff !important; }
+.product-card-single { border: 2px solid #0891b2; background: #ecfeff !important; }
 .pc-header {
     display: flex; justify-content: space-between;
     align-items: flex-start; margin-bottom: 0.6rem; gap: 1rem;
 }
+.pc-title { font-size: 1.05rem; font-weight: 700; color: #1a1a2e !important; line-height: 1.35; flex: 1; }
+.pc-price { font-size: 1.2rem; font-weight: 800; white-space: nowrap; color: #16a34a !important; }
 .pc-label {
     display: inline-block; font-size: 0.8rem; font-weight: 700;
     padding: 0.2rem 0.6rem; border-radius: 99px; margin-bottom: 0.4rem;
 }
+.pc-label-a      { background: #dbeafe !important; color: #1e40af !important; }
+.pc-label-b      { background: #ede9fe !important; color: #6b21a8 !important; }
+.pc-label-single { background: #cffafe !important; color: #155e75 !important; }
 .pc-category-badge {
     display: inline-block; font-size: 0.7rem; font-weight: 600;
     padding: 0.12rem 0.5rem; border-radius: 99px; margin-left: 0.4rem;
+    background: #f1f5f9 !important; color: #475569 !important;
 }
 .pc-section { margin-top: 0.5rem; }
 .pc-section-title {
+    font-weight: 600; font-size: 0.82rem; color: #64748b !important;
     text-transform: uppercase; letter-spacing: 0.04em; margin-bottom: 0.3rem;
 }
+.pc-desc { font-size: 0.92rem; color: #334155 !important; line-height: 1.6; }
+.pc-list { margin: 0; padding-left: 1.2rem; font-size: 0.92rem; color: #334155 !important; line-height: 1.5; }
+.pc-list li { margin-bottom: 0.25rem; color: #334155 !important; }
 /* ── VS divider ──────────────────────────────────────────────────────── */
 .vs-divider {
 /* ── Chat bubbles ────────────────────────────────────────────────────── */
 .chat-wrap   { max-height: 480px; overflow-y: auto; margin-bottom: 1rem; padding-right: 4px; }
+.bubble      {
+    padding: 0.65rem 0.9rem; border-radius: 12px; margin-bottom: 0.55rem;
+    font-size: 0.93rem; line-height: 1.55;
+    color: #1a1a2e !important;  /* force dark text regardless of theme */
+}
+.bubble-ai   {
+    background: #eff6ff !important;
+    border: 1px solid #93c5fd;
+    margin-right: 8%;
+    color: #1a1a2e !important;
+}
+.bubble-user {
+    background: #f0fdf4 !important;
+    border: 1px solid #86efac;
+    margin-left: 8%;
+    text-align: right;
+    color: #1a1a2e !important;
+}
+.bubble-meta {
+    font-size: 0.73rem;
+    color: #64748b !important;
+    margin-bottom: 0.15rem;
+}
 /* ── Section headings on background page ────────────────────────────── */
 hr.section-divider { border: none; border-top: 2px solid #e2e8f0; margin: 1.5rem 0 1rem 0; }
             )
     html += "</div>"
     st.markdown(html, unsafe_allow_html=True)
 # ── Rating / familiarity helpers ──────────────────────────────────────────────

src/ui/screens_preference.py CHANGED Viewed

@@ -95,17 +95,26 @@ def screen_pair_intro(s: dict, cfg: dict) -> None:
         fam_b   = fam_b   or fam_b_opts[0]
         pre_val = pre_val or choices[3]          # Neutral (4)
-        pre_int  = parse_rating(pre_val)
-        demo_str = format_demographics(s["demographics"])
         # ── Per-item config (model + prompt variant assigned at session init) ─
         item_cfg = {
             **cfg,
             "prompt_variant": item.get("prompt_variant", {}),
             "model_name":     item.get("model_name", ""),
-            "sampler_path":   item.get("sampler_path", ""),
         }
         # ── Build prompts ─────────────────────────────────────────────────────
         system_prompt   = build_seller_system_prompt_preference(item, item_cfg, demo_str)
         opening_msg     = opening_message_preference(item)

         fam_b   = fam_b   or fam_b_opts[0]
         pre_val = pre_val or choices[3]          # Neutral (4)
+        pre_int = parse_rating(pre_val)
         # ── Per-item config (model + prompt variant assigned at session init) ─
         item_cfg = {
             **cfg,
             "prompt_variant": item.get("prompt_variant", {}),
             "model_name":     item.get("model_name", ""),
+            "sampler_path":   item.get("sampler_path", ""),
         }
+        # Build the demographics string in whichever format the trained model expects.
+        # When include_bio is True we feed the participant's own background answers
+        # (movies_criteria / movies_enjoy / movies_avoid) the same way training does.
+        include_bio = bool(item_cfg["prompt_variant"].get("include_bio", False))
+        demo_str    = format_demographics(
+            s["demographics"],
+            background=s.get("background", {}),
+            include_bio=include_bio,
+        )
         # ── Build prompts ─────────────────────────────────────────────────────
         system_prompt   = build_seller_system_prompt_preference(item, item_cfg, demo_str)
         opening_msg     = opening_message_preference(item)

study_config.yaml CHANGED Viewed

@@ -31,10 +31,10 @@ categories:
 model_variants:
   - name: base
     model_name: "meta-llama/Llama-3.1-8B-Instruct"
-    sampler_path: "tinker://4aca87ed-dcb1-5212-b86e-a1701f0dd6c6:train:0/sampler_weights/000200"
     prompt_variant:
-      personalization: true
-      detailed_instruction: true
     count: 2          # items using this variant for odd-numbered users
 # counts swap on alternating users:
@@ -46,8 +46,8 @@ min_turns: 3                    # Minimum exchanges before "done" button is enab
 max_turns: 3                    # Hard cap; input is disabled after this many exchanges
 # Prolific
-prolific_completion_code: "C7OQ65JD"
-prolific_study_id: "69e91ee612dfa1a58a0273d4"
 # HuggingFace dataset repo where results (JSON + CSV) are uploaded
-output_dataset_repo: "ehejin/user_study-preference-personalized_0417_250"

 model_variants:
   - name: base
     model_name: "meta-llama/Llama-3.1-8B-Instruct"
+    sampler_path: "tinker://90528292-6961-5d83-b389-d70a8c1ba6a6:train:0/sampler_weights/000200"
     prompt_variant:
+      personalization: false
+      include_bio: false
     count: 2          # items using this variant for odd-numbered users
 # counts swap on alternating users:
 max_turns: 3                    # Hard cap; input is disabled after this many exchanges
 # Prolific
+prolific_completion_code: "C3QSGJK3"
+prolific_study_id: "69fd3473f636a4e92454c022"
 # HuggingFace dataset repo where results (JSON + CSV) are uploaded
+output_dataset_repo: "ehejin/user_study-preference-personalized_0505_NP1"