Spaces:
Sleeping
Sleeping
debug logs
Browse files- src/data.py +32 -1
- src/lsp_wrappers.py +8 -6
- src/model.py +5 -0
- src/ui/screens_likelihood.py +4 -1
- src/ui/screens_preference.py +9 -1
- src/ui/screens_shared.py +2 -1
- study_config.yaml +17 -7
src/data.py
CHANGED
|
@@ -177,6 +177,30 @@ def _assign_from_category(category: str, n: int, cfg: dict) -> list:
|
|
| 177 |
|
| 178 |
return assigned
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
def _compute_counts(cfg: dict) -> dict:
|
| 182 |
"""
|
|
@@ -268,8 +292,15 @@ def _make_item_slot(item: dict, study_type: str) -> dict:
|
|
| 268 |
def init_state(cfg: dict) -> dict:
|
| 269 |
"""Build the initial session-state dict for a new participant."""
|
| 270 |
n = cfg["pairs_per_user"]
|
|
|
|
| 271 |
items = assign_items(cfg)[:n]
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
try:
|
| 274 |
params = st.query_params
|
| 275 |
except Exception:
|
|
|
|
| 177 |
|
| 178 |
return assigned
|
| 179 |
|
| 180 |
+
def _assign_variants(cfg: dict, n: int) -> list:
|
| 181 |
+
"""
|
| 182 |
+
Return a list of n variant dicts (one per item), alternating the
|
| 183 |
+
personalized/base split across users.
|
| 184 |
+
"""
|
| 185 |
+
variants = cfg.get("model_variants")
|
| 186 |
+
if not variants:
|
| 187 |
+
# Fallback: single variant from old-style config
|
| 188 |
+
return [{"model_name": cfg["model_name"], "prompt_variant": cfg["prompt_variant"]}] * n
|
| 189 |
+
|
| 190 |
+
lock = FileLock(str(_data_dir(cfg) / "variant_counter.lock"))
|
| 191 |
+
with lock:
|
| 192 |
+
ctr = _read_counter(_data_dir(cfg) / "variant_counter.txt")
|
| 193 |
+
_write_counter(_data_dir(cfg) / "variant_counter.txt", ctr + 1)
|
| 194 |
+
|
| 195 |
+
# Swap counts on every other user
|
| 196 |
+
v0, v1 = variants[0], variants[1]
|
| 197 |
+
if ctr % 2 == 1:
|
| 198 |
+
v0, v1 = v1, v0
|
| 199 |
+
|
| 200 |
+
assigned = [v0] * v0["count"] + [v1] * v1["count"]
|
| 201 |
+
random.shuffle(assigned) # interleave so variant order isn't predictable
|
| 202 |
+
print(f"[VARIANTS] user {ctr}: {[v['name'] for v in assigned]}")
|
| 203 |
+
return assigned
|
| 204 |
|
| 205 |
def _compute_counts(cfg: dict) -> dict:
|
| 206 |
"""
|
|
|
|
| 292 |
def init_state(cfg: dict) -> dict:
|
| 293 |
"""Build the initial session-state dict for a new participant."""
|
| 294 |
n = cfg["pairs_per_user"]
|
| 295 |
+
variants = _assign_variants(cfg, n)
|
| 296 |
items = assign_items(cfg)[:n]
|
| 297 |
+
for item_slot, variant in zip(items, variants):
|
| 298 |
+
item_slot["model_name"] = variant["model_name"]
|
| 299 |
+
item_slot["prompt_variant"] = variant["prompt_variant"]
|
| 300 |
+
for i, item_slot in enumerate(items):
|
| 301 |
+
print(f"[ITEM {i}] category={item_slot.get('category')} "
|
| 302 |
+
f"model={item_slot.get('model_name')} "
|
| 303 |
+
f"personalization={item_slot.get('prompt_variant',{}).get('personalization')}")
|
| 304 |
try:
|
| 305 |
params = st.query_params
|
| 306 |
except Exception:
|
src/lsp_wrappers.py
CHANGED
|
@@ -63,26 +63,28 @@ def pair_overview(pair: dict) -> str:
|
|
| 63 |
|
| 64 |
|
| 65 |
# ── Seller system prompt builders ─────────────────────────────────────────────
|
| 66 |
-
|
| 67 |
def build_seller_system_prompt_preference(
|
| 68 |
pair: dict, cfg: dict, demographics_str: str
|
| 69 |
) -> str:
|
| 70 |
from prompts.seller_system.preference import get_seller_system_prompt
|
| 71 |
pv = cfg["prompt_variant"]
|
| 72 |
a, b = pair["product_a"], pair["product_b"]
|
| 73 |
-
|
| 74 |
personalization=pv["personalization"],
|
| 75 |
detailed_instruction=pv["detailed_instruction"],
|
| 76 |
-
title=a.get("title"
|
| 77 |
description=_desc_str(a),
|
| 78 |
features=_feat_str(a),
|
| 79 |
-
price=f"${a.get('price'
|
| 80 |
-
competitor_title=b.get("title"
|
| 81 |
competitor_description=_desc_str(b),
|
| 82 |
competitor_features=_feat_str(b),
|
| 83 |
-
competitor_price=f"${b.get('price'
|
| 84 |
demographics=demographics_str,
|
| 85 |
)
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def build_seller_system_prompt_likelihood(
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
# ── Seller system prompt builders ─────────────────────────────────────────────
|
|
|
|
| 66 |
def build_seller_system_prompt_preference(
|
| 67 |
pair: dict, cfg: dict, demographics_str: str
|
| 68 |
) -> str:
|
| 69 |
from prompts.seller_system.preference import get_seller_system_prompt
|
| 70 |
pv = cfg["prompt_variant"]
|
| 71 |
a, b = pair["product_a"], pair["product_b"]
|
| 72 |
+
result = get_seller_system_prompt(
|
| 73 |
personalization=pv["personalization"],
|
| 74 |
detailed_instruction=pv["detailed_instruction"],
|
| 75 |
+
title=a.get("title"),
|
| 76 |
description=_desc_str(a),
|
| 77 |
features=_feat_str(a),
|
| 78 |
+
price=f"${a.get('price')}",
|
| 79 |
+
competitor_title=b.get("title"),
|
| 80 |
competitor_description=_desc_str(b),
|
| 81 |
competitor_features=_feat_str(b),
|
| 82 |
+
competitor_price=f"${b.get('price')}",
|
| 83 |
demographics=demographics_str,
|
| 84 |
)
|
| 85 |
+
print(f"[PROMPT] personalization={pv['personalization']}") # ← ADD
|
| 86 |
+
print(f"[PROMPT] system_prompt[:300]: {result[:300]}") # ← ADD
|
| 87 |
+
return result
|
| 88 |
|
| 89 |
|
| 90 |
def build_seller_system_prompt_likelihood(
|
src/model.py
CHANGED
|
@@ -31,6 +31,11 @@ def call_model(messages: list, cfg: dict) -> str:
|
|
| 31 |
- Degenerate repetition (Pocahontas-style loop)
|
| 32 |
"""
|
| 33 |
model_name = cfg["model_name"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
try:
|
| 35 |
from tinker_cookbook import renderers as tinker_renderers
|
| 36 |
|
|
|
|
| 31 |
- Degenerate repetition (Pocahontas-style loop)
|
| 32 |
"""
|
| 33 |
model_name = cfg["model_name"]
|
| 34 |
+
print(f"[MODEL] model_name={model_name}")
|
| 35 |
+
print(f"[MODEL] num_messages={len(messages)}")
|
| 36 |
+
print(f"[MODEL] roles={[m['role'] for m in messages]}")
|
| 37 |
+
print(f"[MODEL] system_prompt[:150]={messages[0]['content'][:150]}")
|
| 38 |
+
|
| 39 |
try:
|
| 40 |
from tinker_cookbook import renderers as tinker_renderers
|
| 41 |
|
src/ui/screens_likelihood.py
CHANGED
|
@@ -86,7 +86,10 @@ def screen_item_intro(s: dict, cfg: dict) -> None:
|
|
| 86 |
|
| 87 |
# ── Build prompts ─────────────────────────────────────────────────────
|
| 88 |
# Seller always pushes user to buy. Features passed for groceries only.
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
opening_msg = opening_message_likelihood(product, category)
|
| 91 |
user_choice_msg = f"<choice>{pre_int}</choice>"
|
| 92 |
closing_msg = closing_message_likelihood(product, category) # logged only
|
|
|
|
| 86 |
|
| 87 |
# ── Build prompts ─────────────────────────────────────────────────────
|
| 88 |
# Seller always pushes user to buy. Features passed for groceries only.
|
| 89 |
+
item_cfg = {**cfg,
|
| 90 |
+
"prompt_variant": item["prompt_variant"],
|
| 91 |
+
"model_name": item["model_name"]}
|
| 92 |
+
system_prompt = build_seller_system_prompt_likelihood(product, item_cfg, demo_str)
|
| 93 |
opening_msg = opening_message_likelihood(product, category)
|
| 94 |
user_choice_msg = f"<choice>{pre_int}</choice>"
|
| 95 |
closing_msg = closing_message_likelihood(product, category) # logged only
|
src/ui/screens_preference.py
CHANGED
|
@@ -100,7 +100,10 @@ def screen_pair_intro(s: dict, cfg: dict) -> None:
|
|
| 100 |
|
| 101 |
# ── Build prompts ─────────────────────────────────────────────────────
|
| 102 |
# Seller always argues for Product A.
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
| 104 |
opening_msg = opening_message_preference(item)
|
| 105 |
user_choice_msg = f"<choice>{pre_int}</choice>"
|
| 106 |
closing_msg = closing_message_preference(item) # vote_final equivalent; logged only
|
|
@@ -153,5 +156,10 @@ def screen_pair_intro(s: dict, cfg: dict) -> None:
|
|
| 153 |
"num_turns": 0,
|
| 154 |
})
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
s["screen"] = "chat"
|
| 157 |
st.rerun()
|
|
|
|
| 100 |
|
| 101 |
# ── Build prompts ─────────────────────────────────────────────────────
|
| 102 |
# Seller always argues for Product A.
|
| 103 |
+
item_cfg = {**cfg,
|
| 104 |
+
"prompt_variant": item["prompt_variant"],
|
| 105 |
+
"model_name": item["model_name"]}
|
| 106 |
+
system_prompt = build_seller_system_prompt_preference(item, item_cfg, demo_str)
|
| 107 |
opening_msg = opening_message_preference(item)
|
| 108 |
user_choice_msg = f"<choice>{pre_int}</choice>"
|
| 109 |
closing_msg = closing_message_preference(item) # vote_final equivalent; logged only
|
|
|
|
| 156 |
"num_turns": 0,
|
| 157 |
})
|
| 158 |
|
| 159 |
+
print(f"[CONV] num turns stored: {len(s['items'][idx]['conversation']['turns'])}")
|
| 160 |
+
print(f"[CONV] turn roles: {[(t['role'], t.get('synthetic')) for t in s['items'][idx]['conversation']['turns']]}")
|
| 161 |
+
print(f"[CONV] turn 0 content[:100]: {s['items'][idx]['conversation']['turns'][0]['content'][:100]}")
|
| 162 |
+
print(f"[CONV] turn 1 content: {s['items'][idx]['conversation']['turns'][1]['content']}")
|
| 163 |
+
|
| 164 |
s["screen"] = "chat"
|
| 165 |
st.rerun()
|
src/ui/screens_shared.py
CHANGED
|
@@ -308,7 +308,8 @@ def screen_chat(s: dict, cfg: dict) -> None:
|
|
| 308 |
messages.append({"role": "user", "content": user_msg})
|
| 309 |
|
| 310 |
with st.spinner("AI is responding…"):
|
| 311 |
-
|
|
|
|
| 312 |
|
| 313 |
now = time.time()
|
| 314 |
turn_base = len(conv["turns"])
|
|
|
|
| 308 |
messages.append({"role": "user", "content": user_msg})
|
| 309 |
|
| 310 |
with st.spinner("AI is responding…"):
|
| 311 |
+
item_cfg = {**cfg, "model_name": item["model_name"]}
|
| 312 |
+
ai_reply = call_model(messages, item_cfg)
|
| 313 |
|
| 314 |
now = time.time()
|
| 315 |
turn_base = len(conv["turns"])
|
study_config.yaml
CHANGED
|
@@ -28,12 +28,22 @@ categories:
|
|
| 28 |
- name: movies
|
| 29 |
count: 5
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
model_name: "meta-llama/Llama-3.1-8B-Instruct"
|
| 37 |
pair_selection_seed: 42 # Seed for reproducible 50-item pool selection per category
|
| 38 |
pairs_per_user: 5 # Total items/pairs shown per participant
|
| 39 |
|
|
@@ -42,7 +52,7 @@ min_turns: 3 # Minimum exchanges before "done" button is enab
|
|
| 42 |
max_turns: 3 # Hard cap; input is disabled after this many exchanges
|
| 43 |
|
| 44 |
# Prolific
|
| 45 |
-
prolific_completion_code: "
|
| 46 |
|
| 47 |
# HuggingFace dataset repo where results (JSON + CSV) are uploaded
|
| 48 |
-
output_dataset_repo: "lms-shape-preferences/user_study-preference-
|
|
|
|
| 28 |
- name: movies
|
| 29 |
count: 5
|
| 30 |
|
| 31 |
+
model_variants:
|
| 32 |
+
- name: personalized
|
| 33 |
+
model_name: "meta-llama/Llama-3.1-8B-Instruct"
|
| 34 |
+
prompt_variant:
|
| 35 |
+
personalization: true
|
| 36 |
+
detailed_instruction: true
|
| 37 |
+
count: 2 # items using this variant for odd-numbered users
|
| 38 |
+
|
| 39 |
+
- name: base
|
| 40 |
+
model_name: "meta-llama/Llama-3.1-8B-Instruct"
|
| 41 |
+
prompt_variant:
|
| 42 |
+
personalization: false
|
| 43 |
+
detailed_instruction: true
|
| 44 |
+
count: 3 # items using this variant for odd-numbered users
|
| 45 |
+
# counts swap on alternating users:
|
| 46 |
|
|
|
|
| 47 |
pair_selection_seed: 42 # Seed for reproducible 50-item pool selection per category
|
| 48 |
pairs_per_user: 5 # Total items/pairs shown per participant
|
| 49 |
|
|
|
|
| 52 |
max_turns: 3 # Hard cap; input is disabled after this many exchanges
|
| 53 |
|
| 54 |
# Prolific
|
| 55 |
+
prolific_completion_code: "C1JEJWOQ"
|
| 56 |
|
| 57 |
# HuggingFace dataset repo where results (JSON + CSV) are uploaded
|
| 58 |
+
output_dataset_repo: "lms-shape-preferences/user_study-preference-base"
|