ehejin committed on
Commit
6b0bcdc
·
1 Parent(s): d34de84

debug logs

Browse files
src/data.py CHANGED
@@ -177,6 +177,30 @@ def _assign_from_category(category: str, n: int, cfg: dict) -> list:
177
 
178
  return assigned
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  def _compute_counts(cfg: dict) -> dict:
182
  """
@@ -268,8 +292,15 @@ def _make_item_slot(item: dict, study_type: str) -> dict:
268
  def init_state(cfg: dict) -> dict:
269
  """Build the initial session-state dict for a new participant."""
270
  n = cfg["pairs_per_user"]
 
271
  items = assign_items(cfg)[:n]
272
-
 
 
 
 
 
 
273
  try:
274
  params = st.query_params
275
  except Exception:
 
177
 
178
  return assigned
179
 
180
+ def _assign_variants(cfg: dict, n: int) -> list:
181
+ """
182
+ Return a list of n variant dicts (one per item), alternating the
183
+ personalized/base split across users.
184
+ """
185
+ variants = cfg.get("model_variants")
186
+ if not variants:
187
+ # Fallback: single variant from old-style config
188
+ return [{"model_name": cfg["model_name"], "prompt_variant": cfg["prompt_variant"]}] * n
189
+
190
+ lock = FileLock(str(_data_dir(cfg) / "variant_counter.lock"))
191
+ with lock:
192
+ ctr = _read_counter(_data_dir(cfg) / "variant_counter.txt")
193
+ _write_counter(_data_dir(cfg) / "variant_counter.txt", ctr + 1)
194
+
195
+ # Swap counts on every other user
196
+ v0, v1 = variants[0], variants[1]
197
+ if ctr % 2 == 1:
198
+ v0, v1 = v1, v0
199
+
200
+ assigned = [v0] * v0["count"] + [v1] * v1["count"]
201
+ random.shuffle(assigned) # interleave so variant order isn't predictable
202
+ print(f"[VARIANTS] user {ctr}: {[v['name'] for v in assigned]}")
203
+ return assigned
204
 
205
  def _compute_counts(cfg: dict) -> dict:
206
  """
 
292
  def init_state(cfg: dict) -> dict:
293
  """Build the initial session-state dict for a new participant."""
294
  n = cfg["pairs_per_user"]
295
+ variants = _assign_variants(cfg, n)
296
  items = assign_items(cfg)[:n]
297
+ for item_slot, variant in zip(items, variants):
298
+ item_slot["model_name"] = variant["model_name"]
299
+ item_slot["prompt_variant"] = variant["prompt_variant"]
300
+ for i, item_slot in enumerate(items):
301
+ print(f"[ITEM {i}] category={item_slot.get('category')} "
302
+ f"model={item_slot.get('model_name')} "
303
+ f"personalization={item_slot.get('prompt_variant',{}).get('personalization')}")
304
  try:
305
  params = st.query_params
306
  except Exception:
src/lsp_wrappers.py CHANGED
@@ -63,26 +63,28 @@ def pair_overview(pair: dict) -> str:
63
 
64
 
65
  # ── Seller system prompt builders ─────────────────────────────────────────────
66
-
67
  def build_seller_system_prompt_preference(
68
  pair: dict, cfg: dict, demographics_str: str
69
  ) -> str:
70
  from prompts.seller_system.preference import get_seller_system_prompt
71
  pv = cfg["prompt_variant"]
72
  a, b = pair["product_a"], pair["product_b"]
73
- return get_seller_system_prompt(
74
  personalization=pv["personalization"],
75
  detailed_instruction=pv["detailed_instruction"],
76
- title=a.get("title", ""),
77
  description=_desc_str(a),
78
  features=_feat_str(a),
79
- price=f"${a.get('price', '')}",
80
- competitor_title=b.get("title", ""),
81
  competitor_description=_desc_str(b),
82
  competitor_features=_feat_str(b),
83
- competitor_price=f"${b.get('price', '')}",
84
  demographics=demographics_str,
85
  )
 
 
 
86
 
87
 
88
  def build_seller_system_prompt_likelihood(
 
63
 
64
 
65
  # ── Seller system prompt builders ─────────────────────────────────────────────
 
66
def build_seller_system_prompt_preference(
    pair: dict, cfg: dict, demographics_str: str
) -> str:
    """
    Build the seller system prompt for a preference (product A vs. B) item.

    ``pair["product_a"]`` supplies the seller's own product fields and
    ``pair["product_b"]`` the competitor fields. The personalization and
    detailed-instruction switches come from ``cfg["prompt_variant"]``.
    Missing titles/prices default to an empty string so the prompt never
    contains a literal "None" / "$None".

    Prints the personalization flag and the first 300 characters of the
    prompt as debug output, then returns the full prompt.
    """
    from prompts.seller_system.preference import get_seller_system_prompt
    pv = cfg["prompt_variant"]
    a, b = pair["product_a"], pair["product_b"]
    result = get_seller_system_prompt(
        personalization=pv["personalization"],
        detailed_instruction=pv["detailed_instruction"],
        title=a.get("title", ""),
        description=_desc_str(a),
        features=_feat_str(a),
        price=f"${a.get('price', '')}",
        competitor_title=b.get("title", ""),
        competitor_description=_desc_str(b),
        competitor_features=_feat_str(b),
        competitor_price=f"${b.get('price', '')}",
        demographics=demographics_str,
    )
    # Debug logging: which variant was used and the start of the prompt.
    print(f"[PROMPT] personalization={pv['personalization']}")
    print(f"[PROMPT] system_prompt[:300]: {result[:300]}")
    return result
88
 
89
 
90
  def build_seller_system_prompt_likelihood(
src/model.py CHANGED
@@ -31,6 +31,11 @@ def call_model(messages: list, cfg: dict) -> str:
31
  - Degenerate repetition (Pocahontas-style loop)
32
  """
33
  model_name = cfg["model_name"]
 
 
 
 
 
34
  try:
35
  from tinker_cookbook import renderers as tinker_renderers
36
 
 
31
  - Degenerate repetition (Pocahontas-style loop)
32
  """
33
  model_name = cfg["model_name"]
34
+ print(f"[MODEL] model_name={model_name}")
35
+ print(f"[MODEL] num_messages={len(messages)}")
36
+ print(f"[MODEL] roles={[m['role'] for m in messages]}")
37
+ print(f"[MODEL] system_prompt[:150]={messages[0]['content'][:150]}")
38
+
39
  try:
40
  from tinker_cookbook import renderers as tinker_renderers
41
 
src/ui/screens_likelihood.py CHANGED
@@ -86,7 +86,10 @@ def screen_item_intro(s: dict, cfg: dict) -> None:
86
 
87
  # ── Build prompts ─────────────────────────────────────────────────────
88
  # Seller always pushes user to buy. Features passed for groceries only.
89
- system_prompt = build_seller_system_prompt_likelihood(product, cfg, demo_str)
 
 
 
90
  opening_msg = opening_message_likelihood(product, category)
91
  user_choice_msg = f"<choice>{pre_int}</choice>"
92
  closing_msg = closing_message_likelihood(product, category) # logged only
 
86
 
87
  # ── Build prompts ─────────────────────────────────────────────────────
88
  # Seller always pushes user to buy. Features passed for groceries only.
89
+ item_cfg = {**cfg,
90
+ "prompt_variant": item["prompt_variant"],
91
+ "model_name": item["model_name"]}
92
+ system_prompt = build_seller_system_prompt_likelihood(product, item_cfg, demo_str)
93
  opening_msg = opening_message_likelihood(product, category)
94
  user_choice_msg = f"<choice>{pre_int}</choice>"
95
  closing_msg = closing_message_likelihood(product, category) # logged only
src/ui/screens_preference.py CHANGED
@@ -100,7 +100,10 @@ def screen_pair_intro(s: dict, cfg: dict) -> None:
100
 
101
  # ── Build prompts ─────────────────────────────────────────────────────
102
  # Seller always argues for Product A.
103
- system_prompt = build_seller_system_prompt_preference(item, cfg, demo_str)
 
 
 
104
  opening_msg = opening_message_preference(item)
105
  user_choice_msg = f"<choice>{pre_int}</choice>"
106
  closing_msg = closing_message_preference(item) # vote_final equivalent; logged only
@@ -153,5 +156,10 @@ def screen_pair_intro(s: dict, cfg: dict) -> None:
153
  "num_turns": 0,
154
  })
155
 
 
 
 
 
 
156
  s["screen"] = "chat"
157
  st.rerun()
 
100
 
101
  # ── Build prompts ─────────────────────────────────────────────────────
102
  # Seller always argues for Product A.
103
+ item_cfg = {**cfg,
104
+ "prompt_variant": item["prompt_variant"],
105
+ "model_name": item["model_name"]}
106
+ system_prompt = build_seller_system_prompt_preference(item, item_cfg, demo_str)
107
  opening_msg = opening_message_preference(item)
108
  user_choice_msg = f"<choice>{pre_int}</choice>"
109
  closing_msg = closing_message_preference(item) # vote_final equivalent; logged only
 
156
  "num_turns": 0,
157
  })
158
 
159
+ print(f"[CONV] num turns stored: {len(s['items'][idx]['conversation']['turns'])}")
160
+ print(f"[CONV] turn roles: {[(t['role'], t.get('synthetic')) for t in s['items'][idx]['conversation']['turns']]}")
161
+ print(f"[CONV] turn 0 content[:100]: {s['items'][idx]['conversation']['turns'][0]['content'][:100]}")
162
+ print(f"[CONV] turn 1 content: {s['items'][idx]['conversation']['turns'][1]['content']}")
163
+
164
  s["screen"] = "chat"
165
  st.rerun()
src/ui/screens_shared.py CHANGED
@@ -308,7 +308,8 @@ def screen_chat(s: dict, cfg: dict) -> None:
308
  messages.append({"role": "user", "content": user_msg})
309
 
310
  with st.spinner("AI is responding…"):
311
- ai_reply = call_model(messages, cfg)
 
312
 
313
  now = time.time()
314
  turn_base = len(conv["turns"])
 
308
  messages.append({"role": "user", "content": user_msg})
309
 
310
  with st.spinner("AI is responding…"):
311
+ item_cfg = {**cfg, "model_name": item["model_name"]}
312
+ ai_reply = call_model(messages, item_cfg)
313
 
314
  now = time.time()
315
  turn_base = len(conv["turns"])
study_config.yaml CHANGED
@@ -28,12 +28,22 @@ categories:
28
  - name: movies
29
  count: 5
30
 
31
- # Seller system prompt composition (from lsp/src/prompts/seller_system/)
32
- prompt_variant:
33
- personalization: false # true → passes participant demographics to seller prompt
34
- detailed_instruction: true # true → appends the detailed instruction block
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- model_name: "meta-llama/Llama-3.1-8B-Instruct"
37
  pair_selection_seed: 42 # Seed for reproducible 50-item pool selection per category
38
  pairs_per_user: 5 # Total items/pairs shown per participant
39
 
@@ -42,7 +52,7 @@ min_turns: 3 # Minimum exchanges before "done" button is enab
42
  max_turns: 3 # Hard cap; input is disabled after this many exchanges
43
 
44
  # Prolific
45
- prolific_completion_code: "XXXXXXXX"
46
 
47
  # HuggingFace dataset repo where results (JSON + CSV) are uploaded
48
- output_dataset_repo: "lms-shape-preferences/user_study-preference-detailed_instructions"
 
28
  - name: movies
29
  count: 5
30
 
31
+ model_variants:
32
+ - name: personalized
33
+ model_name: "meta-llama/Llama-3.1-8B-Instruct"
34
+ prompt_variant:
35
+ personalization: true
36
+ detailed_instruction: true
37
+ count: 2 # items using this variant for odd-numbered users
38
+
39
+ - name: base
40
+ model_name: "meta-llama/Llama-3.1-8B-Instruct"
41
+ prompt_variant:
42
+ personalization: false
43
+ detailed_instruction: true
44
+ count: 3 # items using this variant for odd-numbered users
45
+ # The two variants' counts swap on alternating users.
46
 
 
47
  pair_selection_seed: 42 # Seed for reproducible 50-item pool selection per category
48
  pairs_per_user: 5 # Total items/pairs shown per participant
49
 
 
52
  max_turns: 3 # Hard cap; input is disabled after this many exchanges
53
 
54
  # Prolific
55
+ prolific_completion_code: "C1JEJWOQ"
56
 
57
  # HuggingFace dataset repo where results (JSON + CSV) are uploaded
58
+ output_dataset_repo: "lms-shape-preferences/user_study-preference-base"