# prolific-preferences-dynamic / study_config.yaml
# ehejin's picture
# change ranking style
# 0e06eab
# ── Per-deployment study configuration ────────────────────────────────────────
# Copy this file to each HuggingFace Space and edit as needed.
# Secrets (HF_TOKEN, GH_TOKEN, TINKER_API_KEY) must be set as Space Secrets,
# never stored here.
#
# study_type selects the study design. Supported values:
#   "preference"       : participants compare Product A vs Product B
#                        (7-pt preference scale)
#   "likelihood"       : participants evaluate a single product
#                        (7-pt likelihood-to-buy scale)
#   "model_comparison" : one pair; the same participant chats with multiple
#                        seller models (order randomized). Use pairs_per_user: 1
#                        and comparison_models, where each entry has name,
#                        model_name, sampler_path, use_demographics,
#                        use_background, personalization.
#       JR1-style checkpoints: uniform_initial_survey: true
#         (preference_initial_uniform).
#       JR2-style (persona buyer wording / simple seller): omit or false
#         (preference_initial).
#       Omit model_variants.
# study_type: preference
# Categories to include. Each entry needs a name and a count.
#
# Single category (movies only):
#   categories:
#     - name: movies
#       count: 5
#
# Two categories (mixed):
#   categories:
#     - name: movies
#       count: 3
#     - name: groceries
#       count: 2
#
# For two-category studies the split (3/2 vs 2/3) is automatically alternated
# across users so the overall pool stays balanced.
# The two counts must sum to pairs_per_user.
# categories:
#   - name: movies
#     count: 2
# model_variants:
# - name: base
# model_name: "meta-llama/Llama-3.1-8B-Instruct"
# sampler_path: ""
# prompt_variant:
# personalization: true
# include_bio: true
# count: 2 # items using this variant for odd-numbered users;
# counts swap on alternating users.
# Seed for reproducible 50-item pool selection per category.
pair_selection_seed: 42
# pairs_per_user: 2 # Total items/pairs shown per participant
# Chat constraints — both set to 3 so each participant has exactly 3 real exchanges.
# min_turns: 3 # Minimum exchanges before "done" button is enabled
# max_turns: 3 # Hard cap; input is disabled after this many exchanges
# Prolific study identifiers. Both stay quoted so they always load as strings,
# never as numbers.
prolific_study_id: '6a07a5ffe759e03e67f9487c'
prolific_completion_code: 'CIE6CQV7'
# Results (JSON + CSV) are uploaded to this HuggingFace dataset repo.
output_dataset_repo: 'ehejin/user_study-preference-personalized_0514_comparison_JR1_2'
# ── Active configuration: model_comparison (study_type is set below; model_variants stays commented out) ──
#
# NOTE(review): presumably a debug switch that echoes the prompt sent to each
# model -- confirm, and consider turning it off for live participant sessions.
print_model_input: true

# Study design in effect for this deployment.
study_type: model_comparison

# Item categories. Each entry needs a name and a count; the counts must add up
# to pairs_per_user.
categories:
  - name: movies
    count: 1

# Total items/pairs shown per participant (model_comparison uses one pair).
pairs_per_user: 1

# Chat constraints. min_turns gates the "done" button and max_turns is the hard
# cap, so setting both to 3 gives every participant exactly 3 exchanges.
min_turns: 3
max_turns: 3

sampling_temperature: 1.0  # Tinker SamplingParams; seller and all call_model calls

# Seller models the same participant chats with (order randomized).
# Every field below is nested under its own "- name:" entry; left at column 0
# these keys would become repeated top-level keys and collapse to the last one.
comparison_models:
  # Base model. sampler_path is empty -- presumably "no fine-tuned weights";
  # verify against the loader. uniform_initial_survey is omitted, which the
  # study_type notes in this file treat the same as false.
  - name: base_anonymous
    model_name: 'meta-llama/Llama-3.1-8B-Instruct'
    sampler_path: ''
    use_demographics: false
    use_background: false
    personalization: false

  # JR1-style checkpoint: uniform initial survey (preference_initial_uniform).
  - name: finetuned_JR1
    model_name: 'meta-llama/Llama-3.1-8B-Instruct'
    sampler_path: 'tinker://2fdbf0af-7a75-55a2-aadd-9c6cdf4229d5:train:0/sampler_weights/000060'
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: true

  # JR2-style checkpoint (persona buyer wording / simple seller):
  # preference_initial survey.
  - name: finetuned_JR2
    model_name: 'meta-llama/Llama-3.1-8B-Instruct'
    sampler_path: 'tinker://5e6db03e-85d5-5d3c-95db-8c68e7718be1:train:0/sampler_weights/000120'
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: false