# ── Per-deployment study configuration ────────────────────────────────────────
# Copy this file to each HuggingFace Space and edit as needed.
# Secrets (HF_TOKEN, GH_TOKEN, TINKER_API_KEY) must be set as Space Secrets,
# never stored here.

# Study type — one of:
#   "preference"       : participants compare Product A vs Product B
#                        (7-pt preference scale)
#   "likelihood"       : participants evaluate a single product
#                        (7-pt likelihood-to-buy scale)
#   "model_comparison" : one pair; same participant chats with multiple seller
#                        models (order randomized).
#                        Use pairs_per_user: 1 and comparison_models: (name,
#                        model_name, sampler_path, use_demographics,
#                        use_background, personalization).
#                        JR1-style checkpoints: uniform_initial_survey: true
#                        (preference_initial_uniform).
#                        JR2-style (persona buyer wording / simple seller):
#                        omit or false (preference_initial).
#                        Omit model_variants.
# study_type: preference

# Categories to include. Each entry needs a name and a count.
#
# Single category (movies only):
#   categories:
#     - name: movies
#       count: 5
#
# Two categories (mixed):
#   categories:
#     - name: movies
#       count: 3
#     - name: groceries
#       count: 2
#
# For two-category studies the split (3/2 vs 2/3) is automatically alternated
# across users so the overall pool stays balanced.
# The two counts must sum to pairs_per_user.
# categories:
#   - name: movies
#     count: 2

# model_variants:
#   - name: base
#     model_name: "meta-llama/Llama-3.1-8B-Instruct"
#     sampler_path: ""
#     prompt_variant:
#       personalization: true
#       include_bio: true
#     count: 2  # items using this variant for odd-numbered users;
#               # counts swap on alternating users

# Seed for reproducible 50-item pool selection per category
pair_selection_seed: 42

# pairs_per_user: 2  # Total items/pairs shown per participant

# Chat constraints — both set to 3 so each participant has exactly 3 real exchanges.
# min_turns: 3  # Minimum exchanges before the "done" button is enabled
# max_turns: 3  # Hard cap; input is disabled after this many exchanges

# Prolific
prolific_completion_code: "CIE6CQV7"
prolific_study_id: "6a07a5ffe759e03e67f9487c"

# HuggingFace dataset repo where results (JSON + CSV) are uploaded
output_dataset_repo: "ehejin/user_study-preference-personalized_0514_comparison_JR1_2"

# ── Example: model_comparison (uncomment and set study_type; comment out model_variants) ──
# NOTE(review): the keys below are uncommented, so this "example" is the
# configuration that is actually in effect for this deployment.
# print_model_input: true
study_type: model_comparison

# Single category, one pair per participant.
categories:
  - name: movies
    count: 1

pairs_per_user: 1

# Both turn limits are 3, so minimum and maximum exchanges coincide.
min_turns: 3
max_turns: 3

# Tinker SamplingParams; seller and all call_model calls
sampling_temperature: 1.0

# Seller models compared in this study. Each entry sets name, model_name,
# sampler_path, use_demographics, use_background and personalization;
# uniform_initial_survey is set only on the fine-tuned entries.
comparison_models:
  # Base model: empty sampler_path, no uniform_initial_survey key.
  - name: base_anonymous
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: ""
    use_demographics: false
    use_background: false
    personalization: false

  # Fine-tuned checkpoint with uniform_initial_survey enabled.
  - name: finetuned_JR1
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: "tinker://2fdbf0af-7a75-55a2-aadd-9c6cdf4229d5:train:0/sampler_weights/000060"
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: true

  # Fine-tuned checkpoint with uniform_initial_survey explicitly disabled.
  - name: finetuned_JR2
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: "tinker://5e6db03e-85d5-5d3c-95db-8c68e7718be1:train:0/sampler_weights/000120"
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: false