# --- Per-deployment study configuration ---------------------------------------
# Copy this file to each HuggingFace Space and edit as needed.
# Secrets (HF_TOKEN, GH_TOKEN, TINKER_API_KEY) must be set as Space Secrets,
# never stored here.
# study_type selects the study design. Allowed values:
#   "preference"       : participants compare Product A vs Product B
#                        (7-pt preference scale)
#   "likelihood"       : participants evaluate a single product
#                        (7-pt likelihood-to-buy scale)
#   "model_comparison" : one pair; the same participant chats with multiple
#                        seller models (order randomized).
#                        Use pairs_per_user: 1 and a comparison_models list; each
#                        entry takes name, model_name, sampler_path,
#                        use_demographics, use_background, personalization.
#                        For JR1-style checkpoints set uniform_initial_survey: true
#                        (preference_initial_uniform).
#                        For JR2-style checkpoints (persona buyer wording / simple
#                        seller) omit it or set it to false (preference_initial).
#                        Omit model_variants.
# study_type: preference
# Categories to include. Each entry needs a name and a count.
#
# Single category (movies only):
#   categories:
#     - name: movies
#       count: 5
#
# Two categories (mixed):
#   categories:
#     - name: movies
#       count: 3
#     - name: groceries
#       count: 2
#
# For two-category studies the split (3/2 vs 2/3) is automatically alternated
# across users so the overall pool stays balanced.
# The two counts must sum to pairs_per_user.
# categories:
#   - name: movies
#     count: 2
# model_variants:
#   - name: base
#     model_name: "meta-llama/Llama-3.1-8B-Instruct"
#     sampler_path: ""
#     prompt_variant:
#       personalization: true
#       include_bio: true
#     count: 2  # items using this variant for odd-numbered users
#     # (counts swap on alternating users)
pair_selection_seed: 42  # Seed for reproducible 50-item pool selection per category
# pairs_per_user: 2  # Total items/pairs shown per participant
# Chat constraints: both set to 3 so each participant has exactly 3 real exchanges.
# min_turns: 3  # Minimum exchanges before "done" button is enabled
# max_turns: 3  # Hard cap; input is disabled after this many exchanges

# Prolific
prolific_completion_code: "CIE6CQV7"
prolific_study_id: "6a07a5ffe759e03e67f9487c"

# HuggingFace dataset repo where results (JSON + CSV) are uploaded
output_dataset_repo: "ehejin/user_study-preference-personalized_0514_comparison_JR1_2"
# --- model_comparison study (active) -------------------------------------------
# These settings are live, not a commented-out example. model_variants must stay
# commented out for this study type.

# NOTE(review): presumably echoes the prompt sent to each model for debugging;
# confirm, and confirm it should stay enabled for participant-facing runs.
print_model_input: true
study_type: model_comparison

# model_comparison uses a single pair, so the one category count equals
# pairs_per_user.
categories:
  - name: movies
    count: 1
pairs_per_user: 1

# min_turns == max_turns, so every chat has exactly 3 exchanges.
min_turns: 3
max_turns: 3
sampling_temperature: 1.0  # Tinker SamplingParams; seller and all call_model calls

# Seller models each participant chats with (order randomized).
comparison_models:
  # Base model; sampler_path is empty (presumably no fine-tuned weights; confirm).
  - name: base_anonymous
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: ""
    use_demographics: false
    use_background: false
    personalization: false
  # JR1-style checkpoint: uses the uniform initial survey.
  - name: finetuned_JR1
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: "tinker://2fdbf0af-7a75-55a2-aadd-9c6cdf4229d5:train:0/sampler_weights/000060"
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: true
  # JR2-style checkpoint: uniform initial survey explicitly off.
  - name: finetuned_JR2
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: "tinker://5e6db03e-85d5-5d3c-95db-8c68e7718be1:train:0/sampler_weights/000120"
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: false