# ── Per-deployment study configuration ────────────────────────────────────────
# Copy this file to each HuggingFace Space and edit as needed.
# Secrets (HF_TOKEN, GH_TOKEN, TINKER_API_KEY) must be set as Space Secrets,
# never stored here.

# "preference" : participants compare Product A vs Product B (7-pt preference scale)
# "likelihood" : participants evaluate a single product (7-pt likelihood-to-buy scale)
# "model_comparison" : one pair; same participant chats with multiple seller models
#   (order randomized). Set pairs_per_user: 1 and list the models under
#   comparison_models; each entry takes name, model_name, sampler_path,
#   use_demographics, use_background, personalization, and optionally
#   uniform_initial_survey:
#     - JR1-style checkpoints: uniform_initial_survey: true (preference_initial_uniform).
#     - JR2-style (persona buyer wording / simple seller): omit or false (preference_initial).
#   Omit model_variants.
# study_type: preference

# Categories to include. Each entry needs a name and a count.
#
#   Single category (movies only):
#     categories:
#       - name: movies
#         count: 5
#
#   Two categories (mixed):
#     categories:
#       - name: movies
#         count: 3
#       - name: groceries
#         count: 2
#
# For two-category studies the split (3/2 vs 2/3) is automatically alternated
# across users so the overall pool stays balanced.
# The two counts must sum to pairs_per_user.
# categories:
#   - name: movies
#     count: 2

# model_variants:
#   - name: base
#     model_name: "meta-llama/Llama-3.1-8B-Instruct"
#     sampler_path: ""
#     prompt_variant:
#       personalization: true
#       include_bio: true
#     count: 2          # items using this variant for odd-numbered users
# (Variant counts swap on alternating users.)

# Seed for reproducible 50-item pool selection per category
pair_selection_seed: 42
# pairs_per_user: 2               # Total items/pairs shown per participant

# Chat constraints — both set to 3 so each participant has exactly 3 real exchanges.
# min_turns: 3                    # Minimum exchanges before "done" button is enabled
# max_turns: 3                    # Hard cap; input is disabled after this many exchanges

# Prolific study identifiers.
# NOTE(review): presumably the completion code is what participants receive at
# the end of the session and the study id ties results to one Prolific study —
# confirm against the app code. Both are kept as quoted strings so they are
# never re-typed by the YAML parser.
prolific_completion_code: "CIE6CQV7"
prolific_study_id: "6a07a5ffe759e03e67f9487c"

# HuggingFace dataset repo where results (JSON + CSV) are uploaded.
# NOTE(review): presumably written using the HF_TOKEN Space Secret — confirm.
output_dataset_repo: "ehejin/user_study-preference-personalized_0514_comparison_JR1_2"

# ── Active configuration: model_comparison study ──────────────────────────────
# (model_variants must stay commented out for this study type.)
# NOTE(review): looks like a debugging aid (name suggests it prints the prompt
# sent to each model) — confirm, and consider disabling for production runs.
print_model_input: true
study_type: model_comparison

# One movie pair per participant; category counts must sum to pairs_per_user.
categories:
  - name: movies
    count: 1
pairs_per_user: 1

# min_turns == max_turns, so every participant has exactly 3 exchanges.
min_turns: 3
max_turns: 3
sampling_temperature: 1.0   # Tinker SamplingParams; seller and all call_model calls

# Seller models the same participant chats with (order randomized).
# All three share the same base model_name; they differ in sampler_path and
# uniform_initial_survey only.
comparison_models:
  # Base model: empty sampler_path (presumably no fine-tuned weights — confirm).
  - name: base_anonymous
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: ""
    use_demographics: false
    use_background: false
    personalization: false
    # Explicit for consistency with the entries below; equivalent to omitting
    # the key (see the study_type notes at the top of this file).
    uniform_initial_survey: false
  # JR1-style checkpoint: uses the uniform initial survey.
  - name: finetuned_JR1
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: "tinker://2fdbf0af-7a75-55a2-aadd-9c6cdf4229d5:train:0/sampler_weights/000060"
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: true
  # JR2-style checkpoint: persona buyer wording / simple seller.
  - name: finetuned_JR2
    model_name: "meta-llama/Llama-3.1-8B-Instruct"
    sampler_path: "tinker://5e6db03e-85d5-5d3c-95db-8c68e7718be1:train:0/sampler_weights/000120"
    use_demographics: false
    use_background: false
    personalization: false
    uniform_initial_survey: false