Spaces:

lspcloud
/

prolific-preferences-dynamic

Sleeping

App Files Files Community

prolific-preferences-dynamic / study_config.yaml

ehejin

change ranking style

0e06eab 20 days ago

raw

history blame contribute delete

3.57 kB

	# ── Per-deployment study configuration ────────────────────────────────────────
	# Copy this file to each HuggingFace Space and edit as needed.
	# Secrets (HF_TOKEN, GH_TOKEN, TINKER_API_KEY) must be set as Space Secrets,
	# never stored here.

	# "preference" : participants compare Product A vs Product B (7-pt preference scale)
	# "likelihood" : participants evaluate a single product (7-pt likelihood-to-buy scale)
	# "model_comparison" : one pair; same participant chats with multiple seller models
	#     (order randomized). Use pairs_per_user: 1 and a comparison_models list whose
	#     entries set name, model_name, sampler_path, use_demographics, use_background,
	#     and personalization.
	#     JR1-style checkpoints: set uniform_initial_survey: true (preference_initial_uniform).
	#     JR2-style (persona buyer wording / simple seller): omit it or set it to false
	#     (preference_initial).
	#     Omit model_variants for this study type.
	# study_type: preference

	# Categories to include. Each entry needs a name and a count.
	#
	# Single category (movies only):
	# categories:
	#   - name: movies
	#     count: 5
	#
	# Two categories (mixed):
	# categories:
	#   - name: movies
	#     count: 3
	#   - name: groceries
	#     count: 2
	#
	# For two-category studies the split (3/2 vs 2/3) is automatically alternated
	# across users so the overall pool stays balanced.
	# The two counts must sum to pairs_per_user.
	# categories:
	#   - name: movies
	#     count: 2

	# model_variants:
	#   - name: base
	#     model_name: "meta-llama/Llama-3.1-8B-Instruct"
	#     sampler_path: ""
	#     prompt_variant:
	#       personalization: true
	#       include_bio: true
	#     count: 2  # items using this variant for odd-numbered users
	#     # (counts swap on alternating users)

	# Seed for reproducible 50-item pool selection per category.
	pair_selection_seed: 42
	# pairs_per_user: 2 # Total items/pairs shown per participant

	# Chat constraints — both set to 3 so each participant has exactly 3 real exchanges.
	# min_turns: 3 # Minimum exchanges before "done" button is enabled
	# max_turns: 3 # Hard cap; input is disabled after this many exchanges

	# Prolific study identifiers.
	# NOTE(review): the header of this file says secrets must be Space Secrets and
	# never stored here — confirm the completion code is acceptable to commit.
	prolific_study_id: '6a07a5ffe759e03e67f9487c'
	prolific_completion_code: 'CIE6CQV7'

	# Results (JSON + CSV) are uploaded to this HuggingFace dataset repo.
	output_dataset_repo: 'ehejin/user_study-preference-personalized_0514_comparison_JR1_2'

	# ── Example: model_comparison (uncomment and set study_type; comment out model_variants) ──
	#
	# The keys below are uncommented, i.e. this section is the active configuration.
	# List-entry fields are nested under their "- name:" item; when every key sits
	# at one level, each entry holds only its name and the remaining fields turn
	# into repeated top-level keys.

	# NOTE(review): presumably echoes the prompt sent to the model — confirm in the app.
	print_model_input: true
	study_type: model_comparison

	# One category; its count equals pairs_per_user.
	categories:
	  - name: movies
	    count: 1
	pairs_per_user: 1

	# Equal min and max, so the number of exchanges is fixed at 3.
	min_turns: 3
	max_turns: 3

	sampling_temperature: 1.0  # Tinker SamplingParams; seller and all call_model calls

	# Seller models compared in this study. Every entry uses the same model_name;
	# the entries differ in sampler_path and uniform_initial_survey.
	comparison_models:
	  # Empty sampler_path; uniform_initial_survey is omitted for this entry.
	  - name: base_anonymous
	    model_name: 'meta-llama/Llama-3.1-8B-Instruct'
	    sampler_path: ''
	    use_demographics: false
	    use_background: false
	    personalization: false

	  # JR1-style checkpoint: uniform_initial_survey is true.
	  - name: finetuned_JR1
	    model_name: 'meta-llama/Llama-3.1-8B-Instruct'
	    sampler_path: 'tinker://2fdbf0af-7a75-55a2-aadd-9c6cdf4229d5:train:0/sampler_weights/000060'
	    use_demographics: false
	    use_background: false
	    personalization: false
	    uniform_initial_survey: true

	  # JR2-style checkpoint: uniform_initial_survey is false.
	  - name: finetuned_JR2
	    model_name: 'meta-llama/Llama-3.1-8B-Instruct'
	    sampler_path: 'tinker://5e6db03e-85d5-5d3c-95db-8c68e7718be1:train:0/sampler_weights/000120'
	    use_demographics: false
	    use_background: false
	    personalization: false
	    uniform_initial_survey: false