"""
Load study_config.yaml and merge with env-var secrets.
All static lookup tables live here so any module can import them.
"""
import os
from pathlib import Path
import yaml
# Directory two levels above this module's resolved location; the study
# config file and the data/annotations directories are resolved against it.
_THIS_MODULE = Path(__file__).resolve()
BASE_DIR = _THIS_MODULE.parent.parent
def load_config() -> dict:
    """Load ``study_config.yaml`` and overlay env-var secrets and derived paths.

    The YAML file is read from ``BASE_DIR / "study_config.yaml"``. An empty
    (or comment-only) file is treated as an empty mapping so that the
    defaults below still apply instead of crashing on ``None``.

    Returns:
        The parsed YAML mapping with these keys added or normalised:

        * ``hf_token``, ``tinker_api_key``, ``prolific_api_token`` -- taken
          from the ``HF_TOKEN``, ``TINKER_API_KEY`` and ``PROLIFIC_API_TOKEN``
          environment variables (``""`` when unset); any YAML value under
          these keys is overwritten.
        * ``debug_mode`` -- ``True`` only when ``DEBUG_MODE`` is ``"true"``
          (case-insensitive).
        * ``print_model_input`` -- ``True`` when ``PRINT_MODEL_INPUT`` is one
          of ``1``/``true``/``yes`` (case-insensitive) or the YAML value is
          truthy.
        * ``prolific_study_id`` -- YAML value, ``""`` when the key is absent.
        * ``sampling_temperature`` -- YAML value coerced to ``float``;
          ``1.0`` when absent or null.
        * ``base_dir``, ``data_dir``, ``annotations_dir`` -- string paths
          derived from ``BASE_DIR``.

    Raises:
        OSError: If the config file cannot be opened.
        TypeError: If the YAML top level is not a mapping.
        ValueError: If ``sampling_temperature`` cannot be converted to float.

    Errors raised by ``yaml.safe_load`` itself propagate unchanged.
    """
    config_path = BASE_DIR / "study_config.yaml"
    # Decode explicitly as UTF-8 rather than the platform's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        loaded = yaml.safe_load(f)

    # safe_load yields None for an empty document; fall back to an empty
    # mapping so every default below is still populated.
    if loaded is None:
        cfg = {}
    elif isinstance(loaded, dict):
        cfg = loaded
    else:
        raise TypeError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(loaded).__name__}"
        )

    # Secrets come only from env vars, never from yaml
    cfg["hf_token"] = os.getenv("HF_TOKEN", "")
    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
    cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"

    # Log full chat messages + renderer prompt to stdout (local dev / one-off audits)
    env_print = os.getenv("PRINT_MODEL_INPUT", "").lower() in ("1", "true", "yes")
    cfg["print_model_input"] = env_print or bool(cfg.get("print_model_input", False))

    # prolific_study_id is read from the yaml (non-secret, study-specific)
    cfg.setdefault("prolific_study_id", "")

    # Tinker SamplingParams.temperature (seller / all call_model paths).
    # A missing key and an explicit null both fall back to 1.0.
    t_sample = cfg.get("sampling_temperature")
    cfg["sampling_temperature"] = 1.0 if t_sample is None else float(t_sample)

    # Derived filesystem paths
    cfg["base_dir"] = str(BASE_DIR)
    cfg["data_dir"] = str(BASE_DIR / "data")
    cfg["annotations_dir"] = str(BASE_DIR / "annotations")
    return cfg
# ---------------------------------------------------------------------------
# HF dataset repos — (study_type, category) → repo_id
# ---------------------------------------------------------------------------
# Keys are (study_type, category) tuples; values are dataset repo ids.
# Entries are grouped by study type and flattened in declaration order.
CATEGORY_TO_REPO: dict = {
    (study_type, category): repo_id
    for study_type, repos in (
        (
            "preference",
            (
                ("movies", "lms-shape-preferences/pairs_Movies_and_TV"),
                ("groceries", "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food"),
            ),
        ),
        (
            "likelihood",
            (
                ("movies", "lms-shape-preferences/amazon_Movies_and_TV"),
                ("groceries", "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food"),
            ),
        ),
    )
    for category, repo_id in repos
}
# ---------------------------------------------------------------------------
# Display helpers
# ---------------------------------------------------------------------------
# Category key -> human-readable category name.
CATEGORY_DISPLAY: dict = dict(
    movies="Movies & TV",
    groceries="Grocery Products",
)
# Category key -> wording of the "already familiar with this item" label.
FAMILIARITY_USED_LABEL: dict = dict(
    movies="Watched it before",
    groceries="Used it before",
)
# ---------------------------------------------------------------------------
# Background questions, keyed by category
# ---------------------------------------------------------------------------
# Category key -> ordered list of question dicts, each with the keys
# "key", "label" and "placeholder". Questions are declared below as
# (key, label, placeholder) triples and expanded into dicts.
BACKGROUND_QUESTIONS: dict = {
    category: [
        {"key": key, "label": label, "placeholder": placeholder}
        for key, label, placeholder in questions
    ]
    for category, questions in (
        (
            "movies",
            (
                (
                    "movies_criteria",
                    "When picking between movies to purchase, what matters to you?",
                    "e.g. I look for strong storytelling, good reviews, genre, director, cast…",
                ),
                (
                    "movies_enjoy",
                    "What kinds of movies do you usually enjoy, and why?",
                    "e.g. I love sci-fi thrillers because they keep me on the edge of my seat…",
                ),
                (
                    "movies_avoid",
                    "What kinds of movies do you usually avoid, and why?",
                    "e.g. I tend to skip horror movies because I don't enjoy being scared…",
                ),
            ),
        ),
        (
            "groceries",
            (
                (
                    "groceries_criteria",
                    "When picking between foods or grocery items to purchase, what matters to you?",
                    "e.g. Price, ingredients, brand trust, nutritional value, taste…",
                ),
                (
                    "groceries_enjoy",
                    "What kinds of foods or grocery items do you usually enjoy, and why?",
                    "e.g. I enjoy organic snacks because they feel healthier and taste fresh…",
                ),
                (
                    "groceries_avoid",
                    "What kinds of foods or grocery items do you usually avoid, and why?",
                    "e.g. I avoid heavily processed foods because of the artificial ingredients…",
                ),
            ),
        ),
    )
}
# ---------------------------------------------------------------------------
# Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly
# ---------------------------------------------------------------------------
# Scale point (1-7) -> purchase-likelihood wording. Labels are listed in
# ascending scale order and numbered starting from 1.
LIKELIHOOD_LABELS: dict = dict(
    enumerate(
        (
            "Definitely would not buy",
            "Probably would not buy",
            "Slightly unlikely to buy",
            "Neutral",
            "Slightly likely to buy",
            "Probably would buy",
            "Definitely would buy",
        ),
        start=1,
    )
)
# Scale point (1-7) -> pairwise-preference wording, running from Product A
# (low end) to Product B (high end). Labels are numbered starting from 1.
PREFERENCE_LABELS: dict = dict(
    enumerate(
        (
            "Definitely would prefer Product A",
            "Probably would prefer Product A",
            "Slightly likely to prefer Product A",
            "Neutral",
            "Slightly likely to prefer Product B",
            "Probably would prefer Product B",
            "Definitely would prefer Product B",
        ),
        start=1,
    )
)
# Minimum word counts for free-text answers.
# NOTE(review): presumably applied to the background-question answers and to
# a reflection answer respectively -- confirm against the callers.
MIN_WORDS_BACKGROUND = 20
MIN_WORDS_REFLECTION = 10