File size: 5,125 Bytes
6b23da9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45b2cda
 
 
 
 
9f88ad1
 
 
 
 
 
45b2cda
 
6b23da9
fd3d17e
 
 
 
 
 
 
6b23da9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Load study_config.yaml and merge with env-var secrets.
All static lookup tables live here so any module can import them.
"""
import os
from pathlib import Path

import yaml

# Directory two levels above this file's resolved (absolute, symlink-free) path.
# load_config() resolves study_config.yaml, data/ and annotations/ relative to it.
BASE_DIR: Path = Path(__file__).resolve().parent.parent


def load_config() -> dict:
    """Load ``study_config.yaml`` from ``BASE_DIR`` and overlay runtime settings.

    The YAML file supplies the study configuration. On top of it this function
    sets, overwriting any value of the same name found in the YAML:

    * ``hf_token``, ``tinker_api_key``, ``prolific_api_token`` -- read from the
      ``HF_TOKEN`` / ``TINKER_API_KEY`` / ``PROLIFIC_API_TOKEN`` environment
      variables (empty string when unset).
    * ``debug_mode`` -- ``True`` only when ``DEBUG_MODE`` equals ``"true"``
      (case-insensitive).
    * ``print_model_input`` -- ``True`` when ``PRINT_MODEL_INPUT`` is one of
      ``1`` / ``true`` / ``yes`` (case-insensitive) or the YAML value is truthy.
    * ``prolific_study_id`` -- kept from the YAML, defaulting to ``""``.
    * ``sampling_temperature`` -- coerced to ``float``; a missing or null
      value becomes ``1.0``.
    * ``base_dir``, ``data_dir``, ``annotations_dir`` -- string paths derived
      from ``BASE_DIR``.

    Returns:
        The merged configuration dictionary.

    Raises:
        OSError: If ``study_config.yaml`` cannot be opened.
        TypeError: If the YAML top level is not a mapping.
        ValueError: If ``sampling_temperature`` cannot be converted to float.
    """
    config_path = BASE_DIR / "study_config.yaml"
    # Decode explicitly as UTF-8; the platform default is locale-dependent.
    with open(config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # An empty or comment-only YAML document parses to None, not {}.
    if cfg is None:
        cfg = {}
    elif not isinstance(cfg, dict):
        raise TypeError(
            f"{config_path} must contain a mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    # Secrets come only from env vars, never from yaml
    cfg["hf_token"] = os.getenv("HF_TOKEN", "")
    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
    cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"

    # Log full chat messages + renderer prompt to stdout (local dev / one-off audits).
    # Either the env var or the yaml flag can switch it on.
    env_print = os.getenv("PRINT_MODEL_INPUT", "").lower() in ("1", "true", "yes")
    cfg["print_model_input"] = env_print or bool(cfg.get("print_model_input", False))

    # prolific_study_id is read from the yaml (non-secret, study-specific)
    cfg.setdefault("prolific_study_id", "")

    # Tinker SamplingParams.temperature (seller / all call_model paths).
    # A missing key and an explicit null both fall back to 1.0.
    temperature = cfg.get("sampling_temperature")
    cfg["sampling_temperature"] = 1.0 if temperature is None else float(temperature)

    # Derived filesystem paths
    cfg["base_dir"] = str(BASE_DIR)
    cfg["data_dir"] = str(BASE_DIR / "data")
    cfg["annotations_dir"] = str(BASE_DIR / "annotations")

    return cfg


# ---------------------------------------------------------------------------
# HF dataset repos — (study_type, category) → repo_id
# ---------------------------------------------------------------------------
# Lookup keyed by a (study_type, category) tuple; the value is the dataset repo id.
CATEGORY_TO_REPO: dict = {
    # "preference" study
    ("preference", "movies"): "lms-shape-preferences/pairs_Movies_and_TV",
    ("preference", "groceries"): "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food",
    # "likelihood" study
    ("likelihood", "movies"): "lms-shape-preferences/amazon_Movies_and_TV",
    ("likelihood", "groceries"): "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food",
}

# ---------------------------------------------------------------------------
# Display helpers
# ---------------------------------------------------------------------------
# Category key -> human-readable category name.
CATEGORY_DISPLAY: dict = {
    "movies": "Movies & TV",
    "groceries": "Grocery Products",
}

# Category key -> wording of the "already used it" option.
# NOTE(review): presumably shown in a familiarity question; confirm against the UI code.
FAMILIARITY_USED_LABEL: dict = {
    "movies": "Watched it before",
    "groceries": "Used it before",
}

# ---------------------------------------------------------------------------
# Background questions, keyed by category
# ---------------------------------------------------------------------------
# Category key -> ordered list of question dicts, each with exactly the keys
# "key", "label" and "placeholder" (in that order). The rows below are
# (key, label, placeholder) triples expanded into those dicts.
BACKGROUND_QUESTIONS: dict = {
    category: [
        {"key": key, "label": label, "placeholder": placeholder}
        for key, label, placeholder in rows
    ]
    for category, rows in (
        (
            "movies",
            (
                (
                    "movies_criteria",
                    "When picking between movies to purchase, what matters to you?",
                    "e.g. I look for strong storytelling, good reviews, genre, director, cast…",
                ),
                (
                    "movies_enjoy",
                    "What kinds of movies do you usually enjoy, and why?",
                    "e.g. I love sci-fi thrillers because they keep me on the edge of my seat…",
                ),
                (
                    "movies_avoid",
                    "What kinds of movies do you usually avoid, and why?",
                    "e.g. I tend to skip horror movies because I don't enjoy being scared…",
                ),
            ),
        ),
        (
            "groceries",
            (
                (
                    "groceries_criteria",
                    "When picking between foods or grocery items to purchase, what matters to you?",
                    "e.g. Price, ingredients, brand trust, nutritional value, taste…",
                ),
                (
                    "groceries_enjoy",
                    "What kinds of foods or grocery items do you usually enjoy, and why?",
                    "e.g. I enjoy organic snacks because they feel healthier and taste fresh…",
                ),
                (
                    "groceries_avoid",
                    "What kinds of foods or grocery items do you usually avoid, and why?",
                    "e.g. I avoid heavily processed foods because of the artificial ingredients…",
                ),
            ),
        ),
    )
}

# ---------------------------------------------------------------------------
# Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly
# ---------------------------------------------------------------------------
# Scale point (1-7) -> label for the purchase-likelihood scale.
# Labels are listed in scale order; enumerate() assigns the integer keys 1..7.
LIKELIHOOD_LABELS: dict = dict(
    enumerate(
        [
            "Definitely would not buy",
            "Probably would not buy",
            "Slightly unlikely to buy",
            "Neutral",
            "Slightly likely to buy",
            "Probably would buy",
            "Definitely would buy",
        ],
        start=1,
    )
)

# Scale point (1-7) -> label for the Product A vs. Product B preference scale.
# Labels are listed in scale order; enumerate() assigns the integer keys 1..7.
PREFERENCE_LABELS: dict = dict(
    enumerate(
        [
            "Definitely would prefer Product A",
            "Probably would prefer Product A",
            "Slightly likely to prefer Product A",
            "Neutral",
            "Slightly likely to prefer Product B",
            "Probably would prefer Product B",
            "Definitely would prefer Product B",
        ],
        start=1,
    )
)

# NOTE(review): presumably minimum word counts required of free-text answers
# (background questions vs. reflection); confirm against the validation code.
MIN_WORDS_BACKGROUND: int = 20
MIN_WORDS_REFLECTION: int = 10