File size: 4,609 Bytes
6b23da9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f4326e
 
 
 
 
 
 
6b23da9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Load study_config.yaml and merge with env-var secrets.
All static lookup tables live here so any module can import them.
"""
import os
from pathlib import Path

import yaml

# Absolute directory two levels above this file. load_config() looks up
# study_config.yaml and derives the data/annotations directories from it.
BASE_DIR: Path = Path(__file__).resolve().parent.parent


def load_config() -> dict:
    """Build the runtime configuration dictionary.

    Parses ``study_config.yaml`` located in ``BASE_DIR`` and then layers on
    values that are not taken from the YAML file: secrets and the debug flag
    (from environment variables) and filesystem paths derived from
    ``BASE_DIR``. Keys with the same names in the YAML are overwritten,
    except ``prolific_study_id``, which is only defaulted when missing.

    Returns:
        dict: The parsed YAML mapping plus the keys ``hf_token``,
        ``tinker_api_key``, ``prolific_api_token`` (each ``""`` when the
        environment variable is unset), ``debug_mode`` (bool),
        ``prolific_study_id``, ``base_dir``, ``data_dir`` and
        ``annotations_dir`` (paths as strings).

    Raises:
        OSError: If ``study_config.yaml`` cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
        yaml.YAMLError: If the file is not valid YAML.
        TypeError: If the top level of the YAML document is not a mapping.
    """
    config_path = BASE_DIR / "study_config.yaml"
    # Binary mode lets PyYAML decode the stream itself (BOM detection, UTF-8
    # by default) instead of relying on the platform's locale encoding.
    with open(config_path, "rb") as f:
        cfg = yaml.safe_load(f)

    if cfg is None:
        # An empty or comments-only file parses to None; treat it as a config
        # with no settings rather than failing on the first assignment below.
        cfg = {}
    elif not isinstance(cfg, dict):
        raise TypeError(
            f"{config_path} must contain a mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    # Secrets come only from env vars, never from yaml
    cfg["hf_token"] = os.getenv("HF_TOKEN", "")
    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
    # Only the (case-insensitive) string "true" enables debug mode.
    cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"

    # prolific_study_id is read from the yaml (non-secret, study-specific);
    # fall back to an empty string when the yaml does not define it.
    cfg.setdefault("prolific_study_id", "")

    # Derived filesystem paths, stored as plain strings
    cfg["base_dir"] = str(BASE_DIR)
    cfg["data_dir"] = str(BASE_DIR / "data")
    cfg["annotations_dir"] = str(BASE_DIR / "annotations")

    return cfg


# ---------------------------------------------------------------------------
# HF dataset repos — (study_type, category) → repo_id
# ---------------------------------------------------------------------------
# Lookup table: a (study_type, category) tuple maps to one dataset repo id.
CATEGORY_TO_REPO: dict = {
    # "preference" study type
    ("preference", "movies"): "lms-shape-preferences/pairs_Movies_and_TV",
    ("preference", "groceries"): "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food",
    # "likelihood" study type
    ("likelihood", "movies"): "lms-shape-preferences/amazon_Movies_and_TV",
    ("likelihood", "groceries"): "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food",
}

# ---------------------------------------------------------------------------
# Display helpers
# ---------------------------------------------------------------------------
# Human-readable name for each category key.
CATEGORY_DISPLAY: dict = {
    "movies": "Movies & TV",
    "groceries": "Grocery Products",
}

# Per-category wording for the "already used/watched this product" label.
FAMILIARITY_USED_LABEL: dict = {
    "movies": "Watched it before",
    "groceries": "Used it before",
}

# ---------------------------------------------------------------------------
# Background questions, keyed by category
# ---------------------------------------------------------------------------
def _background_question(*, key: str, label: str, placeholder: str) -> dict:
    """Return one question record holding exactly its three text fields."""
    return {"key": key, "label": label, "placeholder": placeholder}


# Each category maps to an ordered list of three free-text questions:
# purchase criteria, what the participant enjoys, and what they avoid.
BACKGROUND_QUESTIONS: dict = {
    "movies": [
        _background_question(
            key="movies_criteria",
            label="When picking between movies to purchase, what matters to you?",
            placeholder="e.g. I look for strong storytelling, good reviews, genre, director, cast…",
        ),
        _background_question(
            key="movies_enjoy",
            label="What kinds of movies do you usually enjoy, and why?",
            placeholder="e.g. I love sci-fi thrillers because they keep me on the edge of my seat…",
        ),
        _background_question(
            key="movies_avoid",
            label="What kinds of movies do you usually avoid, and why?",
            placeholder="e.g. I tend to skip horror movies because I don't enjoy being scared…",
        ),
    ],
    "groceries": [
        _background_question(
            key="groceries_criteria",
            label="When picking between foods or grocery items to purchase, what matters to you?",
            placeholder="e.g. Price, ingredients, brand trust, nutritional value, taste…",
        ),
        _background_question(
            key="groceries_enjoy",
            label="What kinds of foods or grocery items do you usually enjoy, and why?",
            placeholder="e.g. I enjoy organic snacks because they feel healthier and taste fresh…",
        ),
        _background_question(
            key="groceries_avoid",
            label="What kinds of foods or grocery items do you usually avoid, and why?",
            placeholder="e.g. I avoid heavily processed foods because of the artificial ingredients…",
        ),
    ],
}

# ---------------------------------------------------------------------------
# Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly
# ---------------------------------------------------------------------------
# 7-point purchase-likelihood scale. Labels are listed in rating order and
# numbered from 1, so the dict maps rating value -> label text.
LIKELIHOOD_LABELS: dict = dict(
    enumerate(
        (
            "Definitely would not buy",  # 1
            "Probably would not buy",  # 2
            "Slightly unlikely to buy",  # 3
            "Neutral",  # 4
            "Slightly likely to buy",  # 5
            "Probably would buy",  # 6
            "Definitely would buy",  # 7
        ),
        start=1,
    )
)

# 7-point A-versus-B preference scale. Labels are listed in rating order and
# numbered from 1: low values favour Product A, high values favour Product B.
PREFERENCE_LABELS: dict = dict(
    enumerate(
        (
            "Definitely would prefer Product A",  # 1
            "Probably would prefer Product A",  # 2
            "Slightly likely to prefer Product A",  # 3
            "Neutral",  # 4
            "Slightly likely to prefer Product B",  # 5
            "Probably would prefer Product B",  # 6
            "Definitely would prefer Product B",  # 7
        ),
        start=1,
    )
)

# Minimum word counts for free-text answers.
# NOTE(review): presumably MIN_WORDS_BACKGROUND applies to the
# BACKGROUND_QUESTIONS answers and MIN_WORDS_REFLECTION to a separate
# reflection prompt — the enforcing code is not in this module; confirm.
MIN_WORDS_BACKGROUND: int = 20
MIN_WORDS_REFLECTION: int = 10