ehejin's picture
sync w/ detailed repo
0f4326e
"""
Load study_config.yaml and merge with env-var secrets.
All static lookup tables live here so any module can import them.
"""
import os
from pathlib import Path
import yaml
# Directory two levels above this module's resolved location; load_config()
# looks for study_config.yaml here and derives the data/annotation paths from it.
_MODULE_PATH = Path(__file__).resolve()
BASE_DIR = _MODULE_PATH.parent.parent
def load_config() -> dict:
    """Load ``study_config.yaml`` and overlay env-var secrets and derived paths.

    The YAML file is read from ``BASE_DIR / "study_config.yaml"`` as UTF-8.
    An empty file (which ``yaml.safe_load`` parses as ``None``) is treated as
    an empty mapping so the environment-derived keys are still populated.

    Returns:
        The parsed mapping with these keys added. All but
        ``prolific_study_id`` overwrite any same-named YAML entry:

        * ``hf_token``, ``tinker_api_key``, ``prolific_api_token`` -- taken
          from the ``HF_TOKEN``, ``TINKER_API_KEY`` and ``PROLIFIC_API_TOKEN``
          environment variables, ``""`` when unset.
        * ``debug_mode`` -- ``True`` only when ``DEBUG_MODE`` equals
          ``"true"`` (case-insensitive); otherwise ``False``.
        * ``prolific_study_id`` -- kept from the YAML, ``""`` when absent.
        * ``base_dir``, ``data_dir``, ``annotations_dir`` -- string paths
          derived from ``BASE_DIR``.

    Raises:
        TypeError: If the top level of the YAML document is not a mapping.
    """
    config_path = BASE_DIR / "study_config.yaml"
    # Pin the encoding: the platform default may not be UTF-8, which would
    # corrupt or reject non-ASCII text in the study config.
    with open(config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    if cfg is None:
        # Empty (or comment-only) YAML parses to None; start from a blank config.
        cfg = {}
    elif not isinstance(cfg, dict):
        raise TypeError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    # Secrets come only from env vars, never from yaml
    cfg["hf_token"] = os.getenv("HF_TOKEN", "")
    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
    cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"

    # prolific_study_id is read from the yaml (non-secret, study-specific)
    cfg.setdefault("prolific_study_id", "")

    # Derived filesystem paths
    cfg["base_dir"] = str(BASE_DIR)
    cfg["data_dir"] = str(BASE_DIR / "data")
    cfg["annotations_dir"] = str(BASE_DIR / "annotations")
    return cfg
# ---------------------------------------------------------------------------
# HF dataset repos — (study_type, category) → repo_id
# ---------------------------------------------------------------------------
# "preference" studies map to the pairs_* repos and "likelihood" studies to the
# amazon_* repos; entries are listed as ((study_type, category), repo_id).
CATEGORY_TO_REPO: dict = dict(
    [
        (("preference", "movies"), "lms-shape-preferences/pairs_Movies_and_TV"),
        (("preference", "groceries"), "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food"),
        (("likelihood", "movies"), "lms-shape-preferences/amazon_Movies_and_TV"),
        (("likelihood", "groceries"), "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food"),
    ]
)
# ---------------------------------------------------------------------------
# Display helpers
# ---------------------------------------------------------------------------
# Human-readable name for each category key.
CATEGORY_DISPLAY: dict = dict(
    movies="Movies & TV",
    groceries="Grocery Products",
)
# Per-category wording for the "already familiar with this item" option.
FAMILIARITY_USED_LABEL: dict = dict(
    movies="Watched it before",
    groceries="Used it before",
)
# ---------------------------------------------------------------------------
# Background questions, keyed by category
# ---------------------------------------------------------------------------
def _background_question(key: str, label: str, placeholder: str) -> dict:
    """Bundle one question's fields into a ``key``/``label``/``placeholder`` dict."""
    return {"key": key, "label": label, "placeholder": placeholder}


# Each category maps to three questions, in order: criteria, enjoy, avoid.
BACKGROUND_QUESTIONS: dict = {
    "movies": [
        _background_question(
            "movies_criteria",
            "When picking between movies to purchase, what matters to you?",
            "e.g. I look for strong storytelling, good reviews, genre, director, cast…",
        ),
        _background_question(
            "movies_enjoy",
            "What kinds of movies do you usually enjoy, and why?",
            "e.g. I love sci-fi thrillers because they keep me on the edge of my seat…",
        ),
        _background_question(
            "movies_avoid",
            "What kinds of movies do you usually avoid, and why?",
            "e.g. I tend to skip horror movies because I don't enjoy being scared…",
        ),
    ],
    "groceries": [
        _background_question(
            "groceries_criteria",
            "When picking between foods or grocery items to purchase, what matters to you?",
            "e.g. Price, ingredients, brand trust, nutritional value, taste…",
        ),
        _background_question(
            "groceries_enjoy",
            "What kinds of foods or grocery items do you usually enjoy, and why?",
            "e.g. I enjoy organic snacks because they feel healthier and taste fresh…",
        ),
        _background_question(
            "groceries_avoid",
            "What kinds of foods or grocery items do you usually avoid, and why?",
            "e.g. I avoid heavily processed foods because of the artificial ingredients…",
        ),
    ],
}
# ---------------------------------------------------------------------------
# Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly
# ---------------------------------------------------------------------------
# Labels are listed in scale order; enumerate assigns them to points 1 through 7.
LIKELIHOOD_LABELS: dict = dict(
    enumerate(
        (
            "Definitely would not buy",
            "Probably would not buy",
            "Slightly unlikely to buy",
            "Neutral",
            "Slightly likely to buy",
            "Probably would buy",
            "Definitely would buy",
        ),
        start=1,
    )
)
# Seven-point scale running from Product A (1) through Neutral (4) to Product B (7).
PREFERENCE_LABELS: dict = dict(
    zip(
        range(1, 8),
        (
            "Definitely would prefer Product A",
            "Probably would prefer Product A",
            "Slightly likely to prefer Product A",
            "Neutral",
            "Slightly likely to prefer Product B",
            "Probably would prefer Product B",
            "Definitely would prefer Product B",
        ),
    )
)
# NOTE(review): presumably the minimum word counts required for free-text
# background and reflection answers -- confirm against the callers.
MIN_WORDS_BACKGROUND: int = 20
MIN_WORDS_REFLECTION: int = 10