""" Load study_config.yaml and merge with env-var secrets. All static lookup tables live here so any module can import them. """ import os from pathlib import Path import yaml BASE_DIR = Path(__file__).resolve().parent.parent def load_config() -> dict: config_path = BASE_DIR / "study_config.yaml" with open(config_path) as f: cfg = yaml.safe_load(f) # Secrets come only from env vars, never from yaml cfg["hf_token"] = os.getenv("HF_TOKEN", "") cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "") cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "") cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true" # prolific_study_id is read from the yaml (non-secret, study-specific) cfg.setdefault("prolific_study_id", "") # Derived filesystem paths cfg["base_dir"] = str(BASE_DIR) cfg["data_dir"] = str(BASE_DIR / "data") cfg["annotations_dir"] = str(BASE_DIR / "annotations") return cfg # --------------------------------------------------------------------------- # HF dataset repos — (study_type, category) → repo_id # --------------------------------------------------------------------------- CATEGORY_TO_REPO: dict = { ("preference", "movies"): "lms-shape-preferences/pairs_Movies_and_TV", ("preference", "groceries"): "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food", ("likelihood", "movies"): "lms-shape-preferences/amazon_Movies_and_TV", ("likelihood", "groceries"): "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food", } # --------------------------------------------------------------------------- # Display helpers # --------------------------------------------------------------------------- CATEGORY_DISPLAY: dict = { "movies": "Movies & TV", "groceries": "Grocery Products", } FAMILIARITY_USED_LABEL: dict = { "movies": "Watched it before", "groceries": "Used it before", } # --------------------------------------------------------------------------- # Background questions, keyed by category # --------------------------------------------------------------------------- BACKGROUND_QUESTIONS: dict = { "movies": [ { "key": "movies_criteria", "label": "When picking between movies to purchase, what matters to you?", "placeholder": "e.g. I look for strong storytelling, good reviews, genre, director, cast…", }, { "key": "movies_enjoy", "label": "What kinds of movies do you usually enjoy, and why?", "placeholder": "e.g. I love sci-fi thrillers because they keep me on the edge of my seat…", }, { "key": "movies_avoid", "label": "What kinds of movies do you usually avoid, and why?", "placeholder": "e.g. I tend to skip horror movies because I don't enjoy being scared…", }, ], "groceries": [ { "key": "groceries_criteria", "label": "When picking between foods or grocery items to purchase, what matters to you?", "placeholder": "e.g. Price, ingredients, brand trust, nutritional value, taste…", }, { "key": "groceries_enjoy", "label": "What kinds of foods or grocery items do you usually enjoy, and why?", "placeholder": "e.g. I enjoy organic snacks because they feel healthier and taste fresh…", }, { "key": "groceries_avoid", "label": "What kinds of foods or grocery items do you usually avoid, and why?", "placeholder": "e.g. I avoid heavily processed foods because of the artificial ingredients…", }, ], } # --------------------------------------------------------------------------- # Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly # --------------------------------------------------------------------------- LIKELIHOOD_LABELS: dict = { 1: "Definitely would not buy", 2: "Probably would not buy", 3: "Slightly unlikely to buy", 4: "Neutral", 5: "Slightly likely to buy", 6: "Probably would buy", 7: "Definitely would buy", } PREFERENCE_LABELS: dict = { 1: "Definitely would prefer Product A", 2: "Probably would prefer Product A", 3: "Slightly likely to prefer Product A", 4: "Neutral", 5: "Slightly likely to prefer Product B", 6: "Probably would prefer Product B", 7: "Definitely would prefer Product B", } MIN_WORDS_BACKGROUND = 20 MIN_WORDS_REFLECTION = 10