# Spaces: Sleeping
# File size: 4,609 Bytes
# 6b23da9 0f4326e 6b23da9
"""
Load study_config.yaml and merge with env-var secrets.
All static lookup tables live here so any module can import them.
"""
import os
from pathlib import Path
import yaml
BASE_DIR = Path(__file__).resolve().parent.parent
def load_config() -> dict:
    """Load ``study_config.yaml`` and overlay environment-derived settings.

    The YAML file is read from ``BASE_DIR / "study_config.yaml"``. The
    secrets (``hf_token``, ``tinker_api_key``, ``prolific_api_token``) and
    the ``debug_mode`` flag are always taken from environment variables and
    overwrite any same-named keys present in the YAML. An empty YAML file
    is treated as an empty mapping.

    Returns:
        The merged configuration dictionary, including the derived
        ``base_dir``, ``data_dir`` and ``annotations_dir`` path strings.

    Raises:
        OSError: If the config file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
        TypeError: If the top-level YAML value is not a mapping.
    """
    config_path = BASE_DIR / "study_config.yaml"
    # Explicit encoding: without it the platform locale decides how the file
    # is decoded, which can corrupt non-ASCII text on non-UTF-8 systems.
    with open(config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # safe_load() yields None for an empty document; treat that as "no keys"
    # rather than failing on the first item assignment below.
    if cfg is None:
        cfg = {}
    if not isinstance(cfg, dict):
        # Same exception type the item assignments below would raise, but
        # with a message that points at the actual problem.
        raise TypeError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    # Secrets come only from env vars, never from yaml
    cfg["hf_token"] = os.getenv("HF_TOKEN", "")
    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
    # Only the case-insensitive string "true" enables debug mode.
    cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"

    # prolific_study_id is read from the yaml (non-secret, study-specific)
    cfg.setdefault("prolific_study_id", "")

    # Derived filesystem paths
    cfg["base_dir"] = str(BASE_DIR)
    cfg["data_dir"] = str(BASE_DIR / "data")
    cfg["annotations_dir"] = str(BASE_DIR / "annotations")
    return cfg
# ---------------------------------------------------------------------------
# HF dataset repos — (study_type, category) → repo_id
# ---------------------------------------------------------------------------
# Lookup keyed by a (study_type, category) tuple; each value is a repo id string.
CATEGORY_TO_REPO: dict = {
    # "preference" study
    ("preference", "movies"): "lms-shape-preferences/pairs_Movies_and_TV",
    ("preference", "groceries"): "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food",
    # "likelihood" study
    ("likelihood", "movies"): "lms-shape-preferences/amazon_Movies_and_TV",
    ("likelihood", "groceries"): "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food",
}
# ---------------------------------------------------------------------------
# Display helpers
# ---------------------------------------------------------------------------
# Category key -> display name.
CATEGORY_DISPLAY: dict = {
    "movies": "Movies & TV",
    "groceries": "Grocery Products",
}
# Category key -> category-specific wording of the "used before" label.
FAMILIARITY_USED_LABEL: dict = {
    "movies": "Watched it before",
    "groceries": "Used it before",
}
# ---------------------------------------------------------------------------
# Background questions, keyed by category
# ---------------------------------------------------------------------------
# Category key -> ordered list of question specs. Every spec is a dict with
# three string fields: "key", "label" and "placeholder".
# NOTE(review): presumably rendered as free-text form fields — confirm in the UI code.
BACKGROUND_QUESTIONS: dict = {
    "movies": [
        {
            "key": "movies_criteria",
            "label": "When picking between movies to purchase, what matters to you?",
            "placeholder": "e.g. I look for strong storytelling, good reviews, genre, director, cast…",
        },
        {
            "key": "movies_enjoy",
            "label": "What kinds of movies do you usually enjoy, and why?",
            "placeholder": "e.g. I love sci-fi thrillers because they keep me on the edge of my seat…",
        },
        {
            "key": "movies_avoid",
            "label": "What kinds of movies do you usually avoid, and why?",
            "placeholder": "e.g. I tend to skip horror movies because I don't enjoy being scared…",
        },
    ],
    "groceries": [
        {
            "key": "groceries_criteria",
            "label": "When picking between foods or grocery items to purchase, what matters to you?",
            "placeholder": "e.g. Price, ingredients, brand trust, nutritional value, taste…",
        },
        {
            "key": "groceries_enjoy",
            "label": "What kinds of foods or grocery items do you usually enjoy, and why?",
            "placeholder": "e.g. I enjoy organic snacks because they feel healthier and taste fresh…",
        },
        {
            "key": "groceries_avoid",
            "label": "What kinds of foods or grocery items do you usually avoid, and why?",
            "placeholder": "e.g. I avoid heavily processed foods because of the artificial ingredients…",
        },
    ],
}
# ---------------------------------------------------------------------------
# Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly
# ---------------------------------------------------------------------------
# 7-point purchase-likelihood scale: integer score -> label text.
LIKELIHOOD_LABELS: dict = {
    # 1-3: would not buy, with decreasing certainty
    1: "Definitely would not buy",
    2: "Probably would not buy",
    3: "Slightly unlikely to buy",
    # midpoint
    4: "Neutral",
    # 5-7: would buy, with increasing certainty
    5: "Slightly likely to buy",
    6: "Probably would buy",
    7: "Definitely would buy",
}
# 7-point pairwise-preference scale: integer score -> label text.
PREFERENCE_LABELS: dict = {
    # 1-3: leaning towards Product A, with decreasing certainty
    1: "Definitely would prefer Product A",
    2: "Probably would prefer Product A",
    3: "Slightly likely to prefer Product A",
    # midpoint
    4: "Neutral",
    # 5-7: leaning towards Product B, with increasing certainty
    5: "Slightly likely to prefer Product B",
    6: "Probably would prefer Product B",
    7: "Definitely would prefer Product B",
}
# Word-count thresholds.
# NOTE(review): presumably the minimum number of words required in a
# background answer and in a reflection, respectively — confirm against the
# validation code that reads these constants.
MIN_WORDS_BACKGROUND = 20
MIN_WORDS_REFLECTION = 10