# Spaces: lspcloud / prolific-preferences-dynamic (status: Sleeping) -- File size: 5,125 Bytes

"""
Load study_config.yaml and merge with env-var secrets.
All static lookup tables live here so any module can import them.
"""
import os
from pathlib import Path

import yaml

# Base directory: the parent of the directory that contains this file (after
# resolving the file's path). load_config() reads study_config.yaml from here
# and derives the data/ and annotations/ paths from it.
BASE_DIR = Path(__file__).resolve().parent.parent


def load_config() -> dict:
    """Build the runtime configuration from ``study_config.yaml`` and the environment.

    The YAML file is read from ``BASE_DIR / "study_config.yaml"``. Secrets are
    taken only from environment variables and overwrite any same-named YAML
    keys.

    Returns:
        The parsed YAML mapping with these keys added or normalised:

        * ``hf_token``, ``tinker_api_key``, ``prolific_api_token`` -- values of
          ``HF_TOKEN``, ``TINKER_API_KEY``, ``PROLIFIC_API_TOKEN``; ``""`` when
          the variable is unset.
        * ``debug_mode`` -- ``True`` only when ``DEBUG_MODE`` equals ``"true"``
          (case-insensitive).
        * ``print_model_input`` -- ``True`` when ``PRINT_MODEL_INPUT`` is
          ``1``/``true``/``yes`` (case-insensitive) or the YAML value is truthy.
        * ``prolific_study_id`` -- the YAML value; ``""`` when the key is absent.
        * ``sampling_temperature`` -- the YAML value as ``float``; ``1.0`` when
          the key is absent or null.
        * ``base_dir``, ``data_dir``, ``annotations_dir`` -- string paths
          rooted at ``BASE_DIR``.

    Raises:
        OSError: If the YAML file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
        TypeError: If the top level of the YAML document is not a mapping.
        ValueError: If ``sampling_temperature`` cannot be converted to ``float``.
    """
    config_path = BASE_DIR / "study_config.yaml"
    # Explicit UTF-8 so parsing does not depend on the host's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # An empty (or comment-only) YAML document parses to None; treat it as an
    # empty mapping so the defaults below still apply instead of crashing.
    if cfg is None:
        cfg = {}
    elif not isinstance(cfg, dict):
        raise TypeError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    # Secrets come only from env vars, never from yaml
    cfg["hf_token"] = os.getenv("HF_TOKEN", "")
    cfg["tinker_api_key"] = os.getenv("TINKER_API_KEY", "")
    cfg["prolific_api_token"] = os.getenv("PROLIFIC_API_TOKEN", "")
    # Deliberately strict: only the literal "true" (any case) enables debug mode.
    cfg["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"

    # Log full chat messages + renderer prompt to stdout (local dev / one-off audits).
    # Either the env var or a truthy yaml value switches it on.
    env_print = os.getenv("PRINT_MODEL_INPUT", "").lower() in ("1", "true", "yes")
    cfg["print_model_input"] = env_print or bool(cfg.get("print_model_input", False))

    # prolific_study_id is read from the yaml (non-secret, study-specific)
    cfg.setdefault("prolific_study_id", "")

    # Tinker SamplingParams.temperature (seller / all call_model paths).
    # A missing key and an explicit null both fall back to 1.0.
    t_sample = cfg.get("sampling_temperature")
    cfg["sampling_temperature"] = 1.0 if t_sample is None else float(t_sample)

    # Derived filesystem paths (stored as plain strings)
    cfg["base_dir"] = str(BASE_DIR)
    cfg["data_dir"] = str(BASE_DIR / "data")
    cfg["annotations_dir"] = str(BASE_DIR / "annotations")

    return cfg


# ---------------------------------------------------------------------------
# HF dataset repos — (study_type, category) → repo_id
# ---------------------------------------------------------------------------
# Repo id lookup keyed by a (study_type, category) tuple.
# "preference" entries point at the pairs_* repos, "likelihood" entries at the
# amazon_* repos.
CATEGORY_TO_REPO: dict = {}
CATEGORY_TO_REPO["preference", "movies"] = "lms-shape-preferences/pairs_Movies_and_TV"
CATEGORY_TO_REPO["preference", "groceries"] = "lms-shape-preferences/pairs_Grocery_and_Gourmet_Food"
CATEGORY_TO_REPO["likelihood", "movies"] = "lms-shape-preferences/amazon_Movies_and_TV"
CATEGORY_TO_REPO["likelihood", "groceries"] = "lms-shape-preferences/amazon_Grocery_and_Gourmet_Food"

# ---------------------------------------------------------------------------
# Display helpers
# ---------------------------------------------------------------------------
# Human-readable name for each category key.
CATEGORY_DISPLAY: dict = dict(
    movies="Movies & TV",
    groceries="Grocery Products",
)

# Per-category wording for the "already familiar with this item" label.
FAMILIARITY_USED_LABEL: dict = dict(
    movies="Watched it before",
    groceries="Used it before",
)

# ---------------------------------------------------------------------------
# Background questions, keyed by category
# ---------------------------------------------------------------------------
# category -> ordered list of question specs. Every spec carries three string
# fields: "key" (unique identifier, prefixed with the category), "label" (the
# question text) and "placeholder" (an example answer).
BACKGROUND_QUESTIONS: dict = dict(
    movies=[
        dict(
            key="movies_criteria",
            label="When picking between movies to purchase, what matters to you?",
            placeholder="e.g. I look for strong storytelling, good reviews, genre, director, cast…",
        ),
        dict(
            key="movies_enjoy",
            label="What kinds of movies do you usually enjoy, and why?",
            placeholder="e.g. I love sci-fi thrillers because they keep me on the edge of my seat…",
        ),
        dict(
            key="movies_avoid",
            label="What kinds of movies do you usually avoid, and why?",
            placeholder="e.g. I tend to skip horror movies because I don't enjoy being scared…",
        ),
    ],
    groceries=[
        dict(
            key="groceries_criteria",
            label="When picking between foods or grocery items to purchase, what matters to you?",
            placeholder="e.g. Price, ingredients, brand trust, nutritional value, taste…",
        ),
        dict(
            key="groceries_enjoy",
            label="What kinds of foods or grocery items do you usually enjoy, and why?",
            placeholder="e.g. I enjoy organic snacks because they feel healthier and taste fresh…",
        ),
        dict(
            key="groceries_avoid",
            label="What kinds of foods or grocery items do you usually avoid, and why?",
            placeholder="e.g. I avoid heavily processed foods because of the artificial ingredients…",
        ),
    ],
)

# ---------------------------------------------------------------------------
# Likert scale labels — must match lsp/src/prompts/survey_prompts.py exactly
# ---------------------------------------------------------------------------
# Seven-point purchase-likelihood scale: integer score (1-7) -> label text.
# The list is ordered from score 1 upward; enumerate() assigns the scores.
LIKELIHOOD_LABELS: dict = dict(
    enumerate(
        [
            "Definitely would not buy",
            "Probably would not buy",
            "Slightly unlikely to buy",
            "Neutral",
            "Slightly likely to buy",
            "Probably would buy",
            "Definitely would buy",
        ],
        start=1,
    )
)

# Seven-point A-vs-B preference scale: integer score (1-7) -> label text.
# Scores 1-3 lean towards Product A, 4 is neutral, 5-7 lean towards Product B.
PREFERENCE_LABELS: dict = dict(
    enumerate(
        [
            "Definitely would prefer Product A",
            "Probably would prefer Product A",
            "Slightly likely to prefer Product A",
            "Neutral",
            "Slightly likely to prefer Product B",
            "Probably would prefer Product B",
            "Definitely would prefer Product B",
        ],
        start=1,
    )
)

# Minimum word counts; presumably applied to the background and reflection
# free-text answers respectively (enforcement is not visible here -- confirm
# against the callers).
MIN_WORDS_BACKGROUND, MIN_WORDS_REFLECTION = 20, 10