""" Column name constants for the SQuAD v2.0 DataFrame & raw input field names. Benefits: - Single source of truth: schema changes are centralized - Safety: typos are caught at definition time rather than scattered string literals - IDE support: `Col.` autocompletes all valid names, streamlining typing and making schemas self-documenting """ from enum import Enum from pathlib import Path # constants.py lives at: /src/utils/constants.py; # resolve() addresses symlink issues REPO_ROOT: Path = Path(__file__).resolve().parent.parent.parent DATA_DIR: Path = REPO_ROOT / "data" # TODO - Placeholder needs to be made smaller for experiments! TRAIN_DATA_PATH: Path = DATA_DIR / "train-v2.0.json" DEV_DATA_PATH: Path = DATA_DIR / "dev-v2.0.json" EXPERIMENTS_DIR: Path = REPO_ROOT / "experiments" DEBUG_SEED = 42 class Col(Enum): # Schema entries below are reused for raw keys with identical names TITLE = "title" QUESTION_ID = "id" QUESTION = "question" CONTEXT = "context" ANSWER_TEXTS = "answers" ANSWER_STARTS = "answer_starts" IS_IMPOSSIBLE = "is_impossible" NUM_ANSWERS = "num_answers" class RawField(Enum): VERSION = "version" DATA = "data" PARAGRAPHS = "paragraphs" QAS = "qas" # QA-level answers (list of dicts with 'text' and 'answer_start') ANSWERS = "answers" ANSWER_TEXT = "text" ANSWER_START = "answer_start"