File size: 1,413 Bytes
461f64f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
"""
Column name constants for the SQuAD v2.0 DataFrame and raw input field names,
plus repository path constants and a debug seed.
Benefits:
- Single source of truth: schema changes are centralized
- Safety: a mistyped member name raises AttributeError on access, whereas a typo in a scattered string literal goes unnoticed
- IDE support: `Col.` autocompletes all valid names, streamlining typing and making schemas self-documenting
"""
from enum import Enum
from pathlib import Path
# Seed constant; presumably for reproducible debug runs -- confirm with callers.
DEBUG_SEED: int = 42

# This module sits at <repo>/src/utils/constants.py, so the repository root is
# three directory levels above the file itself. resolve() makes the path
# absolute and follows symlinks before climbing.
REPO_ROOT: Path = Path(__file__).resolve().parent.parent.parent

# Top-level directories under the repository root.
DATA_DIR: Path = REPO_ROOT.joinpath("data")
EXPERIMENTS_DIR: Path = REPO_ROOT.joinpath("experiments")

# Dataset files inside DATA_DIR.
# TODO: placeholder -- needs to be made smaller for experiments!
TRAIN_DATA_PATH: Path = DATA_DIR.joinpath("train-v2.0.json")
DEV_DATA_PATH: Path = DATA_DIR.joinpath("dev-v2.0.json")
class Col(Enum):
    """Column names of the SQuAD v2.0 DataFrame.

    Each member's ``.value`` is the column-name string. Members are plain
    ``Enum`` values rather than ``str`` subclasses, so use ``Col.X.value``
    wherever a raw string is required.
    """

    # Schema entries below are reused for raw keys with identical names.
    TITLE = "title"
    QUESTION_ID = "id"
    QUESTION = "question"
    CONTEXT = "context"
    ANSWER_TEXTS = "answers"
    ANSWER_STARTS = "answer_starts"
    IS_IMPOSSIBLE = "is_impossible"
    NUM_ANSWERS = "num_answers"
class RawField(Enum):
    """Field names (keys) of the raw input data.

    Each member's ``.value`` is the key string; members are plain ``Enum``
    values, so use ``RawField.X.value`` when indexing into the raw data.
    """

    VERSION = "version"
    DATA = "data"
    PARAGRAPHS = "paragraphs"
    QAS = "qas"

    # QA-level answers (list of dicts with 'text' and 'answer_start').
    ANSWERS = "answers"
    ANSWER_TEXT = "text"
    ANSWER_START = "answer_start"
|