|
|
""" |
|
|
Column-name constants for the SQuAD v2.0 DataFrame and raw input field names.

Benefits:

- Single source of truth: schema changes are centralized
- Safety: a mistyped member name fails immediately instead of hiding in scattered string literals
- IDE support: `Col.` autocompletes all valid names, which speeds up typing and makes schemas self-documenting
|
|
""" |
|
|
|
|
|
from enum import Enum, unique
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
# Filesystem layout, derived once at import time from this file's location.
# REPO_ROOT is the directory three levels above this file; `resolve()` makes
# the path absolute first, so the result does not depend on the current
# working directory.
REPO_ROOT: Path = Path(__file__).resolve().parent.parent.parent
DATA_DIR: Path = REPO_ROOT / "data"

# Dataset files inside DATA_DIR.
TRAIN_DATA_PATH: Path = DATA_DIR / "train-v2.0.json"
DEV_DATA_PATH: Path = DATA_DIR / "dev-v2.0.json"

# Sibling of DATA_DIR under the repository root.
EXPERIMENTS_DIR: Path = REPO_ROOT / "experiments"

# Fixed seed value; presumably used for reproducible debug runs -- confirm
# against callers.
DEBUG_SEED: int = 42
|
|
|
|
|
|
|
|
@unique
class Col(Enum):
    """Column names of the SQuAD v2.0 DataFrame.

    Each member's ``value`` is the literal column string. This is a plain
    ``Enum`` (no ``str`` mixin), so use ``Col.X.value`` wherever the string
    itself is required.

    ``@unique`` makes the class definition raise ``ValueError`` if two
    members are ever given the same string, instead of silently turning the
    second one into an alias of the first.
    """

    TITLE = "title"
    QUESTION_ID = "id"
    QUESTION = "question"
    CONTEXT = "context"
    # NOTE: the column string is "answers", not "answer_texts".
    ANSWER_TEXTS = "answers"
    ANSWER_STARTS = "answer_starts"
    IS_IMPOSSIBLE = "is_impossible"
    NUM_ANSWERS = "num_answers"
|
|
|
|
|
|
|
|
@unique
class RawField(Enum):
    """Field names of the raw SQuAD v2.0 input.

    Each member's ``value`` is the literal field string. This is a plain
    ``Enum`` (no ``str`` mixin), so use ``RawField.X.value`` wherever the
    string itself is required.

    ``@unique`` makes the class definition raise ``ValueError`` if two
    members are ever given the same string, instead of silently turning the
    second one into an alias of the first.
    """

    # Presumably the container-level keys of the raw file -- the nesting is
    # not visible in this module; confirm against the loader.
    VERSION = "version"
    DATA = "data"
    PARAGRAPHS = "paragraphs"
    QAS = "qas"

    # Answer-related keys.
    ANSWERS = "answers"
    ANSWER_TEXT = "text"
    ANSWER_START = "answer_start"
|
|
|