squad2-qa / src /utils /constants.py
Kimis Perros
Initial deployment
461f64f
"""
Column name constants for the SQuAD v2.0 DataFrame & raw input field names.
Benefits:
- Single source of truth: schema changes are centralized
- Safety: a mistyped name (e.g. `Col.TITEL`) raises AttributeError immediately instead of hiding inside scattered string literals
- IDE support: `Col.` autocompletes all valid names, streamlining typing and making schemas self-documenting
"""
from enum import Enum, unique
from pathlib import Path
# This module sits at <repo>/src/utils/constants.py, so the repository root is
# three directory levels above it. resolve() is applied first so that symlinks
# in the module's path do not skew the walk up to the root.
REPO_ROOT: Path = Path(__file__).resolve().parent.parent.parent

# Dataset files, all located under <repo>/data.
DATA_DIR: Path = REPO_ROOT.joinpath("data")
# TODO - Placeholder needs to be made smaller for experiments!
TRAIN_DATA_PATH: Path = DATA_DIR.joinpath("train-v2.0.json")
DEV_DATA_PATH: Path = DATA_DIR.joinpath("dev-v2.0.json")

# Directory <repo>/experiments.
EXPERIMENTS_DIR: Path = REPO_ROOT.joinpath("experiments")

# Fixed seed value; presumably used for reproducible debug runs - confirm with callers.
DEBUG_SEED: int = 42
@unique
class Col(Enum):
    """Column names of the SQuAD v2.0 DataFrame.

    Each member's ``value`` is the literal column name; use ``Col.X.value``
    wherever a plain string is required.

    ``@unique`` makes class creation fail with ``ValueError`` if two members
    are ever given the same value, so a copy-paste slip cannot silently turn
    one column into an alias of another.
    """

    # Schema entries below are reused for raw keys with identical names
    TITLE = "title"
    QUESTION_ID = "id"
    QUESTION = "question"
    CONTEXT = "context"
    # NOTE(review): the column is called "answers" although the member is
    # ANSWER_TEXTS - presumably it holds the answer strings; confirm with
    # the code that builds the DataFrame.
    ANSWER_TEXTS = "answers"
    ANSWER_STARTS = "answer_starts"
    IS_IMPOSSIBLE = "is_impossible"
    NUM_ANSWERS = "num_answers"
@unique
class RawField(Enum):
    """Field names of the raw SQuAD v2.0 input.

    Each member's ``value`` is the literal key as it appears in the raw data;
    use ``RawField.X.value`` wherever a plain string is required.

    ``@unique`` makes class creation fail with ``ValueError`` if two members
    are ever given the same value, so a duplicated key cannot silently become
    an alias of an existing member.
    """

    VERSION = "version"
    DATA = "data"
    PARAGRAPHS = "paragraphs"
    QAS = "qas"
    # QA-level answers (list of dicts with 'text' and 'answer_start')
    ANSWERS = "answers"
    # Keys inside each entry of the answers list.
    ANSWER_TEXT = "text"
    ANSWER_START = "answer_start"