File size: 1,413 Bytes
461f64f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
Project-wide constants: repository/data paths, a debug seed, column names for the SQuAD v2.0 DataFrame, and raw input field names.

Benefits:
    - Single source of truth: schema changes are centralized
    - Safety: a misspelled member name raises AttributeError as soon as it is used, instead of silently mismatching like a typo in a scattered string literal
    - IDE support: `Col.` autocompletes all valid names, streamlining typing and making schemas self-documenting
"""

from enum import Enum, unique
from pathlib import Path

# Filesystem locations, all derived from this file's own location.
# constants.py lives at: <repo>/src/utils/constants.py;
# the three .parent hops therefore climb utils -> src -> <repo>.
# resolve() makes the path absolute and follows symlinks first, so the hops
# are taken from the real file location (addresses symlink issues).
REPO_ROOT: Path = Path(__file__).resolve().parent.parent.parent
DATA_DIR: Path = REPO_ROOT / "data"
# TODO - Placeholder needs to be made smaller for experiments!
TRAIN_DATA_PATH: Path = DATA_DIR / "train-v2.0.json"
DEV_DATA_PATH: Path = DATA_DIR / "dev-v2.0.json"
EXPERIMENTS_DIR: Path = REPO_ROOT / "experiments"

# Fixed seed value; presumably for reproducible debug runs - confirm against callers.
DEBUG_SEED: int = 42


@unique
class Col(Enum):
    """Column names of the SQuAD v2.0 DataFrame.

    Members are plain ``Enum`` values, so use ``Col.X.value`` wherever the
    underlying string is needed. ``@unique`` makes a duplicated value raise
    ``ValueError`` at class-definition time instead of silently becoming an
    alias of the earlier member.
    """

    # Schema entries below are reused for raw keys with identical names
    TITLE = "title"
    QUESTION_ID = "id"
    QUESTION = "question"
    CONTEXT = "context"
    # Same string as the raw QA-level "answers" key.
    ANSWER_TEXTS = "answers"
    ANSWER_STARTS = "answer_starts"
    IS_IMPOSSIBLE = "is_impossible"
    NUM_ANSWERS = "num_answers"


@unique
class RawField(Enum):
    """Key names used in the raw input, as opposed to DataFrame columns.

    Members are plain ``Enum`` values, so use ``RawField.X.value`` wherever
    the underlying string is needed. ``@unique`` makes a duplicated value
    raise ``ValueError`` at class-definition time instead of silently
    becoming an alias of the earlier member.
    """

    VERSION = "version"
    DATA = "data"
    PARAGRAPHS = "paragraphs"
    QAS = "qas"
    # QA-level answers (list of dicts with 'text' and 'answer_start')
    ANSWERS = "answers"
    ANSWER_TEXT = "text"
    ANSWER_START = "answer_start"