# Shared constants: dataset split names, language codes, task ids and modes,
# tagger labels, special words/tokens, and a Greek-letter lookup table.

# Size of the decoding context.
# NOTE(review): the unit (words vs. tokens, per side vs. total) is not visible
# in this file -- confirm against the code that consumes it.
DECODE_CTX_SIZE = 3

# Label id used for padded positions.
# NOTE(review): -100 equals the default `ignore_index` of
# torch.nn.CrossEntropyLoss, so presumably padded positions are meant to be
# skipped by the loss -- confirm against the training code.
LABEL_PAD_TOKEN_ID = -100
# Dataset split names, individually and as an ordered list.
TRAIN, DEV, TEST = 'train', 'dev', 'test'
SPLIT_NAMES = [TRAIN, DEV, TEST]
# Language identifiers. The three concrete languages use two-letter codes;
# MULTILINGUAL is an extra option that is not tied to a single language.
ENGLISH = 'en'
RUSSIAN = 'ru'
GERMAN = 'de'
MULTILINGUAL = 'multilingual'
SUPPORTED_LANGS = [ENGLISH, RUSSIAN, GERMAN, MULTILINGUAL]
# Task ids and their string forms.
# NOTE(review): "TN" / "ITN" presumably stand for text normalization and
# inverse text normalization -- confirm against the callers.
ITN_TASK = 0
TN_TASK = 1
ITN_PREFIX = str(ITN_TASK)
TN_PREFIX = str(TN_TASK)

# Prefixes prepended to a tag to form a tagger label.
# NOTE(review): looks like begin/inside markers of a BIO-style scheme -- confirm.
B_PREFIX = 'B-'
I_PREFIX = 'I-'
TAGGER_LABELS_PREFIXES = [B_PREFIX, I_PREFIX]

# Modes: one per task, plus a joint mode.
TN_MODE = 'tn'
ITN_MODE = 'itn'
JOINT_MODE = 'joint'
MODES = [TN_MODE, ITN_MODE, JOINT_MODE]
# Two-way lookup between task ids and the single-task modes.
# JOINT_MODE has no task id and therefore appears in neither mapping.
TASK_ID_TO_MODE = {ITN_TASK: ITN_MODE, TN_TASK: TN_MODE}
MODE_TO_TASK_ID = {mode: task_id for task_id, mode in TASK_ID_TO_MODE.items()}

# Instance directions and their relation to task ids and modes.
INST_BACKWARD = 'BACKWARD'
INST_FORWARD = 'FORWARD'
INST_DIRECTIONS = [INST_BACKWARD, INST_FORWARD]
DIRECTIONS_TO_ID = {INST_BACKWARD: ITN_TASK, INST_FORWARD: TN_TASK}
DIRECTIONS_ID_TO_NAME = {ITN_TASK: INST_BACKWARD, TN_TASK: INST_FORWARD}
# Despite its name, this mapping is keyed by MODE and yields the DIRECTION.
# The name is kept as-is because other modules may reference it.
DIRECTIONS_TO_MODE = {ITN_MODE: INST_BACKWARD, TN_MODE: INST_FORWARD}

# Tags.
SAME_TAG = 'SAME'
TASK_TAG = 'TASK'
PUNCT_TAG = 'PUNCT'
TRANSFORM_TAG = 'TRANSFORM'
# PUNCT_TAG is defined above but is not part of ALL_TAGS, so it gets no
# entry in ALL_TAG_LABELS / LABEL_IDS below.
ALL_TAGS = [TASK_TAG, SAME_TAG, TRANSFORM_TAG]

# Every prefix + tag combination, sorted so that label ids are deterministic.
ALL_TAG_LABELS = sorted(
    prefix + tag for prefix in TAGGER_LABELS_PREFIXES for tag in ALL_TAGS
)
# Label string -> integer id (its position in the sorted ALL_TAG_LABELS).
LABEL_IDS = {label: idx for idx, label in enumerate(ALL_TAG_LABELS)}
# Special words.
# NOTE(review): 'sil' presumably marks silence and '<self>' an unchanged
# token -- their exact meaning is not visible in this file; confirm.
SIL_WORD = 'sil'
SELF_WORD = '<self>'
SPECIAL_WORDS = [SIL_WORD, SELF_WORD]
# Special marker tokens.
# NOTE(review): the '<extra_id_N>' spelling matches the sentinel tokens of
# T5-style tokenizers -- presumably these rely on such a vocabulary; confirm.
EXTRA_ID_0 = '<extra_id_0>'
EXTRA_ID_1 = '<extra_id_1>'
# Greek letter -> English name of that letter.
# Both cases are covered, and the word-final lowercase sigma ('ς') maps to
# 'sigma' just like the regular one.
EN_GREEK_TO_SPOKEN = {
    # Uppercase (original entries, original order).
    'Τ': 'tau',
    'Ο': 'omicron',
    'Δ': 'delta',
    'Η': 'eta',
    'Κ': 'kappa',
    'Ι': 'iota',
    'Θ': 'theta',
    'Α': 'alpha',
    'Σ': 'sigma',
    'Υ': 'upsilon',
    'Μ': 'mu',
    'Χ': 'chi',
    'Π': 'pi',
    'Ν': 'nu',
    'Λ': 'lambda',
    'Γ': 'gamma',
    'Β': 'beta',
    'Ρ': 'rho',
    # Lowercase (original entries, original order).
    'τ': 'tau',
    'υ': 'upsilon',
    'φ': 'phi',
    'α': 'alpha',
    'λ': 'lambda',
    'ι': 'iota',
    'ς': 'sigma',
    'ο': 'omicron',
    'σ': 'sigma',
    'η': 'eta',
    'π': 'pi',
    'ν': 'nu',
    'γ': 'gamma',
    'κ': 'kappa',
    'ε': 'epsilon',
    'β': 'beta',
    'ρ': 'rho',
    'ω': 'omega',
    'χ': 'chi',
    # Letters that were missing from the table. They are appended after the
    # original entries so the iteration order of the existing keys is kept.
    'Ε': 'epsilon',
    'Ζ': 'zeta',
    'Ξ': 'xi',
    'Φ': 'phi',
    'Ψ': 'psi',
    'Ω': 'omega',
    'δ': 'delta',
    'ζ': 'zeta',
    'θ': 'theta',
    'μ': 'mu',
    'ξ': 'xi',
    'ψ': 'psi',
}