|
|
|
|
|
|
|
|
|
|
|
# Special tokens used to delimit training examples.
# Defined once here and reused in SPECIAL_TOKENS_DICT below so a token can
# never drift out of sync between the standalone constants and the dict
# passed to the tokenizer (previously the literals were duplicated).

# Padding token added to the tokenizer (GPT-2 has none by default).
PAD_TOKEN = "<pad>"

# End-of-example marker, registered as the tokenizer's eos_token.
EOS_TOKEN = "<endofex>"

# Start-of-example marker, registered as an additional special token.
START_OF_EX_TOKEN = "<startofex>"

# Mapping in the shape expected by tokenizer.add_special_tokens().
SPECIAL_TOKENS_DICT = {
    "eos_token": EOS_TOKEN,
    "pad_token": PAD_TOKEN,
    "additional_special_tokens": [START_OF_EX_TOKEN],
}
|
|
|
|
|
# ---------------------------------------------------------------------------
# Training defaults.
# NOTE(review): names mirror Hugging Face TrainingArguments / PEFT LoraConfig
# fields — confirm against the training script that consumes these constants.
# ---------------------------------------------------------------------------

# Base model checkpoint to fine-tune (Hugging Face Hub id).
DEFAULT_MODEL_NAME = "gpt2"

# Sequence length (tokens) each training example is chunked/padded to.
DEFAULT_BLOCK_SIZE = 128

# Number of passes over the training set.
DEFAULT_EPOCHS = 3

# Per-device batch size.
DEFAULT_BATCH_SIZE = 8

# Initial learning rate for the optimizer.
DEFAULT_LR = 5e-5

# Weight-decay (L2) coefficient.
DEFAULT_WEIGHT_DECAY = 0.01

# Gradient-accumulation steps; 1 means update every batch.
DEFAULT_GRAD_ACCUM_STEPS = 1

# Log metrics every N optimizer steps.
DEFAULT_LOGGING_STEPS = 100

# Step interval for checkpoint saving / evaluation — only takes effect when
# the strategies below are set to "steps" rather than "epoch".
DEFAULT_SAVE_EVAL_STEPS = 500

# Keep at most this many checkpoints on disk (older ones are deleted).
DEFAULT_SAVE_TOTAL_LIMIT = 2

# RNG seed for reproducibility.
DEFAULT_SEED = 42

# Evaluate at the end of every epoch.
DEFAULT_EVAL_STRATEGY = "epoch"

# Save a checkpoint at the end of every epoch.
DEFAULT_SAVE_STRATEGY = "epoch"

# Dataset column containing the training text.
DEFAULT_DATA_COLUMN = "text"

# --- LoRA (PEFT) configuration ---

# LoRA rank (dimension of the low-rank update matrices).
DEFAULT_LORA_R = 8

# LoRA scaling factor (alpha).
DEFAULT_LORA_ALPHA = 32

# Dropout applied inside LoRA layers.
DEFAULT_LORA_DROPOUT = 0.05

# Modules to inject adapters into; "c_attn" is GPT-2's fused QKV projection.
DEFAULT_LORA_TARGET_MODULES = ["c_attn"]

# Bias handling in LoRA layers ("none" trains no bias terms).
DEFAULT_LORA_BIAS = "none"

# Linear LR warmup steps; 0 disables warmup.
DEFAULT_WARMUP_STEPS = 0

# Learning-rate schedule shape.
DEFAULT_LR_SCHEDULER_TYPE = "linear"

# Stop after this many evaluations without improvement.
DEFAULT_EARLY_STOPPING_PATIENCE = 2

# Experiment-tracking backend.
DEFAULT_REPORT_TO = "wandb"

# Run name shown in the tracking UI.
DEFAULT_RUN_NAME = "train_gpt2_equations"
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Dataset and Weights & Biases defaults.
# ---------------------------------------------------------------------------

# Column in the source dataset holding the raw prompt text.
# NOTE(review): "i_prompt_n" semantics not visible here — confirm against the
# dataset schema.
DEFAULT_SOURCE_DATA_COLUMN = "i_prompt_n"

# Directory (or dataset subset) containing the data; presumably the
# 700K-example split — verify against the data-loading code.
DEFAULT_DATA_DIR = "700K"

# W&B project that runs are logged under.
DEFAULT_WANDB_PROJECT = "seriguela"

# W&B entity (team/user); None falls back to the logged-in default.
DEFAULT_WANDB_ENTITY = None

# Hugging Face Hub dataset repository to pull training data from.
DEFAULT_DATASET_REPO_ID = "augustocsc/sintetico_natural"