| DATASET = "task-focus + sample from remaining datasets" | |
| DATASET_FORMAT = 'input-output' | |
| PER_DEVICE_TRAIN_BATCH_SIZE = 2 | |
| GRADIENT_ACCUMULATION_STEPS = 4 | |
| LEARNING_RATE = 0.0003 | |
| LR_SCHEDULER_TYPE = 'cosine' | |
| WARMUP_RATIO = 0.03 | |
| LORA_R = 192 | |
| LORA_ALPHA = 64 | |
| LORA_DROPOUT = 0.1 | |
| TRAIN_ON_SOURCE = False | |
| SOURCE_MAX_LENGTH = 1024 | |
| TARGET_MAX_LENGTH = 1024 | |
| LOGGING_STEPS = 20 | |
| SAVE_STEPS = 100 | |
| SAVE_TOTAL_LIMIT = 4 |