Commit
·
5a0459e
1
Parent(s):
b7f88d8
delete file
Browse files — training_args_tracker.txt (+0, −25)
training_args_tracker.txt
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
"""Training hyperparameters for a Hugging Face ``Trainer`` run.

Reconstructed from the deleted ``training_args_tracker.txt`` snippet
(the original was mangled by diff extraction). Behavior is unchanged:
it defines a shared step interval and builds a ``TrainingArguments``
instance for checkpoint/eval/logging cadence, memory-saving options,
and optimizer hyperparameters.
"""

from transformers import TrainingArguments

# Shared interval (in optimizer steps) used for checkpointing,
# evaluation, and logging so all three stay in lockstep.
steps = 200

training_args = TrainingArguments(
    # NOTE(review): `outmodelpath` is not defined in this snippet —
    # presumably set earlier in the surrounding notebook/script; confirm.
    output_dir=outmodelpath,
    overwrite_output_dir=True,  # set to True to overwrite the output directory
    group_by_length=True,  # group samples of similar length to save memory (dynamic padding only)
    per_device_train_batch_size=8,  # previously 16
    # save_strategy="steps",  # the checkpoint save strategy to adopt during training
    # NOTE(review): `evaluation_strategy` was renamed `eval_strategy` in newer
    # transformers releases — keep as-is unless the pinned version requires it.
    evaluation_strategy="steps",  # alternatives: "no", "epoch"
    num_train_epochs=5,  # previously 50
    # max_gradient_norm=0.8,  # maximum gradient norm (gradient clipping); default 1.0
    fp16=True,  # mixed precision — GPU only; reduces memory use
    gradient_checkpointing=True,  # save memory at the expense of a slower backward pass
    save_steps=steps,
    eval_steps=steps,
    logging_steps=steps,
    learning_rate=1e-4,  # default: 5e-5
    weight_decay=0.005,  # regularization to curb overfitting and generalize better
    warmup_ratio=0.1,  # linear warmup from 0 to learning_rate over 10% of steps; default 0.0
    max_grad_norm=0.8,  # gradient-clipping threshold
    save_total_limit=2,  # keep at most 2 checkpoints on disk
    load_best_model_at_end=True,  # so trainer.save_model() persists the best checkpoint
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|