global_batch_size: 512; micro_batch_size: 1
scripts/pretrain_core_model.yaml CHANGED

@@ -58,10 +58,10 @@ train:
   log_interval: 1
 
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
-  global_batch_size:
+  global_batch_size: 512
 
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size:
+  micro_batch_size: 1
 
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 500
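For context, the two values set here combine with the data-parallel world size to determine gradient accumulation: each rank processes micro_batch_size samples per forward/backward pass, and gradients accumulate until global_batch_size samples have been seen across all ranks before an optimizer step. A minimal sketch of that arithmetic follows; the device count of 8 is a hypothetical example, not part of this commit.

    # Sketch only: how global_batch_size and micro_batch_size typically
    # relate to gradient accumulation in data-parallel training.
    global_batch_size = 512  # samples per optimizer step, across all ranks (set in this commit)
    micro_batch_size = 1     # samples per rank per forward/backward pass (set in this commit)
    devices = 8              # assumed data-parallel world size, for illustration

    # The global batch must divide evenly across ranks and micro-batches.
    assert global_batch_size % (micro_batch_size * devices) == 0
    grad_accum_steps = global_batch_size // (micro_batch_size * devices)
    print(grad_accum_steps)  # 64 micro-steps per optimizer step in this example

With micro_batch_size left at 1, per-step memory per rank is minimized and the full 512-sample effective batch is reached purely through accumulation, so the effective batch size stays fixed regardless of how many devices are available.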