Upload folder using huggingface_hub
Browse files
- configs/accelerate_config.yaml +16 -0
- configs/accelerate_config_1gpu.yaml +16 -0
- configs/accelerate_config_2gpu.yaml +16 -0
- configs/accelerate_config_3gpu.yaml +16 -0
- configs/accelerate_config_bf16.yaml +16 -0
- configs/accelerate_config_bf16_8gpu.yaml +16 -0
- configs/deepspeed_train_config.yaml +21 -0
- configs/deepspeed_train_config_bf16.yaml +21 -0
- configs/deepspeed_train_config_bf16_4gpu.yaml +21 -0
- configs/deepspeed_train_config_fp16.yaml +21 -0
- configs/deepspeed_train_config_zero3.yaml +21 -0
- configs/ds_z0_config.json +33 -0
- configs/ds_z2_config_bf16.json +33 -0
- configs/ds_z2_config_fp16.json +33 -0
- configs/ds_z3_config_bf16.json +30 -0
- configs/fsdp_train_config.yaml +27 -0
- configs/llama3_full_pt.yaml +42 -0
configs/accelerate_config.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: MULTI_GPU
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
gpu_ids: 0,1,2,3,4,5,6,7
|
| 6 |
+
machine_rank: 0
|
| 7 |
+
main_training_function: main
|
| 8 |
+
mixed_precision: 'fp16'
|
| 9 |
+
num_machines: 1
|
| 10 |
+
num_processes: 8
|
| 11 |
+
rdzv_backend: static
|
| 12 |
+
same_network: true
|
| 13 |
+
tpu_env: []
|
| 14 |
+
tpu_use_cluster: false
|
| 15 |
+
tpu_use_sudo: false
|
| 16 |
+
use_cpu: false
|
configs/accelerate_config_1gpu.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: MULTI_GPU
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
gpu_ids: 0
|
| 6 |
+
machine_rank: 0
|
| 7 |
+
main_training_function: main
|
| 8 |
+
mixed_precision: 'fp16'
|
| 9 |
+
num_machines: 1
|
| 10 |
+
num_processes: 1
|
| 11 |
+
rdzv_backend: static
|
| 12 |
+
same_network: true
|
| 13 |
+
tpu_env: []
|
| 14 |
+
tpu_use_cluster: false
|
| 15 |
+
tpu_use_sudo: false
|
| 16 |
+
use_cpu: false
|
configs/accelerate_config_2gpu.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: MULTI_GPU
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
gpu_ids: 4,5
|
| 6 |
+
machine_rank: 0
|
| 7 |
+
main_training_function: main
|
| 8 |
+
mixed_precision: 'fp16'
|
| 9 |
+
num_machines: 1
|
| 10 |
+
num_processes: 2
|
| 11 |
+
rdzv_backend: static
|
| 12 |
+
same_network: true
|
| 13 |
+
tpu_env: []
|
| 14 |
+
tpu_use_cluster: false
|
| 15 |
+
tpu_use_sudo: false
|
| 16 |
+
use_cpu: false
|
configs/accelerate_config_3gpu.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: MULTI_GPU
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
gpu_ids: 0,1,2
|
| 6 |
+
machine_rank: 0
|
| 7 |
+
main_training_function: main
|
| 8 |
+
mixed_precision: 'fp16'
|
| 9 |
+
num_machines: 1
|
| 10 |
+
num_processes: 3
|
| 11 |
+
rdzv_backend: static
|
| 12 |
+
same_network: true
|
| 13 |
+
tpu_env: []
|
| 14 |
+
tpu_use_cluster: false
|
| 15 |
+
tpu_use_sudo: false
|
| 16 |
+
use_cpu: false
|
configs/accelerate_config_bf16.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: MULTI_GPU
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
gpu_ids: 0,1,2,3,4,5
|
| 6 |
+
machine_rank: 0
|
| 7 |
+
main_training_function: main
|
| 8 |
+
mixed_precision: 'bf16'
|
| 9 |
+
num_machines: 1
|
| 10 |
+
num_processes: 6
|
| 11 |
+
rdzv_backend: static
|
| 12 |
+
same_network: true
|
| 13 |
+
tpu_env: []
|
| 14 |
+
tpu_use_cluster: false
|
| 15 |
+
tpu_use_sudo: false
|
| 16 |
+
use_cpu: false
|
configs/accelerate_config_bf16_8gpu.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: MULTI_GPU
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
gpu_ids: 0,1,2,3,4,5,6,7
|
| 6 |
+
machine_rank: 0
|
| 7 |
+
main_training_function: main
|
| 8 |
+
mixed_precision: 'bf16'
|
| 9 |
+
num_machines: 1
|
| 10 |
+
num_processes: 8
|
| 11 |
+
rdzv_backend: static
|
| 12 |
+
same_network: true
|
| 13 |
+
tpu_env: []
|
| 14 |
+
tpu_use_cluster: false
|
| 15 |
+
tpu_use_sudo: false
|
| 16 |
+
use_cpu: false
|
configs/deepspeed_train_config.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
deepspeed_config:
|
| 3 |
+
  gradient_clipping: 1.0
|
| 4 |
+
  offload_optimizer_device: none
|
| 5 |
+
  offload_param_device: cpu
|
| 6 |
+
  zero3_init_flag: false
|
| 7 |
+
  zero_stage: 2
|
| 8 |
+
distributed_type: DEEPSPEED
|
| 9 |
+
downcast_bf16: 'no'
|
| 10 |
+
gpu_ids: 0,1,2,3,4,5
|
| 11 |
+
machine_rank: 0
|
| 12 |
+
main_training_function: main
|
| 13 |
+
mixed_precision: fp16
|
| 14 |
+
num_machines: 1
|
| 15 |
+
num_processes: 6
|
| 16 |
+
rdzv_backend: static
|
| 17 |
+
same_network: true
|
| 18 |
+
tpu_env: []
|
| 19 |
+
tpu_use_cluster: false
|
| 20 |
+
tpu_use_sudo: false
|
| 21 |
+
use_cpu: false
|
configs/deepspeed_train_config_bf16.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
deepspeed_config:
|
| 3 |
+
  gradient_clipping: 1.0
|
| 4 |
+
  offload_optimizer_device: none
|
| 5 |
+
  offload_param_device: cpu
|
| 6 |
+
  zero3_init_flag: false
|
| 7 |
+
  zero_stage: 2
|
| 8 |
+
distributed_type: DEEPSPEED
|
| 9 |
+
downcast_bf16: 'no'
|
| 10 |
+
gpu_ids: 0,1,2,3,4,5
|
| 11 |
+
machine_rank: 0
|
| 12 |
+
main_training_function: main
|
| 13 |
+
mixed_precision: bf16
|
| 14 |
+
num_machines: 1
|
| 15 |
+
num_processes: 6
|
| 16 |
+
rdzv_backend: static
|
| 17 |
+
same_network: true
|
| 18 |
+
tpu_env: []
|
| 19 |
+
tpu_use_cluster: false
|
| 20 |
+
tpu_use_sudo: false
|
| 21 |
+
use_cpu: false
|
configs/deepspeed_train_config_bf16_4gpu.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
deepspeed_config:
|
| 3 |
+
  gradient_clipping: 1.0
|
| 4 |
+
  offload_optimizer_device: none
|
| 5 |
+
  offload_param_device: cpu
|
| 6 |
+
  zero3_init_flag: false
|
| 7 |
+
  zero_stage: 2
|
| 8 |
+
distributed_type: DEEPSPEED
|
| 9 |
+
downcast_bf16: 'no'
|
| 10 |
+
gpu_ids: 0,1,2,3
|
| 11 |
+
machine_rank: 0
|
| 12 |
+
main_training_function: main
|
| 13 |
+
mixed_precision: bf16
|
| 14 |
+
num_machines: 1
|
| 15 |
+
num_processes: 4
|
| 16 |
+
rdzv_backend: static
|
| 17 |
+
same_network: true
|
| 18 |
+
tpu_env: []
|
| 19 |
+
tpu_use_cluster: false
|
| 20 |
+
tpu_use_sudo: false
|
| 21 |
+
use_cpu: false
|
configs/deepspeed_train_config_fp16.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
deepspeed_config:
|
| 3 |
+
  gradient_clipping: 1.0
|
| 4 |
+
  offload_optimizer_device: none
|
| 5 |
+
  offload_param_device: cpu
|
| 6 |
+
  zero3_init_flag: false
|
| 7 |
+
  zero_stage: 2
|
| 8 |
+
distributed_type: DEEPSPEED
|
| 9 |
+
downcast_bf16: 'no'
|
| 10 |
+
gpu_ids: 0,1,2,3,4,5
|
| 11 |
+
machine_rank: 0
|
| 12 |
+
main_training_function: main
|
| 13 |
+
mixed_precision: fp16
|
| 14 |
+
num_machines: 1
|
| 15 |
+
num_processes: 6
|
| 16 |
+
rdzv_backend: static
|
| 17 |
+
same_network: true
|
| 18 |
+
tpu_env: []
|
| 19 |
+
tpu_use_cluster: false
|
| 20 |
+
tpu_use_sudo: false
|
| 21 |
+
use_cpu: false
|
configs/deepspeed_train_config_zero3.yaml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
deepspeed_config:
|
| 3 |
+
  gradient_clipping: 1.0
|
| 4 |
+
  offload_optimizer_device: none
|
| 5 |
+
  offload_param_device: cpu
|
| 6 |
+
  zero3_init_flag: false
|
| 7 |
+
  zero_stage: 3
|
| 8 |
+
distributed_type: DEEPSPEED
|
| 9 |
+
downcast_bf16: 'no'
|
| 10 |
+
gpu_ids: 0,1,2,3,4,5,6,7
|
| 11 |
+
machine_rank: 0
|
| 12 |
+
main_training_function: main
|
| 13 |
+
mixed_precision: bf16
|
| 14 |
+
num_machines: 1
|
| 15 |
+
num_processes: 8
|
| 16 |
+
rdzv_backend: static
|
| 17 |
+
same_network: true
|
| 18 |
+
tpu_env: []
|
| 19 |
+
tpu_use_cluster: false
|
| 20 |
+
tpu_use_sudo: false
|
| 21 |
+
use_cpu: false
|
configs/ds_z0_config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_batch_size": "auto",
|
| 3 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 4 |
+
"gradient_accumulation_steps": "auto",
|
| 5 |
+
"gradient_clipping": "auto",
|
| 6 |
+
"zero_allow_untested_optimizer": true,
|
| 7 |
+
"fp16": {
|
| 8 |
+
"enabled": false,
|
| 9 |
+
"loss_scale": 0,
|
| 10 |
+
"loss_scale_window": 1000,
|
| 11 |
+
"initial_scale_power": 16,
|
| 12 |
+
"hysteresis": 2,
|
| 13 |
+
"min_loss_scale": 1
|
| 14 |
+
},
|
| 15 |
+
"bf16": {
|
| 16 |
+
"enabled": true,
|
| 17 |
+
"loss_scale": 0,
|
| 18 |
+
"loss_scale_window": 1000,
|
| 19 |
+
"initial_scale_power": 16,
|
| 20 |
+
"hysteresis": 2,
|
| 21 |
+
"min_loss_scale": 1
|
| 22 |
+
},
|
| 23 |
+
"zero_optimization": {
|
| 24 |
+
"stage": 0,
|
| 25 |
+
"allgather_partitions": true,
|
| 26 |
+
"allgather_bucket_size": 5e8,
|
| 27 |
+
"overlap_comm": true,
|
| 28 |
+
"reduce_scatter": true,
|
| 29 |
+
"reduce_bucket_size": 5e8,
|
| 30 |
+
"contiguous_gradients": true,
|
| 31 |
+
"round_robin_gradients": true
|
| 32 |
+
}
|
| 33 |
+
}
|
configs/ds_z2_config_bf16.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_batch_size": "auto",
|
| 3 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 4 |
+
"gradient_accumulation_steps": "auto",
|
| 5 |
+
"gradient_clipping": "auto",
|
| 6 |
+
"zero_allow_untested_optimizer": true,
|
| 7 |
+
"fp16": {
|
| 8 |
+
"enabled": false,
|
| 9 |
+
"loss_scale": 0,
|
| 10 |
+
"loss_scale_window": 1000,
|
| 11 |
+
"initial_scale_power": 16,
|
| 12 |
+
"hysteresis": 2,
|
| 13 |
+
"min_loss_scale": 1
|
| 14 |
+
},
|
| 15 |
+
"bf16": {
|
| 16 |
+
"enabled": true,
|
| 17 |
+
"loss_scale": 0,
|
| 18 |
+
"loss_scale_window": 1000,
|
| 19 |
+
"initial_scale_power": 16,
|
| 20 |
+
"hysteresis": 2,
|
| 21 |
+
"min_loss_scale": 1
|
| 22 |
+
},
|
| 23 |
+
"zero_optimization": {
|
| 24 |
+
"stage": 2,
|
| 25 |
+
"allgather_partitions": true,
|
| 26 |
+
"allgather_bucket_size": 5e8,
|
| 27 |
+
"overlap_comm": false,
|
| 28 |
+
"reduce_scatter": true,
|
| 29 |
+
"reduce_bucket_size": 5e8,
|
| 30 |
+
"contiguous_gradients": true,
|
| 31 |
+
"round_robin_gradients": true
|
| 32 |
+
}
|
| 33 |
+
}
|
configs/ds_z2_config_fp16.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_batch_size": "auto",
|
| 3 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 4 |
+
"gradient_accumulation_steps": "auto",
|
| 5 |
+
"gradient_clipping": "auto",
|
| 6 |
+
"zero_allow_untested_optimizer": true,
|
| 7 |
+
"fp16": {
|
| 8 |
+
"enabled": true,
|
| 9 |
+
"loss_scale": 0,
|
| 10 |
+
"loss_scale_window": 1000,
|
| 11 |
+
"initial_scale_power": 16,
|
| 12 |
+
"hysteresis": 2,
|
| 13 |
+
"min_loss_scale": 1
|
| 14 |
+
},
|
| 15 |
+
"bf16": {
|
| 16 |
+
"enabled": false,
|
| 17 |
+
"loss_scale": 0,
|
| 18 |
+
"loss_scale_window": 1000,
|
| 19 |
+
"initial_scale_power": 16,
|
| 20 |
+
"hysteresis": 2,
|
| 21 |
+
"min_loss_scale": 1
|
| 22 |
+
},
|
| 23 |
+
"zero_optimization": {
|
| 24 |
+
"stage": 2,
|
| 25 |
+
"allgather_partitions": true,
|
| 26 |
+
"allgather_bucket_size": 5e8,
|
| 27 |
+
"overlap_comm": true,
|
| 28 |
+
"reduce_scatter": true,
|
| 29 |
+
"reduce_bucket_size": 5e8,
|
| 30 |
+
"contiguous_gradients": true,
|
| 31 |
+
"round_robin_gradients": true
|
| 32 |
+
}
|
| 33 |
+
}
|
configs/ds_z3_config_bf16.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_batch_size": "auto",
|
| 3 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 4 |
+
"gradient_accumulation_steps": "auto",
|
| 5 |
+
"gradient_clipping": "auto",
|
| 6 |
+
"zero_allow_untested_optimizer": true,
|
| 7 |
+
"fp16": {
|
| 8 |
+
"enabled": "auto",
|
| 9 |
+
"loss_scale": 0,
|
| 10 |
+
"loss_scale_window": 1000,
|
| 11 |
+
"initial_scale_power": 16,
|
| 12 |
+
"hysteresis": 2,
|
| 13 |
+
"min_loss_scale": 1
|
| 14 |
+
},
|
| 15 |
+
"bf16": {
|
| 16 |
+
"enabled": "auto"
|
| 17 |
+
},
|
| 18 |
+
"zero_optimization": {
|
| 19 |
+
"stage": 3,
|
| 20 |
+
"overlap_comm": false,
|
| 21 |
+
"contiguous_gradients": true,
|
| 22 |
+
"sub_group_size": 1e9,
|
| 23 |
+
"reduce_bucket_size": "auto",
|
| 24 |
+
"stage3_prefetch_bucket_size": "auto",
|
| 25 |
+
"stage3_param_persistence_threshold": "auto",
|
| 26 |
+
"stage3_max_live_parameters": 1e9,
|
| 27 |
+
"stage3_max_reuse_distance": 1e9,
|
| 28 |
+
"stage3_gather_16bit_weights_on_model_save": true
|
| 29 |
+
}
|
| 30 |
+
}
|
configs/fsdp_train_config.yaml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
compute_environment: LOCAL_MACHINE
|
| 2 |
+
debug: false
|
| 3 |
+
distributed_type: FSDP
|
| 4 |
+
downcast_bf16: 'no'
|
| 5 |
+
fsdp_config:
|
| 6 |
+
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
|
| 7 |
+
  fsdp_backward_prefetch_policy: BACKWARD_PRE
|
| 8 |
+
  fsdp_forward_prefetch: false
|
| 9 |
+
  fsdp_cpu_ram_efficient_loading: true
|
| 10 |
+
  fsdp_offload_params: false
|
| 11 |
+
  fsdp_sharding_strategy: SHARD_GRAD_OP
|
| 12 |
+
  fsdp_state_dict_type: SHARDED_STATE_DICT
|
| 13 |
+
  fsdp_sync_module_states: true
|
| 14 |
+
  fsdp_transformer_layer_cls_to_wrap: BertLayer
|
| 15 |
+
  fsdp_use_orig_params: true
|
| 16 |
+
gpu_ids: 0,1,2,3,4,5
|
| 17 |
+
machine_rank: 0
|
| 18 |
+
main_training_function: main
|
| 19 |
+
mixed_precision: fp16
|
| 20 |
+
num_machines: 1
|
| 21 |
+
num_processes: 6
|
| 22 |
+
rdzv_backend: static
|
| 23 |
+
same_network: true
|
| 24 |
+
tpu_env: []
|
| 25 |
+
tpu_use_cluster: false
|
| 26 |
+
tpu_use_sudo: false
|
| 27 |
+
use_cpu: false
|
configs/llama3_full_pt.yaml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: /mnt/luoyingfeng/model_card/Meta-Llama-3.2-1B
|
| 3 |
+
#trust_remote_code: true
|
| 4 |
+
template: llama3
|
| 5 |
+
### method
|
| 6 |
+
stage: pt
|
| 7 |
+
do_train: true
|
| 8 |
+
finetuning_type: full
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset_dir: /mnt/luoyingfeng/lora4mt/data/fine-tuning_data/cpt_data
|
| 12 |
+
dataset: cpt-kk-en
|
| 13 |
+
cutoff_len: 512
|
| 14 |
+
use_fast_tokenizer: true
|
| 15 |
+
dataloader_num_workers: 8
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: /mnt/luoyingfeng/lora4mt/exps/Meta-Llama-3.2-1B/fft_cpt
|
| 20 |
+
logging_steps: 0.01
|
| 21 |
+
save_steps: 0.05
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 8
|
| 27 |
+
gradient_accumulation_steps: 16
|
| 28 |
+
learning_rate: 2.0e-5
|
| 29 |
+
num_train_epochs: 1.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
seed: 42
|
| 35 |
+
save_strategy: steps
|
| 36 |
+
logging_strategy: steps
|
| 37 |
+
|
| 38 |
+
### eval
|
| 39 |
+
# val_size: 0.1
|
| 40 |
+
# per_device_eval_batch_size: 1
|
| 41 |
+
# eval_strategy: steps
|
| 42 |
+
# eval_steps: 500
|