Trouter-Library committed on
Commit
9daefda
·
verified ·
1 Parent(s): 4ba76d2

Create ds_config_zero3.json

Browse files
Files changed (1) hide show
  1. ds_config_zero3.json +61 -0
ds_config_zero3.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bf16": {
3
+ "enabled": true
4
+ },
5
+ "zero_optimization": {
6
+ "stage": 3,
7
+ "offload_optimizer": {
8
+ "device": "cpu",
9
+ "pin_memory": true
10
+ },
11
+ "offload_param": {
12
+ "device": "cpu",
13
+ "pin_memory": true
14
+ },
15
+ "overlap_comm": true,
16
+ "contiguous_gradients": true,
17
+ "sub_group_size": 1e9,
18
+ "reduce_bucket_size": 5e8,
19
+ "stage3_prefetch_bucket_size": 5e8,
20
+ "stage3_param_persistence_threshold": 1e6,
21
+ "stage3_max_live_parameters": 1e9,
22
+ "stage3_max_reuse_distance": 1e9,
23
+ "stage3_gather_16bit_weights_on_model_save": true
24
+ },
25
+ "gradient_accumulation_steps": 32,
26
+ "gradient_clipping": 1.0,
27
+ "steps_per_print": 10,
28
+ "train_batch_size": "auto",
29
+ "train_micro_batch_size_per_gpu": "auto",
30
+ "wall_clock_breakdown": false,
31
+ "communication_data_type": "bf16",
32
+ "prescale_gradients": false,
33
+ "sparse_gradients": false,
34
+ "compression_training": {
35
+ "weight_quantization": {
36
+ "shared_parameters": {},
37
+ "different_groups": {}
38
+ },
39
+ "activation_quantization": {
40
+ "shared_parameters": {},
41
+ "different_groups": {}
42
+ },
43
+ "sparse_pruning": {
44
+ "shared_parameters": {},
45
+ "different_groups": {}
46
+ }
47
+ },
48
+ "flops_profiler": {
49
+ "enabled": false,
50
+ "profile_step": 1,
51
+ "module_depth": -1,
52
+ "top_modules": 1,
53
+ "detailed": true,
54
+ "output_file": null
55
+ },
56
+ "tensorboard": {
57
+ "enabled": true,
58
+ "output_path": "./logs/tensorboard",
59
+ "job_name": "helion_v2_training"
60
+ }
61
+ }