banbooliang commited on
Commit
c62f8a5
·
1 Parent(s): 0b0227d
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/README.md +202 -0
  2. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/adapter_config.json +34 -0
  3. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/adapter_model.safetensors +3 -0
  4. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/added_tokens.json +3 -0
  5. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/optimizer.pt +3 -0
  6. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_0.pth +3 -0
  7. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_1.pth +3 -0
  8. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_2.pth +3 -0
  9. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_3.pth +3 -0
  10. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/scheduler.pt +3 -0
  11. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/special_tokens_map.json +30 -0
  12. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/tokenizer.model +3 -0
  13. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/tokenizer_config.json +50 -0
  14. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/trainer_state.json +2031 -0
  15. weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/training_args.bin +3 -0
  16. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/README.md +202 -0
  17. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/adapter_config.json +29 -0
  18. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/adapter_model.bin +3 -0
  19. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/added_tokens.json +3 -0
  20. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  21. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  22. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  23. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  24. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/mp_rank_00_model_states.pt +3 -0
  25. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/latest +1 -0
  26. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_0.pth +3 -0
  27. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_1.pth +3 -0
  28. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_2.pth +3 -0
  29. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_3.pth +3 -0
  30. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/special_tokens_map.json +30 -0
  31. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/tokenizer.model +3 -0
  32. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/tokenizer_config.json +50 -0
  33. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/trainer_state.json +1153 -0
  34. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/training_args.bin +3 -0
  35. weight_dir/loraWeight/fixbycrflp/checkpoint-8000/zero_to_fp32.py +604 -0
  36. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/README.md +202 -0
  37. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/adapter_config.json +29 -0
  38. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/adapter_model.bin +3 -0
  39. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/added_tokens.json +3 -0
  40. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  41. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  42. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  43. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  44. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/mp_rank_00_model_states.pt +3 -0
  45. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/latest +1 -0
  46. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_0.pth +3 -0
  47. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_1.pth +3 -0
  48. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_2.pth +3 -0
  49. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_3.pth +3 -0
  50. weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/special_tokens_map.json +30 -0
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: ./output_dir/fix_codeLlama
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.11.1
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "./output_dir/fix_codeLlama",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "fc_out",
24
+ "q_proj",
25
+ "k_proj",
26
+ "fc_in",
27
+ "wte",
28
+ "v_proj",
29
+ "out_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3bc6527e61727868deb335a388d96da4e34312a12e2813a5de86145429eae49
3
+ size 25191360
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<PAD>": 32016
3
+ }
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ccd2e39e796e699e4a18d72274aa953b12ab9ff05892840247e7f21689b079
3
+ size 50445242
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d70f4816d9856643f7986b709d87aa477584114329562b62b70a8ee6bedbc11b
3
+ size 14960
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:861efdf0dceef25cf08029e4518f52f53768678173fcd28906388233c3fb3b02
3
+ size 14960
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebdb27a64a2feccfa8a360d15c28e460fbe78b54e9d7e847e02a63fc8fa3eb33
3
+ size 14960
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8dd3637e6e034724517d6f63740144aba4bf2af43d797bbf16033e48567a69
3
+ size 14960
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0355f6d2acfbddd46776507259ab052a22b6ee6d2b38dce536d622af0a82d05
3
+ size 1064
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<PAD>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
3
+ size 500058
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32016": {
31
+ "content": "<PAD>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "clean_up_tokenization_spaces": false,
41
+ "eos_token": "</s>",
42
+ "legacy": true,
43
+ "model_max_length": 1000000000000000019884624838656,
44
+ "pad_token": "<PAD>",
45
+ "sp_model_kwargs": {},
46
+ "spaces_between_special_tokens": false,
47
+ "tokenizer_class": "LlamaTokenizer",
48
+ "unk_token": "<unk>",
49
+ "use_default_system_prompt": false
50
+ }
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/trainer_state.json ADDED
@@ -0,0 +1,2031 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 16.883116883116884,
5
+ "eval_steps": 400,
6
+ "global_step": 1300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12987012987012986,
13
+ "grad_norm": 106.52960205078125,
14
+ "learning_rate": 5.000000000000001e-07,
15
+ "logits/chosen": -2.8220086097717285,
16
+ "logits/rejected": -2.813664674758911,
17
+ "logps/chosen": -6.851963043212891,
18
+ "logps/rejected": -22.329076766967773,
19
+ "loss": 1.7802,
20
+ "rewards/accuracies": 0.42500001192092896,
21
+ "rewards/chosen": 0.0019315744284540415,
22
+ "rewards/margins": -0.00016089165001176298,
23
+ "rewards/rejected": 0.0020924662239849567,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.2597402597402597,
28
+ "grad_norm": 249.00958251953125,
29
+ "learning_rate": 1.0000000000000002e-06,
30
+ "logits/chosen": -2.8003315925598145,
31
+ "logits/rejected": -2.791141986846924,
32
+ "logps/chosen": -7.967398166656494,
33
+ "logps/rejected": -21.916549682617188,
34
+ "loss": 3.02,
35
+ "rewards/accuracies": 0.44999998807907104,
36
+ "rewards/chosen": -0.0019492346327751875,
37
+ "rewards/margins": -0.0002492312341928482,
38
+ "rewards/rejected": -0.0017000030493363738,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.38961038961038963,
43
+ "grad_norm": 231.99557495117188,
44
+ "learning_rate": 1.5e-06,
45
+ "logits/chosen": -2.801818609237671,
46
+ "logits/rejected": -2.795116424560547,
47
+ "logps/chosen": -8.219635009765625,
48
+ "logps/rejected": -22.34463882446289,
49
+ "loss": 2.6654,
50
+ "rewards/accuracies": 0.4000000059604645,
51
+ "rewards/chosen": 0.000893857330083847,
52
+ "rewards/margins": -0.003195409430190921,
53
+ "rewards/rejected": 0.004089267458766699,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.5194805194805194,
58
+ "grad_norm": 94.2333755493164,
59
+ "learning_rate": 2.0000000000000003e-06,
60
+ "logits/chosen": -2.8080127239227295,
61
+ "logits/rejected": -2.804051637649536,
62
+ "logps/chosen": -9.42918586730957,
63
+ "logps/rejected": -22.583698272705078,
64
+ "loss": 2.3196,
65
+ "rewards/accuracies": 0.574999988079071,
66
+ "rewards/chosen": 0.0048586102202534676,
67
+ "rewards/margins": 0.002601353684440255,
68
+ "rewards/rejected": 0.0022572563029825687,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.6493506493506493,
73
+ "grad_norm": 213.9645538330078,
74
+ "learning_rate": 2.5e-06,
75
+ "logits/chosen": -2.805826187133789,
76
+ "logits/rejected": -2.7987563610076904,
77
+ "logps/chosen": -7.2015862464904785,
78
+ "logps/rejected": -23.5020694732666,
79
+ "loss": 2.6731,
80
+ "rewards/accuracies": 0.5,
81
+ "rewards/chosen": 0.0019127244595438242,
82
+ "rewards/margins": 0.002104248385876417,
83
+ "rewards/rejected": -0.000191524246474728,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.7792207792207793,
88
+ "grad_norm": 110.76453399658203,
89
+ "learning_rate": 3e-06,
90
+ "logits/chosen": -2.801133394241333,
91
+ "logits/rejected": -2.7940406799316406,
92
+ "logps/chosen": -9.063034057617188,
93
+ "logps/rejected": -20.352069854736328,
94
+ "loss": 2.5189,
95
+ "rewards/accuracies": 0.44999998807907104,
96
+ "rewards/chosen": -0.003256978467106819,
97
+ "rewards/margins": -0.0020958157256245613,
98
+ "rewards/rejected": -0.0011611627414822578,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.9090909090909091,
103
+ "grad_norm": 174.06761169433594,
104
+ "learning_rate": 3.5e-06,
105
+ "logits/chosen": -2.815706968307495,
106
+ "logits/rejected": -2.8055267333984375,
107
+ "logps/chosen": -7.804632663726807,
108
+ "logps/rejected": -22.440189361572266,
109
+ "loss": 1.7947,
110
+ "rewards/accuracies": 0.3499999940395355,
111
+ "rewards/chosen": 0.002698649885132909,
112
+ "rewards/margins": -0.0054735904559493065,
113
+ "rewards/rejected": 0.008172241039574146,
114
+ "step": 70
115
+ },
116
+ {
117
+ "epoch": 1.0389610389610389,
118
+ "grad_norm": 87.83943939208984,
119
+ "learning_rate": 4.000000000000001e-06,
120
+ "logits/chosen": -2.8100368976593018,
121
+ "logits/rejected": -2.798546075820923,
122
+ "logps/chosen": -7.676709175109863,
123
+ "logps/rejected": -21.805500030517578,
124
+ "loss": 1.678,
125
+ "rewards/accuracies": 0.4000000059604645,
126
+ "rewards/chosen": 0.00427329121157527,
127
+ "rewards/margins": -0.006891848053783178,
128
+ "rewards/rejected": 0.011165140196681023,
129
+ "step": 80
130
+ },
131
+ {
132
+ "epoch": 1.1688311688311688,
133
+ "grad_norm": 116.58035278320312,
134
+ "learning_rate": 4.5e-06,
135
+ "logits/chosen": -2.8018991947174072,
136
+ "logits/rejected": -2.7894458770751953,
137
+ "logps/chosen": -8.77466869354248,
138
+ "logps/rejected": -22.66843032836914,
139
+ "loss": 1.872,
140
+ "rewards/accuracies": 0.30000001192092896,
141
+ "rewards/chosen": 0.0032441741786897182,
142
+ "rewards/margins": -0.009967333637177944,
143
+ "rewards/rejected": 0.01321150828152895,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 1.2987012987012987,
148
+ "grad_norm": 94.74404907226562,
149
+ "learning_rate": 5e-06,
150
+ "logits/chosen": -2.8042826652526855,
151
+ "logits/rejected": -2.7987048625946045,
152
+ "logps/chosen": -7.8375115394592285,
153
+ "logps/rejected": -21.124469757080078,
154
+ "loss": 1.7389,
155
+ "rewards/accuracies": 0.25,
156
+ "rewards/chosen": 0.00530009251087904,
157
+ "rewards/margins": -0.010122200474143028,
158
+ "rewards/rejected": 0.015422293916344643,
159
+ "step": 100
160
+ },
161
+ {
162
+ "epoch": 1.4285714285714286,
163
+ "grad_norm": 36.4909782409668,
164
+ "learning_rate": 4.997252228714279e-06,
165
+ "logits/chosen": -2.8379225730895996,
166
+ "logits/rejected": -2.831869602203369,
167
+ "logps/chosen": -8.803790092468262,
168
+ "logps/rejected": -20.375246047973633,
169
+ "loss": 1.3703,
170
+ "rewards/accuracies": 0.42500001192092896,
171
+ "rewards/chosen": 0.01235434040427208,
172
+ "rewards/margins": -0.006987647153437138,
173
+ "rewards/rejected": 0.019341984763741493,
174
+ "step": 110
175
+ },
176
+ {
177
+ "epoch": 1.5584415584415585,
178
+ "grad_norm": 39.18604278564453,
179
+ "learning_rate": 4.989014955054746e-06,
180
+ "logits/chosen": -2.817542552947998,
181
+ "logits/rejected": -2.8139848709106445,
182
+ "logps/chosen": -7.656688690185547,
183
+ "logps/rejected": -20.828929901123047,
184
+ "loss": 1.1774,
185
+ "rewards/accuracies": 0.30000001192092896,
186
+ "rewards/chosen": 0.01700626313686371,
187
+ "rewards/margins": -0.011658851057291031,
188
+ "rewards/rejected": 0.02866511419415474,
189
+ "step": 120
190
+ },
191
+ {
192
+ "epoch": 1.6883116883116882,
193
+ "grad_norm": 65.63705444335938,
194
+ "learning_rate": 4.975306286336628e-06,
195
+ "logits/chosen": -2.8285748958587646,
196
+ "logits/rejected": -2.823660373687744,
197
+ "logps/chosen": -7.743475914001465,
198
+ "logps/rejected": -20.759389877319336,
199
+ "loss": 1.3562,
200
+ "rewards/accuracies": 0.2750000059604645,
201
+ "rewards/chosen": 0.020265722647309303,
202
+ "rewards/margins": -0.01016728300601244,
203
+ "rewards/rejected": 0.03043300285935402,
204
+ "step": 130
205
+ },
206
+ {
207
+ "epoch": 1.8181818181818183,
208
+ "grad_norm": 170.55665588378906,
209
+ "learning_rate": 4.95615635718894e-06,
210
+ "logits/chosen": -2.821213722229004,
211
+ "logits/rejected": -2.8140625953674316,
212
+ "logps/chosen": -8.116856575012207,
213
+ "logps/rejected": -19.581222534179688,
214
+ "loss": 1.7842,
215
+ "rewards/accuracies": 0.2750000059604645,
216
+ "rewards/chosen": 0.017684193328022957,
217
+ "rewards/margins": -0.010469591245055199,
218
+ "rewards/rejected": 0.028153782710433006,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 1.948051948051948,
223
+ "grad_norm": 71.12332916259766,
224
+ "learning_rate": 4.931607263312033e-06,
225
+ "logits/chosen": -2.823256015777588,
226
+ "logits/rejected": -2.812701463699341,
227
+ "logps/chosen": -6.964502811431885,
228
+ "logps/rejected": -21.734210968017578,
229
+ "loss": 1.5393,
230
+ "rewards/accuracies": 0.22499999403953552,
231
+ "rewards/chosen": 0.010581249371170998,
232
+ "rewards/margins": -0.01839107647538185,
233
+ "rewards/rejected": 0.0289723239839077,
234
+ "step": 150
235
+ },
236
+ {
237
+ "epoch": 2.0779220779220777,
238
+ "grad_norm": 173.57005310058594,
239
+ "learning_rate": 4.901712968942101e-06,
240
+ "logits/chosen": -2.808175563812256,
241
+ "logits/rejected": -2.798382520675659,
242
+ "logps/chosen": -7.128758907318115,
243
+ "logps/rejected": -22.715810775756836,
244
+ "loss": 1.4333,
245
+ "rewards/accuracies": 0.30000001192092896,
246
+ "rewards/chosen": 0.012118353508412838,
247
+ "rewards/margins": -0.01597173698246479,
248
+ "rewards/rejected": 0.028090089559555054,
249
+ "step": 160
250
+ },
251
+ {
252
+ "epoch": 2.207792207792208,
253
+ "grad_norm": 119.67935180664062,
254
+ "learning_rate": 4.866539188226086e-06,
255
+ "logits/chosen": -2.788814067840576,
256
+ "logits/rejected": -2.781687021255493,
257
+ "logps/chosen": -9.231825828552246,
258
+ "logps/rejected": -23.992216110229492,
259
+ "loss": 1.2406,
260
+ "rewards/accuracies": 0.25,
261
+ "rewards/chosen": 0.01427288819104433,
262
+ "rewards/margins": -0.011877929791808128,
263
+ "rewards/rejected": 0.026150818914175034,
264
+ "step": 170
265
+ },
266
+ {
267
+ "epoch": 2.3376623376623376,
268
+ "grad_norm": 78.3140869140625,
269
+ "learning_rate": 4.826163240767717e-06,
270
+ "logits/chosen": -2.819803476333618,
271
+ "logits/rejected": -2.812873363494873,
272
+ "logps/chosen": -8.85770320892334,
273
+ "logps/rejected": -21.352455139160156,
274
+ "loss": 1.6327,
275
+ "rewards/accuracies": 0.25,
276
+ "rewards/chosen": 0.01637933775782585,
277
+ "rewards/margins": -0.017695654183626175,
278
+ "rewards/rejected": 0.034074995666742325,
279
+ "step": 180
280
+ },
281
+ {
282
+ "epoch": 2.4675324675324677,
283
+ "grad_norm": 34.724788665771484,
284
+ "learning_rate": 4.780673881662242e-06,
285
+ "logits/chosen": -2.802525043487549,
286
+ "logits/rejected": -2.7943992614746094,
287
+ "logps/chosen": -9.141613006591797,
288
+ "logps/rejected": -21.16362762451172,
289
+ "loss": 1.3604,
290
+ "rewards/accuracies": 0.30000001192092896,
291
+ "rewards/chosen": 0.019131088629364967,
292
+ "rewards/margins": -0.012987020425498486,
293
+ "rewards/rejected": 0.03211811184883118,
294
+ "step": 190
295
+ },
296
+ {
297
+ "epoch": 2.5974025974025974,
298
+ "grad_norm": 155.53683471679688,
299
+ "learning_rate": 4.730171106393466e-06,
300
+ "logits/chosen": -2.8240277767181396,
301
+ "logits/rejected": -2.8137762546539307,
302
+ "logps/chosen": -6.279611587524414,
303
+ "logps/rejected": -22.48813247680664,
304
+ "loss": 1.3818,
305
+ "rewards/accuracies": 0.3499999940395355,
306
+ "rewards/chosen": 0.018246710300445557,
307
+ "rewards/margins": -0.011857626028358936,
308
+ "rewards/rejected": 0.03010433353483677,
309
+ "step": 200
310
+ },
311
+ {
312
+ "epoch": 2.7272727272727275,
313
+ "grad_norm": 64.57522583007812,
314
+ "learning_rate": 4.674765931021976e-06,
315
+ "logits/chosen": -2.8272862434387207,
316
+ "logits/rejected": -2.8214235305786133,
317
+ "logps/chosen": -8.027566909790039,
318
+ "logps/rejected": -20.517822265625,
319
+ "loss": 1.2211,
320
+ "rewards/accuracies": 0.25,
321
+ "rewards/chosen": 0.01772388443350792,
322
+ "rewards/margins": -0.015222163870930672,
323
+ "rewards/rejected": 0.03294604271650314,
324
+ "step": 210
325
+ },
326
+ {
327
+ "epoch": 2.857142857142857,
328
+ "grad_norm": 138.96739196777344,
329
+ "learning_rate": 4.614580148147744e-06,
330
+ "logits/chosen": -2.8115546703338623,
331
+ "logits/rejected": -2.804287910461426,
332
+ "logps/chosen": -6.3580474853515625,
333
+ "logps/rejected": -20.281545639038086,
334
+ "loss": 1.3669,
335
+ "rewards/accuracies": 0.32499998807907104,
336
+ "rewards/chosen": 0.0185546912252903,
337
+ "rewards/margins": -0.011832709424197674,
338
+ "rewards/rejected": 0.03038739785552025,
339
+ "step": 220
340
+ },
341
+ {
342
+ "epoch": 2.987012987012987,
343
+ "grad_norm": 55.348880767822266,
344
+ "learning_rate": 4.5497460591835615e-06,
345
+ "logits/chosen": -2.8048064708709717,
346
+ "logits/rejected": -2.799748420715332,
347
+ "logps/chosen": -8.203588485717773,
348
+ "logps/rejected": -20.907344818115234,
349
+ "loss": 1.1073,
350
+ "rewards/accuracies": 0.44999998807907104,
351
+ "rewards/chosen": 0.02281924895942211,
352
+ "rewards/margins": -0.005632441025227308,
353
+ "rewards/rejected": 0.02845168672502041,
354
+ "step": 230
355
+ },
356
+ {
357
+ "epoch": 3.116883116883117,
358
+ "grad_norm": 114.07877349853516,
359
+ "learning_rate": 4.480406183527823e-06,
360
+ "logits/chosen": -2.820413589477539,
361
+ "logits/rejected": -2.810023307800293,
362
+ "logps/chosen": -7.28661584854126,
363
+ "logps/rejected": -22.23759651184082,
364
+ "loss": 1.2872,
365
+ "rewards/accuracies": 0.5,
366
+ "rewards/chosen": 0.02074371837079525,
367
+ "rewards/margins": -0.00726896058768034,
368
+ "rewards/rejected": 0.028012678027153015,
369
+ "step": 240
370
+ },
371
+ {
372
+ "epoch": 3.2467532467532467,
373
+ "grad_norm": 114.84168243408203,
374
+ "learning_rate": 4.406712945275955e-06,
375
+ "logits/chosen": -2.8098714351654053,
376
+ "logits/rejected": -2.8057053089141846,
377
+ "logps/chosen": -8.777189254760742,
378
+ "logps/rejected": -23.82990074157715,
379
+ "loss": 1.2292,
380
+ "rewards/accuracies": 0.32499998807907104,
381
+ "rewards/chosen": 0.020751068368554115,
382
+ "rewards/margins": -0.011817799881100655,
383
+ "rewards/rejected": 0.03256886824965477,
384
+ "step": 250
385
+ },
386
+ {
387
+ "epoch": 3.3766233766233764,
388
+ "grad_norm": 125.23298645019531,
389
+ "learning_rate": 4.328828338159173e-06,
390
+ "logits/chosen": -2.8179001808166504,
391
+ "logits/rejected": -2.809523820877075,
392
+ "logps/chosen": -7.79488468170166,
393
+ "logps/rejected": -21.827838897705078,
394
+ "loss": 1.0056,
395
+ "rewards/accuracies": 0.44999998807907104,
396
+ "rewards/chosen": 0.018612362444400787,
397
+ "rewards/margins": -0.005726366303861141,
398
+ "rewards/rejected": 0.024338727816939354,
399
+ "step": 260
400
+ },
401
+ {
402
+ "epoch": 3.5064935064935066,
403
+ "grad_norm": 118.24935150146484,
404
+ "learning_rate": 4.246923569447105e-06,
405
+ "logits/chosen": -2.827606201171875,
406
+ "logits/rejected": -2.8264949321746826,
407
+ "logps/chosen": -9.784859657287598,
408
+ "logps/rejected": -19.85146141052246,
409
+ "loss": 1.2282,
410
+ "rewards/accuracies": 0.22499999403953552,
411
+ "rewards/chosen": 0.01882827654480934,
412
+ "rewards/margins": -0.013741184957325459,
413
+ "rewards/rejected": 0.032569460570812225,
414
+ "step": 270
415
+ },
416
+ {
417
+ "epoch": 3.6363636363636362,
418
+ "grad_norm": 231.2564697265625,
419
+ "learning_rate": 4.161178683597055e-06,
420
+ "logits/chosen": -2.8154025077819824,
421
+ "logits/rejected": -2.808724880218506,
422
+ "logps/chosen": -10.802755355834961,
423
+ "logps/rejected": -20.200098037719727,
424
+ "loss": 1.2421,
425
+ "rewards/accuracies": 0.42500001192092896,
426
+ "rewards/chosen": 0.016843315213918686,
427
+ "rewards/margins": -0.009146241471171379,
428
+ "rewards/rejected": 0.025989552959799767,
429
+ "step": 280
430
+ },
431
+ {
432
+ "epoch": 3.7662337662337664,
433
+ "grad_norm": 178.36927795410156,
434
+ "learning_rate": 4.071782166477213e-06,
435
+ "logits/chosen": -2.827821969985962,
436
+ "logits/rejected": -2.817737102508545,
437
+ "logps/chosen": -7.973962306976318,
438
+ "logps/rejected": -20.839879989624023,
439
+ "loss": 0.9562,
440
+ "rewards/accuracies": 0.550000011920929,
441
+ "rewards/chosen": 0.02151758037507534,
442
+ "rewards/margins": 0.0009062625467777252,
443
+ "rewards/rejected": 0.020611315965652466,
444
+ "step": 290
445
+ },
446
+ {
447
+ "epoch": 3.896103896103896,
448
+ "grad_norm": 177.05746459960938,
449
+ "learning_rate": 3.978930531033807e-06,
450
+ "logits/chosen": -2.81160306930542,
451
+ "logits/rejected": -2.807624340057373,
452
+ "logps/chosen": -7.694178581237793,
453
+ "logps/rejected": -20.271114349365234,
454
+ "loss": 1.2155,
455
+ "rewards/accuracies": 0.3499999940395355,
456
+ "rewards/chosen": 0.02011878788471222,
457
+ "rewards/margins": -0.011032785288989544,
458
+ "rewards/rejected": 0.03115157224237919,
459
+ "step": 300
460
+ },
461
+ {
462
+ "epoch": 4.025974025974026,
463
+ "grad_norm": 85.1644287109375,
464
+ "learning_rate": 3.882827885312999e-06,
465
+ "logits/chosen": -2.826159954071045,
466
+ "logits/rejected": -2.8190231323242188,
467
+ "logps/chosen": -7.839417934417725,
468
+ "logps/rejected": -20.949127197265625,
469
+ "loss": 1.1714,
470
+ "rewards/accuracies": 0.5249999761581421,
471
+ "rewards/chosen": 0.018428083509206772,
472
+ "rewards/margins": -0.0033446471206843853,
473
+ "rewards/rejected": 0.021772734820842743,
474
+ "step": 310
475
+ },
476
+ {
477
+ "epoch": 4.1558441558441555,
478
+ "grad_norm": 96.2293472290039,
479
+ "learning_rate": 3.783685483787105e-06,
480
+ "logits/chosen": -2.7987990379333496,
481
+ "logits/rejected": -2.7919256687164307,
482
+ "logps/chosen": -8.851567268371582,
483
+ "logps/rejected": -21.075613021850586,
484
+ "loss": 1.0528,
485
+ "rewards/accuracies": 0.550000011920929,
486
+ "rewards/chosen": 0.027806680649518967,
487
+ "rewards/margins": 0.007172185927629471,
488
+ "rewards/rejected": 0.020634492859244347,
489
+ "step": 320
490
+ },
491
+ {
492
+ "epoch": 4.285714285714286,
493
+ "grad_norm": 64.6633071899414,
494
+ "learning_rate": 3.6817212629714135e-06,
495
+ "logits/chosen": -2.8131086826324463,
496
+ "logits/rejected": -2.8076319694519043,
497
+ "logps/chosen": -8.776361465454102,
498
+ "logps/rejected": -21.563058853149414,
499
+ "loss": 0.9623,
500
+ "rewards/accuracies": 0.32499998807907104,
501
+ "rewards/chosen": 0.02237451635301113,
502
+ "rewards/margins": -0.005439778324216604,
503
+ "rewards/rejected": 0.027814293280243874,
504
+ "step": 330
505
+ },
506
+ {
507
+ "epoch": 4.415584415584416,
508
+ "grad_norm": 61.17387771606445,
509
+ "learning_rate": 3.5771593623524263e-06,
510
+ "logits/chosen": -2.8211700916290283,
511
+ "logits/rejected": -2.8142738342285156,
512
+ "logps/chosen": -8.312505722045898,
513
+ "logps/rejected": -22.2635440826416,
514
+ "loss": 0.8663,
515
+ "rewards/accuracies": 0.5249999761581421,
516
+ "rewards/chosen": 0.022781318053603172,
517
+ "rewards/margins": -0.0001775051496224478,
518
+ "rewards/rejected": 0.022958822548389435,
519
+ "step": 340
520
+ },
521
+ {
522
+ "epoch": 4.545454545454545,
523
+ "grad_norm": 244.74371337890625,
524
+ "learning_rate": 3.4702296316806243e-06,
525
+ "logits/chosen": -2.8051793575286865,
526
+ "logits/rejected": -2.8000447750091553,
527
+ "logps/chosen": -8.497454643249512,
528
+ "logps/rejected": -21.671079635620117,
529
+ "loss": 0.9413,
530
+ "rewards/accuracies": 0.5249999761581421,
531
+ "rewards/chosen": 0.018070612102746964,
532
+ "rewards/margins": -0.0006186591344885528,
533
+ "rewards/rejected": 0.018689271062612534,
534
+ "step": 350
535
+ },
536
+ {
537
+ "epoch": 4.675324675324675,
538
+ "grad_norm": 180.42735290527344,
539
+ "learning_rate": 3.3611671257108323e-06,
540
+ "logits/chosen": -2.8005387783050537,
541
+ "logits/rejected": -2.7906429767608643,
542
+ "logps/chosen": -8.441190719604492,
543
+ "logps/rejected": -22.472454071044922,
544
+ "loss": 1.0874,
545
+ "rewards/accuracies": 0.625,
546
+ "rewards/chosen": 0.02211115136742592,
547
+ "rewards/margins": 0.009135196916759014,
548
+ "rewards/rejected": 0.012975958175957203,
549
+ "step": 360
550
+ },
551
+ {
552
+ "epoch": 4.805194805194805,
553
+ "grad_norm": 37.385963439941406,
554
+ "learning_rate": 3.2502115875008523e-06,
555
+ "logits/chosen": -2.822923183441162,
556
+ "logits/rejected": -2.8172943592071533,
557
+ "logps/chosen": -6.2603559494018555,
558
+ "logps/rejected": -19.10385513305664,
559
+ "loss": 1.1011,
560
+ "rewards/accuracies": 0.550000011920929,
561
+ "rewards/chosen": 0.018987987190485,
562
+ "rewards/margins": 0.00487549277022481,
563
+ "rewards/rejected": 0.014112496748566628,
564
+ "step": 370
565
+ },
566
+ {
567
+ "epoch": 4.935064935064935,
568
+ "grad_norm": 251.71954345703125,
569
+ "learning_rate": 3.1376069214041917e-06,
570
+ "logits/chosen": -2.826533079147339,
571
+ "logits/rejected": -2.8127052783966064,
572
+ "logps/chosen": -8.482178688049316,
573
+ "logps/rejected": -22.023988723754883,
574
+ "loss": 1.0408,
575
+ "rewards/accuracies": 0.675000011920929,
576
+ "rewards/chosen": 0.02052697166800499,
577
+ "rewards/margins": 0.00905714649707079,
578
+ "rewards/rejected": 0.011469824239611626,
579
+ "step": 380
580
+ },
581
+ {
582
+ "epoch": 5.064935064935065,
583
+ "grad_norm": 133.91131591796875,
584
+ "learning_rate": 3.023600656915362e-06,
585
+ "logits/chosen": -2.822097063064575,
586
+ "logits/rejected": -2.810638189315796,
587
+ "logps/chosen": -6.029572486877441,
588
+ "logps/rejected": -21.33598518371582,
589
+ "loss": 0.9734,
590
+ "rewards/accuracies": 0.625,
591
+ "rewards/chosen": 0.023712964728474617,
592
+ "rewards/margins": 0.003511254210025072,
593
+ "rewards/rejected": 0.020201710984110832,
594
+ "step": 390
595
+ },
596
+ {
597
+ "epoch": 5.194805194805195,
598
+ "grad_norm": 133.3660125732422,
599
+ "learning_rate": 2.9084434045463255e-06,
600
+ "logits/chosen": -2.80812668800354,
601
+ "logits/rejected": -2.7997758388519287,
602
+ "logps/chosen": -7.040734767913818,
603
+ "logps/rejected": -20.19479751586914,
604
+ "loss": 0.9652,
605
+ "rewards/accuracies": 0.699999988079071,
606
+ "rewards/chosen": 0.020205114036798477,
607
+ "rewards/margins": 0.010116524063050747,
608
+ "rewards/rejected": 0.01008858997374773,
609
+ "step": 400
610
+ },
611
+ {
612
+ "epoch": 5.194805194805195,
613
+ "eval_logits/chosen": -2.801586627960205,
614
+ "eval_logits/rejected": -2.8042006492614746,
615
+ "eval_logps/chosen": -13.702254295349121,
616
+ "eval_logps/rejected": -15.930770874023438,
617
+ "eval_loss": 0.7836798429489136,
618
+ "eval_rewards/accuracies": 0.75,
619
+ "eval_rewards/chosen": 0.04482783377170563,
620
+ "eval_rewards/margins": 0.010822022333741188,
621
+ "eval_rewards/rejected": 0.03400580957531929,
622
+ "eval_runtime": 1.1767,
623
+ "eval_samples_per_second": 11.898,
624
+ "eval_steps_per_second": 1.7,
625
+ "step": 400
626
+ },
627
+ {
628
+ "epoch": 5.324675324675325,
629
+ "grad_norm": 103.42053985595703,
630
+ "learning_rate": 2.792388304930207e-06,
631
+ "logits/chosen": -2.8027472496032715,
632
+ "logits/rejected": -2.7954494953155518,
633
+ "logps/chosen": -8.696739196777344,
634
+ "logps/rejected": -21.20206642150879,
635
+ "loss": 0.8734,
636
+ "rewards/accuracies": 0.75,
637
+ "rewards/chosen": 0.025552362203598022,
638
+ "rewards/margins": 0.018355753272771835,
639
+ "rewards/rejected": 0.007196612656116486,
640
+ "step": 410
641
+ },
642
+ {
643
+ "epoch": 5.454545454545454,
644
+ "grad_norm": 236.04249572753906,
645
+ "learning_rate": 2.6756904723632325e-06,
646
+ "logits/chosen": -2.820538282394409,
647
+ "logits/rejected": -2.8109259605407715,
648
+ "logps/chosen": -8.277128219604492,
649
+ "logps/rejected": -21.056652069091797,
650
+ "loss": 1.0417,
651
+ "rewards/accuracies": 0.675000011920929,
652
+ "rewards/chosen": 0.02177886664867401,
653
+ "rewards/margins": 0.017902854830026627,
654
+ "rewards/rejected": 0.0038760111201554537,
655
+ "step": 420
656
+ },
657
+ {
658
+ "epoch": 5.584415584415584,
659
+ "grad_norm": 94.26475524902344,
660
+ "learning_rate": 2.5586064340081516e-06,
661
+ "logits/chosen": -2.8236119747161865,
662
+ "logits/rejected": -2.8140316009521484,
663
+ "logps/chosen": -7.59194803237915,
664
+ "logps/rejected": -21.763874053955078,
665
+ "loss": 1.1263,
666
+ "rewards/accuracies": 0.7250000238418579,
667
+ "rewards/chosen": 0.017852844670414925,
668
+ "rewards/margins": 0.01606573723256588,
669
+ "rewards/rejected": 0.0017871044110506773,
670
+ "step": 430
671
+ },
672
+ {
673
+ "epoch": 5.714285714285714,
674
+ "grad_norm": 195.20712280273438,
675
+ "learning_rate": 2.441393565991849e-06,
676
+ "logits/chosen": -2.807347297668457,
677
+ "logits/rejected": -2.7958061695098877,
678
+ "logps/chosen": -8.326894760131836,
679
+ "logps/rejected": -21.803070068359375,
680
+ "loss": 0.9669,
681
+ "rewards/accuracies": 0.7250000238418579,
682
+ "rewards/chosen": 0.019576936960220337,
683
+ "rewards/margins": 0.02373579703271389,
684
+ "rewards/rejected": -0.0041588591411709785,
685
+ "step": 440
686
+ },
687
+ {
688
+ "epoch": 5.8441558441558445,
689
+ "grad_norm": 175.50625610351562,
690
+ "learning_rate": 2.3243095276367687e-06,
691
+ "logits/chosen": -2.8178904056549072,
692
+ "logits/rejected": -2.8124308586120605,
693
+ "logps/chosen": -8.626742362976074,
694
+ "logps/rejected": -21.10752296447754,
695
+ "loss": 1.4178,
696
+ "rewards/accuracies": 0.800000011920929,
697
+ "rewards/chosen": 0.01709837280213833,
698
+ "rewards/margins": 0.018973568454384804,
699
+ "rewards/rejected": -0.001875193091109395,
700
+ "step": 450
701
+ },
702
+ {
703
+ "epoch": 5.974025974025974,
704
+ "grad_norm": 171.52513122558594,
705
+ "learning_rate": 2.207611695069794e-06,
706
+ "logits/chosen": -2.8153228759765625,
707
+ "logits/rejected": -2.8087317943573,
708
+ "logps/chosen": -8.533574104309082,
709
+ "logps/rejected": -22.59617805480957,
710
+ "loss": 1.1735,
711
+ "rewards/accuracies": 0.7749999761581421,
712
+ "rewards/chosen": 0.019586745649576187,
713
+ "rewards/margins": 0.03454780578613281,
714
+ "rewards/rejected": -0.014961063861846924,
715
+ "step": 460
716
+ },
717
+ {
718
+ "epoch": 6.103896103896104,
719
+ "grad_norm": 1.1001964807510376,
720
+ "learning_rate": 2.0915565954536745e-06,
721
+ "logits/chosen": -2.8197810649871826,
722
+ "logits/rejected": -2.8137242794036865,
723
+ "logps/chosen": -6.954007148742676,
724
+ "logps/rejected": -20.310867309570312,
725
+ "loss": 0.9806,
726
+ "rewards/accuracies": 0.699999988079071,
727
+ "rewards/chosen": 0.02565738931298256,
728
+ "rewards/margins": 0.02424320951104164,
729
+ "rewards/rejected": 0.001414179103448987,
730
+ "step": 470
731
+ },
732
+ {
733
+ "epoch": 6.233766233766234,
734
+ "grad_norm": 129.70095825195312,
735
+ "learning_rate": 1.9763993430846394e-06,
736
+ "logits/chosen": -2.8019607067108154,
737
+ "logits/rejected": -2.798675537109375,
738
+ "logps/chosen": -8.585060119628906,
739
+ "logps/rejected": -22.695144653320312,
740
+ "loss": 1.1706,
741
+ "rewards/accuracies": 0.699999988079071,
742
+ "rewards/chosen": 0.017484549432992935,
743
+ "rewards/margins": 0.01860320381820202,
744
+ "rewards/rejected": -0.0011186569463461637,
745
+ "step": 480
746
+ },
747
+ {
748
+ "epoch": 6.363636363636363,
749
+ "grad_norm": 194.80984497070312,
750
+ "learning_rate": 1.8623930785958092e-06,
751
+ "logits/chosen": -2.8055920600891113,
752
+ "logits/rejected": -2.7976577281951904,
753
+ "logps/chosen": -8.328341484069824,
754
+ "logps/rejected": -21.830015182495117,
755
+ "loss": 0.9653,
756
+ "rewards/accuracies": 0.824999988079071,
757
+ "rewards/chosen": 0.023755336180329323,
758
+ "rewards/margins": 0.03609599173069,
759
+ "rewards/rejected": -0.012340660206973553,
760
+ "step": 490
761
+ },
762
+ {
763
+ "epoch": 6.4935064935064934,
764
+ "grad_norm": 1.1828835010528564,
765
+ "learning_rate": 1.7497884124991487e-06,
766
+ "logits/chosen": -2.8199102878570557,
767
+ "logits/rejected": -2.8055901527404785,
768
+ "logps/chosen": -6.756843566894531,
769
+ "logps/rejected": -20.936315536499023,
770
+ "loss": 0.749,
771
+ "rewards/accuracies": 0.8999999761581421,
772
+ "rewards/chosen": 0.026989247649908066,
773
+ "rewards/margins": 0.038383591920137405,
774
+ "rewards/rejected": -0.011394346132874489,
775
+ "step": 500
776
+ },
777
+ {
778
+ "epoch": 6.623376623376624,
779
+ "grad_norm": 38.752262115478516,
780
+ "learning_rate": 1.6388328742891679e-06,
781
+ "logits/chosen": -2.82857084274292,
782
+ "logits/rejected": -2.8192813396453857,
783
+ "logps/chosen": -5.166688919067383,
784
+ "logps/rejected": -21.360883712768555,
785
+ "loss": 0.9419,
786
+ "rewards/accuracies": 0.9750000238418579,
787
+ "rewards/chosen": 0.02238740213215351,
788
+ "rewards/margins": 0.04248126968741417,
789
+ "rewards/rejected": -0.020093869417905807,
790
+ "step": 510
791
+ },
792
+ {
793
+ "epoch": 6.753246753246753,
794
+ "grad_norm": 160.1698455810547,
795
+ "learning_rate": 1.5297703683193755e-06,
796
+ "logits/chosen": -2.813995599746704,
797
+ "logits/rejected": -2.8053503036499023,
798
+ "logps/chosen": -8.157999038696289,
799
+ "logps/rejected": -21.715497970581055,
800
+ "loss": 0.9404,
801
+ "rewards/accuracies": 0.824999988079071,
802
+ "rewards/chosen": 0.025560980662703514,
803
+ "rewards/margins": 0.04841204732656479,
804
+ "rewards/rejected": -0.022851066663861275,
805
+ "step": 520
806
+ },
807
+ {
808
+ "epoch": 6.883116883116883,
809
+ "grad_norm": 94.66495513916016,
810
+ "learning_rate": 1.4228406376475741e-06,
811
+ "logits/chosen": -2.8161110877990723,
812
+ "logits/rejected": -2.806417942047119,
813
+ "logps/chosen": -9.14362907409668,
814
+ "logps/rejected": -21.98373794555664,
815
+ "loss": 1.3017,
816
+ "rewards/accuracies": 0.949999988079071,
817
+ "rewards/chosen": 0.022670337930321693,
818
+ "rewards/margins": 0.04901245981454849,
819
+ "rewards/rejected": -0.02634212002158165,
820
+ "step": 530
821
+ },
822
+ {
823
+ "epoch": 7.012987012987013,
824
+ "grad_norm": 89.24470520019531,
825
+ "learning_rate": 1.3182787370285865e-06,
826
+ "logits/chosen": -2.8100571632385254,
827
+ "logits/rejected": -2.802196502685547,
828
+ "logps/chosen": -8.180788040161133,
829
+ "logps/rejected": -22.61843490600586,
830
+ "loss": 0.9971,
831
+ "rewards/accuracies": 0.8999999761581421,
832
+ "rewards/chosen": 0.02741624414920807,
833
+ "rewards/margins": 0.052984196692705154,
834
+ "rewards/rejected": -0.025567958131432533,
835
+ "step": 540
836
+ },
837
+ {
838
+ "epoch": 7.142857142857143,
839
+ "grad_norm": 46.27058410644531,
840
+ "learning_rate": 1.2163145162128948e-06,
841
+ "logits/chosen": -2.811455249786377,
842
+ "logits/rejected": -2.801848888397217,
843
+ "logps/chosen": -8.542337417602539,
844
+ "logps/rejected": -21.399410247802734,
845
+ "loss": 0.9509,
846
+ "rewards/accuracies": 0.824999988079071,
847
+ "rewards/chosen": 0.02186908759176731,
848
+ "rewards/margins": 0.0516178198158741,
849
+ "rewards/rejected": -0.029748734086751938,
850
+ "step": 550
851
+ },
852
+ {
853
+ "epoch": 7.2727272727272725,
854
+ "grad_norm": 69.24237823486328,
855
+ "learning_rate": 1.1171721146870015e-06,
856
+ "logits/chosen": -2.8135528564453125,
857
+ "logits/rejected": -2.803555965423584,
858
+ "logps/chosen": -8.623903274536133,
859
+ "logps/rejected": -22.410194396972656,
860
+ "loss": 0.9666,
861
+ "rewards/accuracies": 0.875,
862
+ "rewards/chosen": 0.03136039525270462,
863
+ "rewards/margins": 0.06211704760789871,
864
+ "rewards/rejected": -0.030756641179323196,
865
+ "step": 560
866
+ },
867
+ {
868
+ "epoch": 7.402597402597403,
869
+ "grad_norm": 1.2535158395767212,
870
+ "learning_rate": 1.021069468966194e-06,
871
+ "logits/chosen": -2.8163511753082275,
872
+ "logits/rejected": -2.808074474334717,
873
+ "logps/chosen": -7.339944362640381,
874
+ "logps/rejected": -22.135278701782227,
875
+ "loss": 0.8673,
876
+ "rewards/accuracies": 0.8500000238418579,
877
+ "rewards/chosen": 0.025004085153341293,
878
+ "rewards/margins": 0.048856452107429504,
879
+ "rewards/rejected": -0.023852365091443062,
880
+ "step": 570
881
+ },
882
+ {
883
+ "epoch": 7.532467532467533,
884
+ "grad_norm": 1.1791399717330933,
885
+ "learning_rate": 9.282178335227885e-07,
886
+ "logits/chosen": -2.7872376441955566,
887
+ "logits/rejected": -2.7806403636932373,
888
+ "logps/chosen": -8.17931079864502,
889
+ "logps/rejected": -23.90004539489746,
890
+ "loss": 1.4371,
891
+ "rewards/accuracies": 0.925000011920929,
892
+ "rewards/chosen": 0.025283193215727806,
893
+ "rewards/margins": 0.06855350732803345,
894
+ "rewards/rejected": -0.04327031224966049,
895
+ "step": 580
896
+ },
897
+ {
898
+ "epoch": 7.662337662337662,
899
+ "grad_norm": 1.237028956413269,
900
+ "learning_rate": 8.38821316402946e-07,
901
+ "logits/chosen": -2.828946352005005,
902
+ "logits/rejected": -2.82279634475708,
903
+ "logps/chosen": -7.834005832672119,
904
+ "logps/rejected": -23.059555053710938,
905
+ "loss": 0.872,
906
+ "rewards/accuracies": 0.8999999761581421,
907
+ "rewards/chosen": 0.023193147033452988,
908
+ "rewards/margins": 0.051843591034412384,
909
+ "rewards/rejected": -0.0286504365503788,
910
+ "step": 590
911
+ },
912
+ {
913
+ "epoch": 7.792207792207792,
914
+ "grad_norm": 1.1478286981582642,
915
+ "learning_rate": 7.530764305528959e-07,
916
+ "logits/chosen": -2.8227028846740723,
917
+ "logits/rejected": -2.815317153930664,
918
+ "logps/chosen": -7.287829399108887,
919
+ "logps/rejected": -21.66531753540039,
920
+ "loss": 0.9423,
921
+ "rewards/accuracies": 0.8500000238418579,
922
+ "rewards/chosen": 0.02044813148677349,
923
+ "rewards/margins": 0.050043750554323196,
924
+ "rewards/rejected": -0.029595619067549706,
925
+ "step": 600
926
+ },
927
+ {
928
+ "epoch": 7.922077922077922,
929
+ "grad_norm": 0.9912703037261963,
930
+ "learning_rate": 6.711716618408282e-07,
931
+ "logits/chosen": -2.838146686553955,
932
+ "logits/rejected": -2.8304896354675293,
933
+ "logps/chosen": -6.6907477378845215,
934
+ "logps/rejected": -19.75206756591797,
935
+ "loss": 1.0092,
936
+ "rewards/accuracies": 0.875,
937
+ "rewards/chosen": 0.03283644840121269,
938
+ "rewards/margins": 0.04647618532180786,
939
+ "rewards/rejected": -0.01363973505795002,
940
+ "step": 610
941
+ },
942
+ {
943
+ "epoch": 8.051948051948052,
944
+ "grad_norm": 205.0751953125,
945
+ "learning_rate": 5.932870547240455e-07,
946
+ "logits/chosen": -2.814697504043579,
947
+ "logits/rejected": -2.803783893585205,
948
+ "logps/chosen": -7.918333530426025,
949
+ "logps/rejected": -22.431245803833008,
950
+ "loss": 1.1134,
951
+ "rewards/accuracies": 0.8999999761581421,
952
+ "rewards/chosen": 0.026031214743852615,
953
+ "rewards/margins": 0.05806659907102585,
954
+ "rewards/rejected": -0.03203538805246353,
955
+ "step": 620
956
+ },
957
+ {
958
+ "epoch": 8.181818181818182,
959
+ "grad_norm": 181.82443237304688,
960
+ "learning_rate": 5.195938164721767e-07,
961
+ "logits/chosen": -2.825852394104004,
962
+ "logits/rejected": -2.8194408416748047,
963
+ "logps/chosen": -7.493607997894287,
964
+ "logps/rejected": -22.741878509521484,
965
+ "loss": 0.9249,
966
+ "rewards/accuracies": 0.949999988079071,
967
+ "rewards/chosen": 0.031044036149978638,
968
+ "rewards/margins": 0.054368507117033005,
969
+ "rewards/rejected": -0.023324472829699516,
970
+ "step": 630
971
+ },
972
+ {
973
+ "epoch": 8.311688311688311,
974
+ "grad_norm": 1.184661626815796,
975
+ "learning_rate": 4.502539408164386e-07,
976
+ "logits/chosen": -2.8212850093841553,
977
+ "logits/rejected": -2.8142733573913574,
978
+ "logps/chosen": -8.27627944946289,
979
+ "logps/rejected": -20.00022315979004,
980
+ "loss": 0.9228,
981
+ "rewards/accuracies": 0.824999988079071,
982
+ "rewards/chosen": 0.025791505351662636,
983
+ "rewards/margins": 0.04540405049920082,
984
+ "rewards/rejected": -0.019612547010183334,
985
+ "step": 640
986
+ },
987
+ {
988
+ "epoch": 8.441558441558442,
989
+ "grad_norm": 128.95013427734375,
990
+ "learning_rate": 3.8541985185225645e-07,
991
+ "logits/chosen": -2.824355363845825,
992
+ "logits/rejected": -2.8176026344299316,
993
+ "logps/chosen": -7.515383243560791,
994
+ "logps/rejected": -21.355083465576172,
995
+ "loss": 1.0536,
996
+ "rewards/accuracies": 0.9750000238418579,
997
+ "rewards/chosen": 0.02296631410717964,
998
+ "rewards/margins": 0.056810516864061356,
999
+ "rewards/rejected": -0.03384420648217201,
1000
+ "step": 650
1001
+ },
1002
+ {
1003
+ "epoch": 8.571428571428571,
1004
+ "grad_norm": 276.641357421875,
1005
+ "learning_rate": 3.252340689780245e-07,
1006
+ "logits/chosen": -2.82700777053833,
1007
+ "logits/rejected": -2.822455883026123,
1008
+ "logps/chosen": -7.617165565490723,
1009
+ "logps/rejected": -23.340801239013672,
1010
+ "loss": 1.2275,
1011
+ "rewards/accuracies": 0.925000011920929,
1012
+ "rewards/chosen": 0.02447410859167576,
1013
+ "rewards/margins": 0.07831484079360962,
1014
+ "rewards/rejected": -0.05384073406457901,
1015
+ "step": 660
1016
+ },
1017
+ {
1018
+ "epoch": 8.7012987012987,
1019
+ "grad_norm": 241.392333984375,
1020
+ "learning_rate": 2.698288936065338e-07,
1021
+ "logits/chosen": -2.7932610511779785,
1022
+ "logits/rejected": -2.7827224731445312,
1023
+ "logps/chosen": -7.8216118812561035,
1024
+ "logps/rejected": -20.492076873779297,
1025
+ "loss": 1.0025,
1026
+ "rewards/accuracies": 0.925000011920929,
1027
+ "rewards/chosen": 0.024464499205350876,
1028
+ "rewards/margins": 0.055416546761989594,
1029
+ "rewards/rejected": -0.030952051281929016,
1030
+ "step": 670
1031
+ },
1032
+ {
1033
+ "epoch": 8.831168831168831,
1034
+ "grad_norm": 1.2769261598587036,
1035
+ "learning_rate": 2.1932611833775846e-07,
1036
+ "logits/chosen": -2.8169243335723877,
1037
+ "logits/rejected": -2.8119499683380127,
1038
+ "logps/chosen": -8.214499473571777,
1039
+ "logps/rejected": -22.425582885742188,
1040
+ "loss": 1.2029,
1041
+ "rewards/accuracies": 0.925000011920929,
1042
+ "rewards/chosen": 0.02633916214108467,
1043
+ "rewards/margins": 0.05255778878927231,
1044
+ "rewards/rejected": -0.026218628510832787,
1045
+ "step": 680
1046
+ },
1047
+ {
1048
+ "epoch": 8.96103896103896,
1049
+ "grad_norm": 198.49639892578125,
1050
+ "learning_rate": 1.7383675923228372e-07,
1051
+ "logits/chosen": -2.7998576164245605,
1052
+ "logits/rejected": -2.7877182960510254,
1053
+ "logps/chosen": -10.261676788330078,
1054
+ "logps/rejected": -22.23526954650879,
1055
+ "loss": 1.2189,
1056
+ "rewards/accuracies": 0.875,
1057
+ "rewards/chosen": 0.023275194689631462,
1058
+ "rewards/margins": 0.05330119654536247,
1059
+ "rewards/rejected": -0.030026007443666458,
1060
+ "step": 690
1061
+ },
1062
+ {
1063
+ "epoch": 9.090909090909092,
1064
+ "grad_norm": 1.1314146518707275,
1065
+ "learning_rate": 1.3346081177391474e-07,
1066
+ "logits/chosen": -2.795860767364502,
1067
+ "logits/rejected": -2.784841299057007,
1068
+ "logps/chosen": -10.019143104553223,
1069
+ "logps/rejected": -22.597219467163086,
1070
+ "loss": 0.8856,
1071
+ "rewards/accuracies": 0.8500000238418579,
1072
+ "rewards/chosen": 0.026558348909020424,
1073
+ "rewards/margins": 0.05729994922876358,
1074
+ "rewards/rejected": -0.030741602182388306,
1075
+ "step": 700
1076
+ },
1077
+ {
1078
+ "epoch": 9.220779220779221,
1079
+ "grad_norm": 179.00376892089844,
1080
+ "learning_rate": 9.828703105789983e-08,
1081
+ "logits/chosen": -2.8192873001098633,
1082
+ "logits/rejected": -2.8140900135040283,
1083
+ "logps/chosen": -9.172324180603027,
1084
+ "logps/rejected": -22.661869049072266,
1085
+ "loss": 1.0784,
1086
+ "rewards/accuracies": 0.925000011920929,
1087
+ "rewards/chosen": 0.023906812071800232,
1088
+ "rewards/margins": 0.06065645068883896,
1089
+ "rewards/rejected": -0.036749642342329025,
1090
+ "step": 710
1091
+ },
1092
+ {
1093
+ "epoch": 9.35064935064935,
1094
+ "grad_norm": 69.8018798828125,
1095
+ "learning_rate": 6.839273668796747e-08,
1096
+ "logits/chosen": -2.8201510906219482,
1097
+ "logits/rejected": -2.8087105751037598,
1098
+ "logps/chosen": -8.500712394714355,
1099
+ "logps/rejected": -22.419025421142578,
1100
+ "loss": 0.9798,
1101
+ "rewards/accuracies": 0.8500000238418579,
1102
+ "rewards/chosen": 0.023749757558107376,
1103
+ "rewards/margins": 0.05873861163854599,
1104
+ "rewards/rejected": -0.034988854080438614,
1105
+ "step": 720
1106
+ },
1107
+ {
1108
+ "epoch": 9.480519480519481,
1109
+ "grad_norm": 70.18726348876953,
1110
+ "learning_rate": 4.384364281105974e-08,
1111
+ "logits/chosen": -2.83191180229187,
1112
+ "logits/rejected": -2.824836015701294,
1113
+ "logps/chosen": -9.021523475646973,
1114
+ "logps/rejected": -23.541057586669922,
1115
+ "loss": 0.8762,
1116
+ "rewards/accuracies": 0.925000011920929,
1117
+ "rewards/chosen": 0.024020517244935036,
1118
+ "rewards/margins": 0.06196972727775574,
1119
+ "rewards/rejected": -0.03794920817017555,
1120
+ "step": 730
1121
+ },
1122
+ {
1123
+ "epoch": 9.61038961038961,
1124
+ "grad_norm": 308.2706298828125,
1125
+ "learning_rate": 2.4693713663372643e-08,
1126
+ "logits/chosen": -2.799358367919922,
1127
+ "logits/rejected": -2.7913246154785156,
1128
+ "logps/chosen": -7.899996757507324,
1129
+ "logps/rejected": -21.6778564453125,
1130
+ "loss": 1.3393,
1131
+ "rewards/accuracies": 0.800000011920929,
1132
+ "rewards/chosen": 0.01837027817964554,
1133
+ "rewards/margins": 0.05194631963968277,
1134
+ "rewards/rejected": -0.03357603773474693,
1135
+ "step": 740
1136
+ },
1137
+ {
1138
+ "epoch": 9.74025974025974,
1139
+ "grad_norm": 42.1520881652832,
1140
+ "learning_rate": 1.0985044945254763e-08,
1141
+ "logits/chosen": -2.8439416885375977,
1142
+ "logits/rejected": -2.839962959289551,
1143
+ "logps/chosen": -9.0369291305542,
1144
+ "logps/rejected": -19.963855743408203,
1145
+ "loss": 0.917,
1146
+ "rewards/accuracies": 0.800000011920929,
1147
+ "rewards/chosen": 0.02196243405342102,
1148
+ "rewards/margins": 0.04523957893252373,
1149
+ "rewards/rejected": -0.023277146741747856,
1150
+ "step": 750
1151
+ },
1152
+ {
1153
+ "epoch": 9.87012987012987,
1154
+ "grad_norm": 237.67958068847656,
1155
+ "learning_rate": 3.97807508777894e-06,
1156
+ "logits/chosen": -2.8095672130584717,
1157
+ "logits/rejected": -2.8042306900024414,
1158
+ "logps/chosen": -8.958699226379395,
1159
+ "logps/rejected": -20.755828857421875,
1160
+ "loss": 1.4632,
1161
+ "rewards/accuracies": 0.75,
1162
+ "rewards/chosen": 0.021344048902392387,
1163
+ "rewards/margins": 0.05193667858839035,
1164
+ "rewards/rejected": -0.030592631548643112,
1165
+ "step": 760
1166
+ },
1167
+ {
1168
+ "epoch": 10.0,
1169
+ "grad_norm": 415.3313293457031,
1170
+ "learning_rate": 3.949264905820697e-06,
1171
+ "logits/chosen": -2.8104586601257324,
1172
+ "logits/rejected": -2.8043599128723145,
1173
+ "logps/chosen": -8.099912643432617,
1174
+ "logps/rejected": -22.543119430541992,
1175
+ "loss": 1.0218,
1176
+ "rewards/accuracies": 0.8500000238418579,
1177
+ "rewards/chosen": 0.029804136604070663,
1178
+ "rewards/margins": 0.06551304459571838,
1179
+ "rewards/rejected": -0.03570891544222832,
1180
+ "step": 770
1181
+ },
1182
+ {
1183
+ "epoch": 10.12987012987013,
1184
+ "grad_norm": 1.33208429813385,
1185
+ "learning_rate": 3.92016186682789e-06,
1186
+ "logits/chosen": -2.8259775638580322,
1187
+ "logits/rejected": -2.8215713500976562,
1188
+ "logps/chosen": -9.199613571166992,
1189
+ "logps/rejected": -21.45523452758789,
1190
+ "loss": 0.8068,
1191
+ "rewards/accuracies": 0.925000011920929,
1192
+ "rewards/chosen": 0.02821979485452175,
1193
+ "rewards/margins": 0.05966518074274063,
1194
+ "rewards/rejected": -0.03144538030028343,
1195
+ "step": 780
1196
+ },
1197
+ {
1198
+ "epoch": 10.25974025974026,
1199
+ "grad_norm": 226.0507354736328,
1200
+ "learning_rate": 3.8907718517334405e-06,
1201
+ "logits/chosen": -2.8059029579162598,
1202
+ "logits/rejected": -2.7993156909942627,
1203
+ "logps/chosen": -8.171636581420898,
1204
+ "logps/rejected": -22.19893455505371,
1205
+ "loss": 0.9794,
1206
+ "rewards/accuracies": 0.875,
1207
+ "rewards/chosen": 0.022764720022678375,
1208
+ "rewards/margins": 0.069257453083992,
1209
+ "rewards/rejected": -0.04649273306131363,
1210
+ "step": 790
1211
+ },
1212
+ {
1213
+ "epoch": 10.38961038961039,
1214
+ "grad_norm": 1.1935486793518066,
1215
+ "learning_rate": 3.861100799460336e-06,
1216
+ "logits/chosen": -2.816051959991455,
1217
+ "logits/rejected": -2.81080961227417,
1218
+ "logps/chosen": -7.233284950256348,
1219
+ "logps/rejected": -22.618621826171875,
1220
+ "loss": 1.1384,
1221
+ "rewards/accuracies": 0.925000011920929,
1222
+ "rewards/chosen": 0.030704358592629433,
1223
+ "rewards/margins": 0.08574527502059937,
1224
+ "rewards/rejected": -0.05504090338945389,
1225
+ "step": 800
1226
+ },
1227
+ {
1228
+ "epoch": 10.38961038961039,
1229
+ "eval_logits/chosen": -2.8031392097473145,
1230
+ "eval_logits/rejected": -2.8060073852539062,
1231
+ "eval_logps/chosen": -13.673626899719238,
1232
+ "eval_logps/rejected": -16.335693359375,
1233
+ "eval_loss": 0.6743522882461548,
1234
+ "eval_rewards/accuracies": 1.0,
1235
+ "eval_rewards/chosen": 0.047690678387880325,
1236
+ "eval_rewards/margins": 0.05417700111865997,
1237
+ "eval_rewards/rejected": -0.006486321333795786,
1238
+ "eval_runtime": 1.1696,
1239
+ "eval_samples_per_second": 11.97,
1240
+ "eval_steps_per_second": 1.71,
1241
+ "step": 800
1242
+ },
1243
+ {
1244
+ "epoch": 10.519480519480519,
1245
+ "grad_norm": 148.8427734375,
1246
+ "learning_rate": 3.831154705721542e-06,
1247
+ "logits/chosen": -2.7960445880889893,
1248
+ "logits/rejected": -2.791477680206299,
1249
+ "logps/chosen": -9.04710578918457,
1250
+ "logps/rejected": -22.726436614990234,
1251
+ "loss": 0.8774,
1252
+ "rewards/accuracies": 0.875,
1253
+ "rewards/chosen": 0.03348678722977638,
1254
+ "rewards/margins": 0.08231980353593826,
1255
+ "rewards/rejected": -0.04883301258087158,
1256
+ "step": 810
1257
+ },
1258
+ {
1259
+ "epoch": 10.64935064935065,
1260
+ "grad_norm": 75.88983154296875,
1261
+ "learning_rate": 3.800939621808419e-06,
1262
+ "logits/chosen": -2.8040685653686523,
1263
+ "logits/rejected": -2.7940433025360107,
1264
+ "logps/chosen": -7.935153007507324,
1265
+ "logps/rejected": -22.834848403930664,
1266
+ "loss": 1.2887,
1267
+ "rewards/accuracies": 0.875,
1268
+ "rewards/chosen": 0.02770247682929039,
1269
+ "rewards/margins": 0.09637979418039322,
1270
+ "rewards/rejected": -0.06867731362581253,
1271
+ "step": 820
1272
+ },
1273
+ {
1274
+ "epoch": 10.779220779220779,
1275
+ "grad_norm": 94.64012145996094,
1276
+ "learning_rate": 3.770461653367934e-06,
1277
+ "logits/chosen": -2.8252198696136475,
1278
+ "logits/rejected": -2.8190674781799316,
1279
+ "logps/chosen": -7.881258964538574,
1280
+ "logps/rejected": -22.468507766723633,
1281
+ "loss": 0.8938,
1282
+ "rewards/accuracies": 0.8500000238418579,
1283
+ "rewards/chosen": 0.03068472072482109,
1284
+ "rewards/margins": 0.0954260528087616,
1285
+ "rewards/rejected": -0.06474132835865021,
1286
+ "step": 830
1287
+ },
1288
+ {
1289
+ "epoch": 10.909090909090908,
1290
+ "grad_norm": 126.55635070800781,
1291
+ "learning_rate": 3.7397269591688666e-06,
1292
+ "logits/chosen": -2.818159580230713,
1293
+ "logits/rejected": -2.8095242977142334,
1294
+ "logps/chosen": -7.300267219543457,
1295
+ "logps/rejected": -21.571325302124023,
1296
+ "loss": 1.1946,
1297
+ "rewards/accuracies": 0.925000011920929,
1298
+ "rewards/chosen": 0.04063314571976662,
1299
+ "rewards/margins": 0.09662587940692902,
1300
+ "rewards/rejected": -0.055992741137742996,
1301
+ "step": 840
1302
+ },
1303
+ {
1304
+ "epoch": 11.03896103896104,
1305
+ "grad_norm": 89.22303771972656,
1306
+ "learning_rate": 3.7087417498572946e-06,
1307
+ "logits/chosen": -2.8335649967193604,
1308
+ "logits/rejected": -2.8254220485687256,
1309
+ "logps/chosen": -7.027431488037109,
1310
+ "logps/rejected": -24.15895652770996,
1311
+ "loss": 1.6269,
1312
+ "rewards/accuracies": 0.949999988079071,
1313
+ "rewards/chosen": 0.021446553990244865,
1314
+ "rewards/margins": 0.0891275405883789,
1315
+ "rewards/rejected": -0.06768098473548889,
1316
+ "step": 850
1317
+ },
1318
+ {
1319
+ "epoch": 11.168831168831169,
1320
+ "grad_norm": 113.87052154541016,
1321
+ "learning_rate": 3.677512286701587e-06,
1322
+ "logits/chosen": -2.822108030319214,
1323
+ "logits/rejected": -2.8177809715270996,
1324
+ "logps/chosen": -7.26416540145874,
1325
+ "logps/rejected": -22.180326461791992,
1326
+ "loss": 0.9047,
1327
+ "rewards/accuracies": 0.925000011920929,
1328
+ "rewards/chosen": 0.03205590695142746,
1329
+ "rewards/margins": 0.09417016804218292,
1330
+ "rewards/rejected": -0.06211426109075546,
1331
+ "step": 860
1332
+ },
1333
+ {
1334
+ "epoch": 11.2987012987013,
1335
+ "grad_norm": 1.4642364978790283,
1336
+ "learning_rate": 3.646044880327176e-06,
1337
+ "logits/chosen": -2.8277363777160645,
1338
+ "logits/rejected": -2.8215818405151367,
1339
+ "logps/chosen": -8.22972297668457,
1340
+ "logps/rejected": -20.709514617919922,
1341
+ "loss": 0.9662,
1342
+ "rewards/accuracies": 0.7749999761581421,
1343
+ "rewards/chosen": 0.027242153882980347,
1344
+ "rewards/margins": 0.07722898572683334,
1345
+ "rewards/rejected": -0.04998684674501419,
1346
+ "step": 870
1347
+ },
1348
+ {
1349
+ "epoch": 11.428571428571429,
1350
+ "grad_norm": 227.3729248046875,
1351
+ "learning_rate": 3.6143458894413463e-06,
1352
+ "logits/chosen": -2.815782308578491,
1353
+ "logits/rejected": -2.809896230697632,
1354
+ "logps/chosen": -8.15450382232666,
1355
+ "logps/rejected": -23.144954681396484,
1356
+ "loss": 1.009,
1357
+ "rewards/accuracies": 0.9750000238418579,
1358
+ "rewards/chosen": 0.04248720407485962,
1359
+ "rewards/margins": 0.11930873245000839,
1360
+ "rewards/rejected": -0.07682152092456818,
1361
+ "step": 880
1362
+ },
1363
+ {
1364
+ "epoch": 11.558441558441558,
1365
+ "grad_norm": 321.1402893066406,
1366
+ "learning_rate": 3.5824217195483178e-06,
1367
+ "logits/chosen": -2.8049838542938232,
1368
+ "logits/rejected": -2.7966904640197754,
1369
+ "logps/chosen": -7.33872127532959,
1370
+ "logps/rejected": -21.830829620361328,
1371
+ "loss": 1.1877,
1372
+ "rewards/accuracies": 0.8999999761581421,
1373
+ "rewards/chosen": 0.031283579766750336,
1374
+ "rewards/margins": 0.09999363124370575,
1375
+ "rewards/rejected": -0.06871005892753601,
1376
+ "step": 890
1377
+ },
1378
+ {
1379
+ "epoch": 11.688311688311689,
1380
+ "grad_norm": 148.70529174804688,
1381
+ "learning_rate": 3.550278821654866e-06,
1382
+ "logits/chosen": -2.804623603820801,
1383
+ "logits/rejected": -2.7956459522247314,
1384
+ "logps/chosen": -7.542906761169434,
1385
+ "logps/rejected": -21.376934051513672,
1386
+ "loss": 0.899,
1387
+ "rewards/accuracies": 0.8999999761581421,
1388
+ "rewards/chosen": 0.03491160273551941,
1389
+ "rewards/margins": 0.10313661396503448,
1390
+ "rewards/rejected": -0.06822501122951508,
1391
+ "step": 900
1392
+ },
1393
+ {
1394
+ "epoch": 11.818181818181818,
1395
+ "grad_norm": 131.51914978027344,
1396
+ "learning_rate": 3.517923690966747e-06,
1397
+ "logits/chosen": -2.81646990776062,
1398
+ "logits/rejected": -2.803173065185547,
1399
+ "logps/chosen": -7.835005760192871,
1400
+ "logps/rejected": -24.5490665435791,
1401
+ "loss": 1.2002,
1402
+ "rewards/accuracies": 0.9750000238418579,
1403
+ "rewards/chosen": 0.03581169620156288,
1404
+ "rewards/margins": 0.12795209884643555,
1405
+ "rewards/rejected": -0.09214041382074356,
1406
+ "step": 910
1407
+ },
1408
+ {
1409
+ "epoch": 11.948051948051948,
1410
+ "grad_norm": 1.8541675806045532,
1411
+ "learning_rate": 3.4853628655761946e-06,
1412
+ "logits/chosen": -2.8321688175201416,
1413
+ "logits/rejected": -2.8234333992004395,
1414
+ "logps/chosen": -8.250754356384277,
1415
+ "logps/rejected": -21.983013153076172,
1416
+ "loss": 1.2209,
1417
+ "rewards/accuracies": 0.8500000238418579,
1418
+ "rewards/chosen": 0.02724345028400421,
1419
+ "rewards/margins": 0.11046279966831207,
1420
+ "rewards/rejected": -0.08321934938430786,
1421
+ "step": 920
1422
+ },
1423
+ {
1424
+ "epoch": 12.077922077922079,
1425
+ "grad_norm": 1.8971673250198364,
1426
+ "learning_rate": 3.452602925140751e-06,
1427
+ "logits/chosen": -2.8104381561279297,
1428
+ "logits/rejected": -2.805176258087158,
1429
+ "logps/chosen": -10.041051864624023,
1430
+ "logps/rejected": -22.36850929260254,
1431
+ "loss": 0.8314,
1432
+ "rewards/accuracies": 0.925000011920929,
1433
+ "rewards/chosen": 0.04497869685292244,
1434
+ "rewards/margins": 0.1275285929441452,
1435
+ "rewards/rejected": -0.08254990726709366,
1436
+ "step": 930
1437
+ },
1438
+ {
1439
+ "epoch": 12.207792207792208,
1440
+ "grad_norm": 78.92677307128906,
1441
+ "learning_rate": 3.4196504895536948e-06,
1442
+ "logits/chosen": -2.821592330932617,
1443
+ "logits/rejected": -2.812694549560547,
1444
+ "logps/chosen": -8.034502029418945,
1445
+ "logps/rejected": -21.927505493164062,
1446
+ "loss": 1.4864,
1447
+ "rewards/accuracies": 0.9750000238418579,
1448
+ "rewards/chosen": 0.03274466469883919,
1449
+ "rewards/margins": 0.14650548994541168,
1450
+ "rewards/rejected": -0.11376082897186279,
1451
+ "step": 940
1452
+ },
1453
+ {
1454
+ "epoch": 12.337662337662337,
1455
+ "grad_norm": 151.75515747070312,
1456
+ "learning_rate": 3.386512217606339e-06,
1457
+ "logits/chosen": -2.815185070037842,
1458
+ "logits/rejected": -2.798635721206665,
1459
+ "logps/chosen": -7.672248840332031,
1460
+ "logps/rejected": -24.917593002319336,
1461
+ "loss": 0.7744,
1462
+ "rewards/accuracies": 0.925000011920929,
1463
+ "rewards/chosen": 0.03489464521408081,
1464
+ "rewards/margins": 0.16525618731975555,
1465
+ "rewards/rejected": -0.13036152720451355,
1466
+ "step": 950
1467
+ },
1468
+ {
1469
+ "epoch": 12.467532467532468,
1470
+ "grad_norm": 287.6182556152344,
1471
+ "learning_rate": 3.3531948056424766e-06,
1472
+ "logits/chosen": -2.8206896781921387,
1473
+ "logits/rejected": -2.808096170425415,
1474
+ "logps/chosen": -8.610795021057129,
1475
+ "logps/rejected": -20.904489517211914,
1476
+ "loss": 1.5907,
1477
+ "rewards/accuracies": 0.875,
1478
+ "rewards/chosen": 0.04022675007581711,
1479
+ "rewards/margins": 0.12955889105796814,
1480
+ "rewards/rejected": -0.08933213353157043,
1481
+ "step": 960
1482
+ },
1483
+ {
1484
+ "epoch": 12.597402597402597,
1485
+ "grad_norm": 102.08235168457031,
1486
+ "learning_rate": 3.319704986205223e-06,
1487
+ "logits/chosen": -2.8291194438934326,
1488
+ "logits/rejected": -2.821276903152466,
1489
+ "logps/chosen": -7.160222053527832,
1490
+ "logps/rejected": -22.413909912109375,
1491
+ "loss": 2.002,
1492
+ "rewards/accuracies": 0.9750000238418579,
1493
+ "rewards/chosen": 0.029990587383508682,
1494
+ "rewards/margins": 0.1707821935415268,
1495
+ "rewards/rejected": -0.1407916247844696,
1496
+ "step": 970
1497
+ },
1498
+ {
1499
+ "epoch": 12.727272727272727,
1500
+ "grad_norm": 293.0462646484375,
1501
+ "learning_rate": 3.28604952667656e-06,
1502
+ "logits/chosen": -2.811882734298706,
1503
+ "logits/rejected": -2.8050527572631836,
1504
+ "logps/chosen": -8.143568992614746,
1505
+ "logps/rejected": -20.997821807861328,
1506
+ "loss": 1.7693,
1507
+ "rewards/accuracies": 0.875,
1508
+ "rewards/chosen": 0.03135104477405548,
1509
+ "rewards/margins": 0.13260604441165924,
1510
+ "rewards/rejected": -0.10125498473644257,
1511
+ "step": 980
1512
+ },
1513
+ {
1514
+ "epoch": 12.857142857142858,
1515
+ "grad_norm": 270.28076171875,
1516
+ "learning_rate": 3.2522352279098256e-06,
1517
+ "logits/chosen": -2.8084704875946045,
1518
+ "logits/rejected": -2.8023064136505127,
1519
+ "logps/chosen": -7.580096244812012,
1520
+ "logps/rejected": -23.871841430664062,
1521
+ "loss": 1.0787,
1522
+ "rewards/accuracies": 0.925000011920929,
1523
+ "rewards/chosen": 0.03514163941144943,
1524
+ "rewards/margins": 0.19183096289634705,
1525
+ "rewards/rejected": -0.15668931603431702,
1526
+ "step": 990
1527
+ },
1528
+ {
1529
+ "epoch": 12.987012987012987,
1530
+ "grad_norm": 147.3173065185547,
1531
+ "learning_rate": 3.218268922855452e-06,
1532
+ "logits/chosen": -2.8337278366088867,
1533
+ "logits/rejected": -2.822435140609741,
1534
+ "logps/chosen": -7.920820713043213,
1535
+ "logps/rejected": -23.01383399963379,
1536
+ "loss": 1.3496,
1537
+ "rewards/accuracies": 0.949999988079071,
1538
+ "rewards/chosen": 0.044772375375032425,
1539
+ "rewards/margins": 0.16832272708415985,
1540
+ "rewards/rejected": -0.12355033308267593,
1541
+ "step": 1000
1542
+ },
1543
+ {
1544
+ "epoch": 13.116883116883116,
1545
+ "grad_norm": 1.7505887746810913,
1546
+ "learning_rate": 3.184157475180208e-06,
1547
+ "logits/chosen": -2.8243801593780518,
1548
+ "logits/rejected": -2.8188118934631348,
1549
+ "logps/chosen": -6.851785182952881,
1550
+ "logps/rejected": -22.512981414794922,
1551
+ "loss": 0.7081,
1552
+ "rewards/accuracies": 0.875,
1553
+ "rewards/chosen": 0.046257633715867996,
1554
+ "rewards/margins": 0.17940297722816467,
1555
+ "rewards/rejected": -0.13314534723758698,
1556
+ "step": 1010
1557
+ },
1558
+ {
1559
+ "epoch": 13.246753246753247,
1560
+ "grad_norm": 226.2203826904297,
1561
+ "learning_rate": 3.149907777880239e-06,
1562
+ "logits/chosen": -2.8116049766540527,
1563
+ "logits/rejected": -2.802031993865967,
1564
+ "logps/chosen": -9.796255111694336,
1565
+ "logps/rejected": -23.3902587890625,
1566
+ "loss": 1.5793,
1567
+ "rewards/accuracies": 0.949999988079071,
1568
+ "rewards/chosen": 0.034330084919929504,
1569
+ "rewards/margins": 0.17266739904880524,
1570
+ "rewards/rejected": -0.13833732903003693,
1571
+ "step": 1020
1572
+ },
1573
+ {
1574
+ "epoch": 13.376623376623376,
1575
+ "grad_norm": 103.82469940185547,
1576
+ "learning_rate": 3.1155267518881816e-06,
1577
+ "logits/chosen": -2.8263697624206543,
1578
+ "logits/rejected": -2.8141000270843506,
1579
+ "logps/chosen": -6.771908760070801,
1580
+ "logps/rejected": -24.233366012573242,
1581
+ "loss": 1.0598,
1582
+ "rewards/accuracies": 0.9750000238418579,
1583
+ "rewards/chosen": 0.04042463377118111,
1584
+ "rewards/margins": 0.23938068747520447,
1585
+ "rewards/rejected": -0.19895607233047485,
1586
+ "step": 1030
1587
+ },
1588
+ {
1589
+ "epoch": 13.506493506493506,
1590
+ "grad_norm": 283.45172119140625,
1591
+ "learning_rate": 3.0810213446746323e-06,
1592
+ "logits/chosen": -2.8335254192352295,
1593
+ "logits/rejected": -2.8218533992767334,
1594
+ "logps/chosen": -8.464487075805664,
1595
+ "logps/rejected": -21.802452087402344,
1596
+ "loss": 2.5313,
1597
+ "rewards/accuracies": 0.8999999761581421,
1598
+ "rewards/chosen": 0.0437016561627388,
1599
+ "rewards/margins": 0.1905747503042221,
1600
+ "rewards/rejected": -0.1468731015920639,
1601
+ "step": 1040
1602
+ },
1603
+ {
1604
+ "epoch": 13.636363636363637,
1605
+ "grad_norm": 309.3778076171875,
1606
+ "learning_rate": 3.046398528844248e-06,
1607
+ "logits/chosen": -2.8289742469787598,
1608
+ "logits/rejected": -2.8251280784606934,
1609
+ "logps/chosen": -7.5258283615112305,
1610
+ "logps/rejected": -20.029939651489258,
1611
+ "loss": 1.7409,
1612
+ "rewards/accuracies": 0.925000011920929,
1613
+ "rewards/chosen": 0.05584300309419632,
1614
+ "rewards/margins": 0.15009096264839172,
1615
+ "rewards/rejected": -0.0942479595541954,
1616
+ "step": 1050
1617
+ },
1618
+ {
1619
+ "epoch": 13.766233766233766,
1620
+ "grad_norm": 229.9144744873047,
1621
+ "learning_rate": 3.0116653007267753e-06,
1622
+ "logits/chosen": -2.8172080516815186,
1623
+ "logits/rejected": -2.810072183609009,
1624
+ "logps/chosen": -7.817892551422119,
1625
+ "logps/rejected": -21.542694091796875,
1626
+ "loss": 1.3013,
1627
+ "rewards/accuracies": 0.9750000238418579,
1628
+ "rewards/chosen": 0.05069408565759659,
1629
+ "rewards/margins": 0.1945628523826599,
1630
+ "rewards/rejected": -0.14386877417564392,
1631
+ "step": 1060
1632
+ },
1633
+ {
1634
+ "epoch": 13.896103896103895,
1635
+ "grad_norm": 74.20500946044922,
1636
+ "learning_rate": 2.9768286789632845e-06,
1637
+ "logits/chosen": -2.8088202476501465,
1638
+ "logits/rejected": -2.7986044883728027,
1639
+ "logps/chosen": -9.273926734924316,
1640
+ "logps/rejected": -22.66620445251465,
1641
+ "loss": 0.8765,
1642
+ "rewards/accuracies": 0.949999988079071,
1643
+ "rewards/chosen": 0.058945827186107635,
1644
+ "rewards/margins": 0.23349115252494812,
1645
+ "rewards/rejected": -0.1745453178882599,
1646
+ "step": 1070
1647
+ },
1648
+ {
1649
+ "epoch": 14.025974025974026,
1650
+ "grad_norm": 569.8877563476562,
1651
+ "learning_rate": 2.9418957030878876e-06,
1652
+ "logits/chosen": -2.825758457183838,
1653
+ "logits/rejected": -2.819606304168701,
1654
+ "logps/chosen": -7.458970546722412,
1655
+ "logps/rejected": -22.59342384338379,
1656
+ "loss": 1.9946,
1657
+ "rewards/accuracies": 0.949999988079071,
1658
+ "rewards/chosen": 0.04193786159157753,
1659
+ "rewards/margins": 0.21687336266040802,
1660
+ "rewards/rejected": -0.1749354898929596,
1661
+ "step": 1080
1662
+ },
1663
+ {
1664
+ "epoch": 14.155844155844155,
1665
+ "grad_norm": 1.655894160270691,
1666
+ "learning_rate": 2.9068734321052445e-06,
1667
+ "logits/chosen": -2.829101800918579,
1668
+ "logits/rejected": -2.821521282196045,
1669
+ "logps/chosen": -8.147821426391602,
1670
+ "logps/rejected": -21.687742233276367,
1671
+ "loss": 1.6034,
1672
+ "rewards/accuracies": 0.8999999761581421,
1673
+ "rewards/chosen": 0.058260608464479446,
1674
+ "rewards/margins": 0.22970101237297058,
1675
+ "rewards/rejected": -0.17144039273262024,
1676
+ "step": 1090
1677
+ },
1678
+ {
1679
+ "epoch": 14.285714285714286,
1680
+ "grad_norm": 226.83702087402344,
1681
+ "learning_rate": 2.871768943064129e-06,
1682
+ "logits/chosen": -2.8222107887268066,
1683
+ "logits/rejected": -2.81400990486145,
1684
+ "logps/chosen": -6.330902576446533,
1685
+ "logps/rejected": -25.222558975219727,
1686
+ "loss": 0.9428,
1687
+ "rewards/accuracies": 0.9750000238418579,
1688
+ "rewards/chosen": 0.043078385293483734,
1689
+ "rewards/margins": 0.31059008836746216,
1690
+ "rewards/rejected": -0.26751166582107544,
1691
+ "step": 1100
1692
+ },
1693
+ {
1694
+ "epoch": 14.415584415584416,
1695
+ "grad_norm": 542.2830200195312,
1696
+ "learning_rate": 2.836589329627349e-06,
1697
+ "logits/chosen": -2.829596996307373,
1698
+ "logits/rejected": -2.8249754905700684,
1699
+ "logps/chosen": -7.770198822021484,
1700
+ "logps/rejected": -23.904422760009766,
1701
+ "loss": 1.6431,
1702
+ "rewards/accuracies": 0.9750000238418579,
1703
+ "rewards/chosen": 0.05304824188351631,
1704
+ "rewards/margins": 0.2782563269138336,
1705
+ "rewards/rejected": -0.2252080738544464,
1706
+ "step": 1110
1707
+ },
1708
+ {
1709
+ "epoch": 14.545454545454545,
1710
+ "grad_norm": 566.6854248046875,
1711
+ "learning_rate": 2.8013417006383078e-06,
1712
+ "logits/chosen": -2.814929962158203,
1713
+ "logits/rejected": -2.807624340057373,
1714
+ "logps/chosen": -8.236068725585938,
1715
+ "logps/rejected": -23.396425247192383,
1716
+ "loss": 1.9727,
1717
+ "rewards/accuracies": 0.949999988079071,
1718
+ "rewards/chosen": 0.04831843078136444,
1719
+ "rewards/margins": 0.24669453501701355,
1720
+ "rewards/rejected": -0.19837608933448792,
1721
+ "step": 1120
1722
+ },
1723
+ {
1724
+ "epoch": 14.675324675324676,
1725
+ "grad_norm": 144.7385711669922,
1726
+ "learning_rate": 2.766033178684506e-06,
1727
+ "logits/chosen": -2.812544107437134,
1728
+ "logits/rejected": -2.8035783767700195,
1729
+ "logps/chosen": -7.520627498626709,
1730
+ "logps/rejected": -23.25860023498535,
1731
+ "loss": 1.074,
1732
+ "rewards/accuracies": 1.0,
1733
+ "rewards/chosen": 0.04993298649787903,
1734
+ "rewards/margins": 0.31407758593559265,
1735
+ "rewards/rejected": -0.2641445994377136,
1736
+ "step": 1130
1737
+ },
1738
+ {
1739
+ "epoch": 14.805194805194805,
1740
+ "grad_norm": 1.8980822563171387,
1741
+ "learning_rate": 2.730670898658255e-06,
1742
+ "logits/chosen": -2.816342830657959,
1743
+ "logits/rejected": -2.805983066558838,
1744
+ "logps/chosen": -8.278943061828613,
1745
+ "logps/rejected": -24.814987182617188,
1746
+ "loss": 1.2085,
1747
+ "rewards/accuracies": 0.925000011920929,
1748
+ "rewards/chosen": 0.056690942496061325,
1749
+ "rewards/margins": 0.3225446939468384,
1750
+ "rewards/rejected": -0.26585373282432556,
1751
+ "step": 1140
1752
+ },
1753
+ {
1754
+ "epoch": 14.935064935064934,
1755
+ "grad_norm": 323.37408447265625,
1756
+ "learning_rate": 2.695262006314912e-06,
1757
+ "logits/chosen": -2.8120298385620117,
1758
+ "logits/rejected": -2.800677537918091,
1759
+ "logps/chosen": -8.421112060546875,
1760
+ "logps/rejected": -23.05984115600586,
1761
+ "loss": 2.2793,
1762
+ "rewards/accuracies": 0.875,
1763
+ "rewards/chosen": 0.057875752449035645,
1764
+ "rewards/margins": 0.2153436243534088,
1765
+ "rewards/rejected": -0.15746784210205078,
1766
+ "step": 1150
1767
+ },
1768
+ {
1769
+ "epoch": 15.064935064935066,
1770
+ "grad_norm": 192.21383666992188,
1771
+ "learning_rate": 2.6598136568289144e-06,
1772
+ "logits/chosen": -2.8322479724884033,
1773
+ "logits/rejected": -2.827678918838501,
1774
+ "logps/chosen": -6.597090244293213,
1775
+ "logps/rejected": -23.71731948852539,
1776
+ "loss": 1.8258,
1777
+ "rewards/accuracies": 0.949999988079071,
1778
+ "rewards/chosen": 0.050219256430864334,
1779
+ "rewards/margins": 0.29276466369628906,
1780
+ "rewards/rejected": -0.24254541099071503,
1781
+ "step": 1160
1782
+ },
1783
+ {
1784
+ "epoch": 15.194805194805195,
1785
+ "grad_norm": 1.7898062467575073,
1786
+ "learning_rate": 2.6243330133479173e-06,
1787
+ "logits/chosen": -2.8162477016448975,
1788
+ "logits/rejected": -2.810333251953125,
1789
+ "logps/chosen": -7.49225378036499,
1790
+ "logps/rejected": -24.20693588256836,
1791
+ "loss": 2.364,
1792
+ "rewards/accuracies": 0.949999988079071,
1793
+ "rewards/chosen": 0.05513688176870346,
1794
+ "rewards/margins": 0.2901547849178314,
1795
+ "rewards/rejected": -0.23501792550086975,
1796
+ "step": 1170
1797
+ },
1798
+ {
1799
+ "epoch": 15.324675324675324,
1800
+ "grad_norm": 1.774614691734314,
1801
+ "learning_rate": 2.5888272455453136e-06,
1802
+ "logits/chosen": -2.841599225997925,
1803
+ "logits/rejected": -2.830899715423584,
1804
+ "logps/chosen": -7.024038791656494,
1805
+ "logps/rejected": -24.798709869384766,
1806
+ "loss": 1.3787,
1807
+ "rewards/accuracies": 0.925000011920929,
1808
+ "rewards/chosen": 0.05138504505157471,
1809
+ "rewards/margins": 0.30870115756988525,
1810
+ "rewards/rejected": -0.25731611251831055,
1811
+ "step": 1180
1812
+ },
1813
+ {
1814
+ "epoch": 15.454545454545455,
1815
+ "grad_norm": 168.11526489257812,
1816
+ "learning_rate": 2.5533035281714368e-06,
1817
+ "logits/chosen": -2.8249008655548096,
1818
+ "logits/rejected": -2.819967031478882,
1819
+ "logps/chosen": -7.072245121002197,
1820
+ "logps/rejected": -24.478059768676758,
1821
+ "loss": 1.5998,
1822
+ "rewards/accuracies": 0.925000011920929,
1823
+ "rewards/chosen": 0.05316594988107681,
1824
+ "rewards/margins": 0.33188968896865845,
1825
+ "rewards/rejected": -0.2787237763404846,
1826
+ "step": 1190
1827
+ },
1828
+ {
1829
+ "epoch": 15.584415584415584,
1830
+ "grad_norm": 1.7886942625045776,
1831
+ "learning_rate": 2.517769039603744e-06,
1832
+ "logits/chosen": -2.824219226837158,
1833
+ "logits/rejected": -2.8117549419403076,
1834
+ "logps/chosen": -7.654397487640381,
1835
+ "logps/rejected": -26.292510986328125,
1836
+ "loss": 1.1387,
1837
+ "rewards/accuracies": 0.949999988079071,
1838
+ "rewards/chosen": 0.06867384910583496,
1839
+ "rewards/margins": 0.3651869297027588,
1840
+ "rewards/rejected": -0.29651308059692383,
1841
+ "step": 1200
1842
+ },
1843
+ {
1844
+ "epoch": 15.584415584415584,
1845
+ "eval_logits/chosen": -2.8089284896850586,
1846
+ "eval_logits/rejected": -2.8120827674865723,
1847
+ "eval_logps/chosen": -13.108169555664062,
1848
+ "eval_logps/rejected": -16.744564056396484,
1849
+ "eval_loss": 0.6140245795249939,
1850
+ "eval_rewards/accuracies": 0.75,
1851
+ "eval_rewards/chosen": 0.10423628985881805,
1852
+ "eval_rewards/margins": 0.15160974860191345,
1853
+ "eval_rewards/rejected": -0.0473734587430954,
1854
+ "eval_runtime": 1.1713,
1855
+ "eval_samples_per_second": 11.952,
1856
+ "eval_steps_per_second": 1.707,
1857
+ "step": 1200
1858
+ },
1859
+ {
1860
+ "epoch": 15.714285714285714,
1861
+ "grad_norm": 127.52613830566406,
1862
+ "learning_rate": 2.482230960396256e-06,
1863
+ "logits/chosen": -2.817732334136963,
1864
+ "logits/rejected": -2.805518865585327,
1865
+ "logps/chosen": -7.076968193054199,
1866
+ "logps/rejected": -23.43122673034668,
1867
+ "loss": 1.1089,
1868
+ "rewards/accuracies": 0.8999999761581421,
1869
+ "rewards/chosen": 0.057178787887096405,
1870
+ "rewards/margins": 0.35119739174842834,
1871
+ "rewards/rejected": -0.29401862621307373,
1872
+ "step": 1210
1873
+ },
1874
+ {
1875
+ "epoch": 15.844155844155845,
1876
+ "grad_norm": 418.4096984863281,
1877
+ "learning_rate": 2.4466964718285636e-06,
1878
+ "logits/chosen": -2.824800729751587,
1879
+ "logits/rejected": -2.817626714706421,
1880
+ "logps/chosen": -8.242257118225098,
1881
+ "logps/rejected": -24.571439743041992,
1882
+ "loss": 1.7635,
1883
+ "rewards/accuracies": 0.925000011920929,
1884
+ "rewards/chosen": 0.0650298148393631,
1885
+ "rewards/margins": 0.3450847566127777,
1886
+ "rewards/rejected": -0.2800549864768982,
1887
+ "step": 1220
1888
+ },
1889
+ {
1890
+ "epoch": 15.974025974025974,
1891
+ "grad_norm": 1.7215478420257568,
1892
+ "learning_rate": 2.411172754454688e-06,
1893
+ "logits/chosen": -2.83309006690979,
1894
+ "logits/rejected": -2.8249144554138184,
1895
+ "logps/chosen": -9.375545501708984,
1896
+ "logps/rejected": -23.499141693115234,
1897
+ "loss": 1.3375,
1898
+ "rewards/accuracies": 0.9750000238418579,
1899
+ "rewards/chosen": 0.06410713493824005,
1900
+ "rewards/margins": 0.33921390771865845,
1901
+ "rewards/rejected": -0.275106817483902,
1902
+ "step": 1230
1903
+ },
1904
+ {
1905
+ "epoch": 16.103896103896105,
1906
+ "grad_norm": 342.5730895996094,
1907
+ "learning_rate": 2.375666986652083e-06,
1908
+ "logits/chosen": -2.83019757270813,
1909
+ "logits/rejected": -2.8198351860046387,
1910
+ "logps/chosen": -9.095269203186035,
1911
+ "logps/rejected": -22.90569305419922,
1912
+ "loss": 1.4406,
1913
+ "rewards/accuracies": 0.949999988079071,
1914
+ "rewards/chosen": 0.04684508591890335,
1915
+ "rewards/margins": 0.3012949824333191,
1916
+ "rewards/rejected": -0.25444987416267395,
1917
+ "step": 1240
1918
+ },
1919
+ {
1920
+ "epoch": 16.233766233766232,
1921
+ "grad_norm": 1.895964503288269,
1922
+ "learning_rate": 2.3401863431710864e-06,
1923
+ "logits/chosen": -2.8324759006500244,
1924
+ "logits/rejected": -2.823629856109619,
1925
+ "logps/chosen": -6.349139213562012,
1926
+ "logps/rejected": -22.462413787841797,
1927
+ "loss": 0.7482,
1928
+ "rewards/accuracies": 0.9750000238418579,
1929
+ "rewards/chosen": 0.06610914319753647,
1930
+ "rewards/margins": 0.3332839906215668,
1931
+ "rewards/rejected": -0.2671748101711273,
1932
+ "step": 1250
1933
+ },
1934
+ {
1935
+ "epoch": 16.363636363636363,
1936
+ "grad_norm": 246.20245361328125,
1937
+ "learning_rate": 2.3047379936850885e-06,
1938
+ "logits/chosen": -2.8041343688964844,
1939
+ "logits/rejected": -2.794372320175171,
1940
+ "logps/chosen": -8.607588768005371,
1941
+ "logps/rejected": -21.551204681396484,
1942
+ "loss": 2.8934,
1943
+ "rewards/accuracies": 0.949999988079071,
1944
+ "rewards/chosen": 0.05655663087964058,
1945
+ "rewards/margins": 0.2559330463409424,
1946
+ "rewards/rejected": -0.1993764191865921,
1947
+ "step": 1260
1948
+ },
1949
+ {
1950
+ "epoch": 16.493506493506494,
1951
+ "grad_norm": 180.12371826171875,
1952
+ "learning_rate": 2.269329101341745e-06,
1953
+ "logits/chosen": -2.825704336166382,
1954
+ "logits/rejected": -2.813232898712158,
1955
+ "logps/chosen": -8.257078170776367,
1956
+ "logps/rejected": -24.829893112182617,
1957
+ "loss": 1.095,
1958
+ "rewards/accuracies": 0.925000011920929,
1959
+ "rewards/chosen": 0.056694962084293365,
1960
+ "rewards/margins": 0.37473762035369873,
1961
+ "rewards/rejected": -0.3180426359176636,
1962
+ "step": 1270
1963
+ },
1964
+ {
1965
+ "epoch": 16.623376623376622,
1966
+ "grad_norm": 266.23565673828125,
1967
+ "learning_rate": 2.2339668213154943e-06,
1968
+ "logits/chosen": -2.8129374980926514,
1969
+ "logits/rejected": -2.806753158569336,
1970
+ "logps/chosen": -9.956947326660156,
1971
+ "logps/rejected": -27.90561294555664,
1972
+ "loss": 1.3426,
1973
+ "rewards/accuracies": 1.0,
1974
+ "rewards/chosen": 0.06363432109355927,
1975
+ "rewards/margins": 0.42332082986831665,
1976
+ "rewards/rejected": -0.3596864640712738,
1977
+ "step": 1280
1978
+ },
1979
+ {
1980
+ "epoch": 16.753246753246753,
1981
+ "grad_norm": 268.4725036621094,
1982
+ "learning_rate": 2.1986582993616926e-06,
1983
+ "logits/chosen": -2.8167190551757812,
1984
+ "logits/rejected": -2.8063576221466064,
1985
+ "logps/chosen": -7.815527439117432,
1986
+ "logps/rejected": -25.4665584564209,
1987
+ "loss": 2.4032,
1988
+ "rewards/accuracies": 1.0,
1989
+ "rewards/chosen": 0.0594390332698822,
1990
+ "rewards/margins": 0.34777015447616577,
1991
+ "rewards/rejected": -0.28833115100860596,
1992
+ "step": 1290
1993
+ },
1994
+ {
1995
+ "epoch": 16.883116883116884,
1996
+ "grad_norm": 2.1606481075286865,
1997
+ "learning_rate": 2.163410670372652e-06,
1998
+ "logits/chosen": -2.8243236541748047,
1999
+ "logits/rejected": -2.814610719680786,
2000
+ "logps/chosen": -7.246170997619629,
2001
+ "logps/rejected": -25.429073333740234,
2002
+ "loss": 1.2455,
2003
+ "rewards/accuracies": 0.875,
2004
+ "rewards/chosen": 0.07013025879859924,
2005
+ "rewards/margins": 0.4054412841796875,
2006
+ "rewards/rejected": -0.33531102538108826,
2007
+ "step": 1300
2008
+ }
2009
+ ],
2010
+ "logging_steps": 10,
2011
+ "max_steps": 2310,
2012
+ "num_input_tokens_seen": 0,
2013
+ "num_train_epochs": 30,
2014
+ "save_steps": 50,
2015
+ "stateful_callbacks": {
2016
+ "TrainerControl": {
2017
+ "args": {
2018
+ "should_epoch_stop": false,
2019
+ "should_evaluate": false,
2020
+ "should_log": false,
2021
+ "should_save": true,
2022
+ "should_training_stop": false
2023
+ },
2024
+ "attributes": {}
2025
+ }
2026
+ },
2027
+ "total_flos": 0.0,
2028
+ "train_batch_size": 2,
2029
+ "trial_name": null,
2030
+ "trial_params": null
2031
+ }
weight_dir/DpoWeight/DPOP_Fix_ND3V1/checkpoint-1300/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcfbbe20fe71ddf66bf6192d5e1ac58297b542012aef9e192577497ecf0bcf70
3
+ size 5944
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: ./CodeLlama-7b-Instruct-hf
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.11.1
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "./CodeLlama-7b-Instruct-hf",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "v_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b282a5cf93953598ceac2f52df00bf903105e30948b8eed7d963192ddaa2f1d
3
+ size 8433034
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<PAD>": 32016
3
+ }
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c549fd6e8d5b46fcb98fa5ffd3b8d4f98982318b8abbf744a59a5f6ec07540b6
3
+ size 12589776
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155a0ff6ca2c8b215e2de05a2046e4af0f44dfbf36ddc1e5540840f63b6566c1
3
+ size 12589840
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4e8acc41a38ca5bae3ca53f95ce12cfd17f6a3c6f3d1314c44ab63d8eff2fb
3
+ size 12589840
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb2ca7d094608660a1769f9e8795610fdcd849fd4767a35767f3bd988030590
3
+ size 12589840
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/global_step8000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0370a3b4b12f58d3ede5c81eb8710c88abb7eb5d7c909f3c900de0c20606fa
3
+ size 8507372
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step8000
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db5c221a5a18ee088159763149e88b1f6677df21c4df4a48421f3a71d3c952fb
3
+ size 15024
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da57800cf6a9e3b0749c2b2264d6781ea412ede7c7a3eff1a5481b49fb4a948f
3
+ size 15024
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a38c281b64adeb9cd597bdf895b41a68c82730b2db11abc5bf968012fbc4842
3
+ size 15024
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72aeb50b87ec962a05c4fd8f40284d218377094be833b3547db1bc4e15c91b9
3
+ size 15024
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<PAD>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
3
+ size 500058
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32016": {
31
+ "content": "<PAD>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "clean_up_tokenization_spaces": false,
41
+ "eos_token": "</s>",
42
+ "legacy": true,
43
+ "model_max_length": 1000000000000000019884624838656,
44
+ "pad_token": "<PAD>",
45
+ "sp_model_kwargs": {},
46
+ "spaces_between_special_tokens": false,
47
+ "tokenizer_class": "LlamaTokenizer",
48
+ "unk_token": "<unk>",
49
+ "use_default_system_prompt": false
50
+ }
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/trainer_state.json ADDED
@@ -0,0 +1,1153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.548582351067219,
5
+ "eval_steps": 10000,
6
+ "global_step": 8000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01592863969417012,
13
+ "grad_norm": 0.2921621799468994,
14
+ "learning_rate": 4.992513062316809e-05,
15
+ "loss": 4.2405,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.03185727938834024,
20
+ "grad_norm": 0.10916037112474442,
21
+ "learning_rate": 4.9845482349942656e-05,
22
+ "loss": 0.0569,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.047785919082510356,
27
+ "grad_norm": 0.11249323934316635,
28
+ "learning_rate": 4.976583407671722e-05,
29
+ "loss": 0.0494,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.06371455877668047,
34
+ "grad_norm": 0.21314933896064758,
35
+ "learning_rate": 4.968618580349178e-05,
36
+ "loss": 0.0506,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.07964319847085059,
41
+ "grad_norm": 0.122999407351017,
42
+ "learning_rate": 4.960653753026634e-05,
43
+ "loss": 0.0465,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.09557183816502071,
48
+ "grad_norm": 0.09448697417974472,
49
+ "learning_rate": 4.952688925704091e-05,
50
+ "loss": 0.0481,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.11150047785919083,
55
+ "grad_norm": 0.09307976812124252,
56
+ "learning_rate": 4.944724098381547e-05,
57
+ "loss": 0.0472,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 0.12742911755336095,
62
+ "grad_norm": 0.14354585111141205,
63
+ "learning_rate": 4.936759271059004e-05,
64
+ "loss": 0.043,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.14335775724753105,
69
+ "grad_norm": 0.10726131498813629,
70
+ "learning_rate": 4.92879444373646e-05,
71
+ "loss": 0.0415,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 0.15928639694170119,
76
+ "grad_norm": 0.12725014984607697,
77
+ "learning_rate": 4.920829616413916e-05,
78
+ "loss": 0.0468,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 0.1752150366358713,
83
+ "grad_norm": 0.12311285734176636,
84
+ "learning_rate": 4.9128647890913724e-05,
85
+ "loss": 0.0443,
86
+ "step": 550
87
+ },
88
+ {
89
+ "epoch": 0.19114367633004142,
90
+ "grad_norm": 0.1121256873011589,
91
+ "learning_rate": 4.904899961768829e-05,
92
+ "loss": 0.0397,
93
+ "step": 600
94
+ },
95
+ {
96
+ "epoch": 0.20707231602421153,
97
+ "grad_norm": 0.08886804431676865,
98
+ "learning_rate": 4.896935134446285e-05,
99
+ "loss": 0.0407,
100
+ "step": 650
101
+ },
102
+ {
103
+ "epoch": 0.22300095571838166,
104
+ "grad_norm": 0.09782330691814423,
105
+ "learning_rate": 4.888970307123742e-05,
106
+ "loss": 0.0445,
107
+ "step": 700
108
+ },
109
+ {
110
+ "epoch": 0.23892959541255177,
111
+ "grad_norm": 0.08494489639997482,
112
+ "learning_rate": 4.881005479801198e-05,
113
+ "loss": 0.0414,
114
+ "step": 750
115
+ },
116
+ {
117
+ "epoch": 0.2548582351067219,
118
+ "grad_norm": 0.0991586446762085,
119
+ "learning_rate": 4.873040652478654e-05,
120
+ "loss": 0.0432,
121
+ "step": 800
122
+ },
123
+ {
124
+ "epoch": 0.27078687480089203,
125
+ "grad_norm": 0.10855372995138168,
126
+ "learning_rate": 4.8650758251561105e-05,
127
+ "loss": 0.0394,
128
+ "step": 850
129
+ },
130
+ {
131
+ "epoch": 0.2867155144950621,
132
+ "grad_norm": 0.08806545287370682,
133
+ "learning_rate": 4.857110997833567e-05,
134
+ "loss": 0.0402,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 0.30264415418923224,
139
+ "grad_norm": 0.13318926095962524,
140
+ "learning_rate": 4.8491461705110234e-05,
141
+ "loss": 0.0374,
142
+ "step": 950
143
+ },
144
+ {
145
+ "epoch": 0.31857279388340237,
146
+ "grad_norm": 0.1285628378391266,
147
+ "learning_rate": 4.84118134318848e-05,
148
+ "loss": 0.0426,
149
+ "step": 1000
150
+ },
151
+ {
152
+ "epoch": 0.33450143357757245,
153
+ "grad_norm": 0.07570258527994156,
154
+ "learning_rate": 4.833216515865936e-05,
155
+ "loss": 0.0406,
156
+ "step": 1050
157
+ },
158
+ {
159
+ "epoch": 0.3504300732717426,
160
+ "grad_norm": 0.14918510615825653,
161
+ "learning_rate": 4.825251688543392e-05,
162
+ "loss": 0.041,
163
+ "step": 1100
164
+ },
165
+ {
166
+ "epoch": 0.3663587129659127,
167
+ "grad_norm": 0.14587858319282532,
168
+ "learning_rate": 4.8172868612208485e-05,
169
+ "loss": 0.0387,
170
+ "step": 1150
171
+ },
172
+ {
173
+ "epoch": 0.38228735266008285,
174
+ "grad_norm": 0.15989379584789276,
175
+ "learning_rate": 4.809322033898305e-05,
176
+ "loss": 0.0429,
177
+ "step": 1200
178
+ },
179
+ {
180
+ "epoch": 0.3982159923542529,
181
+ "grad_norm": 0.09545516967773438,
182
+ "learning_rate": 4.8013572065757615e-05,
183
+ "loss": 0.0369,
184
+ "step": 1250
185
+ },
186
+ {
187
+ "epoch": 0.41414463204842306,
188
+ "grad_norm": 0.10800140351057053,
189
+ "learning_rate": 4.793392379253218e-05,
190
+ "loss": 0.042,
191
+ "step": 1300
192
+ },
193
+ {
194
+ "epoch": 0.4300732717425932,
195
+ "grad_norm": 0.10968200862407684,
196
+ "learning_rate": 4.7854275519306744e-05,
197
+ "loss": 0.0368,
198
+ "step": 1350
199
+ },
200
+ {
201
+ "epoch": 0.4460019114367633,
202
+ "grad_norm": 0.1094348132610321,
203
+ "learning_rate": 4.777462724608131e-05,
204
+ "loss": 0.038,
205
+ "step": 1400
206
+ },
207
+ {
208
+ "epoch": 0.4619305511309334,
209
+ "grad_norm": 0.09409014135599136,
210
+ "learning_rate": 4.7694978972855866e-05,
211
+ "loss": 0.0399,
212
+ "step": 1450
213
+ },
214
+ {
215
+ "epoch": 0.47785919082510353,
216
+ "grad_norm": 0.09957227855920792,
217
+ "learning_rate": 4.761533069963043e-05,
218
+ "loss": 0.0431,
219
+ "step": 1500
220
+ },
221
+ {
222
+ "epoch": 0.49378783051927366,
223
+ "grad_norm": 0.10479158908128738,
224
+ "learning_rate": 4.7535682426404995e-05,
225
+ "loss": 0.0396,
226
+ "step": 1550
227
+ },
228
+ {
229
+ "epoch": 0.5097164702134438,
230
+ "grad_norm": 0.0960434228181839,
231
+ "learning_rate": 4.745603415317956e-05,
232
+ "loss": 0.0404,
233
+ "step": 1600
234
+ },
235
+ {
236
+ "epoch": 0.5256451099076139,
237
+ "grad_norm": 0.14295724034309387,
238
+ "learning_rate": 4.7376385879954124e-05,
239
+ "loss": 0.0401,
240
+ "step": 1650
241
+ },
242
+ {
243
+ "epoch": 0.5415737496017841,
244
+ "grad_norm": 0.0905664786696434,
245
+ "learning_rate": 4.729673760672869e-05,
246
+ "loss": 0.0378,
247
+ "step": 1700
248
+ },
249
+ {
250
+ "epoch": 0.5575023892959541,
251
+ "grad_norm": 0.13186714053153992,
252
+ "learning_rate": 4.7217089333503254e-05,
253
+ "loss": 0.0391,
254
+ "step": 1750
255
+ },
256
+ {
257
+ "epoch": 0.5734310289901242,
258
+ "grad_norm": 0.07855305820703506,
259
+ "learning_rate": 4.713744106027782e-05,
260
+ "loss": 0.0386,
261
+ "step": 1800
262
+ },
263
+ {
264
+ "epoch": 0.5893596686842943,
265
+ "grad_norm": 0.10626554489135742,
266
+ "learning_rate": 4.705779278705238e-05,
267
+ "loss": 0.038,
268
+ "step": 1850
269
+ },
270
+ {
271
+ "epoch": 0.6052883083784645,
272
+ "grad_norm": 0.09309270232915878,
273
+ "learning_rate": 4.697814451382694e-05,
274
+ "loss": 0.04,
275
+ "step": 1900
276
+ },
277
+ {
278
+ "epoch": 0.6212169480726346,
279
+ "grad_norm": 0.10274514555931091,
280
+ "learning_rate": 4.6898496240601505e-05,
281
+ "loss": 0.0399,
282
+ "step": 1950
283
+ },
284
+ {
285
+ "epoch": 0.6371455877668047,
286
+ "grad_norm": 0.08140850067138672,
287
+ "learning_rate": 4.681884796737607e-05,
288
+ "loss": 0.0395,
289
+ "step": 2000
290
+ },
291
+ {
292
+ "epoch": 0.6530742274609749,
293
+ "grad_norm": 0.09800681471824646,
294
+ "learning_rate": 4.6739199694150634e-05,
295
+ "loss": 0.0396,
296
+ "step": 2050
297
+ },
298
+ {
299
+ "epoch": 0.6690028671551449,
300
+ "grad_norm": 0.12131080776453018,
301
+ "learning_rate": 4.66595514209252e-05,
302
+ "loss": 0.0404,
303
+ "step": 2100
304
+ },
305
+ {
306
+ "epoch": 0.684931506849315,
307
+ "grad_norm": 0.1389102041721344,
308
+ "learning_rate": 4.6579903147699763e-05,
309
+ "loss": 0.0378,
310
+ "step": 2150
311
+ },
312
+ {
313
+ "epoch": 0.7008601465434852,
314
+ "grad_norm": 0.12080667912960052,
315
+ "learning_rate": 4.650025487447433e-05,
316
+ "loss": 0.0366,
317
+ "step": 2200
318
+ },
319
+ {
320
+ "epoch": 0.7167887862376553,
321
+ "grad_norm": 0.09532010555267334,
322
+ "learning_rate": 4.642060660124889e-05,
323
+ "loss": 0.038,
324
+ "step": 2250
325
+ },
326
+ {
327
+ "epoch": 0.7327174259318254,
328
+ "grad_norm": 0.09826835989952087,
329
+ "learning_rate": 4.634095832802345e-05,
330
+ "loss": 0.0358,
331
+ "step": 2300
332
+ },
333
+ {
334
+ "epoch": 0.7486460656259956,
335
+ "grad_norm": 0.11227104812860489,
336
+ "learning_rate": 4.6261310054798015e-05,
337
+ "loss": 0.0362,
338
+ "step": 2350
339
+ },
340
+ {
341
+ "epoch": 0.7645747053201657,
342
+ "grad_norm": 0.10029356181621552,
343
+ "learning_rate": 4.618166178157258e-05,
344
+ "loss": 0.0396,
345
+ "step": 2400
346
+ },
347
+ {
348
+ "epoch": 0.7805033450143358,
349
+ "grad_norm": 0.10683028399944305,
350
+ "learning_rate": 4.6102013508347144e-05,
351
+ "loss": 0.0389,
352
+ "step": 2450
353
+ },
354
+ {
355
+ "epoch": 0.7964319847085058,
356
+ "grad_norm": 0.08704536408185959,
357
+ "learning_rate": 4.602236523512171e-05,
358
+ "loss": 0.0364,
359
+ "step": 2500
360
+ },
361
+ {
362
+ "epoch": 0.812360624402676,
363
+ "grad_norm": 0.13864809274673462,
364
+ "learning_rate": 4.594271696189627e-05,
365
+ "loss": 0.0401,
366
+ "step": 2550
367
+ },
368
+ {
369
+ "epoch": 0.8282892640968461,
370
+ "grad_norm": 0.08906254917383194,
371
+ "learning_rate": 4.586306868867083e-05,
372
+ "loss": 0.0362,
373
+ "step": 2600
374
+ },
375
+ {
376
+ "epoch": 0.8442179037910162,
377
+ "grad_norm": 0.11775562167167664,
378
+ "learning_rate": 4.5783420415445396e-05,
379
+ "loss": 0.0404,
380
+ "step": 2650
381
+ },
382
+ {
383
+ "epoch": 0.8601465434851864,
384
+ "grad_norm": 0.12016937136650085,
385
+ "learning_rate": 4.570377214221996e-05,
386
+ "loss": 0.0398,
387
+ "step": 2700
388
+ },
389
+ {
390
+ "epoch": 0.8760751831793565,
391
+ "grad_norm": 0.0807521790266037,
392
+ "learning_rate": 4.5624123868994525e-05,
393
+ "loss": 0.0443,
394
+ "step": 2750
395
+ },
396
+ {
397
+ "epoch": 0.8920038228735266,
398
+ "grad_norm": 0.16572950780391693,
399
+ "learning_rate": 4.554447559576909e-05,
400
+ "loss": 0.0397,
401
+ "step": 2800
402
+ },
403
+ {
404
+ "epoch": 0.9079324625676968,
405
+ "grad_norm": 0.16349689662456512,
406
+ "learning_rate": 4.5464827322543654e-05,
407
+ "loss": 0.0363,
408
+ "step": 2850
409
+ },
410
+ {
411
+ "epoch": 0.9238611022618668,
412
+ "grad_norm": 0.09836030751466751,
413
+ "learning_rate": 4.538517904931821e-05,
414
+ "loss": 0.0355,
415
+ "step": 2900
416
+ },
417
+ {
418
+ "epoch": 0.9397897419560369,
419
+ "grad_norm": 0.09340459853410721,
420
+ "learning_rate": 4.5305530776092776e-05,
421
+ "loss": 0.0372,
422
+ "step": 2950
423
+ },
424
+ {
425
+ "epoch": 0.9557183816502071,
426
+ "grad_norm": 0.11303897202014923,
427
+ "learning_rate": 4.522588250286734e-05,
428
+ "loss": 0.0359,
429
+ "step": 3000
430
+ },
431
+ {
432
+ "epoch": 0.9716470213443772,
433
+ "grad_norm": 0.134646475315094,
434
+ "learning_rate": 4.5146234229641906e-05,
435
+ "loss": 0.0354,
436
+ "step": 3050
437
+ },
438
+ {
439
+ "epoch": 0.9875756610385473,
440
+ "grad_norm": 0.13911692798137665,
441
+ "learning_rate": 4.506658595641647e-05,
442
+ "loss": 0.0364,
443
+ "step": 3100
444
+ },
445
+ {
446
+ "epoch": 1.0035043007327173,
447
+ "grad_norm": 0.09443005919456482,
448
+ "learning_rate": 4.4986937683191035e-05,
449
+ "loss": 0.0338,
450
+ "step": 3150
451
+ },
452
+ {
453
+ "epoch": 1.0194329404268876,
454
+ "grad_norm": 0.12161055952310562,
455
+ "learning_rate": 4.490728940996559e-05,
456
+ "loss": 0.0378,
457
+ "step": 3200
458
+ },
459
+ {
460
+ "epoch": 1.0353615801210576,
461
+ "grad_norm": 0.09010250866413116,
462
+ "learning_rate": 4.482764113674016e-05,
463
+ "loss": 0.0358,
464
+ "step": 3250
465
+ },
466
+ {
467
+ "epoch": 1.0512902198152279,
468
+ "grad_norm": 0.0787167027592659,
469
+ "learning_rate": 4.474799286351472e-05,
470
+ "loss": 0.0375,
471
+ "step": 3300
472
+ },
473
+ {
474
+ "epoch": 1.0672188595093979,
475
+ "grad_norm": 0.10341610759496689,
476
+ "learning_rate": 4.4668344590289286e-05,
477
+ "loss": 0.037,
478
+ "step": 3350
479
+ },
480
+ {
481
+ "epoch": 1.0831474992035681,
482
+ "grad_norm": 0.09436332434415817,
483
+ "learning_rate": 4.458869631706385e-05,
484
+ "loss": 0.035,
485
+ "step": 3400
486
+ },
487
+ {
488
+ "epoch": 1.0990761388977381,
489
+ "grad_norm": 0.11110551655292511,
490
+ "learning_rate": 4.4509048043838415e-05,
491
+ "loss": 0.0394,
492
+ "step": 3450
493
+ },
494
+ {
495
+ "epoch": 1.1150047785919082,
496
+ "grad_norm": 0.11926066130399704,
497
+ "learning_rate": 4.442939977061297e-05,
498
+ "loss": 0.0364,
499
+ "step": 3500
500
+ },
501
+ {
502
+ "epoch": 1.1309334182860784,
503
+ "grad_norm": 0.08972738683223724,
504
+ "learning_rate": 4.434975149738754e-05,
505
+ "loss": 0.0353,
506
+ "step": 3550
507
+ },
508
+ {
509
+ "epoch": 1.1468620579802484,
510
+ "grad_norm": 0.1294146478176117,
511
+ "learning_rate": 4.42701032241621e-05,
512
+ "loss": 0.0347,
513
+ "step": 3600
514
+ },
515
+ {
516
+ "epoch": 1.1627906976744187,
517
+ "grad_norm": 0.18671779334545135,
518
+ "learning_rate": 4.419045495093667e-05,
519
+ "loss": 0.0393,
520
+ "step": 3650
521
+ },
522
+ {
523
+ "epoch": 1.1787193373685887,
524
+ "grad_norm": 0.15697024762630463,
525
+ "learning_rate": 4.411080667771123e-05,
526
+ "loss": 0.0368,
527
+ "step": 3700
528
+ },
529
+ {
530
+ "epoch": 1.194647977062759,
531
+ "grad_norm": 0.17401783168315887,
532
+ "learning_rate": 4.4031158404485796e-05,
533
+ "loss": 0.0346,
534
+ "step": 3750
535
+ },
536
+ {
537
+ "epoch": 1.210576616756929,
538
+ "grad_norm": 0.13288350403308868,
539
+ "learning_rate": 4.3951510131260354e-05,
540
+ "loss": 0.0343,
541
+ "step": 3800
542
+ },
543
+ {
544
+ "epoch": 1.226505256451099,
545
+ "grad_norm": 0.13558614253997803,
546
+ "learning_rate": 4.387186185803492e-05,
547
+ "loss": 0.0385,
548
+ "step": 3850
549
+ },
550
+ {
551
+ "epoch": 1.2424338961452692,
552
+ "grad_norm": 0.08078035712242126,
553
+ "learning_rate": 4.379221358480948e-05,
554
+ "loss": 0.033,
555
+ "step": 3900
556
+ },
557
+ {
558
+ "epoch": 1.2583625358394392,
559
+ "grad_norm": 0.08991101384162903,
560
+ "learning_rate": 4.371256531158405e-05,
561
+ "loss": 0.0371,
562
+ "step": 3950
563
+ },
564
+ {
565
+ "epoch": 1.2742911755336095,
566
+ "grad_norm": 0.09473446011543274,
567
+ "learning_rate": 4.363291703835861e-05,
568
+ "loss": 0.0334,
569
+ "step": 4000
570
+ },
571
+ {
572
+ "epoch": 1.2902198152277795,
573
+ "grad_norm": 0.09711175411939621,
574
+ "learning_rate": 4.355326876513318e-05,
575
+ "loss": 0.0361,
576
+ "step": 4050
577
+ },
578
+ {
579
+ "epoch": 1.3061484549219498,
580
+ "grad_norm": 0.11206386983394623,
581
+ "learning_rate": 4.3473620491907735e-05,
582
+ "loss": 0.0332,
583
+ "step": 4100
584
+ },
585
+ {
586
+ "epoch": 1.3220770946161198,
587
+ "grad_norm": 0.10798367857933044,
588
+ "learning_rate": 4.33939722186823e-05,
589
+ "loss": 0.0367,
590
+ "step": 4150
591
+ },
592
+ {
593
+ "epoch": 1.3380057343102898,
594
+ "grad_norm": 0.13645893335342407,
595
+ "learning_rate": 4.3314323945456864e-05,
596
+ "loss": 0.0353,
597
+ "step": 4200
598
+ },
599
+ {
600
+ "epoch": 1.35393437400446,
601
+ "grad_norm": 0.10864555090665817,
602
+ "learning_rate": 4.323467567223143e-05,
603
+ "loss": 0.0361,
604
+ "step": 4250
605
+ },
606
+ {
607
+ "epoch": 1.36986301369863,
608
+ "grad_norm": 0.12079176306724548,
609
+ "learning_rate": 4.315502739900599e-05,
610
+ "loss": 0.0329,
611
+ "step": 4300
612
+ },
613
+ {
614
+ "epoch": 1.3857916533928003,
615
+ "grad_norm": 0.1080985963344574,
616
+ "learning_rate": 4.307537912578056e-05,
617
+ "loss": 0.0368,
618
+ "step": 4350
619
+ },
620
+ {
621
+ "epoch": 1.4017202930869703,
622
+ "grad_norm": 0.1214875727891922,
623
+ "learning_rate": 4.2995730852555115e-05,
624
+ "loss": 0.033,
625
+ "step": 4400
626
+ },
627
+ {
628
+ "epoch": 1.4176489327811406,
629
+ "grad_norm": 0.0906578078866005,
630
+ "learning_rate": 4.291608257932968e-05,
631
+ "loss": 0.0361,
632
+ "step": 4450
633
+ },
634
+ {
635
+ "epoch": 1.4335775724753106,
636
+ "grad_norm": 0.11563856154680252,
637
+ "learning_rate": 4.2836434306104244e-05,
638
+ "loss": 0.0335,
639
+ "step": 4500
640
+ },
641
+ {
642
+ "epoch": 1.4495062121694806,
643
+ "grad_norm": 0.1468946486711502,
644
+ "learning_rate": 4.275678603287881e-05,
645
+ "loss": 0.0335,
646
+ "step": 4550
647
+ },
648
+ {
649
+ "epoch": 1.4654348518636509,
650
+ "grad_norm": 0.16663908958435059,
651
+ "learning_rate": 4.2677137759653374e-05,
652
+ "loss": 0.0354,
653
+ "step": 4600
654
+ },
655
+ {
656
+ "epoch": 1.481363491557821,
657
+ "grad_norm": 0.18749141693115234,
658
+ "learning_rate": 4.259748948642794e-05,
659
+ "loss": 0.0377,
660
+ "step": 4650
661
+ },
662
+ {
663
+ "epoch": 1.4972921312519911,
664
+ "grad_norm": 0.12299200892448425,
665
+ "learning_rate": 4.2517841213202496e-05,
666
+ "loss": 0.0338,
667
+ "step": 4700
668
+ },
669
+ {
670
+ "epoch": 1.5132207709461611,
671
+ "grad_norm": 0.10765209048986435,
672
+ "learning_rate": 4.243819293997706e-05,
673
+ "loss": 0.0368,
674
+ "step": 4750
675
+ },
676
+ {
677
+ "epoch": 1.5291494106403314,
678
+ "grad_norm": 0.09512902796268463,
679
+ "learning_rate": 4.2358544666751625e-05,
680
+ "loss": 0.034,
681
+ "step": 4800
682
+ },
683
+ {
684
+ "epoch": 1.5450780503345014,
685
+ "grad_norm": 0.13502831757068634,
686
+ "learning_rate": 4.227889639352619e-05,
687
+ "loss": 0.0354,
688
+ "step": 4850
689
+ },
690
+ {
691
+ "epoch": 1.5610066900286714,
692
+ "grad_norm": 0.12296276539564133,
693
+ "learning_rate": 4.2199248120300754e-05,
694
+ "loss": 0.0344,
695
+ "step": 4900
696
+ },
697
+ {
698
+ "epoch": 1.5769353297228417,
699
+ "grad_norm": 0.11571130156517029,
700
+ "learning_rate": 4.211959984707532e-05,
701
+ "loss": 0.034,
702
+ "step": 4950
703
+ },
704
+ {
705
+ "epoch": 1.592863969417012,
706
+ "grad_norm": 0.08514443039894104,
707
+ "learning_rate": 4.203995157384988e-05,
708
+ "loss": 0.0339,
709
+ "step": 5000
710
+ },
711
+ {
712
+ "epoch": 1.608792609111182,
713
+ "grad_norm": 0.10442246496677399,
714
+ "learning_rate": 4.196030330062444e-05,
715
+ "loss": 0.0354,
716
+ "step": 5050
717
+ },
718
+ {
719
+ "epoch": 1.624721248805352,
720
+ "grad_norm": 0.0917409136891365,
721
+ "learning_rate": 4.1880655027399006e-05,
722
+ "loss": 0.035,
723
+ "step": 5100
724
+ },
725
+ {
726
+ "epoch": 1.6406498884995222,
727
+ "grad_norm": 0.1276286244392395,
728
+ "learning_rate": 4.180100675417357e-05,
729
+ "loss": 0.0356,
730
+ "step": 5150
731
+ },
732
+ {
733
+ "epoch": 1.6565785281936922,
734
+ "grad_norm": 0.12348821014165878,
735
+ "learning_rate": 4.1721358480948135e-05,
736
+ "loss": 0.0354,
737
+ "step": 5200
738
+ },
739
+ {
740
+ "epoch": 1.6725071678878622,
741
+ "grad_norm": 0.0923234224319458,
742
+ "learning_rate": 4.16417102077227e-05,
743
+ "loss": 0.0353,
744
+ "step": 5250
745
+ },
746
+ {
747
+ "epoch": 1.6884358075820325,
748
+ "grad_norm": 0.08973834663629532,
749
+ "learning_rate": 4.156206193449726e-05,
750
+ "loss": 0.0333,
751
+ "step": 5300
752
+ },
753
+ {
754
+ "epoch": 1.7043644472762027,
755
+ "grad_norm": 0.10418592393398285,
756
+ "learning_rate": 4.148241366127182e-05,
757
+ "loss": 0.0343,
758
+ "step": 5350
759
+ },
760
+ {
761
+ "epoch": 1.7202930869703728,
762
+ "grad_norm": 0.1491956114768982,
763
+ "learning_rate": 4.1402765388046387e-05,
764
+ "loss": 0.0347,
765
+ "step": 5400
766
+ },
767
+ {
768
+ "epoch": 1.7362217266645428,
769
+ "grad_norm": 0.1951647698879242,
770
+ "learning_rate": 4.132311711482095e-05,
771
+ "loss": 0.0294,
772
+ "step": 5450
773
+ },
774
+ {
775
+ "epoch": 1.752150366358713,
776
+ "grad_norm": 0.20774711668491364,
777
+ "learning_rate": 4.1243468841595516e-05,
778
+ "loss": 0.0336,
779
+ "step": 5500
780
+ },
781
+ {
782
+ "epoch": 1.768079006052883,
783
+ "grad_norm": 0.0813850536942482,
784
+ "learning_rate": 4.116382056837008e-05,
785
+ "loss": 0.0359,
786
+ "step": 5550
787
+ },
788
+ {
789
+ "epoch": 1.784007645747053,
790
+ "grad_norm": 0.15596628189086914,
791
+ "learning_rate": 4.108417229514464e-05,
792
+ "loss": 0.0332,
793
+ "step": 5600
794
+ },
795
+ {
796
+ "epoch": 1.7999362854412233,
797
+ "grad_norm": 0.14951321482658386,
798
+ "learning_rate": 4.10045240219192e-05,
799
+ "loss": 0.0337,
800
+ "step": 5650
801
+ },
802
+ {
803
+ "epoch": 1.8158649251353935,
804
+ "grad_norm": 0.13903406262397766,
805
+ "learning_rate": 4.092487574869377e-05,
806
+ "loss": 0.035,
807
+ "step": 5700
808
+ },
809
+ {
810
+ "epoch": 1.8317935648295636,
811
+ "grad_norm": 0.14913810789585114,
812
+ "learning_rate": 4.084522747546833e-05,
813
+ "loss": 0.0336,
814
+ "step": 5750
815
+ },
816
+ {
817
+ "epoch": 1.8477222045237336,
818
+ "grad_norm": 0.16890230774879456,
819
+ "learning_rate": 4.0765579202242896e-05,
820
+ "loss": 0.0372,
821
+ "step": 5800
822
+ },
823
+ {
824
+ "epoch": 1.8636508442179038,
825
+ "grad_norm": 0.12355700880289078,
826
+ "learning_rate": 4.068593092901746e-05,
827
+ "loss": 0.0347,
828
+ "step": 5850
829
+ },
830
+ {
831
+ "epoch": 1.8795794839120739,
832
+ "grad_norm": 0.12095997482538223,
833
+ "learning_rate": 4.060628265579202e-05,
834
+ "loss": 0.0391,
835
+ "step": 5900
836
+ },
837
+ {
838
+ "epoch": 1.8955081236062439,
839
+ "grad_norm": 0.12925802171230316,
840
+ "learning_rate": 4.052663438256658e-05,
841
+ "loss": 0.036,
842
+ "step": 5950
843
+ },
844
+ {
845
+ "epoch": 1.9114367633004141,
846
+ "grad_norm": 0.11391396820545197,
847
+ "learning_rate": 4.044698610934115e-05,
848
+ "loss": 0.0325,
849
+ "step": 6000
850
+ },
851
+ {
852
+ "epoch": 1.9273654029945844,
853
+ "grad_norm": 0.129618838429451,
854
+ "learning_rate": 4.036733783611571e-05,
855
+ "loss": 0.0327,
856
+ "step": 6050
857
+ },
858
+ {
859
+ "epoch": 1.9432940426887544,
860
+ "grad_norm": 0.13725541532039642,
861
+ "learning_rate": 4.028768956289028e-05,
862
+ "loss": 0.0336,
863
+ "step": 6100
864
+ },
865
+ {
866
+ "epoch": 1.9592226823829244,
867
+ "grad_norm": 0.14412935078144073,
868
+ "learning_rate": 4.020804128966484e-05,
869
+ "loss": 0.0325,
870
+ "step": 6150
871
+ },
872
+ {
873
+ "epoch": 1.9751513220770947,
874
+ "grad_norm": 0.10575806349515915,
875
+ "learning_rate": 4.01283930164394e-05,
876
+ "loss": 0.0315,
877
+ "step": 6200
878
+ },
879
+ {
880
+ "epoch": 1.991079961771265,
881
+ "grad_norm": 0.11367379128932953,
882
+ "learning_rate": 4.0048744743213964e-05,
883
+ "loss": 0.0324,
884
+ "step": 6250
885
+ },
886
+ {
887
+ "epoch": 2.0070086014654347,
888
+ "grad_norm": 0.12420395016670227,
889
+ "learning_rate": 3.996909646998853e-05,
890
+ "loss": 0.0305,
891
+ "step": 6300
892
+ },
893
+ {
894
+ "epoch": 2.022937241159605,
895
+ "grad_norm": 0.2189149558544159,
896
+ "learning_rate": 3.988944819676309e-05,
897
+ "loss": 0.0343,
898
+ "step": 6350
899
+ },
900
+ {
901
+ "epoch": 2.038865880853775,
902
+ "grad_norm": 0.08044280856847763,
903
+ "learning_rate": 3.980979992353766e-05,
904
+ "loss": 0.0314,
905
+ "step": 6400
906
+ },
907
+ {
908
+ "epoch": 2.0547945205479454,
909
+ "grad_norm": 0.09585762768983841,
910
+ "learning_rate": 3.973015165031222e-05,
911
+ "loss": 0.0343,
912
+ "step": 6450
913
+ },
914
+ {
915
+ "epoch": 2.0707231602421152,
916
+ "grad_norm": 0.1755801886320114,
917
+ "learning_rate": 3.965050337708679e-05,
918
+ "loss": 0.0333,
919
+ "step": 6500
920
+ },
921
+ {
922
+ "epoch": 2.0866517999362855,
923
+ "grad_norm": 0.14164239168167114,
924
+ "learning_rate": 3.957085510386135e-05,
925
+ "loss": 0.0331,
926
+ "step": 6550
927
+ },
928
+ {
929
+ "epoch": 2.1025804396304557,
930
+ "grad_norm": 0.12496601790189743,
931
+ "learning_rate": 3.9491206830635916e-05,
932
+ "loss": 0.035,
933
+ "step": 6600
934
+ },
935
+ {
936
+ "epoch": 2.1185090793246255,
937
+ "grad_norm": 0.11803654581308365,
938
+ "learning_rate": 3.941155855741048e-05,
939
+ "loss": 0.0312,
940
+ "step": 6650
941
+ },
942
+ {
943
+ "epoch": 2.1344377190187958,
944
+ "grad_norm": 0.09984956681728363,
945
+ "learning_rate": 3.933191028418504e-05,
946
+ "loss": 0.0322,
947
+ "step": 6700
948
+ },
949
+ {
950
+ "epoch": 2.150366358712966,
951
+ "grad_norm": 0.11662815511226654,
952
+ "learning_rate": 3.92522620109596e-05,
953
+ "loss": 0.0332,
954
+ "step": 6750
955
+ },
956
+ {
957
+ "epoch": 2.1662949984071362,
958
+ "grad_norm": 0.12902189791202545,
959
+ "learning_rate": 3.917261373773417e-05,
960
+ "loss": 0.0343,
961
+ "step": 6800
962
+ },
963
+ {
964
+ "epoch": 2.182223638101306,
965
+ "grad_norm": 0.1841822862625122,
966
+ "learning_rate": 3.909296546450873e-05,
967
+ "loss": 0.0339,
968
+ "step": 6850
969
+ },
970
+ {
971
+ "epoch": 2.1981522777954763,
972
+ "grad_norm": 0.09873718023300171,
973
+ "learning_rate": 3.90133171912833e-05,
974
+ "loss": 0.0303,
975
+ "step": 6900
976
+ },
977
+ {
978
+ "epoch": 2.2140809174896465,
979
+ "grad_norm": 0.1674479842185974,
980
+ "learning_rate": 3.893366891805786e-05,
981
+ "loss": 0.032,
982
+ "step": 6950
983
+ },
984
+ {
985
+ "epoch": 2.2300095571838163,
986
+ "grad_norm": 0.13210225105285645,
987
+ "learning_rate": 3.8854020644832426e-05,
988
+ "loss": 0.0352,
989
+ "step": 7000
990
+ },
991
+ {
992
+ "epoch": 2.2459381968779866,
993
+ "grad_norm": 0.20769694447517395,
994
+ "learning_rate": 3.877437237160699e-05,
995
+ "loss": 0.0294,
996
+ "step": 7050
997
+ },
998
+ {
999
+ "epoch": 2.261866836572157,
1000
+ "grad_norm": 0.13857823610305786,
1001
+ "learning_rate": 3.869472409838155e-05,
1002
+ "loss": 0.0343,
1003
+ "step": 7100
1004
+ },
1005
+ {
1006
+ "epoch": 2.277795476266327,
1007
+ "grad_norm": 0.14322370290756226,
1008
+ "learning_rate": 3.861507582515611e-05,
1009
+ "loss": 0.0296,
1010
+ "step": 7150
1011
+ },
1012
+ {
1013
+ "epoch": 2.293724115960497,
1014
+ "grad_norm": 0.11245788633823395,
1015
+ "learning_rate": 3.853542755193068e-05,
1016
+ "loss": 0.0323,
1017
+ "step": 7200
1018
+ },
1019
+ {
1020
+ "epoch": 2.309652755654667,
1021
+ "grad_norm": 0.12236214429140091,
1022
+ "learning_rate": 3.845577927870524e-05,
1023
+ "loss": 0.0311,
1024
+ "step": 7250
1025
+ },
1026
+ {
1027
+ "epoch": 2.3255813953488373,
1028
+ "grad_norm": 0.08712544292211533,
1029
+ "learning_rate": 3.837613100547981e-05,
1030
+ "loss": 0.0341,
1031
+ "step": 7300
1032
+ },
1033
+ {
1034
+ "epoch": 2.341510035043007,
1035
+ "grad_norm": 0.11802078783512115,
1036
+ "learning_rate": 3.829648273225437e-05,
1037
+ "loss": 0.0317,
1038
+ "step": 7350
1039
+ },
1040
+ {
1041
+ "epoch": 2.3574386747371774,
1042
+ "grad_norm": 0.1141052171587944,
1043
+ "learning_rate": 3.821683445902893e-05,
1044
+ "loss": 0.031,
1045
+ "step": 7400
1046
+ },
1047
+ {
1048
+ "epoch": 2.3733673144313476,
1049
+ "grad_norm": 0.15947668254375458,
1050
+ "learning_rate": 3.8137186185803494e-05,
1051
+ "loss": 0.0313,
1052
+ "step": 7450
1053
+ },
1054
+ {
1055
+ "epoch": 2.389295954125518,
1056
+ "grad_norm": 0.11814655363559723,
1057
+ "learning_rate": 3.805753791257806e-05,
1058
+ "loss": 0.0334,
1059
+ "step": 7500
1060
+ },
1061
+ {
1062
+ "epoch": 2.4052245938196877,
1063
+ "grad_norm": 0.18867388367652893,
1064
+ "learning_rate": 3.797788963935262e-05,
1065
+ "loss": 0.0316,
1066
+ "step": 7550
1067
+ },
1068
+ {
1069
+ "epoch": 2.421153233513858,
1070
+ "grad_norm": 0.13499616086483002,
1071
+ "learning_rate": 3.789824136612719e-05,
1072
+ "loss": 0.0304,
1073
+ "step": 7600
1074
+ },
1075
+ {
1076
+ "epoch": 2.437081873208028,
1077
+ "grad_norm": 0.15890513360500336,
1078
+ "learning_rate": 3.781859309290175e-05,
1079
+ "loss": 0.0309,
1080
+ "step": 7650
1081
+ },
1082
+ {
1083
+ "epoch": 2.453010512902198,
1084
+ "grad_norm": 0.10094068199396133,
1085
+ "learning_rate": 3.773894481967631e-05,
1086
+ "loss": 0.0311,
1087
+ "step": 7700
1088
+ },
1089
+ {
1090
+ "epoch": 2.468939152596368,
1091
+ "grad_norm": 0.19545088708400726,
1092
+ "learning_rate": 3.7659296546450874e-05,
1093
+ "loss": 0.033,
1094
+ "step": 7750
1095
+ },
1096
+ {
1097
+ "epoch": 2.4848677922905384,
1098
+ "grad_norm": 0.12802977859973907,
1099
+ "learning_rate": 3.757964827322544e-05,
1100
+ "loss": 0.0332,
1101
+ "step": 7800
1102
+ },
1103
+ {
1104
+ "epoch": 2.5007964319847087,
1105
+ "grad_norm": 0.08226735889911652,
1106
+ "learning_rate": 3.7500000000000003e-05,
1107
+ "loss": 0.0318,
1108
+ "step": 7850
1109
+ },
1110
+ {
1111
+ "epoch": 2.5167250716788785,
1112
+ "grad_norm": 0.11623780429363251,
1113
+ "learning_rate": 3.742035172677457e-05,
1114
+ "loss": 0.0336,
1115
+ "step": 7900
1116
+ },
1117
+ {
1118
+ "epoch": 2.5326537113730487,
1119
+ "grad_norm": 0.16703219711780548,
1120
+ "learning_rate": 3.734070345354913e-05,
1121
+ "loss": 0.0307,
1122
+ "step": 7950
1123
+ },
1124
+ {
1125
+ "epoch": 2.548582351067219,
1126
+ "grad_norm": 0.10822132229804993,
1127
+ "learning_rate": 3.72610551803237e-05,
1128
+ "loss": 0.033,
1129
+ "step": 8000
1130
+ }
1131
+ ],
1132
+ "logging_steps": 50,
1133
+ "max_steps": 31390,
1134
+ "num_input_tokens_seen": 0,
1135
+ "num_train_epochs": 10,
1136
+ "save_steps": 2000,
1137
+ "stateful_callbacks": {
1138
+ "TrainerControl": {
1139
+ "args": {
1140
+ "should_epoch_stop": false,
1141
+ "should_evaluate": false,
1142
+ "should_log": false,
1143
+ "should_save": true,
1144
+ "should_training_stop": false
1145
+ },
1146
+ "attributes": {}
1147
+ }
1148
+ },
1149
+ "total_flos": 4.922138329882821e+18,
1150
+ "train_batch_size": 4,
1151
+ "trial_name": null,
1152
+ "trial_params": null
1153
+ }
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adde3152d6d22eecd1475fa998c1854302f8f17da34801350df8aca4fd5ec59b
3
+ size 7736
weight_dir/loraWeight/fixbycrflp/checkpoint-8000/zero_to_fp32.py ADDED
@@ -0,0 +1,604 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright (c) Microsoft Corporation.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+
6
+ # DeepSpeed Team
7
+
8
+ # This script extracts fp32 consolidated weights from ZeRO stage 1, 2 and 3 DeepSpeed checkpoints. It gets
9
+ # copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
10
+ # the future. Once extracted, the weights don't require DeepSpeed and can be used in any
11
+ # application.
12
+ #
13
+ # example: python zero_to_fp32.py . pytorch_model.bin
14
+
15
+ import argparse
16
+ import torch
17
+ import glob
18
+ import math
19
+ import os
20
+ import re
21
+ from collections import OrderedDict
22
+ from dataclasses import dataclass
23
+
24
+ # while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
25
+ # DeepSpeed data structures it has to be available in the current python environment.
26
+ from deepspeed.utils import logger
27
+ from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
28
+ FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
29
+ FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
30
+
31
+
32
@dataclass
class zero_model_state:
    """Container for the pieces read out of one ``*_model_states.pt`` file.

    One instance is built per model-state file by ``parse_model_states``.
    The annotations are informational only (dataclasses do not enforce them);
    they name types rather than ``dict()`` instances so that tooling which
    inspects ``__annotations__`` sees real types.
    """
    # Buffer name -> tensor (converted with ``.float()`` by ``parse_model_states``).
    buffers: dict
    # Raw ``PARAM_SHAPES`` entry of the checkpoint.
    # NOTE(review): other code in this file iterates this as a sequence of
    # name -> shape dicts (one per param group) - confirm and tighten the annotation.
    param_shapes: dict
    # List of ``[key, value]`` pairs built from the checkpoint's "shared_params" mapping.
    shared_params: list
    # Value of the ``DS_VERSION`` entry; ``None`` when the checkpoint lacks it.
    # NOTE(review): annotated ``int`` but it is only ever interpolated into a
    # "deepspeed==..." message here - confirm the real type.
    ds_version: int
    # ``FROZEN_PARAM_SHAPES`` entry, or ``None`` when the checkpoint has none.
    frozen_param_shapes: dict
    # ``FROZEN_PARAM_FRAGMENTS`` entry, or ``None`` when the checkpoint has none.
    frozen_param_fragments: dict
40
+
41
+
42
# Diagnostics switch: any truthy value enables the extra ``if debug:`` prints in this script.
debug = 0

# Every ``torch.load`` in this script passes this as ``map_location``, i.e. tensors land on the CPU.
device = torch.device('cpu')
46
+
47
+
48
def atoi(text):
    """Convert ``text`` to ``int`` when ``text.isdigit()`` holds; otherwise hand it back unchanged."""
    if text.isdigit():
        return int(text)
    return text
50
+
51
+
52
def natural_keys(text):
    '''
    Sort key for "human" ordering: ``alist.sort(key=natural_keys)``.

    The string is split on runs of digits (the digit runs are kept), and each
    piece is passed through ``atoi`` so digit runs compare as integers.
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))
59
+
60
+
61
def get_model_state_file(checkpoint_dir, zero_stage):
    """Return the path of the single model-states file expected in ``checkpoint_dir``.

    Args:
        checkpoint_dir: directory that should contain the model-states file.
        zero_stage: ZeRO stage number; ``<= 2`` and ``3`` select different file names.

    Returns:
        The joined path of the model-states file.

    Raises:
        FileNotFoundError: ``checkpoint_dir`` is not a directory, or the expected file is missing.
        ValueError: ``zero_stage`` is neither ``<= 2`` nor ``3``. Previously this case fell
            through with ``file`` unassigned and surfaced as an ``UnboundLocalError``.
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # Same wording as the unknown-stage error raised by parse_optim_states.
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file
75
+
76
+
77
def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """Return the files in ``checkpoint_dir`` matching ``glob_pattern``, ordered with ``natural_keys``.

    Raises:
        FileNotFoundError: nothing matched the pattern.
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    search_path = os.path.join(checkpoint_dir, glob_pattern)
    matches = glob.glob(search_path)
    matches.sort(key=natural_keys)

    if not matches:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return matches
85
+
86
+
87
def get_optim_files(checkpoint_dir):
    """Return the ``*_optim_states.pt`` files found in ``checkpoint_dir`` (via ``get_checkpoint_files``)."""
    optim_pattern = "*_optim_states.pt"
    return get_checkpoint_files(checkpoint_dir, optim_pattern)
89
+
90
+
91
def get_model_state_files(checkpoint_dir):
    """Return the ``*_model_states.pt`` files found in ``checkpoint_dir`` (via ``get_checkpoint_files``)."""
    model_pattern = "*_model_states.pt"
    return get_checkpoint_files(checkpoint_dir, model_pattern)
93
+
94
+
95
def parse_model_states(files):
    """Load every model-states file and wrap what this script needs in a ``zero_model_state``.

    Args:
        files: iterable of paths to model-state checkpoint files.

    Returns:
        A list with one ``zero_model_state`` per input file, in input order.

    Raises:
        ValueError: a file does not contain the ``BUFFER_NAMES`` entry.
    """
    parsed_states = []
    for path in files:
        # NOTE(review): torch.load unpickles the file - only use on checkpoints you trust.
        checkpoint = torch.load(path, map_location=device)

        if BUFFER_NAMES not in checkpoint:
            raise ValueError(f"{path} is not a model state checkpoint")
        buffer_names = checkpoint[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # keep only the entries of "module" that are listed as buffers, converting each with .float()
        buffers = {}
        for key, tensor in checkpoint["module"].items():
            if key in buffer_names:
                buffers[key] = tensor.float()
        param_shapes = checkpoint[PARAM_SHAPES]

        # names of every parameter listed in param_shapes (not read again within this function)
        param_names = [name for group in param_shapes for name in group.keys()]

        # frozen parameters are optional in the checkpoint
        frozen_param_shapes = checkpoint.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None:
            if debug:
                print(f"Found frozen_param_shapes: {frozen_param_shapes}")
            param_names.extend(frozen_param_shapes.keys())

        # shared params are stored as a mapping; keep them as [key, value] pairs
        shared_params = [[alias, target] for alias, target in checkpoint["shared_params"].items()]

        ds_version = checkpoint.get(DS_VERSION, None)

        frozen_param_fragments = checkpoint.get(FROZEN_PARAM_FRAGMENTS, None)

        parsed_states.append(
            zero_model_state(buffers=buffers,
                             param_shapes=param_shapes,
                             shared_params=shared_params,
                             ds_version=ds_version,
                             frozen_param_shapes=frozen_param_shapes,
                             frozen_param_fragments=frozen_param_fragments))

    return parsed_states
139
+
140
+
141
def parse_optim_states(files, ds_checkpoint_dir):
    """Load the optimizer-state files and pull out the fp32 flat groups.

    Args:
        files: paths of the ``*_optim_states.pt`` files.
        ds_checkpoint_dir: directory the files came from; only used in the error message.

    Returns:
        ``(zero_stage, world_size, fp32_flat_groups)``. For stage <= 2 the third item holds,
        per file, the value stored under ``SINGLE_PARTITION_OF_FP32_GROUPS``; for stage 3 it
        holds, per file, the ``FP32_FLAT_GROUPS`` tensors concatenated along dim 0.

    Raises:
        ValueError: the first file has no ``ZERO_STAGE`` entry, the number of files does not
            match the recorded partition count, or the stage is neither <= 2 nor 3.
    """
    total_files = len(files)
    state_dicts = []
    for path in files:
        loaded = torch.load(path, map_location=device)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        loaded["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(loaded)

    first_optim_state = state_dicts[0][OPTIMIZER_STATE_DICT]
    if ZERO_STAGE not in first_optim_state:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = first_optim_state[ZERO_STAGE]
    world_size = first_optim_state[PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.
    if type(world_size) is list:
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
    elif zero_stage == 3:
        fp32_groups_key = FP32_FLAT_GROUPS
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    if zero_stage <= 2:
        fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][fp32_groups_key] for sd in state_dicts]
    elif zero_stage == 3:
        # if there is more than one param group, there will be multiple flattened tensors - one
        # flattened tensor per group - for simplicity merge them into a single tensor
        #
        # XXX: could make the script more memory efficient for when there are multiple groups - it
        # will require matching the sub-lists of param_shapes for each param group flattened tensor
        fp32_flat_groups = [torch.cat(sd[OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for sd in state_dicts]

    return zero_stage, world_size, fp32_flat_groups
192
+
193
+
194
def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
    """
    Rebuild an fp32 state_dict from the files of one DeepSpeed checkpoint folder.

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
        - ``exclude_frozen_parameters``: forwarded to the stage-specific merge routine

    Returns:
        - the state_dict produced by the ZeRO-2 (stage <= 2) or ZeRO-3 (stage == 3) merge routine

    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    zero_stage, world_size, fp32_flat_groups = parse_optim_states(get_optim_files(ds_checkpoint_dir),
                                                                  ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    zero_model_states = parse_model_states(get_model_state_files(ds_checkpoint_dir))
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    # pick the merge routine that matches the detected stage
    if zero_stage <= 2:
        merge_checkpoint = _get_fp32_state_dict_from_zero2_checkpoint
    elif zero_stage == 3:
        merge_checkpoint = _get_fp32_state_dict_from_zero3_checkpoint
    else:
        return None

    return merge_checkpoint(world_size, fp32_flat_groups, zero_model_states, exclude_frozen_parameters)
219
+
220
+
221
def _zero2_merge_frozen_params(state_dict, zero_model_states):
    """Copy the frozen-parameter fragments of the first model state into ``state_dict`` (in place).

    Does nothing when the first model state has no (or an empty) ``frozen_param_shapes``.
    Each fragment is stored under its parameter name exactly as found.
    """
    rank0_state = zero_model_states[0]
    if rank0_state.frozen_param_shapes is None or len(rank0_state.frozen_param_shapes) == 0:
        return

    shapes_by_name = rank0_state.frozen_param_shapes
    fragments_by_name = rank0_state.frozen_param_fragments

    if debug:
        num_elem = sum(s.numel() for s in shapes_by_name.values())
        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

    wanted_params = len(shapes_by_name)
    wanted_numel = sum(s.numel() for s in shapes_by_name.values())
    avail_numel = sum([p.numel() for p in fragments_by_name.values()])
    print(f'Frozen params: Have {avail_numel} numels to process.')
    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for total_params, (name, shape) in enumerate(shapes_by_name.items(), start=1):
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        state_dict[name] = fragments_by_name[name]

        if debug:
            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
251
+
252
+
253
+ def _has_callable(obj, fn):
254
+ attr = getattr(obj, fn, None)
255
+ return callable(attr)
256
+
257
+
258
def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """Rebuild the trainable parameters of a ZeRO stage <= 2 checkpoint into ``state_dict`` (in place).

    For every param group the per-rank partitions are concatenated into one flat vector, then
    each parameter is cut out of that vector at a running offset and viewed with its shape.

    Args:
        state_dict: mapping that receives ``name -> tensor`` entries.
        world_size: number of partitions; also drives the ``2 * world_size`` alignment check.
        fp32_flat_groups: per-rank sequences of flat partitions, indexed ``[rank][group]``.
        zero_model_states: parsed model states; only element 0's ``param_shapes`` is read.

    Raises:
        ValueError: after alignment, the consumed element count of a group does not match
            the number of elements available in that group.
    """
    param_shapes = zero_model_states[0].param_shapes

    def _shape_numel(shape):
        # A shape either exposes a callable .numel() or is treated as a plain sequence of ints.
        return shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        return align_to * math.ceil(x / align_to)

    # Reconstruction protocol:
    #
    # XXX: document this

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = []
    for i in range(num_param_groups):
        merged_partitions = [sd[i] for sd in fp32_flat_groups]
        full_single_fp32_vector = torch.cat(merged_partitions, 0)
        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
    avail_numel = sum(
        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])

    if debug:
        wanted_params = sum([len(shapes) for shapes in param_shapes])
        # Use the same numel fallback as the main loop so that turning on debug cannot fail
        # on shapes that lack a .numel() method.
        wanted_numel = sum([sum(_shape_numel(shape) for shape in shapes.values()) for shapes in param_shapes])
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():

            unpartitioned_numel = _shape_numel(shape)
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        # round both counts up to the alignment boundary before comparing them
        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
329
+
330
+
331
def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """Assemble the full state_dict for a ZeRO stage <= 2 checkpoint.

    Order of assembly: buffers of the first model state, then frozen parameters (unless
    ``exclude_frozen_parameters`` is truthy), then trainable parameters, and finally the
    shared-parameter aliases whose target is present.

    Returns:
        An ``OrderedDict`` mapping names to tensors.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # buffers
    buffers = rank0_state.buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters: each pair is [alias, target]; alias gets the target's tensor
    for pair in zero_model_states[0].shared_params:
        alias, target = pair[0], pair[1]
        if target in state_dict:
            state_dict[alias] = state_dict[target]

    return state_dict
352
+
353
+
354
def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """Return ``(partitioned_numel, padding_numel)`` for splitting a parameter across ranks.

    ``partitioned_numel`` is ``ceil(unpartitioned_numel / world_size)``; ``padding_numel`` is
    how many elements are missing to reach the next multiple of ``world_size`` (0 when the
    element count already divides evenly).
    """
    leftover = unpartitioned_numel % world_size
    if leftover:
        padding_numel = world_size - leftover
    else:
        padding_numel = 0
    per_rank_numel = math.ceil(unpartitioned_numel / world_size)
    return per_rank_numel, padding_numel
359
+
360
+
361
def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    """Rebuild frozen parameters of a ZeRO-3 checkpoint into ``state_dict`` (in place).

    Does nothing when the first model state has no (or an empty) ``frozen_param_shapes``.
    For each frozen parameter the fragments from every model state are concatenated along
    dim 0, trimmed to the parameter's element count and viewed with its shape.
    """
    rank0_state = zero_model_states[0]
    if rank0_state.frozen_param_shapes is None or len(rank0_state.frozen_param_shapes) == 0:
        return

    if debug:
        for rank in range(world_size):
            num_elem = sum(s.numel() for s in zero_model_states[rank].frozen_param_fragments.values())
            print(f'rank {rank}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

    frozen_param_shapes = rank0_state.frozen_param_shapes
    wanted_params = len(frozen_param_shapes)
    wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
    rank0_fragment_numel = sum([p.numel() for p in rank0_state.frozen_param_fragments.values()])
    avail_numel = rank0_fragment_numel * world_size
    print(f'Frozen params: Have {avail_numel} numels to process.')
    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in zero_model_states[0].frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        # gather this parameter's fragment from every model state, in order
        param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        joined = torch.cat(param_frags, 0)
        # drop trailing elements beyond the parameter's real size, then restore its shape
        state_dict[name] = joined.narrow(0, 0, unpartitioned_numel).view(shape)

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
395
+
396
+
397
def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """Rebuild the trainable parameters of a ZeRO-3 checkpoint into ``state_dict`` (in place).

    Args:
        state_dict: mapping that receives ``name -> tensor`` entries.
        world_size: number of ranks whose flat tensors are stitched together.
        fp32_flat_groups: one flat tensor per rank.
        zero_model_states: parsed model states; only element 0's ``param_shapes`` is read.

    Raises:
        ValueError: the consumed element count does not equal
            ``fp32_flat_groups[0].numel() * world_size``.
    """
    shape_groups = zero_model_states[0].param_shapes
    avail_numel = fp32_flat_groups[0].numel() * world_size
    # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
    # param, re-consolidating each param, while dealing with padding if any

    # merge list of dicts, preserving order
    param_shapes = {}
    for group in shape_groups:
        for key, value in group.items():
            param_shapes[key] = value

    if debug:
        for rank in range(world_size):
            print(f"{FP32_FLAT_GROUPS}[{rank}].shape={fp32_flat_groups[rank].shape}")

    wanted_params = len(param_shapes)
    wanted_numel = sum(shape.numel() for shape in param_shapes.values())
    # not asserting if there is a mismatch due to possible padding
    print(f"Trainable params: Have {avail_numel} numels to process.")
    print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    offset = 0
    total_numel = 0
    total_params = 0
    for name, shape in param_shapes.items():

        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel
        total_params += 1

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

        # XXX: memory usage doubles here
        # take this parameter's slice from every rank, join them, trim to size, restore shape
        rank_slices = tuple(fp32_flat_groups[rank].narrow(0, offset, partitioned_numel) for rank in range(world_size))
        joined = torch.cat(rank_slices, 0)
        state_dict[name] = joined.narrow(0, 0, unpartitioned_numel).view(shape)
        offset += partitioned_numel

    # offset counted per-rank elements; scale it to compare with the all-rank total
    offset *= world_size

    # Sanity check
    if offset != avail_numel:
        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
449
+
450
+
451
def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """
    Assemble a single ordered state dict from ZeRO-3 per-rank states.

    The result is filled in this order: rank 0's buffers, the frozen parameters (skipped when
    ``exclude_frozen_parameters`` is true), the trainable parameters, and finally aliases for the
    shared parameters recorded in rank 0's ``shared_params``.

    Returns:
        - ``OrderedDict`` holding the merged entries
    """
    merged = OrderedDict()

    # buffers are taken from rank 0 only
    rank0_buffers = zero_model_states[0].buffers
    merged.update(rank0_buffers)
    if debug:
        print(f"added {len(rank0_buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero3_merge_frozen_params(merged, world_size, zero_model_states)

    _zero3_merge_trainable_params(merged, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters: pair[0] becomes an alias of pair[1] when the latter was merged
    for pair in zero_model_states[0].shared_params:
        source_name = pair[1]
        if source_name not in merged:
            continue
        merged[pair[0]] = merged[source_name]

    return merged
472
+
473
+
474
def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
    """
    Build one consolidated fp32 ``state_dict`` out of a ZeRO 2 or 3 checkpoint. The result can be fed
    to ``load_state_dict()`` and used for training without DeepSpeed, or shared with others, for
    example via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint, e.g., ``global_step14``. When omitted, the tag is read from the file named ``latest`` inside ``checkpoint_dir``
        - ``exclude_frozen_parameters``: exclude frozen parameters

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: ``tag`` was not given and ``checkpoint_dir`` has no ``latest`` file
        - ``FileNotFoundError``: the ``checkpoint_dir/tag`` folder does not exist

    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir)
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    """
    if tag is None:
        # Fall back to the tag recorded in the 'latest' marker file.
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if not os.path.isfile(latest_path):
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
        with open(latest_path, 'r') as fd:
            tag = fd.read().strip()

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
    if os.path.isdir(ds_checkpoint_dir):
        return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)

    raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
522
+
523
+
524
def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None, exclude_frozen_parameters=False):
    """
    Consolidate a ZeRO 2 or 3 checkpoint into one fp32 ``state_dict`` and write it to ``output_file``.
    The saved file can be read back with ``torch.load(file)`` + ``load_state_dict()`` and used for
    training without DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
        - ``exclude_frozen_parameters``: exclude frozen parameters
    """
    consolidated = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir,
                                                            tag,
                                                            exclude_frozen_parameters)
    print(f"Saving fp32 state dict to {output_file}")
    torch.save(consolidated, output_file)
539
+
540
+
541
def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    Load the consolidated fp32 weights of a ZeRO 2 or 3 checkpoint into ``model``:

    1. Convert the checkpoint into a single fp32 consolidated ``state_dict``
    2. Move the provided model to cpu
    3. Load the ``state_dict`` into it (non-strict, so key mismatches do not raise)

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``

    Returns:
        - ``model``: modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    """
    logger.info("Extracting fp32 weights")
    fp32_weights = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    model = model.cpu()
    model.load_state_dict(fp32_weights, strict=False)
    return model
578
+
579
+
580
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, publish the debug flag, run the conversion.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "checkpoint_dir",
        type=str,
        help="path to the desired checkpoint folder, e.g., path/checkpoint-12",
    )
    parser.add_argument(
        "output_file",
        type=str,
        help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)",
    )
    parser.add_argument(
        "-t",
        "--tag",
        type=str,
        default=None,
        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1",
    )
    parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    # Module-level flag that the merge helpers check (``if debug:``) before printing extra details.
    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(
        args.checkpoint_dir,
        args.output_file,
        tag=args.tag,
        exclude_frozen_parameters=args.exclude_frozen_parameters,
    )
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: ./CodeLlama-7b-Instruct-hf
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.11.1
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "./CodeLlama-7b-Instruct-hf",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da70a17fee47afe232392efb41959db2fb39a710b36e51755cba3f0651173b81
3
+ size 8433034
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<PAD>": 32016
3
+ }
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f59fb4348dce482deebf38ece351831ee84d9151b63e6685b43f381c24f89e3
3
+ size 12589776
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b156529ae1a1c4708ace9ee3152b50a0a2e3c107efc9998a1a4c63fecb73be
3
+ size 12589840
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ece314cfdcbb4ed6c8655dc845821f529b58a7a64e9295e0e03d6ce4c3f27971
3
+ size 12589840
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9a4c17bbfcbaa0c166f62ad74a2fcbdc8d5ced1df818b16caf2f375c6c6bad
3
+ size 12589840
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/global_step12000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6c65fd295bb30b20fbf30e97d37e724733e14759a6459b074a218d014dd6dd
3
+ size 8507372
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step12000
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6144186c9e0ee485931eb4d99dedd714a16e9deb2967f0b44a941fe715daea34
3
+ size 15024
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b52dc2350899c1747d6f03e5ad15ad8eff3a3da9e789354be34fa58fb06557b
3
+ size 15024
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f148060e8336b10f2da5da6047431bdc25d3a563bd1498b6f14a9d6e0d272b24
3
+ size 15024
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a8cb83b3ada39d011d872f34af1c1648119ffb242ca9c0f615a40d193a66eb7
3
+ size 15024
weight_dir/loraWeight/fixbycrflp2/checkpoint-12000/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<PAD>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }