bboeun committed on
Commit
997028f
·
verified ·
1 Parent(s): 952b2bc

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +202 -0
  2. adapter_config.json +29 -0
  3. adapter_model.safetensors +3 -0
  4. checkpoint-1000/README.md +202 -0
  5. checkpoint-1000/adapter_config.json +29 -0
  6. checkpoint-1000/adapter_model.safetensors +3 -0
  7. checkpoint-1000/optimizer.pt +3 -0
  8. checkpoint-1000/rng_state.pth +3 -0
  9. checkpoint-1000/scheduler.pt +3 -0
  10. checkpoint-1000/special_tokens_map.json +24 -0
  11. checkpoint-1000/tokenizer.model +3 -0
  12. checkpoint-1000/tokenizer_config.json +44 -0
  13. checkpoint-1000/trainer_state.json +1421 -0
  14. checkpoint-1000/training_args.bin +3 -0
  15. checkpoint-1500/README.md +202 -0
  16. checkpoint-1500/adapter_config.json +29 -0
  17. checkpoint-1500/adapter_model.safetensors +3 -0
  18. checkpoint-1500/optimizer.pt +3 -0
  19. checkpoint-1500/rng_state.pth +3 -0
  20. checkpoint-1500/scheduler.pt +3 -0
  21. checkpoint-1500/special_tokens_map.json +24 -0
  22. checkpoint-1500/tokenizer.model +3 -0
  23. checkpoint-1500/tokenizer_config.json +44 -0
  24. checkpoint-1500/trainer_state.json +2121 -0
  25. checkpoint-1500/training_args.bin +3 -0
  26. checkpoint-2000/README.md +202 -0
  27. checkpoint-2000/adapter_config.json +29 -0
  28. checkpoint-2000/adapter_model.safetensors +3 -0
  29. checkpoint-2000/optimizer.pt +3 -0
  30. checkpoint-2000/rng_state.pth +3 -0
  31. checkpoint-2000/scheduler.pt +3 -0
  32. checkpoint-2000/special_tokens_map.json +24 -0
  33. checkpoint-2000/tokenizer.model +3 -0
  34. checkpoint-2000/tokenizer_config.json +44 -0
  35. checkpoint-2000/trainer_state.json +0 -0
  36. checkpoint-2000/training_args.bin +3 -0
  37. checkpoint-500/README.md +202 -0
  38. checkpoint-500/adapter_config.json +29 -0
  39. checkpoint-500/adapter_model.safetensors +3 -0
  40. checkpoint-500/optimizer.pt +3 -0
  41. checkpoint-500/rng_state.pth +3 -0
  42. checkpoint-500/scheduler.pt +3 -0
  43. checkpoint-500/special_tokens_map.json +24 -0
  44. checkpoint-500/tokenizer.model +3 -0
  45. checkpoint-500/tokenizer_config.json +44 -0
  46. checkpoint-500/trainer_state.json +721 -0
  47. checkpoint-500/training_args.bin +3 -0
  48. config.json +26 -0
  49. model-00001-of-00008.safetensors +3 -0
  50. model-00002-of-00008.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.8.2
adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 8,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 4,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "q_proj",
24
+ "v_proj",
25
+ "o_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_rslora": false
29
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c488d137305e1cdedb70e9d692ff2178c8b3531670fd2c0b2f1cb8503e4a86ef
3
+ size 13665336
checkpoint-1000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.8.2
checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 8,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 4,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "q_proj",
24
+ "v_proj",
25
+ "o_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_rslora": false
29
+ }
checkpoint-1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c4d6238b36107b91696fd47eb609bd0afb89a1837d0755cdd64ac6090d15cb1
3
+ size 13665336
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ce7dd41b17a65af35f835c1d351700ce7048b25bc168652aef0e52891424d50
3
+ size 27413893
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2278a87cdf86c3f9219223c847f6b27f6b7f15b8226b617f38936e8ff2cbcde
3
+ size 14575
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10dd98c9317565cf838e3c42a494c01884a047c5ae5c0dbf308fa4359a32727
3
+ size 627
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
34
+ "clean_up_tokenization_spaces": false,
35
+ "eos_token": "</s>",
36
+ "legacy": false,
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "</s>",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": false
44
+ }
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,1421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.4444444444444444,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 5.309734513274336e-07,
14
+ "logits/chosen": -2.1858465671539307,
15
+ "logits/rejected": -2.2539868354797363,
16
+ "logps/chosen": -292.47344970703125,
17
+ "logps/rejected": -334.2834777832031,
18
+ "loss": 2.328,
19
+ "rewards/accuracies": 0.574999988079071,
20
+ "rewards/chosen": -17.95108985900879,
21
+ "rewards/margins": 1.5200703144073486,
22
+ "rewards/rejected": -19.47115707397461,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 1.415929203539823e-06,
28
+ "logits/chosen": -2.250004529953003,
29
+ "logits/rejected": -2.2245919704437256,
30
+ "logps/chosen": -323.00567626953125,
31
+ "logps/rejected": -341.8704528808594,
32
+ "loss": 3.0458,
33
+ "rewards/accuracies": 0.550000011920929,
34
+ "rewards/chosen": -18.9575138092041,
35
+ "rewards/margins": 0.811493992805481,
36
+ "rewards/rejected": -19.76900863647461,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.01,
41
+ "learning_rate": 2.3008849557522127e-06,
42
+ "logits/chosen": -2.2509493827819824,
43
+ "logits/rejected": -2.2362070083618164,
44
+ "logps/chosen": -309.36627197265625,
45
+ "logps/rejected": -354.1287841796875,
46
+ "loss": 2.001,
47
+ "rewards/accuracies": 0.6499999761581421,
48
+ "rewards/chosen": -19.02206039428711,
49
+ "rewards/margins": 2.324467182159424,
50
+ "rewards/rejected": -21.346529006958008,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.02,
55
+ "learning_rate": 3.185840707964602e-06,
56
+ "logits/chosen": -2.261589527130127,
57
+ "logits/rejected": -2.234139919281006,
58
+ "logps/chosen": -341.8447265625,
59
+ "logps/rejected": -361.2301330566406,
60
+ "loss": 2.3698,
61
+ "rewards/accuracies": 0.574999988079071,
62
+ "rewards/chosen": -19.69499397277832,
63
+ "rewards/margins": 1.1049805879592896,
64
+ "rewards/rejected": -20.799976348876953,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.02,
69
+ "learning_rate": 4.070796460176992e-06,
70
+ "logits/chosen": -2.282593250274658,
71
+ "logits/rejected": -2.219956874847412,
72
+ "logps/chosen": -333.1883850097656,
73
+ "logps/rejected": -323.2119140625,
74
+ "loss": 2.3553,
75
+ "rewards/accuracies": 0.574999988079071,
76
+ "rewards/chosen": -19.006275177001953,
77
+ "rewards/margins": 0.894936203956604,
78
+ "rewards/rejected": -19.90121078491211,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.03,
83
+ "learning_rate": 4.955752212389381e-06,
84
+ "logits/chosen": -2.2947192192077637,
85
+ "logits/rejected": -2.191793918609619,
86
+ "logps/chosen": -327.3343200683594,
87
+ "logps/rejected": -302.55914306640625,
88
+ "loss": 3.0721,
89
+ "rewards/accuracies": 0.625,
90
+ "rewards/chosen": -18.95311164855957,
91
+ "rewards/margins": 0.02760167047381401,
92
+ "rewards/rejected": -18.980712890625,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.03,
97
+ "learning_rate": 5.840707964601771e-06,
98
+ "logits/chosen": -2.1300625801086426,
99
+ "logits/rejected": -2.197695255279541,
100
+ "logps/chosen": -296.19586181640625,
101
+ "logps/rejected": -322.7232360839844,
102
+ "loss": 2.5242,
103
+ "rewards/accuracies": 0.5249999761581421,
104
+ "rewards/chosen": -18.607830047607422,
105
+ "rewards/margins": 0.5492460131645203,
106
+ "rewards/rejected": -19.15707778930664,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.04,
111
+ "learning_rate": 6.72566371681416e-06,
112
+ "logits/chosen": -2.191436290740967,
113
+ "logits/rejected": -2.203051805496216,
114
+ "logps/chosen": -322.64581298828125,
115
+ "logps/rejected": -318.93475341796875,
116
+ "loss": 2.4603,
117
+ "rewards/accuracies": 0.625,
118
+ "rewards/chosen": -18.32453155517578,
119
+ "rewards/margins": 1.1701295375823975,
120
+ "rewards/rejected": -19.494661331176758,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.04,
125
+ "learning_rate": 7.610619469026549e-06,
126
+ "logits/chosen": -2.3291049003601074,
127
+ "logits/rejected": -2.13211727142334,
128
+ "logps/chosen": -351.888671875,
129
+ "logps/rejected": -316.0814514160156,
130
+ "loss": 4.3049,
131
+ "rewards/accuracies": 0.375,
132
+ "rewards/chosen": -20.648174285888672,
133
+ "rewards/margins": -1.917999505996704,
134
+ "rewards/rejected": -18.730175018310547,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.04,
139
+ "learning_rate": 8.495575221238938e-06,
140
+ "logits/chosen": -2.326770782470703,
141
+ "logits/rejected": -2.2708096504211426,
142
+ "logps/chosen": -319.8079528808594,
143
+ "logps/rejected": -325.22467041015625,
144
+ "loss": 2.9022,
145
+ "rewards/accuracies": 0.550000011920929,
146
+ "rewards/chosen": -17.211898803710938,
147
+ "rewards/margins": 0.4395485818386078,
148
+ "rewards/rejected": -17.651447296142578,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.05,
153
+ "learning_rate": 9.380530973451329e-06,
154
+ "logits/chosen": -2.2947869300842285,
155
+ "logits/rejected": -2.2642266750335693,
156
+ "logps/chosen": -319.7033386230469,
157
+ "logps/rejected": -301.95684814453125,
158
+ "loss": 2.9535,
159
+ "rewards/accuracies": 0.32499998807907104,
160
+ "rewards/chosen": -18.591039657592773,
161
+ "rewards/margins": -1.4626668691635132,
162
+ "rewards/rejected": -17.128376007080078,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.05,
167
+ "learning_rate": 9.999951373555555e-06,
168
+ "logits/chosen": -2.356776475906372,
169
+ "logits/rejected": -2.2779877185821533,
170
+ "logps/chosen": -332.5343322753906,
171
+ "logps/rejected": -308.6272888183594,
172
+ "loss": 2.8838,
173
+ "rewards/accuracies": 0.42500001192092896,
174
+ "rewards/chosen": -18.02423095703125,
175
+ "rewards/margins": -0.5776697993278503,
176
+ "rewards/rejected": -17.446561813354492,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.06,
181
+ "learning_rate": 9.999086929743288e-06,
182
+ "logits/chosen": -2.34501314163208,
183
+ "logits/rejected": -2.3048901557922363,
184
+ "logps/chosen": -298.5960388183594,
185
+ "logps/rejected": -309.3174743652344,
186
+ "loss": 2.0696,
187
+ "rewards/accuracies": 0.6499999761581421,
188
+ "rewards/chosen": -16.59781265258789,
189
+ "rewards/margins": 0.7586337327957153,
190
+ "rewards/rejected": -17.356447219848633,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.06,
195
+ "learning_rate": 9.997142113313472e-06,
196
+ "logits/chosen": -2.3136909008026123,
197
+ "logits/rejected": -2.3042447566986084,
198
+ "logps/chosen": -292.8536071777344,
199
+ "logps/rejected": -281.0971984863281,
200
+ "loss": 1.8399,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -15.984518051147461,
203
+ "rewards/margins": 0.30002641677856445,
204
+ "rewards/rejected": -16.284543991088867,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.07,
209
+ "learning_rate": 9.994117344568142e-06,
210
+ "logits/chosen": -2.337782144546509,
211
+ "logits/rejected": -2.3470942974090576,
212
+ "logps/chosen": -286.35504150390625,
213
+ "logps/rejected": -303.07684326171875,
214
+ "loss": 1.5656,
215
+ "rewards/accuracies": 0.625,
216
+ "rewards/chosen": -14.58277416229248,
217
+ "rewards/margins": 1.030444860458374,
218
+ "rewards/rejected": -15.61322021484375,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.07,
223
+ "learning_rate": 9.990013277202137e-06,
224
+ "logits/chosen": -2.3595287799835205,
225
+ "logits/rejected": -2.4950690269470215,
226
+ "logps/chosen": -292.61651611328125,
227
+ "logps/rejected": -363.38507080078125,
228
+ "loss": 1.523,
229
+ "rewards/accuracies": 0.625,
230
+ "rewards/chosen": -15.285211563110352,
231
+ "rewards/margins": 2.0152671337127686,
232
+ "rewards/rejected": -17.300477981567383,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.08,
237
+ "learning_rate": 9.984830798161828e-06,
238
+ "logits/chosen": -2.4216346740722656,
239
+ "logits/rejected": -2.35921311378479,
240
+ "logps/chosen": -329.1554870605469,
241
+ "logps/rejected": -308.78326416015625,
242
+ "loss": 2.5844,
243
+ "rewards/accuracies": 0.44999998807907104,
244
+ "rewards/chosen": -14.768890380859375,
245
+ "rewards/margins": -0.32357311248779297,
246
+ "rewards/rejected": -14.445318222045898,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.08,
251
+ "learning_rate": 9.978571027453433e-06,
252
+ "logits/chosen": -2.5200698375701904,
253
+ "logits/rejected": -2.338383674621582,
254
+ "logps/chosen": -296.1730041503906,
255
+ "logps/rejected": -232.0618896484375,
256
+ "loss": 2.4226,
257
+ "rewards/accuracies": 0.375,
258
+ "rewards/chosen": -13.72007942199707,
259
+ "rewards/margins": -0.8905000686645508,
260
+ "rewards/rejected": -12.829577445983887,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.08,
265
+ "learning_rate": 9.971235317900968e-06,
266
+ "logits/chosen": -2.4042282104492188,
267
+ "logits/rejected": -2.4900546073913574,
268
+ "logps/chosen": -219.2891845703125,
269
+ "logps/rejected": -247.385498046875,
270
+ "loss": 1.5221,
271
+ "rewards/accuracies": 0.5,
272
+ "rewards/chosen": -11.559672355651855,
273
+ "rewards/margins": 0.2930552363395691,
274
+ "rewards/rejected": -11.852727890014648,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.09,
279
+ "learning_rate": 9.962825254853888e-06,
280
+ "logits/chosen": -2.591836929321289,
281
+ "logits/rejected": -2.5101170539855957,
282
+ "logps/chosen": -311.3710632324219,
283
+ "logps/rejected": -277.0614318847656,
284
+ "loss": 2.1722,
285
+ "rewards/accuracies": 0.32499998807907104,
286
+ "rewards/chosen": -13.245725631713867,
287
+ "rewards/margins": -1.0529097318649292,
288
+ "rewards/rejected": -12.192815780639648,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.09,
293
+ "learning_rate": 9.954339123272747e-06,
294
+ "logits/chosen": -2.5649514198303223,
295
+ "logits/rejected": -2.4265828132629395,
296
+ "logps/chosen": -250.44009399414062,
297
+ "logps/rejected": -228.14224243164062,
298
+ "loss": 1.4704,
299
+ "rewards/accuracies": 0.32499998807907104,
300
+ "rewards/chosen": -10.943647384643555,
301
+ "rewards/margins": -0.3656729757785797,
302
+ "rewards/rejected": -10.577974319458008,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.1,
307
+ "learning_rate": 9.943892987470688e-06,
308
+ "logits/chosen": -2.559394598007202,
309
+ "logits/rejected": -2.523345470428467,
310
+ "logps/chosen": -260.9962463378906,
311
+ "logps/rejected": -234.96670532226562,
312
+ "loss": 1.709,
313
+ "rewards/accuracies": 0.5,
314
+ "rewards/chosen": -10.393632888793945,
315
+ "rewards/margins": 0.1549229919910431,
316
+ "rewards/rejected": -10.548555374145508,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.1,
321
+ "learning_rate": 9.932378407234108e-06,
322
+ "logits/chosen": -2.63352632522583,
323
+ "logits/rejected": -2.5623555183410645,
324
+ "logps/chosen": -271.7388916015625,
325
+ "logps/rejected": -272.16796875,
326
+ "loss": 1.2704,
327
+ "rewards/accuracies": 0.574999988079071,
328
+ "rewards/chosen": -9.603178024291992,
329
+ "rewards/margins": 0.19006821513175964,
330
+ "rewards/rejected": -9.793245315551758,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.11,
335
+ "learning_rate": 9.919797871024877e-06,
336
+ "logits/chosen": -2.6439247131347656,
337
+ "logits/rejected": -2.6053879261016846,
338
+ "logps/chosen": -229.23764038085938,
339
+ "logps/rejected": -197.8614044189453,
340
+ "loss": 1.405,
341
+ "rewards/accuracies": 0.375,
342
+ "rewards/chosen": -9.295554161071777,
343
+ "rewards/margins": -0.5798273086547852,
344
+ "rewards/rejected": -8.715726852416992,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.11,
349
+ "learning_rate": 9.906154097672858e-06,
350
+ "logits/chosen": -2.6798043251037598,
351
+ "logits/rejected": -2.600550889968872,
352
+ "logps/chosen": -235.1671142578125,
353
+ "logps/rejected": -223.978271484375,
354
+ "loss": 1.2942,
355
+ "rewards/accuracies": 0.574999988079071,
356
+ "rewards/chosen": -8.195772171020508,
357
+ "rewards/margins": -0.00971608143299818,
358
+ "rewards/rejected": -8.186057090759277,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.12,
363
+ "learning_rate": 9.89145003578833e-06,
364
+ "logits/chosen": -2.670474052429199,
365
+ "logits/rejected": -2.6329426765441895,
366
+ "logps/chosen": -224.05068969726562,
367
+ "logps/rejected": -207.1922607421875,
368
+ "loss": 1.0877,
369
+ "rewards/accuracies": 0.5249999761581421,
370
+ "rewards/chosen": -7.675335884094238,
371
+ "rewards/margins": 0.17977333068847656,
372
+ "rewards/rejected": -7.855108737945557,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.12,
377
+ "learning_rate": 9.875688863124766e-06,
378
+ "logits/chosen": -2.620087146759033,
379
+ "logits/rejected": -2.676790714263916,
380
+ "logps/chosen": -255.08486938476562,
381
+ "logps/rejected": -265.8028564453125,
382
+ "loss": 1.0495,
383
+ "rewards/accuracies": 0.6000000238418579,
384
+ "rewards/chosen": -7.672966957092285,
385
+ "rewards/margins": 0.1472960114479065,
386
+ "rewards/rejected": -7.8202619552612305,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.12,
391
+ "learning_rate": 9.858873985892058e-06,
392
+ "logits/chosen": -2.6771128177642822,
393
+ "logits/rejected": -2.5845065116882324,
394
+ "logps/chosen": -222.91311645507812,
395
+ "logps/rejected": -234.68359375,
396
+ "loss": 1.0752,
397
+ "rewards/accuracies": 0.4749999940395355,
398
+ "rewards/chosen": -6.951257228851318,
399
+ "rewards/margins": -0.008678942918777466,
400
+ "rewards/rejected": -6.9425787925720215,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.13,
405
+ "learning_rate": 9.841009038020401e-06,
406
+ "logits/chosen": -2.6333932876586914,
407
+ "logits/rejected": -2.65295147895813,
408
+ "logps/chosen": -204.25399780273438,
409
+ "logps/rejected": -208.4911651611328,
410
+ "loss": 1.0669,
411
+ "rewards/accuracies": 0.5249999761581421,
412
+ "rewards/chosen": -6.73724365234375,
413
+ "rewards/margins": 0.05855642631649971,
414
+ "rewards/rejected": -6.795799255371094,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.13,
419
+ "learning_rate": 9.82209788037494e-06,
420
+ "logits/chosen": -2.685725450515747,
421
+ "logits/rejected": -2.700352907180786,
422
+ "logps/chosen": -230.539794921875,
423
+ "logps/rejected": -240.39224243164062,
424
+ "loss": 1.1248,
425
+ "rewards/accuracies": 0.44999998807907104,
426
+ "rewards/chosen": -6.4564642906188965,
427
+ "rewards/margins": -0.2118469774723053,
428
+ "rewards/rejected": -6.244616985321045,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.14,
433
+ "learning_rate": 9.80214459992139e-06,
434
+ "logits/chosen": -2.714470624923706,
435
+ "logits/rejected": -2.6982994079589844,
436
+ "logps/chosen": -214.0612030029297,
437
+ "logps/rejected": -231.0535125732422,
438
+ "loss": 0.8095,
439
+ "rewards/accuracies": 0.625,
440
+ "rewards/chosen": -6.09361457824707,
441
+ "rewards/margins": 0.35767459869384766,
442
+ "rewards/rejected": -6.45128870010376,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.14,
447
+ "learning_rate": 9.781153508842785e-06,
448
+ "logits/chosen": -2.6795332431793213,
449
+ "logits/rejected": -2.6861202716827393,
450
+ "logps/chosen": -191.6574249267578,
451
+ "logps/rejected": -206.572998046875,
452
+ "loss": 0.9054,
453
+ "rewards/accuracies": 0.574999988079071,
454
+ "rewards/chosen": -6.0128302574157715,
455
+ "rewards/margins": 0.5337953567504883,
456
+ "rewards/rejected": -6.54662561416626,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.15,
461
+ "learning_rate": 9.759129143607547e-06,
462
+ "logits/chosen": -2.719517469406128,
463
+ "logits/rejected": -2.630643367767334,
464
+ "logps/chosen": -228.45797729492188,
465
+ "logps/rejected": -176.00814819335938,
466
+ "loss": 1.1571,
467
+ "rewards/accuracies": 0.4000000059604645,
468
+ "rewards/chosen": -6.262964725494385,
469
+ "rewards/margins": -0.4287610650062561,
470
+ "rewards/rejected": -5.834203243255615,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.15,
475
+ "learning_rate": 9.736076263989103e-06,
476
+ "logits/chosen": -2.753007650375366,
477
+ "logits/rejected": -2.7196168899536133,
478
+ "logps/chosen": -234.21731567382812,
479
+ "logps/rejected": -214.3049774169922,
480
+ "loss": 0.93,
481
+ "rewards/accuracies": 0.42500001192092896,
482
+ "rewards/chosen": -5.743313789367676,
483
+ "rewards/margins": 0.08766243606805801,
484
+ "rewards/rejected": -5.830975532531738,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.16,
489
+ "learning_rate": 9.711999852037226e-06,
490
+ "logits/chosen": -2.702094554901123,
491
+ "logits/rejected": -2.6643381118774414,
492
+ "logps/chosen": -235.38766479492188,
493
+ "logps/rejected": -208.2432861328125,
494
+ "loss": 1.1837,
495
+ "rewards/accuracies": 0.5,
496
+ "rewards/chosen": -5.757768154144287,
497
+ "rewards/margins": -0.27699437737464905,
498
+ "rewards/rejected": -5.480773448944092,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.16,
503
+ "learning_rate": 9.68690511100134e-06,
504
+ "logits/chosen": -2.6954503059387207,
505
+ "logits/rejected": -2.6649551391601562,
506
+ "logps/chosen": -185.06394958496094,
507
+ "logps/rejected": -187.76278686523438,
508
+ "loss": 1.0071,
509
+ "rewards/accuracies": 0.42500001192092896,
510
+ "rewards/chosen": -5.9798102378845215,
511
+ "rewards/margins": -0.11689682304859161,
512
+ "rewards/rejected": -5.862914085388184,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.16,
517
+ "learning_rate": 9.660797464206035e-06,
518
+ "logits/chosen": -2.6881985664367676,
519
+ "logits/rejected": -2.676832914352417,
520
+ "logps/chosen": -195.05517578125,
521
+ "logps/rejected": -212.87161254882812,
522
+ "loss": 0.6422,
523
+ "rewards/accuracies": 0.699999988079071,
524
+ "rewards/chosen": -4.617544174194336,
525
+ "rewards/margins": 0.8601192235946655,
526
+ "rewards/rejected": -5.477663516998291,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.17,
531
+ "learning_rate": 9.633682553879e-06,
532
+ "logits/chosen": -2.749539852142334,
533
+ "logits/rejected": -2.7113490104675293,
534
+ "logps/chosen": -173.92945861816406,
535
+ "logps/rejected": -176.216796875,
536
+ "loss": 0.8915,
537
+ "rewards/accuracies": 0.6000000238418579,
538
+ "rewards/chosen": -5.114466667175293,
539
+ "rewards/margins": 0.10453431308269501,
540
+ "rewards/rejected": -5.219000816345215,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.17,
545
+ "learning_rate": 9.605566239931666e-06,
546
+ "logits/chosen": -2.744715690612793,
547
+ "logits/rejected": -2.6837120056152344,
548
+ "logps/chosen": -200.80999755859375,
549
+ "logps/rejected": -200.7525177001953,
550
+ "loss": 0.633,
551
+ "rewards/accuracies": 0.5249999761581421,
552
+ "rewards/chosen": -4.719931602478027,
553
+ "rewards/margins": 0.698486864566803,
554
+ "rewards/rejected": -5.418419361114502,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.18,
559
+ "learning_rate": 9.576454598692797e-06,
560
+ "logits/chosen": -2.7422823905944824,
561
+ "logits/rejected": -2.7130322456359863,
562
+ "logps/chosen": -204.26626586914062,
563
+ "logps/rejected": -174.83802795410156,
564
+ "loss": 0.9281,
565
+ "rewards/accuracies": 0.4749999940395355,
566
+ "rewards/chosen": -4.756241798400879,
567
+ "rewards/margins": -0.038588762283325195,
568
+ "rewards/rejected": -4.717652320861816,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.18,
573
+ "learning_rate": 9.546353921595306e-06,
574
+ "logits/chosen": -2.7594494819641113,
575
+ "logits/rejected": -2.7436954975128174,
576
+ "logps/chosen": -183.6326141357422,
577
+ "logps/rejected": -186.80911254882812,
578
+ "loss": 0.9906,
579
+ "rewards/accuracies": 0.4749999940395355,
580
+ "rewards/chosen": -4.743472099304199,
581
+ "rewards/margins": -0.156986802816391,
582
+ "rewards/rejected": -4.586484909057617,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.19,
587
+ "learning_rate": 9.515270713816589e-06,
588
+ "logits/chosen": -2.762357711791992,
589
+ "logits/rejected": -2.661778688430786,
590
+ "logps/chosen": -212.29739379882812,
591
+ "logps/rejected": -185.29476928710938,
592
+ "loss": 0.9206,
593
+ "rewards/accuracies": 0.5,
594
+ "rewards/chosen": -5.090394973754883,
595
+ "rewards/margins": 0.17267219722270966,
596
+ "rewards/rejected": -5.263067722320557,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.19,
601
+ "learning_rate": 9.483211692872669e-06,
602
+ "logits/chosen": -2.694725513458252,
603
+ "logits/rejected": -2.689701557159424,
604
+ "logps/chosen": -168.6083221435547,
605
+ "logps/rejected": -170.26681518554688,
606
+ "loss": 0.9479,
607
+ "rewards/accuracies": 0.4000000059604645,
608
+ "rewards/chosen": -4.593288421630859,
609
+ "rewards/margins": -0.16782906651496887,
610
+ "rewards/rejected": -4.425459384918213,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.2,
615
+ "learning_rate": 9.450183787166447e-06,
616
+ "logits/chosen": -2.6913774013519287,
617
+ "logits/rejected": -2.780381202697754,
618
+ "logps/chosen": -141.98934936523438,
619
+ "logps/rejected": -177.6278076171875,
620
+ "loss": 0.9904,
621
+ "rewards/accuracies": 0.44999998807907104,
622
+ "rewards/chosen": -4.094004154205322,
623
+ "rewards/margins": -0.12103526294231415,
624
+ "rewards/rejected": -3.972968578338623,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.2,
629
+ "learning_rate": 9.41619413449037e-06,
630
+ "logits/chosen": -2.804361581802368,
631
+ "logits/rejected": -2.7710132598876953,
632
+ "logps/chosen": -209.9197540283203,
633
+ "logps/rejected": -231.4965057373047,
634
+ "loss": 0.654,
635
+ "rewards/accuracies": 0.675000011920929,
636
+ "rewards/chosen": -3.7489547729492188,
637
+ "rewards/margins": 0.5616118311882019,
638
+ "rewards/rejected": -4.3105669021606445,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.2,
643
+ "learning_rate": 9.381250080483864e-06,
644
+ "logits/chosen": -2.777339458465576,
645
+ "logits/rejected": -2.7908101081848145,
646
+ "logps/chosen": -197.44711303710938,
647
+ "logps/rejected": -195.8129425048828,
648
+ "loss": 0.8654,
649
+ "rewards/accuracies": 0.625,
650
+ "rewards/chosen": -4.063180923461914,
651
+ "rewards/margins": 0.1730591356754303,
652
+ "rewards/rejected": -4.236240386962891,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.21,
657
+ "learning_rate": 9.345359177045827e-06,
658
+ "logits/chosen": -2.7428901195526123,
659
+ "logits/rejected": -2.720733642578125,
660
+ "logps/chosen": -163.38687133789062,
661
+ "logps/rejected": -152.174072265625,
662
+ "loss": 1.008,
663
+ "rewards/accuracies": 0.4749999940395355,
664
+ "rewards/chosen": -4.339611053466797,
665
+ "rewards/margins": -0.002607667353004217,
666
+ "rewards/rejected": -4.337003707885742,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.21,
671
+ "learning_rate": 9.308529180702568e-06,
672
+ "logits/chosen": -2.771120309829712,
673
+ "logits/rejected": -2.754432201385498,
674
+ "logps/chosen": -190.38487243652344,
675
+ "logps/rejected": -209.5969696044922,
676
+ "loss": 0.9381,
677
+ "rewards/accuracies": 0.4749999940395355,
678
+ "rewards/chosen": -4.276428699493408,
679
+ "rewards/margins": 0.031873930245637894,
680
+ "rewards/rejected": -4.308302879333496,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.22,
685
+ "learning_rate": 9.270768050931515e-06,
686
+ "logits/chosen": -2.729900360107422,
687
+ "logits/rejected": -2.793795108795166,
688
+ "logps/chosen": -181.68646240234375,
689
+ "logps/rejected": -203.8788299560547,
690
+ "loss": 0.9827,
691
+ "rewards/accuracies": 0.44999998807907104,
692
+ "rewards/chosen": -4.365941524505615,
693
+ "rewards/margins": -0.1641651839017868,
694
+ "rewards/rejected": -4.201776504516602,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.22,
699
+ "learning_rate": 9.232083948441046e-06,
700
+ "logits/chosen": -2.7761735916137695,
701
+ "logits/rejected": -2.7046539783477783,
702
+ "logps/chosen": -190.8777618408203,
703
+ "logps/rejected": -169.68423461914062,
704
+ "loss": 0.7403,
705
+ "rewards/accuracies": 0.625,
706
+ "rewards/chosen": -4.126107215881348,
707
+ "rewards/margins": 0.2456977367401123,
708
+ "rewards/rejected": -4.371805191040039,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.23,
713
+ "learning_rate": 9.192485233406862e-06,
714
+ "logits/chosen": -2.788799524307251,
715
+ "logits/rejected": -2.8254306316375732,
716
+ "logps/chosen": -204.0353240966797,
717
+ "logps/rejected": -216.4750518798828,
718
+ "loss": 0.6348,
719
+ "rewards/accuracies": 0.675000011920929,
720
+ "rewards/chosen": -3.6200859546661377,
721
+ "rewards/margins": 0.6306756138801575,
722
+ "rewards/rejected": -4.250761985778809,
723
+ "step": 510
724
+ },
725
+ {
726
+ "epoch": 0.23,
727
+ "learning_rate": 9.151980463665227e-06,
728
+ "logits/chosen": -2.7755134105682373,
729
+ "logits/rejected": -2.7311973571777344,
730
+ "logps/chosen": -215.2248077392578,
731
+ "logps/rejected": -178.1062469482422,
732
+ "loss": 0.9884,
733
+ "rewards/accuracies": 0.2750000059604645,
734
+ "rewards/chosen": -3.970677137374878,
735
+ "rewards/margins": -0.2024170607328415,
736
+ "rewards/rejected": -3.7682597637176514,
737
+ "step": 520
738
+ },
739
+ {
740
+ "epoch": 0.24,
741
+ "learning_rate": 9.1105783928635e-06,
742
+ "logits/chosen": -2.7572436332702637,
743
+ "logits/rejected": -2.720371961593628,
744
+ "logps/chosen": -203.22486877441406,
745
+ "logps/rejected": -213.2503662109375,
746
+ "loss": 0.8808,
747
+ "rewards/accuracies": 0.5249999761581421,
748
+ "rewards/chosen": -4.16678524017334,
749
+ "rewards/margins": 0.12414976209402084,
750
+ "rewards/rejected": -4.2909345626831055,
751
+ "step": 530
752
+ },
753
+ {
754
+ "epoch": 0.24,
755
+ "learning_rate": 9.068287968568355e-06,
756
+ "logits/chosen": -2.7487785816192627,
757
+ "logits/rejected": -2.724555253982544,
758
+ "logps/chosen": -175.81295776367188,
759
+ "logps/rejected": -203.91702270507812,
760
+ "loss": 0.7847,
761
+ "rewards/accuracies": 0.4749999940395355,
762
+ "rewards/chosen": -3.4349968433380127,
763
+ "rewards/margins": 0.22362789511680603,
764
+ "rewards/rejected": -3.6586246490478516,
765
+ "step": 540
766
+ },
767
+ {
768
+ "epoch": 0.24,
769
+ "learning_rate": 9.02511833033208e-06,
770
+ "logits/chosen": -2.6728549003601074,
771
+ "logits/rejected": -2.7027556896209717,
772
+ "logps/chosen": -174.01890563964844,
773
+ "logps/rejected": -172.8794403076172,
774
+ "loss": 0.8984,
775
+ "rewards/accuracies": 0.5249999761581421,
776
+ "rewards/chosen": -4.354010105133057,
777
+ "rewards/margins": -0.13735604286193848,
778
+ "rewards/rejected": -4.216653823852539,
779
+ "step": 550
780
+ },
781
+ {
782
+ "epoch": 0.25,
783
+ "learning_rate": 8.981078807717396e-06,
784
+ "logits/chosen": -2.780517578125,
785
+ "logits/rejected": -2.6801159381866455,
786
+ "logps/chosen": -230.1298370361328,
787
+ "logps/rejected": -203.07168579101562,
788
+ "loss": 0.625,
789
+ "rewards/accuracies": 0.574999988079071,
790
+ "rewards/chosen": -3.319823741912842,
791
+ "rewards/margins": 0.6677559614181519,
792
+ "rewards/rejected": -3.987579822540283,
793
+ "step": 560
794
+ },
795
+ {
796
+ "epoch": 0.25,
797
+ "learning_rate": 8.936178918281209e-06,
798
+ "logits/chosen": -2.799701690673828,
799
+ "logits/rejected": -2.815525770187378,
800
+ "logps/chosen": -205.35971069335938,
801
+ "logps/rejected": -223.64096069335938,
802
+ "loss": 0.8421,
803
+ "rewards/accuracies": 0.4749999940395355,
804
+ "rewards/chosen": -3.762897491455078,
805
+ "rewards/margins": 0.13321921229362488,
806
+ "rewards/rejected": -3.8961167335510254,
807
+ "step": 570
808
+ },
809
+ {
810
+ "epoch": 0.26,
811
+ "learning_rate": 8.890428365517728e-06,
812
+ "logits/chosen": -2.8051438331604004,
813
+ "logits/rejected": -2.7885632514953613,
814
+ "logps/chosen": -197.3937530517578,
815
+ "logps/rejected": -187.13601684570312,
816
+ "loss": 0.7381,
817
+ "rewards/accuracies": 0.574999988079071,
818
+ "rewards/chosen": -3.4687581062316895,
819
+ "rewards/margins": 0.25684064626693726,
820
+ "rewards/rejected": -3.7255985736846924,
821
+ "step": 580
822
+ },
823
+ {
824
+ "epoch": 0.26,
825
+ "learning_rate": 8.843837036761404e-06,
826
+ "logits/chosen": -2.7467944622039795,
827
+ "logits/rejected": -2.7005391120910645,
828
+ "logps/chosen": -152.84494018554688,
829
+ "logps/rejected": -160.13241577148438,
830
+ "loss": 0.7662,
831
+ "rewards/accuracies": 0.5249999761581421,
832
+ "rewards/chosen": -3.732775926589966,
833
+ "rewards/margins": 0.06671512126922607,
834
+ "rewards/rejected": -3.7994911670684814,
835
+ "step": 590
836
+ },
837
+ {
838
+ "epoch": 0.27,
839
+ "learning_rate": 8.796415001050154e-06,
840
+ "logits/chosen": -2.7716736793518066,
841
+ "logits/rejected": -2.7501323223114014,
842
+ "logps/chosen": -221.6551513671875,
843
+ "logps/rejected": -201.65664672851562,
844
+ "loss": 0.8448,
845
+ "rewards/accuracies": 0.574999988079071,
846
+ "rewards/chosen": -3.700239896774292,
847
+ "rewards/margins": 0.12370122969150543,
848
+ "rewards/rejected": -3.823941469192505,
849
+ "step": 600
850
+ },
851
+ {
852
+ "epoch": 0.27,
853
+ "learning_rate": 8.748172506949274e-06,
854
+ "logits/chosen": -2.7913918495178223,
855
+ "logits/rejected": -2.7350287437438965,
856
+ "logps/chosen": -168.91790771484375,
857
+ "logps/rejected": -155.0623321533203,
858
+ "loss": 0.5851,
859
+ "rewards/accuracies": 0.699999988079071,
860
+ "rewards/chosen": -3.5637214183807373,
861
+ "rewards/margins": 0.42019423842430115,
862
+ "rewards/rejected": -3.9839158058166504,
863
+ "step": 610
864
+ },
865
+ {
866
+ "epoch": 0.28,
867
+ "learning_rate": 8.699119980336602e-06,
868
+ "logits/chosen": -2.7849667072296143,
869
+ "logits/rejected": -2.771721601486206,
870
+ "logps/chosen": -192.60813903808594,
871
+ "logps/rejected": -206.09646606445312,
872
+ "loss": 0.9976,
873
+ "rewards/accuracies": 0.32499998807907104,
874
+ "rewards/chosen": -3.9602694511413574,
875
+ "rewards/margins": -0.18327102065086365,
876
+ "rewards/rejected": -3.776998519897461,
877
+ "step": 620
878
+ },
879
+ {
880
+ "epoch": 0.28,
881
+ "learning_rate": 8.649268022149333e-06,
882
+ "logits/chosen": -2.7933568954467773,
883
+ "logits/rejected": -2.7272696495056152,
884
+ "logps/chosen": -179.9084930419922,
885
+ "logps/rejected": -169.74490356445312,
886
+ "loss": 0.68,
887
+ "rewards/accuracies": 0.6000000238418579,
888
+ "rewards/chosen": -3.221353054046631,
889
+ "rewards/margins": 0.3395439684391022,
890
+ "rewards/rejected": -3.5608971118927,
891
+ "step": 630
892
+ },
893
+ {
894
+ "epoch": 0.28,
895
+ "learning_rate": 8.59862740609301e-06,
896
+ "logits/chosen": -2.7812328338623047,
897
+ "logits/rejected": -2.8435587882995605,
898
+ "logps/chosen": -209.0635223388672,
899
+ "logps/rejected": -244.5385284423828,
900
+ "loss": 0.6579,
901
+ "rewards/accuracies": 0.574999988079071,
902
+ "rewards/chosen": -3.0288193225860596,
903
+ "rewards/margins": 0.4765963554382324,
904
+ "rewards/rejected": -3.505415678024292,
905
+ "step": 640
906
+ },
907
+ {
908
+ "epoch": 0.29,
909
+ "learning_rate": 8.547209076313172e-06,
910
+ "logits/chosen": -2.8104701042175293,
911
+ "logits/rejected": -2.7969369888305664,
912
+ "logps/chosen": -206.7493896484375,
913
+ "logps/rejected": -246.7706298828125,
914
+ "loss": 0.7068,
915
+ "rewards/accuracies": 0.5,
916
+ "rewards/chosen": -3.3913025856018066,
917
+ "rewards/margins": 0.37605711817741394,
918
+ "rewards/rejected": -3.767359495162964,
919
+ "step": 650
920
+ },
921
+ {
922
+ "epoch": 0.29,
923
+ "learning_rate": 8.495024145030174e-06,
924
+ "logits/chosen": -2.743499279022217,
925
+ "logits/rejected": -2.7557623386383057,
926
+ "logps/chosen": -173.17481994628906,
927
+ "logps/rejected": -186.05215454101562,
928
+ "loss": 0.6764,
929
+ "rewards/accuracies": 0.6000000238418579,
930
+ "rewards/chosen": -3.472040891647339,
931
+ "rewards/margins": 0.3358023464679718,
932
+ "rewards/rejected": -3.807842969894409,
933
+ "step": 660
934
+ },
935
+ {
936
+ "epoch": 0.3,
937
+ "learning_rate": 8.442083890137678e-06,
938
+ "logits/chosen": -2.8170254230499268,
939
+ "logits/rejected": -2.760282516479492,
940
+ "logps/chosen": -173.0248565673828,
941
+ "logps/rejected": -184.1920623779297,
942
+ "loss": 0.8291,
943
+ "rewards/accuracies": 0.4749999940395355,
944
+ "rewards/chosen": -3.5644659996032715,
945
+ "rewards/margins": 0.05758289247751236,
946
+ "rewards/rejected": -3.622048854827881,
947
+ "step": 670
948
+ },
949
+ {
950
+ "epoch": 0.3,
951
+ "learning_rate": 8.388399752765344e-06,
952
+ "logits/chosen": -2.773528814315796,
953
+ "logits/rejected": -2.758387327194214,
954
+ "logps/chosen": -204.7705078125,
955
+ "logps/rejected": -200.41160583496094,
956
+ "loss": 0.8527,
957
+ "rewards/accuracies": 0.42500001192092896,
958
+ "rewards/chosen": -3.599118709564209,
959
+ "rewards/margins": 0.017948562279343605,
960
+ "rewards/rejected": -3.617067337036133,
961
+ "step": 680
962
+ },
963
+ {
964
+ "epoch": 0.31,
965
+ "learning_rate": 8.333983334806248e-06,
966
+ "logits/chosen": -2.8039369583129883,
967
+ "logits/rejected": -2.7655069828033447,
968
+ "logps/chosen": -192.72186279296875,
969
+ "logps/rejected": -173.8263702392578,
970
+ "loss": 0.8641,
971
+ "rewards/accuracies": 0.4000000059604645,
972
+ "rewards/chosen": -3.713822603225708,
973
+ "rewards/margins": -0.06442561000585556,
974
+ "rewards/rejected": -3.6493968963623047,
975
+ "step": 690
976
+ },
977
+ {
978
+ "epoch": 0.31,
979
+ "learning_rate": 8.278846396409534e-06,
980
+ "logits/chosen": -2.797102451324463,
981
+ "logits/rejected": -2.7584991455078125,
982
+ "logps/chosen": -195.19786071777344,
983
+ "logps/rejected": -184.8418731689453,
984
+ "loss": 0.7849,
985
+ "rewards/accuracies": 0.6000000238418579,
986
+ "rewards/chosen": -3.548352003097534,
987
+ "rewards/margins": 0.16000667214393616,
988
+ "rewards/rejected": -3.7083587646484375,
989
+ "step": 700
990
+ },
991
+ {
992
+ "epoch": 0.32,
993
+ "learning_rate": 8.223000853438904e-06,
994
+ "logits/chosen": -2.8177175521850586,
995
+ "logits/rejected": -2.7559008598327637,
996
+ "logps/chosen": -218.43588256835938,
997
+ "logps/rejected": -219.35946655273438,
998
+ "loss": 0.7455,
999
+ "rewards/accuracies": 0.6000000238418579,
1000
+ "rewards/chosen": -3.5060417652130127,
1001
+ "rewards/margins": 0.2477506697177887,
1002
+ "rewards/rejected": -3.7537918090820312,
1003
+ "step": 710
1004
+ },
1005
+ {
1006
+ "epoch": 0.32,
1007
+ "learning_rate": 8.166458774897413e-06,
1008
+ "logits/chosen": -2.7866969108581543,
1009
+ "logits/rejected": -2.7426235675811768,
1010
+ "logps/chosen": -196.8046417236328,
1011
+ "logps/rejected": -180.7646484375,
1012
+ "loss": 0.6577,
1013
+ "rewards/accuracies": 0.75,
1014
+ "rewards/chosen": -3.515568494796753,
1015
+ "rewards/margins": 0.46584025025367737,
1016
+ "rewards/rejected": -3.9814085960388184,
1017
+ "step": 720
1018
+ },
1019
+ {
1020
+ "epoch": 0.32,
1021
+ "learning_rate": 8.109232380319194e-06,
1022
+ "logits/chosen": -2.781240940093994,
1023
+ "logits/rejected": -2.7888545989990234,
1024
+ "logps/chosen": -232.93215942382812,
1025
+ "logps/rejected": -232.01644897460938,
1026
+ "loss": 0.7337,
1027
+ "rewards/accuracies": 0.550000011920929,
1028
+ "rewards/chosen": -3.5336124897003174,
1029
+ "rewards/margins": 0.2553574740886688,
1030
+ "rewards/rejected": -3.7889697551727295,
1031
+ "step": 730
1032
+ },
1033
+ {
1034
+ "epoch": 0.33,
1035
+ "learning_rate": 8.051334037128661e-06,
1036
+ "logits/chosen": -2.7906103134155273,
1037
+ "logits/rejected": -2.742318630218506,
1038
+ "logps/chosen": -170.14791870117188,
1039
+ "logps/rejected": -173.310791015625,
1040
+ "loss": 0.828,
1041
+ "rewards/accuracies": 0.5249999761581421,
1042
+ "rewards/chosen": -3.540756940841675,
1043
+ "rewards/margins": 0.037487827241420746,
1044
+ "rewards/rejected": -3.578244686126709,
1045
+ "step": 740
1046
+ },
1047
+ {
1048
+ "epoch": 0.33,
1049
+ "learning_rate": 7.99277625796771e-06,
1050
+ "logits/chosen": -2.7460989952087402,
1051
+ "logits/rejected": -2.710388660430908,
1052
+ "logps/chosen": -164.4999542236328,
1053
+ "logps/rejected": -171.73757934570312,
1054
+ "loss": 0.8343,
1055
+ "rewards/accuracies": 0.5,
1056
+ "rewards/chosen": -3.3666107654571533,
1057
+ "rewards/margins": 0.040531255304813385,
1058
+ "rewards/rejected": -3.407141923904419,
1059
+ "step": 750
1060
+ },
1061
+ {
1062
+ "epoch": 0.34,
1063
+ "learning_rate": 7.933571697991582e-06,
1064
+ "logits/chosen": -2.830110549926758,
1065
+ "logits/rejected": -2.7687745094299316,
1066
+ "logps/chosen": -210.4406280517578,
1067
+ "logps/rejected": -182.27137756347656,
1068
+ "loss": 0.8217,
1069
+ "rewards/accuracies": 0.44999998807907104,
1070
+ "rewards/chosen": -3.588493824005127,
1071
+ "rewards/margins": -0.05328698828816414,
1072
+ "rewards/rejected": -3.5352070331573486,
1073
+ "step": 760
1074
+ },
1075
+ {
1076
+ "epoch": 0.34,
1077
+ "learning_rate": 7.873733152133898e-06,
1078
+ "logits/chosen": -2.751688241958618,
1079
+ "logits/rejected": -2.7940192222595215,
1080
+ "logps/chosen": -153.90414428710938,
1081
+ "logps/rejected": -158.2861328125,
1082
+ "loss": 0.8625,
1083
+ "rewards/accuracies": 0.42500001192092896,
1084
+ "rewards/chosen": -3.453404664993286,
1085
+ "rewards/margins": -0.1084330826997757,
1086
+ "rewards/rejected": -3.3449714183807373,
1087
+ "step": 770
1088
+ },
1089
+ {
1090
+ "epoch": 0.35,
1091
+ "learning_rate": 7.813273552341496e-06,
1092
+ "logits/chosen": -2.7797484397888184,
1093
+ "logits/rejected": -2.775768995285034,
1094
+ "logps/chosen": -169.4456787109375,
1095
+ "logps/rejected": -177.5587921142578,
1096
+ "loss": 0.756,
1097
+ "rewards/accuracies": 0.5249999761581421,
1098
+ "rewards/chosen": -3.326707363128662,
1099
+ "rewards/margins": 0.25994253158569336,
1100
+ "rewards/rejected": -3.5866501331329346,
1101
+ "step": 780
1102
+ },
1103
+ {
1104
+ "epoch": 0.35,
1105
+ "learning_rate": 7.75220596477966e-06,
1106
+ "logits/chosen": -2.7829766273498535,
1107
+ "logits/rejected": -2.7465267181396484,
1108
+ "logps/chosen": -164.01870727539062,
1109
+ "logps/rejected": -156.57614135742188,
1110
+ "loss": 0.6708,
1111
+ "rewards/accuracies": 0.625,
1112
+ "rewards/chosen": -3.3400473594665527,
1113
+ "rewards/margins": 0.3525925874710083,
1114
+ "rewards/rejected": -3.6926398277282715,
1115
+ "step": 790
1116
+ },
1117
+ {
1118
+ "epoch": 0.36,
1119
+ "learning_rate": 7.690543587008332e-06,
1120
+ "logits/chosen": -2.7533538341522217,
1121
+ "logits/rejected": -2.762204647064209,
1122
+ "logps/chosen": -221.1579132080078,
1123
+ "logps/rejected": -204.04983520507812,
1124
+ "loss": 0.8969,
1125
+ "rewards/accuracies": 0.5,
1126
+ "rewards/chosen": -3.536458969116211,
1127
+ "rewards/margins": 0.05794559791684151,
1128
+ "rewards/rejected": -3.5944042205810547,
1129
+ "step": 800
1130
+ },
1131
+ {
1132
+ "epoch": 0.36,
1133
+ "learning_rate": 7.628299745129943e-06,
1134
+ "logits/chosen": -2.7850310802459717,
1135
+ "logits/rejected": -2.756134510040283,
1136
+ "logps/chosen": -224.99118041992188,
1137
+ "logps/rejected": -199.38502502441406,
1138
+ "loss": 0.8558,
1139
+ "rewards/accuracies": 0.550000011920929,
1140
+ "rewards/chosen": -3.7199528217315674,
1141
+ "rewards/margins": -0.056097112596035004,
1142
+ "rewards/rejected": -3.6638553142547607,
1143
+ "step": 810
1144
+ },
1145
+ {
1146
+ "epoch": 0.36,
1147
+ "learning_rate": 7.565487890909448e-06,
1148
+ "logits/chosen": -2.8218209743499756,
1149
+ "logits/rejected": -2.775695323944092,
1150
+ "logps/chosen": -169.43869018554688,
1151
+ "logps/rejected": -147.3358612060547,
1152
+ "loss": 0.7543,
1153
+ "rewards/accuracies": 0.5,
1154
+ "rewards/chosen": -3.0327506065368652,
1155
+ "rewards/margins": 0.10092975944280624,
1156
+ "rewards/rejected": -3.133680820465088,
1157
+ "step": 820
1158
+ },
1159
+ {
1160
+ "epoch": 0.37,
1161
+ "learning_rate": 7.502121598867218e-06,
1162
+ "logits/chosen": -2.794593572616577,
1163
+ "logits/rejected": -2.8074254989624023,
1164
+ "logps/chosen": -191.18869018554688,
1165
+ "logps/rejected": -161.5567169189453,
1166
+ "loss": 0.702,
1167
+ "rewards/accuracies": 0.6499999761581421,
1168
+ "rewards/chosen": -3.0870423316955566,
1169
+ "rewards/margins": 0.3286024034023285,
1170
+ "rewards/rejected": -3.415644407272339,
1171
+ "step": 830
1172
+ },
1173
+ {
1174
+ "epoch": 0.37,
1175
+ "learning_rate": 7.438214563345389e-06,
1176
+ "logits/chosen": -2.8384017944335938,
1177
+ "logits/rejected": -2.8303287029266357,
1178
+ "logps/chosen": -200.47872924804688,
1179
+ "logps/rejected": -202.9823760986328,
1180
+ "loss": 0.9219,
1181
+ "rewards/accuracies": 0.4749999940395355,
1182
+ "rewards/chosen": -2.9461796283721924,
1183
+ "rewards/margins": -0.013927942141890526,
1184
+ "rewards/rejected": -2.932251453399658,
1185
+ "step": 840
1186
+ },
1187
+ {
1188
+ "epoch": 0.38,
1189
+ "learning_rate": 7.373780595548334e-06,
1190
+ "logits/chosen": -2.8200442790985107,
1191
+ "logits/rejected": -2.7595479488372803,
1192
+ "logps/chosen": -203.58987426757812,
1193
+ "logps/rejected": -193.07473754882812,
1194
+ "loss": 0.5825,
1195
+ "rewards/accuracies": 0.675000011920929,
1196
+ "rewards/chosen": -2.7171919345855713,
1197
+ "rewards/margins": 0.6527736783027649,
1198
+ "rewards/rejected": -3.3699657917022705,
1199
+ "step": 850
1200
+ },
1201
+ {
1202
+ "epoch": 0.38,
1203
+ "learning_rate": 7.3088336205578565e-06,
1204
+ "logits/chosen": -2.7865753173828125,
1205
+ "logits/rejected": -2.7725372314453125,
1206
+ "logps/chosen": -181.54159545898438,
1207
+ "logps/rejected": -192.08921813964844,
1208
+ "loss": 0.723,
1209
+ "rewards/accuracies": 0.6000000238418579,
1210
+ "rewards/chosen": -2.9962518215179443,
1211
+ "rewards/margins": 0.2384202927350998,
1212
+ "rewards/rejected": -3.2346718311309814,
1213
+ "step": 860
1214
+ },
1215
+ {
1216
+ "epoch": 0.39,
1217
+ "learning_rate": 7.243387674323794e-06,
1218
+ "logits/chosen": -2.7999701499938965,
1219
+ "logits/rejected": -2.7826244831085205,
1220
+ "logps/chosen": -170.237548828125,
1221
+ "logps/rejected": -182.22238159179688,
1222
+ "loss": 0.7287,
1223
+ "rewards/accuracies": 0.6499999761581421,
1224
+ "rewards/chosen": -2.636094331741333,
1225
+ "rewards/margins": 0.3836243152618408,
1226
+ "rewards/rejected": -3.019718647003174,
1227
+ "step": 870
1228
+ },
1229
+ {
1230
+ "epoch": 0.39,
1231
+ "learning_rate": 7.177456900630645e-06,
1232
+ "logits/chosen": -2.8270153999328613,
1233
+ "logits/rejected": -2.801821231842041,
1234
+ "logps/chosen": -169.65478515625,
1235
+ "logps/rejected": -149.74624633789062,
1236
+ "loss": 0.9289,
1237
+ "rewards/accuracies": 0.5,
1238
+ "rewards/chosen": -2.8085739612579346,
1239
+ "rewards/margins": -0.13477511703968048,
1240
+ "rewards/rejected": -2.6737987995147705,
1241
+ "step": 880
1242
+ },
1243
+ {
1244
+ "epoch": 0.4,
1245
+ "learning_rate": 7.111055548040911e-06,
1246
+ "logits/chosen": -2.843956708908081,
1247
+ "logits/rejected": -2.807281017303467,
1248
+ "logps/chosen": -204.63934326171875,
1249
+ "logps/rejected": -196.67213439941406,
1250
+ "loss": 0.7793,
1251
+ "rewards/accuracies": 0.5249999761581421,
1252
+ "rewards/chosen": -2.520915985107422,
1253
+ "rewards/margins": 0.05325014516711235,
1254
+ "rewards/rejected": -2.5741655826568604,
1255
+ "step": 890
1256
+ },
1257
+ {
1258
+ "epoch": 0.4,
1259
+ "learning_rate": 7.044197966815773e-06,
1260
+ "logits/chosen": -2.8285329341888428,
1261
+ "logits/rejected": -2.735088348388672,
1262
+ "logps/chosen": -153.91452026367188,
1263
+ "logps/rejected": -138.55552673339844,
1264
+ "loss": 0.6409,
1265
+ "rewards/accuracies": 0.5,
1266
+ "rewards/chosen": -2.84395694732666,
1267
+ "rewards/margins": 0.25751471519470215,
1268
+ "rewards/rejected": -3.1014719009399414,
1269
+ "step": 900
1270
+ },
1271
+ {
1272
+ "epoch": 0.4,
1273
+ "learning_rate": 6.976898605813798e-06,
1274
+ "logits/chosen": -2.822996139526367,
1275
+ "logits/rejected": -2.8268377780914307,
1276
+ "logps/chosen": -167.09097290039062,
1277
+ "logps/rejected": -203.2536163330078,
1278
+ "loss": 0.8486,
1279
+ "rewards/accuracies": 0.550000011920929,
1280
+ "rewards/chosen": -2.88107967376709,
1281
+ "rewards/margins": 0.07577097415924072,
1282
+ "rewards/rejected": -2.95685076713562,
1283
+ "step": 910
1284
+ },
1285
+ {
1286
+ "epoch": 0.41,
1287
+ "learning_rate": 6.90917200936835e-06,
1288
+ "logits/chosen": -2.7948951721191406,
1289
+ "logits/rejected": -2.783585548400879,
1290
+ "logps/chosen": -145.66119384765625,
1291
+ "logps/rejected": -160.69918823242188,
1292
+ "loss": 0.8522,
1293
+ "rewards/accuracies": 0.44999998807907104,
1294
+ "rewards/chosen": -2.815007448196411,
1295
+ "rewards/margins": 0.01567123830318451,
1296
+ "rewards/rejected": -2.8306784629821777,
1297
+ "step": 920
1298
+ },
1299
+ {
1300
+ "epoch": 0.41,
1301
+ "learning_rate": 6.841032814144345e-06,
1302
+ "logits/chosen": -2.7837324142456055,
1303
+ "logits/rejected": -2.7920632362365723,
1304
+ "logps/chosen": -150.3719940185547,
1305
+ "logps/rejected": -168.3992919921875,
1306
+ "loss": 0.661,
1307
+ "rewards/accuracies": 0.574999988079071,
1308
+ "rewards/chosen": -2.577317476272583,
1309
+ "rewards/margins": 0.19722715020179749,
1310
+ "rewards/rejected": -2.7745444774627686,
1311
+ "step": 930
1312
+ },
1313
+ {
1314
+ "epoch": 0.42,
1315
+ "learning_rate": 6.772495745975067e-06,
1316
+ "logits/chosen": -2.822993278503418,
1317
+ "logits/rejected": -2.793628454208374,
1318
+ "logps/chosen": -179.533447265625,
1319
+ "logps/rejected": -170.6478729248047,
1320
+ "loss": 0.6447,
1321
+ "rewards/accuracies": 0.675000011920929,
1322
+ "rewards/chosen": -2.440274477005005,
1323
+ "rewards/margins": 0.4804176390171051,
1324
+ "rewards/rejected": -2.920691967010498,
1325
+ "step": 940
1326
+ },
1327
+ {
1328
+ "epoch": 0.42,
1329
+ "learning_rate": 6.703575616679709e-06,
1330
+ "logits/chosen": -2.8847832679748535,
1331
+ "logits/rejected": -2.862794876098633,
1332
+ "logps/chosen": -203.72158813476562,
1333
+ "logps/rejected": -196.6941375732422,
1334
+ "loss": 0.6708,
1335
+ "rewards/accuracies": 0.550000011920929,
1336
+ "rewards/chosen": -2.6080403327941895,
1337
+ "rewards/margins": 0.2483837604522705,
1338
+ "rewards/rejected": -2.856423854827881,
1339
+ "step": 950
1340
+ },
1341
+ {
1342
+ "epoch": 0.43,
1343
+ "learning_rate": 6.634287320862334e-06,
1344
+ "logits/chosen": -2.8792309761047363,
1345
+ "logits/rejected": -2.7815871238708496,
1346
+ "logps/chosen": -189.05697631835938,
1347
+ "logps/rejected": -170.0454559326172,
1348
+ "loss": 0.7327,
1349
+ "rewards/accuracies": 0.6000000238418579,
1350
+ "rewards/chosen": -2.4347012042999268,
1351
+ "rewards/margins": 0.20491544902324677,
1352
+ "rewards/rejected": -2.6396164894104004,
1353
+ "step": 960
1354
+ },
1355
+ {
1356
+ "epoch": 0.43,
1357
+ "learning_rate": 6.564645832692938e-06,
1358
+ "logits/chosen": -2.8398923873901367,
1359
+ "logits/rejected": -2.821370840072632,
1360
+ "logps/chosen": -162.66635131835938,
1361
+ "logps/rejected": -177.49655151367188,
1362
+ "loss": 0.6929,
1363
+ "rewards/accuracies": 0.574999988079071,
1364
+ "rewards/chosen": -2.672360420227051,
1365
+ "rewards/margins": 0.2537608742713928,
1366
+ "rewards/rejected": -2.926121473312378,
1367
+ "step": 970
1368
+ },
1369
+ {
1370
+ "epoch": 0.44,
1371
+ "learning_rate": 6.494666202671329e-06,
1372
+ "logits/chosen": -2.828071355819702,
1373
+ "logits/rejected": -2.7870450019836426,
1374
+ "logps/chosen": -175.61985778808594,
1375
+ "logps/rejected": -147.78115844726562,
1376
+ "loss": 0.9773,
1377
+ "rewards/accuracies": 0.3499999940395355,
1378
+ "rewards/chosen": -2.7835516929626465,
1379
+ "rewards/margins": -0.18404017388820648,
1380
+ "rewards/rejected": -2.59951114654541,
1381
+ "step": 980
1382
+ },
1383
+ {
1384
+ "epoch": 0.44,
1385
+ "learning_rate": 6.424363554374496e-06,
1386
+ "logits/chosen": -2.8303914070129395,
1387
+ "logits/rejected": -2.8009707927703857,
1388
+ "logps/chosen": -184.24453735351562,
1389
+ "logps/rejected": -177.34475708007812,
1390
+ "loss": 0.8386,
1391
+ "rewards/accuracies": 0.4749999940395355,
1392
+ "rewards/chosen": -2.654391050338745,
1393
+ "rewards/margins": 0.08457916229963303,
1394
+ "rewards/rejected": -2.7389702796936035,
1395
+ "step": 990
1396
+ },
1397
+ {
1398
+ "epoch": 0.44,
1399
+ "learning_rate": 6.353753081188194e-06,
1400
+ "logits/chosen": -2.8116297721862793,
1401
+ "logits/rejected": -2.8462095260620117,
1402
+ "logps/chosen": -154.33535766601562,
1403
+ "logps/rejected": -173.16500854492188,
1404
+ "loss": 0.8474,
1405
+ "rewards/accuracies": 0.44999998807907104,
1406
+ "rewards/chosen": -2.643991470336914,
1407
+ "rewards/margins": 0.021061301231384277,
1408
+ "rewards/rejected": -2.665052890777588,
1409
+ "step": 1000
1410
+ }
1411
+ ],
1412
+ "logging_steps": 10,
1413
+ "max_steps": 2250,
1414
+ "num_input_tokens_seen": 0,
1415
+ "num_train_epochs": 1,
1416
+ "save_steps": 500,
1417
+ "total_flos": 0.0,
1418
+ "train_batch_size": 1,
1419
+ "trial_name": null,
1420
+ "trial_params": null
1421
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929ce4eef102da1f4bf0c0abf6ec22df7a8d310ad21751ff23e497886fcbbedb
3
+ size 4987
checkpoint-1500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.8.2
checkpoint-1500/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 8,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 4,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "q_proj",
24
+ "v_proj",
25
+ "o_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_rslora": false
29
+ }
checkpoint-1500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08977e42dfffe119b04954d14a630fc57b3e4be2c3981783e1a27e6741b003a2
3
+ size 13665336
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4e97988c2973c4dcaef3616eec1845325ed6090d8d601f6d54efe7a15b0e99
3
+ size 27413893
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2278a87cdf86c3f9219223c847f6b27f6b7f15b8226b617f38936e8ff2cbcde
3
+ size 14575
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a246251c0b2b1f08b77e69184794bce8855c9a0f5eded6b25e5b5d037ae26da3
3
+ size 627
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-1500/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
34
+ "clean_up_tokenization_spaces": false,
35
+ "eos_token": "</s>",
36
+ "legacy": false,
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "</s>",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": false
44
+ }
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,2121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.6666666666666666,
5
+ "eval_steps": 500,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 5.309734513274336e-07,
14
+ "logits/chosen": -2.1858465671539307,
15
+ "logits/rejected": -2.2539868354797363,
16
+ "logps/chosen": -292.47344970703125,
17
+ "logps/rejected": -334.2834777832031,
18
+ "loss": 2.328,
19
+ "rewards/accuracies": 0.574999988079071,
20
+ "rewards/chosen": -17.95108985900879,
21
+ "rewards/margins": 1.5200703144073486,
22
+ "rewards/rejected": -19.47115707397461,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 1.415929203539823e-06,
28
+ "logits/chosen": -2.250004529953003,
29
+ "logits/rejected": -2.2245919704437256,
30
+ "logps/chosen": -323.00567626953125,
31
+ "logps/rejected": -341.8704528808594,
32
+ "loss": 3.0458,
33
+ "rewards/accuracies": 0.550000011920929,
34
+ "rewards/chosen": -18.9575138092041,
35
+ "rewards/margins": 0.811493992805481,
36
+ "rewards/rejected": -19.76900863647461,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.01,
41
+ "learning_rate": 2.3008849557522127e-06,
42
+ "logits/chosen": -2.2509493827819824,
43
+ "logits/rejected": -2.2362070083618164,
44
+ "logps/chosen": -309.36627197265625,
45
+ "logps/rejected": -354.1287841796875,
46
+ "loss": 2.001,
47
+ "rewards/accuracies": 0.6499999761581421,
48
+ "rewards/chosen": -19.02206039428711,
49
+ "rewards/margins": 2.324467182159424,
50
+ "rewards/rejected": -21.346529006958008,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.02,
55
+ "learning_rate": 3.185840707964602e-06,
56
+ "logits/chosen": -2.261589527130127,
57
+ "logits/rejected": -2.234139919281006,
58
+ "logps/chosen": -341.8447265625,
59
+ "logps/rejected": -361.2301330566406,
60
+ "loss": 2.3698,
61
+ "rewards/accuracies": 0.574999988079071,
62
+ "rewards/chosen": -19.69499397277832,
63
+ "rewards/margins": 1.1049805879592896,
64
+ "rewards/rejected": -20.799976348876953,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.02,
69
+ "learning_rate": 4.070796460176992e-06,
70
+ "logits/chosen": -2.282593250274658,
71
+ "logits/rejected": -2.219956874847412,
72
+ "logps/chosen": -333.1883850097656,
73
+ "logps/rejected": -323.2119140625,
74
+ "loss": 2.3553,
75
+ "rewards/accuracies": 0.574999988079071,
76
+ "rewards/chosen": -19.006275177001953,
77
+ "rewards/margins": 0.894936203956604,
78
+ "rewards/rejected": -19.90121078491211,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.03,
83
+ "learning_rate": 4.955752212389381e-06,
84
+ "logits/chosen": -2.2947192192077637,
85
+ "logits/rejected": -2.191793918609619,
86
+ "logps/chosen": -327.3343200683594,
87
+ "logps/rejected": -302.55914306640625,
88
+ "loss": 3.0721,
89
+ "rewards/accuracies": 0.625,
90
+ "rewards/chosen": -18.95311164855957,
91
+ "rewards/margins": 0.02760167047381401,
92
+ "rewards/rejected": -18.980712890625,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.03,
97
+ "learning_rate": 5.840707964601771e-06,
98
+ "logits/chosen": -2.1300625801086426,
99
+ "logits/rejected": -2.197695255279541,
100
+ "logps/chosen": -296.19586181640625,
101
+ "logps/rejected": -322.7232360839844,
102
+ "loss": 2.5242,
103
+ "rewards/accuracies": 0.5249999761581421,
104
+ "rewards/chosen": -18.607830047607422,
105
+ "rewards/margins": 0.5492460131645203,
106
+ "rewards/rejected": -19.15707778930664,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.04,
111
+ "learning_rate": 6.72566371681416e-06,
112
+ "logits/chosen": -2.191436290740967,
113
+ "logits/rejected": -2.203051805496216,
114
+ "logps/chosen": -322.64581298828125,
115
+ "logps/rejected": -318.93475341796875,
116
+ "loss": 2.4603,
117
+ "rewards/accuracies": 0.625,
118
+ "rewards/chosen": -18.32453155517578,
119
+ "rewards/margins": 1.1701295375823975,
120
+ "rewards/rejected": -19.494661331176758,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.04,
125
+ "learning_rate": 7.610619469026549e-06,
126
+ "logits/chosen": -2.3291049003601074,
127
+ "logits/rejected": -2.13211727142334,
128
+ "logps/chosen": -351.888671875,
129
+ "logps/rejected": -316.0814514160156,
130
+ "loss": 4.3049,
131
+ "rewards/accuracies": 0.375,
132
+ "rewards/chosen": -20.648174285888672,
133
+ "rewards/margins": -1.917999505996704,
134
+ "rewards/rejected": -18.730175018310547,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.04,
139
+ "learning_rate": 8.495575221238938e-06,
140
+ "logits/chosen": -2.326770782470703,
141
+ "logits/rejected": -2.2708096504211426,
142
+ "logps/chosen": -319.8079528808594,
143
+ "logps/rejected": -325.22467041015625,
144
+ "loss": 2.9022,
145
+ "rewards/accuracies": 0.550000011920929,
146
+ "rewards/chosen": -17.211898803710938,
147
+ "rewards/margins": 0.4395485818386078,
148
+ "rewards/rejected": -17.651447296142578,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.05,
153
+ "learning_rate": 9.380530973451329e-06,
154
+ "logits/chosen": -2.2947869300842285,
155
+ "logits/rejected": -2.2642266750335693,
156
+ "logps/chosen": -319.7033386230469,
157
+ "logps/rejected": -301.95684814453125,
158
+ "loss": 2.9535,
159
+ "rewards/accuracies": 0.32499998807907104,
160
+ "rewards/chosen": -18.591039657592773,
161
+ "rewards/margins": -1.4626668691635132,
162
+ "rewards/rejected": -17.128376007080078,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.05,
167
+ "learning_rate": 9.999951373555555e-06,
168
+ "logits/chosen": -2.356776475906372,
169
+ "logits/rejected": -2.2779877185821533,
170
+ "logps/chosen": -332.5343322753906,
171
+ "logps/rejected": -308.6272888183594,
172
+ "loss": 2.8838,
173
+ "rewards/accuracies": 0.42500001192092896,
174
+ "rewards/chosen": -18.02423095703125,
175
+ "rewards/margins": -0.5776697993278503,
176
+ "rewards/rejected": -17.446561813354492,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.06,
181
+ "learning_rate": 9.999086929743288e-06,
182
+ "logits/chosen": -2.34501314163208,
183
+ "logits/rejected": -2.3048901557922363,
184
+ "logps/chosen": -298.5960388183594,
185
+ "logps/rejected": -309.3174743652344,
186
+ "loss": 2.0696,
187
+ "rewards/accuracies": 0.6499999761581421,
188
+ "rewards/chosen": -16.59781265258789,
189
+ "rewards/margins": 0.7586337327957153,
190
+ "rewards/rejected": -17.356447219848633,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.06,
195
+ "learning_rate": 9.997142113313472e-06,
196
+ "logits/chosen": -2.3136909008026123,
197
+ "logits/rejected": -2.3042447566986084,
198
+ "logps/chosen": -292.8536071777344,
199
+ "logps/rejected": -281.0971984863281,
200
+ "loss": 1.8399,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -15.984518051147461,
203
+ "rewards/margins": 0.30002641677856445,
204
+ "rewards/rejected": -16.284543991088867,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.07,
209
+ "learning_rate": 9.994117344568142e-06,
210
+ "logits/chosen": -2.337782144546509,
211
+ "logits/rejected": -2.3470942974090576,
212
+ "logps/chosen": -286.35504150390625,
213
+ "logps/rejected": -303.07684326171875,
214
+ "loss": 1.5656,
215
+ "rewards/accuracies": 0.625,
216
+ "rewards/chosen": -14.58277416229248,
217
+ "rewards/margins": 1.030444860458374,
218
+ "rewards/rejected": -15.61322021484375,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.07,
223
+ "learning_rate": 9.990013277202137e-06,
224
+ "logits/chosen": -2.3595287799835205,
225
+ "logits/rejected": -2.4950690269470215,
226
+ "logps/chosen": -292.61651611328125,
227
+ "logps/rejected": -363.38507080078125,
228
+ "loss": 1.523,
229
+ "rewards/accuracies": 0.625,
230
+ "rewards/chosen": -15.285211563110352,
231
+ "rewards/margins": 2.0152671337127686,
232
+ "rewards/rejected": -17.300477981567383,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.08,
237
+ "learning_rate": 9.984830798161828e-06,
238
+ "logits/chosen": -2.4216346740722656,
239
+ "logits/rejected": -2.35921311378479,
240
+ "logps/chosen": -329.1554870605469,
241
+ "logps/rejected": -308.78326416015625,
242
+ "loss": 2.5844,
243
+ "rewards/accuracies": 0.44999998807907104,
244
+ "rewards/chosen": -14.768890380859375,
245
+ "rewards/margins": -0.32357311248779297,
246
+ "rewards/rejected": -14.445318222045898,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.08,
251
+ "learning_rate": 9.978571027453433e-06,
252
+ "logits/chosen": -2.5200698375701904,
253
+ "logits/rejected": -2.338383674621582,
254
+ "logps/chosen": -296.1730041503906,
255
+ "logps/rejected": -232.0618896484375,
256
+ "loss": 2.4226,
257
+ "rewards/accuracies": 0.375,
258
+ "rewards/chosen": -13.72007942199707,
259
+ "rewards/margins": -0.8905000686645508,
260
+ "rewards/rejected": -12.829577445983887,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.08,
265
+ "learning_rate": 9.971235317900968e-06,
266
+ "logits/chosen": -2.4042282104492188,
267
+ "logits/rejected": -2.4900546073913574,
268
+ "logps/chosen": -219.2891845703125,
269
+ "logps/rejected": -247.385498046875,
270
+ "loss": 1.5221,
271
+ "rewards/accuracies": 0.5,
272
+ "rewards/chosen": -11.559672355651855,
273
+ "rewards/margins": 0.2930552363395691,
274
+ "rewards/rejected": -11.852727890014648,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.09,
279
+ "learning_rate": 9.962825254853888e-06,
280
+ "logits/chosen": -2.591836929321289,
281
+ "logits/rejected": -2.5101170539855957,
282
+ "logps/chosen": -311.3710632324219,
283
+ "logps/rejected": -277.0614318847656,
284
+ "loss": 2.1722,
285
+ "rewards/accuracies": 0.32499998807907104,
286
+ "rewards/chosen": -13.245725631713867,
287
+ "rewards/margins": -1.0529097318649292,
288
+ "rewards/rejected": -12.192815780639648,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.09,
293
+ "learning_rate": 9.954339123272747e-06,
294
+ "logits/chosen": -2.5649514198303223,
295
+ "logits/rejected": -2.4265828132629395,
296
+ "logps/chosen": -250.44009399414062,
297
+ "logps/rejected": -228.14224243164062,
298
+ "loss": 1.4704,
299
+ "rewards/accuracies": 0.32499998807907104,
300
+ "rewards/chosen": -10.943647384643555,
301
+ "rewards/margins": -0.3656729757785797,
302
+ "rewards/rejected": -10.577974319458008,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.1,
307
+ "learning_rate": 9.943892987470688e-06,
308
+ "logits/chosen": -2.559394598007202,
309
+ "logits/rejected": -2.523345470428467,
310
+ "logps/chosen": -260.9962463378906,
311
+ "logps/rejected": -234.96670532226562,
312
+ "loss": 1.709,
313
+ "rewards/accuracies": 0.5,
314
+ "rewards/chosen": -10.393632888793945,
315
+ "rewards/margins": 0.1549229919910431,
316
+ "rewards/rejected": -10.548555374145508,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.1,
321
+ "learning_rate": 9.932378407234108e-06,
322
+ "logits/chosen": -2.63352632522583,
323
+ "logits/rejected": -2.5623555183410645,
324
+ "logps/chosen": -271.7388916015625,
325
+ "logps/rejected": -272.16796875,
326
+ "loss": 1.2704,
327
+ "rewards/accuracies": 0.574999988079071,
328
+ "rewards/chosen": -9.603178024291992,
329
+ "rewards/margins": 0.19006821513175964,
330
+ "rewards/rejected": -9.793245315551758,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.11,
335
+ "learning_rate": 9.919797871024877e-06,
336
+ "logits/chosen": -2.6439247131347656,
337
+ "logits/rejected": -2.6053879261016846,
338
+ "logps/chosen": -229.23764038085938,
339
+ "logps/rejected": -197.8614044189453,
340
+ "loss": 1.405,
341
+ "rewards/accuracies": 0.375,
342
+ "rewards/chosen": -9.295554161071777,
343
+ "rewards/margins": -0.5798273086547852,
344
+ "rewards/rejected": -8.715726852416992,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.11,
349
+ "learning_rate": 9.906154097672858e-06,
350
+ "logits/chosen": -2.6798043251037598,
351
+ "logits/rejected": -2.600550889968872,
352
+ "logps/chosen": -235.1671142578125,
353
+ "logps/rejected": -223.978271484375,
354
+ "loss": 1.2942,
355
+ "rewards/accuracies": 0.574999988079071,
356
+ "rewards/chosen": -8.195772171020508,
357
+ "rewards/margins": -0.00971608143299818,
358
+ "rewards/rejected": -8.186057090759277,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.12,
363
+ "learning_rate": 9.89145003578833e-06,
364
+ "logits/chosen": -2.670474052429199,
365
+ "logits/rejected": -2.6329426765441895,
366
+ "logps/chosen": -224.05068969726562,
367
+ "logps/rejected": -207.1922607421875,
368
+ "loss": 1.0877,
369
+ "rewards/accuracies": 0.5249999761581421,
370
+ "rewards/chosen": -7.675335884094238,
371
+ "rewards/margins": 0.17977333068847656,
372
+ "rewards/rejected": -7.855108737945557,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.12,
377
+ "learning_rate": 9.875688863124766e-06,
378
+ "logits/chosen": -2.620087146759033,
379
+ "logits/rejected": -2.676790714263916,
380
+ "logps/chosen": -255.08486938476562,
381
+ "logps/rejected": -265.8028564453125,
382
+ "loss": 1.0495,
383
+ "rewards/accuracies": 0.6000000238418579,
384
+ "rewards/chosen": -7.672966957092285,
385
+ "rewards/margins": 0.1472960114479065,
386
+ "rewards/rejected": -7.8202619552612305,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.12,
391
+ "learning_rate": 9.858873985892058e-06,
392
+ "logits/chosen": -2.6771128177642822,
393
+ "logits/rejected": -2.5845065116882324,
394
+ "logps/chosen": -222.91311645507812,
395
+ "logps/rejected": -234.68359375,
396
+ "loss": 1.0752,
397
+ "rewards/accuracies": 0.4749999940395355,
398
+ "rewards/chosen": -6.951257228851318,
399
+ "rewards/margins": -0.008678942918777466,
400
+ "rewards/rejected": -6.9425787925720215,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.13,
405
+ "learning_rate": 9.841009038020401e-06,
406
+ "logits/chosen": -2.6333932876586914,
407
+ "logits/rejected": -2.65295147895813,
408
+ "logps/chosen": -204.25399780273438,
409
+ "logps/rejected": -208.4911651611328,
410
+ "loss": 1.0669,
411
+ "rewards/accuracies": 0.5249999761581421,
412
+ "rewards/chosen": -6.73724365234375,
413
+ "rewards/margins": 0.05855642631649971,
414
+ "rewards/rejected": -6.795799255371094,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.13,
419
+ "learning_rate": 9.82209788037494e-06,
420
+ "logits/chosen": -2.685725450515747,
421
+ "logits/rejected": -2.700352907180786,
422
+ "logps/chosen": -230.539794921875,
423
+ "logps/rejected": -240.39224243164062,
424
+ "loss": 1.1248,
425
+ "rewards/accuracies": 0.44999998807907104,
426
+ "rewards/chosen": -6.4564642906188965,
427
+ "rewards/margins": -0.2118469774723053,
428
+ "rewards/rejected": -6.244616985321045,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.14,
433
+ "learning_rate": 9.80214459992139e-06,
434
+ "logits/chosen": -2.714470624923706,
435
+ "logits/rejected": -2.6982994079589844,
436
+ "logps/chosen": -214.0612030029297,
437
+ "logps/rejected": -231.0535125732422,
438
+ "loss": 0.8095,
439
+ "rewards/accuracies": 0.625,
440
+ "rewards/chosen": -6.09361457824707,
441
+ "rewards/margins": 0.35767459869384766,
442
+ "rewards/rejected": -6.45128870010376,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.14,
447
+ "learning_rate": 9.781153508842785e-06,
448
+ "logits/chosen": -2.6795332431793213,
449
+ "logits/rejected": -2.6861202716827393,
450
+ "logps/chosen": -191.6574249267578,
451
+ "logps/rejected": -206.572998046875,
452
+ "loss": 0.9054,
453
+ "rewards/accuracies": 0.574999988079071,
454
+ "rewards/chosen": -6.0128302574157715,
455
+ "rewards/margins": 0.5337953567504883,
456
+ "rewards/rejected": -6.54662561416626,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.15,
461
+ "learning_rate": 9.759129143607547e-06,
462
+ "logits/chosen": -2.719517469406128,
463
+ "logits/rejected": -2.630643367767334,
464
+ "logps/chosen": -228.45797729492188,
465
+ "logps/rejected": -176.00814819335938,
466
+ "loss": 1.1571,
467
+ "rewards/accuracies": 0.4000000059604645,
468
+ "rewards/chosen": -6.262964725494385,
469
+ "rewards/margins": -0.4287610650062561,
470
+ "rewards/rejected": -5.834203243255615,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.15,
475
+ "learning_rate": 9.736076263989103e-06,
476
+ "logits/chosen": -2.753007650375366,
477
+ "logits/rejected": -2.7196168899536133,
478
+ "logps/chosen": -234.21731567382812,
479
+ "logps/rejected": -214.3049774169922,
480
+ "loss": 0.93,
481
+ "rewards/accuracies": 0.42500001192092896,
482
+ "rewards/chosen": -5.743313789367676,
483
+ "rewards/margins": 0.08766243606805801,
484
+ "rewards/rejected": -5.830975532531738,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.16,
489
+ "learning_rate": 9.711999852037226e-06,
490
+ "logits/chosen": -2.702094554901123,
491
+ "logits/rejected": -2.6643381118774414,
492
+ "logps/chosen": -235.38766479492188,
493
+ "logps/rejected": -208.2432861328125,
494
+ "loss": 1.1837,
495
+ "rewards/accuracies": 0.5,
496
+ "rewards/chosen": -5.757768154144287,
497
+ "rewards/margins": -0.27699437737464905,
498
+ "rewards/rejected": -5.480773448944092,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.16,
503
+ "learning_rate": 9.68690511100134e-06,
504
+ "logits/chosen": -2.6954503059387207,
505
+ "logits/rejected": -2.6649551391601562,
506
+ "logps/chosen": -185.06394958496094,
507
+ "logps/rejected": -187.76278686523438,
508
+ "loss": 1.0071,
509
+ "rewards/accuracies": 0.42500001192092896,
510
+ "rewards/chosen": -5.9798102378845215,
511
+ "rewards/margins": -0.11689682304859161,
512
+ "rewards/rejected": -5.862914085388184,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.16,
517
+ "learning_rate": 9.660797464206035e-06,
518
+ "logits/chosen": -2.6881985664367676,
519
+ "logits/rejected": -2.676832914352417,
520
+ "logps/chosen": -195.05517578125,
521
+ "logps/rejected": -212.87161254882812,
522
+ "loss": 0.6422,
523
+ "rewards/accuracies": 0.699999988079071,
524
+ "rewards/chosen": -4.617544174194336,
525
+ "rewards/margins": 0.8601192235946655,
526
+ "rewards/rejected": -5.477663516998291,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.17,
531
+ "learning_rate": 9.633682553879e-06,
532
+ "logits/chosen": -2.749539852142334,
533
+ "logits/rejected": -2.7113490104675293,
534
+ "logps/chosen": -173.92945861816406,
535
+ "logps/rejected": -176.216796875,
536
+ "loss": 0.8915,
537
+ "rewards/accuracies": 0.6000000238418579,
538
+ "rewards/chosen": -5.114466667175293,
539
+ "rewards/margins": 0.10453431308269501,
540
+ "rewards/rejected": -5.219000816345215,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.17,
545
+ "learning_rate": 9.605566239931666e-06,
546
+ "logits/chosen": -2.744715690612793,
547
+ "logits/rejected": -2.6837120056152344,
548
+ "logps/chosen": -200.80999755859375,
549
+ "logps/rejected": -200.7525177001953,
550
+ "loss": 0.633,
551
+ "rewards/accuracies": 0.5249999761581421,
552
+ "rewards/chosen": -4.719931602478027,
553
+ "rewards/margins": 0.698486864566803,
554
+ "rewards/rejected": -5.418419361114502,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.18,
559
+ "learning_rate": 9.576454598692797e-06,
560
+ "logits/chosen": -2.7422823905944824,
561
+ "logits/rejected": -2.7130322456359863,
562
+ "logps/chosen": -204.26626586914062,
563
+ "logps/rejected": -174.83802795410156,
564
+ "loss": 0.9281,
565
+ "rewards/accuracies": 0.4749999940395355,
566
+ "rewards/chosen": -4.756241798400879,
567
+ "rewards/margins": -0.038588762283325195,
568
+ "rewards/rejected": -4.717652320861816,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.18,
573
+ "learning_rate": 9.546353921595306e-06,
574
+ "logits/chosen": -2.7594494819641113,
575
+ "logits/rejected": -2.7436954975128174,
576
+ "logps/chosen": -183.6326141357422,
577
+ "logps/rejected": -186.80911254882812,
578
+ "loss": 0.9906,
579
+ "rewards/accuracies": 0.4749999940395355,
580
+ "rewards/chosen": -4.743472099304199,
581
+ "rewards/margins": -0.156986802816391,
582
+ "rewards/rejected": -4.586484909057617,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.19,
587
+ "learning_rate": 9.515270713816589e-06,
588
+ "logits/chosen": -2.762357711791992,
589
+ "logits/rejected": -2.661778688430786,
590
+ "logps/chosen": -212.29739379882812,
591
+ "logps/rejected": -185.29476928710938,
592
+ "loss": 0.9206,
593
+ "rewards/accuracies": 0.5,
594
+ "rewards/chosen": -5.090394973754883,
595
+ "rewards/margins": 0.17267219722270966,
596
+ "rewards/rejected": -5.263067722320557,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.19,
601
+ "learning_rate": 9.483211692872669e-06,
602
+ "logits/chosen": -2.694725513458252,
603
+ "logits/rejected": -2.689701557159424,
604
+ "logps/chosen": -168.6083221435547,
605
+ "logps/rejected": -170.26681518554688,
606
+ "loss": 0.9479,
607
+ "rewards/accuracies": 0.4000000059604645,
608
+ "rewards/chosen": -4.593288421630859,
609
+ "rewards/margins": -0.16782906651496887,
610
+ "rewards/rejected": -4.425459384918213,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.2,
615
+ "learning_rate": 9.450183787166447e-06,
616
+ "logits/chosen": -2.6913774013519287,
617
+ "logits/rejected": -2.780381202697754,
618
+ "logps/chosen": -141.98934936523438,
619
+ "logps/rejected": -177.6278076171875,
620
+ "loss": 0.9904,
621
+ "rewards/accuracies": 0.44999998807907104,
622
+ "rewards/chosen": -4.094004154205322,
623
+ "rewards/margins": -0.12103526294231415,
624
+ "rewards/rejected": -3.972968578338623,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.2,
629
+ "learning_rate": 9.41619413449037e-06,
630
+ "logits/chosen": -2.804361581802368,
631
+ "logits/rejected": -2.7710132598876953,
632
+ "logps/chosen": -209.9197540283203,
633
+ "logps/rejected": -231.4965057373047,
634
+ "loss": 0.654,
635
+ "rewards/accuracies": 0.675000011920929,
636
+ "rewards/chosen": -3.7489547729492188,
637
+ "rewards/margins": 0.5616118311882019,
638
+ "rewards/rejected": -4.3105669021606445,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.2,
643
+ "learning_rate": 9.381250080483864e-06,
644
+ "logits/chosen": -2.777339458465576,
645
+ "logits/rejected": -2.7908101081848145,
646
+ "logps/chosen": -197.44711303710938,
647
+ "logps/rejected": -195.8129425048828,
648
+ "loss": 0.8654,
649
+ "rewards/accuracies": 0.625,
650
+ "rewards/chosen": -4.063180923461914,
651
+ "rewards/margins": 0.1730591356754303,
652
+ "rewards/rejected": -4.236240386962891,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.21,
657
+ "learning_rate": 9.345359177045827e-06,
658
+ "logits/chosen": -2.7428901195526123,
659
+ "logits/rejected": -2.720733642578125,
660
+ "logps/chosen": -163.38687133789062,
661
+ "logps/rejected": -152.174072265625,
662
+ "loss": 1.008,
663
+ "rewards/accuracies": 0.4749999940395355,
664
+ "rewards/chosen": -4.339611053466797,
665
+ "rewards/margins": -0.002607667353004217,
666
+ "rewards/rejected": -4.337003707885742,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.21,
671
+ "learning_rate": 9.308529180702568e-06,
672
+ "logits/chosen": -2.771120309829712,
673
+ "logits/rejected": -2.754432201385498,
674
+ "logps/chosen": -190.38487243652344,
675
+ "logps/rejected": -209.5969696044922,
676
+ "loss": 0.9381,
677
+ "rewards/accuracies": 0.4749999940395355,
678
+ "rewards/chosen": -4.276428699493408,
679
+ "rewards/margins": 0.031873930245637894,
680
+ "rewards/rejected": -4.308302879333496,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.22,
685
+ "learning_rate": 9.270768050931515e-06,
686
+ "logits/chosen": -2.729900360107422,
687
+ "logits/rejected": -2.793795108795166,
688
+ "logps/chosen": -181.68646240234375,
689
+ "logps/rejected": -203.8788299560547,
690
+ "loss": 0.9827,
691
+ "rewards/accuracies": 0.44999998807907104,
692
+ "rewards/chosen": -4.365941524505615,
693
+ "rewards/margins": -0.1641651839017868,
694
+ "rewards/rejected": -4.201776504516602,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.22,
699
+ "learning_rate": 9.232083948441046e-06,
700
+ "logits/chosen": -2.7761735916137695,
701
+ "logits/rejected": -2.7046539783477783,
702
+ "logps/chosen": -190.8777618408203,
703
+ "logps/rejected": -169.68423461914062,
704
+ "loss": 0.7403,
705
+ "rewards/accuracies": 0.625,
706
+ "rewards/chosen": -4.126107215881348,
707
+ "rewards/margins": 0.2456977367401123,
708
+ "rewards/rejected": -4.371805191040039,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.23,
713
+ "learning_rate": 9.192485233406862e-06,
714
+ "logits/chosen": -2.788799524307251,
715
+ "logits/rejected": -2.8254306316375732,
716
+ "logps/chosen": -204.0353240966797,
717
+ "logps/rejected": -216.4750518798828,
718
+ "loss": 0.6348,
719
+ "rewards/accuracies": 0.675000011920929,
720
+ "rewards/chosen": -3.6200859546661377,
721
+ "rewards/margins": 0.6306756138801575,
722
+ "rewards/rejected": -4.250761985778809,
723
+ "step": 510
724
+ },
725
+ {
726
+ "epoch": 0.23,
727
+ "learning_rate": 9.151980463665227e-06,
728
+ "logits/chosen": -2.7755134105682373,
729
+ "logits/rejected": -2.7311973571777344,
730
+ "logps/chosen": -215.2248077392578,
731
+ "logps/rejected": -178.1062469482422,
732
+ "loss": 0.9884,
733
+ "rewards/accuracies": 0.2750000059604645,
734
+ "rewards/chosen": -3.970677137374878,
735
+ "rewards/margins": -0.2024170607328415,
736
+ "rewards/rejected": -3.7682597637176514,
737
+ "step": 520
738
+ },
739
+ {
740
+ "epoch": 0.24,
741
+ "learning_rate": 9.1105783928635e-06,
742
+ "logits/chosen": -2.7572436332702637,
743
+ "logits/rejected": -2.720371961593628,
744
+ "logps/chosen": -203.22486877441406,
745
+ "logps/rejected": -213.2503662109375,
746
+ "loss": 0.8808,
747
+ "rewards/accuracies": 0.5249999761581421,
748
+ "rewards/chosen": -4.16678524017334,
749
+ "rewards/margins": 0.12414976209402084,
750
+ "rewards/rejected": -4.2909345626831055,
751
+ "step": 530
752
+ },
753
+ {
754
+ "epoch": 0.24,
755
+ "learning_rate": 9.068287968568355e-06,
756
+ "logits/chosen": -2.7487785816192627,
757
+ "logits/rejected": -2.724555253982544,
758
+ "logps/chosen": -175.81295776367188,
759
+ "logps/rejected": -203.91702270507812,
760
+ "loss": 0.7847,
761
+ "rewards/accuracies": 0.4749999940395355,
762
+ "rewards/chosen": -3.4349968433380127,
763
+ "rewards/margins": 0.22362789511680603,
764
+ "rewards/rejected": -3.6586246490478516,
765
+ "step": 540
766
+ },
767
+ {
768
+ "epoch": 0.24,
769
+ "learning_rate": 9.02511833033208e-06,
770
+ "logits/chosen": -2.6728549003601074,
771
+ "logits/rejected": -2.7027556896209717,
772
+ "logps/chosen": -174.01890563964844,
773
+ "logps/rejected": -172.8794403076172,
774
+ "loss": 0.8984,
775
+ "rewards/accuracies": 0.5249999761581421,
776
+ "rewards/chosen": -4.354010105133057,
777
+ "rewards/margins": -0.13735604286193848,
778
+ "rewards/rejected": -4.216653823852539,
779
+ "step": 550
780
+ },
781
+ {
782
+ "epoch": 0.25,
783
+ "learning_rate": 8.981078807717396e-06,
784
+ "logits/chosen": -2.780517578125,
785
+ "logits/rejected": -2.6801159381866455,
786
+ "logps/chosen": -230.1298370361328,
787
+ "logps/rejected": -203.07168579101562,
788
+ "loss": 0.625,
789
+ "rewards/accuracies": 0.574999988079071,
790
+ "rewards/chosen": -3.319823741912842,
791
+ "rewards/margins": 0.6677559614181519,
792
+ "rewards/rejected": -3.987579822540283,
793
+ "step": 560
794
+ },
795
+ {
796
+ "epoch": 0.25,
797
+ "learning_rate": 8.936178918281209e-06,
798
+ "logits/chosen": -2.799701690673828,
799
+ "logits/rejected": -2.815525770187378,
800
+ "logps/chosen": -205.35971069335938,
801
+ "logps/rejected": -223.64096069335938,
802
+ "loss": 0.8421,
803
+ "rewards/accuracies": 0.4749999940395355,
804
+ "rewards/chosen": -3.762897491455078,
805
+ "rewards/margins": 0.13321921229362488,
806
+ "rewards/rejected": -3.8961167335510254,
807
+ "step": 570
808
+ },
809
+ {
810
+ "epoch": 0.26,
811
+ "learning_rate": 8.890428365517728e-06,
812
+ "logits/chosen": -2.8051438331604004,
813
+ "logits/rejected": -2.7885632514953613,
814
+ "logps/chosen": -197.3937530517578,
815
+ "logps/rejected": -187.13601684570312,
816
+ "loss": 0.7381,
817
+ "rewards/accuracies": 0.574999988079071,
818
+ "rewards/chosen": -3.4687581062316895,
819
+ "rewards/margins": 0.25684064626693726,
820
+ "rewards/rejected": -3.7255985736846924,
821
+ "step": 580
822
+ },
823
+ {
824
+ "epoch": 0.26,
825
+ "learning_rate": 8.843837036761404e-06,
826
+ "logits/chosen": -2.7467944622039795,
827
+ "logits/rejected": -2.7005391120910645,
828
+ "logps/chosen": -152.84494018554688,
829
+ "logps/rejected": -160.13241577148438,
830
+ "loss": 0.7662,
831
+ "rewards/accuracies": 0.5249999761581421,
832
+ "rewards/chosen": -3.732775926589966,
833
+ "rewards/margins": 0.06671512126922607,
834
+ "rewards/rejected": -3.7994911670684814,
835
+ "step": 590
836
+ },
837
+ {
838
+ "epoch": 0.27,
839
+ "learning_rate": 8.796415001050154e-06,
840
+ "logits/chosen": -2.7716736793518066,
841
+ "logits/rejected": -2.7501323223114014,
842
+ "logps/chosen": -221.6551513671875,
843
+ "logps/rejected": -201.65664672851562,
844
+ "loss": 0.8448,
845
+ "rewards/accuracies": 0.574999988079071,
846
+ "rewards/chosen": -3.700239896774292,
847
+ "rewards/margins": 0.12370122969150543,
848
+ "rewards/rejected": -3.823941469192505,
849
+ "step": 600
850
+ },
851
+ {
852
+ "epoch": 0.27,
853
+ "learning_rate": 8.748172506949274e-06,
854
+ "logits/chosen": -2.7913918495178223,
855
+ "logits/rejected": -2.7350287437438965,
856
+ "logps/chosen": -168.91790771484375,
857
+ "logps/rejected": -155.0623321533203,
858
+ "loss": 0.5851,
859
+ "rewards/accuracies": 0.699999988079071,
860
+ "rewards/chosen": -3.5637214183807373,
861
+ "rewards/margins": 0.42019423842430115,
862
+ "rewards/rejected": -3.9839158058166504,
863
+ "step": 610
864
+ },
865
+ {
866
+ "epoch": 0.28,
867
+ "learning_rate": 8.699119980336602e-06,
868
+ "logits/chosen": -2.7849667072296143,
869
+ "logits/rejected": -2.771721601486206,
870
+ "logps/chosen": -192.60813903808594,
871
+ "logps/rejected": -206.09646606445312,
872
+ "loss": 0.9976,
873
+ "rewards/accuracies": 0.32499998807907104,
874
+ "rewards/chosen": -3.9602694511413574,
875
+ "rewards/margins": -0.18327102065086365,
876
+ "rewards/rejected": -3.776998519897461,
877
+ "step": 620
878
+ },
879
+ {
880
+ "epoch": 0.28,
881
+ "learning_rate": 8.649268022149333e-06,
882
+ "logits/chosen": -2.7933568954467773,
883
+ "logits/rejected": -2.7272696495056152,
884
+ "logps/chosen": -179.9084930419922,
885
+ "logps/rejected": -169.74490356445312,
886
+ "loss": 0.68,
887
+ "rewards/accuracies": 0.6000000238418579,
888
+ "rewards/chosen": -3.221353054046631,
889
+ "rewards/margins": 0.3395439684391022,
890
+ "rewards/rejected": -3.5608971118927,
891
+ "step": 630
892
+ },
893
+ {
894
+ "epoch": 0.28,
895
+ "learning_rate": 8.59862740609301e-06,
896
+ "logits/chosen": -2.7812328338623047,
897
+ "logits/rejected": -2.8435587882995605,
898
+ "logps/chosen": -209.0635223388672,
899
+ "logps/rejected": -244.5385284423828,
900
+ "loss": 0.6579,
901
+ "rewards/accuracies": 0.574999988079071,
902
+ "rewards/chosen": -3.0288193225860596,
903
+ "rewards/margins": 0.4765963554382324,
904
+ "rewards/rejected": -3.505415678024292,
905
+ "step": 640
906
+ },
907
+ {
908
+ "epoch": 0.29,
909
+ "learning_rate": 8.547209076313172e-06,
910
+ "logits/chosen": -2.8104701042175293,
911
+ "logits/rejected": -2.7969369888305664,
912
+ "logps/chosen": -206.7493896484375,
913
+ "logps/rejected": -246.7706298828125,
914
+ "loss": 0.7068,
915
+ "rewards/accuracies": 0.5,
916
+ "rewards/chosen": -3.3913025856018066,
917
+ "rewards/margins": 0.37605711817741394,
918
+ "rewards/rejected": -3.767359495162964,
919
+ "step": 650
920
+ },
921
+ {
922
+ "epoch": 0.29,
923
+ "learning_rate": 8.495024145030174e-06,
924
+ "logits/chosen": -2.743499279022217,
925
+ "logits/rejected": -2.7557623386383057,
926
+ "logps/chosen": -173.17481994628906,
927
+ "logps/rejected": -186.05215454101562,
928
+ "loss": 0.6764,
929
+ "rewards/accuracies": 0.6000000238418579,
930
+ "rewards/chosen": -3.472040891647339,
931
+ "rewards/margins": 0.3358023464679718,
932
+ "rewards/rejected": -3.807842969894409,
933
+ "step": 660
934
+ },
935
+ {
936
+ "epoch": 0.3,
937
+ "learning_rate": 8.442083890137678e-06,
938
+ "logits/chosen": -2.8170254230499268,
939
+ "logits/rejected": -2.760282516479492,
940
+ "logps/chosen": -173.0248565673828,
941
+ "logps/rejected": -184.1920623779297,
942
+ "loss": 0.8291,
943
+ "rewards/accuracies": 0.4749999940395355,
944
+ "rewards/chosen": -3.5644659996032715,
945
+ "rewards/margins": 0.05758289247751236,
946
+ "rewards/rejected": -3.622048854827881,
947
+ "step": 670
948
+ },
949
+ {
950
+ "epoch": 0.3,
951
+ "learning_rate": 8.388399752765344e-06,
952
+ "logits/chosen": -2.773528814315796,
953
+ "logits/rejected": -2.758387327194214,
954
+ "logps/chosen": -204.7705078125,
955
+ "logps/rejected": -200.41160583496094,
956
+ "loss": 0.8527,
957
+ "rewards/accuracies": 0.42500001192092896,
958
+ "rewards/chosen": -3.599118709564209,
959
+ "rewards/margins": 0.017948562279343605,
960
+ "rewards/rejected": -3.617067337036133,
961
+ "step": 680
962
+ },
963
+ {
964
+ "epoch": 0.31,
965
+ "learning_rate": 8.333983334806248e-06,
966
+ "logits/chosen": -2.8039369583129883,
967
+ "logits/rejected": -2.7655069828033447,
968
+ "logps/chosen": -192.72186279296875,
969
+ "logps/rejected": -173.8263702392578,
970
+ "loss": 0.8641,
971
+ "rewards/accuracies": 0.4000000059604645,
972
+ "rewards/chosen": -3.713822603225708,
973
+ "rewards/margins": -0.06442561000585556,
974
+ "rewards/rejected": -3.6493968963623047,
975
+ "step": 690
976
+ },
977
+ {
978
+ "epoch": 0.31,
979
+ "learning_rate": 8.278846396409534e-06,
980
+ "logits/chosen": -2.797102451324463,
981
+ "logits/rejected": -2.7584991455078125,
982
+ "logps/chosen": -195.19786071777344,
983
+ "logps/rejected": -184.8418731689453,
984
+ "loss": 0.7849,
985
+ "rewards/accuracies": 0.6000000238418579,
986
+ "rewards/chosen": -3.548352003097534,
987
+ "rewards/margins": 0.16000667214393616,
988
+ "rewards/rejected": -3.7083587646484375,
989
+ "step": 700
990
+ },
991
+ {
992
+ "epoch": 0.32,
993
+ "learning_rate": 8.223000853438904e-06,
994
+ "logits/chosen": -2.8177175521850586,
995
+ "logits/rejected": -2.7559008598327637,
996
+ "logps/chosen": -218.43588256835938,
997
+ "logps/rejected": -219.35946655273438,
998
+ "loss": 0.7455,
999
+ "rewards/accuracies": 0.6000000238418579,
1000
+ "rewards/chosen": -3.5060417652130127,
1001
+ "rewards/margins": 0.2477506697177887,
1002
+ "rewards/rejected": -3.7537918090820312,
1003
+ "step": 710
1004
+ },
1005
+ {
1006
+ "epoch": 0.32,
1007
+ "learning_rate": 8.166458774897413e-06,
1008
+ "logits/chosen": -2.7866969108581543,
1009
+ "logits/rejected": -2.7426235675811768,
1010
+ "logps/chosen": -196.8046417236328,
1011
+ "logps/rejected": -180.7646484375,
1012
+ "loss": 0.6577,
1013
+ "rewards/accuracies": 0.75,
1014
+ "rewards/chosen": -3.515568494796753,
1015
+ "rewards/margins": 0.46584025025367737,
1016
+ "rewards/rejected": -3.9814085960388184,
1017
+ "step": 720
1018
+ },
1019
+ {
1020
+ "epoch": 0.32,
1021
+ "learning_rate": 8.109232380319194e-06,
1022
+ "logits/chosen": -2.781240940093994,
1023
+ "logits/rejected": -2.7888545989990234,
1024
+ "logps/chosen": -232.93215942382812,
1025
+ "logps/rejected": -232.01644897460938,
1026
+ "loss": 0.7337,
1027
+ "rewards/accuracies": 0.550000011920929,
1028
+ "rewards/chosen": -3.5336124897003174,
1029
+ "rewards/margins": 0.2553574740886688,
1030
+ "rewards/rejected": -3.7889697551727295,
1031
+ "step": 730
1032
+ },
1033
+ {
1034
+ "epoch": 0.33,
1035
+ "learning_rate": 8.051334037128661e-06,
1036
+ "logits/chosen": -2.7906103134155273,
1037
+ "logits/rejected": -2.742318630218506,
1038
+ "logps/chosen": -170.14791870117188,
1039
+ "logps/rejected": -173.310791015625,
1040
+ "loss": 0.828,
1041
+ "rewards/accuracies": 0.5249999761581421,
1042
+ "rewards/chosen": -3.540756940841675,
1043
+ "rewards/margins": 0.037487827241420746,
1044
+ "rewards/rejected": -3.578244686126709,
1045
+ "step": 740
1046
+ },
1047
+ {
1048
+ "epoch": 0.33,
1049
+ "learning_rate": 7.99277625796771e-06,
1050
+ "logits/chosen": -2.7460989952087402,
1051
+ "logits/rejected": -2.710388660430908,
1052
+ "logps/chosen": -164.4999542236328,
1053
+ "logps/rejected": -171.73757934570312,
1054
+ "loss": 0.8343,
1055
+ "rewards/accuracies": 0.5,
1056
+ "rewards/chosen": -3.3666107654571533,
1057
+ "rewards/margins": 0.040531255304813385,
1058
+ "rewards/rejected": -3.407141923904419,
1059
+ "step": 750
1060
+ },
1061
+ {
1062
+ "epoch": 0.34,
1063
+ "learning_rate": 7.933571697991582e-06,
1064
+ "logits/chosen": -2.830110549926758,
1065
+ "logits/rejected": -2.7687745094299316,
1066
+ "logps/chosen": -210.4406280517578,
1067
+ "logps/rejected": -182.27137756347656,
1068
+ "loss": 0.8217,
1069
+ "rewards/accuracies": 0.44999998807907104,
1070
+ "rewards/chosen": -3.588493824005127,
1071
+ "rewards/margins": -0.05328698828816414,
1072
+ "rewards/rejected": -3.5352070331573486,
1073
+ "step": 760
1074
+ },
1075
+ {
1076
+ "epoch": 0.34,
1077
+ "learning_rate": 7.873733152133898e-06,
1078
+ "logits/chosen": -2.751688241958618,
1079
+ "logits/rejected": -2.7940192222595215,
1080
+ "logps/chosen": -153.90414428710938,
1081
+ "logps/rejected": -158.2861328125,
1082
+ "loss": 0.8625,
1083
+ "rewards/accuracies": 0.42500001192092896,
1084
+ "rewards/chosen": -3.453404664993286,
1085
+ "rewards/margins": -0.1084330826997757,
1086
+ "rewards/rejected": -3.3449714183807373,
1087
+ "step": 770
1088
+ },
1089
+ {
1090
+ "epoch": 0.35,
1091
+ "learning_rate": 7.813273552341496e-06,
1092
+ "logits/chosen": -2.7797484397888184,
1093
+ "logits/rejected": -2.775768995285034,
1094
+ "logps/chosen": -169.4456787109375,
1095
+ "logps/rejected": -177.5587921142578,
1096
+ "loss": 0.756,
1097
+ "rewards/accuracies": 0.5249999761581421,
1098
+ "rewards/chosen": -3.326707363128662,
1099
+ "rewards/margins": 0.25994253158569336,
1100
+ "rewards/rejected": -3.5866501331329346,
1101
+ "step": 780
1102
+ },
1103
+ {
1104
+ "epoch": 0.35,
1105
+ "learning_rate": 7.75220596477966e-06,
1106
+ "logits/chosen": -2.7829766273498535,
1107
+ "logits/rejected": -2.7465267181396484,
1108
+ "logps/chosen": -164.01870727539062,
1109
+ "logps/rejected": -156.57614135742188,
1110
+ "loss": 0.6708,
1111
+ "rewards/accuracies": 0.625,
1112
+ "rewards/chosen": -3.3400473594665527,
1113
+ "rewards/margins": 0.3525925874710083,
1114
+ "rewards/rejected": -3.6926398277282715,
1115
+ "step": 790
1116
+ },
1117
+ {
1118
+ "epoch": 0.36,
1119
+ "learning_rate": 7.690543587008332e-06,
1120
+ "logits/chosen": -2.7533538341522217,
1121
+ "logits/rejected": -2.762204647064209,
1122
+ "logps/chosen": -221.1579132080078,
1123
+ "logps/rejected": -204.04983520507812,
1124
+ "loss": 0.8969,
1125
+ "rewards/accuracies": 0.5,
1126
+ "rewards/chosen": -3.536458969116211,
1127
+ "rewards/margins": 0.05794559791684151,
1128
+ "rewards/rejected": -3.5944042205810547,
1129
+ "step": 800
1130
+ },
1131
+ {
1132
+ "epoch": 0.36,
1133
+ "learning_rate": 7.628299745129943e-06,
1134
+ "logits/chosen": -2.7850310802459717,
1135
+ "logits/rejected": -2.756134510040283,
1136
+ "logps/chosen": -224.99118041992188,
1137
+ "logps/rejected": -199.38502502441406,
1138
+ "loss": 0.8558,
1139
+ "rewards/accuracies": 0.550000011920929,
1140
+ "rewards/chosen": -3.7199528217315674,
1141
+ "rewards/margins": -0.056097112596035004,
1142
+ "rewards/rejected": -3.6638553142547607,
1143
+ "step": 810
1144
+ },
1145
+ {
1146
+ "epoch": 0.36,
1147
+ "learning_rate": 7.565487890909448e-06,
1148
+ "logits/chosen": -2.8218209743499756,
1149
+ "logits/rejected": -2.775695323944092,
1150
+ "logps/chosen": -169.43869018554688,
1151
+ "logps/rejected": -147.3358612060547,
1152
+ "loss": 0.7543,
1153
+ "rewards/accuracies": 0.5,
1154
+ "rewards/chosen": -3.0327506065368652,
1155
+ "rewards/margins": 0.10092975944280624,
1156
+ "rewards/rejected": -3.133680820465088,
1157
+ "step": 820
1158
+ },
1159
+ {
1160
+ "epoch": 0.37,
1161
+ "learning_rate": 7.502121598867218e-06,
1162
+ "logits/chosen": -2.794593572616577,
1163
+ "logits/rejected": -2.8074254989624023,
1164
+ "logps/chosen": -191.18869018554688,
1165
+ "logps/rejected": -161.5567169189453,
1166
+ "loss": 0.702,
1167
+ "rewards/accuracies": 0.6499999761581421,
1168
+ "rewards/chosen": -3.0870423316955566,
1169
+ "rewards/margins": 0.3286024034023285,
1170
+ "rewards/rejected": -3.415644407272339,
1171
+ "step": 830
1172
+ },
1173
+ {
1174
+ "epoch": 0.37,
1175
+ "learning_rate": 7.438214563345389e-06,
1176
+ "logits/chosen": -2.8384017944335938,
1177
+ "logits/rejected": -2.8303287029266357,
1178
+ "logps/chosen": -200.47872924804688,
1179
+ "logps/rejected": -202.9823760986328,
1180
+ "loss": 0.9219,
1181
+ "rewards/accuracies": 0.4749999940395355,
1182
+ "rewards/chosen": -2.9461796283721924,
1183
+ "rewards/margins": -0.013927942141890526,
1184
+ "rewards/rejected": -2.932251453399658,
1185
+ "step": 840
1186
+ },
1187
+ {
1188
+ "epoch": 0.38,
1189
+ "learning_rate": 7.373780595548334e-06,
1190
+ "logits/chosen": -2.8200442790985107,
1191
+ "logits/rejected": -2.7595479488372803,
1192
+ "logps/chosen": -203.58987426757812,
1193
+ "logps/rejected": -193.07473754882812,
1194
+ "loss": 0.5825,
1195
+ "rewards/accuracies": 0.675000011920929,
1196
+ "rewards/chosen": -2.7171919345855713,
1197
+ "rewards/margins": 0.6527736783027649,
1198
+ "rewards/rejected": -3.3699657917022705,
1199
+ "step": 850
1200
+ },
1201
+ {
1202
+ "epoch": 0.38,
1203
+ "learning_rate": 7.3088336205578565e-06,
1204
+ "logits/chosen": -2.7865753173828125,
1205
+ "logits/rejected": -2.7725372314453125,
1206
+ "logps/chosen": -181.54159545898438,
1207
+ "logps/rejected": -192.08921813964844,
1208
+ "loss": 0.723,
1209
+ "rewards/accuracies": 0.6000000238418579,
1210
+ "rewards/chosen": -2.9962518215179443,
1211
+ "rewards/margins": 0.2384202927350998,
1212
+ "rewards/rejected": -3.2346718311309814,
1213
+ "step": 860
1214
+ },
1215
+ {
1216
+ "epoch": 0.39,
1217
+ "learning_rate": 7.243387674323794e-06,
1218
+ "logits/chosen": -2.7999701499938965,
1219
+ "logits/rejected": -2.7826244831085205,
1220
+ "logps/chosen": -170.237548828125,
1221
+ "logps/rejected": -182.22238159179688,
1222
+ "loss": 0.7287,
1223
+ "rewards/accuracies": 0.6499999761581421,
1224
+ "rewards/chosen": -2.636094331741333,
1225
+ "rewards/margins": 0.3836243152618408,
1226
+ "rewards/rejected": -3.019718647003174,
1227
+ "step": 870
1228
+ },
1229
+ {
1230
+ "epoch": 0.39,
1231
+ "learning_rate": 7.177456900630645e-06,
1232
+ "logits/chosen": -2.8270153999328613,
1233
+ "logits/rejected": -2.801821231842041,
1234
+ "logps/chosen": -169.65478515625,
1235
+ "logps/rejected": -149.74624633789062,
1236
+ "loss": 0.9289,
1237
+ "rewards/accuracies": 0.5,
1238
+ "rewards/chosen": -2.8085739612579346,
1239
+ "rewards/margins": -0.13477511703968048,
1240
+ "rewards/rejected": -2.6737987995147705,
1241
+ "step": 880
1242
+ },
1243
+ {
1244
+ "epoch": 0.4,
1245
+ "learning_rate": 7.111055548040911e-06,
1246
+ "logits/chosen": -2.843956708908081,
1247
+ "logits/rejected": -2.807281017303467,
1248
+ "logps/chosen": -204.63934326171875,
1249
+ "logps/rejected": -196.67213439941406,
1250
+ "loss": 0.7793,
1251
+ "rewards/accuracies": 0.5249999761581421,
1252
+ "rewards/chosen": -2.520915985107422,
1253
+ "rewards/margins": 0.05325014516711235,
1254
+ "rewards/rejected": -2.5741655826568604,
1255
+ "step": 890
1256
+ },
1257
+ {
1258
+ "epoch": 0.4,
1259
+ "learning_rate": 7.044197966815773e-06,
1260
+ "logits/chosen": -2.8285329341888428,
1261
+ "logits/rejected": -2.735088348388672,
1262
+ "logps/chosen": -153.91452026367188,
1263
+ "logps/rejected": -138.55552673339844,
1264
+ "loss": 0.6409,
1265
+ "rewards/accuracies": 0.5,
1266
+ "rewards/chosen": -2.84395694732666,
1267
+ "rewards/margins": 0.25751471519470215,
1268
+ "rewards/rejected": -3.1014719009399414,
1269
+ "step": 900
1270
+ },
1271
+ {
1272
+ "epoch": 0.4,
1273
+ "learning_rate": 6.976898605813798e-06,
1274
+ "logits/chosen": -2.822996139526367,
1275
+ "logits/rejected": -2.8268377780914307,
1276
+ "logps/chosen": -167.09097290039062,
1277
+ "logps/rejected": -203.2536163330078,
1278
+ "loss": 0.8486,
1279
+ "rewards/accuracies": 0.550000011920929,
1280
+ "rewards/chosen": -2.88107967376709,
1281
+ "rewards/margins": 0.07577097415924072,
1282
+ "rewards/rejected": -2.95685076713562,
1283
+ "step": 910
1284
+ },
1285
+ {
1286
+ "epoch": 0.41,
1287
+ "learning_rate": 6.90917200936835e-06,
1288
+ "logits/chosen": -2.7948951721191406,
1289
+ "logits/rejected": -2.783585548400879,
1290
+ "logps/chosen": -145.66119384765625,
1291
+ "logps/rejected": -160.69918823242188,
1292
+ "loss": 0.8522,
1293
+ "rewards/accuracies": 0.44999998807907104,
1294
+ "rewards/chosen": -2.815007448196411,
1295
+ "rewards/margins": 0.01567123830318451,
1296
+ "rewards/rejected": -2.8306784629821777,
1297
+ "step": 920
1298
+ },
1299
+ {
1300
+ "epoch": 0.41,
1301
+ "learning_rate": 6.841032814144345e-06,
1302
+ "logits/chosen": -2.7837324142456055,
1303
+ "logits/rejected": -2.7920632362365723,
1304
+ "logps/chosen": -150.3719940185547,
1305
+ "logps/rejected": -168.3992919921875,
1306
+ "loss": 0.661,
1307
+ "rewards/accuracies": 0.574999988079071,
1308
+ "rewards/chosen": -2.577317476272583,
1309
+ "rewards/margins": 0.19722715020179749,
1310
+ "rewards/rejected": -2.7745444774627686,
1311
+ "step": 930
1312
+ },
1313
+ {
1314
+ "epoch": 0.42,
1315
+ "learning_rate": 6.772495745975067e-06,
1316
+ "logits/chosen": -2.822993278503418,
1317
+ "logits/rejected": -2.793628454208374,
1318
+ "logps/chosen": -179.533447265625,
1319
+ "logps/rejected": -170.6478729248047,
1320
+ "loss": 0.6447,
1321
+ "rewards/accuracies": 0.675000011920929,
1322
+ "rewards/chosen": -2.440274477005005,
1323
+ "rewards/margins": 0.4804176390171051,
1324
+ "rewards/rejected": -2.920691967010498,
1325
+ "step": 940
1326
+ },
1327
+ {
1328
+ "epoch": 0.42,
1329
+ "learning_rate": 6.703575616679709e-06,
1330
+ "logits/chosen": -2.8847832679748535,
1331
+ "logits/rejected": -2.862794876098633,
1332
+ "logps/chosen": -203.72158813476562,
1333
+ "logps/rejected": -196.6941375732422,
1334
+ "loss": 0.6708,
1335
+ "rewards/accuracies": 0.550000011920929,
1336
+ "rewards/chosen": -2.6080403327941895,
1337
+ "rewards/margins": 0.2483837604522705,
1338
+ "rewards/rejected": -2.856423854827881,
1339
+ "step": 950
1340
+ },
1341
+ {
1342
+ "epoch": 0.43,
1343
+ "learning_rate": 6.634287320862334e-06,
1344
+ "logits/chosen": -2.8792309761047363,
1345
+ "logits/rejected": -2.7815871238708496,
1346
+ "logps/chosen": -189.05697631835938,
1347
+ "logps/rejected": -170.0454559326172,
1348
+ "loss": 0.7327,
1349
+ "rewards/accuracies": 0.6000000238418579,
1350
+ "rewards/chosen": -2.4347012042999268,
1351
+ "rewards/margins": 0.20491544902324677,
1352
+ "rewards/rejected": -2.6396164894104004,
1353
+ "step": 960
1354
+ },
1355
+ {
1356
+ "epoch": 0.43,
1357
+ "learning_rate": 6.564645832692938e-06,
1358
+ "logits/chosen": -2.8398923873901367,
1359
+ "logits/rejected": -2.821370840072632,
1360
+ "logps/chosen": -162.66635131835938,
1361
+ "logps/rejected": -177.49655151367188,
1362
+ "loss": 0.6929,
1363
+ "rewards/accuracies": 0.574999988079071,
1364
+ "rewards/chosen": -2.672360420227051,
1365
+ "rewards/margins": 0.2537608742713928,
1366
+ "rewards/rejected": -2.926121473312378,
1367
+ "step": 970
1368
+ },
1369
+ {
1370
+ "epoch": 0.44,
1371
+ "learning_rate": 6.494666202671329e-06,
1372
+ "logits/chosen": -2.828071355819702,
1373
+ "logits/rejected": -2.7870450019836426,
1374
+ "logps/chosen": -175.61985778808594,
1375
+ "logps/rejected": -147.78115844726562,
1376
+ "loss": 0.9773,
1377
+ "rewards/accuracies": 0.3499999940395355,
1378
+ "rewards/chosen": -2.7835516929626465,
1379
+ "rewards/margins": -0.18404017388820648,
1380
+ "rewards/rejected": -2.59951114654541,
1381
+ "step": 980
1382
+ },
1383
+ {
1384
+ "epoch": 0.44,
1385
+ "learning_rate": 6.424363554374496e-06,
1386
+ "logits/chosen": -2.8303914070129395,
1387
+ "logits/rejected": -2.8009707927703857,
1388
+ "logps/chosen": -184.24453735351562,
1389
+ "logps/rejected": -177.34475708007812,
1390
+ "loss": 0.8386,
1391
+ "rewards/accuracies": 0.4749999940395355,
1392
+ "rewards/chosen": -2.654391050338745,
1393
+ "rewards/margins": 0.08457916229963303,
1394
+ "rewards/rejected": -2.7389702796936035,
1395
+ "step": 990
1396
+ },
1397
+ {
1398
+ "epoch": 0.44,
1399
+ "learning_rate": 6.353753081188194e-06,
1400
+ "logits/chosen": -2.8116297721862793,
1401
+ "logits/rejected": -2.8462095260620117,
1402
+ "logps/chosen": -154.33535766601562,
1403
+ "logps/rejected": -173.16500854492188,
1404
+ "loss": 0.8474,
1405
+ "rewards/accuracies": 0.44999998807907104,
1406
+ "rewards/chosen": -2.643991470336914,
1407
+ "rewards/margins": 0.021061301231384277,
1408
+ "rewards/rejected": -2.665052890777588,
1409
+ "step": 1000
1410
+ },
1411
+ {
1412
+ "epoch": 0.45,
1413
+ "learning_rate": 6.28285004302345e-06,
1414
+ "logits/chosen": -2.813953399658203,
1415
+ "logits/rejected": -2.7975823879241943,
1416
+ "logps/chosen": -157.3970489501953,
1417
+ "logps/rejected": -169.88925170898438,
1418
+ "loss": 0.7732,
1419
+ "rewards/accuracies": 0.574999988079071,
1420
+ "rewards/chosen": -2.4602997303009033,
1421
+ "rewards/margins": 0.03091360628604889,
1422
+ "rewards/rejected": -2.491213321685791,
1423
+ "step": 1010
1424
+ },
1425
+ {
1426
+ "epoch": 0.45,
1427
+ "learning_rate": 6.2116697630186685e-06,
1428
+ "logits/chosen": -2.8499863147735596,
1429
+ "logits/rejected": -2.761946201324463,
1430
+ "logps/chosen": -179.04576110839844,
1431
+ "logps/rejected": -169.37741088867188,
1432
+ "loss": 0.7098,
1433
+ "rewards/accuracies": 0.550000011920929,
1434
+ "rewards/chosen": -2.956383228302002,
1435
+ "rewards/margins": 0.20148436725139618,
1436
+ "rewards/rejected": -3.157867431640625,
1437
+ "step": 1020
1438
+ },
1439
+ {
1440
+ "epoch": 0.46,
1441
+ "learning_rate": 6.140227624228098e-06,
1442
+ "logits/chosen": -2.829965353012085,
1443
+ "logits/rejected": -2.7929511070251465,
1444
+ "logps/chosen": -188.39486694335938,
1445
+ "logps/rejected": -196.4234161376953,
1446
+ "loss": 0.8087,
1447
+ "rewards/accuracies": 0.42500001192092896,
1448
+ "rewards/chosen": -3.0763561725616455,
1449
+ "rewards/margins": 0.08712232112884521,
1450
+ "rewards/rejected": -3.163478374481201,
1451
+ "step": 1030
1452
+ },
1453
+ {
1454
+ "epoch": 0.46,
1455
+ "learning_rate": 6.068539066297331e-06,
1456
+ "logits/chosen": -2.820751428604126,
1457
+ "logits/rejected": -2.7950470447540283,
1458
+ "logps/chosen": -193.37313842773438,
1459
+ "logps/rejected": -182.583251953125,
1460
+ "loss": 0.7543,
1461
+ "rewards/accuracies": 0.550000011920929,
1462
+ "rewards/chosen": -2.8425216674804688,
1463
+ "rewards/margins": 0.28575873374938965,
1464
+ "rewards/rejected": -3.1282806396484375,
1465
+ "step": 1040
1466
+ },
1467
+ {
1468
+ "epoch": 0.47,
1469
+ "learning_rate": 5.996619582126586e-06,
1470
+ "logits/chosen": -2.803011894226074,
1471
+ "logits/rejected": -2.792786121368408,
1472
+ "logps/chosen": -192.3000946044922,
1473
+ "logps/rejected": -199.48699951171875,
1474
+ "loss": 0.8482,
1475
+ "rewards/accuracies": 0.5249999761581421,
1476
+ "rewards/chosen": -3.1065292358398438,
1477
+ "rewards/margins": 0.10464553534984589,
1478
+ "rewards/rejected": -3.211174726486206,
1479
+ "step": 1050
1480
+ },
1481
+ {
1482
+ "epoch": 0.47,
1483
+ "learning_rate": 5.924484714522473e-06,
1484
+ "logits/chosen": -2.7662460803985596,
1485
+ "logits/rejected": -2.782365322113037,
1486
+ "logps/chosen": -188.29409790039062,
1487
+ "logps/rejected": -161.1019744873047,
1488
+ "loss": 0.6869,
1489
+ "rewards/accuracies": 0.6499999761581421,
1490
+ "rewards/chosen": -2.632988452911377,
1491
+ "rewards/margins": 0.358073890209198,
1492
+ "rewards/rejected": -2.9910624027252197,
1493
+ "step": 1060
1494
+ },
1495
+ {
1496
+ "epoch": 0.48,
1497
+ "learning_rate": 5.8521500528389685e-06,
1498
+ "logits/chosen": -2.8015599250793457,
1499
+ "logits/rejected": -2.7828166484832764,
1500
+ "logps/chosen": -175.898681640625,
1501
+ "logps/rejected": -171.10836791992188,
1502
+ "loss": 0.6562,
1503
+ "rewards/accuracies": 0.574999988079071,
1504
+ "rewards/chosen": -2.830270767211914,
1505
+ "rewards/margins": 0.31004253029823303,
1506
+ "rewards/rejected": -3.1403133869171143,
1507
+ "step": 1070
1508
+ },
1509
+ {
1510
+ "epoch": 0.48,
1511
+ "learning_rate": 5.779631229608352e-06,
1512
+ "logits/chosen": -2.814619541168213,
1513
+ "logits/rejected": -2.7792232036590576,
1514
+ "logps/chosen": -183.1345672607422,
1515
+ "logps/rejected": -180.23687744140625,
1516
+ "loss": 0.629,
1517
+ "rewards/accuracies": 0.675000011920929,
1518
+ "rewards/chosen": -2.774634599685669,
1519
+ "rewards/margins": 0.35699692368507385,
1520
+ "rewards/rejected": -3.13163161277771,
1521
+ "step": 1080
1522
+ },
1523
+ {
1524
+ "epoch": 0.48,
1525
+ "learning_rate": 5.706943917162786e-06,
1526
+ "logits/chosen": -2.8597893714904785,
1527
+ "logits/rejected": -2.7718067169189453,
1528
+ "logps/chosen": -187.40206909179688,
1529
+ "logps/rejected": -167.49673461914062,
1530
+ "loss": 0.7502,
1531
+ "rewards/accuracies": 0.574999988079071,
1532
+ "rewards/chosen": -2.7406411170959473,
1533
+ "rewards/margins": 0.28711724281311035,
1534
+ "rewards/rejected": -3.0277581214904785,
1535
+ "step": 1090
1536
+ },
1537
+ {
1538
+ "epoch": 0.49,
1539
+ "learning_rate": 5.634103824247312e-06,
1540
+ "logits/chosen": -2.8031020164489746,
1541
+ "logits/rejected": -2.770418643951416,
1542
+ "logps/chosen": -166.7908935546875,
1543
+ "logps/rejected": -169.6998748779297,
1544
+ "loss": 0.7387,
1545
+ "rewards/accuracies": 0.5249999761581421,
1546
+ "rewards/chosen": -2.8097705841064453,
1547
+ "rewards/margins": 0.22710688412189484,
1548
+ "rewards/rejected": -3.036877393722534,
1549
+ "step": 1100
1550
+ },
1551
+ {
1552
+ "epoch": 0.49,
1553
+ "learning_rate": 5.561126692624963e-06,
1554
+ "logits/chosen": -2.8061394691467285,
1555
+ "logits/rejected": -2.8010807037353516,
1556
+ "logps/chosen": -207.2965850830078,
1557
+ "logps/rejected": -172.8783721923828,
1558
+ "loss": 0.9337,
1559
+ "rewards/accuracies": 0.375,
1560
+ "rewards/chosen": -3.002063035964966,
1561
+ "rewards/margins": -0.22279544174671173,
1562
+ "rewards/rejected": -2.7792675495147705,
1563
+ "step": 1110
1564
+ },
1565
+ {
1566
+ "epoch": 0.5,
1567
+ "learning_rate": 5.488028293674759e-06,
1568
+ "logits/chosen": -2.7473597526550293,
1569
+ "logits/rejected": -2.86487078666687,
1570
+ "logps/chosen": -145.13525390625,
1571
+ "logps/rejected": -196.2593231201172,
1572
+ "loss": 0.7441,
1573
+ "rewards/accuracies": 0.574999988079071,
1574
+ "rewards/chosen": -2.414668321609497,
1575
+ "rewards/margins": 0.26904162764549255,
1576
+ "rewards/rejected": -2.6837100982666016,
1577
+ "step": 1120
1578
+ },
1579
+ {
1580
+ "epoch": 0.5,
1581
+ "learning_rate": 5.414824424983282e-06,
1582
+ "logits/chosen": -2.8032655715942383,
1583
+ "logits/rejected": -2.835156202316284,
1584
+ "logps/chosen": -174.91506958007812,
1585
+ "logps/rejected": -200.90908813476562,
1586
+ "loss": 0.8901,
1587
+ "rewards/accuracies": 0.4749999940395355,
1588
+ "rewards/chosen": -3.0149245262145996,
1589
+ "rewards/margins": -0.0794035792350769,
1590
+ "rewards/rejected": -2.935521364212036,
1591
+ "step": 1130
1592
+ },
1593
+ {
1594
+ "epoch": 0.51,
1595
+ "learning_rate": 5.341530906930604e-06,
1596
+ "logits/chosen": -2.849372386932373,
1597
+ "logits/rejected": -2.8182034492492676,
1598
+ "logps/chosen": -200.5182342529297,
1599
+ "logps/rejected": -165.98849487304688,
1600
+ "loss": 0.8282,
1601
+ "rewards/accuracies": 0.5249999761581421,
1602
+ "rewards/chosen": -2.665292263031006,
1603
+ "rewards/margins": 0.001919907284900546,
1604
+ "rewards/rejected": -2.6672122478485107,
1605
+ "step": 1140
1606
+ },
1607
+ {
1608
+ "epoch": 0.51,
1609
+ "learning_rate": 5.268163579271276e-06,
1610
+ "logits/chosen": -2.8164515495300293,
1611
+ "logits/rejected": -2.777838706970215,
1612
+ "logps/chosen": -158.1012420654297,
1613
+ "logps/rejected": -157.5447540283203,
1614
+ "loss": 0.5828,
1615
+ "rewards/accuracies": 0.75,
1616
+ "rewards/chosen": -2.521636962890625,
1617
+ "rewards/margins": 0.5487133264541626,
1618
+ "rewards/rejected": -3.070350170135498,
1619
+ "step": 1150
1620
+ },
1621
+ {
1622
+ "epoch": 0.52,
1623
+ "learning_rate": 5.1947382977111374e-06,
1624
+ "logits/chosen": -2.806366443634033,
1625
+ "logits/rejected": -2.7530558109283447,
1626
+ "logps/chosen": -184.51922607421875,
1627
+ "logps/rejected": -184.57203674316406,
1628
+ "loss": 0.7037,
1629
+ "rewards/accuracies": 0.5249999761581421,
1630
+ "rewards/chosen": -2.7491421699523926,
1631
+ "rewards/margins": 0.25690436363220215,
1632
+ "rewards/rejected": -3.006046772003174,
1633
+ "step": 1160
1634
+ },
1635
+ {
1636
+ "epoch": 0.52,
1637
+ "learning_rate": 5.1212709304806774e-06,
1638
+ "logits/chosen": -2.837092399597168,
1639
+ "logits/rejected": -2.8233399391174316,
1640
+ "logps/chosen": -170.21676635742188,
1641
+ "logps/rejected": -175.38031005859375,
1642
+ "loss": 0.8019,
1643
+ "rewards/accuracies": 0.5249999761581421,
1644
+ "rewards/chosen": -2.616921901702881,
1645
+ "rewards/margins": 0.020748604089021683,
1646
+ "rewards/rejected": -2.6376705169677734,
1647
+ "step": 1170
1648
+ },
1649
+ {
1650
+ "epoch": 0.52,
1651
+ "learning_rate": 5.047777354905685e-06,
1652
+ "logits/chosen": -2.8479232788085938,
1653
+ "logits/rejected": -2.8166935443878174,
1654
+ "logps/chosen": -165.58253479003906,
1655
+ "logps/rejected": -178.3239288330078,
1656
+ "loss": 0.6336,
1657
+ "rewards/accuracies": 0.699999988079071,
1658
+ "rewards/chosen": -2.5399913787841797,
1659
+ "rewards/margins": 0.3785735070705414,
1660
+ "rewards/rejected": -2.918564796447754,
1661
+ "step": 1180
1662
+ },
1663
+ {
1664
+ "epoch": 0.53,
1665
+ "learning_rate": 4.974273453975942e-06,
1666
+ "logits/chosen": -2.8627452850341797,
1667
+ "logits/rejected": -2.776982069015503,
1668
+ "logps/chosen": -198.88880920410156,
1669
+ "logps/rejected": -198.97647094726562,
1670
+ "loss": 0.6924,
1671
+ "rewards/accuracies": 0.625,
1672
+ "rewards/chosen": -2.6049208641052246,
1673
+ "rewards/margins": 0.4274630546569824,
1674
+ "rewards/rejected": -3.032383918762207,
1675
+ "step": 1190
1676
+ },
1677
+ {
1678
+ "epoch": 0.53,
1679
+ "learning_rate": 4.900775112912684e-06,
1680
+ "logits/chosen": -2.7575716972351074,
1681
+ "logits/rejected": -2.7695202827453613,
1682
+ "logps/chosen": -151.79171752929688,
1683
+ "logps/rejected": -155.3359832763672,
1684
+ "loss": 0.6988,
1685
+ "rewards/accuracies": 0.5,
1686
+ "rewards/chosen": -2.552367925643921,
1687
+ "rewards/margins": 0.287198007106781,
1688
+ "rewards/rejected": -2.8395657539367676,
1689
+ "step": 1200
1690
+ },
1691
+ {
1692
+ "epoch": 0.54,
1693
+ "learning_rate": 4.827298215735577e-06,
1694
+ "logits/chosen": -2.852078914642334,
1695
+ "logits/rejected": -2.7572147846221924,
1696
+ "logps/chosen": -183.82571411132812,
1697
+ "logps/rejected": -173.97152709960938,
1698
+ "loss": 0.8329,
1699
+ "rewards/accuracies": 0.6000000238418579,
1700
+ "rewards/chosen": -2.600367307662964,
1701
+ "rewards/margins": 0.07606508582830429,
1702
+ "rewards/rejected": -2.6764326095581055,
1703
+ "step": 1210
1704
+ },
1705
+ {
1706
+ "epoch": 0.54,
1707
+ "learning_rate": 4.75385864182997e-06,
1708
+ "logits/chosen": -2.801530361175537,
1709
+ "logits/rejected": -2.7251269817352295,
1710
+ "logps/chosen": -156.46092224121094,
1711
+ "logps/rejected": -130.04135131835938,
1712
+ "loss": 1.0341,
1713
+ "rewards/accuracies": 0.4000000059604645,
1714
+ "rewards/chosen": -3.1289448738098145,
1715
+ "rewards/margins": -0.38991624116897583,
1716
+ "rewards/rejected": -2.7390286922454834,
1717
+ "step": 1220
1718
+ },
1719
+ {
1720
+ "epoch": 0.55,
1721
+ "learning_rate": 4.680472262515123e-06,
1722
+ "logits/chosen": -2.8145370483398438,
1723
+ "logits/rejected": -2.8123114109039307,
1724
+ "logps/chosen": -132.849365234375,
1725
+ "logps/rejected": -148.0765838623047,
1726
+ "loss": 0.7382,
1727
+ "rewards/accuracies": 0.550000011920929,
1728
+ "rewards/chosen": -2.4975039958953857,
1729
+ "rewards/margins": 0.32709741592407227,
1730
+ "rewards/rejected": -2.824601650238037,
1731
+ "step": 1230
1732
+ },
1733
+ {
1734
+ "epoch": 0.55,
1735
+ "learning_rate": 4.607154937614209e-06,
1736
+ "logits/chosen": -2.829385757446289,
1737
+ "logits/rejected": -2.8185107707977295,
1738
+ "logps/chosen": -173.2303466796875,
1739
+ "logps/rejected": -166.53912353515625,
1740
+ "loss": 0.6272,
1741
+ "rewards/accuracies": 0.6499999761581421,
1742
+ "rewards/chosen": -2.501225709915161,
1743
+ "rewards/margins": 0.4133445620536804,
1744
+ "rewards/rejected": -2.914569854736328,
1745
+ "step": 1240
1746
+ },
1747
+ {
1748
+ "epoch": 0.56,
1749
+ "learning_rate": 4.533922512026772e-06,
1750
+ "logits/chosen": -2.808532238006592,
1751
+ "logits/rejected": -2.8340225219726562,
1752
+ "logps/chosen": -156.80160522460938,
1753
+ "logps/rejected": -195.7024383544922,
1754
+ "loss": 0.8639,
1755
+ "rewards/accuracies": 0.42500001192092896,
1756
+ "rewards/chosen": -2.797833204269409,
1757
+ "rewards/margins": -0.15304505825042725,
1758
+ "rewards/rejected": -2.6447882652282715,
1759
+ "step": 1250
1760
+ },
1761
+ {
1762
+ "epoch": 0.56,
1763
+ "learning_rate": 4.4607908123044235e-06,
1764
+ "logits/chosen": -2.762845516204834,
1765
+ "logits/rejected": -2.80631685256958,
1766
+ "logps/chosen": -164.12916564941406,
1767
+ "logps/rejected": -168.5762481689453,
1768
+ "loss": 0.7842,
1769
+ "rewards/accuracies": 0.574999988079071,
1770
+ "rewards/chosen": -2.7107739448547363,
1771
+ "rewards/margins": 0.14155347645282745,
1772
+ "rewards/rejected": -2.852327585220337,
1773
+ "step": 1260
1774
+ },
1775
+ {
1776
+ "epoch": 0.56,
1777
+ "learning_rate": 4.387775643230513e-06,
1778
+ "logits/chosen": -2.8005309104919434,
1779
+ "logits/rejected": -2.7626125812530518,
1780
+ "logps/chosen": -164.07000732421875,
1781
+ "logps/rejected": -161.9875946044922,
1782
+ "loss": 0.817,
1783
+ "rewards/accuracies": 0.550000011920929,
1784
+ "rewards/chosen": -2.8267662525177,
1785
+ "rewards/margins": 0.035615064203739166,
1786
+ "rewards/rejected": -2.8623814582824707,
1787
+ "step": 1270
1788
+ },
1789
+ {
1790
+ "epoch": 0.57,
1791
+ "learning_rate": 4.3148927844044845e-06,
1792
+ "logits/chosen": -2.8500914573669434,
1793
+ "logits/rejected": -2.7665367126464844,
1794
+ "logps/chosen": -201.25502014160156,
1795
+ "logps/rejected": -181.4254608154297,
1796
+ "loss": 0.7916,
1797
+ "rewards/accuracies": 0.6000000238418579,
1798
+ "rewards/chosen": -2.674546241760254,
1799
+ "rewards/margins": 0.15138781070709229,
1800
+ "rewards/rejected": -2.8259339332580566,
1801
+ "step": 1280
1802
+ },
1803
+ {
1804
+ "epoch": 0.57,
1805
+ "learning_rate": 4.2421579868316835e-06,
1806
+ "logits/chosen": -2.871143341064453,
1807
+ "logits/rejected": -2.800727605819702,
1808
+ "logps/chosen": -213.4442901611328,
1809
+ "logps/rejected": -197.76211547851562,
1810
+ "loss": 0.7817,
1811
+ "rewards/accuracies": 0.6499999761581421,
1812
+ "rewards/chosen": -2.7138214111328125,
1813
+ "rewards/margins": 0.09940443933010101,
1814
+ "rewards/rejected": -2.8132259845733643,
1815
+ "step": 1290
1816
+ },
1817
+ {
1818
+ "epoch": 0.58,
1819
+ "learning_rate": 4.169586969519349e-06,
1820
+ "logits/chosen": -2.839171886444092,
1821
+ "logits/rejected": -2.7628910541534424,
1822
+ "logps/chosen": -185.0808563232422,
1823
+ "logps/rejected": -156.76638793945312,
1824
+ "loss": 0.8095,
1825
+ "rewards/accuracies": 0.550000011920929,
1826
+ "rewards/chosen": -2.863227128982544,
1827
+ "rewards/margins": 0.06683845818042755,
1828
+ "rewards/rejected": -2.930065631866455,
1829
+ "step": 1300
1830
+ },
1831
+ {
1832
+ "epoch": 0.58,
1833
+ "learning_rate": 4.097195416079519e-06,
1834
+ "logits/chosen": -2.7593963146209717,
1835
+ "logits/rejected": -2.7466847896575928,
1836
+ "logps/chosen": -167.2697296142578,
1837
+ "logps/rejected": -151.8631134033203,
1838
+ "loss": 0.7299,
1839
+ "rewards/accuracies": 0.4749999940395355,
1840
+ "rewards/chosen": -2.843665361404419,
1841
+ "rewards/margins": 0.11098279803991318,
1842
+ "rewards/rejected": -2.954648494720459,
1843
+ "step": 1310
1844
+ },
1845
+ {
1846
+ "epoch": 0.59,
1847
+ "learning_rate": 4.024998971339572e-06,
1848
+ "logits/chosen": -2.819622039794922,
1849
+ "logits/rejected": -2.7979581356048584,
1850
+ "logps/chosen": -187.9668426513672,
1851
+ "logps/rejected": -220.19497680664062,
1852
+ "loss": 0.6732,
1853
+ "rewards/accuracies": 0.574999988079071,
1854
+ "rewards/chosen": -2.7116570472717285,
1855
+ "rewards/margins": 0.3411490023136139,
1856
+ "rewards/rejected": -3.0528059005737305,
1857
+ "step": 1320
1858
+ },
1859
+ {
1860
+ "epoch": 0.59,
1861
+ "learning_rate": 3.953013237961169e-06,
1862
+ "logits/chosen": -2.8445045948028564,
1863
+ "logits/rejected": -2.7679443359375,
1864
+ "logps/chosen": -195.4940185546875,
1865
+ "logps/rejected": -221.0810546875,
1866
+ "loss": 0.6838,
1867
+ "rewards/accuracies": 0.574999988079071,
1868
+ "rewards/chosen": -2.7768163681030273,
1869
+ "rewards/margins": 0.34462517499923706,
1870
+ "rewards/rejected": -3.12144136428833,
1871
+ "step": 1330
1872
+ },
1873
+ {
1874
+ "epoch": 0.6,
1875
+ "learning_rate": 3.8812537730683e-06,
1876
+ "logits/chosen": -2.7969844341278076,
1877
+ "logits/rejected": -2.824723482131958,
1878
+ "logps/chosen": -163.15548706054688,
1879
+ "logps/rejected": -178.32977294921875,
1880
+ "loss": 0.7611,
1881
+ "rewards/accuracies": 0.5249999761581421,
1882
+ "rewards/chosen": -2.8133432865142822,
1883
+ "rewards/margins": 0.08294029533863068,
1884
+ "rewards/rejected": -2.8962836265563965,
1885
+ "step": 1340
1886
+ },
1887
+ {
1888
+ "epoch": 0.6,
1889
+ "learning_rate": 3.80973608488517e-06,
1890
+ "logits/chosen": -2.7507481575012207,
1891
+ "logits/rejected": -2.7548985481262207,
1892
+ "logps/chosen": -138.450927734375,
1893
+ "logps/rejected": -146.4220733642578,
1894
+ "loss": 0.707,
1895
+ "rewards/accuracies": 0.4749999940395355,
1896
+ "rewards/chosen": -2.846163511276245,
1897
+ "rewards/margins": 0.16207213699817657,
1898
+ "rewards/rejected": -3.0082356929779053,
1899
+ "step": 1350
1900
+ },
1901
+ {
1902
+ "epoch": 0.6,
1903
+ "learning_rate": 3.73847562938465e-06,
1904
+ "logits/chosen": -2.8039064407348633,
1905
+ "logits/rejected": -2.735996723175049,
1906
+ "logps/chosen": -167.43582153320312,
1907
+ "logps/rejected": -138.5779571533203,
1908
+ "loss": 0.7448,
1909
+ "rewards/accuracies": 0.574999988079071,
1910
+ "rewards/chosen": -2.9816246032714844,
1911
+ "rewards/margins": 0.11592672020196915,
1912
+ "rewards/rejected": -3.0975515842437744,
1913
+ "step": 1360
1914
+ },
1915
+ {
1916
+ "epoch": 0.61,
1917
+ "learning_rate": 3.6674878069480345e-06,
1918
+ "logits/chosen": -2.812595844268799,
1919
+ "logits/rejected": -2.8279619216918945,
1920
+ "logps/chosen": -207.8079376220703,
1921
+ "logps/rejected": -227.8889923095703,
1922
+ "loss": 0.6799,
1923
+ "rewards/accuracies": 0.6000000238418579,
1924
+ "rewards/chosen": -2.651724338531494,
1925
+ "rewards/margins": 0.2655607759952545,
1926
+ "rewards/rejected": -2.9172849655151367,
1927
+ "step": 1370
1928
+ },
1929
+ {
1930
+ "epoch": 0.61,
1931
+ "learning_rate": 3.5967879590368e-06,
1932
+ "logits/chosen": -2.8392462730407715,
1933
+ "logits/rejected": -2.80900239944458,
1934
+ "logps/chosen": -214.54019165039062,
1935
+ "logps/rejected": -240.7262420654297,
1936
+ "loss": 0.7693,
1937
+ "rewards/accuracies": 0.6000000238418579,
1938
+ "rewards/chosen": -2.56378436088562,
1939
+ "rewards/margins": 0.14276638627052307,
1940
+ "rewards/rejected": -2.7065505981445312,
1941
+ "step": 1380
1942
+ },
1943
+ {
1944
+ "epoch": 0.62,
1945
+ "learning_rate": 3.5263913648770974e-06,
1946
+ "logits/chosen": -2.7690839767456055,
1947
+ "logits/rejected": -2.796400785446167,
1948
+ "logps/chosen": -175.9673614501953,
1949
+ "logps/rejected": -169.49261474609375,
1950
+ "loss": 0.8555,
1951
+ "rewards/accuracies": 0.4749999940395355,
1952
+ "rewards/chosen": -3.1168508529663086,
1953
+ "rewards/margins": -0.02891675755381584,
1954
+ "rewards/rejected": -3.0879340171813965,
1955
+ "step": 1390
1956
+ },
1957
+ {
1958
+ "epoch": 0.62,
1959
+ "learning_rate": 3.456313238157697e-06,
1960
+ "logits/chosen": -2.8202641010284424,
1961
+ "logits/rejected": -2.775507688522339,
1962
+ "logps/chosen": -179.59512329101562,
1963
+ "logps/rejected": -172.36398315429688,
1964
+ "loss": 0.5841,
1965
+ "rewards/accuracies": 0.7749999761581421,
1966
+ "rewards/chosen": -2.501037120819092,
1967
+ "rewards/margins": 0.5284804105758667,
1968
+ "rewards/rejected": -3.029517412185669,
1969
+ "step": 1400
1970
+ },
1971
+ {
1972
+ "epoch": 0.63,
1973
+ "learning_rate": 3.3865687237421047e-06,
1974
+ "logits/chosen": -2.8022942543029785,
1975
+ "logits/rejected": -2.839221954345703,
1976
+ "logps/chosen": -178.23875427246094,
1977
+ "logps/rejected": -220.74722290039062,
1978
+ "loss": 0.7175,
1979
+ "rewards/accuracies": 0.6000000238418579,
1980
+ "rewards/chosen": -2.740363359451294,
1981
+ "rewards/margins": 0.14777664840221405,
1982
+ "rewards/rejected": -2.8881397247314453,
1983
+ "step": 1410
1984
+ },
1985
+ {
1986
+ "epoch": 0.63,
1987
+ "learning_rate": 3.317172894395541e-06,
1988
+ "logits/chosen": -2.8122408390045166,
1989
+ "logits/rejected": -2.7978949546813965,
1990
+ "logps/chosen": -193.39605712890625,
1991
+ "logps/rejected": -197.32260131835938,
1992
+ "loss": 0.7203,
1993
+ "rewards/accuracies": 0.574999988079071,
1994
+ "rewards/chosen": -2.6169321537017822,
1995
+ "rewards/margins": 0.16434483230113983,
1996
+ "rewards/rejected": -2.7812769412994385,
1997
+ "step": 1420
1998
+ },
1999
+ {
2000
+ "epoch": 0.64,
2001
+ "learning_rate": 3.2481407475274995e-06,
2002
+ "logits/chosen": -2.812182664871216,
2003
+ "logits/rejected": -2.8286237716674805,
2004
+ "logps/chosen": -197.23226928710938,
2005
+ "logps/rejected": -221.1904754638672,
2006
+ "loss": 0.6232,
2007
+ "rewards/accuracies": 0.675000011920929,
2008
+ "rewards/chosen": -2.409712314605713,
2009
+ "rewards/margins": 0.39808765053749084,
2010
+ "rewards/rejected": -2.8077995777130127,
2011
+ "step": 1430
2012
+ },
2013
+ {
2014
+ "epoch": 0.64,
2015
+ "learning_rate": 3.179487201950607e-06,
2016
+ "logits/chosen": -2.8621065616607666,
2017
+ "logits/rejected": -2.7421019077301025,
2018
+ "logps/chosen": -196.08909606933594,
2019
+ "logps/rejected": -170.71444702148438,
2020
+ "loss": 0.6596,
2021
+ "rewards/accuracies": 0.625,
2022
+ "rewards/chosen": -2.5933759212493896,
2023
+ "rewards/margins": 0.4136047959327698,
2024
+ "rewards/rejected": -3.0069806575775146,
2025
+ "step": 1440
2026
+ },
2027
+ {
2028
+ "epoch": 0.64,
2029
+ "learning_rate": 3.111227094656457e-06,
2030
+ "logits/chosen": -2.8379364013671875,
2031
+ "logits/rejected": -2.763042688369751,
2032
+ "logps/chosen": -183.64736938476562,
2033
+ "logps/rejected": -172.20584106445312,
2034
+ "loss": 0.6305,
2035
+ "rewards/accuracies": 0.800000011920929,
2036
+ "rewards/chosen": -2.426516056060791,
2037
+ "rewards/margins": 0.491415411233902,
2038
+ "rewards/rejected": -2.917931079864502,
2039
+ "step": 1450
2040
+ },
2041
+ {
2042
+ "epoch": 0.65,
2043
+ "learning_rate": 3.0433751776091243e-06,
2044
+ "logits/chosen": -2.772272825241089,
2045
+ "logits/rejected": -2.797128200531006,
2046
+ "logps/chosen": -172.43173217773438,
2047
+ "logps/rejected": -167.80349731445312,
2048
+ "loss": 0.7952,
2049
+ "rewards/accuracies": 0.4000000059604645,
2050
+ "rewards/chosen": -2.848965644836426,
2051
+ "rewards/margins": 0.02570332959294319,
2052
+ "rewards/rejected": -2.874669075012207,
2053
+ "step": 1460
2054
+ },
2055
+ {
2056
+ "epoch": 0.65,
2057
+ "learning_rate": 2.9759461145570562e-06,
2058
+ "logits/chosen": -2.8297877311706543,
2059
+ "logits/rejected": -2.812828302383423,
2060
+ "logps/chosen": -154.22589111328125,
2061
+ "logps/rejected": -165.05397033691406,
2062
+ "loss": 0.7833,
2063
+ "rewards/accuracies": 0.574999988079071,
2064
+ "rewards/chosen": -2.5259437561035156,
2065
+ "rewards/margins": 0.10835230350494385,
2066
+ "rewards/rejected": -2.63429594039917,
2067
+ "step": 1470
2068
+ },
2069
+ {
2070
+ "epoch": 0.66,
2071
+ "learning_rate": 2.9089544778640434e-06,
2072
+ "logits/chosen": -2.8057451248168945,
2073
+ "logits/rejected": -2.8399128913879395,
2074
+ "logps/chosen": -188.55502319335938,
2075
+ "logps/rejected": -195.39517211914062,
2076
+ "loss": 0.7464,
2077
+ "rewards/accuracies": 0.6000000238418579,
2078
+ "rewards/chosen": -2.9316458702087402,
2079
+ "rewards/margins": 0.07946301996707916,
2080
+ "rewards/rejected": -3.0111091136932373,
2081
+ "step": 1480
2082
+ },
2083
+ {
2084
+ "epoch": 0.66,
2085
+ "learning_rate": 2.84241474535992e-06,
2086
+ "logits/chosen": -2.8304762840270996,
2087
+ "logits/rejected": -2.805596351623535,
2088
+ "logps/chosen": -173.1647491455078,
2089
+ "logps/rejected": -199.00692749023438,
2090
+ "loss": 0.7088,
2091
+ "rewards/accuracies": 0.5,
2092
+ "rewards/chosen": -2.439711570739746,
2093
+ "rewards/margins": 0.4066222608089447,
2094
+ "rewards/rejected": -2.8463339805603027,
2095
+ "step": 1490
2096
+ },
2097
+ {
2098
+ "epoch": 0.67,
2099
+ "learning_rate": 2.7763412972117e-06,
2100
+ "logits/chosen": -2.7816245555877686,
2101
+ "logits/rejected": -2.7993526458740234,
2102
+ "logps/chosen": -163.3373260498047,
2103
+ "logps/rejected": -181.728515625,
2104
+ "loss": 0.7681,
2105
+ "rewards/accuracies": 0.375,
2106
+ "rewards/chosen": -2.659446954727173,
2107
+ "rewards/margins": 0.027357567101716995,
2108
+ "rewards/rejected": -2.6868045330047607,
2109
+ "step": 1500
2110
+ }
2111
+ ],
2112
+ "logging_steps": 10,
2113
+ "max_steps": 2250,
2114
+ "num_input_tokens_seen": 0,
2115
+ "num_train_epochs": 1,
2116
+ "save_steps": 500,
2117
+ "total_flos": 0.0,
2118
+ "train_batch_size": 1,
2119
+ "trial_name": null,
2120
+ "trial_params": null
2121
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929ce4eef102da1f4bf0c0abf6ec22df7a8d310ad21751ff23e497886fcbbedb
3
+ size 4987
checkpoint-2000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.8.2
checkpoint-2000/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 8,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 4,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "q_proj",
24
+ "v_proj",
25
+ "o_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_rslora": false
29
+ }
checkpoint-2000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e828230570e6f415f5efba0e5501e37a3773772713a69f0dbee02d208eeaceef
3
+ size 13665336
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33495b963f7d4bad72b9063f580df1c3d710c872a7176aecbb852586e9a81cc3
3
+ size 27413893
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2278a87cdf86c3f9219223c847f6b27f6b7f15b8226b617f38936e8ff2cbcde
3
+ size 14575
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128e4aa3989313e45e89dbba4a520e142fe8e95ea06e5ef09d182c4855e52dca
3
+ size 627
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-2000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
34
+ "clean_up_tokenization_spaces": false,
35
+ "eos_token": "</s>",
36
+ "legacy": false,
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "</s>",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": false
44
+ }
checkpoint-2000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929ce4eef102da1f4bf0c0abf6ec22df7a8d310ad21751ff23e497886fcbbedb
3
+ size 4987
checkpoint-500/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.8.2
checkpoint-500/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 8,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 4,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "q_proj",
24
+ "v_proj",
25
+ "o_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_rslora": false
29
+ }
checkpoint-500/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00634f93dfdd387053c1c2d239bf62e4e342d51843b003a3f05b6129ea5a91e3
3
+ size 13665336
checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc4e2ea7191811b4694b851b3980d748aebaf0fafdf20a30aa1f872406119ea5
3
+ size 27413893
checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2278a87cdf86c3f9219223c847f6b27f6b7f15b8226b617f38936e8ff2cbcde
3
+ size 14575
checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3494f69d3e851927889bd28ab7a83ab60b5684faff8a5f9ca29bbf1d5dfc874
3
+ size 627
checkpoint-500/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-500/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-500/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
34
+ "clean_up_tokenization_spaces": false,
35
+ "eos_token": "</s>",
36
+ "legacy": false,
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "</s>",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": false
44
+ }
checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.2222222222222222,
5
+ "eval_steps": 500,
6
+ "global_step": 500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 5.309734513274336e-07,
14
+ "logits/chosen": -2.1858465671539307,
15
+ "logits/rejected": -2.2539868354797363,
16
+ "logps/chosen": -292.47344970703125,
17
+ "logps/rejected": -334.2834777832031,
18
+ "loss": 2.328,
19
+ "rewards/accuracies": 0.574999988079071,
20
+ "rewards/chosen": -17.95108985900879,
21
+ "rewards/margins": 1.5200703144073486,
22
+ "rewards/rejected": -19.47115707397461,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 1.415929203539823e-06,
28
+ "logits/chosen": -2.250004529953003,
29
+ "logits/rejected": -2.2245919704437256,
30
+ "logps/chosen": -323.00567626953125,
31
+ "logps/rejected": -341.8704528808594,
32
+ "loss": 3.0458,
33
+ "rewards/accuracies": 0.550000011920929,
34
+ "rewards/chosen": -18.9575138092041,
35
+ "rewards/margins": 0.811493992805481,
36
+ "rewards/rejected": -19.76900863647461,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.01,
41
+ "learning_rate": 2.3008849557522127e-06,
42
+ "logits/chosen": -2.2509493827819824,
43
+ "logits/rejected": -2.2362070083618164,
44
+ "logps/chosen": -309.36627197265625,
45
+ "logps/rejected": -354.1287841796875,
46
+ "loss": 2.001,
47
+ "rewards/accuracies": 0.6499999761581421,
48
+ "rewards/chosen": -19.02206039428711,
49
+ "rewards/margins": 2.324467182159424,
50
+ "rewards/rejected": -21.346529006958008,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.02,
55
+ "learning_rate": 3.185840707964602e-06,
56
+ "logits/chosen": -2.261589527130127,
57
+ "logits/rejected": -2.234139919281006,
58
+ "logps/chosen": -341.8447265625,
59
+ "logps/rejected": -361.2301330566406,
60
+ "loss": 2.3698,
61
+ "rewards/accuracies": 0.574999988079071,
62
+ "rewards/chosen": -19.69499397277832,
63
+ "rewards/margins": 1.1049805879592896,
64
+ "rewards/rejected": -20.799976348876953,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.02,
69
+ "learning_rate": 4.070796460176992e-06,
70
+ "logits/chosen": -2.282593250274658,
71
+ "logits/rejected": -2.219956874847412,
72
+ "logps/chosen": -333.1883850097656,
73
+ "logps/rejected": -323.2119140625,
74
+ "loss": 2.3553,
75
+ "rewards/accuracies": 0.574999988079071,
76
+ "rewards/chosen": -19.006275177001953,
77
+ "rewards/margins": 0.894936203956604,
78
+ "rewards/rejected": -19.90121078491211,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.03,
83
+ "learning_rate": 4.955752212389381e-06,
84
+ "logits/chosen": -2.2947192192077637,
85
+ "logits/rejected": -2.191793918609619,
86
+ "logps/chosen": -327.3343200683594,
87
+ "logps/rejected": -302.55914306640625,
88
+ "loss": 3.0721,
89
+ "rewards/accuracies": 0.625,
90
+ "rewards/chosen": -18.95311164855957,
91
+ "rewards/margins": 0.02760167047381401,
92
+ "rewards/rejected": -18.980712890625,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.03,
97
+ "learning_rate": 5.840707964601771e-06,
98
+ "logits/chosen": -2.1300625801086426,
99
+ "logits/rejected": -2.197695255279541,
100
+ "logps/chosen": -296.19586181640625,
101
+ "logps/rejected": -322.7232360839844,
102
+ "loss": 2.5242,
103
+ "rewards/accuracies": 0.5249999761581421,
104
+ "rewards/chosen": -18.607830047607422,
105
+ "rewards/margins": 0.5492460131645203,
106
+ "rewards/rejected": -19.15707778930664,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.04,
111
+ "learning_rate": 6.72566371681416e-06,
112
+ "logits/chosen": -2.191436290740967,
113
+ "logits/rejected": -2.203051805496216,
114
+ "logps/chosen": -322.64581298828125,
115
+ "logps/rejected": -318.93475341796875,
116
+ "loss": 2.4603,
117
+ "rewards/accuracies": 0.625,
118
+ "rewards/chosen": -18.32453155517578,
119
+ "rewards/margins": 1.1701295375823975,
120
+ "rewards/rejected": -19.494661331176758,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.04,
125
+ "learning_rate": 7.610619469026549e-06,
126
+ "logits/chosen": -2.3291049003601074,
127
+ "logits/rejected": -2.13211727142334,
128
+ "logps/chosen": -351.888671875,
129
+ "logps/rejected": -316.0814514160156,
130
+ "loss": 4.3049,
131
+ "rewards/accuracies": 0.375,
132
+ "rewards/chosen": -20.648174285888672,
133
+ "rewards/margins": -1.917999505996704,
134
+ "rewards/rejected": -18.730175018310547,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.04,
139
+ "learning_rate": 8.495575221238938e-06,
140
+ "logits/chosen": -2.326770782470703,
141
+ "logits/rejected": -2.2708096504211426,
142
+ "logps/chosen": -319.8079528808594,
143
+ "logps/rejected": -325.22467041015625,
144
+ "loss": 2.9022,
145
+ "rewards/accuracies": 0.550000011920929,
146
+ "rewards/chosen": -17.211898803710938,
147
+ "rewards/margins": 0.4395485818386078,
148
+ "rewards/rejected": -17.651447296142578,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.05,
153
+ "learning_rate": 9.380530973451329e-06,
154
+ "logits/chosen": -2.2947869300842285,
155
+ "logits/rejected": -2.2642266750335693,
156
+ "logps/chosen": -319.7033386230469,
157
+ "logps/rejected": -301.95684814453125,
158
+ "loss": 2.9535,
159
+ "rewards/accuracies": 0.32499998807907104,
160
+ "rewards/chosen": -18.591039657592773,
161
+ "rewards/margins": -1.4626668691635132,
162
+ "rewards/rejected": -17.128376007080078,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.05,
167
+ "learning_rate": 9.999951373555555e-06,
168
+ "logits/chosen": -2.356776475906372,
169
+ "logits/rejected": -2.2779877185821533,
170
+ "logps/chosen": -332.5343322753906,
171
+ "logps/rejected": -308.6272888183594,
172
+ "loss": 2.8838,
173
+ "rewards/accuracies": 0.42500001192092896,
174
+ "rewards/chosen": -18.02423095703125,
175
+ "rewards/margins": -0.5776697993278503,
176
+ "rewards/rejected": -17.446561813354492,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.06,
181
+ "learning_rate": 9.999086929743288e-06,
182
+ "logits/chosen": -2.34501314163208,
183
+ "logits/rejected": -2.3048901557922363,
184
+ "logps/chosen": -298.5960388183594,
185
+ "logps/rejected": -309.3174743652344,
186
+ "loss": 2.0696,
187
+ "rewards/accuracies": 0.6499999761581421,
188
+ "rewards/chosen": -16.59781265258789,
189
+ "rewards/margins": 0.7586337327957153,
190
+ "rewards/rejected": -17.356447219848633,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.06,
195
+ "learning_rate": 9.997142113313472e-06,
196
+ "logits/chosen": -2.3136909008026123,
197
+ "logits/rejected": -2.3042447566986084,
198
+ "logps/chosen": -292.8536071777344,
199
+ "logps/rejected": -281.0971984863281,
200
+ "loss": 1.8399,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -15.984518051147461,
203
+ "rewards/margins": 0.30002641677856445,
204
+ "rewards/rejected": -16.284543991088867,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.07,
209
+ "learning_rate": 9.994117344568142e-06,
210
+ "logits/chosen": -2.337782144546509,
211
+ "logits/rejected": -2.3470942974090576,
212
+ "logps/chosen": -286.35504150390625,
213
+ "logps/rejected": -303.07684326171875,
214
+ "loss": 1.5656,
215
+ "rewards/accuracies": 0.625,
216
+ "rewards/chosen": -14.58277416229248,
217
+ "rewards/margins": 1.030444860458374,
218
+ "rewards/rejected": -15.61322021484375,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.07,
223
+ "learning_rate": 9.990013277202137e-06,
224
+ "logits/chosen": -2.3595287799835205,
225
+ "logits/rejected": -2.4950690269470215,
226
+ "logps/chosen": -292.61651611328125,
227
+ "logps/rejected": -363.38507080078125,
228
+ "loss": 1.523,
229
+ "rewards/accuracies": 0.625,
230
+ "rewards/chosen": -15.285211563110352,
231
+ "rewards/margins": 2.0152671337127686,
232
+ "rewards/rejected": -17.300477981567383,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.08,
237
+ "learning_rate": 9.984830798161828e-06,
238
+ "logits/chosen": -2.4216346740722656,
239
+ "logits/rejected": -2.35921311378479,
240
+ "logps/chosen": -329.1554870605469,
241
+ "logps/rejected": -308.78326416015625,
242
+ "loss": 2.5844,
243
+ "rewards/accuracies": 0.44999998807907104,
244
+ "rewards/chosen": -14.768890380859375,
245
+ "rewards/margins": -0.32357311248779297,
246
+ "rewards/rejected": -14.445318222045898,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.08,
251
+ "learning_rate": 9.978571027453433e-06,
252
+ "logits/chosen": -2.5200698375701904,
253
+ "logits/rejected": -2.338383674621582,
254
+ "logps/chosen": -296.1730041503906,
255
+ "logps/rejected": -232.0618896484375,
256
+ "loss": 2.4226,
257
+ "rewards/accuracies": 0.375,
258
+ "rewards/chosen": -13.72007942199707,
259
+ "rewards/margins": -0.8905000686645508,
260
+ "rewards/rejected": -12.829577445983887,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.08,
265
+ "learning_rate": 9.971235317900968e-06,
266
+ "logits/chosen": -2.4042282104492188,
267
+ "logits/rejected": -2.4900546073913574,
268
+ "logps/chosen": -219.2891845703125,
269
+ "logps/rejected": -247.385498046875,
270
+ "loss": 1.5221,
271
+ "rewards/accuracies": 0.5,
272
+ "rewards/chosen": -11.559672355651855,
273
+ "rewards/margins": 0.2930552363395691,
274
+ "rewards/rejected": -11.852727890014648,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.09,
279
+ "learning_rate": 9.962825254853888e-06,
280
+ "logits/chosen": -2.591836929321289,
281
+ "logits/rejected": -2.5101170539855957,
282
+ "logps/chosen": -311.3710632324219,
283
+ "logps/rejected": -277.0614318847656,
284
+ "loss": 2.1722,
285
+ "rewards/accuracies": 0.32499998807907104,
286
+ "rewards/chosen": -13.245725631713867,
287
+ "rewards/margins": -1.0529097318649292,
288
+ "rewards/rejected": -12.192815780639648,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.09,
293
+ "learning_rate": 9.954339123272747e-06,
294
+ "logits/chosen": -2.5649514198303223,
295
+ "logits/rejected": -2.4265828132629395,
296
+ "logps/chosen": -250.44009399414062,
297
+ "logps/rejected": -228.14224243164062,
298
+ "loss": 1.4704,
299
+ "rewards/accuracies": 0.32499998807907104,
300
+ "rewards/chosen": -10.943647384643555,
301
+ "rewards/margins": -0.3656729757785797,
302
+ "rewards/rejected": -10.577974319458008,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.1,
307
+ "learning_rate": 9.943892987470688e-06,
308
+ "logits/chosen": -2.559394598007202,
309
+ "logits/rejected": -2.523345470428467,
310
+ "logps/chosen": -260.9962463378906,
311
+ "logps/rejected": -234.96670532226562,
312
+ "loss": 1.709,
313
+ "rewards/accuracies": 0.5,
314
+ "rewards/chosen": -10.393632888793945,
315
+ "rewards/margins": 0.1549229919910431,
316
+ "rewards/rejected": -10.548555374145508,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.1,
321
+ "learning_rate": 9.932378407234108e-06,
322
+ "logits/chosen": -2.63352632522583,
323
+ "logits/rejected": -2.5623555183410645,
324
+ "logps/chosen": -271.7388916015625,
325
+ "logps/rejected": -272.16796875,
326
+ "loss": 1.2704,
327
+ "rewards/accuracies": 0.574999988079071,
328
+ "rewards/chosen": -9.603178024291992,
329
+ "rewards/margins": 0.19006821513175964,
330
+ "rewards/rejected": -9.793245315551758,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.11,
335
+ "learning_rate": 9.919797871024877e-06,
336
+ "logits/chosen": -2.6439247131347656,
337
+ "logits/rejected": -2.6053879261016846,
338
+ "logps/chosen": -229.23764038085938,
339
+ "logps/rejected": -197.8614044189453,
340
+ "loss": 1.405,
341
+ "rewards/accuracies": 0.375,
342
+ "rewards/chosen": -9.295554161071777,
343
+ "rewards/margins": -0.5798273086547852,
344
+ "rewards/rejected": -8.715726852416992,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.11,
349
+ "learning_rate": 9.906154097672858e-06,
350
+ "logits/chosen": -2.6798043251037598,
351
+ "logits/rejected": -2.600550889968872,
352
+ "logps/chosen": -235.1671142578125,
353
+ "logps/rejected": -223.978271484375,
354
+ "loss": 1.2942,
355
+ "rewards/accuracies": 0.574999988079071,
356
+ "rewards/chosen": -8.195772171020508,
357
+ "rewards/margins": -0.00971608143299818,
358
+ "rewards/rejected": -8.186057090759277,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.12,
363
+ "learning_rate": 9.89145003578833e-06,
364
+ "logits/chosen": -2.670474052429199,
365
+ "logits/rejected": -2.6329426765441895,
366
+ "logps/chosen": -224.05068969726562,
367
+ "logps/rejected": -207.1922607421875,
368
+ "loss": 1.0877,
369
+ "rewards/accuracies": 0.5249999761581421,
370
+ "rewards/chosen": -7.675335884094238,
371
+ "rewards/margins": 0.17977333068847656,
372
+ "rewards/rejected": -7.855108737945557,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.12,
377
+ "learning_rate": 9.875688863124766e-06,
378
+ "logits/chosen": -2.620087146759033,
379
+ "logits/rejected": -2.676790714263916,
380
+ "logps/chosen": -255.08486938476562,
381
+ "logps/rejected": -265.8028564453125,
382
+ "loss": 1.0495,
383
+ "rewards/accuracies": 0.6000000238418579,
384
+ "rewards/chosen": -7.672966957092285,
385
+ "rewards/margins": 0.1472960114479065,
386
+ "rewards/rejected": -7.8202619552612305,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.12,
391
+ "learning_rate": 9.858873985892058e-06,
392
+ "logits/chosen": -2.6771128177642822,
393
+ "logits/rejected": -2.5845065116882324,
394
+ "logps/chosen": -222.91311645507812,
395
+ "logps/rejected": -234.68359375,
396
+ "loss": 1.0752,
397
+ "rewards/accuracies": 0.4749999940395355,
398
+ "rewards/chosen": -6.951257228851318,
399
+ "rewards/margins": -0.008678942918777466,
400
+ "rewards/rejected": -6.9425787925720215,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.13,
405
+ "learning_rate": 9.841009038020401e-06,
406
+ "logits/chosen": -2.6333932876586914,
407
+ "logits/rejected": -2.65295147895813,
408
+ "logps/chosen": -204.25399780273438,
409
+ "logps/rejected": -208.4911651611328,
410
+ "loss": 1.0669,
411
+ "rewards/accuracies": 0.5249999761581421,
412
+ "rewards/chosen": -6.73724365234375,
413
+ "rewards/margins": 0.05855642631649971,
414
+ "rewards/rejected": -6.795799255371094,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.13,
419
+ "learning_rate": 9.82209788037494e-06,
420
+ "logits/chosen": -2.685725450515747,
421
+ "logits/rejected": -2.700352907180786,
422
+ "logps/chosen": -230.539794921875,
423
+ "logps/rejected": -240.39224243164062,
424
+ "loss": 1.1248,
425
+ "rewards/accuracies": 0.44999998807907104,
426
+ "rewards/chosen": -6.4564642906188965,
427
+ "rewards/margins": -0.2118469774723053,
428
+ "rewards/rejected": -6.244616985321045,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.14,
433
+ "learning_rate": 9.80214459992139e-06,
434
+ "logits/chosen": -2.714470624923706,
435
+ "logits/rejected": -2.6982994079589844,
436
+ "logps/chosen": -214.0612030029297,
437
+ "logps/rejected": -231.0535125732422,
438
+ "loss": 0.8095,
439
+ "rewards/accuracies": 0.625,
440
+ "rewards/chosen": -6.09361457824707,
441
+ "rewards/margins": 0.35767459869384766,
442
+ "rewards/rejected": -6.45128870010376,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.14,
447
+ "learning_rate": 9.781153508842785e-06,
448
+ "logits/chosen": -2.6795332431793213,
449
+ "logits/rejected": -2.6861202716827393,
450
+ "logps/chosen": -191.6574249267578,
451
+ "logps/rejected": -206.572998046875,
452
+ "loss": 0.9054,
453
+ "rewards/accuracies": 0.574999988079071,
454
+ "rewards/chosen": -6.0128302574157715,
455
+ "rewards/margins": 0.5337953567504883,
456
+ "rewards/rejected": -6.54662561416626,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.15,
461
+ "learning_rate": 9.759129143607547e-06,
462
+ "logits/chosen": -2.719517469406128,
463
+ "logits/rejected": -2.630643367767334,
464
+ "logps/chosen": -228.45797729492188,
465
+ "logps/rejected": -176.00814819335938,
466
+ "loss": 1.1571,
467
+ "rewards/accuracies": 0.4000000059604645,
468
+ "rewards/chosen": -6.262964725494385,
469
+ "rewards/margins": -0.4287610650062561,
470
+ "rewards/rejected": -5.834203243255615,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.15,
475
+ "learning_rate": 9.736076263989103e-06,
476
+ "logits/chosen": -2.753007650375366,
477
+ "logits/rejected": -2.7196168899536133,
478
+ "logps/chosen": -234.21731567382812,
479
+ "logps/rejected": -214.3049774169922,
480
+ "loss": 0.93,
481
+ "rewards/accuracies": 0.42500001192092896,
482
+ "rewards/chosen": -5.743313789367676,
483
+ "rewards/margins": 0.08766243606805801,
484
+ "rewards/rejected": -5.830975532531738,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.16,
489
+ "learning_rate": 9.711999852037226e-06,
490
+ "logits/chosen": -2.702094554901123,
491
+ "logits/rejected": -2.6643381118774414,
492
+ "logps/chosen": -235.38766479492188,
493
+ "logps/rejected": -208.2432861328125,
494
+ "loss": 1.1837,
495
+ "rewards/accuracies": 0.5,
496
+ "rewards/chosen": -5.757768154144287,
497
+ "rewards/margins": -0.27699437737464905,
498
+ "rewards/rejected": -5.480773448944092,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.16,
503
+ "learning_rate": 9.68690511100134e-06,
504
+ "logits/chosen": -2.6954503059387207,
505
+ "logits/rejected": -2.6649551391601562,
506
+ "logps/chosen": -185.06394958496094,
507
+ "logps/rejected": -187.76278686523438,
508
+ "loss": 1.0071,
509
+ "rewards/accuracies": 0.42500001192092896,
510
+ "rewards/chosen": -5.9798102378845215,
511
+ "rewards/margins": -0.11689682304859161,
512
+ "rewards/rejected": -5.862914085388184,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.16,
517
+ "learning_rate": 9.660797464206035e-06,
518
+ "logits/chosen": -2.6881985664367676,
519
+ "logits/rejected": -2.676832914352417,
520
+ "logps/chosen": -195.05517578125,
521
+ "logps/rejected": -212.87161254882812,
522
+ "loss": 0.6422,
523
+ "rewards/accuracies": 0.699999988079071,
524
+ "rewards/chosen": -4.617544174194336,
525
+ "rewards/margins": 0.8601192235946655,
526
+ "rewards/rejected": -5.477663516998291,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.17,
531
+ "learning_rate": 9.633682553879e-06,
532
+ "logits/chosen": -2.749539852142334,
533
+ "logits/rejected": -2.7113490104675293,
534
+ "logps/chosen": -173.92945861816406,
535
+ "logps/rejected": -176.216796875,
536
+ "loss": 0.8915,
537
+ "rewards/accuracies": 0.6000000238418579,
538
+ "rewards/chosen": -5.114466667175293,
539
+ "rewards/margins": 0.10453431308269501,
540
+ "rewards/rejected": -5.219000816345215,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.17,
545
+ "learning_rate": 9.605566239931666e-06,
546
+ "logits/chosen": -2.744715690612793,
547
+ "logits/rejected": -2.6837120056152344,
548
+ "logps/chosen": -200.80999755859375,
549
+ "logps/rejected": -200.7525177001953,
550
+ "loss": 0.633,
551
+ "rewards/accuracies": 0.5249999761581421,
552
+ "rewards/chosen": -4.719931602478027,
553
+ "rewards/margins": 0.698486864566803,
554
+ "rewards/rejected": -5.418419361114502,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.18,
559
+ "learning_rate": 9.576454598692797e-06,
560
+ "logits/chosen": -2.7422823905944824,
561
+ "logits/rejected": -2.7130322456359863,
562
+ "logps/chosen": -204.26626586914062,
563
+ "logps/rejected": -174.83802795410156,
564
+ "loss": 0.9281,
565
+ "rewards/accuracies": 0.4749999940395355,
566
+ "rewards/chosen": -4.756241798400879,
567
+ "rewards/margins": -0.038588762283325195,
568
+ "rewards/rejected": -4.717652320861816,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.18,
573
+ "learning_rate": 9.546353921595306e-06,
574
+ "logits/chosen": -2.7594494819641113,
575
+ "logits/rejected": -2.7436954975128174,
576
+ "logps/chosen": -183.6326141357422,
577
+ "logps/rejected": -186.80911254882812,
578
+ "loss": 0.9906,
579
+ "rewards/accuracies": 0.4749999940395355,
580
+ "rewards/chosen": -4.743472099304199,
581
+ "rewards/margins": -0.156986802816391,
582
+ "rewards/rejected": -4.586484909057617,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.19,
587
+ "learning_rate": 9.515270713816589e-06,
588
+ "logits/chosen": -2.762357711791992,
589
+ "logits/rejected": -2.661778688430786,
590
+ "logps/chosen": -212.29739379882812,
591
+ "logps/rejected": -185.29476928710938,
592
+ "loss": 0.9206,
593
+ "rewards/accuracies": 0.5,
594
+ "rewards/chosen": -5.090394973754883,
595
+ "rewards/margins": 0.17267219722270966,
596
+ "rewards/rejected": -5.263067722320557,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.19,
601
+ "learning_rate": 9.483211692872669e-06,
602
+ "logits/chosen": -2.694725513458252,
603
+ "logits/rejected": -2.689701557159424,
604
+ "logps/chosen": -168.6083221435547,
605
+ "logps/rejected": -170.26681518554688,
606
+ "loss": 0.9479,
607
+ "rewards/accuracies": 0.4000000059604645,
608
+ "rewards/chosen": -4.593288421630859,
609
+ "rewards/margins": -0.16782906651496887,
610
+ "rewards/rejected": -4.425459384918213,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.2,
615
+ "learning_rate": 9.450183787166447e-06,
616
+ "logits/chosen": -2.6913774013519287,
617
+ "logits/rejected": -2.780381202697754,
618
+ "logps/chosen": -141.98934936523438,
619
+ "logps/rejected": -177.6278076171875,
620
+ "loss": 0.9904,
621
+ "rewards/accuracies": 0.44999998807907104,
622
+ "rewards/chosen": -4.094004154205322,
623
+ "rewards/margins": -0.12103526294231415,
624
+ "rewards/rejected": -3.972968578338623,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.2,
629
+ "learning_rate": 9.41619413449037e-06,
630
+ "logits/chosen": -2.804361581802368,
631
+ "logits/rejected": -2.7710132598876953,
632
+ "logps/chosen": -209.9197540283203,
633
+ "logps/rejected": -231.4965057373047,
634
+ "loss": 0.654,
635
+ "rewards/accuracies": 0.675000011920929,
636
+ "rewards/chosen": -3.7489547729492188,
637
+ "rewards/margins": 0.5616118311882019,
638
+ "rewards/rejected": -4.3105669021606445,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.2,
643
+ "learning_rate": 9.381250080483864e-06,
644
+ "logits/chosen": -2.777339458465576,
645
+ "logits/rejected": -2.7908101081848145,
646
+ "logps/chosen": -197.44711303710938,
647
+ "logps/rejected": -195.8129425048828,
648
+ "loss": 0.8654,
649
+ "rewards/accuracies": 0.625,
650
+ "rewards/chosen": -4.063180923461914,
651
+ "rewards/margins": 0.1730591356754303,
652
+ "rewards/rejected": -4.236240386962891,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.21,
657
+ "learning_rate": 9.345359177045827e-06,
658
+ "logits/chosen": -2.7428901195526123,
659
+ "logits/rejected": -2.720733642578125,
660
+ "logps/chosen": -163.38687133789062,
661
+ "logps/rejected": -152.174072265625,
662
+ "loss": 1.008,
663
+ "rewards/accuracies": 0.4749999940395355,
664
+ "rewards/chosen": -4.339611053466797,
665
+ "rewards/margins": -0.002607667353004217,
666
+ "rewards/rejected": -4.337003707885742,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.21,
671
+ "learning_rate": 9.308529180702568e-06,
672
+ "logits/chosen": -2.771120309829712,
673
+ "logits/rejected": -2.754432201385498,
674
+ "logps/chosen": -190.38487243652344,
675
+ "logps/rejected": -209.5969696044922,
676
+ "loss": 0.9381,
677
+ "rewards/accuracies": 0.4749999940395355,
678
+ "rewards/chosen": -4.276428699493408,
679
+ "rewards/margins": 0.031873930245637894,
680
+ "rewards/rejected": -4.308302879333496,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.22,
685
+ "learning_rate": 9.270768050931515e-06,
686
+ "logits/chosen": -2.729900360107422,
687
+ "logits/rejected": -2.793795108795166,
688
+ "logps/chosen": -181.68646240234375,
689
+ "logps/rejected": -203.8788299560547,
690
+ "loss": 0.9827,
691
+ "rewards/accuracies": 0.44999998807907104,
692
+ "rewards/chosen": -4.365941524505615,
693
+ "rewards/margins": -0.1641651839017868,
694
+ "rewards/rejected": -4.201776504516602,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.22,
699
+ "learning_rate": 9.232083948441046e-06,
700
+ "logits/chosen": -2.7761735916137695,
701
+ "logits/rejected": -2.7046539783477783,
702
+ "logps/chosen": -190.8777618408203,
703
+ "logps/rejected": -169.68423461914062,
704
+ "loss": 0.7403,
705
+ "rewards/accuracies": 0.625,
706
+ "rewards/chosen": -4.126107215881348,
707
+ "rewards/margins": 0.2456977367401123,
708
+ "rewards/rejected": -4.371805191040039,
709
+ "step": 500
710
+ }
711
+ ],
712
+ "logging_steps": 10,
713
+ "max_steps": 2250,
714
+ "num_input_tokens_seen": 0,
715
+ "num_train_epochs": 1,
716
+ "save_steps": 500,
717
+ "total_flos": 0.0,
718
+ "train_batch_size": 1,
719
+ "trial_name": null,
720
+ "trial_params": null
721
+ }
checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929ce4eef102da1f4bf0c0abf6ec22df7a8d310ad21751ff23e497886fcbbedb
3
+ size 4987
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
3
+ "architectures": [
4
+ "MistralModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 4096,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 14336,
13
+ "max_position_embeddings": 32768,
14
+ "model_type": "mistral",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 32,
17
+ "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": null,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "float16",
23
+ "transformers_version": "4.37.2",
24
+ "use_cache": true,
25
+ "vocab_size": 32000
26
+ }
model-00001-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1f0e99604201dbcc43bc85539cae58fdd7035db2292e707d801e7858ea8e848
3
+ size 1889586800
model-00002-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de96f34cfb191a1764c2c13cbbee573dc3f3c825db573f76dff8d6f0f2d7ef29
3
+ size 1946243640