Incomplete committed on
Commit
0988355
·
verified ·
1 Parent(s): 0b08e07

End of training

Browse files
README.md CHANGED
@@ -3,10 +3,10 @@ library_name: peft
3
  license: llama3.1
4
  base_model: meta-llama/Llama-3.1-8B-Instruct
5
  tags:
6
- - trl
7
- - kto
8
  - llama-factory
9
  - lora
 
 
10
  - generated_from_trainer
11
  model-index:
12
  - name: Llama-3.1-8B-Instruct_kto_sg_values
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # Llama-3.1-8B-Instruct_kto_sg_values
20
 
21
- This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on an unknown dataset.
22
 
23
  ## Model description
24
 
 
3
  license: llama3.1
4
  base_model: meta-llama/Llama-3.1-8B-Instruct
5
  tags:
 
 
6
  - llama-factory
7
  - lora
8
+ - trl
9
+ - kto
10
  - generated_from_trainer
11
  model-index:
12
  - name: Llama-3.1-8B-Instruct_kto_sg_values
 
18
 
19
  # Llama-3.1-8B-Instruct_kto_sg_values
20
 
21
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the kto_sg_values dataset.
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5.091091732590756e+17,
4
- "train_loss": 0.23059077864843058,
5
- "train_runtime": 10627.7324,
6
- "train_samples_per_second": 5.307,
7
- "train_steps_per_second": 0.332
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5.091091732590756e+17,
4
+ "train_loss": 0.413429944193955,
5
+ "train_runtime": 10515.385,
6
+ "train_samples_per_second": 5.364,
7
+ "train_steps_per_second": 0.335
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5.091091732590756e+17,
4
- "train_loss": 0.23059077864843058,
5
- "train_runtime": 10627.7324,
6
- "train_samples_per_second": 5.307,
7
- "train_steps_per_second": 0.332
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5.091091732590756e+17,
4
+ "train_loss": 0.413429944193955,
5
+ "train_runtime": 10515.385,
6
+ "train_samples_per_second": 5.364,
7
+ "train_steps_per_second": 0.335
8
  }
trainer_state.json CHANGED
@@ -10,297 +10,297 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.05021276595744681,
13
- "grad_norm": 0.7250668406486511,
14
- "kl": 10.799270629882812,
15
- "learning_rate": 2.5070821529745037e-07,
16
- "logits/chosen": -15170619.076923076,
17
- "logits/rejected": -16525174.033245845,
18
- "logps/chosen": -2.1839165879693225,
19
- "logps/rejected": -5.995521311789151,
20
- "loss": 0.4996,
21
- "rewards/chosen": 0.000273060312856248,
22
- "rewards/margins": 0.000992770597812732,
23
- "rewards/rejected": -0.000719710284956484,
24
  "step": 177
25
  },
26
  {
27
  "epoch": 0.10042553191489362,
28
- "grad_norm": 0.37310150265693665,
29
- "kl": 0.8455712795257568,
30
- "learning_rate": 4.9984237074401e-07,
31
- "logits/chosen": -14958245.494949495,
32
- "logits/rejected": -16451794.473637177,
33
- "logps/chosen": -2.413396250920665,
34
- "logps/rejected": -6.402490365281501,
35
- "loss": 0.4952,
36
- "rewards/chosen": -0.013412311823681147,
37
- "rewards/margins": 0.012531101672975501,
38
- "rewards/rejected": -0.02594341349665665,
39
  "step": 354
40
  },
41
  {
42
  "epoch": 0.15063829787234043,
43
- "grad_norm": 0.7330515384674072,
44
- "kl": 0.0,
45
- "learning_rate": 4.719419924337957e-07,
46
- "logits/chosen": -15493149.883268483,
47
- "logits/rejected": -17431733.1216566,
48
- "logps/chosen": -10.362312287208171,
49
- "logps/rejected": -17.032325145599653,
50
- "loss": 0.3627,
51
- "rewards/chosen": -0.8161516597762646,
52
- "rewards/margins": 0.26472018556454646,
53
- "rewards/rejected": -1.080871845340811,
54
  "step": 531
55
  },
56
  {
57
  "epoch": 0.20085106382978724,
58
- "grad_norm": 0.17463324964046478,
59
- "kl": 0.0,
60
- "learning_rate": 4.4404161412358134e-07,
61
- "logits/chosen": -16503318.021505376,
62
- "logits/rejected": -18315989.896218117,
63
- "logps/chosen": -38.83077116935484,
64
- "logps/rejected": -44.21139786719437,
65
- "loss": 0.2129,
66
- "rewards/chosen": -3.6382855377744177,
67
- "rewards/margins": 0.1601535082348171,
68
- "rewards/rejected": -3.7984390460092348,
69
  "step": 708
70
  },
71
  {
72
  "epoch": 0.251063829787234,
73
- "grad_norm": 0.04723167046904564,
74
- "kl": 0.0,
75
- "learning_rate": 4.16141235813367e-07,
76
- "logits/chosen": -16537055.085714286,
77
- "logits/rejected": -18434136.33802817,
78
- "logps/chosen": -50.28404366629464,
79
- "logps/rejected": -56.05894104863556,
80
- "loss": 0.1991,
81
- "rewards/chosen": -4.803175571986607,
82
- "rewards/margins": 0.18089855048258308,
83
- "rewards/rejected": -4.98407412246919,
84
  "step": 885
85
  },
86
  {
87
  "epoch": 0.30127659574468085,
88
- "grad_norm": 0.03626934066414833,
89
- "kl": 0.0,
90
- "learning_rate": 3.882408575031526e-07,
91
- "logits/chosen": -16553432.759124087,
92
- "logits/rejected": -18578659.75481611,
93
- "logps/chosen": -56.20960809192518,
94
- "logps/rejected": -62.88292879816112,
95
- "loss": 0.2012,
96
- "rewards/chosen": -5.414033096202099,
97
- "rewards/margins": 0.25663873091042344,
98
- "rewards/rejected": -5.670671827112522,
99
  "step": 1062
100
  },
101
  {
102
  "epoch": 0.35148936170212763,
103
- "grad_norm": 0.026356538757681847,
104
  "kl": 0.0,
105
- "learning_rate": 3.6034047919293817e-07,
106
- "logits/chosen": -16753125.946902655,
107
- "logits/rejected": -18672338.82352941,
108
- "logps/chosen": -61.49022573285398,
109
- "logps/rejected": -67.57514443277311,
110
- "loss": 0.1751,
111
- "rewards/chosen": -5.908338158531526,
112
- "rewards/margins": 0.23123231809662492,
113
- "rewards/rejected": -6.139570476628151,
114
  "step": 1239
115
  },
116
  {
117
  "epoch": 0.40170212765957447,
118
- "grad_norm": 0.027685383334755898,
119
- "kl": 0.0,
120
- "learning_rate": 3.324401008827238e-07,
121
- "logits/chosen": -16838802.810035843,
122
- "logits/rejected": -18765256.612137202,
123
- "logps/chosen": -64.57314068100358,
124
- "logps/rejected": -71.2826311015831,
125
- "loss": 0.1891,
126
- "rewards/chosen": -6.220487861223118,
127
- "rewards/margins": 0.2930856704116662,
128
- "rewards/rejected": -6.513573531634784,
129
  "step": 1416
130
  },
131
  {
132
  "epoch": 0.45191489361702125,
133
- "grad_norm": 0.01471030618995428,
134
  "kl": 0.0,
135
- "learning_rate": 3.045397225725094e-07,
136
- "logits/chosen": -17096763.36231884,
137
- "logits/rejected": -18954482.52631579,
138
- "logps/chosen": -67.2448341259058,
139
- "logps/rejected": -74.1321683114035,
140
- "loss": 0.1931,
141
- "rewards/chosen": -6.504730666893116,
142
- "rewards/margins": 0.28420505130863827,
143
- "rewards/rejected": -6.7889357182017545,
144
  "step": 1593
145
  },
146
  {
147
  "epoch": 0.502127659574468,
148
- "grad_norm": 0.010299217887222767,
149
  "kl": 0.0,
150
- "learning_rate": 2.7663934426229505e-07,
151
- "logits/chosen": -17058365.519379847,
152
- "logits/rejected": -18912892.683937825,
153
- "logps/chosen": -69.01617762839147,
154
- "logps/rejected": -75.52461814550949,
155
- "loss": 0.1944,
156
- "rewards/chosen": -6.667378270348837,
157
- "rewards/margins": 0.3062005697850143,
158
- "rewards/rejected": -6.973578840133851,
159
  "step": 1770
160
  },
161
  {
162
  "epoch": 0.5523404255319149,
163
- "grad_norm": 0.00703179556876421,
164
  "kl": 0.0,
165
- "learning_rate": 2.487389659520807e-07,
166
- "logits/chosen": -16823254.34576271,
167
- "logits/rejected": -19061004.560214095,
168
- "logps/chosen": -69.84638506355932,
169
- "logps/rejected": -77.77510314451382,
170
- "loss": 0.1996,
171
- "rewards/chosen": -6.773340671345339,
172
- "rewards/margins": 0.3934298077191132,
173
- "rewards/rejected": -7.166770479064452,
174
  "step": 1947
175
  },
176
  {
177
  "epoch": 0.6025531914893617,
178
- "grad_norm": 0.015442676842212677,
179
  "kl": 0.0,
180
- "learning_rate": 2.2083858764186634e-07,
181
- "logits/chosen": -17488395.815384615,
182
- "logits/rejected": -18972861.564013842,
183
- "logps/chosen": -72.38798076923077,
184
- "logps/rejected": -79.26926092128028,
185
- "loss": 0.1886,
186
- "rewards/chosen": -7.007599346454327,
187
- "rewards/margins": 0.30762928406037826,
188
- "rewards/rejected": -7.315228630514706,
189
  "step": 2124
190
  },
191
  {
192
  "epoch": 0.6527659574468085,
193
- "grad_norm": 0.012100782245397568,
194
  "kl": 0.0,
195
- "learning_rate": 1.9293820933165196e-07,
196
- "logits/chosen": -17114164.0,
197
- "logits/rejected": -19232628.524137933,
198
- "logps/chosen": -72.89530944824219,
199
- "logps/rejected": -81.06624461206897,
200
- "loss": 0.1861,
201
- "rewards/chosen": -7.082716941833496,
202
- "rewards/margins": 0.38840038036477953,
203
- "rewards/rejected": -7.471117322198276,
204
  "step": 2301
205
  },
206
  {
207
  "epoch": 0.7029787234042553,
208
- "grad_norm": 0.009404444135725498,
209
  "kl": 0.0,
210
- "learning_rate": 1.6503783102143755e-07,
211
- "logits/chosen": -17324397.714285713,
212
- "logits/rejected": -19416927.721739132,
213
- "logps/chosen": -74.51356907894737,
214
- "logps/rejected": -81.89915760869565,
215
- "loss": 0.1853,
216
- "rewards/chosen": -7.221235461701128,
217
- "rewards/margins": 0.35202370949452444,
218
- "rewards/rejected": -7.5732591711956525,
219
  "step": 2478
220
  },
221
  {
222
  "epoch": 0.7531914893617021,
223
- "grad_norm": 0.00973726250231266,
224
  "kl": 0.0,
225
- "learning_rate": 1.371374527112232e-07,
226
- "logits/chosen": -16804548.204379562,
227
- "logits/rejected": -19371664.364273205,
228
- "logps/chosen": -75.09304430885037,
229
- "logps/rejected": -82.9453535464098,
230
- "loss": 0.1913,
231
- "rewards/chosen": -7.289654585566834,
232
- "rewards/margins": 0.3768818157575975,
233
- "rewards/rejected": -7.666536401324431,
234
  "step": 2655
235
  },
236
  {
237
  "epoch": 0.8034042553191489,
238
- "grad_norm": 0.01282684225589037,
239
  "kl": 0.0,
240
- "learning_rate": 1.0923707440100883e-07,
241
- "logits/chosen": -17255675.78600823,
242
- "logits/rejected": -19418751.89087809,
243
- "logps/chosen": -75.07608346193416,
244
- "logps/rejected": -83.94623827791986,
245
- "loss": 0.1892,
246
- "rewards/chosen": -7.311107996559928,
247
- "rewards/margins": 0.4396163081075919,
248
- "rewards/rejected": -7.75072430466752,
249
  "step": 2832
250
  },
251
  {
252
  "epoch": 0.8536170212765958,
253
- "grad_norm": 0.00687984237447381,
254
  "kl": 0.0,
255
- "learning_rate": 8.133669609079445e-08,
256
- "logits/chosen": -16891363.235955056,
257
- "logits/rejected": -19397185.50391645,
258
- "logps/chosen": -76.50997776217228,
259
- "logps/rejected": -84.4607743146214,
260
- "loss": 0.1951,
261
- "rewards/chosen": -7.409224949525983,
262
- "rewards/margins": 0.40561744059586147,
263
- "rewards/rejected": -7.814842390121845,
264
  "step": 3009
265
  },
266
  {
267
  "epoch": 0.9038297872340425,
268
- "grad_norm": 0.0068199522793293,
269
  "kl": 0.0,
270
- "learning_rate": 5.343631778058008e-08,
271
- "logits/chosen": -17728405.0989011,
272
- "logits/rejected": -19307877.459317584,
273
- "logps/chosen": -77.02319425366301,
274
- "logps/rejected": -84.82096183289589,
275
- "loss": 0.1877,
276
- "rewards/chosen": -7.465746086595696,
277
- "rewards/margins": 0.4030767070394745,
278
- "rewards/rejected": -7.868822793635171,
279
  "step": 3186
280
  },
281
  {
282
  "epoch": 0.9540425531914893,
283
- "grad_norm": 0.005797545425593853,
284
  "kl": 0.0,
285
- "learning_rate": 2.55359394703657e-08,
286
- "logits/chosen": -17274124.19047619,
287
- "logits/rejected": -19408322.419243988,
288
- "logps/chosen": -77.08465091765873,
289
- "logps/rejected": -85.22552888745705,
290
- "loss": 0.1786,
291
- "rewards/chosen": -7.480488126240079,
292
- "rewards/margins": 0.42159470371052254,
293
- "rewards/rejected": -7.902082829950602,
294
  "step": 3363
295
  },
296
  {
297
  "epoch": 1.0,
298
  "step": 3525,
299
  "total_flos": 5.091091732590756e+17,
300
- "train_loss": 0.23059077864843058,
301
- "train_runtime": 10627.7324,
302
- "train_samples_per_second": 5.307,
303
- "train_steps_per_second": 0.332
304
  }
305
  ],
306
  "logging_steps": 177,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.05021276595744681,
13
+ "grad_norm": 0.732947051525116,
14
+ "kl": 12.475480079650879,
15
+ "learning_rate": 5.014164305949008e-08,
16
+ "logits/chosen": -15179654.095238095,
17
+ "logits/rejected": -16528289.93175853,
18
+ "logps/chosen": -2.1814988803514193,
19
+ "logps/rejected": -5.992375464785652,
20
+ "loss": 0.4997,
21
+ "rewards/chosen": 0.0005148474550072527,
22
+ "rewards/margins": 0.0009199919065958275,
23
+ "rewards/rejected": -0.00040514445158857477,
24
  "step": 177
25
  },
26
  {
27
  "epoch": 0.10042553191489362,
28
+ "grad_norm": 0.26544979214668274,
29
+ "kl": 11.266656875610352,
30
+ "learning_rate": 9.996847414880202e-08,
31
+ "logits/chosen": -14902797.791245792,
32
+ "logits/rejected": -16395014.176943699,
33
+ "logps/chosen": -2.2833465678924663,
34
+ "logps/rejected": -6.153056317722297,
35
+ "loss": 0.4996,
36
+ "rewards/chosen": -0.00040731574048096886,
37
+ "rewards/margins": 0.000592714583635074,
38
+ "rewards/rejected": -0.0010000303241160429,
39
  "step": 354
40
  },
41
  {
42
  "epoch": 0.15063829787234043,
43
+ "grad_norm": 0.6797279715538025,
44
+ "kl": 3.9419214725494385,
45
+ "learning_rate": 9.438839848675913e-08,
46
+ "logits/chosen": -14784543.875486381,
47
+ "logits/rejected": -16617535.61345988,
48
+ "logps/chosen": -2.2231409688867947,
49
+ "logps/rejected": -6.279140995739862,
50
+ "loss": 0.4988,
51
+ "rewards/chosen": -0.0022345268772733812,
52
+ "rewards/margins": 0.003318870893000191,
53
+ "rewards/rejected": -0.005553397770273572,
54
  "step": 531
55
  },
56
  {
57
  "epoch": 0.20085106382978724,
58
+ "grad_norm": 0.5770508050918579,
59
+ "kl": 0.7956511974334717,
60
+ "learning_rate": 8.880832282471626e-08,
61
+ "logits/chosen": -14984102.078853047,
62
+ "logits/rejected": -16609355.65171504,
63
+ "logps/chosen": -2.5137440671202955,
64
+ "logps/rejected": -6.364399530013193,
65
+ "loss": 0.4975,
66
+ "rewards/chosen": -0.006582849341908664,
67
+ "rewards/margins": 0.007156510907123363,
68
+ "rewards/rejected": -0.013739360249032027,
69
  "step": 708
70
  },
71
  {
72
  "epoch": 0.251063829787234,
73
+ "grad_norm": 0.5370281934738159,
74
+ "kl": 0.05692530423402786,
75
+ "learning_rate": 8.32282471626734e-08,
76
+ "logits/chosen": -14969774.628571428,
77
+ "logits/rejected": -16552960.0,
78
+ "logps/chosen": -2.374119785853795,
79
+ "logps/rejected": -6.513669296049736,
80
+ "loss": 0.4945,
81
+ "rewards/chosen": -0.012183338403701782,
82
+ "rewards/margins": 0.01736304331833208,
83
+ "rewards/rejected": -0.029546381722033863,
84
  "step": 885
85
  },
86
  {
87
  "epoch": 0.30127659574468085,
88
+ "grad_norm": 0.7874128222465515,
89
+ "kl": 0.04236392676830292,
90
+ "learning_rate": 7.76481715006305e-08,
91
+ "logits/chosen": -14856485.372262774,
92
+ "logits/rejected": -16540230.837127846,
93
+ "logps/chosen": -2.2836047541486084,
94
+ "logps/rejected": -6.687177187089536,
95
+ "loss": 0.4906,
96
+ "rewards/chosen": -0.021432671233685346,
97
+ "rewards/margins": 0.029663742240055215,
98
+ "rewards/rejected": -0.05109641347374056,
99
  "step": 1062
100
  },
101
  {
102
  "epoch": 0.35148936170212763,
103
+ "grad_norm": 0.7597999572753906,
104
  "kl": 0.0,
105
+ "learning_rate": 7.206809583858764e-08,
106
+ "logits/chosen": -15033836.743362831,
107
+ "logits/rejected": -16566120.551260505,
108
+ "logps/chosen": -2.7796039412506914,
109
+ "logps/rejected": -6.999364824054622,
110
+ "loss": 0.4848,
111
+ "rewards/chosen": -0.037276040136286645,
112
+ "rewards/margins": 0.04471597297755264,
113
+ "rewards/rejected": -0.08199201311383929,
114
  "step": 1239
115
  },
116
  {
117
  "epoch": 0.40170212765957447,
118
+ "grad_norm": 1.355948567390442,
119
+ "kl": 0.0319652259349823,
120
+ "learning_rate": 6.648802017654477e-08,
121
+ "logits/chosen": -15049823.426523298,
122
+ "logits/rejected": -16636860.453825857,
123
+ "logps/chosen": -3.031732155857975,
124
+ "logps/rejected": -7.528678471306069,
125
+ "loss": 0.4752,
126
+ "rewards/chosen": -0.06634654110050543,
127
+ "rewards/margins": 0.07183165772143553,
128
+ "rewards/rejected": -0.13817819882194096,
129
  "step": 1416
130
  },
131
  {
132
  "epoch": 0.45191489361702125,
133
+ "grad_norm": 0.9223116040229797,
134
  "kl": 0.0,
135
+ "learning_rate": 6.090794451450188e-08,
136
+ "logits/chosen": -15345349.56521739,
137
+ "logits/rejected": -16821483.340350877,
138
+ "logps/chosen": -3.2707338194916216,
139
+ "logps/rejected": -8.287376644736842,
140
+ "loss": 0.4649,
141
+ "rewards/chosen": -0.10732046072033868,
142
+ "rewards/margins": 0.09713628355734771,
143
+ "rewards/rejected": -0.2044567442776864,
144
  "step": 1593
145
  },
146
  {
147
  "epoch": 0.502127659574468,
148
+ "grad_norm": 0.8558508157730103,
149
  "kl": 0.0,
150
+ "learning_rate": 5.5327868852459016e-08,
151
+ "logits/chosen": -15458446.88372093,
152
+ "logits/rejected": -16827832.373056997,
153
+ "logps/chosen": -4.2163511764171515,
154
+ "logps/rejected": -8.720990696513384,
155
+ "loss": 0.4485,
156
+ "rewards/chosen": -0.18739568725112796,
157
+ "rewards/margins": 0.10582041852458987,
158
+ "rewards/rejected": -0.29321610577571783,
159
  "step": 1770
160
  },
161
  {
162
  "epoch": 0.5523404255319149,
163
+ "grad_norm": 1.2379802465438843,
164
  "kl": 0.0,
165
+ "learning_rate": 4.9747793190416137e-08,
166
+ "logits/chosen": -15368077.450847458,
167
+ "logits/rejected": -17055221.495093666,
168
+ "logps/chosen": -5.225153105137712,
169
+ "logps/rejected": -10.289466714986618,
170
+ "loss": 0.4349,
171
+ "rewards/chosen": -0.31121714963751324,
172
+ "rewards/margins": 0.10698996959893858,
173
+ "rewards/rejected": -0.4182071192364518,
174
  "step": 1947
175
  },
176
  {
177
  "epoch": 0.6025531914893617,
178
+ "grad_norm": 2.636925220489502,
179
  "kl": 0.0,
180
+ "learning_rate": 4.4167717528373264e-08,
181
+ "logits/chosen": -15860050.707692308,
182
+ "logits/rejected": -17018940.23529412,
183
+ "logps/chosen": -6.8304584209735575,
184
+ "logps/rejected": -12.103902532980104,
185
+ "loss": 0.4076,
186
+ "rewards/chosen": -0.4518470470721905,
187
+ "rewards/margins": 0.14684447744662443,
188
+ "rewards/rejected": -0.5986915245188149,
189
  "step": 2124
190
  },
191
  {
192
  "epoch": 0.6527659574468085,
193
+ "grad_norm": 2.6684844493865967,
194
  "kl": 0.0,
195
+ "learning_rate": 3.858764186633039e-08,
196
+ "logits/chosen": -15603414.0,
197
+ "logits/rejected": -17277662.455172412,
198
+ "logps/chosen": -8.103792190551758,
199
+ "logps/rejected": -14.19166049299569,
200
+ "loss": 0.3791,
201
+ "rewards/chosen": -0.6035651564598083,
202
+ "rewards/margins": 0.18009341708545024,
203
+ "rewards/rejected": -0.7836585735452586,
204
  "step": 2301
205
  },
206
  {
207
  "epoch": 0.7029787234042553,
208
+ "grad_norm": 2.259669542312622,
209
  "kl": 0.0,
210
+ "learning_rate": 3.300756620428751e-08,
211
+ "logits/chosen": -15778993.082706766,
212
+ "logits/rejected": -17491624.292173915,
213
+ "logps/chosen": -9.979437118186091,
214
+ "logps/rejected": -16.35701086956522,
215
+ "loss": 0.357,
216
+ "rewards/chosen": -0.7678221508972627,
217
+ "rewards/margins": 0.25122187933371554,
218
+ "rewards/rejected": -1.0190440302309782,
219
  "step": 2478
220
  },
221
  {
222
  "epoch": 0.7531914893617021,
223
+ "grad_norm": 3.3658533096313477,
224
  "kl": 0.0,
225
+ "learning_rate": 2.742749054224464e-08,
226
+ "logits/chosen": -15436312.291970802,
227
+ "logits/rejected": -17457363.614711035,
228
+ "logps/chosen": -11.540958599452555,
229
+ "logps/rejected": -18.07249138025394,
230
+ "loss": 0.3366,
231
+ "rewards/chosen": -0.9344464601391423,
232
+ "rewards/margins": 0.2448034252729635,
233
+ "rewards/rejected": -1.1792498854121058,
234
  "step": 2655
235
  },
236
  {
237
  "epoch": 0.8034042553191489,
238
+ "grad_norm": 2.318241834640503,
239
  "kl": 0.0,
240
+ "learning_rate": 2.1847414880201765e-08,
241
+ "logits/chosen": -15859763.621399177,
242
+ "logits/rejected": -17528114.41432225,
243
+ "logps/chosen": -13.565882804462449,
244
+ "logps/rejected": -20.141737265558397,
245
+ "loss": 0.3189,
246
+ "rewards/chosen": -1.1600869261188271,
247
+ "rewards/margins": 0.21018625745827224,
248
+ "rewards/rejected": -1.3702731835770994,
249
  "step": 2832
250
  },
251
  {
252
  "epoch": 0.8536170212765958,
253
+ "grad_norm": 1.956950306892395,
254
  "kl": 0.0,
255
+ "learning_rate": 1.626733921815889e-08,
256
+ "logits/chosen": -15627038.68164794,
257
+ "logits/rejected": -17529227.69712794,
258
+ "logps/chosen": -15.227195253979401,
259
+ "logps/rejected": -21.2347761640557,
260
+ "loss": 0.3081,
261
+ "rewards/chosen": -1.2809460814972495,
262
+ "rewards/margins": 0.21129570738846626,
263
+ "rewards/rejected": -1.4922417888857158,
264
  "step": 3009
265
  },
266
  {
267
  "epoch": 0.9038297872340425,
268
+ "grad_norm": 2.3128018379211426,
269
  "kl": 0.0,
270
+ "learning_rate": 1.0687263556116015e-08,
271
+ "logits/chosen": -16287571.457875459,
272
+ "logits/rejected": -17474259.877515312,
273
+ "logps/chosen": -16.702005351419412,
274
+ "logps/rejected": -22.0252146216098,
275
+ "loss": 0.2956,
276
+ "rewards/chosen": -1.4336266150841346,
277
+ "rewards/margins": 0.1556206671159419,
278
+ "rewards/rejected": -1.5892472822000765,
279
  "step": 3186
280
  },
281
  {
282
  "epoch": 0.9540425531914893,
283
+ "grad_norm": 0.8989251255989075,
284
  "kl": 0.0,
285
+ "learning_rate": 5.1071878940731394e-09,
286
+ "logits/chosen": -15992458.158730159,
287
+ "logits/rejected": -17564913.04467354,
288
+ "logps/chosen": -16.995010618179563,
289
+ "logps/rejected": -22.893974173109967,
290
+ "loss": 0.2823,
291
+ "rewards/chosen": -1.4715233454628596,
292
+ "rewards/margins": 0.19740445351292446,
293
+ "rewards/rejected": -1.668927798975784,
294
  "step": 3363
295
  },
296
  {
297
  "epoch": 1.0,
298
  "step": 3525,
299
  "total_flos": 5.091091732590756e+17,
300
+ "train_loss": 0.413429944193955,
301
+ "train_runtime": 10515.385,
302
+ "train_samples_per_second": 5.364,
303
+ "train_steps_per_second": 0.335
304
  }
305
  ],
306
  "logging_steps": 177,
training_loss.png CHANGED
training_rewards_chosen.png CHANGED