{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 183,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0547945205479452,
      "grad_norm": 513.996826171875,
      "learning_rate": 4.090909090909091e-07,
      "logits/chosen": 2.990995407104492,
      "logits/rejected": 3.0081257820129395,
      "logps/chosen": -298.52886962890625,
      "logps/rejected": -202.96295166015625,
      "loss": 0.8843,
      "rewards/accuracies": 0.526562511920929,
      "rewards/chosen": 0.010423189960420132,
      "rewards/margins": 0.05534166842699051,
      "rewards/rejected": -0.0449184887111187,
      "step": 10
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 461.9533996582031,
      "learning_rate": 8.636363636363637e-07,
      "logits/chosen": 2.934217929840088,
      "logits/rejected": 2.919574737548828,
      "logps/chosen": -276.70391845703125,
      "logps/rejected": -200.52728271484375,
      "loss": 0.6858,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.7733574509620667,
      "rewards/margins": 0.9247980117797852,
      "rewards/rejected": -0.15144045650959015,
      "step": 20
    },
    {
      "epoch": 0.1643835616438356,
      "grad_norm": 150.57086181640625,
      "learning_rate": 1.3181818181818182e-06,
      "logits/chosen": 3.0546913146972656,
      "logits/rejected": 3.0510308742523193,
      "logps/chosen": -289.57977294921875,
      "logps/rejected": -218.24765014648438,
      "loss": 0.2839,
      "rewards/accuracies": 0.8890625238418579,
      "rewards/chosen": 2.8807666301727295,
      "rewards/margins": 4.351069450378418,
      "rewards/rejected": -1.4703023433685303,
      "step": 30
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 151.46690368652344,
      "learning_rate": 1.7727272727272729e-06,
      "logits/chosen": 3.088348150253296,
      "logits/rejected": 3.1125292778015137,
      "logps/chosen": -275.5274963378906,
      "logps/rejected": -214.15737915039062,
      "loss": 0.1502,
      "rewards/accuracies": 0.9390624761581421,
      "rewards/chosen": 5.216189384460449,
      "rewards/margins": 8.827147483825684,
      "rewards/rejected": -3.610957384109497,
      "step": 40
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 130.53453063964844,
      "learning_rate": 2.2272727272727274e-06,
      "logits/chosen": 2.9992308616638184,
      "logits/rejected": 3.075270175933838,
      "logps/chosen": -259.50836181640625,
      "logps/rejected": -203.08811950683594,
      "loss": 0.1281,
      "rewards/accuracies": 0.957812488079071,
      "rewards/chosen": 6.97296667098999,
      "rewards/margins": 12.8120698928833,
      "rewards/rejected": -5.839103698730469,
      "step": 50
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 110.53746032714844,
      "learning_rate": 2.6818181818181822e-06,
      "logits/chosen": 3.088064670562744,
      "logits/rejected": 2.986386775970459,
      "logps/chosen": -325.0431213378906,
      "logps/rejected": -224.979736328125,
      "loss": 0.1444,
      "rewards/accuracies": 0.964062511920929,
      "rewards/chosen": 10.117586135864258,
      "rewards/margins": 18.425701141357422,
      "rewards/rejected": -8.30811595916748,
      "step": 60
    },
    {
      "epoch": 0.3835616438356164,
      "grad_norm": 95.7762222290039,
      "learning_rate": 3.1363636363636367e-06,
      "logits/chosen": 3.040494918823242,
      "logits/rejected": 3.022307872772217,
      "logps/chosen": -273.9972839355469,
      "logps/rejected": -211.2712860107422,
      "loss": 0.1443,
      "rewards/accuracies": 0.9671875238418579,
      "rewards/chosen": 9.627126693725586,
      "rewards/margins": 20.590023040771484,
      "rewards/rejected": -10.962896347045898,
      "step": 70
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 179.24440002441406,
      "learning_rate": 3.590909090909091e-06,
      "logits/chosen": 3.0298266410827637,
      "logits/rejected": 3.0730605125427246,
      "logps/chosen": -280.2432861328125,
      "logps/rejected": -214.884033203125,
      "loss": 0.142,
      "rewards/accuracies": 0.979687511920929,
      "rewards/chosen": 10.719534873962402,
      "rewards/margins": 23.82217788696289,
      "rewards/rejected": -13.102640151977539,
      "step": 80
    },
    {
      "epoch": 0.4931506849315068,
      "grad_norm": 2.0804860591888428,
      "learning_rate": 4.045454545454546e-06,
      "logits/chosen": 3.1072518825531006,
      "logits/rejected": 3.067288875579834,
      "logps/chosen": -294.1097106933594,
      "logps/rejected": -219.38949584960938,
      "loss": 0.1262,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 12.069157600402832,
      "rewards/margins": 29.454524993896484,
      "rewards/rejected": -17.385366439819336,
      "step": 90
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 134.90240478515625,
      "learning_rate": 4.5e-06,
      "logits/chosen": 2.9695353507995605,
      "logits/rejected": 2.9900407791137695,
      "logps/chosen": -270.2259826660156,
      "logps/rejected": -210.32302856445312,
      "loss": 0.1125,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 8.642024040222168,
      "rewards/margins": 26.856210708618164,
      "rewards/rejected": -18.21418571472168,
      "step": 100
    },
    {
      "epoch": 0.6027397260273972,
      "grad_norm": 237.83163452148438,
      "learning_rate": 4.954545454545455e-06,
      "logits/chosen": 3.001239538192749,
      "logits/rejected": 2.9165444374084473,
      "logps/chosen": -261.63848876953125,
      "logps/rejected": -217.56314086914062,
      "loss": 0.1337,
      "rewards/accuracies": 0.9765625,
      "rewards/chosen": 4.362582206726074,
      "rewards/margins": 21.842912673950195,
      "rewards/rejected": -17.480329513549805,
      "step": 110
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 124.83686065673828,
      "learning_rate": 4.998976350571773e-06,
      "logits/chosen": 3.0631394386291504,
      "logits/rejected": 3.0034124851226807,
      "logps/chosen": -296.0355224609375,
      "logps/rejected": -219.4881134033203,
      "loss": 0.176,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 3.6324734687805176,
      "rewards/margins": 20.854042053222656,
      "rewards/rejected": -17.221569061279297,
      "step": 120
    },
    {
      "epoch": 0.7123287671232876,
      "grad_norm": 110.8822250366211,
      "learning_rate": 4.995438885558294e-06,
      "logits/chosen": 3.0476179122924805,
      "logits/rejected": 2.9690792560577393,
      "logps/chosen": -292.52276611328125,
      "logps/rejected": -210.3925018310547,
      "loss": 0.2762,
      "rewards/accuracies": 0.9781249761581421,
      "rewards/chosen": 5.373471736907959,
      "rewards/margins": 26.076580047607422,
      "rewards/rejected": -20.703105926513672,
      "step": 130
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 72.18496704101562,
      "learning_rate": 4.989378542821969e-06,
      "logits/chosen": 3.0710926055908203,
      "logits/rejected": 3.0577285289764404,
      "logps/chosen": -284.55230712890625,
      "logps/rejected": -230.9425506591797,
      "loss": 0.2372,
      "rewards/accuracies": 0.973437488079071,
      "rewards/chosen": 5.454714775085449,
      "rewards/margins": 30.868602752685547,
      "rewards/rejected": -25.413890838623047,
      "step": 140
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 104.61406707763672,
      "learning_rate": 4.9808014493426124e-06,
      "logits/chosen": 3.053307294845581,
      "logits/rejected": 3.0027899742126465,
      "logps/chosen": -282.54864501953125,
      "logps/rejected": -236.41592407226562,
      "loss": 0.6492,
      "rewards/accuracies": 0.9546874761581421,
      "rewards/chosen": 0.29179587960243225,
      "rewards/margins": 30.52816390991211,
      "rewards/rejected": -30.23636817932129,
      "step": 150
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 114.0179443359375,
      "learning_rate": 4.9697162765239595e-06,
      "logits/chosen": 3.0813591480255127,
      "logits/rejected": 3.093292713165283,
      "logps/chosen": -265.5400390625,
      "logps/rejected": -233.4171142578125,
      "loss": 0.1703,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 2.905339002609253,
      "rewards/margins": 36.841583251953125,
      "rewards/rejected": -33.936241149902344,
      "step": 160
    },
    {
      "epoch": 0.9315068493150684,
      "grad_norm": 132.4547576904297,
      "learning_rate": 4.9561342314269055e-06,
      "logits/chosen": 3.124277114868164,
      "logits/rejected": 3.048166036605835,
      "logps/chosen": -262.30194091796875,
      "logps/rejected": -232.39297485351562,
      "loss": 0.4256,
      "rewards/accuracies": 0.9515625238418579,
      "rewards/chosen": -1.3073980808258057,
      "rewards/margins": 30.139415740966797,
      "rewards/rejected": -31.446813583374023,
      "step": 170
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 1.0581492185592651,
      "learning_rate": 4.940069045439226e-06,
      "logits/chosen": 3.164407968521118,
      "logits/rejected": 3.164742946624756,
      "logps/chosen": -274.58673095703125,
      "logps/rejected": -232.9495849609375,
      "loss": 0.3806,
      "rewards/accuracies": 0.9703124761581421,
      "rewards/chosen": 0.4807693064212799,
      "rewards/margins": 33.424041748046875,
      "rewards/rejected": -32.943275451660156,
      "step": 180
    }
  ],
  "logging_steps": 10,
  "max_steps": 1098,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}