Sean13 committed on
Commit
eac6216
·
verified ·
1 Parent(s): 3b6e40a

Model save

Browse files
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6294400285945709,
5
+ "train_runtime": 3850.4934,
6
+ "train_samples": 29304,
7
+ "train_samples_per_second": 7.61,
8
+ "train_steps_per_second": 0.059
9
+ }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec3ef474d8a965fff12d04444e1f3112d809c52e6deb47d313ce7d521990ec8
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44cb364b56f168eda695bef3c68f0c444c85049d9ccad6163ef8bb1c775f6244
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:150319633ef7c49b35001593549be16f5d92c29a8c89955475a3e5d943390ba7
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0905e80b80f7e8bc8e57e31b71385b60ecb165b7881b01015b35a9df09f2e1a
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f662779ae2e36e2dce047adc50b83a175b1f4b7460a05c0f63c7ffb96d020b
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00613aca57403a3bbaeb5fe6b542bc6bfbc2916481d1a6dac1e7fa2066d48ba2
3
  size 4540516344
runs/Nov16_22-01-40_is-db4bnmjuehm3cygl-devmachine-0/events.out.tfevents.1763301827.is-db4bnmjuehm3cygl-devmachine-0.734091.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d65341ec4e2cad1e8989873b99cdee29c92d0ef80a2172ce12d5a2d347f529e2
3
- size 22351
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a52acbc4d6ad8cee2d4c8508780abaf3624ee68a079efac1c4d6059eec708d10
3
+ size 24081
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6294400285945709,
5
+ "train_runtime": 3850.4934,
6
+ "train_samples": 29304,
7
+ "train_samples_per_second": 7.61,
8
+ "train_steps_per_second": 0.059
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 100,
7
+ "global_step": 229,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.004366812227074236,
14
+ "grad_norm": 257.5207536740227,
15
+ "learning_rate": 0.0,
16
+ "logits/chosen": -2.591796875,
17
+ "logits/rejected": -2.5625,
18
+ "logps/chosen": -432.75,
19
+ "logps/rejected": -351.125,
20
+ "loss": 0.6914,
21
+ "rewards/accuracies": 0.0,
22
+ "rewards/chosen": 0.0,
23
+ "rewards/margins": 0.0,
24
+ "rewards/rejected": 0.0,
25
+ "step": 1
26
+ },
27
+ {
28
+ "epoch": 0.043668122270742356,
29
+ "grad_norm": 234.01839869103762,
30
+ "learning_rate": 7.826086956521739e-08,
31
+ "logits/chosen": -2.584852457046509,
32
+ "logits/rejected": -2.5475261211395264,
33
+ "logps/chosen": -433.09722900390625,
34
+ "logps/rejected": -333.7916564941406,
35
+ "loss": 0.6931,
36
+ "rewards/accuracies": 0.2439236044883728,
37
+ "rewards/chosen": -0.0006177690229378641,
38
+ "rewards/margins": 0.0030831231269985437,
39
+ "rewards/rejected": -0.0037005741614848375,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.08733624454148471,
44
+ "grad_norm": 209.09345964734084,
45
+ "learning_rate": 1.652173913043478e-07,
46
+ "logits/chosen": -2.5966796875,
47
+ "logits/rejected": -2.550976514816284,
48
+ "logps/chosen": -414.79998779296875,
49
+ "logps/rejected": -330.2250061035156,
50
+ "loss": 0.6741,
51
+ "rewards/accuracies": 0.4164062440395355,
52
+ "rewards/chosen": 0.0072196959517896175,
53
+ "rewards/margins": 0.04988040775060654,
54
+ "rewards/rejected": -0.04266033321619034,
55
+ "step": 20
56
+ },
57
+ {
58
+ "epoch": 0.13100436681222707,
59
+ "grad_norm": 200.445288102568,
60
+ "learning_rate": 1.941747572815534e-07,
61
+ "logits/chosen": -2.5845704078674316,
62
+ "logits/rejected": -2.538867235183716,
63
+ "logps/chosen": -420.2250061035156,
64
+ "logps/rejected": -334.98748779296875,
65
+ "loss": 0.6192,
66
+ "rewards/accuracies": 0.620312511920929,
67
+ "rewards/chosen": 0.04333477094769478,
68
+ "rewards/margins": 0.28486937284469604,
69
+ "rewards/rejected": -0.24138298630714417,
70
+ "step": 30
71
+ },
72
+ {
73
+ "epoch": 0.17467248908296942,
74
+ "grad_norm": 214.51955589588093,
75
+ "learning_rate": 1.8446601941747572e-07,
76
+ "logits/chosen": -2.5927734375,
77
+ "logits/rejected": -2.555468797683716,
78
+ "logps/chosen": -415.5,
79
+ "logps/rejected": -331.875,
80
+ "loss": 0.6294,
81
+ "rewards/accuracies": 0.6351562738418579,
82
+ "rewards/chosen": -0.04081001132726669,
83
+ "rewards/margins": 0.5258804559707642,
84
+ "rewards/rejected": -0.5668045282363892,
85
+ "step": 40
86
+ },
87
+ {
88
+ "epoch": 0.2183406113537118,
89
+ "grad_norm": 205.63590618055372,
90
+ "learning_rate": 1.7475728155339804e-07,
91
+ "logits/chosen": -2.5941405296325684,
92
+ "logits/rejected": -2.510937452316284,
93
+ "logps/chosen": -419.45001220703125,
94
+ "logps/rejected": -334.7124938964844,
95
+ "loss": 0.6259,
96
+ "rewards/accuracies": 0.64453125,
97
+ "rewards/chosen": 0.0036384582053869963,
98
+ "rewards/margins": 0.5619354248046875,
99
+ "rewards/rejected": -0.5584350824356079,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.26200873362445415,
104
+ "grad_norm": 223.9917126342855,
105
+ "learning_rate": 1.6504854368932038e-07,
106
+ "logits/chosen": -2.5884766578674316,
107
+ "logits/rejected": -2.537890672683716,
108
+ "logps/chosen": -421.625,
109
+ "logps/rejected": -336.86248779296875,
110
+ "loss": 0.6185,
111
+ "rewards/accuracies": 0.643750011920929,
112
+ "rewards/chosen": 0.14196090400218964,
113
+ "rewards/margins": 0.5803176760673523,
114
+ "rewards/rejected": -0.4383193850517273,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.3056768558951965,
119
+ "grad_norm": 193.43844738802343,
120
+ "learning_rate": 1.553398058252427e-07,
121
+ "logits/chosen": -2.5912108421325684,
122
+ "logits/rejected": -2.5093750953674316,
123
+ "logps/chosen": -413.3500061035156,
124
+ "logps/rejected": -320.3374938964844,
125
+ "loss": 0.598,
126
+ "rewards/accuracies": 0.660937488079071,
127
+ "rewards/chosen": 0.19363251328468323,
128
+ "rewards/margins": 0.6151596307754517,
129
+ "rewards/rejected": -0.4214450716972351,
130
+ "step": 70
131
+ },
132
+ {
133
+ "epoch": 0.34934497816593885,
134
+ "grad_norm": 193.23943723663754,
135
+ "learning_rate": 1.4563106796116505e-07,
136
+ "logits/chosen": -2.6195311546325684,
137
+ "logits/rejected": -2.5523438453674316,
138
+ "logps/chosen": -423.9750061035156,
139
+ "logps/rejected": -329.7124938964844,
140
+ "loss": 0.6073,
141
+ "rewards/accuracies": 0.675000011920929,
142
+ "rewards/chosen": 0.30989378690719604,
143
+ "rewards/margins": 0.6296051144599915,
144
+ "rewards/rejected": -0.3195934295654297,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 0.3930131004366812,
149
+ "grad_norm": 190.20728791240097,
150
+ "learning_rate": 1.359223300970874e-07,
151
+ "logits/chosen": -2.6001954078674316,
152
+ "logits/rejected": -2.551953077316284,
153
+ "logps/chosen": -411.79998779296875,
154
+ "logps/rejected": -336.6625061035156,
155
+ "loss": 0.6202,
156
+ "rewards/accuracies": 0.672656238079071,
157
+ "rewards/chosen": 0.37478941679000854,
158
+ "rewards/margins": 0.6424636840820312,
159
+ "rewards/rejected": -0.2676635682582855,
160
+ "step": 90
161
+ },
162
+ {
163
+ "epoch": 0.4366812227074236,
164
+ "grad_norm": 222.84694096252016,
165
+ "learning_rate": 1.262135922330097e-07,
166
+ "logits/chosen": -2.5894532203674316,
167
+ "logits/rejected": -2.5283203125,
168
+ "logps/chosen": -414.375,
169
+ "logps/rejected": -327.82501220703125,
170
+ "loss": 0.6371,
171
+ "rewards/accuracies": 0.6546875238418579,
172
+ "rewards/chosen": 0.33614271879196167,
173
+ "rewards/margins": 0.6191139221191406,
174
+ "rewards/rejected": -0.28304824233055115,
175
+ "step": 100
176
+ },
177
+ {
178
+ "epoch": 0.4366812227074236,
179
+ "eval_logits/chosen": -2.581024408340454,
180
+ "eval_logits/rejected": -2.533827304840088,
181
+ "eval_logps/chosen": -415.6082458496094,
182
+ "eval_logps/rejected": -331.9175109863281,
183
+ "eval_loss": 0.6342874765396118,
184
+ "eval_rewards/accuracies": 0.6429492235183716,
185
+ "eval_rewards/chosen": 0.3959445059299469,
186
+ "eval_rewards/margins": 0.5977292656898499,
187
+ "eval_rewards/rejected": -0.2017633467912674,
188
+ "eval_runtime": 67.8599,
189
+ "eval_samples_per_second": 22.738,
190
+ "eval_steps_per_second": 1.429,
191
+ "step": 100
192
+ },
193
+ {
194
+ "epoch": 0.48034934497816595,
195
+ "grad_norm": 240.7764484868466,
196
+ "learning_rate": 1.1650485436893203e-07,
197
+ "logits/chosen": -2.6099610328674316,
198
+ "logits/rejected": -2.5560545921325684,
199
+ "logps/chosen": -421.95001220703125,
200
+ "logps/rejected": -333.4624938964844,
201
+ "loss": 0.6413,
202
+ "rewards/accuracies": 0.649218738079071,
203
+ "rewards/chosen": 0.36162033677101135,
204
+ "rewards/margins": 0.6145599484443665,
205
+ "rewards/rejected": -0.2532787322998047,
206
+ "step": 110
207
+ },
208
+ {
209
+ "epoch": 0.5240174672489083,
210
+ "grad_norm": 232.16678770972172,
211
+ "learning_rate": 1.0679611650485436e-07,
212
+ "logits/chosen": -2.609375,
213
+ "logits/rejected": -2.559375047683716,
214
+ "logps/chosen": -411.29998779296875,
215
+ "logps/rejected": -335.17498779296875,
216
+ "loss": 0.6483,
217
+ "rewards/accuracies": 0.651562511920929,
218
+ "rewards/chosen": 0.3660331666469574,
219
+ "rewards/margins": 0.597150444984436,
220
+ "rewards/rejected": -0.23108120262622833,
221
+ "step": 120
222
+ },
223
+ {
224
+ "epoch": 0.5676855895196506,
225
+ "grad_norm": 233.8374652476243,
226
+ "learning_rate": 9.70873786407767e-08,
227
+ "logits/chosen": -2.608593702316284,
228
+ "logits/rejected": -2.5416016578674316,
229
+ "logps/chosen": -427.5375061035156,
230
+ "logps/rejected": -334.88751220703125,
231
+ "loss": 0.6269,
232
+ "rewards/accuracies": 0.667187511920929,
233
+ "rewards/chosen": 0.41166526079177856,
234
+ "rewards/margins": 0.6970123052597046,
235
+ "rewards/rejected": -0.2859039306640625,
236
+ "step": 130
237
+ },
238
+ {
239
+ "epoch": 0.611353711790393,
240
+ "grad_norm": 210.86682983433053,
241
+ "learning_rate": 8.737864077669902e-08,
242
+ "logits/chosen": -2.587695360183716,
243
+ "logits/rejected": -2.546679735183716,
244
+ "logps/chosen": -410.32501220703125,
245
+ "logps/rejected": -328.625,
246
+ "loss": 0.6611,
247
+ "rewards/accuracies": 0.6578124761581421,
248
+ "rewards/chosen": 0.4082275331020355,
249
+ "rewards/margins": 0.6441711187362671,
250
+ "rewards/rejected": -0.23561783134937286,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.6550218340611353,
255
+ "grad_norm": 205.21616859687637,
256
+ "learning_rate": 7.766990291262135e-08,
257
+ "logits/chosen": -2.6058592796325684,
258
+ "logits/rejected": -2.546875,
259
+ "logps/chosen": -423.8999938964844,
260
+ "logps/rejected": -333.2875061035156,
261
+ "loss": 0.6362,
262
+ "rewards/accuracies": 0.676562488079071,
263
+ "rewards/chosen": 0.49949073791503906,
264
+ "rewards/margins": 0.6958252191543579,
265
+ "rewards/rejected": -0.19634170830249786,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.6986899563318777,
270
+ "grad_norm": 207.31134498490616,
271
+ "learning_rate": 6.79611650485437e-08,
272
+ "logits/chosen": -2.6107420921325684,
273
+ "logits/rejected": -2.5423827171325684,
274
+ "logps/chosen": -410.8500061035156,
275
+ "logps/rejected": -326.5874938964844,
276
+ "loss": 0.6429,
277
+ "rewards/accuracies": 0.6734374761581421,
278
+ "rewards/chosen": 0.5038589239120483,
279
+ "rewards/margins": 0.6741867065429688,
280
+ "rewards/rejected": -0.1705268919467926,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 0.74235807860262,
285
+ "grad_norm": 186.84771212396123,
286
+ "learning_rate": 5.825242718446601e-08,
287
+ "logits/chosen": -2.615429639816284,
288
+ "logits/rejected": -2.548046827316284,
289
+ "logps/chosen": -407.1499938964844,
290
+ "logps/rejected": -336.3374938964844,
291
+ "loss": 0.6425,
292
+ "rewards/accuracies": 0.6617187261581421,
293
+ "rewards/chosen": 0.5310348272323608,
294
+ "rewards/margins": 0.682720959186554,
295
+ "rewards/rejected": -0.1518428772687912,
296
+ "step": 170
297
+ },
298
+ {
299
+ "epoch": 0.7860262008733624,
300
+ "grad_norm": 188.74506708738218,
301
+ "learning_rate": 4.854368932038835e-08,
302
+ "logits/chosen": -2.610156297683716,
303
+ "logits/rejected": -2.546093702316284,
304
+ "logps/chosen": -412.875,
305
+ "logps/rejected": -328.8374938964844,
306
+ "loss": 0.6145,
307
+ "rewards/accuracies": 0.6937500238418579,
308
+ "rewards/chosen": 0.550067126750946,
309
+ "rewards/margins": 0.7159286737442017,
310
+ "rewards/rejected": -0.16567841172218323,
311
+ "step": 180
312
+ },
313
+ {
314
+ "epoch": 0.8296943231441049,
315
+ "grad_norm": 192.35820754552165,
316
+ "learning_rate": 3.8834951456310675e-08,
317
+ "logits/chosen": -2.5999999046325684,
318
+ "logits/rejected": -2.555468797683716,
319
+ "logps/chosen": -421.20001220703125,
320
+ "logps/rejected": -329.04998779296875,
321
+ "loss": 0.6037,
322
+ "rewards/accuracies": 0.6890624761581421,
323
+ "rewards/chosen": 0.5914382934570312,
324
+ "rewards/margins": 0.7566490173339844,
325
+ "rewards/rejected": -0.1647975891828537,
326
+ "step": 190
327
+ },
328
+ {
329
+ "epoch": 0.8733624454148472,
330
+ "grad_norm": 199.01450536603292,
331
+ "learning_rate": 2.9126213592233006e-08,
332
+ "logits/chosen": -2.6044921875,
333
+ "logits/rejected": -2.543750047683716,
334
+ "logps/chosen": -418.42498779296875,
335
+ "logps/rejected": -327.0625,
336
+ "loss": 0.6253,
337
+ "rewards/accuracies": 0.6695312261581421,
338
+ "rewards/chosen": 0.537921130657196,
339
+ "rewards/margins": 0.6540893316268921,
340
+ "rewards/rejected": -0.11599349975585938,
341
+ "step": 200
342
+ },
343
+ {
344
+ "epoch": 0.8733624454148472,
345
+ "eval_logits/chosen": -2.5845682621002197,
346
+ "eval_logits/rejected": -2.5405123233795166,
347
+ "eval_logps/chosen": -414.4948425292969,
348
+ "eval_logps/rejected": -331.1443176269531,
349
+ "eval_loss": 0.6223304867744446,
350
+ "eval_rewards/accuracies": 0.6558358073234558,
351
+ "eval_rewards/chosen": 0.527737021446228,
352
+ "eval_rewards/margins": 0.656870424747467,
353
+ "eval_rewards/rejected": -0.12893016636371613,
354
+ "eval_runtime": 67.7691,
355
+ "eval_samples_per_second": 22.768,
356
+ "eval_steps_per_second": 1.431,
357
+ "step": 200
358
+ },
359
+ {
360
+ "epoch": 0.9170305676855895,
361
+ "grad_norm": 182.3398057560757,
362
+ "learning_rate": 1.9417475728155338e-08,
363
+ "logits/chosen": -2.6318359375,
364
+ "logits/rejected": -2.5595703125,
365
+ "logps/chosen": -431.07501220703125,
366
+ "logps/rejected": -334.875,
367
+ "loss": 0.6041,
368
+ "rewards/accuracies": 0.6781250238418579,
369
+ "rewards/chosen": 0.549694836139679,
370
+ "rewards/margins": 0.7145935297012329,
371
+ "rewards/rejected": -0.16451263427734375,
372
+ "step": 210
373
+ },
374
+ {
375
+ "epoch": 0.9606986899563319,
376
+ "grad_norm": 200.59628515083702,
377
+ "learning_rate": 9.708737864077669e-09,
378
+ "logits/chosen": -2.5894532203674316,
379
+ "logits/rejected": -2.5513672828674316,
380
+ "logps/chosen": -409.6000061035156,
381
+ "logps/rejected": -325.57501220703125,
382
+ "loss": 0.6247,
383
+ "rewards/accuracies": 0.6625000238418579,
384
+ "rewards/chosen": 0.5058227777481079,
385
+ "rewards/margins": 0.6715041995048523,
386
+ "rewards/rejected": -0.16602401435375214,
387
+ "step": 220
388
+ },
389
+ {
390
+ "epoch": 1.0,
391
+ "step": 229,
392
+ "total_flos": 0.0,
393
+ "train_loss": 0.6294400285945709,
394
+ "train_runtime": 3850.4934,
395
+ "train_samples_per_second": 7.61,
396
+ "train_steps_per_second": 0.059
397
+ }
398
+ ],
399
+ "logging_steps": 10,
400
+ "max_steps": 229,
401
+ "num_input_tokens_seen": 0,
402
+ "num_train_epochs": 1,
403
+ "save_steps": 100,
404
+ "stateful_callbacks": {
405
+ "TrainerControl": {
406
+ "args": {
407
+ "should_epoch_stop": false,
408
+ "should_evaluate": false,
409
+ "should_log": false,
410
+ "should_save": true,
411
+ "should_training_stop": true
412
+ },
413
+ "attributes": {}
414
+ }
415
+ },
416
+ "total_flos": 0.0,
417
+ "train_batch_size": 2,
418
+ "trial_name": null,
419
+ "trial_params": null
420
+ }