JulesGo committed on
Commit
bedeabf
·
verified ·
1 Parent(s): babce7c

Fin de l'entraînement

Browse files
Files changed (5) hide show
  1. README.md +3 -3
  2. all_results.json +14 -0
  3. eval_results.json +9 -0
  4. train_results.json +8 -0
  5. trainer_state.json +609 -0
README.md CHANGED
@@ -14,9 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.0533
18
- - Mse: 0.1327
19
- - Mae: 0.3166
20
 
21
  ## Model description
22
 
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.0531
18
+ - Mse: 0.1291
19
+ - Mae: 0.3119
20
 
21
  ## Model description
22
 
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.985507246376812,
3
+ "eval_loss": 0.05310577526688576,
4
+ "eval_mae": 0.3118866980075836,
5
+ "eval_mse": 0.12907913327217102,
6
+ "eval_runtime": 58.3393,
7
+ "eval_samples_per_second": 7.096,
8
+ "eval_steps_per_second": 0.891,
9
+ "total_flos": 0.0,
10
+ "train_loss": 0.11046674571006126,
11
+ "train_runtime": 98364.9673,
12
+ "train_samples_per_second": 0.504,
13
+ "train_steps_per_second": 0.016
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.985507246376812,
3
+ "eval_loss": 0.05310577526688576,
4
+ "eval_mae": 0.3118866980075836,
5
+ "eval_mse": 0.12907913327217102,
6
+ "eval_runtime": 58.3393,
7
+ "eval_samples_per_second": 7.096,
8
+ "eval_steps_per_second": 0.891
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.985507246376812,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.11046674571006126,
5
+ "train_runtime": 98364.9673,
6
+ "train_samples_per_second": 0.504,
7
+ "train_steps_per_second": 0.016
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 969,
3
+ "best_metric": 0.12907913327217102,
4
+ "best_model_checkpoint": "./vit_focus_full/checkpoint-969",
5
+ "epoch": 29.985507246376812,
6
+ "eval_steps": 500,
7
+ "global_step": 1530,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.7729468599033816,
14
+ "grad_norm": 10.6144437789917,
15
+ "learning_rate": 4.872549019607843e-05,
16
+ "loss": 0.3146,
17
+ "step": 40
18
+ },
19
+ {
20
+ "epoch": 0.9855072463768116,
21
+ "eval_loss": 0.05953465402126312,
22
+ "eval_mae": 0.3265259861946106,
23
+ "eval_mse": 0.1403445601463318,
24
+ "eval_runtime": 57.1346,
25
+ "eval_samples_per_second": 7.246,
26
+ "eval_steps_per_second": 0.91,
27
+ "step": 51
28
+ },
29
+ {
30
+ "epoch": 1.5603864734299517,
31
+ "grad_norm": 5.5437469482421875,
32
+ "learning_rate": 4.741830065359477e-05,
33
+ "loss": 0.2488,
34
+ "step": 80
35
+ },
36
+ {
37
+ "epoch": 1.9855072463768115,
38
+ "eval_loss": 0.05661279708147049,
39
+ "eval_mae": 0.3253043293952942,
40
+ "eval_mse": 0.13950611650943756,
41
+ "eval_runtime": 56.4199,
42
+ "eval_samples_per_second": 7.338,
43
+ "eval_steps_per_second": 0.922,
44
+ "step": 102
45
+ },
46
+ {
47
+ "epoch": 2.3478260869565215,
48
+ "grad_norm": 4.464972972869873,
49
+ "learning_rate": 4.6111111111111115e-05,
50
+ "loss": 0.2278,
51
+ "step": 120
52
+ },
53
+ {
54
+ "epoch": 2.9855072463768115,
55
+ "eval_loss": 0.06113344058394432,
56
+ "eval_mae": 0.3287981450557709,
57
+ "eval_mse": 0.14261041581630707,
58
+ "eval_runtime": 56.9728,
59
+ "eval_samples_per_second": 7.267,
60
+ "eval_steps_per_second": 0.913,
61
+ "step": 153
62
+ },
63
+ {
64
+ "epoch": 3.135265700483092,
65
+ "grad_norm": 8.567197799682617,
66
+ "learning_rate": 4.480392156862745e-05,
67
+ "loss": 0.2148,
68
+ "step": 160
69
+ },
70
+ {
71
+ "epoch": 3.9082125603864735,
72
+ "grad_norm": 5.523195266723633,
73
+ "learning_rate": 4.3496732026143795e-05,
74
+ "loss": 0.206,
75
+ "step": 200
76
+ },
77
+ {
78
+ "epoch": 3.9855072463768115,
79
+ "eval_loss": 0.05355362221598625,
80
+ "eval_mae": 0.31797775626182556,
81
+ "eval_mse": 0.13227654993534088,
82
+ "eval_runtime": 56.8583,
83
+ "eval_samples_per_second": 7.281,
84
+ "eval_steps_per_second": 0.915,
85
+ "step": 204
86
+ },
87
+ {
88
+ "epoch": 4.695652173913043,
89
+ "grad_norm": 6.00140380859375,
90
+ "learning_rate": 4.218954248366013e-05,
91
+ "loss": 0.1902,
92
+ "step": 240
93
+ },
94
+ {
95
+ "epoch": 4.9855072463768115,
96
+ "eval_loss": 0.06186218187212944,
97
+ "eval_mae": 0.3270839750766754,
98
+ "eval_mse": 0.1410592794418335,
99
+ "eval_runtime": 56.1466,
100
+ "eval_samples_per_second": 7.374,
101
+ "eval_steps_per_second": 0.926,
102
+ "step": 255
103
+ },
104
+ {
105
+ "epoch": 5.483091787439614,
106
+ "grad_norm": 9.328702926635742,
107
+ "learning_rate": 4.0882352941176474e-05,
108
+ "loss": 0.187,
109
+ "step": 280
110
+ },
111
+ {
112
+ "epoch": 5.9855072463768115,
113
+ "eval_loss": 0.05080530419945717,
114
+ "eval_mae": 0.3168753385543823,
115
+ "eval_mse": 0.1319676637649536,
116
+ "eval_runtime": 57.4612,
117
+ "eval_samples_per_second": 7.205,
118
+ "eval_steps_per_second": 0.905,
119
+ "step": 306
120
+ },
121
+ {
122
+ "epoch": 6.270531400966184,
123
+ "grad_norm": 7.799366474151611,
124
+ "learning_rate": 3.957516339869281e-05,
125
+ "loss": 0.1757,
126
+ "step": 320
127
+ },
128
+ {
129
+ "epoch": 6.9855072463768115,
130
+ "eval_loss": 0.05371123179793358,
131
+ "eval_mae": 0.31825557351112366,
132
+ "eval_mse": 0.13387194275856018,
133
+ "eval_runtime": 57.3782,
134
+ "eval_samples_per_second": 7.215,
135
+ "eval_steps_per_second": 0.906,
136
+ "step": 357
137
+ },
138
+ {
139
+ "epoch": 7.057971014492754,
140
+ "grad_norm": 4.06664514541626,
141
+ "learning_rate": 3.8267973856209146e-05,
142
+ "loss": 0.1677,
143
+ "step": 360
144
+ },
145
+ {
146
+ "epoch": 7.830917874396135,
147
+ "grad_norm": 5.403101921081543,
148
+ "learning_rate": 3.6960784313725496e-05,
149
+ "loss": 0.1523,
150
+ "step": 400
151
+ },
152
+ {
153
+ "epoch": 7.9855072463768115,
154
+ "eval_loss": 0.055755238980054855,
155
+ "eval_mae": 0.31683334708213806,
156
+ "eval_mse": 0.13297995924949646,
157
+ "eval_runtime": 65.7904,
158
+ "eval_samples_per_second": 6.293,
159
+ "eval_steps_per_second": 0.79,
160
+ "step": 408
161
+ },
162
+ {
163
+ "epoch": 8.618357487922705,
164
+ "grad_norm": 6.7577948570251465,
165
+ "learning_rate": 3.565359477124183e-05,
166
+ "loss": 0.1528,
167
+ "step": 440
168
+ },
169
+ {
170
+ "epoch": 8.985507246376812,
171
+ "eval_loss": 0.05914789438247681,
172
+ "eval_mae": 0.3224806785583496,
173
+ "eval_mse": 0.1381232738494873,
174
+ "eval_runtime": 56.9831,
175
+ "eval_samples_per_second": 7.265,
176
+ "eval_steps_per_second": 0.913,
177
+ "step": 459
178
+ },
179
+ {
180
+ "epoch": 9.405797101449275,
181
+ "grad_norm": 4.654517650604248,
182
+ "learning_rate": 3.434640522875817e-05,
183
+ "loss": 0.1416,
184
+ "step": 480
185
+ },
186
+ {
187
+ "epoch": 9.985507246376812,
188
+ "eval_loss": 0.05355934053659439,
189
+ "eval_mae": 0.3197546601295471,
190
+ "eval_mse": 0.1352616846561432,
191
+ "eval_runtime": 57.4136,
192
+ "eval_samples_per_second": 7.211,
193
+ "eval_steps_per_second": 0.906,
194
+ "step": 510
195
+ },
196
+ {
197
+ "epoch": 10.193236714975846,
198
+ "grad_norm": 4.063232421875,
199
+ "learning_rate": 3.303921568627451e-05,
200
+ "loss": 0.1391,
201
+ "step": 520
202
+ },
203
+ {
204
+ "epoch": 10.966183574879228,
205
+ "grad_norm": 4.905858993530273,
206
+ "learning_rate": 3.173202614379085e-05,
207
+ "loss": 0.1298,
208
+ "step": 560
209
+ },
210
+ {
211
+ "epoch": 10.985507246376812,
212
+ "eval_loss": 0.05300646275281906,
213
+ "eval_mae": 0.3164079189300537,
214
+ "eval_mse": 0.132521390914917,
215
+ "eval_runtime": 58.995,
216
+ "eval_samples_per_second": 7.018,
217
+ "eval_steps_per_second": 0.881,
218
+ "step": 561
219
+ },
220
+ {
221
+ "epoch": 11.753623188405797,
222
+ "grad_norm": 4.643632411956787,
223
+ "learning_rate": 3.0424836601307187e-05,
224
+ "loss": 0.1161,
225
+ "step": 600
226
+ },
227
+ {
228
+ "epoch": 11.985507246376812,
229
+ "eval_loss": 0.0511205680668354,
230
+ "eval_mae": 0.315570205450058,
231
+ "eval_mse": 0.13146661221981049,
232
+ "eval_runtime": 57.0447,
233
+ "eval_samples_per_second": 7.257,
234
+ "eval_steps_per_second": 0.912,
235
+ "step": 612
236
+ },
237
+ {
238
+ "epoch": 12.541062801932368,
239
+ "grad_norm": 3.0849831104278564,
240
+ "learning_rate": 2.9117647058823534e-05,
241
+ "loss": 0.1085,
242
+ "step": 640
243
+ },
244
+ {
245
+ "epoch": 12.985507246376812,
246
+ "eval_loss": 0.05314180254936218,
247
+ "eval_mae": 0.32430657744407654,
248
+ "eval_mse": 0.13849547505378723,
249
+ "eval_runtime": 631.8234,
250
+ "eval_samples_per_second": 0.655,
251
+ "eval_steps_per_second": 0.082,
252
+ "step": 663
253
+ },
254
+ {
255
+ "epoch": 13.328502415458937,
256
+ "grad_norm": 5.586836338043213,
257
+ "learning_rate": 2.7810457516339873e-05,
258
+ "loss": 0.1028,
259
+ "step": 680
260
+ },
261
+ {
262
+ "epoch": 13.985507246376812,
263
+ "eval_loss": 0.05296429246664047,
264
+ "eval_mae": 0.31508708000183105,
265
+ "eval_mse": 0.1316087543964386,
266
+ "eval_runtime": 57.8458,
267
+ "eval_samples_per_second": 7.157,
268
+ "eval_steps_per_second": 0.899,
269
+ "step": 714
270
+ },
271
+ {
272
+ "epoch": 14.115942028985508,
273
+ "grad_norm": 3.5024545192718506,
274
+ "learning_rate": 2.650326797385621e-05,
275
+ "loss": 0.0974,
276
+ "step": 720
277
+ },
278
+ {
279
+ "epoch": 14.88888888888889,
280
+ "grad_norm": 3.7782580852508545,
281
+ "learning_rate": 2.519607843137255e-05,
282
+ "loss": 0.0891,
283
+ "step": 760
284
+ },
285
+ {
286
+ "epoch": 14.985507246376812,
287
+ "eval_loss": 0.0540492981672287,
288
+ "eval_mae": 0.31779569387435913,
289
+ "eval_mse": 0.1337898075580597,
290
+ "eval_runtime": 57.6717,
291
+ "eval_samples_per_second": 7.179,
292
+ "eval_steps_per_second": 0.902,
293
+ "step": 765
294
+ },
295
+ {
296
+ "epoch": 15.676328502415458,
297
+ "grad_norm": 3.615967035293579,
298
+ "learning_rate": 2.3888888888888892e-05,
299
+ "loss": 0.0878,
300
+ "step": 800
301
+ },
302
+ {
303
+ "epoch": 15.985507246376812,
304
+ "eval_loss": 0.05357988178730011,
305
+ "eval_mae": 0.3177140951156616,
306
+ "eval_mse": 0.13350851833820343,
307
+ "eval_runtime": 57.5097,
308
+ "eval_samples_per_second": 7.199,
309
+ "eval_steps_per_second": 0.904,
310
+ "step": 816
311
+ },
312
+ {
313
+ "epoch": 16.463768115942027,
314
+ "grad_norm": 9.533724784851074,
315
+ "learning_rate": 2.258169934640523e-05,
316
+ "loss": 0.077,
317
+ "step": 840
318
+ },
319
+ {
320
+ "epoch": 16.985507246376812,
321
+ "eval_loss": 0.05338989570736885,
322
+ "eval_mae": 0.31321439146995544,
323
+ "eval_mse": 0.12988565862178802,
324
+ "eval_runtime": 58.1505,
325
+ "eval_samples_per_second": 7.119,
326
+ "eval_steps_per_second": 0.894,
327
+ "step": 867
328
+ },
329
+ {
330
+ "epoch": 17.2512077294686,
331
+ "grad_norm": 3.7093381881713867,
332
+ "learning_rate": 2.1274509803921568e-05,
333
+ "loss": 0.0769,
334
+ "step": 880
335
+ },
336
+ {
337
+ "epoch": 17.985507246376812,
338
+ "eval_loss": 0.0548611618578434,
339
+ "eval_mae": 0.3149418532848358,
340
+ "eval_mse": 0.1313086301088333,
341
+ "eval_runtime": 56.4832,
342
+ "eval_samples_per_second": 7.33,
343
+ "eval_steps_per_second": 0.921,
344
+ "step": 918
345
+ },
346
+ {
347
+ "epoch": 18.03864734299517,
348
+ "grad_norm": 2.9852871894836426,
349
+ "learning_rate": 1.996732026143791e-05,
350
+ "loss": 0.0717,
351
+ "step": 920
352
+ },
353
+ {
354
+ "epoch": 18.81159420289855,
355
+ "grad_norm": 3.3752264976501465,
356
+ "learning_rate": 1.866013071895425e-05,
357
+ "loss": 0.0663,
358
+ "step": 960
359
+ },
360
+ {
361
+ "epoch": 18.985507246376812,
362
+ "eval_loss": 0.05310577526688576,
363
+ "eval_mae": 0.3118866980075836,
364
+ "eval_mse": 0.12907913327217102,
365
+ "eval_runtime": 58.2255,
366
+ "eval_samples_per_second": 7.11,
367
+ "eval_steps_per_second": 0.893,
368
+ "step": 969
369
+ },
370
+ {
371
+ "epoch": 19.59903381642512,
372
+ "grad_norm": 2.9139506816864014,
373
+ "learning_rate": 1.735294117647059e-05,
374
+ "loss": 0.064,
375
+ "step": 1000
376
+ },
377
+ {
378
+ "epoch": 19.985507246376812,
379
+ "eval_loss": 0.05400167778134346,
380
+ "eval_mae": 0.31967055797576904,
381
+ "eval_mse": 0.13520964980125427,
382
+ "eval_runtime": 58.0572,
383
+ "eval_samples_per_second": 7.131,
384
+ "eval_steps_per_second": 0.896,
385
+ "step": 1020
386
+ },
387
+ {
388
+ "epoch": 20.386473429951693,
389
+ "grad_norm": 3.1011509895324707,
390
+ "learning_rate": 1.604575163398693e-05,
391
+ "loss": 0.0608,
392
+ "step": 1040
393
+ },
394
+ {
395
+ "epoch": 20.985507246376812,
396
+ "eval_loss": 0.05348004400730133,
397
+ "eval_mae": 0.3179128170013428,
398
+ "eval_mse": 0.13336069881916046,
399
+ "eval_runtime": 56.8284,
400
+ "eval_samples_per_second": 7.285,
401
+ "eval_steps_per_second": 0.915,
402
+ "step": 1071
403
+ },
404
+ {
405
+ "epoch": 21.17391304347826,
406
+ "grad_norm": 2.4269816875457764,
407
+ "learning_rate": 1.473856209150327e-05,
408
+ "loss": 0.0558,
409
+ "step": 1080
410
+ },
411
+ {
412
+ "epoch": 21.946859903381643,
413
+ "grad_norm": 2.612093925476074,
414
+ "learning_rate": 1.3431372549019607e-05,
415
+ "loss": 0.0548,
416
+ "step": 1120
417
+ },
418
+ {
419
+ "epoch": 21.985507246376812,
420
+ "eval_loss": 0.052902594208717346,
421
+ "eval_mae": 0.3134055733680725,
422
+ "eval_mse": 0.129911869764328,
423
+ "eval_runtime": 57.5407,
424
+ "eval_samples_per_second": 7.195,
425
+ "eval_steps_per_second": 0.904,
426
+ "step": 1122
427
+ },
428
+ {
429
+ "epoch": 22.734299516908212,
430
+ "grad_norm": 1.7072349786758423,
431
+ "learning_rate": 1.2124183006535949e-05,
432
+ "loss": 0.0517,
433
+ "step": 1160
434
+ },
435
+ {
436
+ "epoch": 22.985507246376812,
437
+ "eval_loss": 0.05338846519589424,
438
+ "eval_mae": 0.31519371271133423,
439
+ "eval_mse": 0.13099054992198944,
440
+ "eval_runtime": 2988.6114,
441
+ "eval_samples_per_second": 0.139,
442
+ "eval_steps_per_second": 0.017,
443
+ "step": 1173
444
+ },
445
+ {
446
+ "epoch": 23.52173913043478,
447
+ "grad_norm": 2.942000389099121,
448
+ "learning_rate": 1.0816993464052288e-05,
449
+ "loss": 0.0498,
450
+ "step": 1200
451
+ },
452
+ {
453
+ "epoch": 23.985507246376812,
454
+ "eval_loss": 0.05435283109545708,
455
+ "eval_mae": 0.31506991386413574,
456
+ "eval_mse": 0.13137240707874298,
457
+ "eval_runtime": 158.629,
458
+ "eval_samples_per_second": 2.61,
459
+ "eval_steps_per_second": 0.328,
460
+ "step": 1224
461
+ },
462
+ {
463
+ "epoch": 24.309178743961354,
464
+ "grad_norm": 1.7872236967086792,
465
+ "learning_rate": 9.509803921568628e-06,
466
+ "loss": 0.047,
467
+ "step": 1240
468
+ },
469
+ {
470
+ "epoch": 24.985507246376812,
471
+ "eval_loss": 0.05310087278485298,
472
+ "eval_mae": 0.3145076036453247,
473
+ "eval_mse": 0.13092052936553955,
474
+ "eval_runtime": 59.2601,
475
+ "eval_samples_per_second": 6.986,
476
+ "eval_steps_per_second": 0.877,
477
+ "step": 1275
478
+ },
479
+ {
480
+ "epoch": 25.096618357487923,
481
+ "grad_norm": 1.8146392107009888,
482
+ "learning_rate": 8.202614379084967e-06,
483
+ "loss": 0.0467,
484
+ "step": 1280
485
+ },
486
+ {
487
+ "epoch": 25.869565217391305,
488
+ "grad_norm": 1.8770432472229004,
489
+ "learning_rate": 6.895424836601308e-06,
490
+ "loss": 0.0443,
491
+ "step": 1320
492
+ },
493
+ {
494
+ "epoch": 25.985507246376812,
495
+ "eval_loss": 0.053730811923742294,
496
+ "eval_mae": 0.31641700863838196,
497
+ "eval_mse": 0.1325235366821289,
498
+ "eval_runtime": 8331.3737,
499
+ "eval_samples_per_second": 0.05,
500
+ "eval_steps_per_second": 0.006,
501
+ "step": 1326
502
+ },
503
+ {
504
+ "epoch": 26.657004830917874,
505
+ "grad_norm": 2.1211466789245605,
506
+ "learning_rate": 5.588235294117647e-06,
507
+ "loss": 0.042,
508
+ "step": 1360
509
+ },
510
+ {
511
+ "epoch": 26.985507246376812,
512
+ "eval_loss": 0.05325399339199066,
513
+ "eval_mae": 0.31560125946998596,
514
+ "eval_mse": 0.13193772733211517,
515
+ "eval_runtime": 3946.2723,
516
+ "eval_samples_per_second": 0.105,
517
+ "eval_steps_per_second": 0.013,
518
+ "step": 1377
519
+ },
520
+ {
521
+ "epoch": 27.444444444444443,
522
+ "grad_norm": 1.9497586488723755,
523
+ "learning_rate": 4.281045751633987e-06,
524
+ "loss": 0.0397,
525
+ "step": 1400
526
+ },
527
+ {
528
+ "epoch": 27.985507246376812,
529
+ "eval_loss": 0.052952226251363754,
530
+ "eval_mae": 0.3155405521392822,
531
+ "eval_mse": 0.13170257210731506,
532
+ "eval_runtime": 58.8468,
533
+ "eval_samples_per_second": 7.035,
534
+ "eval_steps_per_second": 0.884,
535
+ "step": 1428
536
+ },
537
+ {
538
+ "epoch": 28.231884057971016,
539
+ "grad_norm": 5.6321330070495605,
540
+ "learning_rate": 2.9738562091503266e-06,
541
+ "loss": 0.0411,
542
+ "step": 1440
543
+ },
544
+ {
545
+ "epoch": 28.985507246376812,
546
+ "eval_loss": 0.05421222001314163,
547
+ "eval_mae": 0.31665799021720886,
548
+ "eval_mse": 0.13281531631946564,
549
+ "eval_runtime": 60.076,
550
+ "eval_samples_per_second": 6.891,
551
+ "eval_steps_per_second": 0.866,
552
+ "step": 1479
553
+ },
554
+ {
555
+ "epoch": 29.019323671497585,
556
+ "grad_norm": 1.5062155723571777,
557
+ "learning_rate": 1.6666666666666667e-06,
558
+ "loss": 0.0385,
559
+ "step": 1480
560
+ },
561
+ {
562
+ "epoch": 29.792270531400966,
563
+ "grad_norm": 3.7781600952148438,
564
+ "learning_rate": 3.5947712418300653e-07,
565
+ "loss": 0.0382,
566
+ "step": 1520
567
+ },
568
+ {
569
+ "epoch": 29.985507246376812,
570
+ "eval_loss": 0.05334796383976936,
571
+ "eval_mae": 0.31658393144607544,
572
+ "eval_mse": 0.13268809020519257,
573
+ "eval_runtime": 61.4065,
574
+ "eval_samples_per_second": 6.742,
575
+ "eval_steps_per_second": 0.847,
576
+ "step": 1530
577
+ },
578
+ {
579
+ "epoch": 29.985507246376812,
580
+ "step": 1530,
581
+ "total_flos": 0.0,
582
+ "train_loss": 0.11046674571006126,
583
+ "train_runtime": 98364.9673,
584
+ "train_samples_per_second": 0.504,
585
+ "train_steps_per_second": 0.016
586
+ }
587
+ ],
588
+ "logging_steps": 40,
589
+ "max_steps": 1530,
590
+ "num_input_tokens_seen": 0,
591
+ "num_train_epochs": 30,
592
+ "save_steps": 500,
593
+ "stateful_callbacks": {
594
+ "TrainerControl": {
595
+ "args": {
596
+ "should_epoch_stop": false,
597
+ "should_evaluate": false,
598
+ "should_log": false,
599
+ "should_save": true,
600
+ "should_training_stop": true
601
+ },
602
+ "attributes": {}
603
+ }
604
+ },
605
+ "total_flos": 0.0,
606
+ "train_batch_size": 8,
607
+ "trial_name": null,
608
+ "trial_params": null
609
+ }