Thibaut committed on
Commit
db248f3
·
verified ·
1 Parent(s): f63f5eb

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/convnext-base-224-22k
5
  tags:
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # Validated_Balanced_Raw_Data_model_boost9
18
 
19
- This model is a fine-tuned version of [facebook/convnext-base-224-22k](https://huggingface.co/facebook/convnext-base-224-22k) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.2585
22
  - Accuracy: 0.4151
23
 
24
  ## Model description
 
3
  license: apache-2.0
4
  base_model: facebook/convnext-base-224-22k
5
  tags:
6
+ - image-classification
7
+ - vision
8
  - generated_from_trainer
9
  metrics:
10
  - accuracy
 
18
 
19
  # Validated_Balanced_Raw_Data_model_boost9
20
 
21
+ This model is a fine-tuned version of [facebook/convnext-base-224-22k](https://huggingface.co/facebook/convnext-base-224-22k) on the Logiroad/Validated_Balanced_Raw_Dataset dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 1.2586
24
  - Accuracy: 0.4151
25
 
26
  ## Model description
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.41509433962264153,
4
+ "eval_loss": 1.2586045265197754,
5
+ "eval_runtime": 1.5488,
6
+ "eval_samples_per_second": 136.881,
7
+ "eval_steps_per_second": 17.433,
8
+ "total_flos": 3.739939937176781e+18,
9
+ "train_loss": 1.1909779739379882,
10
+ "train_runtime": 509.0926,
11
+ "train_samples_per_second": 31.232,
12
+ "train_steps_per_second": 3.929
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.41509433962264153,
4
+ "eval_loss": 1.2586045265197754,
5
+ "eval_runtime": 1.5488,
6
+ "eval_samples_per_second": 136.881,
7
+ "eval_steps_per_second": 17.433
8
+ }
runs/Dec01_15-49-04_algo-1/events.out.tfevents.1764604683.algo-1.68.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8191ce396bea95d71f8681969cb35aac6396de427e8b2cc40df6ca82e46ed325
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "total_flos": 3.739939937176781e+18,
4
+ "train_loss": 1.1909779739379882,
5
+ "train_runtime": 509.0926,
6
+ "train_samples_per_second": 31.232,
7
+ "train_steps_per_second": 3.929
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.41509433962264153,
3
+ "best_model_checkpoint": "./Validated_Balanced_Raw_Data_model_boost9_outputs/checkpoint-1600",
4
+ "epoch": 25.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.625,
13
+ "grad_norm": 1.5967129468917847,
14
+ "learning_rate": 1.5e-05,
15
+ "loss": 1.3942,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.33490566037735847,
21
+ "eval_loss": 1.3565607070922852,
22
+ "eval_runtime": 1.5583,
23
+ "eval_samples_per_second": 136.048,
24
+ "eval_steps_per_second": 17.327,
25
+ "step": 80
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.937333583831787,
30
+ "learning_rate": 3e-05,
31
+ "loss": 1.3639,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 1.875,
36
+ "grad_norm": 1.9313533306121826,
37
+ "learning_rate": 2.994876739510005e-05,
38
+ "loss": 1.3192,
39
+ "step": 150
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_accuracy": 0.3584905660377358,
44
+ "eval_loss": 1.3104463815689087,
45
+ "eval_runtime": 1.5299,
46
+ "eval_samples_per_second": 138.573,
47
+ "eval_steps_per_second": 17.648,
48
+ "step": 160
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 1.8240773677825928,
53
+ "learning_rate": 2.9795419551040836e-05,
54
+ "loss": 1.2795,
55
+ "step": 200
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.37264150943396224,
60
+ "eval_loss": 1.2999355792999268,
61
+ "eval_runtime": 1.5285,
62
+ "eval_samples_per_second": 138.698,
63
+ "eval_steps_per_second": 17.664,
64
+ "step": 240
65
+ },
66
+ {
67
+ "epoch": 3.125,
68
+ "grad_norm": 1.688889980316162,
69
+ "learning_rate": 2.9541003989089956e-05,
70
+ "loss": 1.2794,
71
+ "step": 250
72
+ },
73
+ {
74
+ "epoch": 3.75,
75
+ "grad_norm": 2.2275140285491943,
76
+ "learning_rate": 2.9187258625509518e-05,
77
+ "loss": 1.2419,
78
+ "step": 300
79
+ },
80
+ {
81
+ "epoch": 4.0,
82
+ "eval_accuracy": 0.37264150943396224,
83
+ "eval_loss": 1.2860321998596191,
84
+ "eval_runtime": 1.5373,
85
+ "eval_samples_per_second": 137.908,
86
+ "eval_steps_per_second": 17.564,
87
+ "step": 320
88
+ },
89
+ {
90
+ "epoch": 4.375,
91
+ "grad_norm": 1.8031086921691895,
92
+ "learning_rate": 2.873659989982586e-05,
93
+ "loss": 1.2749,
94
+ "step": 350
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "grad_norm": 2.7317395210266113,
99
+ "learning_rate": 2.8192106268097336e-05,
100
+ "loss": 1.2213,
101
+ "step": 400
102
+ },
103
+ {
104
+ "epoch": 5.0,
105
+ "eval_accuracy": 0.36792452830188677,
106
+ "eval_loss": 1.2893822193145752,
107
+ "eval_runtime": 1.538,
108
+ "eval_samples_per_second": 137.84,
109
+ "eval_steps_per_second": 17.555,
110
+ "step": 400
111
+ },
112
+ {
113
+ "epoch": 5.625,
114
+ "grad_norm": 1.6971545219421387,
115
+ "learning_rate": 2.7557497173937928e-05,
116
+ "loss": 1.2287,
117
+ "step": 450
118
+ },
119
+ {
120
+ "epoch": 6.0,
121
+ "eval_accuracy": 0.3632075471698113,
122
+ "eval_loss": 1.2862772941589355,
123
+ "eval_runtime": 1.5343,
124
+ "eval_samples_per_second": 138.176,
125
+ "eval_steps_per_second": 17.598,
126
+ "step": 480
127
+ },
128
+ {
129
+ "epoch": 6.25,
130
+ "grad_norm": 1.7909101247787476,
131
+ "learning_rate": 2.6837107640945904e-05,
132
+ "loss": 1.2138,
133
+ "step": 500
134
+ },
135
+ {
136
+ "epoch": 6.875,
137
+ "grad_norm": 2.154249668121338,
138
+ "learning_rate": 2.6035858660096975e-05,
139
+ "loss": 1.2123,
140
+ "step": 550
141
+ },
142
+ {
143
+ "epoch": 7.0,
144
+ "eval_accuracy": 0.3915094339622642,
145
+ "eval_loss": 1.287874460220337,
146
+ "eval_runtime": 1.5329,
147
+ "eval_samples_per_second": 138.296,
148
+ "eval_steps_per_second": 17.613,
149
+ "step": 560
150
+ },
151
+ {
152
+ "epoch": 7.5,
153
+ "grad_norm": 1.533173680305481,
154
+ "learning_rate": 2.5159223574386117e-05,
155
+ "loss": 1.2124,
156
+ "step": 600
157
+ },
158
+ {
159
+ "epoch": 8.0,
160
+ "eval_accuracy": 0.3867924528301887,
161
+ "eval_loss": 1.2767480611801147,
162
+ "eval_runtime": 1.527,
163
+ "eval_samples_per_second": 138.831,
164
+ "eval_steps_per_second": 17.681,
165
+ "step": 640
166
+ },
167
+ {
168
+ "epoch": 8.125,
169
+ "grad_norm": 1.6685694456100464,
170
+ "learning_rate": 2.4213190690345018e-05,
171
+ "loss": 1.2018,
172
+ "step": 650
173
+ },
174
+ {
175
+ "epoch": 8.75,
176
+ "grad_norm": 2.190777540206909,
177
+ "learning_rate": 2.320422237183641e-05,
178
+ "loss": 1.2144,
179
+ "step": 700
180
+ },
181
+ {
182
+ "epoch": 9.0,
183
+ "eval_accuracy": 0.37264150943396224,
184
+ "eval_loss": 1.2851072549819946,
185
+ "eval_runtime": 1.5312,
186
+ "eval_samples_per_second": 138.456,
187
+ "eval_steps_per_second": 17.634,
188
+ "step": 720
189
+ },
190
+ {
191
+ "epoch": 9.375,
192
+ "grad_norm": 2.0433597564697266,
193
+ "learning_rate": 2.2139210895556104e-05,
194
+ "loss": 1.1531,
195
+ "step": 750
196
+ },
197
+ {
198
+ "epoch": 10.0,
199
+ "grad_norm": 2.4729323387145996,
200
+ "learning_rate": 2.1025431369794546e-05,
201
+ "loss": 1.2202,
202
+ "step": 800
203
+ },
204
+ {
205
+ "epoch": 10.0,
206
+ "eval_accuracy": 0.39622641509433965,
207
+ "eval_loss": 1.2682827711105347,
208
+ "eval_runtime": 1.5206,
209
+ "eval_samples_per_second": 139.414,
210
+ "eval_steps_per_second": 17.756,
211
+ "step": 800
212
+ },
213
+ {
214
+ "epoch": 10.625,
215
+ "grad_norm": 1.8216651678085327,
216
+ "learning_rate": 1.9870492038070255e-05,
217
+ "loss": 1.1804,
218
+ "step": 850
219
+ },
220
+ {
221
+ "epoch": 11.0,
222
+ "eval_accuracy": 0.4009433962264151,
223
+ "eval_loss": 1.2658637762069702,
224
+ "eval_runtime": 1.5239,
225
+ "eval_samples_per_second": 139.115,
226
+ "eval_steps_per_second": 17.718,
227
+ "step": 880
228
+ },
229
+ {
230
+ "epoch": 11.25,
231
+ "grad_norm": 2.511573553085327,
232
+ "learning_rate": 1.8682282307111988e-05,
233
+ "loss": 1.1541,
234
+ "step": 900
235
+ },
236
+ {
237
+ "epoch": 11.875,
238
+ "grad_norm": 2.372868299484253,
239
+ "learning_rate": 1.746891885421101e-05,
240
+ "loss": 1.2031,
241
+ "step": 950
242
+ },
243
+ {
244
+ "epoch": 12.0,
245
+ "eval_accuracy": 0.39622641509433965,
246
+ "eval_loss": 1.265770435333252,
247
+ "eval_runtime": 1.5336,
248
+ "eval_samples_per_second": 138.236,
249
+ "eval_steps_per_second": 17.606,
250
+ "step": 960
251
+ },
252
+ {
253
+ "epoch": 12.5,
254
+ "grad_norm": 1.9403022527694702,
255
+ "learning_rate": 1.623869018208499e-05,
256
+ "loss": 1.1428,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 13.0,
261
+ "eval_accuracy": 0.4056603773584906,
262
+ "eval_loss": 1.262069582939148,
263
+ "eval_runtime": 1.5275,
264
+ "eval_samples_per_second": 138.792,
265
+ "eval_steps_per_second": 17.676,
266
+ "step": 1040
267
+ },
268
+ {
269
+ "epoch": 13.125,
270
+ "grad_norm": 2.0001461505889893,
271
+ "learning_rate": 1.5e-05,
272
+ "loss": 1.1744,
273
+ "step": 1050
274
+ },
275
+ {
276
+ "epoch": 13.75,
277
+ "grad_norm": 1.5777283906936646,
278
+ "learning_rate": 1.3761309817915017e-05,
279
+ "loss": 1.1224,
280
+ "step": 1100
281
+ },
282
+ {
283
+ "epoch": 14.0,
284
+ "eval_accuracy": 0.41037735849056606,
285
+ "eval_loss": 1.2655014991760254,
286
+ "eval_runtime": 1.5235,
287
+ "eval_samples_per_second": 139.151,
288
+ "eval_steps_per_second": 17.722,
289
+ "step": 1120
290
+ },
291
+ {
292
+ "epoch": 14.375,
293
+ "grad_norm": 1.9835065603256226,
294
+ "learning_rate": 1.2531081145788989e-05,
295
+ "loss": 1.1765,
296
+ "step": 1150
297
+ },
298
+ {
299
+ "epoch": 15.0,
300
+ "grad_norm": 3.021399974822998,
301
+ "learning_rate": 1.1317717692888014e-05,
302
+ "loss": 1.1486,
303
+ "step": 1200
304
+ },
305
+ {
306
+ "epoch": 15.0,
307
+ "eval_accuracy": 0.39622641509433965,
308
+ "eval_loss": 1.2606432437896729,
309
+ "eval_runtime": 1.5215,
310
+ "eval_samples_per_second": 139.34,
311
+ "eval_steps_per_second": 17.746,
312
+ "step": 1200
313
+ },
314
+ {
315
+ "epoch": 15.625,
316
+ "grad_norm": 1.9539563655853271,
317
+ "learning_rate": 1.0129507961929749e-05,
318
+ "loss": 1.1451,
319
+ "step": 1250
320
+ },
321
+ {
322
+ "epoch": 16.0,
323
+ "eval_accuracy": 0.4056603773584906,
324
+ "eval_loss": 1.2635830640792847,
325
+ "eval_runtime": 1.5268,
326
+ "eval_samples_per_second": 138.851,
327
+ "eval_steps_per_second": 17.684,
328
+ "step": 1280
329
+ },
330
+ {
331
+ "epoch": 16.25,
332
+ "grad_norm": 2.3783926963806152,
333
+ "learning_rate": 8.974568630205462e-06,
334
+ "loss": 1.1363,
335
+ "step": 1300
336
+ },
337
+ {
338
+ "epoch": 16.875,
339
+ "grad_norm": 2.221468448638916,
340
+ "learning_rate": 7.860789104443897e-06,
341
+ "loss": 1.1717,
342
+ "step": 1350
343
+ },
344
+ {
345
+ "epoch": 17.0,
346
+ "eval_accuracy": 0.4056603773584906,
347
+ "eval_loss": 1.2595568895339966,
348
+ "eval_runtime": 1.5272,
349
+ "eval_samples_per_second": 138.816,
350
+ "eval_steps_per_second": 17.679,
351
+ "step": 1360
352
+ },
353
+ {
354
+ "epoch": 17.5,
355
+ "grad_norm": 1.5900912284851074,
356
+ "learning_rate": 6.795777628163599e-06,
357
+ "loss": 1.1231,
358
+ "step": 1400
359
+ },
360
+ {
361
+ "epoch": 18.0,
362
+ "eval_accuracy": 0.4056603773584906,
363
+ "eval_loss": 1.26264488697052,
364
+ "eval_runtime": 1.5254,
365
+ "eval_samples_per_second": 138.976,
366
+ "eval_steps_per_second": 17.7,
367
+ "step": 1440
368
+ },
369
+ {
370
+ "epoch": 18.125,
371
+ "grad_norm": 1.9134879112243652,
372
+ "learning_rate": 5.786809309654983e-06,
373
+ "loss": 1.1455,
374
+ "step": 1450
375
+ },
376
+ {
377
+ "epoch": 18.75,
378
+ "grad_norm": 1.3620388507843018,
379
+ "learning_rate": 4.840776425613887e-06,
380
+ "loss": 1.1468,
381
+ "step": 1500
382
+ },
383
+ {
384
+ "epoch": 19.0,
385
+ "eval_accuracy": 0.39622641509433965,
386
+ "eval_loss": 1.2616825103759766,
387
+ "eval_runtime": 1.5238,
388
+ "eval_samples_per_second": 139.125,
389
+ "eval_steps_per_second": 17.719,
390
+ "step": 1520
391
+ },
392
+ {
393
+ "epoch": 19.375,
394
+ "grad_norm": 1.9459707736968994,
395
+ "learning_rate": 3.964141339903026e-06,
396
+ "loss": 1.167,
397
+ "step": 1550
398
+ },
399
+ {
400
+ "epoch": 20.0,
401
+ "grad_norm": 2.5087108612060547,
402
+ "learning_rate": 3.162892359054098e-06,
403
+ "loss": 1.0958,
404
+ "step": 1600
405
+ },
406
+ {
407
+ "epoch": 20.0,
408
+ "eval_accuracy": 0.41509433962264153,
409
+ "eval_loss": 1.2586045265197754,
410
+ "eval_runtime": 1.5245,
411
+ "eval_samples_per_second": 139.065,
412
+ "eval_steps_per_second": 17.711,
413
+ "step": 1600
414
+ },
415
+ {
416
+ "epoch": 20.625,
417
+ "grad_norm": 1.9791457653045654,
418
+ "learning_rate": 2.442502826062072e-06,
419
+ "loss": 1.1456,
420
+ "step": 1650
421
+ },
422
+ {
423
+ "epoch": 21.0,
424
+ "eval_accuracy": 0.41037735849056606,
425
+ "eval_loss": 1.258667230606079,
426
+ "eval_runtime": 1.5176,
427
+ "eval_samples_per_second": 139.693,
428
+ "eval_steps_per_second": 17.791,
429
+ "step": 1680
430
+ },
431
+ {
432
+ "epoch": 21.25,
433
+ "grad_norm": 1.4690279960632324,
434
+ "learning_rate": 1.8078937319026655e-06,
435
+ "loss": 1.1492,
436
+ "step": 1700
437
+ },
438
+ {
439
+ "epoch": 21.875,
440
+ "grad_norm": 2.0290393829345703,
441
+ "learning_rate": 1.2634001001741375e-06,
442
+ "loss": 1.127,
443
+ "step": 1750
444
+ },
445
+ {
446
+ "epoch": 22.0,
447
+ "eval_accuracy": 0.41509433962264153,
448
+ "eval_loss": 1.258967399597168,
449
+ "eval_runtime": 1.5264,
450
+ "eval_samples_per_second": 138.887,
451
+ "eval_steps_per_second": 17.688,
452
+ "step": 1760
453
+ },
454
+ {
455
+ "epoch": 22.5,
456
+ "grad_norm": 1.6253653764724731,
457
+ "learning_rate": 8.127413744904805e-07,
458
+ "loss": 1.1308,
459
+ "step": 1800
460
+ },
461
+ {
462
+ "epoch": 23.0,
463
+ "eval_accuracy": 0.41509433962264153,
464
+ "eval_loss": 1.2586345672607422,
465
+ "eval_runtime": 1.5342,
466
+ "eval_samples_per_second": 138.183,
467
+ "eval_steps_per_second": 17.599,
468
+ "step": 1840
469
+ },
470
+ {
471
+ "epoch": 23.125,
472
+ "grad_norm": 1.7840685844421387,
473
+ "learning_rate": 4.589960109100444e-07,
474
+ "loss": 1.1989,
475
+ "step": 1850
476
+ },
477
+ {
478
+ "epoch": 23.75,
479
+ "grad_norm": 1.6836535930633545,
480
+ "learning_rate": 2.0458044895916516e-07,
481
+ "loss": 1.1433,
482
+ "step": 1900
483
+ },
484
+ {
485
+ "epoch": 24.0,
486
+ "eval_accuracy": 0.41509433962264153,
487
+ "eval_loss": 1.2584929466247559,
488
+ "eval_runtime": 1.5229,
489
+ "eval_samples_per_second": 139.206,
490
+ "eval_steps_per_second": 17.729,
491
+ "step": 1920
492
+ },
493
+ {
494
+ "epoch": 24.375,
495
+ "grad_norm": 2.0122158527374268,
496
+ "learning_rate": 5.1232604899952296e-08,
497
+ "loss": 1.1303,
498
+ "step": 1950
499
+ },
500
+ {
501
+ "epoch": 25.0,
502
+ "grad_norm": 2.9466681480407715,
503
+ "learning_rate": 0.0,
504
+ "loss": 1.1492,
505
+ "step": 2000
506
+ },
507
+ {
508
+ "epoch": 25.0,
509
+ "eval_accuracy": 0.41509433962264153,
510
+ "eval_loss": 1.2584505081176758,
511
+ "eval_runtime": 2.8679,
512
+ "eval_samples_per_second": 73.922,
513
+ "eval_steps_per_second": 9.415,
514
+ "step": 2000
515
+ },
516
+ {
517
+ "epoch": 25.0,
518
+ "step": 2000,
519
+ "total_flos": 3.739939937176781e+18,
520
+ "train_loss": 1.1909779739379882,
521
+ "train_runtime": 509.0926,
522
+ "train_samples_per_second": 31.232,
523
+ "train_steps_per_second": 3.929
524
+ }
525
+ ],
526
+ "logging_steps": 50,
527
+ "max_steps": 2000,
528
+ "num_input_tokens_seen": 0,
529
+ "num_train_epochs": 25,
530
+ "save_steps": 500,
531
+ "stateful_callbacks": {
532
+ "TrainerControl": {
533
+ "args": {
534
+ "should_epoch_stop": false,
535
+ "should_evaluate": false,
536
+ "should_log": false,
537
+ "should_save": true,
538
+ "should_training_stop": true
539
+ },
540
+ "attributes": {}
541
+ }
542
+ },
543
+ "total_flos": 3.739939937176781e+18,
544
+ "train_batch_size": 8,
545
+ "trial_name": null,
546
+ "trial_params": null
547
+ }