Shio-Koube commited on
Commit
0354e68
·
verified ·
1 Parent(s): b0ac55d

Add files using upload-large-folder tool

Browse files
Files changed (7) hide show
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +569 -0
  7. training_args.bin +3 -0
config.json CHANGED
@@ -49,5 +49,5 @@
49
  },
50
  "problem_type": "single_label_classification",
51
  "torch_dtype": "float32",
52
- "transformers_version": "4.50.0"
53
  }
 
49
  },
50
  "problem_type": "single_label_classification",
51
  "torch_dtype": "float32",
52
+ "transformers_version": "4.50.2"
53
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:101bacba0804a223bc026653f30a20bc767d1fb993e04eb7e0b4ac1307e400a3
3
  size 1216278228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90080ada1059c43dcfcf4436c02eeba3f62ba55be21e21ff52d8b0738c4036ef
3
  size 1216278228
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72200c90102465b5ac85e50501741aaf82338dbc84964ba30f21b5a31c4bfbd3
3
+ size 2432729850
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5226cdfe3646e9da790bb7cbd6b2d9bddcb9c5b51d7ea24722acc5a9bc1f68
3
+ size 14308
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c14558b7776c1091c3c83b12abce9f8f797f8fd4125511c0b45734038ab1177
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 705,
3
+ "best_metric": 0.16024889051914215,
4
+ "best_model_checkpoint": "./anime_out/checkpoint-705",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 705,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.07092198581560284,
14
+ "grad_norm": 1956.7347412109375,
15
+ "learning_rate": 1.9858156028368796e-05,
16
+ "loss": 2.5378,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.14184397163120568,
21
+ "grad_norm": 398.7348327636719,
22
+ "learning_rate": 1.971631205673759e-05,
23
+ "loss": 0.7439,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.2127659574468085,
28
+ "grad_norm": 267.0840759277344,
29
+ "learning_rate": 1.9574468085106384e-05,
30
+ "loss": 1.8382,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.28368794326241137,
35
+ "grad_norm": 123.07373809814453,
36
+ "learning_rate": 1.9432624113475178e-05,
37
+ "loss": 2.3966,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.3546099290780142,
42
+ "grad_norm": 8.433001494267955e-05,
43
+ "learning_rate": 1.929078014184397e-05,
44
+ "loss": 1.4528,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.425531914893617,
49
+ "grad_norm": 310.42840576171875,
50
+ "learning_rate": 1.914893617021277e-05,
51
+ "loss": 1.5857,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.49645390070921985,
56
+ "grad_norm": 14.845270156860352,
57
+ "learning_rate": 1.9007092198581563e-05,
58
+ "loss": 0.6746,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.5673758865248227,
63
+ "grad_norm": 355.8478698730469,
64
+ "learning_rate": 1.8865248226950357e-05,
65
+ "loss": 1.2875,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.6382978723404256,
70
+ "grad_norm": 571.3264770507812,
71
+ "learning_rate": 1.872340425531915e-05,
72
+ "loss": 0.4645,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.7092198581560284,
77
+ "grad_norm": 270.5877380371094,
78
+ "learning_rate": 1.8581560283687945e-05,
79
+ "loss": 2.0277,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.7801418439716312,
84
+ "grad_norm": 14.781210899353027,
85
+ "learning_rate": 1.843971631205674e-05,
86
+ "loss": 1.1098,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.851063829787234,
91
+ "grad_norm": 323.6094665527344,
92
+ "learning_rate": 1.8297872340425533e-05,
93
+ "loss": 1.0614,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.9219858156028369,
98
+ "grad_norm": 469.43707275390625,
99
+ "learning_rate": 1.8156028368794327e-05,
100
+ "loss": 0.8355,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.9929078014184397,
105
+ "grad_norm": 0.007668596692383289,
106
+ "learning_rate": 1.801418439716312e-05,
107
+ "loss": 0.5349,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 1.0,
112
+ "eval_accuracy": 0.9197860962566845,
113
+ "eval_loss": 0.36517199873924255,
114
+ "eval_runtime": 19.748,
115
+ "eval_samples_per_second": 9.469,
116
+ "eval_steps_per_second": 1.215,
117
+ "step": 141
118
+ },
119
+ {
120
+ "epoch": 1.0638297872340425,
121
+ "grad_norm": 1.1018470525741577,
122
+ "learning_rate": 1.7872340425531915e-05,
123
+ "loss": 0.4347,
124
+ "step": 150
125
+ },
126
+ {
127
+ "epoch": 1.1347517730496455,
128
+ "grad_norm": 2.6226277351379395,
129
+ "learning_rate": 1.773049645390071e-05,
130
+ "loss": 0.6987,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.2056737588652482,
135
+ "grad_norm": 0.04894453287124634,
136
+ "learning_rate": 1.7588652482269506e-05,
137
+ "loss": 0.7401,
138
+ "step": 170
139
+ },
140
+ {
141
+ "epoch": 1.2765957446808511,
142
+ "grad_norm": 0.19339050352573395,
143
+ "learning_rate": 1.74468085106383e-05,
144
+ "loss": 0.5764,
145
+ "step": 180
146
+ },
147
+ {
148
+ "epoch": 1.3475177304964538,
149
+ "grad_norm": 176.8428192138672,
150
+ "learning_rate": 1.7304964539007094e-05,
151
+ "loss": 0.4433,
152
+ "step": 190
153
+ },
154
+ {
155
+ "epoch": 1.4184397163120568,
156
+ "grad_norm": 253.8905029296875,
157
+ "learning_rate": 1.716312056737589e-05,
158
+ "loss": 0.5627,
159
+ "step": 200
160
+ },
161
+ {
162
+ "epoch": 1.4893617021276595,
163
+ "grad_norm": 191.34634399414062,
164
+ "learning_rate": 1.7021276595744682e-05,
165
+ "loss": 0.757,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 1.5602836879432624,
170
+ "grad_norm": 436.973876953125,
171
+ "learning_rate": 1.6879432624113476e-05,
172
+ "loss": 1.9875,
173
+ "step": 220
174
+ },
175
+ {
176
+ "epoch": 1.6312056737588652,
177
+ "grad_norm": 412.5374755859375,
178
+ "learning_rate": 1.673758865248227e-05,
179
+ "loss": 0.8893,
180
+ "step": 230
181
+ },
182
+ {
183
+ "epoch": 1.702127659574468,
184
+ "grad_norm": 153.39613342285156,
185
+ "learning_rate": 1.6595744680851064e-05,
186
+ "loss": 1.1219,
187
+ "step": 240
188
+ },
189
+ {
190
+ "epoch": 1.773049645390071,
191
+ "grad_norm": 303.0024108886719,
192
+ "learning_rate": 1.645390070921986e-05,
193
+ "loss": 0.8197,
194
+ "step": 250
195
+ },
196
+ {
197
+ "epoch": 1.8439716312056738,
198
+ "grad_norm": 4.597931385040283,
199
+ "learning_rate": 1.6312056737588652e-05,
200
+ "loss": 0.3096,
201
+ "step": 260
202
+ },
203
+ {
204
+ "epoch": 1.9148936170212765,
205
+ "grad_norm": 7.166381359100342,
206
+ "learning_rate": 1.6170212765957446e-05,
207
+ "loss": 0.304,
208
+ "step": 270
209
+ },
210
+ {
211
+ "epoch": 1.9858156028368794,
212
+ "grad_norm": 301.7471618652344,
213
+ "learning_rate": 1.6028368794326244e-05,
214
+ "loss": 0.6178,
215
+ "step": 280
216
+ },
217
+ {
218
+ "epoch": 2.0,
219
+ "eval_accuracy": 0.9251336898395722,
220
+ "eval_loss": 0.28867608308792114,
221
+ "eval_runtime": 19.4186,
222
+ "eval_samples_per_second": 9.63,
223
+ "eval_steps_per_second": 1.236,
224
+ "step": 282
225
+ },
226
+ {
227
+ "epoch": 2.0567375886524824,
228
+ "grad_norm": 2.6156604290008545,
229
+ "learning_rate": 1.5886524822695038e-05,
230
+ "loss": 0.2282,
231
+ "step": 290
232
+ },
233
+ {
234
+ "epoch": 2.127659574468085,
235
+ "grad_norm": 0.07026529312133789,
236
+ "learning_rate": 1.5744680851063832e-05,
237
+ "loss": 0.2304,
238
+ "step": 300
239
+ },
240
+ {
241
+ "epoch": 2.198581560283688,
242
+ "grad_norm": 0.2403770089149475,
243
+ "learning_rate": 1.5602836879432626e-05,
244
+ "loss": 0.1619,
245
+ "step": 310
246
+ },
247
+ {
248
+ "epoch": 2.269503546099291,
249
+ "grad_norm": 1001.3410034179688,
250
+ "learning_rate": 1.546099290780142e-05,
251
+ "loss": 0.4448,
252
+ "step": 320
253
+ },
254
+ {
255
+ "epoch": 2.3404255319148937,
256
+ "grad_norm": 0.13391649723052979,
257
+ "learning_rate": 1.5319148936170214e-05,
258
+ "loss": 0.2728,
259
+ "step": 330
260
+ },
261
+ {
262
+ "epoch": 2.4113475177304964,
263
+ "grad_norm": 189.5388946533203,
264
+ "learning_rate": 1.5177304964539008e-05,
265
+ "loss": 0.7779,
266
+ "step": 340
267
+ },
268
+ {
269
+ "epoch": 2.482269503546099,
270
+ "grad_norm": 1.5827876040930278e-06,
271
+ "learning_rate": 1.5035460992907802e-05,
272
+ "loss": 0.8039,
273
+ "step": 350
274
+ },
275
+ {
276
+ "epoch": 2.5531914893617023,
277
+ "grad_norm": 127.49388885498047,
278
+ "learning_rate": 1.4893617021276596e-05,
279
+ "loss": 0.6087,
280
+ "step": 360
281
+ },
282
+ {
283
+ "epoch": 2.624113475177305,
284
+ "grad_norm": 0.00041575109935365617,
285
+ "learning_rate": 1.475177304964539e-05,
286
+ "loss": 0.468,
287
+ "step": 370
288
+ },
289
+ {
290
+ "epoch": 2.6950354609929077,
291
+ "grad_norm": 637.6605224609375,
292
+ "learning_rate": 1.4609929078014187e-05,
293
+ "loss": 0.4797,
294
+ "step": 380
295
+ },
296
+ {
297
+ "epoch": 2.7659574468085104,
298
+ "grad_norm": 0.000833332072943449,
299
+ "learning_rate": 1.4468085106382981e-05,
300
+ "loss": 0.7188,
301
+ "step": 390
302
+ },
303
+ {
304
+ "epoch": 2.8368794326241136,
305
+ "grad_norm": 0.004562568850815296,
306
+ "learning_rate": 1.4326241134751775e-05,
307
+ "loss": 0.1451,
308
+ "step": 400
309
+ },
310
+ {
311
+ "epoch": 2.9078014184397163,
312
+ "grad_norm": 360.48321533203125,
313
+ "learning_rate": 1.418439716312057e-05,
314
+ "loss": 0.7191,
315
+ "step": 410
316
+ },
317
+ {
318
+ "epoch": 2.978723404255319,
319
+ "grad_norm": 236.08912658691406,
320
+ "learning_rate": 1.4042553191489363e-05,
321
+ "loss": 0.3556,
322
+ "step": 420
323
+ },
324
+ {
325
+ "epoch": 3.0,
326
+ "eval_accuracy": 0.9197860962566845,
327
+ "eval_loss": 0.297281414270401,
328
+ "eval_runtime": 19.4814,
329
+ "eval_samples_per_second": 9.599,
330
+ "eval_steps_per_second": 1.232,
331
+ "step": 423
332
+ },
333
+ {
334
+ "epoch": 3.049645390070922,
335
+ "grad_norm": 222.08946228027344,
336
+ "learning_rate": 1.3900709219858157e-05,
337
+ "loss": 0.6342,
338
+ "step": 430
339
+ },
340
+ {
341
+ "epoch": 3.120567375886525,
342
+ "grad_norm": 0.12243126332759857,
343
+ "learning_rate": 1.3758865248226951e-05,
344
+ "loss": 0.9937,
345
+ "step": 440
346
+ },
347
+ {
348
+ "epoch": 3.1914893617021276,
349
+ "grad_norm": 24.167377471923828,
350
+ "learning_rate": 1.3617021276595745e-05,
351
+ "loss": 0.122,
352
+ "step": 450
353
+ },
354
+ {
355
+ "epoch": 3.2624113475177303,
356
+ "grad_norm": 0.006283226888626814,
357
+ "learning_rate": 1.347517730496454e-05,
358
+ "loss": 0.2216,
359
+ "step": 460
360
+ },
361
+ {
362
+ "epoch": 3.3333333333333335,
363
+ "grad_norm": 41.654052734375,
364
+ "learning_rate": 1.3333333333333333e-05,
365
+ "loss": 0.0438,
366
+ "step": 470
367
+ },
368
+ {
369
+ "epoch": 3.404255319148936,
370
+ "grad_norm": 317.71917724609375,
371
+ "learning_rate": 1.3191489361702127e-05,
372
+ "loss": 0.7924,
373
+ "step": 480
374
+ },
375
+ {
376
+ "epoch": 3.475177304964539,
377
+ "grad_norm": 1.5460463762283325,
378
+ "learning_rate": 1.3049645390070925e-05,
379
+ "loss": 0.0088,
380
+ "step": 490
381
+ },
382
+ {
383
+ "epoch": 3.546099290780142,
384
+ "grad_norm": 121.88572692871094,
385
+ "learning_rate": 1.2907801418439719e-05,
386
+ "loss": 0.2176,
387
+ "step": 500
388
+ },
389
+ {
390
+ "epoch": 3.617021276595745,
391
+ "grad_norm": 0.06358540803194046,
392
+ "learning_rate": 1.2765957446808513e-05,
393
+ "loss": 0.1413,
394
+ "step": 510
395
+ },
396
+ {
397
+ "epoch": 3.6879432624113475,
398
+ "grad_norm": 147.4714813232422,
399
+ "learning_rate": 1.2624113475177307e-05,
400
+ "loss": 0.1081,
401
+ "step": 520
402
+ },
403
+ {
404
+ "epoch": 3.7588652482269502,
405
+ "grad_norm": 0.18462225794792175,
406
+ "learning_rate": 1.24822695035461e-05,
407
+ "loss": 0.1447,
408
+ "step": 530
409
+ },
410
+ {
411
+ "epoch": 3.829787234042553,
412
+ "grad_norm": 0.011159995570778847,
413
+ "learning_rate": 1.2340425531914895e-05,
414
+ "loss": 0.3039,
415
+ "step": 540
416
+ },
417
+ {
418
+ "epoch": 3.900709219858156,
419
+ "grad_norm": 5.591824054718018,
420
+ "learning_rate": 1.2198581560283689e-05,
421
+ "loss": 0.3814,
422
+ "step": 550
423
+ },
424
+ {
425
+ "epoch": 3.971631205673759,
426
+ "grad_norm": 0.0026896377094089985,
427
+ "learning_rate": 1.2056737588652483e-05,
428
+ "loss": 0.2332,
429
+ "step": 560
430
+ },
431
+ {
432
+ "epoch": 4.0,
433
+ "eval_accuracy": 0.9518716577540107,
434
+ "eval_loss": 0.20226924121379852,
435
+ "eval_runtime": 19.4299,
436
+ "eval_samples_per_second": 9.624,
437
+ "eval_steps_per_second": 1.235,
438
+ "step": 564
439
+ },
440
+ {
441
+ "epoch": 4.042553191489362,
442
+ "grad_norm": 49.80411911010742,
443
+ "learning_rate": 1.1914893617021277e-05,
444
+ "loss": 1.1912,
445
+ "step": 570
446
+ },
447
+ {
448
+ "epoch": 4.113475177304965,
449
+ "grad_norm": 485.7699279785156,
450
+ "learning_rate": 1.177304964539007e-05,
451
+ "loss": 0.5817,
452
+ "step": 580
453
+ },
454
+ {
455
+ "epoch": 4.184397163120567,
456
+ "grad_norm": 19.611454010009766,
457
+ "learning_rate": 1.1631205673758865e-05,
458
+ "loss": 0.1125,
459
+ "step": 590
460
+ },
461
+ {
462
+ "epoch": 4.25531914893617,
463
+ "grad_norm": 36.56155014038086,
464
+ "learning_rate": 1.1489361702127662e-05,
465
+ "loss": 0.6281,
466
+ "step": 600
467
+ },
468
+ {
469
+ "epoch": 4.326241134751773,
470
+ "grad_norm": 279.4352722167969,
471
+ "learning_rate": 1.1347517730496456e-05,
472
+ "loss": 0.9411,
473
+ "step": 610
474
+ },
475
+ {
476
+ "epoch": 4.397163120567376,
477
+ "grad_norm": 8.183698654174805,
478
+ "learning_rate": 1.120567375886525e-05,
479
+ "loss": 0.0053,
480
+ "step": 620
481
+ },
482
+ {
483
+ "epoch": 4.468085106382979,
484
+ "grad_norm": 0.005647369660437107,
485
+ "learning_rate": 1.1063829787234044e-05,
486
+ "loss": 0.6476,
487
+ "step": 630
488
+ },
489
+ {
490
+ "epoch": 4.539007092198582,
491
+ "grad_norm": 76.17412567138672,
492
+ "learning_rate": 1.0921985815602838e-05,
493
+ "loss": 0.2671,
494
+ "step": 640
495
+ },
496
+ {
497
+ "epoch": 4.609929078014185,
498
+ "grad_norm": 177.41612243652344,
499
+ "learning_rate": 1.0780141843971632e-05,
500
+ "loss": 0.3544,
501
+ "step": 650
502
+ },
503
+ {
504
+ "epoch": 4.680851063829787,
505
+ "grad_norm": 304.32763671875,
506
+ "learning_rate": 1.0638297872340426e-05,
507
+ "loss": 0.6,
508
+ "step": 660
509
+ },
510
+ {
511
+ "epoch": 4.75177304964539,
512
+ "grad_norm": 1.9934466763515957e-05,
513
+ "learning_rate": 1.049645390070922e-05,
514
+ "loss": 0.1934,
515
+ "step": 670
516
+ },
517
+ {
518
+ "epoch": 4.822695035460993,
519
+ "grad_norm": 284.1419677734375,
520
+ "learning_rate": 1.0354609929078014e-05,
521
+ "loss": 0.5667,
522
+ "step": 680
523
+ },
524
+ {
525
+ "epoch": 4.8936170212765955,
526
+ "grad_norm": 12.019274711608887,
527
+ "learning_rate": 1.0212765957446808e-05,
528
+ "loss": 0.4064,
529
+ "step": 690
530
+ },
531
+ {
532
+ "epoch": 4.964539007092198,
533
+ "grad_norm": 0.014889250509440899,
534
+ "learning_rate": 1.0070921985815602e-05,
535
+ "loss": 0.4825,
536
+ "step": 700
537
+ },
538
+ {
539
+ "epoch": 5.0,
540
+ "eval_accuracy": 0.9625668449197861,
541
+ "eval_loss": 0.16024889051914215,
542
+ "eval_runtime": 19.4874,
543
+ "eval_samples_per_second": 9.596,
544
+ "eval_steps_per_second": 1.232,
545
+ "step": 705
546
+ }
547
+ ],
548
+ "logging_steps": 10,
549
+ "max_steps": 1410,
550
+ "num_input_tokens_seen": 0,
551
+ "num_train_epochs": 10,
552
+ "save_steps": 500,
553
+ "stateful_callbacks": {
554
+ "TrainerControl": {
555
+ "args": {
556
+ "should_epoch_stop": false,
557
+ "should_evaluate": false,
558
+ "should_log": false,
559
+ "should_save": true,
560
+ "should_training_stop": false
561
+ },
562
+ "attributes": {}
563
+ }
564
+ },
565
+ "total_flos": 3.0811885244459827e+18,
566
+ "train_batch_size": 4,
567
+ "trial_name": null,
568
+ "trial_params": null
569
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a31855562751e56c0bbdc023a572257f070dafb10957a4f7de4b7223c6a1e1e5
3
+ size 5368