error577 commited on
Commit
f795b5b
·
verified ·
1 Parent(s): 46cb048

Training in progress, step 4071, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b18bf2ef5a58fafa878b248cbf68b82419946e906651ff194b5c76c056fb08b
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019f7d564a5c9d518bec0e30d9649ba99937af670238d8452968fa5b1413a528
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01e166d9d5b84714454a7cc37b98e06d7bae8e84fb581970ec17a8c1840e30fe
3
  size 51613668
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72e4711ba0b347f94b175a3390cd1f352cd517007e6176a860b09cd59f1edf4e
3
  size 51613668
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f0b89a8cec8741d7852668aebad53cb8460a6fb7ce6157d3a1c6ef281fa056d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c791d11fe3a154567778d2d9453c793a7de48963a5332d5e4afe615c58f1f4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56e05c57415a8b6cee746a07459d6221af8b9d5f7345da95a6363db12a8223b3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c84513d13cd2d96eb71f4021c545ed7c31d2c720ba1e9098bc883af81a53ca7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.21696823835372925,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-4000",
4
- "epoch": 2.9465930018416207,
5
  "eval_steps": 200,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -28175,6 +28175,503 @@
28175
  "eval_samples_per_second": 31.796,
28176
  "eval_steps_per_second": 2.033,
28177
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28178
  }
28179
  ],
28180
  "logging_steps": 1,
@@ -28198,12 +28695,12 @@
28198
  "should_evaluate": false,
28199
  "should_log": false,
28200
  "should_save": true,
28201
- "should_training_stop": false
28202
  },
28203
  "attributes": {}
28204
  }
28205
  },
28206
- "total_flos": 4.0183041122893824e+17,
28207
  "train_batch_size": 16,
28208
  "trial_name": null,
28209
  "trial_params": null
 
1
  {
2
  "best_metric": 0.21696823835372925,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-4000",
4
+ "epoch": 2.998895027624309,
5
  "eval_steps": 200,
6
+ "global_step": 4071,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
28175
  "eval_samples_per_second": 31.796,
28176
  "eval_steps_per_second": 2.033,
28177
  "step": 4000
28178
+ },
28179
+ {
28180
+ "epoch": 2.947329650092081,
28181
+ "grad_norm": 0.3928582966327667,
28182
+ "learning_rate": 1.480406367429299e-07,
28183
+ "loss": 0.1097,
28184
+ "step": 4001
28185
+ },
28186
+ {
28187
+ "epoch": 2.9480662983425416,
28188
+ "grad_norm": 0.4033927619457245,
28189
+ "learning_rate": 1.4384212371320039e-07,
28190
+ "loss": 0.1031,
28191
+ "step": 4002
28192
+ },
28193
+ {
28194
+ "epoch": 2.948802946593002,
28195
+ "grad_norm": 0.31086212396621704,
28196
+ "learning_rate": 1.397039634071895e-07,
28197
+ "loss": 0.0823,
28198
+ "step": 4003
28199
+ },
28200
+ {
28201
+ "epoch": 2.949539594843462,
28202
+ "grad_norm": 0.363800972700119,
28203
+ "learning_rate": 1.3562615832597437e-07,
28204
+ "loss": 0.1162,
28205
+ "step": 4004
28206
+ },
28207
+ {
28208
+ "epoch": 2.9502762430939224,
28209
+ "grad_norm": 0.36636438965797424,
28210
+ "learning_rate": 1.3160871093416128e-07,
28211
+ "loss": 0.1065,
28212
+ "step": 4005
28213
+ },
28214
+ {
28215
+ "epoch": 2.951012891344383,
28216
+ "grad_norm": 0.44840720295906067,
28217
+ "learning_rate": 1.276516236598857e-07,
28218
+ "loss": 0.139,
28219
+ "step": 4006
28220
+ },
28221
+ {
28222
+ "epoch": 2.9517495395948434,
28223
+ "grad_norm": 0.37990802526474,
28224
+ "learning_rate": 1.237548988948123e-07,
28225
+ "loss": 0.1155,
28226
+ "step": 4007
28227
+ },
28228
+ {
28229
+ "epoch": 2.952486187845304,
28230
+ "grad_norm": 0.3310796618461609,
28231
+ "learning_rate": 1.1991853899409044e-07,
28232
+ "loss": 0.1023,
28233
+ "step": 4008
28234
+ },
28235
+ {
28236
+ "epoch": 2.9532228360957644,
28237
+ "grad_norm": 0.525777280330658,
28238
+ "learning_rate": 1.1614254627640986e-07,
28239
+ "loss": 0.1692,
28240
+ "step": 4009
28241
+ },
28242
+ {
28243
+ "epoch": 2.9539594843462247,
28244
+ "grad_norm": 0.3540642559528351,
28245
+ "learning_rate": 1.1242692302395608e-07,
28246
+ "loss": 0.0742,
28247
+ "step": 4010
28248
+ },
28249
+ {
28250
+ "epoch": 2.954696132596685,
28251
+ "grad_norm": 0.4345633089542389,
28252
+ "learning_rate": 1.0877167148246604e-07,
28253
+ "loss": 0.1342,
28254
+ "step": 4011
28255
+ },
28256
+ {
28257
+ "epoch": 2.9554327808471452,
28258
+ "grad_norm": 0.38242772221565247,
28259
+ "learning_rate": 1.0517679386113922e-07,
28260
+ "loss": 0.1004,
28261
+ "step": 4012
28262
+ },
28263
+ {
28264
+ "epoch": 2.956169429097606,
28265
+ "grad_norm": 0.43410253524780273,
28266
+ "learning_rate": 1.0164229233271538e-07,
28267
+ "loss": 0.1084,
28268
+ "step": 4013
28269
+ },
28270
+ {
28271
+ "epoch": 2.956906077348066,
28272
+ "grad_norm": 0.5683735609054565,
28273
+ "learning_rate": 9.816816903343018e-08,
28274
+ "loss": 0.1536,
28275
+ "step": 4014
28276
+ },
28277
+ {
28278
+ "epoch": 2.957642725598527,
28279
+ "grad_norm": 0.3811461627483368,
28280
+ "learning_rate": 9.47544260630373e-08,
28281
+ "loss": 0.0991,
28282
+ "step": 4015
28283
+ },
28284
+ {
28285
+ "epoch": 2.958379373848987,
28286
+ "grad_norm": 0.45557355880737305,
28287
+ "learning_rate": 9.140106548478633e-08,
28288
+ "loss": 0.1312,
28289
+ "step": 4016
28290
+ },
28291
+ {
28292
+ "epoch": 2.9591160220994475,
28293
+ "grad_norm": 0.3726716935634613,
28294
+ "learning_rate": 8.81080893254449e-08,
28295
+ "loss": 0.1225,
28296
+ "step": 4017
28297
+ },
28298
+ {
28299
+ "epoch": 2.9598526703499077,
28300
+ "grad_norm": 0.44345927238464355,
28301
+ "learning_rate": 8.487549957526541e-08,
28302
+ "loss": 0.1332,
28303
+ "step": 4018
28304
+ },
28305
+ {
28306
+ "epoch": 2.9605893186003684,
28307
+ "grad_norm": 0.41493895649909973,
28308
+ "learning_rate": 8.170329818802947e-08,
28309
+ "loss": 0.1248,
28310
+ "step": 4019
28311
+ },
28312
+ {
28313
+ "epoch": 2.9613259668508287,
28314
+ "grad_norm": 0.4436560869216919,
28315
+ "learning_rate": 7.859148708099228e-08,
28316
+ "loss": 0.1317,
28317
+ "step": 4020
28318
+ },
28319
+ {
28320
+ "epoch": 2.962062615101289,
28321
+ "grad_norm": 0.3871510624885559,
28322
+ "learning_rate": 7.554006813491609e-08,
28323
+ "loss": 0.1029,
28324
+ "step": 4021
28325
+ },
28326
+ {
28327
+ "epoch": 2.9627992633517497,
28328
+ "grad_norm": 0.42555782198905945,
28329
+ "learning_rate": 7.254904319409229e-08,
28330
+ "loss": 0.1519,
28331
+ "step": 4022
28332
+ },
28333
+ {
28334
+ "epoch": 2.96353591160221,
28335
+ "grad_norm": 0.4393730163574219,
28336
+ "learning_rate": 6.961841406626368e-08,
28337
+ "loss": 0.0969,
28338
+ "step": 4023
28339
+ },
28340
+ {
28341
+ "epoch": 2.9642725598526702,
28342
+ "grad_norm": 0.43214312195777893,
28343
+ "learning_rate": 6.674818252270232e-08,
28344
+ "loss": 0.1308,
28345
+ "step": 4024
28346
+ },
28347
+ {
28348
+ "epoch": 2.9650092081031305,
28349
+ "grad_norm": 0.3302169442176819,
28350
+ "learning_rate": 6.393835029816497e-08,
28351
+ "loss": 0.1412,
28352
+ "step": 4025
28353
+ },
28354
+ {
28355
+ "epoch": 2.9657458563535912,
28356
+ "grad_norm": 0.42520150542259216,
28357
+ "learning_rate": 6.11889190909043e-08,
28358
+ "loss": 0.1354,
28359
+ "step": 4026
28360
+ },
28361
+ {
28362
+ "epoch": 2.9664825046040515,
28363
+ "grad_norm": 0.4056524932384491,
28364
+ "learning_rate": 5.84998905626799e-08,
28365
+ "loss": 0.1074,
28366
+ "step": 4027
28367
+ },
28368
+ {
28369
+ "epoch": 2.967219152854512,
28370
+ "grad_norm": 0.40226319432258606,
28371
+ "learning_rate": 5.5871266338702876e-08,
28372
+ "loss": 0.1444,
28373
+ "step": 4028
28374
+ },
28375
+ {
28376
+ "epoch": 2.9679558011049725,
28377
+ "grad_norm": 0.370077520608902,
28378
+ "learning_rate": 5.330304800772456e-08,
28379
+ "loss": 0.1096,
28380
+ "step": 4029
28381
+ },
28382
+ {
28383
+ "epoch": 2.9686924493554327,
28384
+ "grad_norm": 0.4626006782054901,
28385
+ "learning_rate": 5.0795237121969984e-08,
28386
+ "loss": 0.145,
28387
+ "step": 4030
28388
+ },
28389
+ {
28390
+ "epoch": 2.969429097605893,
28391
+ "grad_norm": 0.45213082432746887,
28392
+ "learning_rate": 4.8347835197137814e-08,
28393
+ "loss": 0.1208,
28394
+ "step": 4031
28395
+ },
28396
+ {
28397
+ "epoch": 2.9701657458563537,
28398
+ "grad_norm": 0.43094053864479065,
28399
+ "learning_rate": 4.59608437124337e-08,
28400
+ "loss": 0.1237,
28401
+ "step": 4032
28402
+ },
28403
+ {
28404
+ "epoch": 2.970902394106814,
28405
+ "grad_norm": 0.42492741346359253,
28406
+ "learning_rate": 4.363426411055915e-08,
28407
+ "loss": 0.1358,
28408
+ "step": 4033
28409
+ },
28410
+ {
28411
+ "epoch": 2.9716390423572743,
28412
+ "grad_norm": 0.4052026569843292,
28413
+ "learning_rate": 4.1368097797678255e-08,
28414
+ "loss": 0.1083,
28415
+ "step": 4034
28416
+ },
28417
+ {
28418
+ "epoch": 2.972375690607735,
28419
+ "grad_norm": 0.42593371868133545,
28420
+ "learning_rate": 3.916234614346204e-08,
28421
+ "loss": 0.1469,
28422
+ "step": 4035
28423
+ },
28424
+ {
28425
+ "epoch": 2.9731123388581953,
28426
+ "grad_norm": 0.40533506870269775,
28427
+ "learning_rate": 3.701701048105521e-08,
28428
+ "loss": 0.1215,
28429
+ "step": 4036
28430
+ },
28431
+ {
28432
+ "epoch": 2.9738489871086555,
28433
+ "grad_norm": 0.5333660244941711,
28434
+ "learning_rate": 3.493209210708725e-08,
28435
+ "loss": 0.1454,
28436
+ "step": 4037
28437
+ },
28438
+ {
28439
+ "epoch": 2.974585635359116,
28440
+ "grad_norm": 0.4893753230571747,
28441
+ "learning_rate": 3.29075922816946e-08,
28442
+ "loss": 0.1435,
28443
+ "step": 4038
28444
+ },
28445
+ {
28446
+ "epoch": 2.9753222836095765,
28447
+ "grad_norm": 0.39299649000167847,
28448
+ "learning_rate": 3.094351222844294e-08,
28449
+ "loss": 0.1066,
28450
+ "step": 4039
28451
+ },
28452
+ {
28453
+ "epoch": 2.976058931860037,
28454
+ "grad_norm": 0.4104492962360382,
28455
+ "learning_rate": 2.9039853134449348e-08,
28456
+ "loss": 0.0963,
28457
+ "step": 4040
28458
+ },
28459
+ {
28460
+ "epoch": 2.9767955801104975,
28461
+ "grad_norm": 0.4806900918483734,
28462
+ "learning_rate": 2.7196616150271247e-08,
28463
+ "loss": 0.1255,
28464
+ "step": 4041
28465
+ },
28466
+ {
28467
+ "epoch": 2.9775322283609578,
28468
+ "grad_norm": 0.4183269143104553,
28469
+ "learning_rate": 2.5413802389939735e-08,
28470
+ "loss": 0.1456,
28471
+ "step": 4042
28472
+ },
28473
+ {
28474
+ "epoch": 2.978268876611418,
28475
+ "grad_norm": 0.4006012976169586,
28476
+ "learning_rate": 2.3691412930992864e-08,
28477
+ "loss": 0.1109,
28478
+ "step": 4043
28479
+ },
28480
+ {
28481
+ "epoch": 2.9790055248618783,
28482
+ "grad_norm": 0.4611269235610962,
28483
+ "learning_rate": 2.2029448814431253e-08,
28484
+ "loss": 0.1349,
28485
+ "step": 4044
28486
+ },
28487
+ {
28488
+ "epoch": 2.979742173112339,
28489
+ "grad_norm": 0.4572463631629944,
28490
+ "learning_rate": 2.0427911044751392e-08,
28491
+ "loss": 0.1125,
28492
+ "step": 4045
28493
+ },
28494
+ {
28495
+ "epoch": 2.9804788213627993,
28496
+ "grad_norm": 0.3652282953262329,
28497
+ "learning_rate": 1.8886800589912322e-08,
28498
+ "loss": 0.0912,
28499
+ "step": 4046
28500
+ },
28501
+ {
28502
+ "epoch": 2.9812154696132596,
28503
+ "grad_norm": 0.4529966413974762,
28504
+ "learning_rate": 1.7406118381346758e-08,
28505
+ "loss": 0.0887,
28506
+ "step": 4047
28507
+ },
28508
+ {
28509
+ "epoch": 2.9819521178637203,
28510
+ "grad_norm": 0.41920584440231323,
28511
+ "learning_rate": 1.5985865313994374e-08,
28512
+ "loss": 0.1024,
28513
+ "step": 4048
28514
+ },
28515
+ {
28516
+ "epoch": 2.9826887661141805,
28517
+ "grad_norm": 0.38177281618118286,
28518
+ "learning_rate": 1.4626042246224103e-08,
28519
+ "loss": 0.1005,
28520
+ "step": 4049
28521
+ },
28522
+ {
28523
+ "epoch": 2.983425414364641,
28524
+ "grad_norm": 0.46106603741645813,
28525
+ "learning_rate": 1.3326649999934049e-08,
28526
+ "loss": 0.1222,
28527
+ "step": 4050
28528
+ },
28529
+ {
28530
+ "epoch": 2.984162062615101,
28531
+ "grad_norm": 0.44589948654174805,
28532
+ "learning_rate": 1.208768936045157e-08,
28533
+ "loss": 0.1088,
28534
+ "step": 4051
28535
+ },
28536
+ {
28537
+ "epoch": 2.984898710865562,
28538
+ "grad_norm": 0.4326612651348114,
28539
+ "learning_rate": 1.09091610766221e-08,
28540
+ "loss": 0.1544,
28541
+ "step": 4052
28542
+ },
28543
+ {
28544
+ "epoch": 2.985635359116022,
28545
+ "grad_norm": 0.40109142661094666,
28546
+ "learning_rate": 9.791065860720317e-09,
28547
+ "loss": 0.1251,
28548
+ "step": 4053
28549
+ },
28550
+ {
28551
+ "epoch": 2.9863720073664823,
28552
+ "grad_norm": 0.418179452419281,
28553
+ "learning_rate": 8.733404388538979e-09,
28554
+ "loss": 0.091,
28555
+ "step": 4054
28556
+ },
28557
+ {
28558
+ "epoch": 2.987108655616943,
28559
+ "grad_norm": 0.41939765214920044,
28560
+ "learning_rate": 7.736177299311198e-09,
28561
+ "loss": 0.1116,
28562
+ "step": 4055
28563
+ },
28564
+ {
28565
+ "epoch": 2.9878453038674033,
28566
+ "grad_norm": 0.4822397232055664,
28567
+ "learning_rate": 6.79938519576595e-09,
28568
+ "loss": 0.1445,
28569
+ "step": 4056
28570
+ },
28571
+ {
28572
+ "epoch": 2.9885819521178636,
28573
+ "grad_norm": 0.33148708939552307,
28574
+ "learning_rate": 5.9230286440947745e-09,
28575
+ "loss": 0.0995,
28576
+ "step": 4057
28577
+ },
28578
+ {
28579
+ "epoch": 2.989318600368324,
28580
+ "grad_norm": 0.394879549741745,
28581
+ "learning_rate": 5.107108173962871e-09,
28582
+ "loss": 0.0885,
28583
+ "step": 4058
28584
+ },
28585
+ {
28586
+ "epoch": 2.9900552486187846,
28587
+ "grad_norm": 0.535071849822998,
28588
+ "learning_rate": 4.351624278509103e-09,
28589
+ "loss": 0.1855,
28590
+ "step": 4059
28591
+ },
28592
+ {
28593
+ "epoch": 2.990791896869245,
28594
+ "grad_norm": 0.39724433422088623,
28595
+ "learning_rate": 3.6565774143459962e-09,
28596
+ "loss": 0.1311,
28597
+ "step": 4060
28598
+ },
28599
+ {
28600
+ "epoch": 2.9915285451197056,
28601
+ "grad_norm": 0.4425889551639557,
28602
+ "learning_rate": 3.0219680015708406e-09,
28603
+ "loss": 0.1081,
28604
+ "step": 4061
28605
+ },
28606
+ {
28607
+ "epoch": 2.992265193370166,
28608
+ "grad_norm": 0.35188987851142883,
28609
+ "learning_rate": 2.4477964237212824e-09,
28610
+ "loss": 0.1005,
28611
+ "step": 4062
28612
+ },
28613
+ {
28614
+ "epoch": 2.993001841620626,
28615
+ "grad_norm": 0.42041268944740295,
28616
+ "learning_rate": 1.9340630278308345e-09,
28617
+ "loss": 0.1107,
28618
+ "step": 4063
28619
+ },
28620
+ {
28621
+ "epoch": 2.9937384898710864,
28622
+ "grad_norm": 0.43599700927734375,
28623
+ "learning_rate": 1.4807681243955707e-09,
28624
+ "loss": 0.102,
28625
+ "step": 4064
28626
+ },
28627
+ {
28628
+ "epoch": 2.994475138121547,
28629
+ "grad_norm": 0.4020419716835022,
28630
+ "learning_rate": 1.0879119873852262e-09,
28631
+ "loss": 0.1328,
28632
+ "step": 4065
28633
+ },
28634
+ {
28635
+ "epoch": 2.9952117863720074,
28636
+ "grad_norm": 0.5001260042190552,
28637
+ "learning_rate": 7.554948542543017e-10,
28638
+ "loss": 0.1634,
28639
+ "step": 4066
28640
+ },
28641
+ {
28642
+ "epoch": 2.9959484346224676,
28643
+ "grad_norm": 0.48046594858169556,
28644
+ "learning_rate": 4.835169258976536e-10,
28645
+ "loss": 0.1276,
28646
+ "step": 4067
28647
+ },
28648
+ {
28649
+ "epoch": 2.9966850828729283,
28650
+ "grad_norm": 0.3800838589668274,
28651
+ "learning_rate": 2.71978366706005e-10,
28652
+ "loss": 0.0995,
28653
+ "step": 4068
28654
+ },
28655
+ {
28656
+ "epoch": 2.9974217311233886,
28657
+ "grad_norm": 0.38035425543785095,
28658
+ "learning_rate": 1.2087930453263952e-10,
28659
+ "loss": 0.1164,
28660
+ "step": 4069
28661
+ },
28662
+ {
28663
+ "epoch": 2.998158379373849,
28664
+ "grad_norm": 0.48082515597343445,
28665
+ "learning_rate": 3.021983070450318e-11,
28666
+ "loss": 0.1213,
28667
+ "step": 4070
28668
+ },
28669
+ {
28670
+ "epoch": 2.998895027624309,
28671
+ "grad_norm": 0.3878474831581116,
28672
+ "learning_rate": 0.0,
28673
+ "loss": 0.103,
28674
+ "step": 4071
28675
  }
28676
  ],
28677
  "logging_steps": 1,
 
28695
  "should_evaluate": false,
28696
  "should_log": false,
28697
  "should_save": true,
28698
+ "should_training_stop": true
28699
  },
28700
  "attributes": {}
28701
  }
28702
  },
28703
+ "total_flos": 4.089665390861353e+17,
28704
  "train_batch_size": 16,
28705
  "trial_name": null,
28706
  "trial_params": null