Fanucci commited on
Commit
cac6e58
·
verified ·
1 Parent(s): cbaebc3

Training in progress, step 5760, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8c4999d1d781390bb82ba66c422d6f2218afa2b655c8715f5e1b3b2dbee6e38
3
  size 389074464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9235f4607cf8e84409de4ce4b2847c74a696066b57d3cdb69cbc2e01b28806d0
3
  size 389074464
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16a21d4f04d7ead68e40367b0c1693ce79f1b8164bbeb69b1d457b3bc7519725
3
  size 198011700
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ff9daa50a0a5c480018483e7ad671a0dce75147275c76f66a36458c77efbee
3
  size 198011700
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b822d37b79d6999b750099b6fba79edc44b929584c0eb8e06dd9d96747579de
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4570cde6877f7ecdff7adaa553bffdfd408783d3b318129cb1d03c18f4128e5c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:230f75bba14745c5a2f9522a21df2a8ff99459f0af9d260483162250deae40f5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b35e714094c6b7ca02b4593d2c4332f2835d3b2ebcdf357e02eb76c44a693fd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9804788827896118,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-5700",
4
- "epoch": 0.07667731629392971,
5
  "eval_steps": 150,
6
- "global_step": 5700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -40219,6 +40219,426 @@
40219
  "eval_samples_per_second": 11.75,
40220
  "eval_steps_per_second": 5.875,
40221
  "step": 5700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40222
  }
40223
  ],
40224
  "logging_steps": 1,
@@ -40242,12 +40662,12 @@
40242
  "should_evaluate": false,
40243
  "should_log": false,
40244
  "should_save": true,
40245
- "should_training_stop": false
40246
  },
40247
  "attributes": {}
40248
  }
40249
  },
40250
- "total_flos": 1.6339320226971648e+18,
40251
  "train_batch_size": 2,
40252
  "trial_name": null,
40253
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9804788827896118,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-5700",
4
+ "epoch": 0.07748444593912897,
5
  "eval_steps": 150,
6
+ "global_step": 5760,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
40219
  "eval_samples_per_second": 11.75,
40220
  "eval_steps_per_second": 5.875,
40221
  "step": 5700
40222
+ },
40223
+ {
40224
+ "epoch": 0.07669076845468303,
40225
+ "grad_norm": 1.586574673652649,
40226
+ "learning_rate": 5.195178525271294e-08,
40227
+ "loss": 1.0676,
40228
+ "step": 5701
40229
+ },
40230
+ {
40231
+ "epoch": 0.07670422061543636,
40232
+ "grad_norm": 1.4937348365783691,
40233
+ "learning_rate": 5.0205778298384464e-08,
40234
+ "loss": 0.9485,
40235
+ "step": 5702
40236
+ },
40237
+ {
40238
+ "epoch": 0.07671767277618967,
40239
+ "grad_norm": 1.4224642515182495,
40240
+ "learning_rate": 4.848960771166588e-08,
40241
+ "loss": 0.9619,
40242
+ "step": 5703
40243
+ },
40244
+ {
40245
+ "epoch": 0.076731124936943,
40246
+ "grad_norm": 1.5152407884597778,
40247
+ "learning_rate": 4.680327400486961e-08,
40248
+ "loss": 1.1684,
40249
+ "step": 5704
40250
+ },
40251
+ {
40252
+ "epoch": 0.07674457709769632,
40253
+ "grad_norm": 1.5055732727050781,
40254
+ "learning_rate": 4.514677768138187e-08,
40255
+ "loss": 1.0995,
40256
+ "step": 5705
40257
+ },
40258
+ {
40259
+ "epoch": 0.07675802925844964,
40260
+ "grad_norm": 1.5480650663375854,
40261
+ "learning_rate": 4.352011923569599e-08,
40262
+ "loss": 1.0017,
40263
+ "step": 5706
40264
+ },
40265
+ {
40266
+ "epoch": 0.07677148141920295,
40267
+ "grad_norm": 1.6415358781814575,
40268
+ "learning_rate": 4.192329915339022e-08,
40269
+ "loss": 0.8832,
40270
+ "step": 5707
40271
+ },
40272
+ {
40273
+ "epoch": 0.07678493357995628,
40274
+ "grad_norm": 1.4596832990646362,
40275
+ "learning_rate": 4.035631791113881e-08,
40276
+ "loss": 1.0574,
40277
+ "step": 5708
40278
+ },
40279
+ {
40280
+ "epoch": 0.0767983857407096,
40281
+ "grad_norm": 1.5040479898452759,
40282
+ "learning_rate": 3.881917597670093e-08,
40283
+ "loss": 0.9738,
40284
+ "step": 5709
40285
+ },
40286
+ {
40287
+ "epoch": 0.07681183790146293,
40288
+ "grad_norm": 1.401673674583435,
40289
+ "learning_rate": 3.731187380893175e-08,
40290
+ "loss": 0.8504,
40291
+ "step": 5710
40292
+ },
40293
+ {
40294
+ "epoch": 0.07682529006221624,
40295
+ "grad_norm": 1.6924898624420166,
40296
+ "learning_rate": 3.583441185779357e-08,
40297
+ "loss": 1.1012,
40298
+ "step": 5711
40299
+ },
40300
+ {
40301
+ "epoch": 0.07683874222296956,
40302
+ "grad_norm": 1.5380204916000366,
40303
+ "learning_rate": 3.438679056432248e-08,
40304
+ "loss": 0.9772,
40305
+ "step": 5712
40306
+ },
40307
+ {
40308
+ "epoch": 0.07685219438372289,
40309
+ "grad_norm": 1.3671621084213257,
40310
+ "learning_rate": 3.296901036065059e-08,
40311
+ "loss": 0.7888,
40312
+ "step": 5713
40313
+ },
40314
+ {
40315
+ "epoch": 0.07686564654447621,
40316
+ "grad_norm": 1.4451839923858643,
40317
+ "learning_rate": 3.1581071670006015e-08,
40318
+ "loss": 0.8835,
40319
+ "step": 5714
40320
+ },
40321
+ {
40322
+ "epoch": 0.07687909870522953,
40323
+ "grad_norm": 1.5461901426315308,
40324
+ "learning_rate": 3.022297490670178e-08,
40325
+ "loss": 1.161,
40326
+ "step": 5715
40327
+ },
40328
+ {
40329
+ "epoch": 0.07689255086598284,
40330
+ "grad_norm": 1.5095206499099731,
40331
+ "learning_rate": 2.8894720476158044e-08,
40332
+ "loss": 1.0474,
40333
+ "step": 5716
40334
+ },
40335
+ {
40336
+ "epoch": 0.07690600302673617,
40337
+ "grad_norm": 1.3444650173187256,
40338
+ "learning_rate": 2.7596308774879842e-08,
40339
+ "loss": 0.9731,
40340
+ "step": 5717
40341
+ },
40342
+ {
40343
+ "epoch": 0.0769194551874895,
40344
+ "grad_norm": 1.8739641904830933,
40345
+ "learning_rate": 2.6327740190446037e-08,
40346
+ "loss": 1.1698,
40347
+ "step": 5718
40348
+ },
40349
+ {
40350
+ "epoch": 0.07693290734824282,
40351
+ "grad_norm": 1.4870262145996094,
40352
+ "learning_rate": 2.50890151015426e-08,
40353
+ "loss": 0.902,
40354
+ "step": 5719
40355
+ },
40356
+ {
40357
+ "epoch": 0.07694635950899613,
40358
+ "grad_norm": 1.540949821472168,
40359
+ "learning_rate": 2.388013387796262e-08,
40360
+ "loss": 1.1144,
40361
+ "step": 5720
40362
+ },
40363
+ {
40364
+ "epoch": 0.07695981166974945,
40365
+ "grad_norm": 1.6155050992965698,
40366
+ "learning_rate": 2.270109688056188e-08,
40367
+ "loss": 0.9566,
40368
+ "step": 5721
40369
+ },
40370
+ {
40371
+ "epoch": 0.07697326383050278,
40372
+ "grad_norm": 1.5978723764419556,
40373
+ "learning_rate": 2.1551904461292184e-08,
40374
+ "loss": 1.0488,
40375
+ "step": 5722
40376
+ },
40377
+ {
40378
+ "epoch": 0.0769867159912561,
40379
+ "grad_norm": 1.4267683029174805,
40380
+ "learning_rate": 2.043255696322355e-08,
40381
+ "loss": 0.9771,
40382
+ "step": 5723
40383
+ },
40384
+ {
40385
+ "epoch": 0.07700016815200941,
40386
+ "grad_norm": 1.5521541833877563,
40387
+ "learning_rate": 1.9343054720466492e-08,
40388
+ "loss": 1.1641,
40389
+ "step": 5724
40390
+ },
40391
+ {
40392
+ "epoch": 0.07701362031276274,
40393
+ "grad_norm": 1.5656774044036865,
40394
+ "learning_rate": 1.8283398058283053e-08,
40395
+ "loss": 1.0749,
40396
+ "step": 5725
40397
+ },
40398
+ {
40399
+ "epoch": 0.07702707247351606,
40400
+ "grad_norm": 1.6160752773284912,
40401
+ "learning_rate": 1.725358729297577e-08,
40402
+ "loss": 1.0366,
40403
+ "step": 5726
40404
+ },
40405
+ {
40406
+ "epoch": 0.07704052463426939,
40407
+ "grad_norm": 1.6153086423873901,
40408
+ "learning_rate": 1.62536227319654e-08,
40409
+ "loss": 0.9984,
40410
+ "step": 5727
40411
+ },
40412
+ {
40413
+ "epoch": 0.0770539767950227,
40414
+ "grad_norm": 1.3504514694213867,
40415
+ "learning_rate": 1.5283504673757608e-08,
40416
+ "loss": 0.8402,
40417
+ "step": 5728
40418
+ },
40419
+ {
40420
+ "epoch": 0.07706742895577602,
40421
+ "grad_norm": 1.4485338926315308,
40422
+ "learning_rate": 1.4343233407931866e-08,
40423
+ "loss": 0.9682,
40424
+ "step": 5729
40425
+ },
40426
+ {
40427
+ "epoch": 0.07708088111652935,
40428
+ "grad_norm": 1.7915130853652954,
40429
+ "learning_rate": 1.343280921518586e-08,
40430
+ "loss": 1.0206,
40431
+ "step": 5730
40432
+ },
40433
+ {
40434
+ "epoch": 0.07709433327728267,
40435
+ "grad_norm": 1.4139072895050049,
40436
+ "learning_rate": 1.2552232367279981e-08,
40437
+ "loss": 1.0062,
40438
+ "step": 5731
40439
+ },
40440
+ {
40441
+ "epoch": 0.07710778543803598,
40442
+ "grad_norm": 1.5041614770889282,
40443
+ "learning_rate": 1.1701503127092838e-08,
40444
+ "loss": 0.9514,
40445
+ "step": 5732
40446
+ },
40447
+ {
40448
+ "epoch": 0.0771212375987893,
40449
+ "grad_norm": 1.5072709321975708,
40450
+ "learning_rate": 1.0880621748576847e-08,
40451
+ "loss": 0.8834,
40452
+ "step": 5733
40453
+ },
40454
+ {
40455
+ "epoch": 0.07713468975954263,
40456
+ "grad_norm": 1.4389986991882324,
40457
+ "learning_rate": 1.0089588476758227e-08,
40458
+ "loss": 0.9029,
40459
+ "step": 5734
40460
+ },
40461
+ {
40462
+ "epoch": 0.07714814192029595,
40463
+ "grad_norm": 1.4401514530181885,
40464
+ "learning_rate": 9.32840354779252e-09,
40465
+ "loss": 1.0095,
40466
+ "step": 5735
40467
+ },
40468
+ {
40469
+ "epoch": 0.07716159408104926,
40470
+ "grad_norm": 1.7930731773376465,
40471
+ "learning_rate": 8.597067188897966e-09,
40472
+ "loss": 1.2101,
40473
+ "step": 5736
40474
+ },
40475
+ {
40476
+ "epoch": 0.07717504624180259,
40477
+ "grad_norm": 1.6275607347488403,
40478
+ "learning_rate": 7.895579618388827e-09,
40479
+ "loss": 1.0339,
40480
+ "step": 5737
40481
+ },
40482
+ {
40483
+ "epoch": 0.07718849840255591,
40484
+ "grad_norm": 1.538468837738037,
40485
+ "learning_rate": 7.223941045664262e-09,
40486
+ "loss": 1.0778,
40487
+ "step": 5738
40488
+ },
40489
+ {
40490
+ "epoch": 0.07720195056330924,
40491
+ "grad_norm": 1.4641597270965576,
40492
+ "learning_rate": 6.582151671208348e-09,
40493
+ "loss": 0.9457,
40494
+ "step": 5739
40495
+ },
40496
+ {
40497
+ "epoch": 0.07721540272406255,
40498
+ "grad_norm": 1.4278123378753662,
40499
+ "learning_rate": 5.970211686623372e-09,
40500
+ "loss": 0.9369,
40501
+ "step": 5740
40502
+ },
40503
+ {
40504
+ "epoch": 0.07722885488481587,
40505
+ "grad_norm": 1.355145812034607,
40506
+ "learning_rate": 5.388121274574331e-09,
40507
+ "loss": 0.8486,
40508
+ "step": 5741
40509
+ },
40510
+ {
40511
+ "epoch": 0.0772423070455692,
40512
+ "grad_norm": 1.5918469429016113,
40513
+ "learning_rate": 4.83588060882223e-09,
40514
+ "loss": 0.9758,
40515
+ "step": 5742
40516
+ },
40517
+ {
40518
+ "epoch": 0.07725575920632252,
40519
+ "grad_norm": 1.686596155166626,
40520
+ "learning_rate": 4.313489854212982e-09,
40521
+ "loss": 1.0544,
40522
+ "step": 5743
40523
+ },
40524
+ {
40525
+ "epoch": 0.07726921136707583,
40526
+ "grad_norm": 1.381110429763794,
40527
+ "learning_rate": 3.820949166699616e-09,
40528
+ "loss": 0.8962,
40529
+ "step": 5744
40530
+ },
40531
+ {
40532
+ "epoch": 0.07728266352782916,
40533
+ "grad_norm": 1.5031489133834839,
40534
+ "learning_rate": 3.3582586932978666e-09,
40535
+ "loss": 1.0251,
40536
+ "step": 5745
40537
+ },
40538
+ {
40539
+ "epoch": 0.07729611568858248,
40540
+ "grad_norm": 1.4953575134277344,
40541
+ "learning_rate": 2.9254185721416803e-09,
40542
+ "loss": 1.0684,
40543
+ "step": 5746
40544
+ },
40545
+ {
40546
+ "epoch": 0.0773095678493358,
40547
+ "grad_norm": 1.4256117343902588,
40548
+ "learning_rate": 2.5224289324277118e-09,
40549
+ "loss": 0.9782,
40550
+ "step": 5747
40551
+ },
40552
+ {
40553
+ "epoch": 0.07732302001008912,
40554
+ "grad_norm": 1.7791322469711304,
40555
+ "learning_rate": 2.149289894459727e-09,
40556
+ "loss": 1.2141,
40557
+ "step": 5748
40558
+ },
40559
+ {
40560
+ "epoch": 0.07733647217084244,
40561
+ "grad_norm": 1.8340305089950562,
40562
+ "learning_rate": 1.8060015696264032e-09,
40563
+ "loss": 1.059,
40564
+ "step": 5749
40565
+ },
40566
+ {
40567
+ "epoch": 0.07734992433159577,
40568
+ "grad_norm": 1.5660282373428345,
40569
+ "learning_rate": 1.4925640603902224e-09,
40570
+ "loss": 0.9299,
40571
+ "step": 5750
40572
+ },
40573
+ {
40574
+ "epoch": 0.07736337649234909,
40575
+ "grad_norm": 1.551103949546814,
40576
+ "learning_rate": 1.208977460342986e-09,
40577
+ "loss": 1.1289,
40578
+ "step": 5751
40579
+ },
40580
+ {
40581
+ "epoch": 0.0773768286531024,
40582
+ "grad_norm": 1.676748514175415,
40583
+ "learning_rate": 9.552418541058928e-10,
40584
+ "loss": 0.9262,
40585
+ "step": 5752
40586
+ },
40587
+ {
40588
+ "epoch": 0.07739028081385572,
40589
+ "grad_norm": 1.4658743143081665,
40590
+ "learning_rate": 7.313573174516642e-10,
40591
+ "loss": 0.8708,
40592
+ "step": 5753
40593
+ },
40594
+ {
40595
+ "epoch": 0.07740373297460905,
40596
+ "grad_norm": 1.6699283123016357,
40597
+ "learning_rate": 5.37323917204624e-10,
40598
+ "loss": 1.083,
40599
+ "step": 5754
40600
+ },
40601
+ {
40602
+ "epoch": 0.07741718513536237,
40603
+ "grad_norm": 1.4694359302520752,
40604
+ "learning_rate": 3.731417112740054e-10,
40605
+ "loss": 0.8933,
40606
+ "step": 5755
40607
+ },
40608
+ {
40609
+ "epoch": 0.07743063729611568,
40610
+ "grad_norm": 1.7645187377929688,
40611
+ "learning_rate": 2.388107486761548e-10,
40612
+ "loss": 1.1274,
40613
+ "step": 5756
40614
+ },
40615
+ {
40616
+ "epoch": 0.07744408945686901,
40617
+ "grad_norm": 1.5873627662658691,
40618
+ "learning_rate": 1.343310695234301e-10,
40619
+ "loss": 1.0934,
40620
+ "step": 5757
40621
+ },
40622
+ {
40623
+ "epoch": 0.07745754161762233,
40624
+ "grad_norm": 1.486417293548584,
40625
+ "learning_rate": 5.97027049908938e-11,
40626
+ "loss": 1.0576,
40627
+ "step": 5758
40628
+ },
40629
+ {
40630
+ "epoch": 0.07747099377837566,
40631
+ "grad_norm": 1.636743187904358,
40632
+ "learning_rate": 1.4925677360722036e-11,
40633
+ "loss": 1.1294,
40634
+ "step": 5759
40635
+ },
40636
+ {
40637
+ "epoch": 0.07748444593912897,
40638
+ "grad_norm": 1.4540817737579346,
40639
+ "learning_rate": 0.0,
40640
+ "loss": 0.8497,
40641
+ "step": 5760
40642
  }
40643
  ],
40644
  "logging_steps": 1,
 
40662
  "should_evaluate": false,
40663
  "should_log": false,
40664
  "should_save": true,
40665
+ "should_training_stop": true
40666
  },
40667
  "attributes": {}
40668
  }
40669
  },
40670
+ "total_flos": 1.6511313071466086e+18,
40671
  "train_batch_size": 2,
40672
  "trial_name": null,
40673
  "trial_params": null