error577 commited on
Commit
95fdb70
·
verified ·
1 Parent(s): e137858

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af38ef62375ec891cbfce4fd28bd22d7e34c157587cc450f78b67c24cf274d79
3
  size 590925768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d2cdbc64c8aed93e3493383438d70c436fda2ba3d5f97e2c7265c7dc57d98a
3
  size 590925768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9eecd260ac7545716c34b890ae0f24c36dadbc6be9c3347db814a46163e37fd
3
  size 301533378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b530320adc4b6131fc012b9e702b151b24095c8446eb428ef27157c2d8238cb
3
  size 301533378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de5671ef90315d5673225dc1ab0e98e92309a6a79f8b3269920ade8d1ca939a6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bcc8702776502dcccb377886280ad3a2283e2886a22f0c6d17f658d0d4a4262
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15f9642019ca32b3c422641145349dede0a960d5ced3931f098afc618e340971
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2a95abcdacb257d1005778993fa8106f84c53601a08ca596204bb7536dd504e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.12324624508619308,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.08760722759627669,
5
  "eval_steps": 50,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4311,6 +4311,364 @@
4311
  "eval_samples_per_second": 8.378,
4312
  "eval_steps_per_second": 8.378,
4313
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4314
  }
4315
  ],
4316
  "logging_steps": 1,
@@ -4325,7 +4683,7 @@
4325
  "early_stopping_threshold": 0.0
4326
  },
4327
  "attributes": {
4328
- "early_stopping_patience_counter": 2
4329
  }
4330
  },
4331
  "TrainerControl": {
@@ -4334,12 +4692,12 @@
4334
  "should_evaluate": false,
4335
  "should_log": false,
4336
  "should_save": true,
4337
- "should_training_stop": false
4338
  },
4339
  "attributes": {}
4340
  }
4341
  },
4342
- "total_flos": 6.887151013095014e+16,
4343
  "train_batch_size": 1,
4344
  "trial_name": null,
4345
  "trial_params": null
 
1
  {
2
  "best_metric": 0.12324624508619308,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.09490782989596641,
5
  "eval_steps": 50,
6
+ "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4311
  "eval_samples_per_second": 8.378,
4312
  "eval_steps_per_second": 8.378,
4313
  "step": 600
4314
+ },
4315
+ {
4316
+ "epoch": 0.08775323964227048,
4317
+ "grad_norm": 0.7561835050582886,
4318
+ "learning_rate": 0.00010500763630844842,
4319
+ "loss": 0.4772,
4320
+ "step": 601
4321
+ },
4322
+ {
4323
+ "epoch": 0.08789925168826428,
4324
+ "grad_norm": 0.5763047933578491,
4325
+ "learning_rate": 0.00010455378198874092,
4326
+ "loss": 0.2431,
4327
+ "step": 602
4328
+ },
4329
+ {
4330
+ "epoch": 0.08804526373425807,
4331
+ "grad_norm": 0.41480308771133423,
4332
+ "learning_rate": 0.00010410038531194427,
4333
+ "loss": 0.1497,
4334
+ "step": 603
4335
+ },
4336
+ {
4337
+ "epoch": 0.08819127578025188,
4338
+ "grad_norm": 0.5208658576011658,
4339
+ "learning_rate": 0.0001036474508437579,
4340
+ "loss": 0.297,
4341
+ "step": 604
4342
+ },
4343
+ {
4344
+ "epoch": 0.08833728782624567,
4345
+ "grad_norm": 0.5673862099647522,
4346
+ "learning_rate": 0.00010319498314522693,
4347
+ "loss": 0.3096,
4348
+ "step": 605
4349
+ },
4350
+ {
4351
+ "epoch": 0.08848329987223946,
4352
+ "grad_norm": 1.0537465810775757,
4353
+ "learning_rate": 0.0001027429867726961,
4354
+ "loss": 0.4135,
4355
+ "step": 606
4356
+ },
4357
+ {
4358
+ "epoch": 0.08862931191823326,
4359
+ "grad_norm": 0.7120312452316284,
4360
+ "learning_rate": 0.00010229146627776376,
4361
+ "loss": 0.4118,
4362
+ "step": 607
4363
+ },
4364
+ {
4365
+ "epoch": 0.08877532396422705,
4366
+ "grad_norm": 0.6144299507141113,
4367
+ "learning_rate": 0.00010184042620723637,
4368
+ "loss": 0.2349,
4369
+ "step": 608
4370
+ },
4371
+ {
4372
+ "epoch": 0.08892133601022084,
4373
+ "grad_norm": 0.5841251611709595,
4374
+ "learning_rate": 0.00010138987110308241,
4375
+ "loss": 0.23,
4376
+ "step": 609
4377
+ },
4378
+ {
4379
+ "epoch": 0.08906734805621463,
4380
+ "grad_norm": 0.6115531921386719,
4381
+ "learning_rate": 0.00010093980550238675,
4382
+ "loss": 0.2537,
4383
+ "step": 610
4384
+ },
4385
+ {
4386
+ "epoch": 0.08921336010220843,
4387
+ "grad_norm": 0.521041214466095,
4388
+ "learning_rate": 0.00010049023393730502,
4389
+ "loss": 0.17,
4390
+ "step": 611
4391
+ },
4392
+ {
4393
+ "epoch": 0.08935937214820222,
4394
+ "grad_norm": 0.6351935267448425,
4395
+ "learning_rate": 0.00010004116093501789,
4396
+ "loss": 0.1524,
4397
+ "step": 612
4398
+ },
4399
+ {
4400
+ "epoch": 0.08950538419419603,
4401
+ "grad_norm": 0.43919649720191956,
4402
+ "learning_rate": 9.959259101768545e-05,
4403
+ "loss": 0.1156,
4404
+ "step": 613
4405
+ },
4406
+ {
4407
+ "epoch": 0.08965139624018982,
4408
+ "grad_norm": 0.34156444668769836,
4409
+ "learning_rate": 9.914452870240188e-05,
4410
+ "loss": 0.0821,
4411
+ "step": 614
4412
+ },
4413
+ {
4414
+ "epoch": 0.08979740828618361,
4415
+ "grad_norm": 0.3891245424747467,
4416
+ "learning_rate": 9.869697850114969e-05,
4417
+ "loss": 0.122,
4418
+ "step": 615
4419
+ },
4420
+ {
4421
+ "epoch": 0.0899434203321774,
4422
+ "grad_norm": 0.44263043999671936,
4423
+ "learning_rate": 9.824994492075444e-05,
4424
+ "loss": 0.1545,
4425
+ "step": 616
4426
+ },
4427
+ {
4428
+ "epoch": 0.0900894323781712,
4429
+ "grad_norm": 0.2580360770225525,
4430
+ "learning_rate": 9.780343246283923e-05,
4431
+ "loss": 0.0343,
4432
+ "step": 617
4433
+ },
4434
+ {
4435
+ "epoch": 0.09023544442416499,
4436
+ "grad_norm": 0.5391973257064819,
4437
+ "learning_rate": 9.735744562377968e-05,
4438
+ "loss": 0.2013,
4439
+ "step": 618
4440
+ },
4441
+ {
4442
+ "epoch": 0.09038145647015879,
4443
+ "grad_norm": 0.5272008180618286,
4444
+ "learning_rate": 9.691198889465824e-05,
4445
+ "loss": 0.1217,
4446
+ "step": 619
4447
+ },
4448
+ {
4449
+ "epoch": 0.09052746851615258,
4450
+ "grad_norm": 0.3311897814273834,
4451
+ "learning_rate": 9.646706676121923e-05,
4452
+ "loss": 0.0898,
4453
+ "step": 620
4454
+ },
4455
+ {
4456
+ "epoch": 0.09067348056214637,
4457
+ "grad_norm": 0.8139061331748962,
4458
+ "learning_rate": 9.602268370382363e-05,
4459
+ "loss": 0.2168,
4460
+ "step": 621
4461
+ },
4462
+ {
4463
+ "epoch": 0.09081949260814016,
4464
+ "grad_norm": 0.8335362672805786,
4465
+ "learning_rate": 9.557884419740386e-05,
4466
+ "loss": 0.1057,
4467
+ "step": 622
4468
+ },
4469
+ {
4470
+ "epoch": 0.09096550465413397,
4471
+ "grad_norm": 0.4633618891239166,
4472
+ "learning_rate": 9.513555271141882e-05,
4473
+ "loss": 0.1036,
4474
+ "step": 623
4475
+ },
4476
+ {
4477
+ "epoch": 0.09111151670012776,
4478
+ "grad_norm": 0.5218425393104553,
4479
+ "learning_rate": 9.46928137098089e-05,
4480
+ "loss": 0.1092,
4481
+ "step": 624
4482
+ },
4483
+ {
4484
+ "epoch": 0.09125752874612156,
4485
+ "grad_norm": 0.3713681995868683,
4486
+ "learning_rate": 9.425063165095088e-05,
4487
+ "loss": 0.064,
4488
+ "step": 625
4489
+ },
4490
+ {
4491
+ "epoch": 0.09140354079211535,
4492
+ "grad_norm": 0.5872324705123901,
4493
+ "learning_rate": 9.380901098761319e-05,
4494
+ "loss": 0.1207,
4495
+ "step": 626
4496
+ },
4497
+ {
4498
+ "epoch": 0.09154955283810914,
4499
+ "grad_norm": 0.786533772945404,
4500
+ "learning_rate": 9.336795616691103e-05,
4501
+ "loss": 0.1839,
4502
+ "step": 627
4503
+ },
4504
+ {
4505
+ "epoch": 0.09169556488410294,
4506
+ "grad_norm": 1.0154459476470947,
4507
+ "learning_rate": 9.292747163026154e-05,
4508
+ "loss": 0.2062,
4509
+ "step": 628
4510
+ },
4511
+ {
4512
+ "epoch": 0.09184157693009673,
4513
+ "grad_norm": 0.30658984184265137,
4514
+ "learning_rate": 9.24875618133391e-05,
4515
+ "loss": 0.0219,
4516
+ "step": 629
4517
+ },
4518
+ {
4519
+ "epoch": 0.09198758897609052,
4520
+ "grad_norm": 0.9130704402923584,
4521
+ "learning_rate": 9.204823114603068e-05,
4522
+ "loss": 0.1325,
4523
+ "step": 630
4524
+ },
4525
+ {
4526
+ "epoch": 0.09213360102208432,
4527
+ "grad_norm": 0.4111880958080292,
4528
+ "learning_rate": 9.160948405239128e-05,
4529
+ "loss": 0.0612,
4530
+ "step": 631
4531
+ },
4532
+ {
4533
+ "epoch": 0.09227961306807812,
4534
+ "grad_norm": 0.014520260505378246,
4535
+ "learning_rate": 9.117132495059916e-05,
4536
+ "loss": 0.0002,
4537
+ "step": 632
4538
+ },
4539
+ {
4540
+ "epoch": 0.09242562511407192,
4541
+ "grad_norm": 0.004326899070292711,
4542
+ "learning_rate": 9.07337582529117e-05,
4543
+ "loss": 0.0002,
4544
+ "step": 633
4545
+ },
4546
+ {
4547
+ "epoch": 0.09257163716006571,
4548
+ "grad_norm": 0.008311674930155277,
4549
+ "learning_rate": 9.02967883656207e-05,
4550
+ "loss": 0.0001,
4551
+ "step": 634
4552
+ },
4553
+ {
4554
+ "epoch": 0.0927176492060595,
4555
+ "grad_norm": 0.0011568053159862757,
4556
+ "learning_rate": 8.986041968900796e-05,
4557
+ "loss": 0.0001,
4558
+ "step": 635
4559
+ },
4560
+ {
4561
+ "epoch": 0.0928636612520533,
4562
+ "grad_norm": 0.03044409491121769,
4563
+ "learning_rate": 8.942465661730129e-05,
4564
+ "loss": 0.0006,
4565
+ "step": 636
4566
+ },
4567
+ {
4568
+ "epoch": 0.09300967329804709,
4569
+ "grad_norm": 0.0062841372564435005,
4570
+ "learning_rate": 8.898950353862998e-05,
4571
+ "loss": 0.0001,
4572
+ "step": 637
4573
+ },
4574
+ {
4575
+ "epoch": 0.09315568534404088,
4576
+ "grad_norm": 0.004739905241876841,
4577
+ "learning_rate": 8.85549648349807e-05,
4578
+ "loss": 0.0001,
4579
+ "step": 638
4580
+ },
4581
+ {
4582
+ "epoch": 0.09330169739003467,
4583
+ "grad_norm": 0.0005953651270829141,
4584
+ "learning_rate": 8.812104488215332e-05,
4585
+ "loss": 0.0,
4586
+ "step": 639
4587
+ },
4588
+ {
4589
+ "epoch": 0.09344770943602847,
4590
+ "grad_norm": 0.001960960915312171,
4591
+ "learning_rate": 8.768774804971705e-05,
4592
+ "loss": 0.0001,
4593
+ "step": 640
4594
+ },
4595
+ {
4596
+ "epoch": 0.09359372148202226,
4597
+ "grad_norm": 0.005731022451072931,
4598
+ "learning_rate": 8.725507870096609e-05,
4599
+ "loss": 0.0001,
4600
+ "step": 641
4601
+ },
4602
+ {
4603
+ "epoch": 0.09373973352801607,
4604
+ "grad_norm": 0.002941427519544959,
4605
+ "learning_rate": 8.6823041192876e-05,
4606
+ "loss": 0.0001,
4607
+ "step": 642
4608
+ },
4609
+ {
4610
+ "epoch": 0.09388574557400986,
4611
+ "grad_norm": 0.005661524832248688,
4612
+ "learning_rate": 8.639163987605976e-05,
4613
+ "loss": 0.0001,
4614
+ "step": 643
4615
+ },
4616
+ {
4617
+ "epoch": 0.09403175762000365,
4618
+ "grad_norm": 0.0021083110477775335,
4619
+ "learning_rate": 8.596087909472373e-05,
4620
+ "loss": 0.0001,
4621
+ "step": 644
4622
+ },
4623
+ {
4624
+ "epoch": 0.09417776966599745,
4625
+ "grad_norm": 0.0027868840843439102,
4626
+ "learning_rate": 8.553076318662425e-05,
4627
+ "loss": 0.0001,
4628
+ "step": 645
4629
+ },
4630
+ {
4631
+ "epoch": 0.09432378171199124,
4632
+ "grad_norm": 0.001279468764550984,
4633
+ "learning_rate": 8.510129648302372e-05,
4634
+ "loss": 0.0001,
4635
+ "step": 646
4636
+ },
4637
+ {
4638
+ "epoch": 0.09446979375798503,
4639
+ "grad_norm": 0.007063029333949089,
4640
+ "learning_rate": 8.467248330864718e-05,
4641
+ "loss": 0.0001,
4642
+ "step": 647
4643
+ },
4644
+ {
4645
+ "epoch": 0.09461580580397883,
4646
+ "grad_norm": 0.0014115847880020738,
4647
+ "learning_rate": 8.424432798163836e-05,
4648
+ "loss": 0.0001,
4649
+ "step": 648
4650
+ },
4651
+ {
4652
+ "epoch": 0.09476181784997262,
4653
+ "grad_norm": 0.911738395690918,
4654
+ "learning_rate": 8.381683481351676e-05,
4655
+ "loss": 0.0305,
4656
+ "step": 649
4657
+ },
4658
+ {
4659
+ "epoch": 0.09490782989596641,
4660
+ "grad_norm": 0.0007988119614310563,
4661
+ "learning_rate": 8.339000810913386e-05,
4662
+ "loss": 0.0,
4663
+ "step": 650
4664
+ },
4665
+ {
4666
+ "epoch": 0.09490782989596641,
4667
+ "eval_loss": 0.13424192368984222,
4668
+ "eval_runtime": 26.2396,
4669
+ "eval_samples_per_second": 8.384,
4670
+ "eval_steps_per_second": 8.384,
4671
+ "step": 650
4672
  }
4673
  ],
4674
  "logging_steps": 1,
 
4683
  "early_stopping_threshold": 0.0
4684
  },
4685
  "attributes": {
4686
+ "early_stopping_patience_counter": 3
4687
  }
4688
  },
4689
  "TrainerControl": {
 
4692
  "should_evaluate": false,
4693
  "should_log": false,
4694
  "should_save": true,
4695
+ "should_training_stop": true
4696
  },
4697
  "attributes": {}
4698
  }
4699
  },
4700
+ "total_flos": 7.467201778050662e+16,
4701
  "train_batch_size": 1,
4702
  "trial_name": null,
4703
  "trial_params": null