kiritan commited on
Commit
6df0da5
·
verified ·
1 Parent(s): eeef4a9

Training in progress, step 17000, checkpoint

Browse files
last-checkpoint/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d5e5ee5de88ba37830911f0b46c57d6fb97ef78baef3bbcb369caedac1d2537
3
+ size 5117197020
last-checkpoint/global_step17000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3508f7d0f16906096e5610ffe0590bca5cd155de85a6a8021b6e5bf9d8e2eabb
3
+ size 859127504
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step15000
 
1
+ global_step17000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b11d79fb3db23754b444fc4964b7982434af7a97aa600fcdcde4d9a8a9b3f21
3
  size 962205216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4f82ea40fb0305db931cf7a54215d8c646ba708abad07172d476a907b2dad4
3
  size 962205216
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63340953913b9f23b1567a89f7ad34314d1dcb47b9a43157322f84f789423e67
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be21ff914d7590ad2180b18bca69f62255c4deee5c5c2b727794908b9d148dcc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b94442b72559c9262bb7b2684827bc59deb41027ddc14af8d9ffecb8119b4aa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa6830d6aa63edbea9a9fa4aac3b79365984a3d18eed4b014dcec7309b75dc2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 83.80009429514381,
3
- "best_model_checkpoint": "./iteboshi_temp/checkpoint-10000",
4
- "epoch": 16.519823788546255,
5
  "eval_steps": 1000,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4357,6 +4357,586 @@
4357
  "eval_steps_per_second": 1.552,
4358
  "eval_wer": 84.67703913248468,
4359
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4360
  }
4361
  ],
4362
  "logging_steps": 25,
@@ -4376,7 +4956,7 @@
4376
  "attributes": {}
4377
  }
4378
  },
4379
- "total_flos": 2.5772147797351465e+20,
4380
  "train_batch_size": 4,
4381
  "trial_name": null,
4382
  "trial_params": null
 
1
  {
2
+ "best_metric": 82.65912305516267,
3
+ "best_model_checkpoint": "./iteboshi_temp/checkpoint-16000",
4
+ "epoch": 18.722466960352424,
5
  "eval_steps": 1000,
6
+ "global_step": 17000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4357
  "eval_steps_per_second": 1.552,
4358
  "eval_wer": 84.67703913248468,
4359
  "step": 15000
4360
+ },
4361
+ {
4362
+ "epoch": 16.547356828193834,
4363
+ "grad_norm": 0.10291285067796707,
4364
+ "learning_rate": 5.1025641025641024e-06,
4365
+ "loss": 0.0041,
4366
+ "step": 15025
4367
+ },
4368
+ {
4369
+ "epoch": 16.57488986784141,
4370
+ "grad_norm": 0.1358381062746048,
4371
+ "learning_rate": 5.076923076923077e-06,
4372
+ "loss": 0.0066,
4373
+ "step": 15050
4374
+ },
4375
+ {
4376
+ "epoch": 16.602422907488986,
4377
+ "grad_norm": 0.020193297415971756,
4378
+ "learning_rate": 5.051282051282051e-06,
4379
+ "loss": 0.0054,
4380
+ "step": 15075
4381
+ },
4382
+ {
4383
+ "epoch": 16.629955947136565,
4384
+ "grad_norm": 0.03404547646641731,
4385
+ "learning_rate": 5.025641025641026e-06,
4386
+ "loss": 0.003,
4387
+ "step": 15100
4388
+ },
4389
+ {
4390
+ "epoch": 16.65748898678414,
4391
+ "grad_norm": 0.07087010145187378,
4392
+ "learning_rate": 5e-06,
4393
+ "loss": 0.0043,
4394
+ "step": 15125
4395
+ },
4396
+ {
4397
+ "epoch": 16.685022026431717,
4398
+ "grad_norm": 0.01731196418404579,
4399
+ "learning_rate": 4.974358974358975e-06,
4400
+ "loss": 0.0054,
4401
+ "step": 15150
4402
+ },
4403
+ {
4404
+ "epoch": 16.712555066079297,
4405
+ "grad_norm": 0.3552390933036804,
4406
+ "learning_rate": 4.948717948717949e-06,
4407
+ "loss": 0.0056,
4408
+ "step": 15175
4409
+ },
4410
+ {
4411
+ "epoch": 16.740088105726873,
4412
+ "grad_norm": 0.626363217830658,
4413
+ "learning_rate": 4.923076923076924e-06,
4414
+ "loss": 0.0079,
4415
+ "step": 15200
4416
+ },
4417
+ {
4418
+ "epoch": 16.76762114537445,
4419
+ "grad_norm": 0.0181206613779068,
4420
+ "learning_rate": 4.8974358974358975e-06,
4421
+ "loss": 0.006,
4422
+ "step": 15225
4423
+ },
4424
+ {
4425
+ "epoch": 16.795154185022028,
4426
+ "grad_norm": 0.09998787939548492,
4427
+ "learning_rate": 4.871794871794872e-06,
4428
+ "loss": 0.0055,
4429
+ "step": 15250
4430
+ },
4431
+ {
4432
+ "epoch": 16.822687224669604,
4433
+ "grad_norm": 0.1320696324110031,
4434
+ "learning_rate": 4.8461538461538465e-06,
4435
+ "loss": 0.004,
4436
+ "step": 15275
4437
+ },
4438
+ {
4439
+ "epoch": 16.85022026431718,
4440
+ "grad_norm": 0.09725293517112732,
4441
+ "learning_rate": 4.820512820512821e-06,
4442
+ "loss": 0.0047,
4443
+ "step": 15300
4444
+ },
4445
+ {
4446
+ "epoch": 16.87775330396476,
4447
+ "grad_norm": 0.11701034754514694,
4448
+ "learning_rate": 4.7948717948717955e-06,
4449
+ "loss": 0.0038,
4450
+ "step": 15325
4451
+ },
4452
+ {
4453
+ "epoch": 16.905286343612335,
4454
+ "grad_norm": 0.046660326421260834,
4455
+ "learning_rate": 4.76923076923077e-06,
4456
+ "loss": 0.0052,
4457
+ "step": 15350
4458
+ },
4459
+ {
4460
+ "epoch": 16.93281938325991,
4461
+ "grad_norm": 0.03657762333750725,
4462
+ "learning_rate": 4.743589743589744e-06,
4463
+ "loss": 0.004,
4464
+ "step": 15375
4465
+ },
4466
+ {
4467
+ "epoch": 16.96035242290749,
4468
+ "grad_norm": 0.13504423201084137,
4469
+ "learning_rate": 4.717948717948718e-06,
4470
+ "loss": 0.0038,
4471
+ "step": 15400
4472
+ },
4473
+ {
4474
+ "epoch": 16.987885462555067,
4475
+ "grad_norm": 0.08526286482810974,
4476
+ "learning_rate": 4.692307692307693e-06,
4477
+ "loss": 0.0066,
4478
+ "step": 15425
4479
+ },
4480
+ {
4481
+ "epoch": 17.015418502202643,
4482
+ "grad_norm": 0.02115774340927601,
4483
+ "learning_rate": 4.666666666666667e-06,
4484
+ "loss": 0.0046,
4485
+ "step": 15450
4486
+ },
4487
+ {
4488
+ "epoch": 17.042951541850222,
4489
+ "grad_norm": 0.2753286063671112,
4490
+ "learning_rate": 4.641025641025642e-06,
4491
+ "loss": 0.0045,
4492
+ "step": 15475
4493
+ },
4494
+ {
4495
+ "epoch": 17.070484581497798,
4496
+ "grad_norm": 0.03063320554792881,
4497
+ "learning_rate": 4.615384615384616e-06,
4498
+ "loss": 0.0045,
4499
+ "step": 15500
4500
+ },
4501
+ {
4502
+ "epoch": 17.098017621145374,
4503
+ "grad_norm": 0.010663969442248344,
4504
+ "learning_rate": 4.58974358974359e-06,
4505
+ "loss": 0.003,
4506
+ "step": 15525
4507
+ },
4508
+ {
4509
+ "epoch": 17.125550660792953,
4510
+ "grad_norm": 0.22800272703170776,
4511
+ "learning_rate": 4.564102564102564e-06,
4512
+ "loss": 0.002,
4513
+ "step": 15550
4514
+ },
4515
+ {
4516
+ "epoch": 17.15308370044053,
4517
+ "grad_norm": 0.22873614728450775,
4518
+ "learning_rate": 4.538461538461539e-06,
4519
+ "loss": 0.002,
4520
+ "step": 15575
4521
+ },
4522
+ {
4523
+ "epoch": 17.180616740088105,
4524
+ "grad_norm": 0.011216685175895691,
4525
+ "learning_rate": 4.512820512820513e-06,
4526
+ "loss": 0.0032,
4527
+ "step": 15600
4528
+ },
4529
+ {
4530
+ "epoch": 17.208149779735685,
4531
+ "grad_norm": 0.4962778091430664,
4532
+ "learning_rate": 4.487179487179488e-06,
4533
+ "loss": 0.0038,
4534
+ "step": 15625
4535
+ },
4536
+ {
4537
+ "epoch": 17.23568281938326,
4538
+ "grad_norm": 0.023572538048028946,
4539
+ "learning_rate": 4.461538461538462e-06,
4540
+ "loss": 0.0031,
4541
+ "step": 15650
4542
+ },
4543
+ {
4544
+ "epoch": 17.263215859030836,
4545
+ "grad_norm": 0.032316386699676514,
4546
+ "learning_rate": 4.435897435897436e-06,
4547
+ "loss": 0.0024,
4548
+ "step": 15675
4549
+ },
4550
+ {
4551
+ "epoch": 17.290748898678412,
4552
+ "grad_norm": 0.025533461943268776,
4553
+ "learning_rate": 4.4102564102564104e-06,
4554
+ "loss": 0.0024,
4555
+ "step": 15700
4556
+ },
4557
+ {
4558
+ "epoch": 17.318281938325992,
4559
+ "grad_norm": 0.02447574771940708,
4560
+ "learning_rate": 4.384615384615385e-06,
4561
+ "loss": 0.0057,
4562
+ "step": 15725
4563
+ },
4564
+ {
4565
+ "epoch": 17.345814977973568,
4566
+ "grad_norm": 0.021540969610214233,
4567
+ "learning_rate": 4.358974358974359e-06,
4568
+ "loss": 0.0035,
4569
+ "step": 15750
4570
+ },
4571
+ {
4572
+ "epoch": 17.373348017621144,
4573
+ "grad_norm": 0.022210588678717613,
4574
+ "learning_rate": 4.333333333333334e-06,
4575
+ "loss": 0.0029,
4576
+ "step": 15775
4577
+ },
4578
+ {
4579
+ "epoch": 17.400881057268723,
4580
+ "grad_norm": 0.01674061268568039,
4581
+ "learning_rate": 4.307692307692308e-06,
4582
+ "loss": 0.0037,
4583
+ "step": 15800
4584
+ },
4585
+ {
4586
+ "epoch": 17.4284140969163,
4587
+ "grad_norm": 0.013861955143511295,
4588
+ "learning_rate": 4.282051282051282e-06,
4589
+ "loss": 0.0023,
4590
+ "step": 15825
4591
+ },
4592
+ {
4593
+ "epoch": 17.455947136563875,
4594
+ "grad_norm": 0.023190615698695183,
4595
+ "learning_rate": 4.2564102564102566e-06,
4596
+ "loss": 0.0045,
4597
+ "step": 15850
4598
+ },
4599
+ {
4600
+ "epoch": 17.483480176211454,
4601
+ "grad_norm": 0.015583349391818047,
4602
+ "learning_rate": 4.230769230769231e-06,
4603
+ "loss": 0.0037,
4604
+ "step": 15875
4605
+ },
4606
+ {
4607
+ "epoch": 17.51101321585903,
4608
+ "grad_norm": 0.013716256245970726,
4609
+ "learning_rate": 4.2051282051282055e-06,
4610
+ "loss": 0.0035,
4611
+ "step": 15900
4612
+ },
4613
+ {
4614
+ "epoch": 17.538546255506606,
4615
+ "grad_norm": 0.016609976068139076,
4616
+ "learning_rate": 4.17948717948718e-06,
4617
+ "loss": 0.0041,
4618
+ "step": 15925
4619
+ },
4620
+ {
4621
+ "epoch": 17.566079295154186,
4622
+ "grad_norm": 0.01952126808464527,
4623
+ "learning_rate": 4.1538461538461545e-06,
4624
+ "loss": 0.0037,
4625
+ "step": 15950
4626
+ },
4627
+ {
4628
+ "epoch": 17.59361233480176,
4629
+ "grad_norm": 0.17321471869945526,
4630
+ "learning_rate": 4.128205128205128e-06,
4631
+ "loss": 0.0039,
4632
+ "step": 15975
4633
+ },
4634
+ {
4635
+ "epoch": 17.621145374449338,
4636
+ "grad_norm": 0.01522456482052803,
4637
+ "learning_rate": 4.102564102564103e-06,
4638
+ "loss": 0.0032,
4639
+ "step": 16000
4640
+ },
4641
+ {
4642
+ "epoch": 17.621145374449338,
4643
+ "eval_cer": 23.317437208395905,
4644
+ "eval_loss": 0.8810132741928101,
4645
+ "eval_runtime": 1715.3061,
4646
+ "eval_samples_per_second": 6.169,
4647
+ "eval_steps_per_second": 1.543,
4648
+ "eval_wer": 82.65912305516267,
4649
+ "step": 16000
4650
+ },
4651
+ {
4652
+ "epoch": 17.648678414096917,
4653
+ "grad_norm": 0.30109259486198425,
4654
+ "learning_rate": 4.076923076923077e-06,
4655
+ "loss": 0.0026,
4656
+ "step": 16025
4657
+ },
4658
+ {
4659
+ "epoch": 17.676211453744493,
4660
+ "grad_norm": 0.06414441019296646,
4661
+ "learning_rate": 4.051282051282052e-06,
4662
+ "loss": 0.0032,
4663
+ "step": 16050
4664
+ },
4665
+ {
4666
+ "epoch": 17.70374449339207,
4667
+ "grad_norm": 0.12120723724365234,
4668
+ "learning_rate": 4.025641025641026e-06,
4669
+ "loss": 0.0026,
4670
+ "step": 16075
4671
+ },
4672
+ {
4673
+ "epoch": 17.73127753303965,
4674
+ "grad_norm": 0.19122646749019623,
4675
+ "learning_rate": 4.000000000000001e-06,
4676
+ "loss": 0.0039,
4677
+ "step": 16100
4678
+ },
4679
+ {
4680
+ "epoch": 17.758810572687224,
4681
+ "grad_norm": 0.03467703238129616,
4682
+ "learning_rate": 3.974358974358974e-06,
4683
+ "loss": 0.0027,
4684
+ "step": 16125
4685
+ },
4686
+ {
4687
+ "epoch": 17.7863436123348,
4688
+ "grad_norm": 0.01050791796296835,
4689
+ "learning_rate": 3.948717948717949e-06,
4690
+ "loss": 0.0024,
4691
+ "step": 16150
4692
+ },
4693
+ {
4694
+ "epoch": 17.81387665198238,
4695
+ "grad_norm": 0.01552590075880289,
4696
+ "learning_rate": 3.923076923076923e-06,
4697
+ "loss": 0.003,
4698
+ "step": 16175
4699
+ },
4700
+ {
4701
+ "epoch": 17.841409691629956,
4702
+ "grad_norm": 0.021685760468244553,
4703
+ "learning_rate": 3.897435897435898e-06,
4704
+ "loss": 0.0028,
4705
+ "step": 16200
4706
+ },
4707
+ {
4708
+ "epoch": 17.86894273127753,
4709
+ "grad_norm": 0.012615197338163853,
4710
+ "learning_rate": 3.871794871794872e-06,
4711
+ "loss": 0.0018,
4712
+ "step": 16225
4713
+ },
4714
+ {
4715
+ "epoch": 17.89647577092511,
4716
+ "grad_norm": 0.024285893887281418,
4717
+ "learning_rate": 3.846153846153847e-06,
4718
+ "loss": 0.0034,
4719
+ "step": 16250
4720
+ },
4721
+ {
4722
+ "epoch": 17.924008810572687,
4723
+ "grad_norm": 0.019548427313566208,
4724
+ "learning_rate": 3.8205128205128204e-06,
4725
+ "loss": 0.0046,
4726
+ "step": 16275
4727
+ },
4728
+ {
4729
+ "epoch": 17.951541850220263,
4730
+ "grad_norm": 0.014185987412929535,
4731
+ "learning_rate": 3.794871794871795e-06,
4732
+ "loss": 0.0042,
4733
+ "step": 16300
4734
+ },
4735
+ {
4736
+ "epoch": 17.979074889867842,
4737
+ "grad_norm": 0.2013942152261734,
4738
+ "learning_rate": 3.7692307692307694e-06,
4739
+ "loss": 0.0025,
4740
+ "step": 16325
4741
+ },
4742
+ {
4743
+ "epoch": 18.006607929515418,
4744
+ "grad_norm": 0.01142708957195282,
4745
+ "learning_rate": 3.743589743589744e-06,
4746
+ "loss": 0.0028,
4747
+ "step": 16350
4748
+ },
4749
+ {
4750
+ "epoch": 18.034140969162994,
4751
+ "grad_norm": 0.1827182024717331,
4752
+ "learning_rate": 3.7179487179487184e-06,
4753
+ "loss": 0.0027,
4754
+ "step": 16375
4755
+ },
4756
+ {
4757
+ "epoch": 18.061674008810574,
4758
+ "grad_norm": 0.008858841843903065,
4759
+ "learning_rate": 3.692307692307693e-06,
4760
+ "loss": 0.0022,
4761
+ "step": 16400
4762
+ },
4763
+ {
4764
+ "epoch": 18.08920704845815,
4765
+ "grad_norm": 0.037348657846450806,
4766
+ "learning_rate": 3.6666666666666666e-06,
4767
+ "loss": 0.004,
4768
+ "step": 16425
4769
+ },
4770
+ {
4771
+ "epoch": 18.116740088105725,
4772
+ "grad_norm": 0.014842098578810692,
4773
+ "learning_rate": 3.641025641025641e-06,
4774
+ "loss": 0.003,
4775
+ "step": 16450
4776
+ },
4777
+ {
4778
+ "epoch": 18.144273127753305,
4779
+ "grad_norm": 0.012190734967589378,
4780
+ "learning_rate": 3.6153846153846156e-06,
4781
+ "loss": 0.0047,
4782
+ "step": 16475
4783
+ },
4784
+ {
4785
+ "epoch": 18.17180616740088,
4786
+ "grad_norm": 0.010254699736833572,
4787
+ "learning_rate": 3.58974358974359e-06,
4788
+ "loss": 0.0018,
4789
+ "step": 16500
4790
+ },
4791
+ {
4792
+ "epoch": 18.199339207048457,
4793
+ "grad_norm": 0.012803646735846996,
4794
+ "learning_rate": 3.5641025641025646e-06,
4795
+ "loss": 0.0018,
4796
+ "step": 16525
4797
+ },
4798
+ {
4799
+ "epoch": 18.226872246696036,
4800
+ "grad_norm": 0.010007087141275406,
4801
+ "learning_rate": 3.538461538461539e-06,
4802
+ "loss": 0.0037,
4803
+ "step": 16550
4804
+ },
4805
+ {
4806
+ "epoch": 18.254405286343612,
4807
+ "grad_norm": 0.010007468052208424,
4808
+ "learning_rate": 3.5128205128205127e-06,
4809
+ "loss": 0.0017,
4810
+ "step": 16575
4811
+ },
4812
+ {
4813
+ "epoch": 18.281938325991188,
4814
+ "grad_norm": 0.021304214373230934,
4815
+ "learning_rate": 3.487179487179487e-06,
4816
+ "loss": 0.0017,
4817
+ "step": 16600
4818
+ },
4819
+ {
4820
+ "epoch": 18.309471365638768,
4821
+ "grad_norm": 0.00610103365033865,
4822
+ "learning_rate": 3.4615384615384617e-06,
4823
+ "loss": 0.0017,
4824
+ "step": 16625
4825
+ },
4826
+ {
4827
+ "epoch": 18.337004405286343,
4828
+ "grad_norm": 0.17184419929981232,
4829
+ "learning_rate": 3.435897435897436e-06,
4830
+ "loss": 0.0023,
4831
+ "step": 16650
4832
+ },
4833
+ {
4834
+ "epoch": 18.36453744493392,
4835
+ "grad_norm": 0.010224095545709133,
4836
+ "learning_rate": 3.4102564102564107e-06,
4837
+ "loss": 0.0028,
4838
+ "step": 16675
4839
+ },
4840
+ {
4841
+ "epoch": 18.3920704845815,
4842
+ "grad_norm": 0.016741087660193443,
4843
+ "learning_rate": 3.384615384615385e-06,
4844
+ "loss": 0.002,
4845
+ "step": 16700
4846
+ },
4847
+ {
4848
+ "epoch": 18.419603524229075,
4849
+ "grad_norm": 0.01324927993118763,
4850
+ "learning_rate": 3.358974358974359e-06,
4851
+ "loss": 0.0017,
4852
+ "step": 16725
4853
+ },
4854
+ {
4855
+ "epoch": 18.44713656387665,
4856
+ "grad_norm": 0.14577801525592804,
4857
+ "learning_rate": 3.3333333333333333e-06,
4858
+ "loss": 0.0025,
4859
+ "step": 16750
4860
+ },
4861
+ {
4862
+ "epoch": 18.47466960352423,
4863
+ "grad_norm": 0.0260769035667181,
4864
+ "learning_rate": 3.307692307692308e-06,
4865
+ "loss": 0.0018,
4866
+ "step": 16775
4867
+ },
4868
+ {
4869
+ "epoch": 18.502202643171806,
4870
+ "grad_norm": 0.01632179506123066,
4871
+ "learning_rate": 3.2820512820512823e-06,
4872
+ "loss": 0.0041,
4873
+ "step": 16800
4874
+ },
4875
+ {
4876
+ "epoch": 18.529735682819382,
4877
+ "grad_norm": 0.014896622858941555,
4878
+ "learning_rate": 3.256410256410257e-06,
4879
+ "loss": 0.0018,
4880
+ "step": 16825
4881
+ },
4882
+ {
4883
+ "epoch": 18.55726872246696,
4884
+ "grad_norm": 0.014535325579345226,
4885
+ "learning_rate": 3.2307692307692313e-06,
4886
+ "loss": 0.0022,
4887
+ "step": 16850
4888
+ },
4889
+ {
4890
+ "epoch": 18.584801762114537,
4891
+ "grad_norm": 0.011787498369812965,
4892
+ "learning_rate": 3.205128205128206e-06,
4893
+ "loss": 0.0016,
4894
+ "step": 16875
4895
+ },
4896
+ {
4897
+ "epoch": 18.612334801762113,
4898
+ "grad_norm": 0.04083514213562012,
4899
+ "learning_rate": 3.1794871794871795e-06,
4900
+ "loss": 0.0017,
4901
+ "step": 16900
4902
+ },
4903
+ {
4904
+ "epoch": 18.639867841409693,
4905
+ "grad_norm": 0.16764149069786072,
4906
+ "learning_rate": 3.153846153846154e-06,
4907
+ "loss": 0.0024,
4908
+ "step": 16925
4909
+ },
4910
+ {
4911
+ "epoch": 18.66740088105727,
4912
+ "grad_norm": 0.008704649284482002,
4913
+ "learning_rate": 3.1282051282051284e-06,
4914
+ "loss": 0.0015,
4915
+ "step": 16950
4916
+ },
4917
+ {
4918
+ "epoch": 18.694933920704845,
4919
+ "grad_norm": 0.007399390451610088,
4920
+ "learning_rate": 3.102564102564103e-06,
4921
+ "loss": 0.0014,
4922
+ "step": 16975
4923
+ },
4924
+ {
4925
+ "epoch": 18.722466960352424,
4926
+ "grad_norm": 0.016065089032053947,
4927
+ "learning_rate": 3.0769230769230774e-06,
4928
+ "loss": 0.0017,
4929
+ "step": 17000
4930
+ },
4931
+ {
4932
+ "epoch": 18.722466960352424,
4933
+ "eval_cer": 22.853163367074387,
4934
+ "eval_loss": 0.8870487809181213,
4935
+ "eval_runtime": 1702.6926,
4936
+ "eval_samples_per_second": 6.214,
4937
+ "eval_steps_per_second": 1.554,
4938
+ "eval_wer": 82.998585572843,
4939
+ "step": 17000
4940
  }
4941
  ],
4942
  "logging_steps": 25,
 
4956
  "attributes": {}
4957
  }
4958
  },
4959
+ "total_flos": 2.920843417033166e+20,
4960
  "train_batch_size": 4,
4961
  "trial_name": null,
4962
  "trial_params": null