kiritan commited on
Commit
588f84b
·
verified ·
1 Parent(s): 6ca5191

Training in progress, step 18000, checkpoint

Browse files
last-checkpoint/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8230942c17955592b8b6ebc9441a6b170ac40dbed5121f918d832cd02b6b200a
3
+ size 761059696
last-checkpoint/global_step18000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5408d8ce3eea149fd9ec4a1b84adc16df254d931e011232b11bb83a72ebad456
3
+ size 129965712
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step16000
 
1
+ global_step18000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a370fd951ebb9e606924b2b0e0b39e8cbeab364a48c2293a4da2fe84cca15ea3
3
  size 181508256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad61c1d6676decf968d7cc262cb88d3340a58571f59eb03dc41c8694daf8e28e
3
  size 181508256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf47e6f3fc3df3ec2ec720d756ada5f3fe86dd4b309a3d2c50d42b22bc6fd7f8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76da6beb47ba6fea32e3903f5fb6715e6c7d9cfa9223676725c0a4f3ab456246
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99418ddc40a829db03db6cc2c954e7b03b65fc0f7c9d78bead52fa43cebbd4fe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:389fc41872de99e18419ed46bb961f8c27ddde2cc92d05129c78c005704b1713
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 96.5959453088166,
3
- "best_model_checkpoint": "./iteboshi_temp/checkpoint-16000",
4
- "epoch": 17.621145374449338,
5
  "eval_steps": 1000,
6
- "global_step": 16000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4647,6 +4647,586 @@
4647
  "eval_steps_per_second": 3.221,
4648
  "eval_wer": 96.5959453088166,
4649
  "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4650
  }
4651
  ],
4652
  "logging_steps": 25,
@@ -4666,7 +5246,7 @@
4666
  "attributes": {}
4667
  }
4668
  },
4669
- "total_flos": 2.701073184056148e+19,
4670
  "train_batch_size": 4,
4671
  "trial_name": null,
4672
  "trial_params": null
 
1
  {
2
+ "best_metric": 96.5865157944366,
3
+ "best_model_checkpoint": "./iteboshi_temp/checkpoint-17000",
4
+ "epoch": 19.823788546255507,
5
  "eval_steps": 1000,
6
+ "global_step": 18000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4647
  "eval_steps_per_second": 3.221,
4648
  "eval_wer": 96.5959453088166,
4649
  "step": 16000
4650
+ },
4651
+ {
4652
+ "epoch": 17.648678414096917,
4653
+ "grad_norm": 0.03022758848965168,
4654
+ "learning_rate": 4.076923076923077e-06,
4655
+ "loss": 0.0031,
4656
+ "step": 16025
4657
+ },
4658
+ {
4659
+ "epoch": 17.676211453744493,
4660
+ "grad_norm": 0.02969919890165329,
4661
+ "learning_rate": 4.051282051282052e-06,
4662
+ "loss": 0.0036,
4663
+ "step": 16050
4664
+ },
4665
+ {
4666
+ "epoch": 17.70374449339207,
4667
+ "grad_norm": 0.04370042681694031,
4668
+ "learning_rate": 4.025641025641026e-06,
4669
+ "loss": 0.0038,
4670
+ "step": 16075
4671
+ },
4672
+ {
4673
+ "epoch": 17.73127753303965,
4674
+ "grad_norm": 0.04208499938249588,
4675
+ "learning_rate": 4.000000000000001e-06,
4676
+ "loss": 0.0034,
4677
+ "step": 16100
4678
+ },
4679
+ {
4680
+ "epoch": 17.758810572687224,
4681
+ "grad_norm": 0.07844261825084686,
4682
+ "learning_rate": 3.974358974358974e-06,
4683
+ "loss": 0.0032,
4684
+ "step": 16125
4685
+ },
4686
+ {
4687
+ "epoch": 17.7863436123348,
4688
+ "grad_norm": 0.02524634823203087,
4689
+ "learning_rate": 3.948717948717949e-06,
4690
+ "loss": 0.0037,
4691
+ "step": 16150
4692
+ },
4693
+ {
4694
+ "epoch": 17.81387665198238,
4695
+ "grad_norm": 0.027894780039787292,
4696
+ "learning_rate": 3.923076923076923e-06,
4697
+ "loss": 0.0035,
4698
+ "step": 16175
4699
+ },
4700
+ {
4701
+ "epoch": 17.841409691629956,
4702
+ "grad_norm": 0.11929473280906677,
4703
+ "learning_rate": 3.897435897435898e-06,
4704
+ "loss": 0.0035,
4705
+ "step": 16200
4706
+ },
4707
+ {
4708
+ "epoch": 17.86894273127753,
4709
+ "grad_norm": 0.02329305373132229,
4710
+ "learning_rate": 3.871794871794872e-06,
4711
+ "loss": 0.0033,
4712
+ "step": 16225
4713
+ },
4714
+ {
4715
+ "epoch": 17.89647577092511,
4716
+ "grad_norm": 0.03569497913122177,
4717
+ "learning_rate": 3.846153846153847e-06,
4718
+ "loss": 0.0033,
4719
+ "step": 16250
4720
+ },
4721
+ {
4722
+ "epoch": 17.924008810572687,
4723
+ "grad_norm": 0.025960877537727356,
4724
+ "learning_rate": 3.8205128205128204e-06,
4725
+ "loss": 0.0036,
4726
+ "step": 16275
4727
+ },
4728
+ {
4729
+ "epoch": 17.951541850220263,
4730
+ "grad_norm": 0.0266879815608263,
4731
+ "learning_rate": 3.794871794871795e-06,
4732
+ "loss": 0.004,
4733
+ "step": 16300
4734
+ },
4735
+ {
4736
+ "epoch": 17.979074889867842,
4737
+ "grad_norm": 0.035963866859674454,
4738
+ "learning_rate": 3.7692307692307694e-06,
4739
+ "loss": 0.0036,
4740
+ "step": 16325
4741
+ },
4742
+ {
4743
+ "epoch": 18.006607929515418,
4744
+ "grad_norm": 0.06047583743929863,
4745
+ "learning_rate": 3.743589743589744e-06,
4746
+ "loss": 0.0032,
4747
+ "step": 16350
4748
+ },
4749
+ {
4750
+ "epoch": 18.034140969162994,
4751
+ "grad_norm": 0.024352600798010826,
4752
+ "learning_rate": 3.7179487179487184e-06,
4753
+ "loss": 0.0027,
4754
+ "step": 16375
4755
+ },
4756
+ {
4757
+ "epoch": 18.061674008810574,
4758
+ "grad_norm": 0.02058643475174904,
4759
+ "learning_rate": 3.692307692307693e-06,
4760
+ "loss": 0.0031,
4761
+ "step": 16400
4762
+ },
4763
+ {
4764
+ "epoch": 18.08920704845815,
4765
+ "grad_norm": 0.0343441516160965,
4766
+ "learning_rate": 3.6666666666666666e-06,
4767
+ "loss": 0.0028,
4768
+ "step": 16425
4769
+ },
4770
+ {
4771
+ "epoch": 18.116740088105725,
4772
+ "grad_norm": 0.029145579785108566,
4773
+ "learning_rate": 3.641025641025641e-06,
4774
+ "loss": 0.0025,
4775
+ "step": 16450
4776
+ },
4777
+ {
4778
+ "epoch": 18.144273127753305,
4779
+ "grad_norm": 0.02418331801891327,
4780
+ "learning_rate": 3.6153846153846156e-06,
4781
+ "loss": 0.0027,
4782
+ "step": 16475
4783
+ },
4784
+ {
4785
+ "epoch": 18.17180616740088,
4786
+ "grad_norm": 0.017598390579223633,
4787
+ "learning_rate": 3.58974358974359e-06,
4788
+ "loss": 0.0028,
4789
+ "step": 16500
4790
+ },
4791
+ {
4792
+ "epoch": 18.199339207048457,
4793
+ "grad_norm": 0.020629288628697395,
4794
+ "learning_rate": 3.5641025641025646e-06,
4795
+ "loss": 0.0027,
4796
+ "step": 16525
4797
+ },
4798
+ {
4799
+ "epoch": 18.226872246696036,
4800
+ "grad_norm": 0.020629016682505608,
4801
+ "learning_rate": 3.538461538461539e-06,
4802
+ "loss": 0.0029,
4803
+ "step": 16550
4804
+ },
4805
+ {
4806
+ "epoch": 18.254405286343612,
4807
+ "grad_norm": 0.034342389553785324,
4808
+ "learning_rate": 3.5128205128205127e-06,
4809
+ "loss": 0.0028,
4810
+ "step": 16575
4811
+ },
4812
+ {
4813
+ "epoch": 18.281938325991188,
4814
+ "grad_norm": 0.19688080251216888,
4815
+ "learning_rate": 3.487179487179487e-06,
4816
+ "loss": 0.0029,
4817
+ "step": 16600
4818
+ },
4819
+ {
4820
+ "epoch": 18.309471365638768,
4821
+ "grad_norm": 0.020140135660767555,
4822
+ "learning_rate": 3.4615384615384617e-06,
4823
+ "loss": 0.0031,
4824
+ "step": 16625
4825
+ },
4826
+ {
4827
+ "epoch": 18.337004405286343,
4828
+ "grad_norm": 0.2628467082977295,
4829
+ "learning_rate": 3.435897435897436e-06,
4830
+ "loss": 0.0037,
4831
+ "step": 16650
4832
+ },
4833
+ {
4834
+ "epoch": 18.36453744493392,
4835
+ "grad_norm": 0.08471482247114182,
4836
+ "learning_rate": 3.4102564102564107e-06,
4837
+ "loss": 0.0035,
4838
+ "step": 16675
4839
+ },
4840
+ {
4841
+ "epoch": 18.3920704845815,
4842
+ "grad_norm": 0.05477755516767502,
4843
+ "learning_rate": 3.384615384615385e-06,
4844
+ "loss": 0.0031,
4845
+ "step": 16700
4846
+ },
4847
+ {
4848
+ "epoch": 18.419603524229075,
4849
+ "grad_norm": 0.020290255546569824,
4850
+ "learning_rate": 3.358974358974359e-06,
4851
+ "loss": 0.0027,
4852
+ "step": 16725
4853
+ },
4854
+ {
4855
+ "epoch": 18.44713656387665,
4856
+ "grad_norm": 0.022715341299772263,
4857
+ "learning_rate": 3.3333333333333333e-06,
4858
+ "loss": 0.003,
4859
+ "step": 16750
4860
+ },
4861
+ {
4862
+ "epoch": 18.47466960352423,
4863
+ "grad_norm": 0.04665736109018326,
4864
+ "learning_rate": 3.307692307692308e-06,
4865
+ "loss": 0.0034,
4866
+ "step": 16775
4867
+ },
4868
+ {
4869
+ "epoch": 18.502202643171806,
4870
+ "grad_norm": 0.024362141266465187,
4871
+ "learning_rate": 3.2820512820512823e-06,
4872
+ "loss": 0.0034,
4873
+ "step": 16800
4874
+ },
4875
+ {
4876
+ "epoch": 18.529735682819382,
4877
+ "grad_norm": 0.027004770934581757,
4878
+ "learning_rate": 3.256410256410257e-06,
4879
+ "loss": 0.0031,
4880
+ "step": 16825
4881
+ },
4882
+ {
4883
+ "epoch": 18.55726872246696,
4884
+ "grad_norm": 0.02136850170791149,
4885
+ "learning_rate": 3.2307692307692313e-06,
4886
+ "loss": 0.0034,
4887
+ "step": 16850
4888
+ },
4889
+ {
4890
+ "epoch": 18.584801762114537,
4891
+ "grad_norm": 0.024859808385372162,
4892
+ "learning_rate": 3.205128205128206e-06,
4893
+ "loss": 0.003,
4894
+ "step": 16875
4895
+ },
4896
+ {
4897
+ "epoch": 18.612334801762113,
4898
+ "grad_norm": 0.020625699311494827,
4899
+ "learning_rate": 3.1794871794871795e-06,
4900
+ "loss": 0.0028,
4901
+ "step": 16900
4902
+ },
4903
+ {
4904
+ "epoch": 18.639867841409693,
4905
+ "grad_norm": 0.0206185020506382,
4906
+ "learning_rate": 3.153846153846154e-06,
4907
+ "loss": 0.0031,
4908
+ "step": 16925
4909
+ },
4910
+ {
4911
+ "epoch": 18.66740088105727,
4912
+ "grad_norm": 0.022832127287983894,
4913
+ "learning_rate": 3.1282051282051284e-06,
4914
+ "loss": 0.003,
4915
+ "step": 16950
4916
+ },
4917
+ {
4918
+ "epoch": 18.694933920704845,
4919
+ "grad_norm": 0.022303447127342224,
4920
+ "learning_rate": 3.102564102564103e-06,
4921
+ "loss": 0.0028,
4922
+ "step": 16975
4923
+ },
4924
+ {
4925
+ "epoch": 18.722466960352424,
4926
+ "grad_norm": 0.023096712306141853,
4927
+ "learning_rate": 3.0769230769230774e-06,
4928
+ "loss": 0.0029,
4929
+ "step": 17000
4930
+ },
4931
+ {
4932
+ "epoch": 18.722466960352424,
4933
+ "eval_cer": 55.69451927596207,
4934
+ "eval_loss": 1.0366687774658203,
4935
+ "eval_runtime": 832.9104,
4936
+ "eval_samples_per_second": 12.704,
4937
+ "eval_steps_per_second": 3.177,
4938
+ "eval_wer": 96.5865157944366,
4939
+ "step": 17000
4940
+ },
4941
+ {
4942
+ "epoch": 18.75,
4943
+ "grad_norm": 0.03265475109219551,
4944
+ "learning_rate": 3.051282051282052e-06,
4945
+ "loss": 0.003,
4946
+ "step": 17025
4947
+ },
4948
+ {
4949
+ "epoch": 18.777533039647576,
4950
+ "grad_norm": 0.02906380034983158,
4951
+ "learning_rate": 3.0256410256410256e-06,
4952
+ "loss": 0.0027,
4953
+ "step": 17050
4954
+ },
4955
+ {
4956
+ "epoch": 18.805066079295155,
4957
+ "grad_norm": 0.02245141565799713,
4958
+ "learning_rate": 3e-06,
4959
+ "loss": 0.0029,
4960
+ "step": 17075
4961
+ },
4962
+ {
4963
+ "epoch": 18.83259911894273,
4964
+ "grad_norm": 0.028345687314867973,
4965
+ "learning_rate": 2.9743589743589746e-06,
4966
+ "loss": 0.0029,
4967
+ "step": 17100
4968
+ },
4969
+ {
4970
+ "epoch": 18.860132158590307,
4971
+ "grad_norm": 0.0485895536839962,
4972
+ "learning_rate": 2.948717948717949e-06,
4973
+ "loss": 0.0039,
4974
+ "step": 17125
4975
+ },
4976
+ {
4977
+ "epoch": 18.887665198237887,
4978
+ "grad_norm": 0.028899550437927246,
4979
+ "learning_rate": 2.9230769230769236e-06,
4980
+ "loss": 0.0032,
4981
+ "step": 17150
4982
+ },
4983
+ {
4984
+ "epoch": 18.915198237885463,
4985
+ "grad_norm": 0.021577881649136543,
4986
+ "learning_rate": 2.897435897435898e-06,
4987
+ "loss": 0.0033,
4988
+ "step": 17175
4989
+ },
4990
+ {
4991
+ "epoch": 18.94273127753304,
4992
+ "grad_norm": 0.03163857385516167,
4993
+ "learning_rate": 2.8717948717948717e-06,
4994
+ "loss": 0.0029,
4995
+ "step": 17200
4996
+ },
4997
+ {
4998
+ "epoch": 18.970264317180618,
4999
+ "grad_norm": 0.026857255026698112,
5000
+ "learning_rate": 2.846153846153846e-06,
5001
+ "loss": 0.0029,
5002
+ "step": 17225
5003
+ },
5004
+ {
5005
+ "epoch": 18.997797356828194,
5006
+ "grad_norm": 0.027735862880945206,
5007
+ "learning_rate": 2.8205128205128207e-06,
5008
+ "loss": 0.0029,
5009
+ "step": 17250
5010
+ },
5011
+ {
5012
+ "epoch": 19.02533039647577,
5013
+ "grad_norm": 0.017564741894602776,
5014
+ "learning_rate": 2.794871794871795e-06,
5015
+ "loss": 0.0025,
5016
+ "step": 17275
5017
+ },
5018
+ {
5019
+ "epoch": 19.05286343612335,
5020
+ "grad_norm": 0.018339525908231735,
5021
+ "learning_rate": 2.7692307692307697e-06,
5022
+ "loss": 0.0023,
5023
+ "step": 17300
5024
+ },
5025
+ {
5026
+ "epoch": 19.080396475770925,
5027
+ "grad_norm": 0.04713983088731766,
5028
+ "learning_rate": 2.743589743589744e-06,
5029
+ "loss": 0.0026,
5030
+ "step": 17325
5031
+ },
5032
+ {
5033
+ "epoch": 19.1079295154185,
5034
+ "grad_norm": 0.1079772338271141,
5035
+ "learning_rate": 2.717948717948718e-06,
5036
+ "loss": 0.0025,
5037
+ "step": 17350
5038
+ },
5039
+ {
5040
+ "epoch": 19.13546255506608,
5041
+ "grad_norm": 0.017452696338295937,
5042
+ "learning_rate": 2.6923076923076923e-06,
5043
+ "loss": 0.0022,
5044
+ "step": 17375
5045
+ },
5046
+ {
5047
+ "epoch": 19.162995594713657,
5048
+ "grad_norm": 0.017971495166420937,
5049
+ "learning_rate": 2.666666666666667e-06,
5050
+ "loss": 0.0028,
5051
+ "step": 17400
5052
+ },
5053
+ {
5054
+ "epoch": 19.190528634361232,
5055
+ "grad_norm": 0.022472327575087547,
5056
+ "learning_rate": 2.6410256410256413e-06,
5057
+ "loss": 0.0023,
5058
+ "step": 17425
5059
+ },
5060
+ {
5061
+ "epoch": 19.218061674008812,
5062
+ "grad_norm": 0.024212457239627838,
5063
+ "learning_rate": 2.615384615384616e-06,
5064
+ "loss": 0.0023,
5065
+ "step": 17450
5066
+ },
5067
+ {
5068
+ "epoch": 19.245594713656388,
5069
+ "grad_norm": 0.018772531300783157,
5070
+ "learning_rate": 2.5897435897435903e-06,
5071
+ "loss": 0.0024,
5072
+ "step": 17475
5073
+ },
5074
+ {
5075
+ "epoch": 19.273127753303964,
5076
+ "grad_norm": 0.01784471981227398,
5077
+ "learning_rate": 2.564102564102564e-06,
5078
+ "loss": 0.0022,
5079
+ "step": 17500
5080
+ },
5081
+ {
5082
+ "epoch": 19.300660792951543,
5083
+ "grad_norm": 0.020429756492376328,
5084
+ "learning_rate": 2.5384615384615385e-06,
5085
+ "loss": 0.0026,
5086
+ "step": 17525
5087
+ },
5088
+ {
5089
+ "epoch": 19.32819383259912,
5090
+ "grad_norm": 0.022216424345970154,
5091
+ "learning_rate": 2.512820512820513e-06,
5092
+ "loss": 0.0023,
5093
+ "step": 17550
5094
+ },
5095
+ {
5096
+ "epoch": 19.355726872246695,
5097
+ "grad_norm": 0.017014402896165848,
5098
+ "learning_rate": 2.4871794871794875e-06,
5099
+ "loss": 0.0022,
5100
+ "step": 17575
5101
+ },
5102
+ {
5103
+ "epoch": 19.383259911894275,
5104
+ "grad_norm": 0.02979693002998829,
5105
+ "learning_rate": 2.461538461538462e-06,
5106
+ "loss": 0.0024,
5107
+ "step": 17600
5108
+ },
5109
+ {
5110
+ "epoch": 19.41079295154185,
5111
+ "grad_norm": 0.07756248861551285,
5112
+ "learning_rate": 2.435897435897436e-06,
5113
+ "loss": 0.0038,
5114
+ "step": 17625
5115
+ },
5116
+ {
5117
+ "epoch": 19.438325991189426,
5118
+ "grad_norm": 0.027045181021094322,
5119
+ "learning_rate": 2.4102564102564105e-06,
5120
+ "loss": 0.003,
5121
+ "step": 17650
5122
+ },
5123
+ {
5124
+ "epoch": 19.465859030837006,
5125
+ "grad_norm": 0.02446981891989708,
5126
+ "learning_rate": 2.384615384615385e-06,
5127
+ "loss": 0.0034,
5128
+ "step": 17675
5129
+ },
5130
+ {
5131
+ "epoch": 19.493392070484582,
5132
+ "grad_norm": 0.01992960087954998,
5133
+ "learning_rate": 2.358974358974359e-06,
5134
+ "loss": 0.0025,
5135
+ "step": 17700
5136
+ },
5137
+ {
5138
+ "epoch": 19.520925110132158,
5139
+ "grad_norm": 0.030592037364840508,
5140
+ "learning_rate": 2.3333333333333336e-06,
5141
+ "loss": 0.0035,
5142
+ "step": 17725
5143
+ },
5144
+ {
5145
+ "epoch": 19.548458149779737,
5146
+ "grad_norm": 0.018608825281262398,
5147
+ "learning_rate": 2.307692307692308e-06,
5148
+ "loss": 0.0028,
5149
+ "step": 17750
5150
+ },
5151
+ {
5152
+ "epoch": 19.575991189427313,
5153
+ "grad_norm": 0.021949810907244682,
5154
+ "learning_rate": 2.282051282051282e-06,
5155
+ "loss": 0.0026,
5156
+ "step": 17775
5157
+ },
5158
+ {
5159
+ "epoch": 19.60352422907489,
5160
+ "grad_norm": 0.029381688684225082,
5161
+ "learning_rate": 2.2564102564102566e-06,
5162
+ "loss": 0.0023,
5163
+ "step": 17800
5164
+ },
5165
+ {
5166
+ "epoch": 19.63105726872247,
5167
+ "grad_norm": 0.023357443511486053,
5168
+ "learning_rate": 2.230769230769231e-06,
5169
+ "loss": 0.0027,
5170
+ "step": 17825
5171
+ },
5172
+ {
5173
+ "epoch": 19.658590308370044,
5174
+ "grad_norm": 0.014633470214903355,
5175
+ "learning_rate": 2.2051282051282052e-06,
5176
+ "loss": 0.0022,
5177
+ "step": 17850
5178
+ },
5179
+ {
5180
+ "epoch": 19.68612334801762,
5181
+ "grad_norm": 0.018193107098340988,
5182
+ "learning_rate": 2.1794871794871797e-06,
5183
+ "loss": 0.0025,
5184
+ "step": 17875
5185
+ },
5186
+ {
5187
+ "epoch": 19.7136563876652,
5188
+ "grad_norm": 0.0176758524030447,
5189
+ "learning_rate": 2.153846153846154e-06,
5190
+ "loss": 0.0022,
5191
+ "step": 17900
5192
+ },
5193
+ {
5194
+ "epoch": 19.741189427312776,
5195
+ "grad_norm": 0.021504636853933334,
5196
+ "learning_rate": 2.1282051282051283e-06,
5197
+ "loss": 0.0023,
5198
+ "step": 17925
5199
+ },
5200
+ {
5201
+ "epoch": 19.76872246696035,
5202
+ "grad_norm": 0.09170462936162949,
5203
+ "learning_rate": 2.1025641025641028e-06,
5204
+ "loss": 0.0026,
5205
+ "step": 17950
5206
+ },
5207
+ {
5208
+ "epoch": 19.79625550660793,
5209
+ "grad_norm": 0.01736604981124401,
5210
+ "learning_rate": 2.0769230769230773e-06,
5211
+ "loss": 0.0023,
5212
+ "step": 17975
5213
+ },
5214
+ {
5215
+ "epoch": 19.823788546255507,
5216
+ "grad_norm": 0.03202914819121361,
5217
+ "learning_rate": 2.0512820512820513e-06,
5218
+ "loss": 0.0022,
5219
+ "step": 18000
5220
+ },
5221
+ {
5222
+ "epoch": 19.823788546255507,
5223
+ "eval_cer": 55.551798058074354,
5224
+ "eval_loss": 1.0446730852127075,
5225
+ "eval_runtime": 801.1525,
5226
+ "eval_samples_per_second": 13.207,
5227
+ "eval_steps_per_second": 3.303,
5228
+ "eval_wer": 96.61480433757662,
5229
+ "step": 18000
5230
  }
5231
  ],
5232
  "logging_steps": 25,
 
5246
  "attributes": {}
5247
  }
5248
  },
5249
+ "total_flos": 3.0387073320631665e+19,
5250
  "train_batch_size": 4,
5251
  "trial_name": null,
5252
  "trial_params": null