vit5-base-newformat / trainer_state.json
duyvu8373's picture
Upload 12 files
ba2f18c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 46.51162790697674,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bp": 0.534018833785647,
"eval_counts": [
4494,
3736,
3174,
2624
],
"eval_loss": 0.11933137476444244,
"eval_precisions": [
90.82457558609539,
84.48665762098598,
81.46817248459959,
77.86350148367953
],
"eval_ref_len": 8052,
"eval_runtime": 17.1328,
"eval_samples_per_second": 30.701,
"eval_score": 44.60514203554415,
"eval_steps_per_second": 0.992,
"eval_sys_len": 4948,
"eval_totals": [
4948,
4422,
3896,
3370
],
"step": 129
},
{
"epoch": 2.0,
"eval_bp": 0.5726693333498457,
"eval_counts": [
4791,
4091,
3534,
2987
],
"eval_loss": 0.06976903229951859,
"eval_precisions": [
92.66924564796905,
88.09216192937123,
85.81835842642059,
83.15701559020044
],
"eval_ref_len": 8052,
"eval_runtime": 14.2327,
"eval_samples_per_second": 36.957,
"eval_score": 50.0313158874479,
"eval_steps_per_second": 1.194,
"eval_sys_len": 5170,
"eval_totals": [
5170,
4644,
4118,
3592
],
"step": 258
},
{
"epoch": 3.0,
"eval_bp": 0.5766332581310416,
"eval_counts": [
4894,
4247,
3708,
3173
],
"eval_loss": 0.05093446373939514,
"eval_precisions": [
94.24224918159061,
91.00064281122776,
89.54358850519198,
87.7731673582296
],
"eval_ref_len": 8052,
"eval_runtime": 14.4915,
"eval_samples_per_second": 36.297,
"eval_score": 52.24819351845175,
"eval_steps_per_second": 1.173,
"eval_sys_len": 5193,
"eval_totals": [
5193,
4667,
4141,
3615
],
"step": 387
},
{
"epoch": 3.88,
"learning_rate": 1.8449612403100777e-05,
"loss": 0.2265,
"step": 500
},
{
"epoch": 4.0,
"eval_bp": 0.561945267714356,
"eval_counts": [
4947,
4355,
3819,
3283
],
"eval_loss": 0.034593429416418076,
"eval_precisions": [
96.84808144087705,
95.04583151462244,
94.15680473372781,
93.0028328611898
],
"eval_ref_len": 8052,
"eval_runtime": 14.4253,
"eval_samples_per_second": 36.464,
"eval_score": 53.246003291698294,
"eval_steps_per_second": 1.178,
"eval_sys_len": 5108,
"eval_totals": [
5108,
4582,
4056,
3530
],
"step": 516
},
{
"epoch": 5.0,
"eval_bp": 0.579214183971878,
"eval_counts": [
5015,
4428,
3891,
3356
],
"eval_loss": 0.0272398479282856,
"eval_precisions": [
96.29416282642089,
94.57496796240923,
93.62367661212704,
92.45179063360881
],
"eval_ref_len": 8052,
"eval_runtime": 14.2643,
"eval_samples_per_second": 36.875,
"eval_score": 54.57685231173965,
"eval_steps_per_second": 1.192,
"eval_sys_len": 5208,
"eval_totals": [
5208,
4682,
4156,
3630
],
"step": 645
},
{
"epoch": 6.0,
"eval_bp": 0.5805893001921,
"eval_counts": [
5064,
4489,
3963,
3433
],
"eval_loss": 0.021859439089894295,
"eval_precisions": [
97.08588957055214,
95.71428571428571,
95.17291066282421,
94.36503573391974
],
"eval_ref_len": 8052,
"eval_runtime": 14.2621,
"eval_samples_per_second": 36.881,
"eval_score": 55.492381352127445,
"eval_steps_per_second": 1.192,
"eval_sys_len": 5216,
"eval_totals": [
5216,
4690,
4164,
3638
],
"step": 774
},
{
"epoch": 7.0,
"eval_bp": 0.5776660318872596,
"eval_counts": [
5101,
4547,
4013,
3479
],
"eval_loss": 0.013949821703135967,
"eval_precisions": [
98.1150221196384,
97.30365931949497,
96.76874849288643,
96.07843137254902
],
"eval_ref_len": 8052,
"eval_runtime": 14.1956,
"eval_samples_per_second": 37.054,
"eval_score": 56.07034878827298,
"eval_steps_per_second": 1.198,
"eval_sys_len": 5199,
"eval_totals": [
5199,
4673,
4147,
3621
],
"step": 903
},
{
"epoch": 7.75,
"learning_rate": 1.689922480620155e-05,
"loss": 0.0485,
"step": 1000
},
{
"epoch": 8.0,
"eval_bp": 0.5903589930927092,
"eval_counts": [
5054,
4465,
3938,
3409
],
"eval_loss": 0.01422152854502201,
"eval_precisions": [
95.84676654655793,
94.05940594059406,
93.29542762378583,
92.25981055480379
],
"eval_ref_len": 8052,
"eval_runtime": 14.5535,
"eval_samples_per_second": 36.142,
"eval_score": 55.40887561474743,
"eval_steps_per_second": 1.168,
"eval_sys_len": 5273,
"eval_totals": [
5273,
4747,
4221,
3695
],
"step": 1032
},
{
"epoch": 9.0,
"eval_bp": 0.5766332581310416,
"eval_counts": [
5111,
4561,
4028,
3495
],
"eval_loss": 0.011180982924997807,
"eval_precisions": [
98.42095128057,
97.7287336618813,
97.27119053368752,
96.6804979253112
],
"eval_ref_len": 8052,
"eval_runtime": 14.213,
"eval_samples_per_second": 37.008,
"eval_score": 56.23515832604645,
"eval_steps_per_second": 1.196,
"eval_sys_len": 5193,
"eval_totals": [
5193,
4667,
4141,
3615
],
"step": 1161
},
{
"epoch": 10.0,
"eval_bp": 0.5891618704024797,
"eval_counts": [
5068,
4491,
3972,
3453
],
"eval_loss": 0.01184480544179678,
"eval_precisions": [
96.24003038359287,
94.74683544303798,
94.25723777883246,
93.62798264642082
],
"eval_ref_len": 8052,
"eval_runtime": 14.3293,
"eval_samples_per_second": 36.708,
"eval_score": 55.8013674935763,
"eval_steps_per_second": 1.186,
"eval_sys_len": 5266,
"eval_totals": [
5266,
4740,
4214,
3688
],
"step": 1290
},
{
"epoch": 11.0,
"eval_bp": 0.5816200061954333,
"eval_counts": [
5085,
4531,
4008,
3485
],
"eval_loss": 0.006630906369537115,
"eval_precisions": [
97.37648410570662,
96.48637137989779,
96.11510791366906,
95.63666300768386
],
"eval_ref_len": 8052,
"eval_runtime": 14.4178,
"eval_samples_per_second": 36.483,
"eval_score": 56.0690723204566,
"eval_steps_per_second": 1.179,
"eval_sys_len": 5222,
"eval_totals": [
5222,
4696,
4170,
3644
],
"step": 1419
},
{
"epoch": 11.63,
"learning_rate": 1.5348837209302328e-05,
"loss": 0.0249,
"step": 1500
},
{
"epoch": 12.0,
"eval_bp": 0.5738765755022939,
"eval_counts": [
5123,
4585,
4054,
3523
],
"eval_loss": 0.0072512696497142315,
"eval_precisions": [
98.95692485995751,
98.58095033326167,
98.27878787878788,
97.88830230619617
],
"eval_ref_len": 8052,
"eval_runtime": 14.278,
"eval_samples_per_second": 36.84,
"eval_score": 56.48406484323021,
"eval_steps_per_second": 1.191,
"eval_sys_len": 5177,
"eval_totals": [
5177,
4651,
4125,
3599
],
"step": 1548
},
{
"epoch": 13.0,
"eval_bp": 0.57938612601173,
"eval_counts": [
5102,
4550,
4024,
3498
],
"eval_loss": 0.007895253598690033,
"eval_precisions": [
97.94586292954502,
97.15994020926756,
96.80057733942748,
96.33709721839713
],
"eval_ref_len": 8052,
"eval_runtime": 14.2797,
"eval_samples_per_second": 36.835,
"eval_score": 56.23468946332795,
"eval_steps_per_second": 1.191,
"eval_sys_len": 5209,
"eval_totals": [
5209,
4683,
4157,
3631
],
"step": 1677
},
{
"epoch": 14.0,
"eval_bp": 0.5781822172087536,
"eval_counts": [
5112,
4566,
4040,
3515
],
"eval_loss": 0.0048005045391619205,
"eval_precisions": [
98.26989619377163,
97.6475620188195,
97.34939759036145,
96.99227373068433
],
"eval_ref_len": 8052,
"eval_runtime": 14.651,
"eval_samples_per_second": 35.902,
"eval_score": 56.40957242906697,
"eval_steps_per_second": 1.16,
"eval_sys_len": 5202,
"eval_totals": [
5202,
4676,
4150,
3624
],
"step": 1806
},
{
"epoch": 15.0,
"eval_bp": 0.5750830920334751,
"eval_counts": [
5115,
4573,
4046,
3520
],
"eval_loss": 0.003846166655421257,
"eval_precisions": [
98.66898148148148,
98.17518248175182,
97.918683446273,
97.61508596783139
],
"eval_ref_len": 8052,
"eval_runtime": 14.3879,
"eval_samples_per_second": 36.559,
"eval_score": 56.41204145654326,
"eval_steps_per_second": 1.182,
"eval_sys_len": 5184,
"eval_totals": [
5184,
4658,
4132,
3606
],
"step": 1935
},
{
"epoch": 15.5,
"learning_rate": 1.3798449612403102e-05,
"loss": 0.0146,
"step": 2000
},
{
"epoch": 16.0,
"eval_bp": 0.5697344510837399,
"eval_counts": [
5127,
4597,
4071,
3545
],
"eval_loss": 0.002730604959651828,
"eval_precisions": [
99.49543954977683,
99.35163172682084,
99.26847110460864,
99.16083916083916
],
"eval_ref_len": 8052,
"eval_runtime": 14.1671,
"eval_samples_per_second": 37.128,
"eval_score": 56.58546744480375,
"eval_steps_per_second": 1.2,
"eval_sys_len": 5153,
"eval_totals": [
5153,
4627,
4101,
3575
],
"step": 2064
},
{
"epoch": 17.0,
"eval_bp": 0.5742213686422221,
"eval_counts": [
5114,
4576,
4052,
3528
],
"eval_loss": 0.003083485411480069,
"eval_precisions": [
98.74493145394864,
98.3451536643026,
98.18269929731039,
97.97278533740628
],
"eval_ref_len": 8052,
"eval_runtime": 14.5698,
"eval_samples_per_second": 36.102,
"eval_score": 56.45226865658991,
"eval_steps_per_second": 1.167,
"eval_sys_len": 5179,
"eval_totals": [
5179,
4653,
4127,
3601
],
"step": 2193
},
{
"epoch": 18.0,
"eval_bp": 0.5676602159962684,
"eval_counts": [
5136,
4610,
4082,
3554
],
"eval_loss": 0.0022970717400312424,
"eval_precisions": [
99.9027426570706,
99.89165763813651,
99.82880899975544,
99.74740387314061
],
"eval_ref_len": 8052,
"eval_runtime": 14.1909,
"eval_samples_per_second": 37.066,
"eval_score": 56.67669127411511,
"eval_steps_per_second": 1.198,
"eval_sys_len": 5141,
"eval_totals": [
5141,
4615,
4089,
3563
],
"step": 2322
},
{
"epoch": 19.0,
"eval_bp": 0.5695616786732568,
"eval_counts": [
5126,
4592,
4063,
3534
],
"eval_loss": 0.0013985991245135665,
"eval_precisions": [
99.49534161490683,
99.26502377864246,
99.09756097560975,
98.88080581980974
],
"eval_ref_len": 8052,
"eval_runtime": 14.2655,
"eval_samples_per_second": 36.872,
"eval_score": 56.49164894423193,
"eval_steps_per_second": 1.192,
"eval_sys_len": 5152,
"eval_totals": [
5152,
4626,
4100,
3574
],
"step": 2451
},
{
"epoch": 19.38,
"learning_rate": 1.2248062015503876e-05,
"loss": 0.0095,
"step": 2500
},
{
"epoch": 20.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5132,
4602,
4074,
3546
],
"eval_loss": 0.001880223280750215,
"eval_precisions": [
99.8637867289356,
99.76154346412314,
99.68191827746513,
99.5787700084246
],
"eval_ref_len": 8052,
"eval_runtime": 14.4294,
"eval_samples_per_second": 36.453,
"eval_score": 56.57340500595413,
"eval_steps_per_second": 1.178,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 2580
},
{
"epoch": 21.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4606,
4078,
3550
],
"eval_loss": 0.000961140263825655,
"eval_precisions": [
99.90270480638256,
99.84825493171472,
99.77978957670663,
99.69109800617804
],
"eval_ref_len": 8052,
"eval_runtime": 14.1969,
"eval_samples_per_second": 37.05,
"eval_score": 56.621048487262286,
"eval_steps_per_second": 1.197,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 2709
},
{
"epoch": 22.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0009278175421059132,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.1806,
"eval_samples_per_second": 37.093,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.199,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 2838
},
{
"epoch": 23.0,
"eval_bp": 0.5702526802993914,
"eval_counts": [
5125,
4592,
4066,
3540
],
"eval_loss": 0.002242365386337042,
"eval_precisions": [
99.39875872769589,
99.1792656587473,
99.07407407407408,
98.93795416433763
],
"eval_ref_len": 8052,
"eval_runtime": 14.3753,
"eval_samples_per_second": 36.591,
"eval_score": 56.5390535949233,
"eval_steps_per_second": 1.183,
"eval_sys_len": 5156,
"eval_totals": [
5156,
4630,
4104,
3578
],
"step": 2967
},
{
"epoch": 23.26,
"learning_rate": 1.0697674418604651e-05,
"loss": 0.0066,
"step": 3000
},
{
"epoch": 24.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4606,
4078,
3550
],
"eval_loss": 0.0007149834418669343,
"eval_precisions": [
99.90270480638256,
99.84825493171472,
99.77978957670663,
99.69109800617804
],
"eval_ref_len": 8052,
"eval_runtime": 14.2084,
"eval_samples_per_second": 37.02,
"eval_score": 56.621048487262286,
"eval_steps_per_second": 1.196,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 3096
},
{
"epoch": 25.0,
"eval_bp": 0.5692160898808599,
"eval_counts": [
5130,
4600,
4072,
3544
],
"eval_loss": 0.0007009973051026464,
"eval_precisions": [
99.6116504854369,
99.48096885813149,
99.36554416788677,
99.21612541993281
],
"eval_ref_len": 8052,
"eval_runtime": 14.1732,
"eval_samples_per_second": 37.112,
"eval_score": 56.59059013212041,
"eval_steps_per_second": 1.199,
"eval_sys_len": 5150,
"eval_totals": [
5150,
4624,
4098,
3572
],
"step": 3225
},
{
"epoch": 26.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0008850299054756761,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.4143,
"eval_samples_per_second": 36.492,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.179,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 3354
},
{
"epoch": 27.0,
"eval_bp": 0.5690432735111319,
"eval_counts": [
5129,
4599,
4072,
3545
],
"eval_loss": 0.0007779916049912572,
"eval_precisions": [
99.61157506311905,
99.48085658663206,
99.38979741274103,
99.27191262951554
],
"eval_ref_len": 8052,
"eval_runtime": 14.1435,
"eval_samples_per_second": 37.19,
"eval_score": 56.584785454136124,
"eval_steps_per_second": 1.202,
"eval_sys_len": 5149,
"eval_totals": [
5149,
4623,
4097,
3571
],
"step": 3483
},
{
"epoch": 27.13,
"learning_rate": 9.147286821705427e-06,
"loss": 0.0047,
"step": 3500
},
{
"epoch": 28.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0003345063014421612,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2353,
"eval_samples_per_second": 36.951,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.194,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 3612
},
{
"epoch": 29.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0004555524792522192,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.219,
"eval_samples_per_second": 36.993,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.196,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 3741
},
{
"epoch": 30.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0002565362665336579,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.4474,
"eval_samples_per_second": 36.408,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.177,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 3870
},
{
"epoch": 31.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.00015086405619513243,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2355,
"eval_samples_per_second": 36.95,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.194,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 3999
},
{
"epoch": 31.01,
"learning_rate": 7.596899224806202e-06,
"loss": 0.0037,
"step": 4000
},
{
"epoch": 32.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.00028631059103645384,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2512,
"eval_samples_per_second": 36.909,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.193,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4128
},
{
"epoch": 33.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.00013379484880715609,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.5025,
"eval_samples_per_second": 36.27,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.172,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4257
},
{
"epoch": 34.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0007538048666901886,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2097,
"eval_samples_per_second": 37.017,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.196,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4386
},
{
"epoch": 34.88,
"learning_rate": 6.046511627906977e-06,
"loss": 0.0026,
"step": 4500
},
{
"epoch": 35.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.00037576485192403197,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2801,
"eval_samples_per_second": 36.835,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.19,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4515
},
{
"epoch": 36.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0005439831875264645,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.4879,
"eval_samples_per_second": 36.306,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.173,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4644
},
{
"epoch": 37.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.00012769590830430388,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.3104,
"eval_samples_per_second": 36.756,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.188,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4773
},
{
"epoch": 38.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 8.528557373210788e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2526,
"eval_samples_per_second": 36.906,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.193,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 4902
},
{
"epoch": 38.76,
"learning_rate": 4.4961240310077525e-06,
"loss": 0.002,
"step": 5000
},
{
"epoch": 39.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 9.439041605219245e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.6256,
"eval_samples_per_second": 35.964,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.162,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5031
},
{
"epoch": 40.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.0001698030246188864,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2726,
"eval_samples_per_second": 36.854,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.191,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5160
},
{
"epoch": 41.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 0.00026142288697883487,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.1981,
"eval_samples_per_second": 37.047,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.197,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5289
},
{
"epoch": 42.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 6.877077976241708e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.5632,
"eval_samples_per_second": 36.118,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.167,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5418
},
{
"epoch": 42.64,
"learning_rate": 2.9457364341085276e-06,
"loss": 0.0017,
"step": 5500
},
{
"epoch": 43.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 3.3805001294240355e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.1656,
"eval_samples_per_second": 37.132,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.2,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5547
},
{
"epoch": 44.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 1.8181766790803522e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2765,
"eval_samples_per_second": 36.844,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.191,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5676
},
{
"epoch": 45.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 3.8955997297307476e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.2512,
"eval_samples_per_second": 36.909,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.193,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5805
},
{
"epoch": 46.0,
"eval_bp": 0.5673143055770848,
"eval_counts": [
5134,
4608,
4082,
3556
],
"eval_loss": 5.0567497964948416e-05,
"eval_precisions": [
99.90270480638256,
99.89161066551051,
99.87766087594812,
99.8595900028082
],
"eval_ref_len": 8052,
"eval_runtime": 14.3515,
"eval_samples_per_second": 36.651,
"eval_score": 56.6649925424657,
"eval_steps_per_second": 1.185,
"eval_sys_len": 5139,
"eval_totals": [
5139,
4613,
4087,
3561
],
"step": 5934
},
{
"epoch": 46.51,
"learning_rate": 1.3953488372093025e-06,
"loss": 0.0014,
"step": 6000
}
],
"logging_steps": 500,
"max_steps": 6450,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 2.345536665897984e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}