{ "best_metric": null, "best_model_checkpoint": null, "epoch": 46.51162790697674, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bp": 0.534018833785647, "eval_counts": [ 4494, 3736, 3174, 2624 ], "eval_loss": 0.11933137476444244, "eval_precisions": [ 90.82457558609539, 84.48665762098598, 81.46817248459959, 77.86350148367953 ], "eval_ref_len": 8052, "eval_runtime": 17.1328, "eval_samples_per_second": 30.701, "eval_score": 44.60514203554415, "eval_steps_per_second": 0.992, "eval_sys_len": 4948, "eval_totals": [ 4948, 4422, 3896, 3370 ], "step": 129 }, { "epoch": 2.0, "eval_bp": 0.5726693333498457, "eval_counts": [ 4791, 4091, 3534, 2987 ], "eval_loss": 0.06976903229951859, "eval_precisions": [ 92.66924564796905, 88.09216192937123, 85.81835842642059, 83.15701559020044 ], "eval_ref_len": 8052, "eval_runtime": 14.2327, "eval_samples_per_second": 36.957, "eval_score": 50.0313158874479, "eval_steps_per_second": 1.194, "eval_sys_len": 5170, "eval_totals": [ 5170, 4644, 4118, 3592 ], "step": 258 }, { "epoch": 3.0, "eval_bp": 0.5766332581310416, "eval_counts": [ 4894, 4247, 3708, 3173 ], "eval_loss": 0.05093446373939514, "eval_precisions": [ 94.24224918159061, 91.00064281122776, 89.54358850519198, 87.7731673582296 ], "eval_ref_len": 8052, "eval_runtime": 14.4915, "eval_samples_per_second": 36.297, "eval_score": 52.24819351845175, "eval_steps_per_second": 1.173, "eval_sys_len": 5193, "eval_totals": [ 5193, 4667, 4141, 3615 ], "step": 387 }, { "epoch": 3.88, "learning_rate": 1.8449612403100777e-05, "loss": 0.2265, "step": 500 }, { "epoch": 4.0, "eval_bp": 0.561945267714356, "eval_counts": [ 4947, 4355, 3819, 3283 ], "eval_loss": 0.034593429416418076, "eval_precisions": [ 96.84808144087705, 95.04583151462244, 94.15680473372781, 93.0028328611898 ], "eval_ref_len": 8052, "eval_runtime": 14.4253, "eval_samples_per_second": 36.464, "eval_score": 53.246003291698294, "eval_steps_per_second": 1.178, "eval_sys_len": 5108, "eval_totals": [ 5108, 4582, 4056, 3530 ], "step": 516 }, { "epoch": 5.0, "eval_bp": 0.579214183971878, "eval_counts": [ 5015, 4428, 3891, 3356 ], "eval_loss": 0.0272398479282856, "eval_precisions": [ 96.29416282642089, 94.57496796240923, 93.62367661212704, 92.45179063360881 ], "eval_ref_len": 8052, "eval_runtime": 14.2643, "eval_samples_per_second": 36.875, "eval_score": 54.57685231173965, "eval_steps_per_second": 1.192, "eval_sys_len": 5208, "eval_totals": [ 5208, 4682, 4156, 3630 ], "step": 645 }, { "epoch": 6.0, "eval_bp": 0.5805893001921, "eval_counts": [ 5064, 4489, 3963, 3433 ], "eval_loss": 0.021859439089894295, "eval_precisions": [ 97.08588957055214, 95.71428571428571, 95.17291066282421, 94.36503573391974 ], "eval_ref_len": 8052, "eval_runtime": 14.2621, "eval_samples_per_second": 36.881, "eval_score": 55.492381352127445, "eval_steps_per_second": 1.192, "eval_sys_len": 5216, "eval_totals": [ 5216, 4690, 4164, 3638 ], "step": 774 }, { "epoch": 7.0, "eval_bp": 0.5776660318872596, "eval_counts": [ 5101, 4547, 4013, 3479 ], "eval_loss": 0.013949821703135967, "eval_precisions": [ 98.1150221196384, 97.30365931949497, 96.76874849288643, 96.07843137254902 ], "eval_ref_len": 8052, "eval_runtime": 14.1956, "eval_samples_per_second": 37.054, "eval_score": 56.07034878827298, "eval_steps_per_second": 1.198, "eval_sys_len": 5199, "eval_totals": [ 5199, 4673, 4147, 3621 ], "step": 903 }, { "epoch": 7.75, "learning_rate": 1.689922480620155e-05, "loss": 0.0485, "step": 1000 }, { "epoch": 8.0, "eval_bp": 0.5903589930927092, "eval_counts": [ 5054, 4465, 3938, 3409 ], "eval_loss": 0.01422152854502201, "eval_precisions": [ 95.84676654655793, 94.05940594059406, 93.29542762378583, 92.25981055480379 ], "eval_ref_len": 8052, "eval_runtime": 14.5535, "eval_samples_per_second": 36.142, "eval_score": 55.40887561474743, "eval_steps_per_second": 1.168, "eval_sys_len": 5273, "eval_totals": [ 5273, 4747, 4221, 3695 ], "step": 1032 }, { "epoch": 9.0, "eval_bp": 0.5766332581310416, "eval_counts": [ 5111, 4561, 4028, 3495 ], "eval_loss": 0.011180982924997807, "eval_precisions": [ 98.42095128057, 97.7287336618813, 97.27119053368752, 96.6804979253112 ], "eval_ref_len": 8052, "eval_runtime": 14.213, "eval_samples_per_second": 37.008, "eval_score": 56.23515832604645, "eval_steps_per_second": 1.196, "eval_sys_len": 5193, "eval_totals": [ 5193, 4667, 4141, 3615 ], "step": 1161 }, { "epoch": 10.0, "eval_bp": 0.5891618704024797, "eval_counts": [ 5068, 4491, 3972, 3453 ], "eval_loss": 0.01184480544179678, "eval_precisions": [ 96.24003038359287, 94.74683544303798, 94.25723777883246, 93.62798264642082 ], "eval_ref_len": 8052, "eval_runtime": 14.3293, "eval_samples_per_second": 36.708, "eval_score": 55.8013674935763, "eval_steps_per_second": 1.186, "eval_sys_len": 5266, "eval_totals": [ 5266, 4740, 4214, 3688 ], "step": 1290 }, { "epoch": 11.0, "eval_bp": 0.5816200061954333, "eval_counts": [ 5085, 4531, 4008, 3485 ], "eval_loss": 0.006630906369537115, "eval_precisions": [ 97.37648410570662, 96.48637137989779, 96.11510791366906, 95.63666300768386 ], "eval_ref_len": 8052, "eval_runtime": 14.4178, "eval_samples_per_second": 36.483, "eval_score": 56.0690723204566, "eval_steps_per_second": 1.179, "eval_sys_len": 5222, "eval_totals": [ 5222, 4696, 4170, 3644 ], "step": 1419 }, { "epoch": 11.63, "learning_rate": 1.5348837209302328e-05, "loss": 0.0249, "step": 1500 }, { "epoch": 12.0, "eval_bp": 0.5738765755022939, "eval_counts": [ 5123, 4585, 4054, 3523 ], "eval_loss": 0.0072512696497142315, "eval_precisions": [ 98.95692485995751, 98.58095033326167, 98.27878787878788, 97.88830230619617 ], "eval_ref_len": 8052, "eval_runtime": 14.278, "eval_samples_per_second": 36.84, "eval_score": 56.48406484323021, "eval_steps_per_second": 1.191, "eval_sys_len": 5177, "eval_totals": [ 5177, 4651, 4125, 3599 ], "step": 1548 }, { "epoch": 13.0, "eval_bp": 0.57938612601173, "eval_counts": [ 5102, 4550, 4024, 3498 ], "eval_loss": 0.007895253598690033, "eval_precisions": [ 97.94586292954502, 97.15994020926756, 96.80057733942748, 96.33709721839713 ], "eval_ref_len": 8052, "eval_runtime": 14.2797, "eval_samples_per_second": 36.835, "eval_score": 56.23468946332795, "eval_steps_per_second": 1.191, "eval_sys_len": 5209, "eval_totals": [ 5209, 4683, 4157, 3631 ], "step": 1677 }, { "epoch": 14.0, "eval_bp": 0.5781822172087536, "eval_counts": [ 5112, 4566, 4040, 3515 ], "eval_loss": 0.0048005045391619205, "eval_precisions": [ 98.26989619377163, 97.6475620188195, 97.34939759036145, 96.99227373068433 ], "eval_ref_len": 8052, "eval_runtime": 14.651, "eval_samples_per_second": 35.902, "eval_score": 56.40957242906697, "eval_steps_per_second": 1.16, "eval_sys_len": 5202, "eval_totals": [ 5202, 4676, 4150, 3624 ], "step": 1806 }, { "epoch": 15.0, "eval_bp": 0.5750830920334751, "eval_counts": [ 5115, 4573, 4046, 3520 ], "eval_loss": 0.003846166655421257, "eval_precisions": [ 98.66898148148148, 98.17518248175182, 97.918683446273, 97.61508596783139 ], "eval_ref_len": 8052, "eval_runtime": 14.3879, "eval_samples_per_second": 36.559, "eval_score": 56.41204145654326, "eval_steps_per_second": 1.182, "eval_sys_len": 5184, "eval_totals": [ 5184, 4658, 4132, 3606 ], "step": 1935 }, { "epoch": 15.5, "learning_rate": 1.3798449612403102e-05, "loss": 0.0146, "step": 2000 }, { "epoch": 16.0, "eval_bp": 0.5697344510837399, "eval_counts": [ 5127, 4597, 4071, 3545 ], "eval_loss": 0.002730604959651828, "eval_precisions": [ 99.49543954977683, 99.35163172682084, 99.26847110460864, 99.16083916083916 ], "eval_ref_len": 8052, "eval_runtime": 14.1671, "eval_samples_per_second": 37.128, "eval_score": 56.58546744480375, "eval_steps_per_second": 1.2, "eval_sys_len": 5153, "eval_totals": [ 5153, 4627, 4101, 3575 ], "step": 2064 }, { "epoch": 17.0, "eval_bp": 0.5742213686422221, "eval_counts": [ 5114, 4576, 4052, 3528 ], "eval_loss": 0.003083485411480069, "eval_precisions": [ 98.74493145394864, 98.3451536643026, 98.18269929731039, 97.97278533740628 ], "eval_ref_len": 8052, "eval_runtime": 14.5698, "eval_samples_per_second": 36.102, "eval_score": 56.45226865658991, "eval_steps_per_second": 1.167, "eval_sys_len": 5179, "eval_totals": [ 5179, 4653, 4127, 3601 ], "step": 2193 }, { "epoch": 18.0, "eval_bp": 0.5676602159962684, "eval_counts": [ 5136, 4610, 4082, 3554 ], "eval_loss": 0.0022970717400312424, "eval_precisions": [ 99.9027426570706, 99.89165763813651, 99.82880899975544, 99.74740387314061 ], "eval_ref_len": 8052, "eval_runtime": 14.1909, "eval_samples_per_second": 37.066, "eval_score": 56.67669127411511, "eval_steps_per_second": 1.198, "eval_sys_len": 5141, "eval_totals": [ 5141, 4615, 4089, 3563 ], "step": 2322 }, { "epoch": 19.0, "eval_bp": 0.5695616786732568, "eval_counts": [ 5126, 4592, 4063, 3534 ], "eval_loss": 0.0013985991245135665, "eval_precisions": [ 99.49534161490683, 99.26502377864246, 99.09756097560975, 98.88080581980974 ], "eval_ref_len": 8052, "eval_runtime": 14.2655, "eval_samples_per_second": 36.872, "eval_score": 56.49164894423193, "eval_steps_per_second": 1.192, "eval_sys_len": 5152, "eval_totals": [ 5152, 4626, 4100, 3574 ], "step": 2451 }, { "epoch": 19.38, "learning_rate": 1.2248062015503876e-05, "loss": 0.0095, "step": 2500 }, { "epoch": 20.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5132, 4602, 4074, 3546 ], "eval_loss": 0.001880223280750215, "eval_precisions": [ 99.8637867289356, 99.76154346412314, 99.68191827746513, 99.5787700084246 ], "eval_ref_len": 8052, "eval_runtime": 14.4294, "eval_samples_per_second": 36.453, "eval_score": 56.57340500595413, "eval_steps_per_second": 1.178, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 2580 }, { "epoch": 21.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4606, 4078, 3550 ], "eval_loss": 0.000961140263825655, "eval_precisions": [ 99.90270480638256, 99.84825493171472, 99.77978957670663, 99.69109800617804 ], "eval_ref_len": 8052, "eval_runtime": 14.1969, "eval_samples_per_second": 37.05, "eval_score": 56.621048487262286, "eval_steps_per_second": 1.197, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 2709 }, { "epoch": 22.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0009278175421059132, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.1806, "eval_samples_per_second": 37.093, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.199, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 2838 }, { "epoch": 23.0, "eval_bp": 0.5702526802993914, "eval_counts": [ 5125, 4592, 4066, 3540 ], "eval_loss": 0.002242365386337042, "eval_precisions": [ 99.39875872769589, 99.1792656587473, 99.07407407407408, 98.93795416433763 ], "eval_ref_len": 8052, "eval_runtime": 14.3753, "eval_samples_per_second": 36.591, "eval_score": 56.5390535949233, "eval_steps_per_second": 1.183, "eval_sys_len": 5156, "eval_totals": [ 5156, 4630, 4104, 3578 ], "step": 2967 }, { "epoch": 23.26, "learning_rate": 1.0697674418604651e-05, "loss": 0.0066, "step": 3000 }, { "epoch": 24.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4606, 4078, 3550 ], "eval_loss": 0.0007149834418669343, "eval_precisions": [ 99.90270480638256, 99.84825493171472, 99.77978957670663, 99.69109800617804 ], "eval_ref_len": 8052, "eval_runtime": 14.2084, "eval_samples_per_second": 37.02, "eval_score": 56.621048487262286, "eval_steps_per_second": 1.196, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 3096 }, { "epoch": 25.0, "eval_bp": 0.5692160898808599, "eval_counts": [ 5130, 4600, 4072, 3544 ], "eval_loss": 0.0007009973051026464, "eval_precisions": [ 99.6116504854369, 99.48096885813149, 99.36554416788677, 99.21612541993281 ], "eval_ref_len": 8052, "eval_runtime": 14.1732, "eval_samples_per_second": 37.112, "eval_score": 56.59059013212041, "eval_steps_per_second": 1.199, "eval_sys_len": 5150, "eval_totals": [ 5150, 4624, 4098, 3572 ], "step": 3225 }, { "epoch": 26.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0008850299054756761, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.4143, "eval_samples_per_second": 36.492, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.179, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 3354 }, { "epoch": 27.0, "eval_bp": 0.5690432735111319, "eval_counts": [ 5129, 4599, 4072, 3545 ], "eval_loss": 0.0007779916049912572, "eval_precisions": [ 99.61157506311905, 99.48085658663206, 99.38979741274103, 99.27191262951554 ], "eval_ref_len": 8052, "eval_runtime": 14.1435, "eval_samples_per_second": 37.19, "eval_score": 56.584785454136124, "eval_steps_per_second": 1.202, "eval_sys_len": 5149, "eval_totals": [ 5149, 4623, 4097, 3571 ], "step": 3483 }, { "epoch": 27.13, "learning_rate": 9.147286821705427e-06, "loss": 0.0047, "step": 3500 }, { "epoch": 28.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0003345063014421612, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2353, "eval_samples_per_second": 36.951, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.194, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 3612 }, { "epoch": 29.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0004555524792522192, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.219, "eval_samples_per_second": 36.993, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.196, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 3741 }, { "epoch": 30.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0002565362665336579, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.4474, "eval_samples_per_second": 36.408, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.177, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 3870 }, { "epoch": 31.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.00015086405619513243, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2355, "eval_samples_per_second": 36.95, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.194, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 3999 }, { "epoch": 31.01, "learning_rate": 7.596899224806202e-06, "loss": 0.0037, "step": 4000 }, { "epoch": 32.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.00028631059103645384, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2512, "eval_samples_per_second": 36.909, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.193, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4128 }, { "epoch": 33.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.00013379484880715609, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.5025, "eval_samples_per_second": 36.27, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.172, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4257 }, { "epoch": 34.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0007538048666901886, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2097, "eval_samples_per_second": 37.017, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.196, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4386 }, { "epoch": 34.88, "learning_rate": 6.046511627906977e-06, "loss": 0.0026, "step": 4500 }, { "epoch": 35.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.00037576485192403197, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2801, "eval_samples_per_second": 36.835, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.19, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4515 }, { "epoch": 36.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0005439831875264645, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.4879, "eval_samples_per_second": 36.306, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.173, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4644 }, { "epoch": 37.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.00012769590830430388, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.3104, "eval_samples_per_second": 36.756, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.188, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4773 }, { "epoch": 38.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 8.528557373210788e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2526, "eval_samples_per_second": 36.906, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.193, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 4902 }, { "epoch": 38.76, "learning_rate": 4.4961240310077525e-06, "loss": 0.002, "step": 5000 }, { "epoch": 39.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 9.439041605219245e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.6256, "eval_samples_per_second": 35.964, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.162, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5031 }, { "epoch": 40.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.0001698030246188864, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2726, "eval_samples_per_second": 36.854, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.191, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5160 }, { "epoch": 41.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 0.00026142288697883487, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.1981, "eval_samples_per_second": 37.047, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.197, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5289 }, { "epoch": 42.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 6.877077976241708e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.5632, "eval_samples_per_second": 36.118, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.167, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5418 }, { "epoch": 42.64, "learning_rate": 2.9457364341085276e-06, "loss": 0.0017, "step": 5500 }, { "epoch": 43.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 3.3805001294240355e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.1656, "eval_samples_per_second": 37.132, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.2, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5547 }, { "epoch": 44.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 1.8181766790803522e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2765, "eval_samples_per_second": 36.844, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.191, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5676 }, { "epoch": 45.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 3.8955997297307476e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.2512, "eval_samples_per_second": 36.909, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.193, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5805 }, { "epoch": 46.0, "eval_bp": 0.5673143055770848, "eval_counts": [ 5134, 4608, 4082, 3556 ], "eval_loss": 5.0567497964948416e-05, "eval_precisions": [ 99.90270480638256, 99.89161066551051, 99.87766087594812, 99.8595900028082 ], "eval_ref_len": 8052, "eval_runtime": 14.3515, "eval_samples_per_second": 36.651, "eval_score": 56.6649925424657, "eval_steps_per_second": 1.185, "eval_sys_len": 5139, "eval_totals": [ 5139, 4613, 4087, 3561 ], "step": 5934 }, { "epoch": 46.51, "learning_rate": 1.3953488372093025e-06, "loss": 0.0014, "step": 6000 } ], "logging_steps": 500, "max_steps": 6450, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 2.345536665897984e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }