| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 46.51162790697674, | |
| "eval_steps": 500, | |
| "global_step": 6000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_bp": 0.534018833785647, | |
| "eval_counts": [ | |
| 4494, | |
| 3736, | |
| 3174, | |
| 2624 | |
| ], | |
| "eval_loss": 0.11933137476444244, | |
| "eval_precisions": [ | |
| 90.82457558609539, | |
| 84.48665762098598, | |
| 81.46817248459959, | |
| 77.86350148367953 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 17.1328, | |
| "eval_samples_per_second": 30.701, | |
| "eval_score": 44.60514203554415, | |
| "eval_steps_per_second": 0.992, | |
| "eval_sys_len": 4948, | |
| "eval_totals": [ | |
| 4948, | |
| 4422, | |
| 3896, | |
| 3370 | |
| ], | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bp": 0.5726693333498457, | |
| "eval_counts": [ | |
| 4791, | |
| 4091, | |
| 3534, | |
| 2987 | |
| ], | |
| "eval_loss": 0.06976903229951859, | |
| "eval_precisions": [ | |
| 92.66924564796905, | |
| 88.09216192937123, | |
| 85.81835842642059, | |
| 83.15701559020044 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2327, | |
| "eval_samples_per_second": 36.957, | |
| "eval_score": 50.0313158874479, | |
| "eval_steps_per_second": 1.194, | |
| "eval_sys_len": 5170, | |
| "eval_totals": [ | |
| 5170, | |
| 4644, | |
| 4118, | |
| 3592 | |
| ], | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bp": 0.5766332581310416, | |
| "eval_counts": [ | |
| 4894, | |
| 4247, | |
| 3708, | |
| 3173 | |
| ], | |
| "eval_loss": 0.05093446373939514, | |
| "eval_precisions": [ | |
| 94.24224918159061, | |
| 91.00064281122776, | |
| 89.54358850519198, | |
| 87.7731673582296 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4915, | |
| "eval_samples_per_second": 36.297, | |
| "eval_score": 52.24819351845175, | |
| "eval_steps_per_second": 1.173, | |
| "eval_sys_len": 5193, | |
| "eval_totals": [ | |
| 5193, | |
| 4667, | |
| 4141, | |
| 3615 | |
| ], | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.8449612403100777e-05, | |
| "loss": 0.2265, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bp": 0.561945267714356, | |
| "eval_counts": [ | |
| 4947, | |
| 4355, | |
| 3819, | |
| 3283 | |
| ], | |
| "eval_loss": 0.034593429416418076, | |
| "eval_precisions": [ | |
| 96.84808144087705, | |
| 95.04583151462244, | |
| 94.15680473372781, | |
| 93.0028328611898 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4253, | |
| "eval_samples_per_second": 36.464, | |
| "eval_score": 53.246003291698294, | |
| "eval_steps_per_second": 1.178, | |
| "eval_sys_len": 5108, | |
| "eval_totals": [ | |
| 5108, | |
| 4582, | |
| 4056, | |
| 3530 | |
| ], | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bp": 0.579214183971878, | |
| "eval_counts": [ | |
| 5015, | |
| 4428, | |
| 3891, | |
| 3356 | |
| ], | |
| "eval_loss": 0.0272398479282856, | |
| "eval_precisions": [ | |
| 96.29416282642089, | |
| 94.57496796240923, | |
| 93.62367661212704, | |
| 92.45179063360881 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2643, | |
| "eval_samples_per_second": 36.875, | |
| "eval_score": 54.57685231173965, | |
| "eval_steps_per_second": 1.192, | |
| "eval_sys_len": 5208, | |
| "eval_totals": [ | |
| 5208, | |
| 4682, | |
| 4156, | |
| 3630 | |
| ], | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bp": 0.5805893001921, | |
| "eval_counts": [ | |
| 5064, | |
| 4489, | |
| 3963, | |
| 3433 | |
| ], | |
| "eval_loss": 0.021859439089894295, | |
| "eval_precisions": [ | |
| 97.08588957055214, | |
| 95.71428571428571, | |
| 95.17291066282421, | |
| 94.36503573391974 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2621, | |
| "eval_samples_per_second": 36.881, | |
| "eval_score": 55.492381352127445, | |
| "eval_steps_per_second": 1.192, | |
| "eval_sys_len": 5216, | |
| "eval_totals": [ | |
| 5216, | |
| 4690, | |
| 4164, | |
| 3638 | |
| ], | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bp": 0.5776660318872596, | |
| "eval_counts": [ | |
| 5101, | |
| 4547, | |
| 4013, | |
| 3479 | |
| ], | |
| "eval_loss": 0.013949821703135967, | |
| "eval_precisions": [ | |
| 98.1150221196384, | |
| 97.30365931949497, | |
| 96.76874849288643, | |
| 96.07843137254902 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1956, | |
| "eval_samples_per_second": 37.054, | |
| "eval_score": 56.07034878827298, | |
| "eval_steps_per_second": 1.198, | |
| "eval_sys_len": 5199, | |
| "eval_totals": [ | |
| 5199, | |
| 4673, | |
| 4147, | |
| 3621 | |
| ], | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 1.689922480620155e-05, | |
| "loss": 0.0485, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bp": 0.5903589930927092, | |
| "eval_counts": [ | |
| 5054, | |
| 4465, | |
| 3938, | |
| 3409 | |
| ], | |
| "eval_loss": 0.01422152854502201, | |
| "eval_precisions": [ | |
| 95.84676654655793, | |
| 94.05940594059406, | |
| 93.29542762378583, | |
| 92.25981055480379 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.5535, | |
| "eval_samples_per_second": 36.142, | |
| "eval_score": 55.40887561474743, | |
| "eval_steps_per_second": 1.168, | |
| "eval_sys_len": 5273, | |
| "eval_totals": [ | |
| 5273, | |
| 4747, | |
| 4221, | |
| 3695 | |
| ], | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bp": 0.5766332581310416, | |
| "eval_counts": [ | |
| 5111, | |
| 4561, | |
| 4028, | |
| 3495 | |
| ], | |
| "eval_loss": 0.011180982924997807, | |
| "eval_precisions": [ | |
| 98.42095128057, | |
| 97.7287336618813, | |
| 97.27119053368752, | |
| 96.6804979253112 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.213, | |
| "eval_samples_per_second": 37.008, | |
| "eval_score": 56.23515832604645, | |
| "eval_steps_per_second": 1.196, | |
| "eval_sys_len": 5193, | |
| "eval_totals": [ | |
| 5193, | |
| 4667, | |
| 4141, | |
| 3615 | |
| ], | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bp": 0.5891618704024797, | |
| "eval_counts": [ | |
| 5068, | |
| 4491, | |
| 3972, | |
| 3453 | |
| ], | |
| "eval_loss": 0.01184480544179678, | |
| "eval_precisions": [ | |
| 96.24003038359287, | |
| 94.74683544303798, | |
| 94.25723777883246, | |
| 93.62798264642082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.3293, | |
| "eval_samples_per_second": 36.708, | |
| "eval_score": 55.8013674935763, | |
| "eval_steps_per_second": 1.186, | |
| "eval_sys_len": 5266, | |
| "eval_totals": [ | |
| 5266, | |
| 4740, | |
| 4214, | |
| 3688 | |
| ], | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bp": 0.5816200061954333, | |
| "eval_counts": [ | |
| 5085, | |
| 4531, | |
| 4008, | |
| 3485 | |
| ], | |
| "eval_loss": 0.006630906369537115, | |
| "eval_precisions": [ | |
| 97.37648410570662, | |
| 96.48637137989779, | |
| 96.11510791366906, | |
| 95.63666300768386 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4178, | |
| "eval_samples_per_second": 36.483, | |
| "eval_score": 56.0690723204566, | |
| "eval_steps_per_second": 1.179, | |
| "eval_sys_len": 5222, | |
| "eval_totals": [ | |
| 5222, | |
| 4696, | |
| 4170, | |
| 3644 | |
| ], | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 1.5348837209302328e-05, | |
| "loss": 0.0249, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bp": 0.5738765755022939, | |
| "eval_counts": [ | |
| 5123, | |
| 4585, | |
| 4054, | |
| 3523 | |
| ], | |
| "eval_loss": 0.0072512696497142315, | |
| "eval_precisions": [ | |
| 98.95692485995751, | |
| 98.58095033326167, | |
| 98.27878787878788, | |
| 97.88830230619617 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.278, | |
| "eval_samples_per_second": 36.84, | |
| "eval_score": 56.48406484323021, | |
| "eval_steps_per_second": 1.191, | |
| "eval_sys_len": 5177, | |
| "eval_totals": [ | |
| 5177, | |
| 4651, | |
| 4125, | |
| 3599 | |
| ], | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bp": 0.57938612601173, | |
| "eval_counts": [ | |
| 5102, | |
| 4550, | |
| 4024, | |
| 3498 | |
| ], | |
| "eval_loss": 0.007895253598690033, | |
| "eval_precisions": [ | |
| 97.94586292954502, | |
| 97.15994020926756, | |
| 96.80057733942748, | |
| 96.33709721839713 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2797, | |
| "eval_samples_per_second": 36.835, | |
| "eval_score": 56.23468946332795, | |
| "eval_steps_per_second": 1.191, | |
| "eval_sys_len": 5209, | |
| "eval_totals": [ | |
| 5209, | |
| 4683, | |
| 4157, | |
| 3631 | |
| ], | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bp": 0.5781822172087536, | |
| "eval_counts": [ | |
| 5112, | |
| 4566, | |
| 4040, | |
| 3515 | |
| ], | |
| "eval_loss": 0.0048005045391619205, | |
| "eval_precisions": [ | |
| 98.26989619377163, | |
| 97.6475620188195, | |
| 97.34939759036145, | |
| 96.99227373068433 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.651, | |
| "eval_samples_per_second": 35.902, | |
| "eval_score": 56.40957242906697, | |
| "eval_steps_per_second": 1.16, | |
| "eval_sys_len": 5202, | |
| "eval_totals": [ | |
| 5202, | |
| 4676, | |
| 4150, | |
| 3624 | |
| ], | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bp": 0.5750830920334751, | |
| "eval_counts": [ | |
| 5115, | |
| 4573, | |
| 4046, | |
| 3520 | |
| ], | |
| "eval_loss": 0.003846166655421257, | |
| "eval_precisions": [ | |
| 98.66898148148148, | |
| 98.17518248175182, | |
| 97.918683446273, | |
| 97.61508596783139 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.3879, | |
| "eval_samples_per_second": 36.559, | |
| "eval_score": 56.41204145654326, | |
| "eval_steps_per_second": 1.182, | |
| "eval_sys_len": 5184, | |
| "eval_totals": [ | |
| 5184, | |
| 4658, | |
| 4132, | |
| 3606 | |
| ], | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 15.5, | |
| "learning_rate": 1.3798449612403102e-05, | |
| "loss": 0.0146, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bp": 0.5697344510837399, | |
| "eval_counts": [ | |
| 5127, | |
| 4597, | |
| 4071, | |
| 3545 | |
| ], | |
| "eval_loss": 0.002730604959651828, | |
| "eval_precisions": [ | |
| 99.49543954977683, | |
| 99.35163172682084, | |
| 99.26847110460864, | |
| 99.16083916083916 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1671, | |
| "eval_samples_per_second": 37.128, | |
| "eval_score": 56.58546744480375, | |
| "eval_steps_per_second": 1.2, | |
| "eval_sys_len": 5153, | |
| "eval_totals": [ | |
| 5153, | |
| 4627, | |
| 4101, | |
| 3575 | |
| ], | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bp": 0.5742213686422221, | |
| "eval_counts": [ | |
| 5114, | |
| 4576, | |
| 4052, | |
| 3528 | |
| ], | |
| "eval_loss": 0.003083485411480069, | |
| "eval_precisions": [ | |
| 98.74493145394864, | |
| 98.3451536643026, | |
| 98.18269929731039, | |
| 97.97278533740628 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.5698, | |
| "eval_samples_per_second": 36.102, | |
| "eval_score": 56.45226865658991, | |
| "eval_steps_per_second": 1.167, | |
| "eval_sys_len": 5179, | |
| "eval_totals": [ | |
| 5179, | |
| 4653, | |
| 4127, | |
| 3601 | |
| ], | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bp": 0.5676602159962684, | |
| "eval_counts": [ | |
| 5136, | |
| 4610, | |
| 4082, | |
| 3554 | |
| ], | |
| "eval_loss": 0.0022970717400312424, | |
| "eval_precisions": [ | |
| 99.9027426570706, | |
| 99.89165763813651, | |
| 99.82880899975544, | |
| 99.74740387314061 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1909, | |
| "eval_samples_per_second": 37.066, | |
| "eval_score": 56.67669127411511, | |
| "eval_steps_per_second": 1.198, | |
| "eval_sys_len": 5141, | |
| "eval_totals": [ | |
| 5141, | |
| 4615, | |
| 4089, | |
| 3563 | |
| ], | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_bp": 0.5695616786732568, | |
| "eval_counts": [ | |
| 5126, | |
| 4592, | |
| 4063, | |
| 3534 | |
| ], | |
| "eval_loss": 0.0013985991245135665, | |
| "eval_precisions": [ | |
| 99.49534161490683, | |
| 99.26502377864246, | |
| 99.09756097560975, | |
| 98.88080581980974 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2655, | |
| "eval_samples_per_second": 36.872, | |
| "eval_score": 56.49164894423193, | |
| "eval_steps_per_second": 1.192, | |
| "eval_sys_len": 5152, | |
| "eval_totals": [ | |
| 5152, | |
| 4626, | |
| 4100, | |
| 3574 | |
| ], | |
| "step": 2451 | |
| }, | |
| { | |
| "epoch": 19.38, | |
| "learning_rate": 1.2248062015503876e-05, | |
| "loss": 0.0095, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5132, | |
| 4602, | |
| 4074, | |
| 3546 | |
| ], | |
| "eval_loss": 0.001880223280750215, | |
| "eval_precisions": [ | |
| 99.8637867289356, | |
| 99.76154346412314, | |
| 99.68191827746513, | |
| 99.5787700084246 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4294, | |
| "eval_samples_per_second": 36.453, | |
| "eval_score": 56.57340500595413, | |
| "eval_steps_per_second": 1.178, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4606, | |
| 4078, | |
| 3550 | |
| ], | |
| "eval_loss": 0.000961140263825655, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.84825493171472, | |
| 99.77978957670663, | |
| 99.69109800617804 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1969, | |
| "eval_samples_per_second": 37.05, | |
| "eval_score": 56.621048487262286, | |
| "eval_steps_per_second": 1.197, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 2709 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0009278175421059132, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1806, | |
| "eval_samples_per_second": 37.093, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.199, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_bp": 0.5702526802993914, | |
| "eval_counts": [ | |
| 5125, | |
| 4592, | |
| 4066, | |
| 3540 | |
| ], | |
| "eval_loss": 0.002242365386337042, | |
| "eval_precisions": [ | |
| 99.39875872769589, | |
| 99.1792656587473, | |
| 99.07407407407408, | |
| 98.93795416433763 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.3753, | |
| "eval_samples_per_second": 36.591, | |
| "eval_score": 56.5390535949233, | |
| "eval_steps_per_second": 1.183, | |
| "eval_sys_len": 5156, | |
| "eval_totals": [ | |
| 5156, | |
| 4630, | |
| 4104, | |
| 3578 | |
| ], | |
| "step": 2967 | |
| }, | |
| { | |
| "epoch": 23.26, | |
| "learning_rate": 1.0697674418604651e-05, | |
| "loss": 0.0066, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4606, | |
| 4078, | |
| 3550 | |
| ], | |
| "eval_loss": 0.0007149834418669343, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.84825493171472, | |
| 99.77978957670663, | |
| 99.69109800617804 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2084, | |
| "eval_samples_per_second": 37.02, | |
| "eval_score": 56.621048487262286, | |
| "eval_steps_per_second": 1.196, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_bp": 0.5692160898808599, | |
| "eval_counts": [ | |
| 5130, | |
| 4600, | |
| 4072, | |
| 3544 | |
| ], | |
| "eval_loss": 0.0007009973051026464, | |
| "eval_precisions": [ | |
| 99.6116504854369, | |
| 99.48096885813149, | |
| 99.36554416788677, | |
| 99.21612541993281 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1732, | |
| "eval_samples_per_second": 37.112, | |
| "eval_score": 56.59059013212041, | |
| "eval_steps_per_second": 1.199, | |
| "eval_sys_len": 5150, | |
| "eval_totals": [ | |
| 5150, | |
| 4624, | |
| 4098, | |
| 3572 | |
| ], | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0008850299054756761, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4143, | |
| "eval_samples_per_second": 36.492, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.179, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 3354 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_bp": 0.5690432735111319, | |
| "eval_counts": [ | |
| 5129, | |
| 4599, | |
| 4072, | |
| 3545 | |
| ], | |
| "eval_loss": 0.0007779916049912572, | |
| "eval_precisions": [ | |
| 99.61157506311905, | |
| 99.48085658663206, | |
| 99.38979741274103, | |
| 99.27191262951554 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1435, | |
| "eval_samples_per_second": 37.19, | |
| "eval_score": 56.584785454136124, | |
| "eval_steps_per_second": 1.202, | |
| "eval_sys_len": 5149, | |
| "eval_totals": [ | |
| 5149, | |
| 4623, | |
| 4097, | |
| 3571 | |
| ], | |
| "step": 3483 | |
| }, | |
| { | |
| "epoch": 27.13, | |
| "learning_rate": 9.147286821705427e-06, | |
| "loss": 0.0047, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0003345063014421612, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2353, | |
| "eval_samples_per_second": 36.951, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.194, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0004555524792522192, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.219, | |
| "eval_samples_per_second": 36.993, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.196, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 3741 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0002565362665336579, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4474, | |
| "eval_samples_per_second": 36.408, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.177, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.00015086405619513243, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2355, | |
| "eval_samples_per_second": 36.95, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.194, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 3999 | |
| }, | |
| { | |
| "epoch": 31.01, | |
| "learning_rate": 7.596899224806202e-06, | |
| "loss": 0.0037, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.00028631059103645384, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2512, | |
| "eval_samples_per_second": 36.909, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.193, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.00013379484880715609, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.5025, | |
| "eval_samples_per_second": 36.27, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.172, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4257 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0007538048666901886, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2097, | |
| "eval_samples_per_second": 37.017, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.196, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4386 | |
| }, | |
| { | |
| "epoch": 34.88, | |
| "learning_rate": 6.046511627906977e-06, | |
| "loss": 0.0026, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.00037576485192403197, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2801, | |
| "eval_samples_per_second": 36.835, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.19, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0005439831875264645, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.4879, | |
| "eval_samples_per_second": 36.306, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.173, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4644 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.00012769590830430388, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.3104, | |
| "eval_samples_per_second": 36.756, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.188, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4773 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 8.528557373210788e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2526, | |
| "eval_samples_per_second": 36.906, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.193, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 4902 | |
| }, | |
| { | |
| "epoch": 38.76, | |
| "learning_rate": 4.4961240310077525e-06, | |
| "loss": 0.002, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 9.439041605219245e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.6256, | |
| "eval_samples_per_second": 35.964, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.162, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5031 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.0001698030246188864, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2726, | |
| "eval_samples_per_second": 36.854, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.191, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 0.00026142288697883487, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1981, | |
| "eval_samples_per_second": 37.047, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.197, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5289 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 6.877077976241708e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.5632, | |
| "eval_samples_per_second": 36.118, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.167, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5418 | |
| }, | |
| { | |
| "epoch": 42.64, | |
| "learning_rate": 2.9457364341085276e-06, | |
| "loss": 0.0017, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 3.3805001294240355e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.1656, | |
| "eval_samples_per_second": 37.132, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.2, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5547 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 1.8181766790803522e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2765, | |
| "eval_samples_per_second": 36.844, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.191, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5676 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 3.8955997297307476e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.2512, | |
| "eval_samples_per_second": 36.909, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.193, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_bp": 0.5673143055770848, | |
| "eval_counts": [ | |
| 5134, | |
| 4608, | |
| 4082, | |
| 3556 | |
| ], | |
| "eval_loss": 5.0567497964948416e-05, | |
| "eval_precisions": [ | |
| 99.90270480638256, | |
| 99.89161066551051, | |
| 99.87766087594812, | |
| 99.8595900028082 | |
| ], | |
| "eval_ref_len": 8052, | |
| "eval_runtime": 14.3515, | |
| "eval_samples_per_second": 36.651, | |
| "eval_score": 56.6649925424657, | |
| "eval_steps_per_second": 1.185, | |
| "eval_sys_len": 5139, | |
| "eval_totals": [ | |
| 5139, | |
| 4613, | |
| 4087, | |
| 3561 | |
| ], | |
| "step": 5934 | |
| }, | |
| { | |
| "epoch": 46.51, | |
| "learning_rate": 1.3953488372093025e-06, | |
| "loss": 0.0014, | |
| "step": 6000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 6450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 2.345536665897984e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |