| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 48.63813229571984, |
| "eval_steps": 500, |
| "global_step": 12500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_bp": 0.4559528592458481, |
| "eval_counts": [ |
| 3210, |
| 1930, |
| 1488, |
| 1066 |
| ], |
| "eval_loss": 2.412109375, |
| "eval_precisions": [ |
| 71.17516629711751, |
| 48.44377510040161, |
| 43.03065355696935, |
| 36.24617477048623 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 118.4312, |
| "eval_samples_per_second": 4.441, |
| "eval_score": 21.9569286964753, |
| "eval_steps_per_second": 0.279, |
| "eval_sys_len": 4510, |
| "eval_totals": [ |
| 4510, |
| 3984, |
| 3458, |
| 2941 |
| ], |
| "step": 257 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.9221789883268484e-05, |
| "loss": 2.8948, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bp": 0.43532380297987505, |
| "eval_counts": [ |
| 1708, |
| 541, |
| 376, |
| 228 |
| ], |
| "eval_loss": 1.5029296875, |
| "eval_precisions": [ |
| 38.853503184713375, |
| 13.979328165374676, |
| 11.24401913875598, |
| 7.497533706017757 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 115.6212, |
| "eval_samples_per_second": 4.549, |
| "eval_score": 6.3679777301051494, |
| "eval_steps_per_second": 0.285, |
| "eval_sys_len": 4396, |
| "eval_totals": [ |
| 4396, |
| 3870, |
| 3344, |
| 3041 |
| ], |
| "step": 514 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bp": 0.5596896112039585, |
| "eval_counts": [ |
| 2795, |
| 1858, |
| 1416, |
| 991 |
| ], |
| "eval_loss": 0.65576171875, |
| "eval_precisions": [ |
| 54.85770363101079, |
| 40.66535346903042, |
| 35.02349740291862, |
| 28.177423940858688 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 115.5065, |
| "eval_samples_per_second": 4.554, |
| "eval_score": 21.558969055160425, |
| "eval_steps_per_second": 0.286, |
| "eval_sys_len": 5095, |
| "eval_totals": [ |
| 5095, |
| 4569, |
| 4043, |
| 3517 |
| ], |
| "step": 771 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 1.8443579766536967e-05, |
| "loss": 0.8924, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bp": 0.6650198090145658, |
| "eval_counts": [ |
| 3257, |
| 2161, |
| 1704, |
| 1256 |
| ], |
| "eval_loss": 0.485107421875, |
| "eval_precisions": [ |
| 56.950515824444835, |
| 41.61371076449066, |
| 36.51167773730448, |
| 30.330837961844964 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 115.5544, |
| "eval_samples_per_second": 4.552, |
| "eval_score": 26.766843398496384, |
| "eval_steps_per_second": 0.286, |
| "eval_sys_len": 5719, |
| "eval_totals": [ |
| 5719, |
| 5193, |
| 4667, |
| 4141 |
| ], |
| "step": 1028 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_bp": 0.41788118238391686, |
| "eval_counts": [ |
| 3699, |
| 2841, |
| 2368, |
| 1900 |
| ], |
| "eval_loss": 0.293701171875, |
| "eval_precisions": [ |
| 86.02325581395348, |
| 75.27821939586646, |
| 72.9064039408867, |
| 69.80161645848641 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 115.1699, |
| "eval_samples_per_second": 4.567, |
| "eval_score": 31.661530724736487, |
| "eval_steps_per_second": 0.287, |
| "eval_sys_len": 4300, |
| "eval_totals": [ |
| 4300, |
| 3774, |
| 3248, |
| 2722 |
| ], |
| "step": 1285 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 1.766536964980545e-05, |
| "loss": 0.4295, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_bp": 0.4089581075583404, |
| "eval_counts": [ |
| 3783, |
| 2928, |
| 2446, |
| 1971 |
| ], |
| "eval_loss": 0.2445068359375, |
| "eval_precisions": [ |
| 88.9908256880734, |
| 78.60402684563758, |
| 76.46139418568302, |
| 73.73737373737374 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 115.449, |
| "eval_samples_per_second": 4.556, |
| "eval_score": 32.408490251125635, |
| "eval_steps_per_second": 0.286, |
| "eval_sys_len": 4251, |
| "eval_totals": [ |
| 4251, |
| 3725, |
| 3199, |
| 2673 |
| ], |
| "step": 1542 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_bp": 0.417153226107242, |
| "eval_counts": [ |
| 3818, |
| 2965, |
| 2480, |
| 2002 |
| ], |
| "eval_loss": 0.22021484375, |
| "eval_precisions": [ |
| 88.8733705772812, |
| 78.64721485411141, |
| 76.44882860665845, |
| 73.6571008094187 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 115.4073, |
| "eval_samples_per_second": 4.558, |
| "eval_score": 33.04119304311829, |
| "eval_steps_per_second": 0.286, |
| "eval_sys_len": 4296, |
| "eval_totals": [ |
| 4296, |
| 3770, |
| 3244, |
| 2718 |
| ], |
| "step": 1799 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 1.6887159533073932e-05, |
| "loss": 0.2991, |
| "step": 2000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_bp": 0.42679131632296613, |
| "eval_counts": [ |
| 3874, |
| 3019, |
| 2524, |
| 2038 |
| ], |
| "eval_loss": 0.2076416015625, |
| "eval_precisions": [ |
| 89.07794895378248, |
| 78.9693957624902, |
| 76.55444343342432, |
| 73.54745579213281 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 114.0107, |
| "eval_samples_per_second": 4.614, |
| "eval_score": 33.85769159645968, |
| "eval_steps_per_second": 0.289, |
| "eval_sys_len": 4349, |
| "eval_totals": [ |
| 4349, |
| 3823, |
| 3297, |
| 2771 |
| ], |
| "step": 2056 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_bp": 0.46028228872696303, |
| "eval_counts": [ |
| 4065, |
| 3225, |
| 2700, |
| 2186 |
| ], |
| "eval_loss": 0.1663818359375, |
| "eval_precisions": [ |
| 89.65593295103662, |
| 80.46407185628742, |
| 77.54164273406089, |
| 73.9512855209743 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 114.8195, |
| "eval_samples_per_second": 4.581, |
| "eval_score": 36.91388078489759, |
| "eval_steps_per_second": 0.287, |
| "eval_sys_len": 4534, |
| "eval_totals": [ |
| 4534, |
| 4008, |
| 3482, |
| 2956 |
| ], |
| "step": 2313 |
| }, |
| { |
| "epoch": 9.73, |
| "learning_rate": 1.6108949416342414e-05, |
| "loss": 0.2277, |
| "step": 2500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_bp": 0.5426087135017283, |
| "eval_counts": [ |
| 4419, |
| 3611, |
| 3062, |
| 2525 |
| ], |
| "eval_loss": 0.1044921875, |
| "eval_precisions": [ |
| 88.43305983590155, |
| 80.76492954596287, |
| 77.617237008872, |
| 73.85200350979818 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 114.7152, |
| "eval_samples_per_second": 4.585, |
| "eval_score": 43.40364936643555, |
| "eval_steps_per_second": 0.288, |
| "eval_sys_len": 4997, |
| "eval_totals": [ |
| 4997, |
| 4471, |
| 3945, |
| 3419 |
| ], |
| "step": 2570 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_bp": 0.5841943959505824, |
| "eval_counts": [ |
| 4717, |
| 3950, |
| 3372, |
| 2808 |
| ], |
| "eval_loss": 0.08892822265625, |
| "eval_precisions": [ |
| 90.07065113614665, |
| 83.846317130121, |
| 80.57347670250896, |
| 76.74227931128723 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 113.7863, |
| "eval_samples_per_second": 4.623, |
| "eval_score": 48.29263576279789, |
| "eval_steps_per_second": 0.29, |
| "eval_sys_len": 5237, |
| "eval_totals": [ |
| 5237, |
| 4711, |
| 4185, |
| 3659 |
| ], |
| "step": 2827 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 1.5330739299610897e-05, |
| "loss": 0.1405, |
| "step": 3000 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_bp": 0.5586477230994942, |
| "eval_counts": [ |
| 4630, |
| 3875, |
| 3303, |
| 2749 |
| ], |
| "eval_loss": 0.08489990234375, |
| "eval_precisions": [ |
| 90.98054627628218, |
| 84.9222003068157, |
| 81.81818181818181, |
| 78.29678154371973 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.8868, |
| "eval_samples_per_second": 4.875, |
| "eval_score": 46.85747814062412, |
| "eval_steps_per_second": 0.306, |
| "eval_sys_len": 5089, |
| "eval_totals": [ |
| 5089, |
| 4563, |
| 4037, |
| 3511 |
| ], |
| "step": 3084 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_bp": 0.5695616786732568, |
| "eval_counts": [ |
| 4747, |
| 4034, |
| 3464, |
| 2904 |
| ], |
| "eval_loss": 0.08123779296875, |
| "eval_precisions": [ |
| 92.1389751552795, |
| 87.20276696930394, |
| 84.48780487804878, |
| 81.2534974818131 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 108.2759, |
| "eval_samples_per_second": 4.858, |
| "eval_score": 49.08436700451685, |
| "eval_steps_per_second": 0.305, |
| "eval_sys_len": 5152, |
| "eval_totals": [ |
| 5152, |
| 4626, |
| 4100, |
| 3574 |
| ], |
| "step": 3341 |
| }, |
| { |
| "epoch": 13.62, |
| "learning_rate": 1.4552529182879378e-05, |
| "loss": 0.1241, |
| "step": 3500 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_bp": 0.5695616786732568, |
| "eval_counts": [ |
| 4738, |
| 4024, |
| 3452, |
| 2894 |
| ], |
| "eval_loss": 0.07525634765625, |
| "eval_precisions": [ |
| 91.96428571428571, |
| 86.98659749243407, |
| 84.1951219512195, |
| 80.97369893676553 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.2784, |
| "eval_samples_per_second": 4.903, |
| "eval_score": 48.94590635934059, |
| "eval_steps_per_second": 0.308, |
| "eval_sys_len": 5152, |
| "eval_totals": [ |
| 5152, |
| 4626, |
| 4100, |
| 3574 |
| ], |
| "step": 3598 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_bp": 0.5961628688829712, |
| "eval_counts": [ |
| 4741, |
| 4006, |
| 3444, |
| 2891 |
| ], |
| "eval_loss": 0.07562255859375, |
| "eval_precisions": [ |
| 89.33484077633314, |
| 83.79000209161264, |
| 80.94007050528789, |
| 77.52748726200053 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.2867, |
| "eval_samples_per_second": 4.949, |
| "eval_score": 49.354074470195435, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5307, |
| "eval_totals": [ |
| 5307, |
| 4781, |
| 4255, |
| 3729 |
| ], |
| "step": 3855 |
| }, |
| { |
| "epoch": 15.56, |
| "learning_rate": 1.377431906614786e-05, |
| "loss": 0.1147, |
| "step": 4000 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_bp": 0.5655838797151567, |
| "eval_counts": [ |
| 4743, |
| 4039, |
| 3477, |
| 2925 |
| ], |
| "eval_loss": 0.06915283203125, |
| "eval_precisions": [ |
| 92.47416650419186, |
| 87.74712144253748, |
| 85.28329654157469, |
| 82.37116305266122 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.836, |
| "eval_samples_per_second": 4.878, |
| "eval_score": 49.143959340541095, |
| "eval_steps_per_second": 0.306, |
| "eval_sys_len": 5129, |
| "eval_totals": [ |
| 5129, |
| 4603, |
| 4077, |
| 3551 |
| ], |
| "step": 4112 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_bp": 0.5932631592602093, |
| "eval_counts": [ |
| 4727, |
| 3996, |
| 3439, |
| 2892 |
| ], |
| "eval_loss": 0.070068359375, |
| "eval_precisions": [ |
| 89.35727788279773, |
| 83.87909319899245, |
| 81.14676734308637, |
| 77.90948275862068 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.6266, |
| "eval_samples_per_second": 4.933, |
| "eval_score": 49.221934768405774, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5290, |
| "eval_totals": [ |
| 5290, |
| 4764, |
| 4238, |
| 3712 |
| ], |
| "step": 4369 |
| }, |
| { |
| "epoch": 17.51, |
| "learning_rate": 1.2996108949416343e-05, |
| "loss": 0.1065, |
| "step": 4500 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_bp": 0.5610779943992972, |
| "eval_counts": [ |
| 4753, |
| 4064, |
| 3505, |
| 2956 |
| ], |
| "eval_loss": 0.0623779296875, |
| "eval_precisions": [ |
| 93.14128943758574, |
| 88.7917850120166, |
| 86.52184645766478, |
| 83.8581560283688 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.949, |
| "eval_samples_per_second": 4.918, |
| "eval_score": 49.382124037917905, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5103, |
| "eval_totals": [ |
| 5103, |
| 4577, |
| 4051, |
| 3525 |
| ], |
| "step": 4626 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_bp": 0.5788702549376445, |
| "eval_counts": [ |
| 4784, |
| 4087, |
| 3529, |
| 2977 |
| ], |
| "eval_loss": 0.060699462890625, |
| "eval_precisions": [ |
| 91.89396849788706, |
| 87.32905982905983, |
| 84.95426095329803, |
| 82.05622932745314 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.4771, |
| "eval_samples_per_second": 4.94, |
| "eval_score": 50.0629990425284, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5206, |
| "eval_totals": [ |
| 5206, |
| 4680, |
| 4154, |
| 3628 |
| ], |
| "step": 4883 |
| }, |
| { |
| "epoch": 19.46, |
| "learning_rate": 1.2217898832684827e-05, |
| "loss": 0.0964, |
| "step": 5000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_bp": 0.5826501698750266, |
| "eval_counts": [ |
| 4773, |
| 4068, |
| 3509, |
| 2957 |
| ], |
| "eval_loss": 0.0595703125, |
| "eval_precisions": [ |
| 91.29686304514155, |
| 86.51637601020842, |
| 84.02777777777777, |
| 81.01369863013699 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.5688, |
| "eval_samples_per_second": 4.936, |
| "eval_score": 49.89324555557292, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5228, |
| "eval_totals": [ |
| 5228, |
| 4702, |
| 4176, |
| 3650 |
| ], |
| "step": 5140 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_bp": 0.5824785136401668, |
| "eval_counts": [ |
| 4780, |
| 4078, |
| 3521, |
| 2972 |
| ], |
| "eval_loss": 0.057952880859375, |
| "eval_precisions": [ |
| 91.4482494738856, |
| 86.74750053180175, |
| 84.33532934131736, |
| 81.44697177308852 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 105.7144, |
| "eval_samples_per_second": 4.976, |
| "eval_score": 50.04481904653482, |
| "eval_steps_per_second": 0.312, |
| "eval_sys_len": 5227, |
| "eval_totals": [ |
| 5227, |
| 4701, |
| 4175, |
| 3649 |
| ], |
| "step": 5397 |
| }, |
| { |
| "epoch": 21.4, |
| "learning_rate": 1.1439688715953308e-05, |
| "loss": 0.0925, |
| "step": 5500 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_bp": 0.6060221334079605, |
| "eval_counts": [ |
| 4800, |
| 4076, |
| 3514, |
| 2962 |
| ], |
| "eval_loss": 0.060546875, |
| "eval_precisions": [ |
| 89.46877912395153, |
| 84.23227939656954, |
| 81.47461163923023, |
| 78.21494586744124 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 105.9417, |
| "eval_samples_per_second": 4.965, |
| "eval_score": 50.4491686657978, |
| "eval_steps_per_second": 0.311, |
| "eval_sys_len": 5365, |
| "eval_totals": [ |
| 5365, |
| 4839, |
| 4313, |
| 3787 |
| ], |
| "step": 5654 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_bp": 0.5761166700049626, |
| "eval_counts": [ |
| 4832, |
| 4155, |
| 3593, |
| 3036 |
| ], |
| "eval_loss": 0.053558349609375, |
| "eval_precisions": [ |
| 93.10211946050096, |
| 89.08662092624357, |
| 86.82938617689705, |
| 84.0531561461794 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.564, |
| "eval_samples_per_second": 4.936, |
| "eval_score": 50.81695573260325, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5190, |
| "eval_totals": [ |
| 5190, |
| 4664, |
| 4138, |
| 3612 |
| ], |
| "step": 5911 |
| }, |
| { |
| "epoch": 23.35, |
| "learning_rate": 1.066147859922179e-05, |
| "loss": 0.0871, |
| "step": 6000 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_bp": 0.5719791556804446, |
| "eval_counts": [ |
| 4807, |
| 4125, |
| 3565, |
| 3012 |
| ], |
| "eval_loss": 0.052276611328125, |
| "eval_precisions": [ |
| 93.05071622144793, |
| 88.90086206896552, |
| 86.65532328633932, |
| 83.94648829431438 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.1172, |
| "eval_samples_per_second": 4.911, |
| "eval_score": 50.37743722047891, |
| "eval_steps_per_second": 0.308, |
| "eval_sys_len": 5166, |
| "eval_totals": [ |
| 5166, |
| 4640, |
| 4114, |
| 3588 |
| ], |
| "step": 6168 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_bp": 0.579214183971878, |
| "eval_counts": [ |
| 4838, |
| 4161, |
| 3602, |
| 3050 |
| ], |
| "eval_loss": 0.050567626953125, |
| "eval_precisions": [ |
| 92.89554531490015, |
| 88.87227680478428, |
| 86.66987487969202, |
| 84.02203856749311 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.5446, |
| "eval_samples_per_second": 4.937, |
| "eval_score": 51.0028956058344, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5208, |
| "eval_totals": [ |
| 5208, |
| 4682, |
| 4156, |
| 3630 |
| ], |
| "step": 6425 |
| }, |
| { |
| "epoch": 25.29, |
| "learning_rate": 9.883268482490273e-06, |
| "loss": 0.0843, |
| "step": 6500 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_bp": 0.5607309845734951, |
| "eval_counts": [ |
| 4817, |
| 4157, |
| 3596, |
| 3042 |
| ], |
| "eval_loss": 0.051177978515625, |
| "eval_precisions": [ |
| 94.43246422270143, |
| 90.86338797814207, |
| 88.81205235860706, |
| 86.34686346863468 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.5728, |
| "eval_samples_per_second": 4.89, |
| "eval_score": 50.50236718840154, |
| "eval_steps_per_second": 0.307, |
| "eval_sys_len": 5101, |
| "eval_totals": [ |
| 5101, |
| 4575, |
| 4049, |
| 3523 |
| ], |
| "step": 6682 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_bp": 0.5869366550146455, |
| "eval_counts": [ |
| 4855, |
| 4170, |
| 3608, |
| 3055 |
| ], |
| "eval_loss": 0.0489501953125, |
| "eval_precisions": [ |
| 92.42337711783743, |
| 88.21662788237784, |
| 85.88431325874792, |
| 83.12925170068027 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 109.9833, |
| "eval_samples_per_second": 4.783, |
| "eval_score": 51.26739301541927, |
| "eval_steps_per_second": 0.3, |
| "eval_sys_len": 5253, |
| "eval_totals": [ |
| 5253, |
| 4727, |
| 4201, |
| 3675 |
| ], |
| "step": 6939 |
| }, |
| { |
| "epoch": 27.24, |
| "learning_rate": 9.105058365758756e-06, |
| "loss": 0.0813, |
| "step": 7000 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_bp": 0.5641984935077309, |
| "eval_counts": [ |
| 4838, |
| 4184, |
| 3624, |
| 3070 |
| ], |
| "eval_loss": 0.047760009765625, |
| "eval_precisions": [ |
| 94.47373559851592, |
| 91.05549510337323, |
| 89.06365200294913, |
| 86.64973186565058 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 109.0318, |
| "eval_samples_per_second": 4.824, |
| "eval_score": 50.9275946797506, |
| "eval_steps_per_second": 0.303, |
| "eval_sys_len": 5121, |
| "eval_totals": [ |
| 5121, |
| 4595, |
| 4069, |
| 3543 |
| ], |
| "step": 7196 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_bp": 0.5711161019095474, |
| "eval_counts": [ |
| 4838, |
| 4179, |
| 3625, |
| 3079 |
| ], |
| "eval_loss": 0.0462646484375, |
| "eval_precisions": [ |
| 93.74152296066654, |
| 90.16181229773463, |
| 88.22097834022877, |
| 85.93357521629919 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.2897, |
| "eval_samples_per_second": 4.903, |
| "eval_score": 51.09715872720289, |
| "eval_steps_per_second": 0.308, |
| "eval_sys_len": 5161, |
| "eval_totals": [ |
| 5161, |
| 4635, |
| 4109, |
| 3583 |
| ], |
| "step": 7453 |
| }, |
| { |
| "epoch": 29.18, |
| "learning_rate": 8.326848249027239e-06, |
| "loss": 0.0778, |
| "step": 7500 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_bp": 0.587279163676868, |
| "eval_counts": [ |
| 4863, |
| 4185, |
| 3626, |
| 3075 |
| ], |
| "eval_loss": 0.04534912109375, |
| "eval_precisions": [ |
| 92.54043767840152, |
| 88.49651089025164, |
| 86.27171068284558, |
| 83.62795757410933 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 108.186, |
| "eval_samples_per_second": 4.862, |
| "eval_score": 51.488944843891275, |
| "eval_steps_per_second": 0.305, |
| "eval_sys_len": 5255, |
| "eval_totals": [ |
| 5255, |
| 4729, |
| 4203, |
| 3677 |
| ], |
| "step": 7710 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_bp": 0.587279163676868, |
| "eval_counts": [ |
| 4847, |
| 4168, |
| 3612, |
| 3064 |
| ], |
| "eval_loss": 0.044677734375, |
| "eval_precisions": [ |
| 92.23596574690771, |
| 88.137026855572, |
| 85.93861527480371, |
| 83.32880065270601 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.6208, |
| "eval_samples_per_second": 4.933, |
| "eval_score": 51.298555626377826, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5255, |
| "eval_totals": [ |
| 5255, |
| 4729, |
| 4203, |
| 3677 |
| ], |
| "step": 7967 |
| }, |
| { |
| "epoch": 31.13, |
| "learning_rate": 7.54863813229572e-06, |
| "loss": 0.0753, |
| "step": 8000 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_bp": 0.5690432735111319, |
| "eval_counts": [ |
| 4866, |
| 4219, |
| 3661, |
| 3111 |
| ], |
| "eval_loss": 0.0438232421875, |
| "eval_precisions": [ |
| 94.50378714313459, |
| 91.26108587497296, |
| 89.35806687820356, |
| 87.11845421450575 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.5192, |
| "eval_samples_per_second": 4.892, |
| "eval_score": 51.50981459551784, |
| "eval_steps_per_second": 0.307, |
| "eval_sys_len": 5149, |
| "eval_totals": [ |
| 5149, |
| 4623, |
| 4097, |
| 3571 |
| ], |
| "step": 8224 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_bp": 0.5881351685074624, |
| "eval_counts": [ |
| 4869, |
| 4201, |
| 3645, |
| 3097 |
| ], |
| "eval_loss": 0.04400634765625, |
| "eval_precisions": [ |
| 92.56653992395437, |
| 88.74102239121251, |
| 86.62072243346007, |
| 84.11189570885388 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.8688, |
| "eval_samples_per_second": 4.922, |
| "eval_score": 51.729891771805434, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5260, |
| "eval_totals": [ |
| 5260, |
| 4734, |
| 4208, |
| 3682 |
| ], |
| "step": 8481 |
| }, |
| { |
| "epoch": 33.07, |
| "learning_rate": 6.770428015564204e-06, |
| "loss": 0.0714, |
| "step": 8500 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_bp": 0.5823068423133116, |
| "eval_counts": [ |
| 4881, |
| 4226, |
| 3674, |
| 3130 |
| ], |
| "eval_loss": 0.041656494140625, |
| "eval_precisions": [ |
| 93.398392652124, |
| 89.91489361702128, |
| 88.02108289410637, |
| 85.80043859649123 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.0835, |
| "eval_samples_per_second": 4.912, |
| "eval_score": 51.96533200156475, |
| "eval_steps_per_second": 0.308, |
| "eval_sys_len": 5226, |
| "eval_totals": [ |
| 5226, |
| 4700, |
| 4174, |
| 3648 |
| ], |
| "step": 8738 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_bp": 0.5862514549555176, |
| "eval_counts": [ |
| 4902, |
| 4242, |
| 3685, |
| 3133 |
| ], |
| "eval_loss": 0.042633056640625, |
| "eval_precisions": [ |
| 93.38921699371309, |
| 89.81579504552191, |
| 87.80081010245414, |
| 85.34459275401798 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.8404, |
| "eval_samples_per_second": 4.923, |
| "eval_score": 52.19933016750815, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5249, |
| "eval_totals": [ |
| 5249, |
| 4723, |
| 4197, |
| 3671 |
| ], |
| "step": 8995 |
| }, |
| { |
| "epoch": 35.02, |
| "learning_rate": 5.992217898832685e-06, |
| "loss": 0.0697, |
| "step": 9000 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_bp": 0.5807611221368078, |
| "eval_counts": [ |
| 4907, |
| 4257, |
| 3699, |
| 3149 |
| ], |
| "eval_loss": 0.04095458984375, |
| "eval_precisions": [ |
| 94.05788767490895, |
| 90.74824131315285, |
| 88.81152460984394, |
| 86.53476229733444 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.3976, |
| "eval_samples_per_second": 4.944, |
| "eval_score": 52.266194224133834, |
| "eval_steps_per_second": 0.31, |
| "eval_sys_len": 5217, |
| "eval_totals": [ |
| 5217, |
| 4691, |
| 4165, |
| 3639 |
| ], |
| "step": 9252 |
| }, |
| { |
| "epoch": 36.96, |
| "learning_rate": 5.214007782101168e-06, |
| "loss": 0.0686, |
| "step": 9500 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_bp": 0.5983772718445015, |
| "eval_counts": [ |
| 4899, |
| 4227, |
| 3672, |
| 3123 |
| ], |
| "eval_loss": 0.042388916015625, |
| "eval_precisions": [ |
| 92.08646616541354, |
| 88.17271589486859, |
| 86.03561387066541, |
| 83.45804382683058 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.906, |
| "eval_samples_per_second": 4.92, |
| "eval_score": 52.28705860616529, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5320, |
| "eval_totals": [ |
| 5320, |
| 4794, |
| 4268, |
| 3742 |
| ], |
| "step": 9509 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_bp": 0.5780101703802235, |
| "eval_counts": [ |
| 4913, |
| 4273, |
| 3718, |
| 3172 |
| ], |
| "eval_loss": 0.0394287109375, |
| "eval_precisions": [ |
| 94.46260334551047, |
| 91.40106951871658, |
| 89.61195468787659, |
| 87.55175269113994 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.7621, |
| "eval_samples_per_second": 4.927, |
| "eval_score": 52.43798269679689, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5201, |
| "eval_totals": [ |
| 5201, |
| 4675, |
| 4149, |
| 3623 |
| ], |
| "step": 9766 |
| }, |
| { |
| "epoch": 38.91, |
| "learning_rate": 4.43579766536965e-06, |
| "loss": 0.0664, |
| "step": 10000 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_bp": 0.5975258891581067, |
| "eval_counts": [ |
| 4912, |
| 4243, |
| 3689, |
| 3141 |
| ], |
| "eval_loss": 0.040374755859375, |
| "eval_precisions": [ |
| 92.41768579492003, |
| 88.59887241595322, |
| 86.5353037766831, |
| 84.05137811078406 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.9692, |
| "eval_samples_per_second": 4.917, |
| "eval_score": 52.491270926490635, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5315, |
| "eval_totals": [ |
| 5315, |
| 4789, |
| 4263, |
| 3737 |
| ], |
| "step": 10023 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_bp": 0.587279163676868, |
| "eval_counts": [ |
| 4913, |
| 4259, |
| 3711, |
| 3170 |
| ], |
| "eval_loss": 0.0382080078125, |
| "eval_precisions": [ |
| 93.4919124643197, |
| 90.0613237470924, |
| 88.29407566024268, |
| 86.21158553168344 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.0316, |
| "eval_samples_per_second": 4.914, |
| "eval_score": 52.5468859983503, |
| "eval_steps_per_second": 0.308, |
| "eval_sys_len": 5255, |
| "eval_totals": [ |
| 5255, |
| 4729, |
| 4203, |
| 3677 |
| ], |
| "step": 10280 |
| }, |
| { |
| "epoch": 40.86, |
| "learning_rate": 3.6575875486381323e-06, |
| "loss": 0.0658, |
| "step": 10500 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_bp": 0.5811047209098391, |
| "eval_counts": [ |
| 4921, |
| 4278, |
| 3725, |
| 3179 |
| ], |
| "eval_loss": 0.0377197265625, |
| "eval_precisions": [ |
| 94.29009388771796, |
| 91.1570424035798, |
| 89.39284857211423, |
| 87.31117824773413 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.7065, |
| "eval_samples_per_second": 4.929, |
| "eval_score": 52.59102479681527, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5219, |
| "eval_totals": [ |
| 5219, |
| 4693, |
| 4167, |
| 3641 |
| ], |
| "step": 10537 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_bp": 0.5817917378355022, |
| "eval_counts": [ |
| 4908, |
| 4261, |
| 3712, |
| 3169 |
| ], |
| "eval_loss": 0.037109375, |
| "eval_precisions": [ |
| 93.96898334290637, |
| 90.7174792420694, |
| 88.99544473747302, |
| 86.94101508916324 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.4197, |
| "eval_samples_per_second": 4.897, |
| "eval_score": 52.43056600057888, |
| "eval_steps_per_second": 0.307, |
| "eval_sys_len": 5223, |
| "eval_totals": [ |
| 5223, |
| 4697, |
| 4171, |
| 3645 |
| ], |
| "step": 10794 |
| }, |
| { |
| "epoch": 42.8, |
| "learning_rate": 2.879377431906615e-06, |
| "loss": 0.0643, |
| "step": 11000 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_bp": 0.5804174632159932, |
| "eval_counts": [ |
| 4905, |
| 4264, |
| 3714, |
| 3172 |
| ], |
| "eval_loss": 0.037017822265625, |
| "eval_precisions": [ |
| 94.0556088207095, |
| 90.936233738537, |
| 89.21450876771559, |
| 87.21473742095134 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 106.7676, |
| "eval_samples_per_second": 4.927, |
| "eval_score": 52.42364666449266, |
| "eval_steps_per_second": 0.309, |
| "eval_sys_len": 5215, |
| "eval_totals": [ |
| 5215, |
| 4689, |
| 4163, |
| 3637 |
| ], |
| "step": 11051 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_bp": 0.5961628688829712, |
| "eval_counts": [ |
| 4930, |
| 4270, |
| 3718, |
| 3173 |
| ], |
| "eval_loss": 0.0380859375, |
| "eval_precisions": [ |
| 92.89617486338798, |
| 89.31185944363104, |
| 87.37955346650999, |
| 85.08983641727005 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.4748, |
| "eval_samples_per_second": 4.894, |
| "eval_score": 52.834006019511406, |
| "eval_steps_per_second": 0.307, |
| "eval_sys_len": 5307, |
| "eval_totals": [ |
| 5307, |
| 4781, |
| 4255, |
| 3729 |
| ], |
| "step": 11308 |
| }, |
| { |
| "epoch": 44.75, |
| "learning_rate": 2.1011673151750974e-06, |
| "loss": 0.0608, |
| "step": 11500 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_bp": 0.5757722034899391, |
| "eval_counts": [ |
| 4915, |
| 4280, |
| 3729, |
| 3186 |
| ], |
| "eval_loss": 0.036224365234375, |
| "eval_precisions": [ |
| 94.7378565921357, |
| 91.8060918060918, |
| 90.15957446808511, |
| 88.25484764542936 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.743, |
| "eval_samples_per_second": 4.882, |
| "eval_score": 52.515446703245765, |
| "eval_steps_per_second": 0.306, |
| "eval_sys_len": 5188, |
| "eval_totals": [ |
| 5188, |
| 4662, |
| 4136, |
| 3610 |
| ], |
| "step": 11565 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_bp": 0.5843659009664612, |
| "eval_counts": [ |
| 4924, |
| 4278, |
| 3730, |
| 3188 |
| ], |
| "eval_loss": 0.036651611328125, |
| "eval_precisions": [ |
| 94.0053455517373, |
| 90.78947368421052, |
| 89.10654562828476, |
| 87.10382513661202 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 107.889, |
| "eval_samples_per_second": 4.875, |
| "eval_score": 52.71917275684773, |
| "eval_steps_per_second": 0.306, |
| "eval_sys_len": 5238, |
| "eval_totals": [ |
| 5238, |
| 4712, |
| 4186, |
| 3660 |
| ], |
| "step": 11822 |
| }, |
| { |
| "epoch": 46.69, |
| "learning_rate": 1.32295719844358e-06, |
| "loss": 0.0622, |
| "step": 12000 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_bp": 0.586080116901772, |
| "eval_counts": [ |
| 4938, |
| 4295, |
| 3745, |
| 3201 |
| ], |
| "eval_loss": 0.036529541015625, |
| "eval_precisions": [ |
| 94.09298780487805, |
| 90.95722151630665, |
| 89.2516682554814, |
| 87.22070844686648 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 109.2986, |
| "eval_samples_per_second": 4.813, |
| "eval_score": 52.949832085516945, |
| "eval_steps_per_second": 0.302, |
| "eval_sys_len": 5248, |
| "eval_totals": [ |
| 5248, |
| 4722, |
| 4196, |
| 3670 |
| ], |
| "step": 12079 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_bp": 0.5817917378355022, |
| "eval_counts": [ |
| 4925, |
| 4285, |
| 3733, |
| 3189 |
| ], |
| "eval_loss": 0.036285400390625, |
| "eval_precisions": [ |
| 94.29446678154318, |
| 91.22844368746009, |
| 89.49892112203308, |
| 87.48971193415638 |
| ], |
| "eval_ref_len": 8052, |
| "eval_runtime": 108.6659, |
| "eval_samples_per_second": 4.841, |
| "eval_score": 52.70664408353883, |
| "eval_steps_per_second": 0.304, |
| "eval_sys_len": 5223, |
| "eval_totals": [ |
| 5223, |
| 4697, |
| 4171, |
| 3645 |
| ], |
| "step": 12336 |
| }, |
| { |
| "epoch": 48.64, |
| "learning_rate": 5.447470817120623e-07, |
| "loss": 0.0625, |
| "step": 12500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 12850, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "total_flos": 8.680648839008256e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|