| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 16.99889502762431, | |
| "global_step": 5763, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.38, | |
| "gpu_memory": 2987030016, | |
| "learning_rate": 8.32e-06, | |
| "loss": 4.0407, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.664e-05, | |
| "loss": 2.405, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bp": 0.035349686560536234, | |
| "eval_counts": [ | |
| 505, | |
| 125, | |
| 50, | |
| 11 | |
| ], | |
| "eval_loss": 1.9292821884155273, | |
| "eval_precisions": [ | |
| 46.118721461187214, | |
| 15.723270440251572, | |
| 9.861932938856016, | |
| 4.471544715447155 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 44.1807, | |
| "eval_samples_per_second": 6.79, | |
| "eval_score": 0.47271078280719403, | |
| "eval_steps_per_second": 6.79, | |
| "eval_sys_len": 1095, | |
| "eval_totals": [ | |
| 1095, | |
| 795, | |
| 507, | |
| 246 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.4959999999999998e-05, | |
| "loss": 2.0089, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 3.2437898089171974e-05, | |
| "loss": 1.8155, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 3.1775477707006364e-05, | |
| "loss": 1.7234, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bp": 0.0840891954437523, | |
| "eval_counts": [ | |
| 492, | |
| 189, | |
| 85, | |
| 29 | |
| ], | |
| "eval_loss": 1.6681220531463623, | |
| "eval_precisions": [ | |
| 35.96491228070175, | |
| 17.696629213483146, | |
| 10.303030303030303, | |
| 4.833333333333333 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 53.1682, | |
| "eval_samples_per_second": 5.642, | |
| "eval_score": 1.1219810390322362, | |
| "eval_steps_per_second": 5.642, | |
| "eval_sys_len": 1368, | |
| "eval_totals": [ | |
| 1368, | |
| 1068, | |
| 825, | |
| 600 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 3.111305732484076e-05, | |
| "loss": 1.6058, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 3.0450636942675155e-05, | |
| "loss": 1.5189, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bp": 0.09192776836698148, | |
| "eval_counts": [ | |
| 571, | |
| 192, | |
| 93, | |
| 40 | |
| ], | |
| "eval_loss": 1.5985139608383179, | |
| "eval_precisions": [ | |
| 40.66951566951567, | |
| 17.391304347826086, | |
| 11.03202846975089, | |
| 6.734006734006734 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 44.5545, | |
| "eval_samples_per_second": 6.733, | |
| "eval_score": 1.391807704814939, | |
| "eval_steps_per_second": 6.733, | |
| "eval_sys_len": 1404, | |
| "eval_totals": [ | |
| 1404, | |
| 1104, | |
| 843, | |
| 594 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.9788216560509553e-05, | |
| "loss": 1.4885, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.9125796178343946e-05, | |
| "loss": 1.334, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.8463375796178344e-05, | |
| "loss": 1.3861, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bp": 0.034513967404432855, | |
| "eval_counts": [ | |
| 432, | |
| 173, | |
| 84, | |
| 35 | |
| ], | |
| "eval_loss": 1.6043497323989868, | |
| "eval_precisions": [ | |
| 39.66942148760331, | |
| 21.926489226869457, | |
| 16.184971098265898, | |
| 9.48509485094851 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 42.8527, | |
| "eval_samples_per_second": 7.001, | |
| "eval_score": 0.6597653875525311, | |
| "eval_steps_per_second": 7.001, | |
| "eval_sys_len": 1089, | |
| "eval_totals": [ | |
| 1089, | |
| 789, | |
| 519, | |
| 369 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.7800955414012737e-05, | |
| "loss": 1.3367, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.713853503184713e-05, | |
| "loss": 1.2828, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.647611464968153e-05, | |
| "loss": 1.2571, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bp": 0.17929973112718744, | |
| "eval_counts": [ | |
| 671, | |
| 230, | |
| 102, | |
| 43 | |
| ], | |
| "eval_loss": 1.5908681154251099, | |
| "eval_precisions": [ | |
| 38.36477987421384, | |
| 15.873015873015873, | |
| 8.695652173913043, | |
| 4.699453551912568 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 55.6404, | |
| "eval_samples_per_second": 5.392, | |
| "eval_score": 2.2519827467510987, | |
| "eval_steps_per_second": 5.392, | |
| "eval_sys_len": 1749, | |
| "eval_totals": [ | |
| 1749, | |
| 1449, | |
| 1173, | |
| 915 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.5813694267515922e-05, | |
| "loss": 1.2035, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.515127388535032e-05, | |
| "loss": 1.183, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bp": 0.07050485313640832, | |
| "eval_counts": [ | |
| 615, | |
| 257, | |
| 141, | |
| 80 | |
| ], | |
| "eval_loss": 1.5943706035614014, | |
| "eval_precisions": [ | |
| 47.235023041474655, | |
| 25.64870259481038, | |
| 19.502074688796682, | |
| 14.109347442680775 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 47.1825, | |
| "eval_samples_per_second": 6.358, | |
| "eval_score": 1.6941362350992444, | |
| "eval_steps_per_second": 6.358, | |
| "eval_sys_len": 1302, | |
| "eval_totals": [ | |
| 1302, | |
| 1002, | |
| 723, | |
| 567 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.4488853503184713e-05, | |
| "loss": 1.1964, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.3826433121019104e-05, | |
| "loss": 1.1073, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.31640127388535e-05, | |
| "loss": 1.1316, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bp": 0.10421315891869368, | |
| "eval_counts": [ | |
| 649, | |
| 197, | |
| 79, | |
| 22 | |
| ], | |
| "eval_loss": 1.6070951223373413, | |
| "eval_precisions": [ | |
| 44.51303155006859, | |
| 17.012089810017272, | |
| 9.111880046136102, | |
| 3.559870550161812 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 47.6479, | |
| "eval_samples_per_second": 6.296, | |
| "eval_score": 1.3046509061748794, | |
| "eval_steps_per_second": 6.296, | |
| "eval_sys_len": 1458, | |
| "eval_totals": [ | |
| 1458, | |
| 1158, | |
| 867, | |
| 618 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.2501592356687895e-05, | |
| "loss": 1.0398, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.183917197452229e-05, | |
| "loss": 1.0349, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.1176751592356686e-05, | |
| "loss": 1.0816, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bp": 0.21001389512353258, | |
| "eval_counts": [ | |
| 846, | |
| 344, | |
| 187, | |
| 105 | |
| ], | |
| "eval_loss": 1.6298103332519531, | |
| "eval_precisions": [ | |
| 45.55735056542811, | |
| 22.093770070648684, | |
| 14.597970335675253, | |
| 10.294117647058824 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 54.6716, | |
| "eval_samples_per_second": 5.487, | |
| "eval_score": 4.141670104799348, | |
| "eval_steps_per_second": 5.487, | |
| "eval_sys_len": 1857, | |
| "eval_totals": [ | |
| 1857, | |
| 1557, | |
| 1281, | |
| 1020 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 2.051433121019108e-05, | |
| "loss": 0.987, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.9851910828025477e-05, | |
| "loss": 0.9829, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bp": 0.06525766524199453, | |
| "eval_counts": [ | |
| 577, | |
| 216, | |
| 100, | |
| 37 | |
| ], | |
| "eval_loss": 1.6366333961486816, | |
| "eval_precisions": [ | |
| 45.254901960784316, | |
| 22.153846153846153, | |
| 14.367816091954023, | |
| 7.297830374753452 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 49.3567, | |
| "eval_samples_per_second": 6.078, | |
| "eval_score": 1.1750500193614282, | |
| "eval_steps_per_second": 6.078, | |
| "eval_sys_len": 1275, | |
| "eval_totals": [ | |
| 1275, | |
| 975, | |
| 696, | |
| 507 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 3051 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.918949044585987e-05, | |
| "loss": 1.003, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.8527070063694264e-05, | |
| "loss": 0.9337, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.786464968152866e-05, | |
| "loss": 0.9325, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bp": 0.16851984622310243, | |
| "eval_counts": [ | |
| 667, | |
| 248, | |
| 121, | |
| 62 | |
| ], | |
| "eval_loss": 1.67235267162323, | |
| "eval_precisions": [ | |
| 39.005847953216374, | |
| 17.588652482269502, | |
| 10.503472222222221, | |
| 6.68824163969795 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 50.0923, | |
| "eval_samples_per_second": 5.989, | |
| "eval_score": 2.4969097127652855, | |
| "eval_steps_per_second": 5.989, | |
| "eval_sys_len": 1710, | |
| "eval_totals": [ | |
| 1710, | |
| 1410, | |
| 1152, | |
| 927 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.7202229299363055e-05, | |
| "loss": 0.9075, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.6539808917197452e-05, | |
| "loss": 0.8753, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.5877388535031846e-05, | |
| "loss": 0.9098, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bp": 0.1483387334695538, | |
| "eval_counts": [ | |
| 735, | |
| 268, | |
| 134, | |
| 67 | |
| ], | |
| "eval_loss": 1.6972090005874634, | |
| "eval_precisions": [ | |
| 44.95412844036697, | |
| 20.074906367041198, | |
| 12.725546058879392, | |
| 8.18070818070818 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 46.9235, | |
| "eval_samples_per_second": 6.393, | |
| "eval_score": 2.5970312545681904, | |
| "eval_steps_per_second": 6.393, | |
| "eval_sys_len": 1635, | |
| "eval_totals": [ | |
| 1635, | |
| 1335, | |
| 1053, | |
| 819 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 3729 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.5214968152866242e-05, | |
| "loss": 0.839, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.4552547770700635e-05, | |
| "loss": 0.8643, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bp": 0.1320190352563076, | |
| "eval_counts": [ | |
| 715, | |
| 285, | |
| 143, | |
| 70 | |
| ], | |
| "eval_loss": 1.713928461074829, | |
| "eval_precisions": [ | |
| 45.48346055979644, | |
| 22.40566037735849, | |
| 14.357429718875501, | |
| 9.25925925925926 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 46.2792, | |
| "eval_samples_per_second": 6.482, | |
| "eval_score": 2.532809945547002, | |
| "eval_steps_per_second": 6.482, | |
| "eval_sys_len": 1572, | |
| "eval_totals": [ | |
| 1572, | |
| 1272, | |
| 996, | |
| 756 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.3890127388535031e-05, | |
| "loss": 0.8264, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.3227707006369426e-05, | |
| "loss": 0.8008, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 12.84, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.2565286624203822e-05, | |
| "loss": 0.7963, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bp": 0.18517745860640325, | |
| "eval_counts": [ | |
| 782, | |
| 310, | |
| 160, | |
| 79 | |
| ], | |
| "eval_loss": 1.7276182174682617, | |
| "eval_precisions": [ | |
| 44.18079096045198, | |
| 21.08843537414966, | |
| 13.43408900083963, | |
| 8.44017094017094 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 49.531, | |
| "eval_samples_per_second": 6.057, | |
| "eval_score": 3.3384697611529055, | |
| "eval_steps_per_second": 6.057, | |
| "eval_sys_len": 1770, | |
| "eval_totals": [ | |
| 1770, | |
| 1470, | |
| 1191, | |
| 936 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 4407 | |
| }, | |
| { | |
| "epoch": 13.22, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.1902866242038214e-05, | |
| "loss": 0.791, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.124044585987261e-05, | |
| "loss": 0.7591, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 1.0578025477707005e-05, | |
| "loss": 0.7651, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bp": 0.17762954994257873, | |
| "eval_counts": [ | |
| 784, | |
| 310, | |
| 160, | |
| 81 | |
| ], | |
| "eval_loss": 1.788110375404358, | |
| "eval_precisions": [ | |
| 44.97991967871486, | |
| 21.48302148302148, | |
| 13.605442176870747, | |
| 8.653846153846153 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 48.2995, | |
| "eval_samples_per_second": 6.211, | |
| "eval_score": 3.262302153360586, | |
| "eval_steps_per_second": 6.211, | |
| "eval_sys_len": 1743, | |
| "eval_totals": [ | |
| 1743, | |
| 1443, | |
| 1176, | |
| 936 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 4746 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 9.9156050955414e-06, | |
| "loss": 0.7389, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 9.253184713375794e-06, | |
| "loss": 0.7292, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bp": 0.19451009506119815, | |
| "eval_counts": [ | |
| 756, | |
| 286, | |
| 139, | |
| 66 | |
| ], | |
| "eval_loss": 1.8334678411483765, | |
| "eval_precisions": [ | |
| 41.930116472545755, | |
| 19.028609447771125, | |
| 11.356209150326798, | |
| 6.790123456790123 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 50.9389, | |
| "eval_samples_per_second": 5.889, | |
| "eval_score": 3.063396343878355, | |
| "eval_steps_per_second": 5.889, | |
| "eval_sys_len": 1803, | |
| "eval_totals": [ | |
| 1803, | |
| 1503, | |
| 1224, | |
| 972 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 8.59076433121019e-06, | |
| "loss": 0.7051, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 15.48, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 7.928343949044585e-06, | |
| "loss": 0.6872, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 15.86, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 7.265923566878981e-06, | |
| "loss": 0.6935, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bp": 0.2204937574447589, | |
| "eval_counts": [ | |
| 792, | |
| 311, | |
| 160, | |
| 80 | |
| ], | |
| "eval_loss": 1.8358988761901855, | |
| "eval_precisions": [ | |
| 41.83835182250396, | |
| 19.522912743251727, | |
| 12.121212121212121, | |
| 7.469654528478058 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 51.7295, | |
| "eval_samples_per_second": 5.799, | |
| "eval_score": 3.6361160482722528, | |
| "eval_steps_per_second": 5.799, | |
| "eval_sys_len": 1893, | |
| "eval_totals": [ | |
| 1893, | |
| 1593, | |
| 1320, | |
| 1071 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 5424 | |
| }, | |
| { | |
| "epoch": 16.24, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 6.6035031847133755e-06, | |
| "loss": 0.6808, | |
| "step": 5504 | |
| }, | |
| { | |
| "epoch": 16.61, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 5.94108280254777e-06, | |
| "loss": 0.6649, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "gpu_memory": 3076460544, | |
| "learning_rate": 5.278662420382165e-06, | |
| "loss": 0.6902, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bp": 0.27059488659440983, | |
| "eval_counts": [ | |
| 875, | |
| 346, | |
| 196, | |
| 113 | |
| ], | |
| "eval_loss": 1.8474984169006348, | |
| "eval_precisions": [ | |
| 42.45511887433285, | |
| 19.64792731402612, | |
| 13.198653198653199, | |
| 9.254709254709255 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 53.1106, | |
| "eval_samples_per_second": 5.649, | |
| "eval_score": 4.834531406134382, | |
| "eval_steps_per_second": 5.649, | |
| "eval_sys_len": 2061, | |
| "eval_totals": [ | |
| 2061, | |
| 1761, | |
| 1485, | |
| 1221 | |
| ], | |
| "gpu_memory": 3076460544, | |
| "step": 5763 | |
| } | |
| ], | |
| "max_steps": 6780, | |
| "num_train_epochs": 20, | |
| "total_flos": 5005888091043840.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |