| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 16.99889502762431, | |
| "global_step": 5763, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.38, | |
| "gpu_memory": 2825061888, | |
| "learning_rate": 8.32e-06, | |
| "loss": 4.6062, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.664e-05, | |
| "loss": 2.7746, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bp": 0.021341648192077716, | |
| "eval_counts": [ | |
| 342, | |
| 58, | |
| 18, | |
| 6 | |
| ], | |
| "eval_loss": 2.035790205001831, | |
| "eval_precisions": [ | |
| 34.862385321100916, | |
| 8.516886930983848, | |
| 4.651162790697675, | |
| 2.3529411764705883 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 35.7733, | |
| "eval_samples_per_second": 8.386, | |
| "eval_score": 0.16113155714674393, | |
| "eval_steps_per_second": 8.386, | |
| "eval_sys_len": 981, | |
| "eval_totals": [ | |
| 981, | |
| 681, | |
| 387, | |
| 255 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.4959999999999998e-05, | |
| "loss": 2.2201, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 3.2437898089171974e-05, | |
| "loss": 1.9599, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 3.1775477707006364e-05, | |
| "loss": 1.8228, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bp": 0.1919535866757935, | |
| "eval_counts": [ | |
| 640, | |
| 199, | |
| 91, | |
| 36 | |
| ], | |
| "eval_loss": 1.740516185760498, | |
| "eval_precisions": [ | |
| 35.67447045707915, | |
| 13.319946452476573, | |
| 7.526881720430108, | |
| 3.896103896103896 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 51.5291, | |
| "eval_samples_per_second": 5.822, | |
| "eval_score": 2.0855597670386987, | |
| "eval_steps_per_second": 5.822, | |
| "eval_sys_len": 1794, | |
| "eval_totals": [ | |
| 1794, | |
| 1494, | |
| 1209, | |
| 924 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 3.111305732484076e-05, | |
| "loss": 1.7275, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 3.0450636942675155e-05, | |
| "loss": 1.614, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bp": 0.157930307305936, | |
| "eval_counts": [ | |
| 662, | |
| 239, | |
| 127, | |
| 66 | |
| ], | |
| "eval_loss": 1.6653738021850586, | |
| "eval_precisions": [ | |
| 39.61699581089168, | |
| 17.432530999270604, | |
| 11.598173515981735, | |
| 8.02919708029197 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 46.1755, | |
| "eval_samples_per_second": 6.497, | |
| "eval_score": 2.515019790343611, | |
| "eval_steps_per_second": 6.497, | |
| "eval_sys_len": 1671, | |
| "eval_totals": [ | |
| 1671, | |
| 1371, | |
| 1095, | |
| 822 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.9788216560509553e-05, | |
| "loss": 1.561, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.9125796178343946e-05, | |
| "loss": 1.4029, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.8463375796178344e-05, | |
| "loss": 1.4541, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bp": 0.06814983706797134, | |
| "eval_counts": [ | |
| 481, | |
| 162, | |
| 79, | |
| 37 | |
| ], | |
| "eval_loss": 1.6631227731704712, | |
| "eval_precisions": [ | |
| 37.286821705426355, | |
| 16.363636363636363, | |
| 10.881542699724518, | |
| 6.630824372759856 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 45.6186, | |
| "eval_samples_per_second": 6.576, | |
| "eval_score": 0.9871612910485801, | |
| "eval_steps_per_second": 6.576, | |
| "eval_sys_len": 1290, | |
| "eval_totals": [ | |
| 1290, | |
| 990, | |
| 726, | |
| 558 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.7800955414012737e-05, | |
| "loss": 1.4088, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.713853503184713e-05, | |
| "loss": 1.3351, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.647611464968153e-05, | |
| "loss": 1.3229, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bp": 0.23640264658354365, | |
| "eval_counts": [ | |
| 633, | |
| 216, | |
| 105, | |
| 58 | |
| ], | |
| "eval_loss": 1.6731408834457397, | |
| "eval_precisions": [ | |
| 32.5115562403698, | |
| 13.114754098360656, | |
| 7.658643326039387, | |
| 5.239385727190605 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 51.3595, | |
| "eval_samples_per_second": 5.841, | |
| "eval_score": 2.703708498377427, | |
| "eval_steps_per_second": 5.841, | |
| "eval_sys_len": 1947, | |
| "eval_totals": [ | |
| 1947, | |
| 1647, | |
| 1371, | |
| 1107 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.5813694267515922e-05, | |
| "loss": 1.2429, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.515127388535032e-05, | |
| "loss": 1.2329, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bp": 0.07532276614122083, | |
| "eval_counts": [ | |
| 579, | |
| 202, | |
| 98, | |
| 55 | |
| ], | |
| "eval_loss": 1.6539884805679321, | |
| "eval_precisions": [ | |
| 43.665158371040725, | |
| 19.68810916179337, | |
| 13.01460823373174, | |
| 9.499136442141623 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 46.8126, | |
| "eval_samples_per_second": 6.409, | |
| "eval_score": 1.3600028829560191, | |
| "eval_steps_per_second": 6.409, | |
| "eval_sys_len": 1326, | |
| "eval_totals": [ | |
| 1326, | |
| 1026, | |
| 753, | |
| 579 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.4488853503184713e-05, | |
| "loss": 1.2504, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.3826433121019104e-05, | |
| "loss": 1.1421, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.31640127388535e-05, | |
| "loss": 1.1795, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bp": 0.17181721996808308, | |
| "eval_counts": [ | |
| 768, | |
| 262, | |
| 133, | |
| 70 | |
| ], | |
| "eval_loss": 1.667359471321106, | |
| "eval_precisions": [ | |
| 44.599303135888505, | |
| 18.424753867791843, | |
| 11.697449428320141, | |
| 8.018327605956472 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 50.5053, | |
| "eval_samples_per_second": 5.94, | |
| "eval_score": 2.862812289607837, | |
| "eval_steps_per_second": 5.94, | |
| "eval_sys_len": 1722, | |
| "eval_totals": [ | |
| 1722, | |
| 1422, | |
| 1137, | |
| 873 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.2501592356687895e-05, | |
| "loss": 1.0902, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.183917197452229e-05, | |
| "loss": 1.0705, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.1176751592356686e-05, | |
| "loss": 1.1128, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bp": 0.2669632643662467, | |
| "eval_counts": [ | |
| 866, | |
| 300, | |
| 163, | |
| 96 | |
| ], | |
| "eval_loss": 1.708727240562439, | |
| "eval_precisions": [ | |
| 42.26451927769644, | |
| 17.152658662092623, | |
| 11.20274914089347, | |
| 8.226221079691516 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 53.4181, | |
| "eval_samples_per_second": 5.616, | |
| "eval_score": 4.291998839505449, | |
| "eval_steps_per_second": 5.616, | |
| "eval_sys_len": 2049, | |
| "eval_totals": [ | |
| 2049, | |
| 1749, | |
| 1455, | |
| 1167 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 2.051433121019108e-05, | |
| "loss": 1.0162, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.9851910828025477e-05, | |
| "loss": 1.0183, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bp": 0.09731210069014802, | |
| "eval_counts": [ | |
| 678, | |
| 233, | |
| 102, | |
| 45 | |
| ], | |
| "eval_loss": 1.7135441303253174, | |
| "eval_precisions": [ | |
| 47.47899159663866, | |
| 20.656028368794328, | |
| 12.23021582733813, | |
| 7.142857142857143 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 50.1778, | |
| "eval_samples_per_second": 5.979, | |
| "eval_score": 1.664870454299152, | |
| "eval_steps_per_second": 5.979, | |
| "eval_sys_len": 1428, | |
| "eval_totals": [ | |
| 1428, | |
| 1128, | |
| 834, | |
| 630 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 3051 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.918949044585987e-05, | |
| "loss": 1.0367, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.8527070063694264e-05, | |
| "loss": 0.9645, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.786464968152866e-05, | |
| "loss": 0.9616, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bp": 0.22930577411313655, | |
| "eval_counts": [ | |
| 768, | |
| 280, | |
| 145, | |
| 80 | |
| ], | |
| "eval_loss": 1.736754298210144, | |
| "eval_precisions": [ | |
| 39.93759750390016, | |
| 17.25200246457178, | |
| 10.837070254110612, | |
| 7.428040854224698 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 57.956, | |
| "eval_samples_per_second": 5.176, | |
| "eval_score": 3.518980787396955, | |
| "eval_steps_per_second": 5.176, | |
| "eval_sys_len": 1923, | |
| "eval_totals": [ | |
| 1923, | |
| 1623, | |
| 1338, | |
| 1077 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.7202229299363055e-05, | |
| "loss": 0.9403, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.6539808917197452e-05, | |
| "loss": 0.9059, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.5877388535031846e-05, | |
| "loss": 0.9249, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bp": 0.1751321349922995, | |
| "eval_counts": [ | |
| 748, | |
| 240, | |
| 115, | |
| 63 | |
| ], | |
| "eval_loss": 1.782728672027588, | |
| "eval_precisions": [ | |
| 43.13725490196079, | |
| 16.736401673640167, | |
| 10.008703220191471, | |
| 7.11864406779661 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 54.5903, | |
| "eval_samples_per_second": 5.495, | |
| "eval_score": 2.6374744638290037, | |
| "eval_steps_per_second": 5.495, | |
| "eval_sys_len": 1734, | |
| "eval_totals": [ | |
| 1734, | |
| 1434, | |
| 1149, | |
| 885 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 3729 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.5214968152866242e-05, | |
| "loss": 0.8587, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.4552547770700635e-05, | |
| "loss": 0.8739, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bp": 0.1555153512571023, | |
| "eval_counts": [ | |
| 739, | |
| 267, | |
| 125, | |
| 60 | |
| ], | |
| "eval_loss": 1.8148356676101685, | |
| "eval_precisions": [ | |
| 44.46450060168472, | |
| 19.60352422907489, | |
| 11.671335200746965, | |
| 7.462686567164179 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 53.3032, | |
| "eval_samples_per_second": 5.628, | |
| "eval_score": 2.581452241674501, | |
| "eval_steps_per_second": 5.628, | |
| "eval_sys_len": 1662, | |
| "eval_totals": [ | |
| 1662, | |
| 1362, | |
| 1071, | |
| 804 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.3890127388535031e-05, | |
| "loss": 0.8413, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.3227707006369426e-05, | |
| "loss": 0.8195, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 12.84, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.2565286624203822e-05, | |
| "loss": 0.823, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bp": 0.2187397058134024, | |
| "eval_counts": [ | |
| 843, | |
| 326, | |
| 173, | |
| 91 | |
| ], | |
| "eval_loss": 1.8146471977233887, | |
| "eval_precisions": [ | |
| 44.67408585055644, | |
| 20.5419029615627, | |
| 13.442113442113442, | |
| 9.027777777777779 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 55.2439, | |
| "eval_samples_per_second": 5.43, | |
| "eval_score": 3.995892671984357, | |
| "eval_steps_per_second": 5.43, | |
| "eval_sys_len": 1887, | |
| "eval_totals": [ | |
| 1887, | |
| 1587, | |
| 1287, | |
| 1008 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 4407 | |
| }, | |
| { | |
| "epoch": 13.22, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.1902866242038214e-05, | |
| "loss": 0.7992, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.124044585987261e-05, | |
| "loss": 0.7702, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 1.0578025477707005e-05, | |
| "loss": 0.7824, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bp": 0.16524048903893263, | |
| "eval_counts": [ | |
| 719, | |
| 244, | |
| 108, | |
| 52 | |
| ], | |
| "eval_loss": 1.8748054504394531, | |
| "eval_precisions": [ | |
| 42.34393404004712, | |
| 17.453505007153076, | |
| 9.72972972972973, | |
| 6.081871345029239 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 54.7238, | |
| "eval_samples_per_second": 5.482, | |
| "eval_score": 2.389568242739576, | |
| "eval_steps_per_second": 5.482, | |
| "eval_sys_len": 1698, | |
| "eval_totals": [ | |
| 1698, | |
| 1398, | |
| 1110, | |
| 855 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 4746 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 9.9156050955414e-06, | |
| "loss": 0.7425, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 9.253184713375794e-06, | |
| "loss": 0.7501, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bp": 0.1953640836862138, | |
| "eval_counts": [ | |
| 762, | |
| 263, | |
| 131, | |
| 74 | |
| ], | |
| "eval_loss": 1.9026106595993042, | |
| "eval_precisions": [ | |
| 42.19269102990033, | |
| 17.46347941567065, | |
| 10.835401157981803, | |
| 7.781282860147213 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 56.8759, | |
| "eval_samples_per_second": 5.275, | |
| "eval_score": 3.0843295492719487, | |
| "eval_steps_per_second": 5.275, | |
| "eval_sys_len": 1806, | |
| "eval_totals": [ | |
| 1806, | |
| 1506, | |
| 1209, | |
| 951 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 8.59076433121019e-06, | |
| "loss": 0.7315, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 15.48, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 7.928343949044585e-06, | |
| "loss": 0.7011, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 15.86, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 7.265923566878981e-06, | |
| "loss": 0.7139, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bp": 0.23551335586741148, | |
| "eval_counts": [ | |
| 816, | |
| 277, | |
| 129, | |
| 72 | |
| ], | |
| "eval_loss": 1.9286922216415405, | |
| "eval_precisions": [ | |
| 41.97530864197531, | |
| 16.849148418491485, | |
| 9.57683741648107, | |
| 6.70391061452514 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 58.3566, | |
| "eval_samples_per_second": 5.141, | |
| "eval_score": 3.4379225352028846, | |
| "eval_steps_per_second": 5.141, | |
| "eval_sys_len": 1944, | |
| "eval_totals": [ | |
| 1944, | |
| 1644, | |
| 1347, | |
| 1074 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 5424 | |
| }, | |
| { | |
| "epoch": 16.24, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 6.6035031847133755e-06, | |
| "loss": 0.689, | |
| "step": 5504 | |
| }, | |
| { | |
| "epoch": 16.61, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 5.94108280254777e-06, | |
| "loss": 0.6788, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "gpu_memory": 2903643648, | |
| "learning_rate": 5.278662420382165e-06, | |
| "loss": 0.7053, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bp": 0.2934278208519596, | |
| "eval_counts": [ | |
| 886, | |
| 340, | |
| 171, | |
| 99 | |
| ], | |
| "eval_loss": 1.9354726076126099, | |
| "eval_precisions": [ | |
| 41.47940074906367, | |
| 18.51851851851852, | |
| 11.089494163424124, | |
| 7.746478873239437 | |
| ], | |
| "eval_ref_len": 4755, | |
| "eval_runtime": 60.6492, | |
| "eval_samples_per_second": 4.946, | |
| "eval_score": 4.702891790634525, | |
| "eval_steps_per_second": 4.946, | |
| "eval_sys_len": 2136, | |
| "eval_totals": [ | |
| 2136, | |
| 1836, | |
| 1542, | |
| 1278 | |
| ], | |
| "gpu_memory": 2903643648, | |
| "step": 5763 | |
| } | |
| ], | |
| "max_steps": 6780, | |
| "num_train_epochs": 20, | |
| "total_flos": 1765580040806400.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |