| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 49.01960784313726, | |
| "global_step": 12500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.9e-05, | |
| "loss": 2.0116, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.4553, | |
| "eval_f1_macro": 0.1796, | |
| "eval_gen_len": 2.3716, | |
| "eval_loss": 0.7925581336021423, | |
| "eval_precision": 0.2144, | |
| "eval_recall": 0.2023, | |
| "eval_runtime": 18.41, | |
| "eval_samples_per_second": 264.584, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.779, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.4911, | |
| "eval_f1_macro": 0.2749, | |
| "eval_gen_len": 2.2535, | |
| "eval_loss": 0.752581775188446, | |
| "eval_precision": 0.4243, | |
| "eval_recall": 0.2719, | |
| "eval_runtime": 18.4504, | |
| "eval_samples_per_second": 264.005, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.6777, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.503, | |
| "eval_f1_macro": 0.2851, | |
| "eval_gen_len": 2.3207, | |
| "eval_loss": 0.7490188479423523, | |
| "eval_precision": 0.3735, | |
| "eval_recall": 0.288, | |
| "eval_runtime": 18.569, | |
| "eval_samples_per_second": 262.318, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.5968, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5013, | |
| "eval_f1_macro": 0.2994, | |
| "eval_gen_len": 2.2948, | |
| "eval_loss": 0.7779901027679443, | |
| "eval_precision": 0.3833, | |
| "eval_recall": 0.2971, | |
| "eval_runtime": 18.5932, | |
| "eval_samples_per_second": 261.978, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.5367, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5044, | |
| "eval_f1_macro": 0.3052, | |
| "eval_gen_len": 2.3751, | |
| "eval_loss": 0.8049420118331909, | |
| "eval_precision": 0.3699, | |
| "eval_recall": 0.3016, | |
| "eval_runtime": 18.6267, | |
| "eval_samples_per_second": 261.506, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.475, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5005, | |
| "eval_f1_macro": 0.3111, | |
| "eval_gen_len": 2.3361, | |
| "eval_loss": 0.8648290038108826, | |
| "eval_precision": 0.3545, | |
| "eval_recall": 0.3096, | |
| "eval_runtime": 18.6251, | |
| "eval_samples_per_second": 261.529, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.4194, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.4868, | |
| "eval_f1_macro": 0.2957, | |
| "eval_gen_len": 2.3318, | |
| "eval_loss": 0.9633024334907532, | |
| "eval_precision": 0.3316, | |
| "eval_recall": 0.2895, | |
| "eval_runtime": 18.3899, | |
| "eval_samples_per_second": 264.873, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.3619, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.4925, | |
| "eval_f1_macro": 0.2992, | |
| "eval_gen_len": 2.3545, | |
| "eval_loss": 0.9422969818115234, | |
| "eval_precision": 0.3316, | |
| "eval_recall": 0.2964, | |
| "eval_runtime": 18.6364, | |
| "eval_samples_per_second": 261.37, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.3126, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.4718, | |
| "eval_f1_macro": 0.2899, | |
| "eval_gen_len": 2.4246, | |
| "eval_loss": 1.086911678314209, | |
| "eval_precision": 0.3039, | |
| "eval_recall": 0.2956, | |
| "eval_runtime": 18.6675, | |
| "eval_samples_per_second": 260.935, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 4e-05, | |
| "loss": 0.2714, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.4757, | |
| "eval_f1_macro": 0.2959, | |
| "eval_gen_len": 2.3593, | |
| "eval_loss": 1.1425822973251343, | |
| "eval_precision": 0.3162, | |
| "eval_recall": 0.2919, | |
| "eval_runtime": 18.4062, | |
| "eval_samples_per_second": 264.64, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 0.2295, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.4691, | |
| "eval_f1_macro": 0.289, | |
| "eval_gen_len": 2.4087, | |
| "eval_loss": 1.189605951309204, | |
| "eval_precision": 0.304, | |
| "eval_recall": 0.2901, | |
| "eval_runtime": 18.6533, | |
| "eval_samples_per_second": 261.133, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.1945, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.4703, | |
| "eval_f1_macro": 0.3098, | |
| "eval_gen_len": 2.38, | |
| "eval_loss": 1.2930792570114136, | |
| "eval_precision": 0.3263, | |
| "eval_recall": 0.3061, | |
| "eval_runtime": 18.3552, | |
| "eval_samples_per_second": 265.375, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 3.7e-05, | |
| "loss": 0.1622, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.4648, | |
| "eval_f1_macro": 0.3084, | |
| "eval_gen_len": 2.4237, | |
| "eval_loss": 1.3369712829589844, | |
| "eval_precision": 0.3194, | |
| "eval_recall": 0.3082, | |
| "eval_runtime": 18.3904, | |
| "eval_samples_per_second": 264.867, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.1339, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.4757, | |
| "eval_f1_macro": 0.3098, | |
| "eval_gen_len": 2.3874, | |
| "eval_loss": 1.5157912969589233, | |
| "eval_precision": 0.3258, | |
| "eval_recall": 0.3087, | |
| "eval_runtime": 18.3743, | |
| "eval_samples_per_second": 265.099, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.1195, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.4683, | |
| "eval_f1_macro": 0.3044, | |
| "eval_gen_len": 2.403, | |
| "eval_loss": 1.5008689165115356, | |
| "eval_precision": 0.3135, | |
| "eval_recall": 0.3059, | |
| "eval_runtime": 18.3877, | |
| "eval_samples_per_second": 264.905, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 0.1019, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.4738, | |
| "eval_f1_macro": 0.3054, | |
| "eval_gen_len": 2.3734, | |
| "eval_loss": 1.5503424406051636, | |
| "eval_precision": 0.3235, | |
| "eval_recall": 0.3035, | |
| "eval_runtime": 18.347, | |
| "eval_samples_per_second": 265.493, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 3.3e-05, | |
| "loss": 0.0853, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.4759, | |
| "eval_f1_macro": 0.305, | |
| "eval_gen_len": 2.3954, | |
| "eval_loss": 1.7289695739746094, | |
| "eval_precision": 0.3219, | |
| "eval_recall": 0.3014, | |
| "eval_runtime": 18.3899, | |
| "eval_samples_per_second": 264.873, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.0773, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.473, | |
| "eval_f1_macro": 0.3045, | |
| "eval_gen_len": 2.4233, | |
| "eval_loss": 1.7796562910079956, | |
| "eval_precision": 0.321, | |
| "eval_recall": 0.2996, | |
| "eval_runtime": 18.3356, | |
| "eval_samples_per_second": 265.658, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 3.1e-05, | |
| "loss": 0.0681, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.4638, | |
| "eval_f1_macro": 0.2984, | |
| "eval_gen_len": 2.3843, | |
| "eval_loss": 1.753821611404419, | |
| "eval_precision": 0.3106, | |
| "eval_recall": 0.2996, | |
| "eval_runtime": 18.3825, | |
| "eval_samples_per_second": 264.98, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0617, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.4638, | |
| "eval_f1_macro": 0.3047, | |
| "eval_gen_len": 2.4092, | |
| "eval_loss": 1.868014931678772, | |
| "eval_precision": 0.3154, | |
| "eval_recall": 0.3036, | |
| "eval_runtime": 18.3921, | |
| "eval_samples_per_second": 264.841, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.0537, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.4642, | |
| "eval_f1_macro": 0.3035, | |
| "eval_gen_len": 2.3738, | |
| "eval_loss": 1.9632675647735596, | |
| "eval_precision": 0.3201, | |
| "eval_recall": 0.2996, | |
| "eval_runtime": 18.4289, | |
| "eval_samples_per_second": 264.314, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.0473, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.4726, | |
| "eval_f1_macro": 0.303, | |
| "eval_gen_len": 2.3862, | |
| "eval_loss": 1.8952040672302246, | |
| "eval_precision": 0.3214, | |
| "eval_recall": 0.2998, | |
| "eval_runtime": 18.3834, | |
| "eval_samples_per_second": 264.967, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 0.0465, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.466, | |
| "eval_f1_macro": 0.3035, | |
| "eval_gen_len": 2.4024, | |
| "eval_loss": 1.9048091173171997, | |
| "eval_precision": 0.3173, | |
| "eval_recall": 0.3012, | |
| "eval_runtime": 18.3842, | |
| "eval_samples_per_second": 264.956, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.0406, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.4634, | |
| "eval_f1_macro": 0.3068, | |
| "eval_gen_len": 2.426, | |
| "eval_loss": 2.021580696105957, | |
| "eval_precision": 0.3153, | |
| "eval_recall": 0.3044, | |
| "eval_runtime": 18.3498, | |
| "eval_samples_per_second": 265.452, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0358, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.4742, | |
| "eval_f1_macro": 0.3003, | |
| "eval_gen_len": 2.3597, | |
| "eval_loss": 2.116412401199341, | |
| "eval_precision": 0.3236, | |
| "eval_recall": 0.2931, | |
| "eval_runtime": 18.3974, | |
| "eval_samples_per_second": 264.766, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.0353, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.4668, | |
| "eval_f1_macro": 0.3004, | |
| "eval_gen_len": 2.4484, | |
| "eval_loss": 2.0235698223114014, | |
| "eval_precision": 0.3084, | |
| "eval_recall": 0.2995, | |
| "eval_runtime": 18.3702, | |
| "eval_samples_per_second": 265.158, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 0.0314, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.464, | |
| "eval_f1_macro": 0.3013, | |
| "eval_gen_len": 2.4204, | |
| "eval_loss": 2.124769926071167, | |
| "eval_precision": 0.3066, | |
| "eval_recall": 0.3019, | |
| "eval_runtime": 18.4132, | |
| "eval_samples_per_second": 264.538, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.0296, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.4722, | |
| "eval_f1_macro": 0.2997, | |
| "eval_gen_len": 2.3732, | |
| "eval_loss": 2.124000072479248, | |
| "eval_precision": 0.3261, | |
| "eval_recall": 0.294, | |
| "eval_runtime": 18.3663, | |
| "eval_samples_per_second": 265.213, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.0274, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.467, | |
| "eval_f1_macro": 0.3011, | |
| "eval_gen_len": 2.3636, | |
| "eval_loss": 2.1549148559570312, | |
| "eval_precision": 0.3197, | |
| "eval_recall": 0.2963, | |
| "eval_runtime": 18.4004, | |
| "eval_samples_per_second": 264.722, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0248, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.4697, | |
| "eval_f1_macro": 0.2982, | |
| "eval_gen_len": 2.3798, | |
| "eval_loss": 2.2189269065856934, | |
| "eval_precision": 0.3152, | |
| "eval_recall": 0.2951, | |
| "eval_runtime": 18.3622, | |
| "eval_samples_per_second": 265.273, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.0219, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.4736, | |
| "eval_f1_macro": 0.3048, | |
| "eval_gen_len": 2.3942, | |
| "eval_loss": 2.3595752716064453, | |
| "eval_precision": 0.3157, | |
| "eval_recall": 0.3049, | |
| "eval_runtime": 18.3521, | |
| "eval_samples_per_second": 265.419, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.0205, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.4705, | |
| "eval_f1_macro": 0.3013, | |
| "eval_gen_len": 2.3909, | |
| "eval_loss": 2.4317517280578613, | |
| "eval_precision": 0.3151, | |
| "eval_recall": 0.3001, | |
| "eval_runtime": 18.3589, | |
| "eval_samples_per_second": 265.321, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 0.0189, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.4767, | |
| "eval_f1_macro": 0.3084, | |
| "eval_gen_len": 2.3751, | |
| "eval_loss": 2.4803547859191895, | |
| "eval_precision": 0.3242, | |
| "eval_recall": 0.3045, | |
| "eval_runtime": 18.4166, | |
| "eval_samples_per_second": 264.489, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.0211, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.4699, | |
| "eval_f1_macro": 0.304, | |
| "eval_gen_len": 2.4102, | |
| "eval_loss": 2.371544361114502, | |
| "eval_precision": 0.3223, | |
| "eval_recall": 0.2999, | |
| "eval_runtime": 18.4037, | |
| "eval_samples_per_second": 264.675, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0156, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.4753, | |
| "eval_f1_macro": 0.3014, | |
| "eval_gen_len": 2.395, | |
| "eval_loss": 2.55307674407959, | |
| "eval_precision": 0.3136, | |
| "eval_recall": 0.2973, | |
| "eval_runtime": 18.4323, | |
| "eval_samples_per_second": 264.264, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.0179, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.4829, | |
| "eval_f1_macro": 0.3055, | |
| "eval_gen_len": 2.3621, | |
| "eval_loss": 2.49765682220459, | |
| "eval_precision": 0.3308, | |
| "eval_recall": 0.2991, | |
| "eval_runtime": 18.4231, | |
| "eval_samples_per_second": 264.396, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.0155, | |
| "step": 9435 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.4808, | |
| "eval_f1_macro": 0.3086, | |
| "eval_gen_len": 2.3775, | |
| "eval_loss": 2.4960439205169678, | |
| "eval_precision": 0.3245, | |
| "eval_recall": 0.3039, | |
| "eval_runtime": 18.4083, | |
| "eval_samples_per_second": 264.609, | |
| "step": 9435 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.0154, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.4652, | |
| "eval_f1_macro": 0.3047, | |
| "eval_gen_len": 2.3983, | |
| "eval_loss": 2.59112811088562, | |
| "eval_precision": 0.3196, | |
| "eval_recall": 0.2998, | |
| "eval_runtime": 18.406, | |
| "eval_samples_per_second": 264.642, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 0.0144, | |
| "step": 9945 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.473, | |
| "eval_f1_macro": 0.3097, | |
| "eval_gen_len": 2.3634, | |
| "eval_loss": 2.646393299102783, | |
| "eval_precision": 0.3283, | |
| "eval_recall": 0.3046, | |
| "eval_runtime": 18.3793, | |
| "eval_samples_per_second": 265.027, | |
| "step": 9945 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0135, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.4695, | |
| "eval_f1_macro": 0.3035, | |
| "eval_gen_len": 2.3812, | |
| "eval_loss": 2.711395740509033, | |
| "eval_precision": 0.3185, | |
| "eval_recall": 0.2989, | |
| "eval_runtime": 18.3802, | |
| "eval_samples_per_second": 265.013, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "learning_rate": 9e-06, | |
| "loss": 0.0132, | |
| "step": 10455 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.4707, | |
| "eval_f1_macro": 0.307, | |
| "eval_gen_len": 2.4055, | |
| "eval_loss": 2.706991672515869, | |
| "eval_precision": 0.3218, | |
| "eval_recall": 0.3029, | |
| "eval_runtime": 18.4175, | |
| "eval_samples_per_second": 264.476, | |
| "step": 10455 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.0113, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.4705, | |
| "eval_f1_macro": 0.3041, | |
| "eval_gen_len": 2.3833, | |
| "eval_loss": 2.7490220069885254, | |
| "eval_precision": 0.3226, | |
| "eval_recall": 0.3009, | |
| "eval_runtime": 18.3891, | |
| "eval_samples_per_second": 264.885, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 0.0104, | |
| "step": 10965 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.4707, | |
| "eval_f1_macro": 0.3094, | |
| "eval_gen_len": 2.3868, | |
| "eval_loss": 2.8594679832458496, | |
| "eval_precision": 0.3257, | |
| "eval_recall": 0.304, | |
| "eval_runtime": 18.4068, | |
| "eval_samples_per_second": 264.631, | |
| "step": 10965 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "learning_rate": 6e-06, | |
| "loss": 0.0125, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.4705, | |
| "eval_f1_macro": 0.3068, | |
| "eval_gen_len": 2.3921, | |
| "eval_loss": 2.777812957763672, | |
| "eval_precision": 0.3207, | |
| "eval_recall": 0.3034, | |
| "eval_runtime": 18.3628, | |
| "eval_samples_per_second": 265.264, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0094, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.4753, | |
| "eval_f1_macro": 0.3102, | |
| "eval_gen_len": 2.4036, | |
| "eval_loss": 2.7739901542663574, | |
| "eval_precision": 0.3301, | |
| "eval_recall": 0.3028, | |
| "eval_runtime": 18.4413, | |
| "eval_samples_per_second": 264.135, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0094, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.4755, | |
| "eval_f1_macro": 0.3122, | |
| "eval_gen_len": 2.3979, | |
| "eval_loss": 2.759046792984009, | |
| "eval_precision": 0.3284, | |
| "eval_recall": 0.3064, | |
| "eval_runtime": 18.4399, | |
| "eval_samples_per_second": 264.155, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0085, | |
| "step": 11985 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.4759, | |
| "eval_f1_macro": 0.3092, | |
| "eval_gen_len": 2.3903, | |
| "eval_loss": 2.8234634399414062, | |
| "eval_precision": 0.3248, | |
| "eval_recall": 0.304, | |
| "eval_runtime": 18.4435, | |
| "eval_samples_per_second": 264.104, | |
| "step": 11985 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.0087, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.474, | |
| "eval_f1_macro": 0.3097, | |
| "eval_gen_len": 2.3977, | |
| "eval_loss": 2.834430694580078, | |
| "eval_precision": 0.3247, | |
| "eval_recall": 0.3045, | |
| "eval_runtime": 18.4163, | |
| "eval_samples_per_second": 264.494, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.009, | |
| "step": 12495 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.4732, | |
| "eval_f1_macro": 0.3081, | |
| "eval_gen_len": 2.3868, | |
| "eval_loss": 2.841893196105957, | |
| "eval_precision": 0.3256, | |
| "eval_recall": 0.3018, | |
| "eval_runtime": 18.4481, | |
| "eval_samples_per_second": 264.037, | |
| "step": 12495 | |
| } | |
| ], | |
| "max_steps": 12750, | |
| "num_train_epochs": 50, | |
| "total_flos": 1.14107158131029e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |