araT5-baseline / trainer_state.json
imen11111's picture
Upload trainer_state.json
14e09a5
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.01960784313726,
"global_step": 12500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.9e-05,
"loss": 2.0116,
"step": 255
},
{
"epoch": 1.0,
"eval_accuracy": 0.4553,
"eval_f1_macro": 0.1796,
"eval_gen_len": 2.3716,
"eval_loss": 0.7925581336021423,
"eval_precision": 0.2144,
"eval_recall": 0.2023,
"eval_runtime": 18.41,
"eval_samples_per_second": 264.584,
"step": 255
},
{
"epoch": 2.0,
"learning_rate": 4.8e-05,
"loss": 0.779,
"step": 510
},
{
"epoch": 2.0,
"eval_accuracy": 0.4911,
"eval_f1_macro": 0.2749,
"eval_gen_len": 2.2535,
"eval_loss": 0.752581775188446,
"eval_precision": 0.4243,
"eval_recall": 0.2719,
"eval_runtime": 18.4504,
"eval_samples_per_second": 264.005,
"step": 510
},
{
"epoch": 3.0,
"learning_rate": 4.7e-05,
"loss": 0.6777,
"step": 765
},
{
"epoch": 3.0,
"eval_accuracy": 0.503,
"eval_f1_macro": 0.2851,
"eval_gen_len": 2.3207,
"eval_loss": 0.7490188479423523,
"eval_precision": 0.3735,
"eval_recall": 0.288,
"eval_runtime": 18.569,
"eval_samples_per_second": 262.318,
"step": 765
},
{
"epoch": 4.0,
"learning_rate": 4.600000000000001e-05,
"loss": 0.5968,
"step": 1020
},
{
"epoch": 4.0,
"eval_accuracy": 0.5013,
"eval_f1_macro": 0.2994,
"eval_gen_len": 2.2948,
"eval_loss": 0.7779901027679443,
"eval_precision": 0.3833,
"eval_recall": 0.2971,
"eval_runtime": 18.5932,
"eval_samples_per_second": 261.978,
"step": 1020
},
{
"epoch": 5.0,
"learning_rate": 4.5e-05,
"loss": 0.5367,
"step": 1275
},
{
"epoch": 5.0,
"eval_accuracy": 0.5044,
"eval_f1_macro": 0.3052,
"eval_gen_len": 2.3751,
"eval_loss": 0.8049420118331909,
"eval_precision": 0.3699,
"eval_recall": 0.3016,
"eval_runtime": 18.6267,
"eval_samples_per_second": 261.506,
"step": 1275
},
{
"epoch": 6.0,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.475,
"step": 1530
},
{
"epoch": 6.0,
"eval_accuracy": 0.5005,
"eval_f1_macro": 0.3111,
"eval_gen_len": 2.3361,
"eval_loss": 0.8648290038108826,
"eval_precision": 0.3545,
"eval_recall": 0.3096,
"eval_runtime": 18.6251,
"eval_samples_per_second": 261.529,
"step": 1530
},
{
"epoch": 7.0,
"learning_rate": 4.3e-05,
"loss": 0.4194,
"step": 1785
},
{
"epoch": 7.0,
"eval_accuracy": 0.4868,
"eval_f1_macro": 0.2957,
"eval_gen_len": 2.3318,
"eval_loss": 0.9633024334907532,
"eval_precision": 0.3316,
"eval_recall": 0.2895,
"eval_runtime": 18.3899,
"eval_samples_per_second": 264.873,
"step": 1785
},
{
"epoch": 8.0,
"learning_rate": 4.2e-05,
"loss": 0.3619,
"step": 2040
},
{
"epoch": 8.0,
"eval_accuracy": 0.4925,
"eval_f1_macro": 0.2992,
"eval_gen_len": 2.3545,
"eval_loss": 0.9422969818115234,
"eval_precision": 0.3316,
"eval_recall": 0.2964,
"eval_runtime": 18.6364,
"eval_samples_per_second": 261.37,
"step": 2040
},
{
"epoch": 9.0,
"learning_rate": 4.1e-05,
"loss": 0.3126,
"step": 2295
},
{
"epoch": 9.0,
"eval_accuracy": 0.4718,
"eval_f1_macro": 0.2899,
"eval_gen_len": 2.4246,
"eval_loss": 1.086911678314209,
"eval_precision": 0.3039,
"eval_recall": 0.2956,
"eval_runtime": 18.6675,
"eval_samples_per_second": 260.935,
"step": 2295
},
{
"epoch": 10.0,
"learning_rate": 4e-05,
"loss": 0.2714,
"step": 2550
},
{
"epoch": 10.0,
"eval_accuracy": 0.4757,
"eval_f1_macro": 0.2959,
"eval_gen_len": 2.3593,
"eval_loss": 1.1425822973251343,
"eval_precision": 0.3162,
"eval_recall": 0.2919,
"eval_runtime": 18.4062,
"eval_samples_per_second": 264.64,
"step": 2550
},
{
"epoch": 11.0,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.2295,
"step": 2805
},
{
"epoch": 11.0,
"eval_accuracy": 0.4691,
"eval_f1_macro": 0.289,
"eval_gen_len": 2.4087,
"eval_loss": 1.189605951309204,
"eval_precision": 0.304,
"eval_recall": 0.2901,
"eval_runtime": 18.6533,
"eval_samples_per_second": 261.133,
"step": 2805
},
{
"epoch": 12.0,
"learning_rate": 3.8e-05,
"loss": 0.1945,
"step": 3060
},
{
"epoch": 12.0,
"eval_accuracy": 0.4703,
"eval_f1_macro": 0.3098,
"eval_gen_len": 2.38,
"eval_loss": 1.2930792570114136,
"eval_precision": 0.3263,
"eval_recall": 0.3061,
"eval_runtime": 18.3552,
"eval_samples_per_second": 265.375,
"step": 3060
},
{
"epoch": 13.0,
"learning_rate": 3.7e-05,
"loss": 0.1622,
"step": 3315
},
{
"epoch": 13.0,
"eval_accuracy": 0.4648,
"eval_f1_macro": 0.3084,
"eval_gen_len": 2.4237,
"eval_loss": 1.3369712829589844,
"eval_precision": 0.3194,
"eval_recall": 0.3082,
"eval_runtime": 18.3904,
"eval_samples_per_second": 264.867,
"step": 3315
},
{
"epoch": 14.0,
"learning_rate": 3.6e-05,
"loss": 0.1339,
"step": 3570
},
{
"epoch": 14.0,
"eval_accuracy": 0.4757,
"eval_f1_macro": 0.3098,
"eval_gen_len": 2.3874,
"eval_loss": 1.5157912969589233,
"eval_precision": 0.3258,
"eval_recall": 0.3087,
"eval_runtime": 18.3743,
"eval_samples_per_second": 265.099,
"step": 3570
},
{
"epoch": 15.0,
"learning_rate": 3.5e-05,
"loss": 0.1195,
"step": 3825
},
{
"epoch": 15.0,
"eval_accuracy": 0.4683,
"eval_f1_macro": 0.3044,
"eval_gen_len": 2.403,
"eval_loss": 1.5008689165115356,
"eval_precision": 0.3135,
"eval_recall": 0.3059,
"eval_runtime": 18.3877,
"eval_samples_per_second": 264.905,
"step": 3825
},
{
"epoch": 16.0,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.1019,
"step": 4080
},
{
"epoch": 16.0,
"eval_accuracy": 0.4738,
"eval_f1_macro": 0.3054,
"eval_gen_len": 2.3734,
"eval_loss": 1.5503424406051636,
"eval_precision": 0.3235,
"eval_recall": 0.3035,
"eval_runtime": 18.347,
"eval_samples_per_second": 265.493,
"step": 4080
},
{
"epoch": 17.0,
"learning_rate": 3.3e-05,
"loss": 0.0853,
"step": 4335
},
{
"epoch": 17.0,
"eval_accuracy": 0.4759,
"eval_f1_macro": 0.305,
"eval_gen_len": 2.3954,
"eval_loss": 1.7289695739746094,
"eval_precision": 0.3219,
"eval_recall": 0.3014,
"eval_runtime": 18.3899,
"eval_samples_per_second": 264.873,
"step": 4335
},
{
"epoch": 18.0,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0773,
"step": 4590
},
{
"epoch": 18.0,
"eval_accuracy": 0.473,
"eval_f1_macro": 0.3045,
"eval_gen_len": 2.4233,
"eval_loss": 1.7796562910079956,
"eval_precision": 0.321,
"eval_recall": 0.2996,
"eval_runtime": 18.3356,
"eval_samples_per_second": 265.658,
"step": 4590
},
{
"epoch": 19.0,
"learning_rate": 3.1e-05,
"loss": 0.0681,
"step": 4845
},
{
"epoch": 19.0,
"eval_accuracy": 0.4638,
"eval_f1_macro": 0.2984,
"eval_gen_len": 2.3843,
"eval_loss": 1.753821611404419,
"eval_precision": 0.3106,
"eval_recall": 0.2996,
"eval_runtime": 18.3825,
"eval_samples_per_second": 264.98,
"step": 4845
},
{
"epoch": 20.0,
"learning_rate": 3e-05,
"loss": 0.0617,
"step": 5100
},
{
"epoch": 20.0,
"eval_accuracy": 0.4638,
"eval_f1_macro": 0.3047,
"eval_gen_len": 2.4092,
"eval_loss": 1.868014931678772,
"eval_precision": 0.3154,
"eval_recall": 0.3036,
"eval_runtime": 18.3921,
"eval_samples_per_second": 264.841,
"step": 5100
},
{
"epoch": 21.0,
"learning_rate": 2.9e-05,
"loss": 0.0537,
"step": 5355
},
{
"epoch": 21.0,
"eval_accuracy": 0.4642,
"eval_f1_macro": 0.3035,
"eval_gen_len": 2.3738,
"eval_loss": 1.9632675647735596,
"eval_precision": 0.3201,
"eval_recall": 0.2996,
"eval_runtime": 18.4289,
"eval_samples_per_second": 264.314,
"step": 5355
},
{
"epoch": 22.0,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0473,
"step": 5610
},
{
"epoch": 22.0,
"eval_accuracy": 0.4726,
"eval_f1_macro": 0.303,
"eval_gen_len": 2.3862,
"eval_loss": 1.8952040672302246,
"eval_precision": 0.3214,
"eval_recall": 0.2998,
"eval_runtime": 18.3834,
"eval_samples_per_second": 264.967,
"step": 5610
},
{
"epoch": 23.0,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0465,
"step": 5865
},
{
"epoch": 23.0,
"eval_accuracy": 0.466,
"eval_f1_macro": 0.3035,
"eval_gen_len": 2.4024,
"eval_loss": 1.9048091173171997,
"eval_precision": 0.3173,
"eval_recall": 0.3012,
"eval_runtime": 18.3842,
"eval_samples_per_second": 264.956,
"step": 5865
},
{
"epoch": 24.0,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0406,
"step": 6120
},
{
"epoch": 24.0,
"eval_accuracy": 0.4634,
"eval_f1_macro": 0.3068,
"eval_gen_len": 2.426,
"eval_loss": 2.021580696105957,
"eval_precision": 0.3153,
"eval_recall": 0.3044,
"eval_runtime": 18.3498,
"eval_samples_per_second": 265.452,
"step": 6120
},
{
"epoch": 25.0,
"learning_rate": 2.5e-05,
"loss": 0.0358,
"step": 6375
},
{
"epoch": 25.0,
"eval_accuracy": 0.4742,
"eval_f1_macro": 0.3003,
"eval_gen_len": 2.3597,
"eval_loss": 2.116412401199341,
"eval_precision": 0.3236,
"eval_recall": 0.2931,
"eval_runtime": 18.3974,
"eval_samples_per_second": 264.766,
"step": 6375
},
{
"epoch": 26.0,
"learning_rate": 2.4e-05,
"loss": 0.0353,
"step": 6630
},
{
"epoch": 26.0,
"eval_accuracy": 0.4668,
"eval_f1_macro": 0.3004,
"eval_gen_len": 2.4484,
"eval_loss": 2.0235698223114014,
"eval_precision": 0.3084,
"eval_recall": 0.2995,
"eval_runtime": 18.3702,
"eval_samples_per_second": 265.158,
"step": 6630
},
{
"epoch": 27.0,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0314,
"step": 6885
},
{
"epoch": 27.0,
"eval_accuracy": 0.464,
"eval_f1_macro": 0.3013,
"eval_gen_len": 2.4204,
"eval_loss": 2.124769926071167,
"eval_precision": 0.3066,
"eval_recall": 0.3019,
"eval_runtime": 18.4132,
"eval_samples_per_second": 264.538,
"step": 6885
},
{
"epoch": 28.0,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0296,
"step": 7140
},
{
"epoch": 28.0,
"eval_accuracy": 0.4722,
"eval_f1_macro": 0.2997,
"eval_gen_len": 2.3732,
"eval_loss": 2.124000072479248,
"eval_precision": 0.3261,
"eval_recall": 0.294,
"eval_runtime": 18.3663,
"eval_samples_per_second": 265.213,
"step": 7140
},
{
"epoch": 29.0,
"learning_rate": 2.1e-05,
"loss": 0.0274,
"step": 7395
},
{
"epoch": 29.0,
"eval_accuracy": 0.467,
"eval_f1_macro": 0.3011,
"eval_gen_len": 2.3636,
"eval_loss": 2.1549148559570312,
"eval_precision": 0.3197,
"eval_recall": 0.2963,
"eval_runtime": 18.4004,
"eval_samples_per_second": 264.722,
"step": 7395
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 0.0248,
"step": 7650
},
{
"epoch": 30.0,
"eval_accuracy": 0.4697,
"eval_f1_macro": 0.2982,
"eval_gen_len": 2.3798,
"eval_loss": 2.2189269065856934,
"eval_precision": 0.3152,
"eval_recall": 0.2951,
"eval_runtime": 18.3622,
"eval_samples_per_second": 265.273,
"step": 7650
},
{
"epoch": 31.0,
"learning_rate": 1.9e-05,
"loss": 0.0219,
"step": 7905
},
{
"epoch": 31.0,
"eval_accuracy": 0.4736,
"eval_f1_macro": 0.3048,
"eval_gen_len": 2.3942,
"eval_loss": 2.3595752716064453,
"eval_precision": 0.3157,
"eval_recall": 0.3049,
"eval_runtime": 18.3521,
"eval_samples_per_second": 265.419,
"step": 7905
},
{
"epoch": 32.0,
"learning_rate": 1.8e-05,
"loss": 0.0205,
"step": 8160
},
{
"epoch": 32.0,
"eval_accuracy": 0.4705,
"eval_f1_macro": 0.3013,
"eval_gen_len": 2.3909,
"eval_loss": 2.4317517280578613,
"eval_precision": 0.3151,
"eval_recall": 0.3001,
"eval_runtime": 18.3589,
"eval_samples_per_second": 265.321,
"step": 8160
},
{
"epoch": 33.0,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.0189,
"step": 8415
},
{
"epoch": 33.0,
"eval_accuracy": 0.4767,
"eval_f1_macro": 0.3084,
"eval_gen_len": 2.3751,
"eval_loss": 2.4803547859191895,
"eval_precision": 0.3242,
"eval_recall": 0.3045,
"eval_runtime": 18.4166,
"eval_samples_per_second": 264.489,
"step": 8415
},
{
"epoch": 34.0,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0211,
"step": 8670
},
{
"epoch": 34.0,
"eval_accuracy": 0.4699,
"eval_f1_macro": 0.304,
"eval_gen_len": 2.4102,
"eval_loss": 2.371544361114502,
"eval_precision": 0.3223,
"eval_recall": 0.2999,
"eval_runtime": 18.4037,
"eval_samples_per_second": 264.675,
"step": 8670
},
{
"epoch": 35.0,
"learning_rate": 1.5e-05,
"loss": 0.0156,
"step": 8925
},
{
"epoch": 35.0,
"eval_accuracy": 0.4753,
"eval_f1_macro": 0.3014,
"eval_gen_len": 2.395,
"eval_loss": 2.55307674407959,
"eval_precision": 0.3136,
"eval_recall": 0.2973,
"eval_runtime": 18.4323,
"eval_samples_per_second": 264.264,
"step": 8925
},
{
"epoch": 36.0,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0179,
"step": 9180
},
{
"epoch": 36.0,
"eval_accuracy": 0.4829,
"eval_f1_macro": 0.3055,
"eval_gen_len": 2.3621,
"eval_loss": 2.49765682220459,
"eval_precision": 0.3308,
"eval_recall": 0.2991,
"eval_runtime": 18.4231,
"eval_samples_per_second": 264.396,
"step": 9180
},
{
"epoch": 37.0,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0155,
"step": 9435
},
{
"epoch": 37.0,
"eval_accuracy": 0.4808,
"eval_f1_macro": 0.3086,
"eval_gen_len": 2.3775,
"eval_loss": 2.4960439205169678,
"eval_precision": 0.3245,
"eval_recall": 0.3039,
"eval_runtime": 18.4083,
"eval_samples_per_second": 264.609,
"step": 9435
},
{
"epoch": 38.0,
"learning_rate": 1.2e-05,
"loss": 0.0154,
"step": 9690
},
{
"epoch": 38.0,
"eval_accuracy": 0.4652,
"eval_f1_macro": 0.3047,
"eval_gen_len": 2.3983,
"eval_loss": 2.59112811088562,
"eval_precision": 0.3196,
"eval_recall": 0.2998,
"eval_runtime": 18.406,
"eval_samples_per_second": 264.642,
"step": 9690
},
{
"epoch": 39.0,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0144,
"step": 9945
},
{
"epoch": 39.0,
"eval_accuracy": 0.473,
"eval_f1_macro": 0.3097,
"eval_gen_len": 2.3634,
"eval_loss": 2.646393299102783,
"eval_precision": 0.3283,
"eval_recall": 0.3046,
"eval_runtime": 18.3793,
"eval_samples_per_second": 265.027,
"step": 9945
},
{
"epoch": 40.0,
"learning_rate": 1e-05,
"loss": 0.0135,
"step": 10200
},
{
"epoch": 40.0,
"eval_accuracy": 0.4695,
"eval_f1_macro": 0.3035,
"eval_gen_len": 2.3812,
"eval_loss": 2.711395740509033,
"eval_precision": 0.3185,
"eval_recall": 0.2989,
"eval_runtime": 18.3802,
"eval_samples_per_second": 265.013,
"step": 10200
},
{
"epoch": 41.0,
"learning_rate": 9e-06,
"loss": 0.0132,
"step": 10455
},
{
"epoch": 41.0,
"eval_accuracy": 0.4707,
"eval_f1_macro": 0.307,
"eval_gen_len": 2.4055,
"eval_loss": 2.706991672515869,
"eval_precision": 0.3218,
"eval_recall": 0.3029,
"eval_runtime": 18.4175,
"eval_samples_per_second": 264.476,
"step": 10455
},
{
"epoch": 42.0,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0113,
"step": 10710
},
{
"epoch": 42.0,
"eval_accuracy": 0.4705,
"eval_f1_macro": 0.3041,
"eval_gen_len": 2.3833,
"eval_loss": 2.7490220069885254,
"eval_precision": 0.3226,
"eval_recall": 0.3009,
"eval_runtime": 18.3891,
"eval_samples_per_second": 264.885,
"step": 10710
},
{
"epoch": 43.0,
"learning_rate": 7.000000000000001e-06,
"loss": 0.0104,
"step": 10965
},
{
"epoch": 43.0,
"eval_accuracy": 0.4707,
"eval_f1_macro": 0.3094,
"eval_gen_len": 2.3868,
"eval_loss": 2.8594679832458496,
"eval_precision": 0.3257,
"eval_recall": 0.304,
"eval_runtime": 18.4068,
"eval_samples_per_second": 264.631,
"step": 10965
},
{
"epoch": 44.0,
"learning_rate": 6e-06,
"loss": 0.0125,
"step": 11220
},
{
"epoch": 44.0,
"eval_accuracy": 0.4705,
"eval_f1_macro": 0.3068,
"eval_gen_len": 2.3921,
"eval_loss": 2.777812957763672,
"eval_precision": 0.3207,
"eval_recall": 0.3034,
"eval_runtime": 18.3628,
"eval_samples_per_second": 265.264,
"step": 11220
},
{
"epoch": 45.0,
"learning_rate": 5e-06,
"loss": 0.0094,
"step": 11475
},
{
"epoch": 45.0,
"eval_accuracy": 0.4753,
"eval_f1_macro": 0.3102,
"eval_gen_len": 2.4036,
"eval_loss": 2.7739901542663574,
"eval_precision": 0.3301,
"eval_recall": 0.3028,
"eval_runtime": 18.4413,
"eval_samples_per_second": 264.135,
"step": 11475
},
{
"epoch": 46.0,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0094,
"step": 11730
},
{
"epoch": 46.0,
"eval_accuracy": 0.4755,
"eval_f1_macro": 0.3122,
"eval_gen_len": 2.3979,
"eval_loss": 2.759046792984009,
"eval_precision": 0.3284,
"eval_recall": 0.3064,
"eval_runtime": 18.4399,
"eval_samples_per_second": 264.155,
"step": 11730
},
{
"epoch": 47.0,
"learning_rate": 3e-06,
"loss": 0.0085,
"step": 11985
},
{
"epoch": 47.0,
"eval_accuracy": 0.4759,
"eval_f1_macro": 0.3092,
"eval_gen_len": 2.3903,
"eval_loss": 2.8234634399414062,
"eval_precision": 0.3248,
"eval_recall": 0.304,
"eval_runtime": 18.4435,
"eval_samples_per_second": 264.104,
"step": 11985
},
{
"epoch": 48.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0087,
"step": 12240
},
{
"epoch": 48.0,
"eval_accuracy": 0.474,
"eval_f1_macro": 0.3097,
"eval_gen_len": 2.3977,
"eval_loss": 2.834430694580078,
"eval_precision": 0.3247,
"eval_recall": 0.3045,
"eval_runtime": 18.4163,
"eval_samples_per_second": 264.494,
"step": 12240
},
{
"epoch": 49.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.009,
"step": 12495
},
{
"epoch": 49.0,
"eval_accuracy": 0.4732,
"eval_f1_macro": 0.3081,
"eval_gen_len": 2.3868,
"eval_loss": 2.841893196105957,
"eval_precision": 0.3256,
"eval_recall": 0.3018,
"eval_runtime": 18.4481,
"eval_samples_per_second": 264.037,
"step": 12495
}
],
"max_steps": 12750,
"num_train_epochs": 50,
"total_flos": 1.14107158131029e+17,
"trial_name": null,
"trial_params": null
}