darklorddad's picture
Upload 5 files
d64ef1b verified
{
"best_metric": 0.3975684940814972,
"best_model_checkpoint": "Model-Swin-Transformer-\\checkpoint-1275",
"epoch": 25.0,
"eval_steps": 7,
"global_step": 1275,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1390728476821192,
"grad_norm": 3.756645917892456,
"learning_rate": 7.000000000000001e-07,
"loss": 5.3644,
"step": 7
},
{
"epoch": 0.2781456953642384,
"grad_norm": 3.558716297149658,
"learning_rate": 1.4000000000000001e-06,
"loss": 5.3658,
"step": 14
},
{
"epoch": 0.41721854304635764,
"grad_norm": 3.7727837562561035,
"learning_rate": 2.1000000000000002e-06,
"loss": 5.3463,
"step": 21
},
{
"epoch": 0.5562913907284768,
"grad_norm": 3.529733896255493,
"learning_rate": 2.8000000000000003e-06,
"loss": 5.3343,
"step": 28
},
{
"epoch": 0.695364238410596,
"grad_norm": 4.100412368774414,
"learning_rate": 3.5000000000000004e-06,
"loss": 5.3713,
"step": 35
},
{
"epoch": 0.8344370860927153,
"grad_norm": 4.024387836456299,
"learning_rate": 4.2000000000000004e-06,
"loss": 5.3528,
"step": 42
},
{
"epoch": 0.9735099337748344,
"grad_norm": 3.3524274826049805,
"learning_rate": 4.9000000000000005e-06,
"loss": 5.3086,
"step": 49
},
{
"epoch": 1.0,
"eval_accuracy": 0.00819672131147541,
"eval_f1_macro": 0.004708259390712807,
"eval_f1_micro": 0.00819672131147541,
"eval_f1_weighted": 0.005191171978413603,
"eval_loss": 5.3028178215026855,
"eval_precision_macro": 0.004008928571428571,
"eval_precision_micro": 0.00819672131147541,
"eval_precision_weighted": 0.004471604215456675,
"eval_recall_macro": 0.007458333333333333,
"eval_recall_micro": 0.00819672131147541,
"eval_recall_weighted": 0.00819672131147541,
"eval_runtime": 18.3225,
"eval_samples_per_second": 66.585,
"eval_steps_per_second": 1.092,
"step": 51
},
{
"epoch": 1.099337748344371,
"grad_norm": 3.333894729614258,
"learning_rate": 5.600000000000001e-06,
"loss": 4.7989,
"step": 56
},
{
"epoch": 1.23841059602649,
"grad_norm": 3.226445198059082,
"learning_rate": 6.300000000000001e-06,
"loss": 5.2748,
"step": 63
},
{
"epoch": 1.3774834437086092,
"grad_norm": 2.960792303085327,
"learning_rate": 7.000000000000001e-06,
"loss": 5.2799,
"step": 70
},
{
"epoch": 1.5165562913907285,
"grad_norm": 2.967210054397583,
"learning_rate": 7.7e-06,
"loss": 5.2765,
"step": 77
},
{
"epoch": 1.6556291390728477,
"grad_norm": 2.8297080993652344,
"learning_rate": 8.400000000000001e-06,
"loss": 5.2309,
"step": 84
},
{
"epoch": 1.794701986754967,
"grad_norm": 3.001478433609009,
"learning_rate": 9.100000000000001e-06,
"loss": 5.2076,
"step": 91
},
{
"epoch": 1.9337748344370862,
"grad_norm": 3.130455493927002,
"learning_rate": 9.800000000000001e-06,
"loss": 5.1799,
"step": 98
},
{
"epoch": 2.0,
"eval_accuracy": 0.031967213114754096,
"eval_f1_macro": 0.02138895151975647,
"eval_f1_micro": 0.031967213114754096,
"eval_f1_weighted": 0.022752398833071827,
"eval_loss": 5.114638805389404,
"eval_precision_macro": 0.02439267581077926,
"eval_precision_micro": 0.031967213114754096,
"eval_precision_weighted": 0.026138166290088277,
"eval_recall_macro": 0.030029761904761903,
"eval_recall_micro": 0.031967213114754096,
"eval_recall_weighted": 0.031967213114754096,
"eval_runtime": 19.7575,
"eval_samples_per_second": 61.749,
"eval_steps_per_second": 1.012,
"step": 102
},
{
"epoch": 2.0596026490066226,
"grad_norm": 3.9784674644470215,
"learning_rate": 1.05e-05,
"loss": 4.6593,
"step": 105
},
{
"epoch": 2.198675496688742,
"grad_norm": 4.487362861633301,
"learning_rate": 1.1200000000000001e-05,
"loss": 5.1011,
"step": 112
},
{
"epoch": 2.337748344370861,
"grad_norm": 3.840144157409668,
"learning_rate": 1.19e-05,
"loss": 5.0346,
"step": 119
},
{
"epoch": 2.47682119205298,
"grad_norm": 4.5351409912109375,
"learning_rate": 1.2600000000000001e-05,
"loss": 4.993,
"step": 126
},
{
"epoch": 2.6158940397350996,
"grad_norm": 5.248815059661865,
"learning_rate": 1.3300000000000001e-05,
"loss": 4.9318,
"step": 133
},
{
"epoch": 2.7549668874172184,
"grad_norm": 5.679067611694336,
"learning_rate": 1.4000000000000001e-05,
"loss": 4.8848,
"step": 140
},
{
"epoch": 2.8940397350993377,
"grad_norm": 5.753520488739014,
"learning_rate": 1.47e-05,
"loss": 4.7788,
"step": 147
},
{
"epoch": 3.0,
"eval_accuracy": 0.20163934426229507,
"eval_f1_macro": 0.15865115752245537,
"eval_f1_micro": 0.20163934426229507,
"eval_f1_weighted": 0.16819957965187343,
"eval_loss": 4.359624862670898,
"eval_precision_macro": 0.2004175752293943,
"eval_precision_micro": 0.20163934426229507,
"eval_precision_weighted": 0.21056413625270873,
"eval_recall_macro": 0.18860714285714286,
"eval_recall_micro": 0.20163934426229507,
"eval_recall_weighted": 0.20163934426229507,
"eval_runtime": 16.799,
"eval_samples_per_second": 72.623,
"eval_steps_per_second": 1.191,
"step": 153
},
{
"epoch": 3.019867549668874,
"grad_norm": 7.467461585998535,
"learning_rate": 1.54e-05,
"loss": 4.1979,
"step": 154
},
{
"epoch": 3.1589403973509933,
"grad_norm": 8.049297332763672,
"learning_rate": 1.6100000000000002e-05,
"loss": 4.3989,
"step": 161
},
{
"epoch": 3.2980132450331126,
"grad_norm": 15.375213623046875,
"learning_rate": 1.6800000000000002e-05,
"loss": 4.2083,
"step": 168
},
{
"epoch": 3.437086092715232,
"grad_norm": 9.002079963684082,
"learning_rate": 1.75e-05,
"loss": 4.0218,
"step": 175
},
{
"epoch": 3.576158940397351,
"grad_norm": 9.430607795715332,
"learning_rate": 1.8200000000000002e-05,
"loss": 3.8887,
"step": 182
},
{
"epoch": 3.7152317880794703,
"grad_norm": 8.839299201965332,
"learning_rate": 1.8900000000000002e-05,
"loss": 3.6825,
"step": 189
},
{
"epoch": 3.8543046357615895,
"grad_norm": 10.325126647949219,
"learning_rate": 1.9600000000000002e-05,
"loss": 3.5607,
"step": 196
},
{
"epoch": 3.993377483443709,
"grad_norm": 10.668830871582031,
"learning_rate": 2.0300000000000002e-05,
"loss": 3.3596,
"step": 203
},
{
"epoch": 4.0,
"eval_accuracy": 0.4819672131147541,
"eval_f1_macro": 0.4183671558365781,
"eval_f1_micro": 0.4819672131147541,
"eval_f1_weighted": 0.4325206736160847,
"eval_loss": 2.7524967193603516,
"eval_precision_macro": 0.4532485780774093,
"eval_precision_micro": 0.4819672131147541,
"eval_precision_weighted": 0.4607049565114263,
"eval_recall_macro": 0.4596547619047619,
"eval_recall_micro": 0.4819672131147541,
"eval_recall_weighted": 0.4819672131147541,
"eval_runtime": 21.0215,
"eval_samples_per_second": 58.036,
"eval_steps_per_second": 0.951,
"step": 204
},
{
"epoch": 4.119205298013245,
"grad_norm": 14.15578842163086,
"learning_rate": 2.1e-05,
"loss": 2.792,
"step": 210
},
{
"epoch": 4.258278145695364,
"grad_norm": 13.17058277130127,
"learning_rate": 2.1700000000000002e-05,
"loss": 2.9187,
"step": 217
},
{
"epoch": 4.397350993377484,
"grad_norm": 17.613101959228516,
"learning_rate": 2.2400000000000002e-05,
"loss": 2.8103,
"step": 224
},
{
"epoch": 4.5364238410596025,
"grad_norm": 10.50324821472168,
"learning_rate": 2.3100000000000002e-05,
"loss": 2.6919,
"step": 231
},
{
"epoch": 4.675496688741722,
"grad_norm": 9.514472007751465,
"learning_rate": 2.38e-05,
"loss": 2.4714,
"step": 238
},
{
"epoch": 4.814569536423841,
"grad_norm": 9.27059268951416,
"learning_rate": 2.45e-05,
"loss": 2.4165,
"step": 245
},
{
"epoch": 4.95364238410596,
"grad_norm": 14.103813171386719,
"learning_rate": 2.5200000000000003e-05,
"loss": 2.2583,
"step": 252
},
{
"epoch": 5.0,
"eval_accuracy": 0.6754098360655738,
"eval_f1_macro": 0.6554426723087714,
"eval_f1_micro": 0.6754098360655738,
"eval_f1_weighted": 0.661180283037551,
"eval_loss": 1.6013199090957642,
"eval_precision_macro": 0.7148540158456954,
"eval_precision_micro": 0.6754098360655738,
"eval_precision_weighted": 0.7134497831635958,
"eval_recall_macro": 0.6648869047619047,
"eval_recall_micro": 0.6754098360655738,
"eval_recall_weighted": 0.6754098360655738,
"eval_runtime": 17.8695,
"eval_samples_per_second": 68.273,
"eval_steps_per_second": 1.119,
"step": 255
},
{
"epoch": 5.079470198675497,
"grad_norm": 10.124486923217773,
"learning_rate": 2.5900000000000003e-05,
"loss": 1.7994,
"step": 259
},
{
"epoch": 5.218543046357616,
"grad_norm": 9.2846040725708,
"learning_rate": 2.6600000000000003e-05,
"loss": 1.8769,
"step": 266
},
{
"epoch": 5.357615894039735,
"grad_norm": 9.681060791015625,
"learning_rate": 2.7300000000000003e-05,
"loss": 1.8592,
"step": 273
},
{
"epoch": 5.496688741721854,
"grad_norm": 10.057909965515137,
"learning_rate": 2.8000000000000003e-05,
"loss": 1.7236,
"step": 280
},
{
"epoch": 5.635761589403973,
"grad_norm": 11.661372184753418,
"learning_rate": 2.87e-05,
"loss": 1.7951,
"step": 287
},
{
"epoch": 5.774834437086093,
"grad_norm": 9.344789505004883,
"learning_rate": 2.94e-05,
"loss": 1.6862,
"step": 294
},
{
"epoch": 5.913907284768212,
"grad_norm": 9.517767906188965,
"learning_rate": 3.01e-05,
"loss": 1.5559,
"step": 301
},
{
"epoch": 6.0,
"eval_accuracy": 0.7745901639344263,
"eval_f1_macro": 0.7566554592638184,
"eval_f1_micro": 0.7745901639344263,
"eval_f1_weighted": 0.7613547559463157,
"eval_loss": 1.02675461769104,
"eval_precision_macro": 0.799015755078255,
"eval_precision_micro": 0.7745901639344263,
"eval_precision_weighted": 0.802561029589718,
"eval_recall_macro": 0.767904761904762,
"eval_recall_micro": 0.7745901639344263,
"eval_recall_weighted": 0.7745901639344263,
"eval_runtime": 20.701,
"eval_samples_per_second": 58.934,
"eval_steps_per_second": 0.966,
"step": 306
},
{
"epoch": 6.039735099337748,
"grad_norm": 8.714813232421875,
"learning_rate": 3.08e-05,
"loss": 1.3741,
"step": 308
},
{
"epoch": 6.178807947019868,
"grad_norm": 8.35527515411377,
"learning_rate": 3.15e-05,
"loss": 1.355,
"step": 315
},
{
"epoch": 6.317880794701987,
"grad_norm": 10.128329277038574,
"learning_rate": 3.2200000000000003e-05,
"loss": 1.31,
"step": 322
},
{
"epoch": 6.456953642384106,
"grad_norm": 8.156630516052246,
"learning_rate": 3.29e-05,
"loss": 1.2743,
"step": 329
},
{
"epoch": 6.596026490066225,
"grad_norm": 8.779105186462402,
"learning_rate": 3.3600000000000004e-05,
"loss": 1.3193,
"step": 336
},
{
"epoch": 6.735099337748345,
"grad_norm": 11.118610382080078,
"learning_rate": 3.430000000000001e-05,
"loss": 1.2748,
"step": 343
},
{
"epoch": 6.874172185430464,
"grad_norm": 8.063292503356934,
"learning_rate": 3.5e-05,
"loss": 1.2319,
"step": 350
},
{
"epoch": 7.0,
"grad_norm": 7.127477169036865,
"learning_rate": 3.57e-05,
"loss": 1.0089,
"step": 357
},
{
"epoch": 7.0,
"eval_accuracy": 0.8016393442622951,
"eval_f1_macro": 0.7894899096002038,
"eval_f1_micro": 0.8016393442622951,
"eval_f1_weighted": 0.7904804760759438,
"eval_loss": 0.7927541136741638,
"eval_precision_macro": 0.8299506911217438,
"eval_precision_micro": 0.8016393442622951,
"eval_precision_weighted": 0.8283684745893546,
"eval_recall_macro": 0.7991785714285714,
"eval_recall_micro": 0.8016393442622951,
"eval_recall_weighted": 0.8016393442622951,
"eval_runtime": 19.3393,
"eval_samples_per_second": 63.084,
"eval_steps_per_second": 1.034,
"step": 357
},
{
"epoch": 7.139072847682119,
"grad_norm": 9.581562995910645,
"learning_rate": 3.6400000000000004e-05,
"loss": 1.0836,
"step": 364
},
{
"epoch": 7.2781456953642385,
"grad_norm": 11.660162925720215,
"learning_rate": 3.71e-05,
"loss": 0.9816,
"step": 371
},
{
"epoch": 7.417218543046357,
"grad_norm": 10.361519813537598,
"learning_rate": 3.7800000000000004e-05,
"loss": 1.1386,
"step": 378
},
{
"epoch": 7.556291390728477,
"grad_norm": 8.699442863464355,
"learning_rate": 3.85e-05,
"loss": 0.9964,
"step": 385
},
{
"epoch": 7.695364238410596,
"grad_norm": 9.233181953430176,
"learning_rate": 3.9200000000000004e-05,
"loss": 1.077,
"step": 392
},
{
"epoch": 7.8344370860927155,
"grad_norm": 10.234973907470703,
"learning_rate": 3.99e-05,
"loss": 0.9844,
"step": 399
},
{
"epoch": 7.973509933774834,
"grad_norm": 9.366828918457031,
"learning_rate": 4.0600000000000004e-05,
"loss": 1.0463,
"step": 406
},
{
"epoch": 8.0,
"eval_accuracy": 0.8311475409836065,
"eval_f1_macro": 0.8222988632697376,
"eval_f1_micro": 0.8311475409836065,
"eval_f1_weighted": 0.8253316645805044,
"eval_loss": 0.6437082886695862,
"eval_precision_macro": 0.8575361999111999,
"eval_precision_micro": 0.8311475409836065,
"eval_precision_weighted": 0.8598102466750007,
"eval_recall_macro": 0.8266071428571429,
"eval_recall_micro": 0.8311475409836065,
"eval_recall_weighted": 0.8311475409836065,
"eval_runtime": 16.6963,
"eval_samples_per_second": 73.07,
"eval_steps_per_second": 1.198,
"step": 408
},
{
"epoch": 8.099337748344372,
"grad_norm": 8.740901947021484,
"learning_rate": 4.13e-05,
"loss": 0.8633,
"step": 413
},
{
"epoch": 8.23841059602649,
"grad_norm": 9.237879753112793,
"learning_rate": 4.2e-05,
"loss": 0.978,
"step": 420
},
{
"epoch": 8.37748344370861,
"grad_norm": 9.53095817565918,
"learning_rate": 4.27e-05,
"loss": 0.9132,
"step": 427
},
{
"epoch": 8.516556291390728,
"grad_norm": 9.277670860290527,
"learning_rate": 4.3400000000000005e-05,
"loss": 0.8908,
"step": 434
},
{
"epoch": 8.655629139072847,
"grad_norm": 7.736002445220947,
"learning_rate": 4.41e-05,
"loss": 0.9602,
"step": 441
},
{
"epoch": 8.794701986754967,
"grad_norm": 9.285863876342773,
"learning_rate": 4.4800000000000005e-05,
"loss": 0.8773,
"step": 448
},
{
"epoch": 8.933774834437086,
"grad_norm": 6.872951507568359,
"learning_rate": 4.55e-05,
"loss": 0.8551,
"step": 455
},
{
"epoch": 9.0,
"eval_accuracy": 0.8467213114754099,
"eval_f1_macro": 0.8380145271395273,
"eval_f1_micro": 0.8467213114754099,
"eval_f1_weighted": 0.8412076976650266,
"eval_loss": 0.5831084847450256,
"eval_precision_macro": 0.8702838485044367,
"eval_precision_micro": 0.8467213114754099,
"eval_precision_weighted": 0.8695735086995936,
"eval_recall_macro": 0.8410952380952381,
"eval_recall_micro": 0.8467213114754099,
"eval_recall_weighted": 0.8467213114754099,
"eval_runtime": 17.1069,
"eval_samples_per_second": 71.316,
"eval_steps_per_second": 1.169,
"step": 459
},
{
"epoch": 9.059602649006623,
"grad_norm": 9.55445671081543,
"learning_rate": 4.6200000000000005e-05,
"loss": 0.7727,
"step": 462
},
{
"epoch": 9.198675496688741,
"grad_norm": 10.598060607910156,
"learning_rate": 4.69e-05,
"loss": 0.7759,
"step": 469
},
{
"epoch": 9.33774834437086,
"grad_norm": 9.53549861907959,
"learning_rate": 4.76e-05,
"loss": 0.8016,
"step": 476
},
{
"epoch": 9.47682119205298,
"grad_norm": 8.146626472473145,
"learning_rate": 4.83e-05,
"loss": 0.779,
"step": 483
},
{
"epoch": 9.6158940397351,
"grad_norm": 6.373037815093994,
"learning_rate": 4.9e-05,
"loss": 0.6726,
"step": 490
},
{
"epoch": 9.754966887417218,
"grad_norm": 9.011489868164062,
"learning_rate": 4.97e-05,
"loss": 0.835,
"step": 497
},
{
"epoch": 9.894039735099337,
"grad_norm": 8.79944133758545,
"learning_rate": 4.995555555555556e-05,
"loss": 0.7234,
"step": 504
},
{
"epoch": 10.0,
"eval_accuracy": 0.8672131147540983,
"eval_f1_macro": 0.8578000912486207,
"eval_f1_micro": 0.8672131147540983,
"eval_f1_weighted": 0.8622077336035869,
"eval_loss": 0.5154255628585815,
"eval_precision_macro": 0.8773138528138529,
"eval_precision_micro": 0.8672131147540983,
"eval_precision_weighted": 0.8793111737988787,
"eval_recall_macro": 0.8614821428571429,
"eval_recall_micro": 0.8672131147540983,
"eval_recall_weighted": 0.8672131147540983,
"eval_runtime": 18.8743,
"eval_samples_per_second": 64.638,
"eval_steps_per_second": 1.06,
"step": 510
},
{
"epoch": 10.019867549668874,
"grad_norm": 12.6382474899292,
"learning_rate": 4.987777777777778e-05,
"loss": 0.6376,
"step": 511
},
{
"epoch": 10.158940397350994,
"grad_norm": 7.875304222106934,
"learning_rate": 4.9800000000000004e-05,
"loss": 0.608,
"step": 518
},
{
"epoch": 10.298013245033113,
"grad_norm": 8.565564155578613,
"learning_rate": 4.972222222222223e-05,
"loss": 0.7333,
"step": 525
},
{
"epoch": 10.437086092715232,
"grad_norm": 8.355602264404297,
"learning_rate": 4.964444444444445e-05,
"loss": 0.6883,
"step": 532
},
{
"epoch": 10.57615894039735,
"grad_norm": 8.657296180725098,
"learning_rate": 4.956666666666667e-05,
"loss": 0.7567,
"step": 539
},
{
"epoch": 10.71523178807947,
"grad_norm": 7.550002098083496,
"learning_rate": 4.948888888888889e-05,
"loss": 0.7422,
"step": 546
},
{
"epoch": 10.85430463576159,
"grad_norm": 7.08698844909668,
"learning_rate": 4.9411111111111114e-05,
"loss": 0.7045,
"step": 553
},
{
"epoch": 10.993377483443709,
"grad_norm": 8.84117317199707,
"learning_rate": 4.933333333333334e-05,
"loss": 0.7177,
"step": 560
},
{
"epoch": 11.0,
"eval_accuracy": 0.8745901639344262,
"eval_f1_macro": 0.8674472995614636,
"eval_f1_micro": 0.8745901639344262,
"eval_f1_weighted": 0.869469718145237,
"eval_loss": 0.4802148938179016,
"eval_precision_macro": 0.8951194083694085,
"eval_precision_micro": 0.8745901639344262,
"eval_precision_weighted": 0.8939981489366735,
"eval_recall_macro": 0.8713035714285715,
"eval_recall_micro": 0.8745901639344262,
"eval_recall_weighted": 0.8745901639344262,
"eval_runtime": 17.7202,
"eval_samples_per_second": 68.848,
"eval_steps_per_second": 1.129,
"step": 561
},
{
"epoch": 11.119205298013245,
"grad_norm": 9.191046714782715,
"learning_rate": 4.925555555555556e-05,
"loss": 0.5652,
"step": 567
},
{
"epoch": 11.258278145695364,
"grad_norm": 6.099012851715088,
"learning_rate": 4.917777777777778e-05,
"loss": 0.6823,
"step": 574
},
{
"epoch": 11.397350993377483,
"grad_norm": 7.644286632537842,
"learning_rate": 4.91e-05,
"loss": 0.5704,
"step": 581
},
{
"epoch": 11.536423841059603,
"grad_norm": 7.208959579467773,
"learning_rate": 4.9022222222222224e-05,
"loss": 0.5889,
"step": 588
},
{
"epoch": 11.675496688741722,
"grad_norm": 8.255132675170898,
"learning_rate": 4.894444444444445e-05,
"loss": 0.7361,
"step": 595
},
{
"epoch": 11.814569536423841,
"grad_norm": 8.378829002380371,
"learning_rate": 4.886666666666667e-05,
"loss": 0.6723,
"step": 602
},
{
"epoch": 11.95364238410596,
"grad_norm": 11.726705551147461,
"learning_rate": 4.878888888888889e-05,
"loss": 0.711,
"step": 609
},
{
"epoch": 12.0,
"eval_accuracy": 0.8565573770491803,
"eval_f1_macro": 0.8515204682228676,
"eval_f1_micro": 0.8565573770491803,
"eval_f1_weighted": 0.8536645233724359,
"eval_loss": 0.4757900536060333,
"eval_precision_macro": 0.8826355588855589,
"eval_precision_micro": 0.8565573770491803,
"eval_precision_weighted": 0.8834410489328521,
"eval_recall_macro": 0.8537678571428572,
"eval_recall_micro": 0.8565573770491803,
"eval_recall_weighted": 0.8565573770491803,
"eval_runtime": 18.3018,
"eval_samples_per_second": 66.66,
"eval_steps_per_second": 1.093,
"step": 612
},
{
"epoch": 12.079470198675496,
"grad_norm": 7.42709493637085,
"learning_rate": 4.871111111111111e-05,
"loss": 0.5113,
"step": 616
},
{
"epoch": 12.218543046357617,
"grad_norm": 9.922298431396484,
"learning_rate": 4.8633333333333334e-05,
"loss": 0.6113,
"step": 623
},
{
"epoch": 12.357615894039736,
"grad_norm": 5.806549072265625,
"learning_rate": 4.855555555555556e-05,
"loss": 0.592,
"step": 630
},
{
"epoch": 12.496688741721854,
"grad_norm": 8.837335586547852,
"learning_rate": 4.847777777777778e-05,
"loss": 0.604,
"step": 637
},
{
"epoch": 12.635761589403973,
"grad_norm": 7.156353950500488,
"learning_rate": 4.8400000000000004e-05,
"loss": 0.5203,
"step": 644
},
{
"epoch": 12.774834437086092,
"grad_norm": 8.29881477355957,
"learning_rate": 4.832222222222223e-05,
"loss": 0.6528,
"step": 651
},
{
"epoch": 12.913907284768213,
"grad_norm": 7.490756511688232,
"learning_rate": 4.824444444444445e-05,
"loss": 0.5318,
"step": 658
},
{
"epoch": 13.0,
"eval_accuracy": 0.8672131147540983,
"eval_f1_macro": 0.8627146097366685,
"eval_f1_micro": 0.8672131147540983,
"eval_f1_weighted": 0.8659745527428748,
"eval_loss": 0.4569399952888489,
"eval_precision_macro": 0.8866749639249639,
"eval_precision_micro": 0.8672131147540983,
"eval_precision_weighted": 0.888172740283396,
"eval_recall_macro": 0.8630297619047619,
"eval_recall_micro": 0.8672131147540983,
"eval_recall_weighted": 0.8672131147540983,
"eval_runtime": 17.7039,
"eval_samples_per_second": 68.911,
"eval_steps_per_second": 1.13,
"step": 663
},
{
"epoch": 13.039735099337749,
"grad_norm": 8.017422676086426,
"learning_rate": 4.8166666666666674e-05,
"loss": 0.4727,
"step": 665
},
{
"epoch": 13.178807947019868,
"grad_norm": 7.218966007232666,
"learning_rate": 4.808888888888889e-05,
"loss": 0.5897,
"step": 672
},
{
"epoch": 13.317880794701987,
"grad_norm": 10.478813171386719,
"learning_rate": 4.8011111111111114e-05,
"loss": 0.5473,
"step": 679
},
{
"epoch": 13.456953642384105,
"grad_norm": 6.682877540588379,
"learning_rate": 4.793333333333334e-05,
"loss": 0.5479,
"step": 686
},
{
"epoch": 13.596026490066226,
"grad_norm": 12.535813331604004,
"learning_rate": 4.785555555555556e-05,
"loss": 0.5458,
"step": 693
},
{
"epoch": 13.735099337748345,
"grad_norm": 7.044444561004639,
"learning_rate": 4.7777777777777784e-05,
"loss": 0.5401,
"step": 700
},
{
"epoch": 13.874172185430464,
"grad_norm": 7.247359752655029,
"learning_rate": 4.77e-05,
"loss": 0.5912,
"step": 707
},
{
"epoch": 14.0,
"grad_norm": 6.261186122894287,
"learning_rate": 4.7622222222222224e-05,
"loss": 0.5383,
"step": 714
},
{
"epoch": 14.0,
"eval_accuracy": 0.8786885245901639,
"eval_f1_macro": 0.87375893714129,
"eval_f1_micro": 0.8786885245901639,
"eval_f1_weighted": 0.8755628520466998,
"eval_loss": 0.4514833092689514,
"eval_precision_macro": 0.8954447203123673,
"eval_precision_micro": 0.8786885245901639,
"eval_precision_weighted": 0.8948432423143127,
"eval_recall_macro": 0.8754642857142858,
"eval_recall_micro": 0.8786885245901639,
"eval_recall_weighted": 0.8786885245901639,
"eval_runtime": 18.849,
"eval_samples_per_second": 64.725,
"eval_steps_per_second": 1.061,
"step": 714
},
{
"epoch": 14.139072847682119,
"grad_norm": 8.038314819335938,
"learning_rate": 4.754444444444445e-05,
"loss": 0.4757,
"step": 721
},
{
"epoch": 14.278145695364238,
"grad_norm": 6.933023929595947,
"learning_rate": 4.746666666666667e-05,
"loss": 0.5422,
"step": 728
},
{
"epoch": 14.417218543046358,
"grad_norm": 5.427736282348633,
"learning_rate": 4.7388888888888894e-05,
"loss": 0.45,
"step": 735
},
{
"epoch": 14.556291390728477,
"grad_norm": 7.140816688537598,
"learning_rate": 4.731111111111111e-05,
"loss": 0.4801,
"step": 742
},
{
"epoch": 14.695364238410596,
"grad_norm": 14.195199966430664,
"learning_rate": 4.7233333333333334e-05,
"loss": 0.4627,
"step": 749
},
{
"epoch": 14.834437086092715,
"grad_norm": 6.867580413818359,
"learning_rate": 4.715555555555556e-05,
"loss": 0.4988,
"step": 756
},
{
"epoch": 14.973509933774835,
"grad_norm": 6.6102519035339355,
"learning_rate": 4.707777777777778e-05,
"loss": 0.4884,
"step": 763
},
{
"epoch": 15.0,
"eval_accuracy": 0.8762295081967213,
"eval_f1_macro": 0.8721245222748117,
"eval_f1_micro": 0.8762295081967213,
"eval_f1_weighted": 0.8740396988842932,
"eval_loss": 0.4364243745803833,
"eval_precision_macro": 0.8978099123099124,
"eval_precision_micro": 0.8762295081967213,
"eval_precision_weighted": 0.899852774729824,
"eval_recall_macro": 0.8744226190476191,
"eval_recall_micro": 0.8762295081967213,
"eval_recall_weighted": 0.8762295081967213,
"eval_runtime": 18.6981,
"eval_samples_per_second": 65.247,
"eval_steps_per_second": 1.07,
"step": 765
},
{
"epoch": 15.099337748344372,
"grad_norm": 6.755874156951904,
"learning_rate": 4.7e-05,
"loss": 0.4067,
"step": 770
},
{
"epoch": 15.23841059602649,
"grad_norm": 7.634614944458008,
"learning_rate": 4.692222222222222e-05,
"loss": 0.5742,
"step": 777
},
{
"epoch": 15.37748344370861,
"grad_norm": 4.661431789398193,
"learning_rate": 4.6844444444444444e-05,
"loss": 0.4345,
"step": 784
},
{
"epoch": 15.516556291390728,
"grad_norm": 18.19213104248047,
"learning_rate": 4.676666666666667e-05,
"loss": 0.3906,
"step": 791
},
{
"epoch": 15.655629139072847,
"grad_norm": 9.202508926391602,
"learning_rate": 4.668888888888889e-05,
"loss": 0.4467,
"step": 798
},
{
"epoch": 15.794701986754967,
"grad_norm": 5.7517619132995605,
"learning_rate": 4.6611111111111114e-05,
"loss": 0.4794,
"step": 805
},
{
"epoch": 15.933774834437086,
"grad_norm": 7.352263927459717,
"learning_rate": 4.653333333333334e-05,
"loss": 0.5808,
"step": 812
},
{
"epoch": 16.0,
"eval_accuracy": 0.8844262295081967,
"eval_f1_macro": 0.8804055188447603,
"eval_f1_micro": 0.8844262295081967,
"eval_f1_weighted": 0.8828176785374927,
"eval_loss": 0.42853277921676636,
"eval_precision_macro": 0.9033134920634922,
"eval_precision_micro": 0.8844262295081967,
"eval_precision_weighted": 0.9039139344262296,
"eval_recall_macro": 0.8815000000000001,
"eval_recall_micro": 0.8844262295081967,
"eval_recall_weighted": 0.8844262295081967,
"eval_runtime": 16.8381,
"eval_samples_per_second": 72.455,
"eval_steps_per_second": 1.188,
"step": 816
},
{
"epoch": 16.05960264900662,
"grad_norm": 7.281948089599609,
"learning_rate": 4.645555555555556e-05,
"loss": 0.4086,
"step": 819
},
{
"epoch": 16.198675496688743,
"grad_norm": 6.114387512207031,
"learning_rate": 4.6377777777777784e-05,
"loss": 0.4764,
"step": 826
},
{
"epoch": 16.337748344370862,
"grad_norm": 6.659070014953613,
"learning_rate": 4.630000000000001e-05,
"loss": 0.3795,
"step": 833
},
{
"epoch": 16.47682119205298,
"grad_norm": 4.916147708892822,
"learning_rate": 4.6222222222222224e-05,
"loss": 0.4566,
"step": 840
},
{
"epoch": 16.6158940397351,
"grad_norm": 4.47711706161499,
"learning_rate": 4.614444444444445e-05,
"loss": 0.4853,
"step": 847
},
{
"epoch": 16.75496688741722,
"grad_norm": 3.844993829727173,
"learning_rate": 4.606666666666667e-05,
"loss": 0.4298,
"step": 854
},
{
"epoch": 16.894039735099337,
"grad_norm": 6.387825012207031,
"learning_rate": 4.5988888888888894e-05,
"loss": 0.5004,
"step": 861
},
{
"epoch": 17.0,
"eval_accuracy": 0.8762295081967213,
"eval_f1_macro": 0.8698742597272009,
"eval_f1_micro": 0.8762295081967213,
"eval_f1_weighted": 0.8724110373447403,
"eval_loss": 0.4314015805721283,
"eval_precision_macro": 0.8928979076479078,
"eval_precision_micro": 0.8762295081967213,
"eval_precision_weighted": 0.8934799694840678,
"eval_recall_macro": 0.8727380952380953,
"eval_recall_micro": 0.8762295081967213,
"eval_recall_weighted": 0.8762295081967213,
"eval_runtime": 16.836,
"eval_samples_per_second": 72.464,
"eval_steps_per_second": 1.188,
"step": 867
},
{
"epoch": 17.019867549668874,
"grad_norm": 27.180448532104492,
"learning_rate": 4.591111111111112e-05,
"loss": 0.3922,
"step": 868
},
{
"epoch": 17.158940397350992,
"grad_norm": 7.212271690368652,
"learning_rate": 4.5833333333333334e-05,
"loss": 0.4964,
"step": 875
},
{
"epoch": 17.29801324503311,
"grad_norm": 6.941275119781494,
"learning_rate": 4.575555555555556e-05,
"loss": 0.4749,
"step": 882
},
{
"epoch": 17.437086092715234,
"grad_norm": 5.599315166473389,
"learning_rate": 4.567777777777778e-05,
"loss": 0.4885,
"step": 889
},
{
"epoch": 17.576158940397352,
"grad_norm": 5.130136489868164,
"learning_rate": 4.5600000000000004e-05,
"loss": 0.5264,
"step": 896
},
{
"epoch": 17.71523178807947,
"grad_norm": 6.701182842254639,
"learning_rate": 4.552222222222222e-05,
"loss": 0.4466,
"step": 903
},
{
"epoch": 17.85430463576159,
"grad_norm": 6.988078594207764,
"learning_rate": 4.5444444444444444e-05,
"loss": 0.3975,
"step": 910
},
{
"epoch": 17.99337748344371,
"grad_norm": 21.04884910583496,
"learning_rate": 4.536666666666667e-05,
"loss": 0.3537,
"step": 917
},
{
"epoch": 18.0,
"eval_accuracy": 0.8827868852459017,
"eval_f1_macro": 0.8796409929939343,
"eval_f1_micro": 0.8827868852459017,
"eval_f1_weighted": 0.8807563045280018,
"eval_loss": 0.43711456656455994,
"eval_precision_macro": 0.8961841630591632,
"eval_precision_micro": 0.8827868852459017,
"eval_precision_weighted": 0.8967877250254299,
"eval_recall_macro": 0.8815892857142857,
"eval_recall_micro": 0.8827868852459017,
"eval_recall_weighted": 0.8827868852459017,
"eval_runtime": 16.5565,
"eval_samples_per_second": 73.687,
"eval_steps_per_second": 1.208,
"step": 918
},
{
"epoch": 18.119205298013245,
"grad_norm": 7.078250408172607,
"learning_rate": 4.528888888888889e-05,
"loss": 0.406,
"step": 924
},
{
"epoch": 18.258278145695364,
"grad_norm": 5.898381233215332,
"learning_rate": 4.5211111111111114e-05,
"loss": 0.3619,
"step": 931
},
{
"epoch": 18.397350993377483,
"grad_norm": 7.024068355560303,
"learning_rate": 4.513333333333333e-05,
"loss": 0.4441,
"step": 938
},
{
"epoch": 18.5364238410596,
"grad_norm": 6.673207759857178,
"learning_rate": 4.5055555555555554e-05,
"loss": 0.3495,
"step": 945
},
{
"epoch": 18.67549668874172,
"grad_norm": 7.188521385192871,
"learning_rate": 4.497777777777778e-05,
"loss": 0.424,
"step": 952
},
{
"epoch": 18.814569536423843,
"grad_norm": 13.439776420593262,
"learning_rate": 4.49e-05,
"loss": 0.3988,
"step": 959
},
{
"epoch": 18.95364238410596,
"grad_norm": 6.0843305587768555,
"learning_rate": 4.4822222222222224e-05,
"loss": 0.4362,
"step": 966
},
{
"epoch": 19.0,
"eval_accuracy": 0.8827868852459017,
"eval_f1_macro": 0.8795757517766032,
"eval_f1_micro": 0.8827868852459017,
"eval_f1_weighted": 0.8811126749814384,
"eval_loss": 0.41565409302711487,
"eval_precision_macro": 0.9002148268398268,
"eval_precision_micro": 0.8827868852459017,
"eval_precision_weighted": 0.9000307820594705,
"eval_recall_macro": 0.8807797619047619,
"eval_recall_micro": 0.8827868852459017,
"eval_recall_weighted": 0.8827868852459017,
"eval_runtime": 20.7708,
"eval_samples_per_second": 58.736,
"eval_steps_per_second": 0.963,
"step": 969
},
{
"epoch": 19.079470198675498,
"grad_norm": 5.036433696746826,
"learning_rate": 4.474444444444445e-05,
"loss": 0.3053,
"step": 973
},
{
"epoch": 19.218543046357617,
"grad_norm": 6.565299034118652,
"learning_rate": 4.466666666666667e-05,
"loss": 0.3934,
"step": 980
},
{
"epoch": 19.357615894039736,
"grad_norm": 8.689690589904785,
"learning_rate": 4.4588888888888894e-05,
"loss": 0.4622,
"step": 987
},
{
"epoch": 19.496688741721854,
"grad_norm": 6.253081321716309,
"learning_rate": 4.451111111111112e-05,
"loss": 0.4095,
"step": 994
},
{
"epoch": 19.635761589403973,
"grad_norm": 5.1961846351623535,
"learning_rate": 4.443333333333334e-05,
"loss": 0.3806,
"step": 1001
},
{
"epoch": 19.774834437086092,
"grad_norm": 7.494758129119873,
"learning_rate": 4.435555555555556e-05,
"loss": 0.4382,
"step": 1008
},
{
"epoch": 19.91390728476821,
"grad_norm": 4.928430557250977,
"learning_rate": 4.427777777777778e-05,
"loss": 0.3672,
"step": 1015
},
{
"epoch": 20.0,
"eval_accuracy": 0.8885245901639345,
"eval_f1_macro": 0.8857740416618127,
"eval_f1_micro": 0.8885245901639345,
"eval_f1_weighted": 0.8862372325718009,
"eval_loss": 0.404880166053772,
"eval_precision_macro": 0.9038582528582529,
"eval_precision_micro": 0.8885245901639345,
"eval_precision_weighted": 0.9033163921688513,
"eval_recall_macro": 0.887125,
"eval_recall_micro": 0.8885245901639345,
"eval_recall_weighted": 0.8885245901639345,
"eval_runtime": 31.922,
"eval_samples_per_second": 38.218,
"eval_steps_per_second": 0.627,
"step": 1020
},
{
"epoch": 20.039735099337747,
"grad_norm": 4.6109299659729,
"learning_rate": 4.4200000000000004e-05,
"loss": 0.3098,
"step": 1022
},
{
"epoch": 20.178807947019866,
"grad_norm": 9.729621887207031,
"learning_rate": 4.412222222222223e-05,
"loss": 0.3719,
"step": 1029
},
{
"epoch": 20.31788079470199,
"grad_norm": 5.514610767364502,
"learning_rate": 4.404444444444445e-05,
"loss": 0.3623,
"step": 1036
},
{
"epoch": 20.456953642384107,
"grad_norm": 4.57627534866333,
"learning_rate": 4.396666666666667e-05,
"loss": 0.3866,
"step": 1043
},
{
"epoch": 20.596026490066226,
"grad_norm": 5.22489595413208,
"learning_rate": 4.388888888888889e-05,
"loss": 0.3288,
"step": 1050
},
{
"epoch": 20.735099337748345,
"grad_norm": 5.026643753051758,
"learning_rate": 4.3811111111111114e-05,
"loss": 0.3729,
"step": 1057
},
{
"epoch": 20.874172185430464,
"grad_norm": 5.927851676940918,
"learning_rate": 4.373333333333334e-05,
"loss": 0.3909,
"step": 1064
},
{
"epoch": 21.0,
"grad_norm": 2.4257664680480957,
"learning_rate": 4.3655555555555554e-05,
"loss": 0.3431,
"step": 1071
},
{
"epoch": 21.0,
"eval_accuracy": 0.8885245901639345,
"eval_f1_macro": 0.8852515238029943,
"eval_f1_micro": 0.8885245901639345,
"eval_f1_weighted": 0.8881485842454984,
"eval_loss": 0.4021802544593811,
"eval_precision_macro": 0.9078284354534355,
"eval_precision_micro": 0.8885245901639345,
"eval_precision_weighted": 0.9098325740743773,
"eval_recall_macro": 0.885482142857143,
"eval_recall_micro": 0.8885245901639345,
"eval_recall_weighted": 0.8885245901639345,
"eval_runtime": 19.889,
"eval_samples_per_second": 61.341,
"eval_steps_per_second": 1.006,
"step": 1071
},
{
"epoch": 21.13907284768212,
"grad_norm": 4.817611217498779,
"learning_rate": 4.357777777777778e-05,
"loss": 0.3287,
"step": 1078
},
{
"epoch": 21.278145695364238,
"grad_norm": 5.901778697967529,
"learning_rate": 4.35e-05,
"loss": 0.3539,
"step": 1085
},
{
"epoch": 21.417218543046356,
"grad_norm": 6.392418384552002,
"learning_rate": 4.3422222222222224e-05,
"loss": 0.3926,
"step": 1092
},
{
"epoch": 21.556291390728475,
"grad_norm": 14.076611518859863,
"learning_rate": 4.334444444444445e-05,
"loss": 0.3784,
"step": 1099
},
{
"epoch": 21.695364238410598,
"grad_norm": 8.352983474731445,
"learning_rate": 4.3266666666666664e-05,
"loss": 0.3291,
"step": 1106
},
{
"epoch": 21.834437086092716,
"grad_norm": 8.554953575134277,
"learning_rate": 4.318888888888889e-05,
"loss": 0.3553,
"step": 1113
},
{
"epoch": 21.973509933774835,
"grad_norm": 5.422201633453369,
"learning_rate": 4.311111111111111e-05,
"loss": 0.3301,
"step": 1120
},
{
"epoch": 22.0,
"eval_accuracy": 0.8868852459016393,
"eval_f1_macro": 0.8844045750001632,
"eval_f1_micro": 0.8868852459016393,
"eval_f1_weighted": 0.8863481245798025,
"eval_loss": 0.43187016248703003,
"eval_precision_macro": 0.905546176046176,
"eval_precision_micro": 0.8868852459016393,
"eval_precision_weighted": 0.9069275010053698,
"eval_recall_macro": 0.884922619047619,
"eval_recall_micro": 0.8868852459016393,
"eval_recall_weighted": 0.8868852459016393,
"eval_runtime": 16.9877,
"eval_samples_per_second": 71.817,
"eval_steps_per_second": 1.177,
"step": 1122
},
{
"epoch": 22.09933774834437,
"grad_norm": 4.536903381347656,
"learning_rate": 4.3033333333333334e-05,
"loss": 0.3081,
"step": 1127
},
{
"epoch": 22.23841059602649,
"grad_norm": 5.818119525909424,
"learning_rate": 4.295555555555556e-05,
"loss": 0.4447,
"step": 1134
},
{
"epoch": 22.37748344370861,
"grad_norm": 6.355660438537598,
"learning_rate": 4.287777777777778e-05,
"loss": 0.2905,
"step": 1141
},
{
"epoch": 22.516556291390728,
"grad_norm": 3.7836861610412598,
"learning_rate": 4.2800000000000004e-05,
"loss": 0.3363,
"step": 1148
},
{
"epoch": 22.655629139072847,
"grad_norm": 7.677190780639648,
"learning_rate": 4.272222222222223e-05,
"loss": 0.335,
"step": 1155
},
{
"epoch": 22.794701986754966,
"grad_norm": 9.610170364379883,
"learning_rate": 4.264444444444445e-05,
"loss": 0.3224,
"step": 1162
},
{
"epoch": 22.933774834437084,
"grad_norm": 4.4913458824157715,
"learning_rate": 4.2566666666666674e-05,
"loss": 0.3594,
"step": 1169
},
{
"epoch": 23.0,
"eval_accuracy": 0.8909836065573771,
"eval_f1_macro": 0.8853569522153114,
"eval_f1_micro": 0.8909836065573771,
"eval_f1_weighted": 0.8877271844125241,
"eval_loss": 0.415208101272583,
"eval_precision_macro": 0.9054862637362637,
"eval_precision_micro": 0.8909836065573771,
"eval_precision_weighted": 0.9061880492003442,
"eval_recall_macro": 0.8882023809523809,
"eval_recall_micro": 0.8909836065573771,
"eval_recall_weighted": 0.8909836065573771,
"eval_runtime": 21.468,
"eval_samples_per_second": 56.829,
"eval_steps_per_second": 0.932,
"step": 1173
},
{
"epoch": 23.05960264900662,
"grad_norm": 6.537961959838867,
"learning_rate": 4.248888888888889e-05,
"loss": 0.3416,
"step": 1176
},
{
"epoch": 23.198675496688743,
"grad_norm": 4.195661544799805,
"learning_rate": 4.2411111111111114e-05,
"loss": 0.3493,
"step": 1183
},
{
"epoch": 23.337748344370862,
"grad_norm": 6.090582370758057,
"learning_rate": 4.233333333333334e-05,
"loss": 0.2816,
"step": 1190
},
{
"epoch": 23.47682119205298,
"grad_norm": 4.269461154937744,
"learning_rate": 4.225555555555556e-05,
"loss": 0.3461,
"step": 1197
},
{
"epoch": 23.6158940397351,
"grad_norm": 5.240416049957275,
"learning_rate": 4.217777777777778e-05,
"loss": 0.3688,
"step": 1204
},
{
"epoch": 23.75496688741722,
"grad_norm": 3.2008297443389893,
"learning_rate": 4.21e-05,
"loss": 0.2945,
"step": 1211
},
{
"epoch": 23.894039735099337,
"grad_norm": 5.42747163772583,
"learning_rate": 4.2022222222222223e-05,
"loss": 0.365,
"step": 1218
},
{
"epoch": 24.0,
"eval_accuracy": 0.8868852459016393,
"eval_f1_macro": 0.8826740438354216,
"eval_f1_micro": 0.8868852459016393,
"eval_f1_weighted": 0.885225590303497,
"eval_loss": 0.41284599900245667,
"eval_precision_macro": 0.8989790764790766,
"eval_precision_micro": 0.8868852459016393,
"eval_precision_weighted": 0.90095879757765,
"eval_recall_macro": 0.8842916666666667,
"eval_recall_micro": 0.8868852459016393,
"eval_recall_weighted": 0.8868852459016393,
"eval_runtime": 18.0644,
"eval_samples_per_second": 67.536,
"eval_steps_per_second": 1.107,
"step": 1224
},
{
"epoch": 24.019867549668874,
"grad_norm": 4.910553455352783,
"learning_rate": 4.194444444444445e-05,
"loss": 0.27,
"step": 1225
},
{
"epoch": 24.158940397350992,
"grad_norm": 4.917506694793701,
"learning_rate": 4.186666666666667e-05,
"loss": 0.2662,
"step": 1232
},
{
"epoch": 24.29801324503311,
"grad_norm": 6.041675090789795,
"learning_rate": 4.178888888888889e-05,
"loss": 0.3693,
"step": 1239
},
{
"epoch": 24.437086092715234,
"grad_norm": 6.5168776512146,
"learning_rate": 4.171111111111111e-05,
"loss": 0.2868,
"step": 1246
},
{
"epoch": 24.576158940397352,
"grad_norm": 3.36521315574646,
"learning_rate": 4.1633333333333333e-05,
"loss": 0.4133,
"step": 1253
},
{
"epoch": 24.71523178807947,
"grad_norm": 4.277838230133057,
"learning_rate": 4.155555555555556e-05,
"loss": 0.2291,
"step": 1260
},
{
"epoch": 24.85430463576159,
"grad_norm": 5.821409225463867,
"learning_rate": 4.147777777777778e-05,
"loss": 0.2927,
"step": 1267
},
{
"epoch": 24.99337748344371,
"grad_norm": 6.532901763916016,
"learning_rate": 4.14e-05,
"loss": 0.3711,
"step": 1274
},
{
"epoch": 25.0,
"eval_accuracy": 0.8885245901639345,
"eval_f1_macro": 0.8860624318491966,
"eval_f1_micro": 0.8885245901639345,
"eval_f1_weighted": 0.8860652284338398,
"eval_loss": 0.3975684940814972,
"eval_precision_macro": 0.9022628066378067,
"eval_precision_micro": 0.8885245901639345,
"eval_precision_weighted": 0.901722671681688,
"eval_recall_macro": 0.8880000000000001,
"eval_recall_micro": 0.8885245901639345,
"eval_recall_weighted": 0.8885245901639345,
"eval_runtime": 19.8974,
"eval_samples_per_second": 61.315,
"eval_steps_per_second": 1.005,
"step": 1275
}
],
"logging_steps": 7,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 7,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.44898430624727e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}