darklorddad's picture
Upload 13 files
e7ed158 verified
{
"best_metric": 0.6602770090103149,
"best_model_checkpoint": "Model-Focalnet-Base-\\checkpoint-1224",
"epoch": 24.0,
"eval_steps": 7,
"global_step": 1224,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1390728476821192,
"grad_norm": 2.261648416519165,
"learning_rate": 7.000000000000001e-07,
"loss": 5.3125,
"step": 7
},
{
"epoch": 0.2781456953642384,
"grad_norm": 2.211456537246704,
"learning_rate": 1.4000000000000001e-06,
"loss": 5.3234,
"step": 14
},
{
"epoch": 0.41721854304635764,
"grad_norm": 2.1140072345733643,
"learning_rate": 2.1000000000000002e-06,
"loss": 5.308,
"step": 21
},
{
"epoch": 0.5562913907284768,
"grad_norm": 2.2730369567871094,
"learning_rate": 2.8000000000000003e-06,
"loss": 5.3189,
"step": 28
},
{
"epoch": 0.695364238410596,
"grad_norm": 2.2154030799865723,
"learning_rate": 3.5000000000000004e-06,
"loss": 5.2991,
"step": 35
},
{
"epoch": 0.8344370860927153,
"grad_norm": 1.887474536895752,
"learning_rate": 4.2000000000000004e-06,
"loss": 5.3036,
"step": 42
},
{
"epoch": 0.9735099337748344,
"grad_norm": 2.4402565956115723,
"learning_rate": 4.9000000000000005e-06,
"loss": 5.2965,
"step": 49
},
{
"epoch": 1.0,
"eval_accuracy": 0.00819672131147541,
"eval_f1_macro": 0.00337801133638995,
"eval_f1_micro": 0.00819672131147541,
"eval_f1_weighted": 0.0035373101357153293,
"eval_loss": 5.291137218475342,
"eval_precision_macro": 0.0028020831663437863,
"eval_precision_micro": 0.00819672131147541,
"eval_precision_weighted": 0.002911205169282775,
"eval_recall_macro": 0.008023809523809523,
"eval_recall_micro": 0.00819672131147541,
"eval_recall_weighted": 0.00819672131147541,
"eval_runtime": 65.9936,
"eval_samples_per_second": 18.487,
"eval_steps_per_second": 0.303,
"step": 51
},
{
"epoch": 1.099337748344371,
"grad_norm": 2.17191219329834,
"learning_rate": 5.600000000000001e-06,
"loss": 4.7769,
"step": 56
},
{
"epoch": 1.23841059602649,
"grad_norm": 2.5151634216308594,
"learning_rate": 6.300000000000001e-06,
"loss": 5.2913,
"step": 63
},
{
"epoch": 1.3774834437086092,
"grad_norm": 2.353184938430786,
"learning_rate": 7.000000000000001e-06,
"loss": 5.2925,
"step": 70
},
{
"epoch": 1.5165562913907285,
"grad_norm": 2.138894557952881,
"learning_rate": 7.7e-06,
"loss": 5.2627,
"step": 77
},
{
"epoch": 1.6556291390728477,
"grad_norm": 2.234560012817383,
"learning_rate": 8.400000000000001e-06,
"loss": 5.2627,
"step": 84
},
{
"epoch": 1.794701986754967,
"grad_norm": 2.210279703140259,
"learning_rate": 9.100000000000001e-06,
"loss": 5.2633,
"step": 91
},
{
"epoch": 1.9337748344370862,
"grad_norm": 2.9447083473205566,
"learning_rate": 9.800000000000001e-06,
"loss": 5.2558,
"step": 98
},
{
"epoch": 2.0,
"eval_accuracy": 0.01557377049180328,
"eval_f1_macro": 0.006874468130470725,
"eval_f1_micro": 0.01557377049180328,
"eval_f1_weighted": 0.007013333548104455,
"eval_loss": 5.22589635848999,
"eval_precision_macro": 0.006156529662888035,
"eval_precision_micro": 0.01557377049180328,
"eval_precision_weighted": 0.006207445270776915,
"eval_recall_macro": 0.01500595238095238,
"eval_recall_micro": 0.01557377049180328,
"eval_recall_weighted": 0.01557377049180328,
"eval_runtime": 56.5165,
"eval_samples_per_second": 21.587,
"eval_steps_per_second": 0.354,
"step": 102
},
{
"epoch": 2.0596026490066226,
"grad_norm": 2.3659451007843018,
"learning_rate": 1.05e-05,
"loss": 4.7295,
"step": 105
},
{
"epoch": 2.198675496688742,
"grad_norm": 2.9902284145355225,
"learning_rate": 1.1200000000000001e-05,
"loss": 5.2129,
"step": 112
},
{
"epoch": 2.337748344370861,
"grad_norm": 4.189450740814209,
"learning_rate": 1.19e-05,
"loss": 5.1816,
"step": 119
},
{
"epoch": 2.47682119205298,
"grad_norm": 4.748580455780029,
"learning_rate": 1.2600000000000001e-05,
"loss": 5.1931,
"step": 126
},
{
"epoch": 2.6158940397350996,
"grad_norm": 3.579268455505371,
"learning_rate": 1.3300000000000001e-05,
"loss": 5.164,
"step": 133
},
{
"epoch": 2.7549668874172184,
"grad_norm": 4.054067611694336,
"learning_rate": 1.4000000000000001e-05,
"loss": 5.1483,
"step": 140
},
{
"epoch": 2.8940397350993377,
"grad_norm": 3.84291410446167,
"learning_rate": 1.47e-05,
"loss": 5.1257,
"step": 147
},
{
"epoch": 3.0,
"eval_accuracy": 0.051639344262295085,
"eval_f1_macro": 0.03261693581398834,
"eval_f1_micro": 0.051639344262295085,
"eval_f1_weighted": 0.03516077511642792,
"eval_loss": 5.062410354614258,
"eval_precision_macro": 0.03239256187924794,
"eval_precision_micro": 0.051639344262295085,
"eval_precision_weighted": 0.034858832269066796,
"eval_recall_macro": 0.047523809523809524,
"eval_recall_micro": 0.051639344262295085,
"eval_recall_weighted": 0.051639344262295085,
"eval_runtime": 63.3821,
"eval_samples_per_second": 19.248,
"eval_steps_per_second": 0.316,
"step": 153
},
{
"epoch": 3.019867549668874,
"grad_norm": 4.443902492523193,
"learning_rate": 1.54e-05,
"loss": 4.6257,
"step": 154
},
{
"epoch": 3.1589403973509933,
"grad_norm": 5.521849155426025,
"learning_rate": 1.6100000000000002e-05,
"loss": 5.0162,
"step": 161
},
{
"epoch": 3.2980132450331126,
"grad_norm": 6.407104969024658,
"learning_rate": 1.6800000000000002e-05,
"loss": 4.9824,
"step": 168
},
{
"epoch": 3.437086092715232,
"grad_norm": 5.278021335601807,
"learning_rate": 1.75e-05,
"loss": 4.96,
"step": 175
},
{
"epoch": 3.576158940397351,
"grad_norm": 7.5942182540893555,
"learning_rate": 1.8200000000000002e-05,
"loss": 4.898,
"step": 182
},
{
"epoch": 3.7152317880794703,
"grad_norm": 6.050070285797119,
"learning_rate": 1.8900000000000002e-05,
"loss": 4.8774,
"step": 189
},
{
"epoch": 3.8543046357615895,
"grad_norm": 6.589919567108154,
"learning_rate": 1.9600000000000002e-05,
"loss": 4.7924,
"step": 196
},
{
"epoch": 3.993377483443709,
"grad_norm": 8.232624053955078,
"learning_rate": 2.0300000000000002e-05,
"loss": 4.6994,
"step": 203
},
{
"epoch": 4.0,
"eval_accuracy": 0.11721311475409836,
"eval_f1_macro": 0.08082684992031455,
"eval_f1_micro": 0.11721311475409836,
"eval_f1_weighted": 0.08625745944487533,
"eval_loss": 4.516047477722168,
"eval_precision_macro": 0.0960521320476185,
"eval_precision_micro": 0.11721311475409836,
"eval_precision_weighted": 0.10289070402055932,
"eval_recall_macro": 0.1088095238095238,
"eval_recall_micro": 0.11721311475409836,
"eval_recall_weighted": 0.11721311475409836,
"eval_runtime": 81.067,
"eval_samples_per_second": 15.049,
"eval_steps_per_second": 0.247,
"step": 204
},
{
"epoch": 4.119205298013245,
"grad_norm": 7.674986362457275,
"learning_rate": 2.1e-05,
"loss": 4.1019,
"step": 210
},
{
"epoch": 4.258278145695364,
"grad_norm": 7.877310276031494,
"learning_rate": 2.1700000000000002e-05,
"loss": 4.4153,
"step": 217
},
{
"epoch": 4.397350993377484,
"grad_norm": 9.657820701599121,
"learning_rate": 2.2400000000000002e-05,
"loss": 4.2469,
"step": 224
},
{
"epoch": 4.5364238410596025,
"grad_norm": 11.347479820251465,
"learning_rate": 2.3100000000000002e-05,
"loss": 4.1469,
"step": 231
},
{
"epoch": 4.675496688741722,
"grad_norm": 12.215789794921875,
"learning_rate": 2.38e-05,
"loss": 4.0285,
"step": 238
},
{
"epoch": 4.814569536423841,
"grad_norm": 10.887558937072754,
"learning_rate": 2.45e-05,
"loss": 4.0269,
"step": 245
},
{
"epoch": 4.95364238410596,
"grad_norm": 12.850284576416016,
"learning_rate": 2.5200000000000003e-05,
"loss": 3.7643,
"step": 252
},
{
"epoch": 5.0,
"eval_accuracy": 0.2680327868852459,
"eval_f1_macro": 0.22529862929165922,
"eval_f1_micro": 0.2680327868852459,
"eval_f1_weighted": 0.23166885649403285,
"eval_loss": 3.3234214782714844,
"eval_precision_macro": 0.25987233164420576,
"eval_precision_micro": 0.2680327868852459,
"eval_precision_weighted": 0.26332140193762377,
"eval_recall_macro": 0.2577738095238095,
"eval_recall_micro": 0.2680327868852459,
"eval_recall_weighted": 0.2680327868852459,
"eval_runtime": 77.8338,
"eval_samples_per_second": 15.674,
"eval_steps_per_second": 0.257,
"step": 255
},
{
"epoch": 5.079470198675497,
"grad_norm": 11.808965682983398,
"learning_rate": 2.5900000000000003e-05,
"loss": 3.1734,
"step": 259
},
{
"epoch": 5.218543046357616,
"grad_norm": 17.217893600463867,
"learning_rate": 2.6600000000000003e-05,
"loss": 3.3163,
"step": 266
},
{
"epoch": 5.357615894039735,
"grad_norm": 14.96292495727539,
"learning_rate": 2.7300000000000003e-05,
"loss": 3.3328,
"step": 273
},
{
"epoch": 5.496688741721854,
"grad_norm": 11.553727149963379,
"learning_rate": 2.8000000000000003e-05,
"loss": 3.2036,
"step": 280
},
{
"epoch": 5.635761589403973,
"grad_norm": 12.452818870544434,
"learning_rate": 2.87e-05,
"loss": 3.1867,
"step": 287
},
{
"epoch": 5.774834437086093,
"grad_norm": 13.04163646697998,
"learning_rate": 2.94e-05,
"loss": 3.0558,
"step": 294
},
{
"epoch": 5.913907284768212,
"grad_norm": 12.779662132263184,
"learning_rate": 3.01e-05,
"loss": 2.9603,
"step": 301
},
{
"epoch": 6.0,
"eval_accuracy": 0.40327868852459015,
"eval_f1_macro": 0.3522325245599723,
"eval_f1_micro": 0.40327868852459015,
"eval_f1_weighted": 0.36290227384056034,
"eval_loss": 2.3593220710754395,
"eval_precision_macro": 0.4013160035627141,
"eval_precision_micro": 0.40327868852459015,
"eval_precision_weighted": 0.40977942860114985,
"eval_recall_macro": 0.38851190476190484,
"eval_recall_micro": 0.40327868852459015,
"eval_recall_weighted": 0.40327868852459015,
"eval_runtime": 71.8317,
"eval_samples_per_second": 16.984,
"eval_steps_per_second": 0.278,
"step": 306
},
{
"epoch": 6.039735099337748,
"grad_norm": 13.623518943786621,
"learning_rate": 3.08e-05,
"loss": 2.4678,
"step": 308
},
{
"epoch": 6.178807947019868,
"grad_norm": 13.266014099121094,
"learning_rate": 3.15e-05,
"loss": 2.6213,
"step": 315
},
{
"epoch": 6.317880794701987,
"grad_norm": 13.395142555236816,
"learning_rate": 3.2200000000000003e-05,
"loss": 2.4566,
"step": 322
},
{
"epoch": 6.456953642384106,
"grad_norm": 13.428766250610352,
"learning_rate": 3.29e-05,
"loss": 2.3462,
"step": 329
},
{
"epoch": 6.596026490066225,
"grad_norm": 11.362808227539062,
"learning_rate": 3.3600000000000004e-05,
"loss": 2.3357,
"step": 336
},
{
"epoch": 6.735099337748345,
"grad_norm": 11.982301712036133,
"learning_rate": 3.430000000000001e-05,
"loss": 2.2728,
"step": 343
},
{
"epoch": 6.874172185430464,
"grad_norm": 15.563032150268555,
"learning_rate": 3.5e-05,
"loss": 2.3091,
"step": 350
},
{
"epoch": 7.0,
"grad_norm": 10.777310371398926,
"learning_rate": 3.57e-05,
"loss": 1.9475,
"step": 357
},
{
"epoch": 7.0,
"eval_accuracy": 0.5336065573770492,
"eval_f1_macro": 0.5010502512573436,
"eval_f1_micro": 0.5336065573770492,
"eval_f1_weighted": 0.5078295641241183,
"eval_loss": 1.7169982194900513,
"eval_precision_macro": 0.570199926363626,
"eval_precision_micro": 0.5336065573770492,
"eval_precision_weighted": 0.5742672096804716,
"eval_recall_macro": 0.5233749999999999,
"eval_recall_micro": 0.5336065573770492,
"eval_recall_weighted": 0.5336065573770492,
"eval_runtime": 63.8109,
"eval_samples_per_second": 19.119,
"eval_steps_per_second": 0.313,
"step": 357
},
{
"epoch": 7.139072847682119,
"grad_norm": 12.829914093017578,
"learning_rate": 3.6400000000000004e-05,
"loss": 1.9122,
"step": 364
},
{
"epoch": 7.2781456953642385,
"grad_norm": 15.254327774047852,
"learning_rate": 3.71e-05,
"loss": 1.9511,
"step": 371
},
{
"epoch": 7.417218543046357,
"grad_norm": 13.248723030090332,
"learning_rate": 3.7800000000000004e-05,
"loss": 1.921,
"step": 378
},
{
"epoch": 7.556291390728477,
"grad_norm": 14.405394554138184,
"learning_rate": 3.85e-05,
"loss": 1.8447,
"step": 385
},
{
"epoch": 7.695364238410596,
"grad_norm": 13.432222366333008,
"learning_rate": 3.9200000000000004e-05,
"loss": 1.7079,
"step": 392
},
{
"epoch": 7.8344370860927155,
"grad_norm": 13.591761589050293,
"learning_rate": 3.99e-05,
"loss": 1.7888,
"step": 399
},
{
"epoch": 7.973509933774834,
"grad_norm": 12.760810852050781,
"learning_rate": 4.0600000000000004e-05,
"loss": 1.8494,
"step": 406
},
{
"epoch": 8.0,
"eval_accuracy": 0.6360655737704918,
"eval_f1_macro": 0.6127611312020431,
"eval_f1_micro": 0.6360655737704918,
"eval_f1_weighted": 0.6178432613234403,
"eval_loss": 1.343964695930481,
"eval_precision_macro": 0.6623227605727605,
"eval_precision_micro": 0.6360655737704918,
"eval_precision_weighted": 0.6653364258692127,
"eval_recall_macro": 0.6304523809523809,
"eval_recall_micro": 0.6360655737704918,
"eval_recall_weighted": 0.6360655737704918,
"eval_runtime": 79.4787,
"eval_samples_per_second": 15.35,
"eval_steps_per_second": 0.252,
"step": 408
},
{
"epoch": 8.099337748344372,
"grad_norm": 11.729964256286621,
"learning_rate": 4.13e-05,
"loss": 1.4108,
"step": 413
},
{
"epoch": 8.23841059602649,
"grad_norm": 12.144929885864258,
"learning_rate": 4.2e-05,
"loss": 1.5489,
"step": 420
},
{
"epoch": 8.37748344370861,
"grad_norm": 13.483667373657227,
"learning_rate": 4.27e-05,
"loss": 1.5863,
"step": 427
},
{
"epoch": 8.516556291390728,
"grad_norm": 16.043304443359375,
"learning_rate": 4.3400000000000005e-05,
"loss": 1.4405,
"step": 434
},
{
"epoch": 8.655629139072847,
"grad_norm": 15.305998802185059,
"learning_rate": 4.41e-05,
"loss": 1.4753,
"step": 441
},
{
"epoch": 8.794701986754967,
"grad_norm": 13.507715225219727,
"learning_rate": 4.4800000000000005e-05,
"loss": 1.4817,
"step": 448
},
{
"epoch": 8.933774834437086,
"grad_norm": 13.252425193786621,
"learning_rate": 4.55e-05,
"loss": 1.5227,
"step": 455
},
{
"epoch": 9.0,
"eval_accuracy": 0.6786885245901639,
"eval_f1_macro": 0.6571807258516,
"eval_f1_micro": 0.6786885245901639,
"eval_f1_weighted": 0.6634537879698879,
"eval_loss": 1.1470587253570557,
"eval_precision_macro": 0.7084700165031047,
"eval_precision_micro": 0.6786885245901639,
"eval_precision_weighted": 0.7094984540397994,
"eval_recall_macro": 0.6691130952380953,
"eval_recall_micro": 0.6786885245901639,
"eval_recall_weighted": 0.6786885245901639,
"eval_runtime": 69.8414,
"eval_samples_per_second": 17.468,
"eval_steps_per_second": 0.286,
"step": 459
},
{
"epoch": 9.059602649006623,
"grad_norm": 11.350573539733887,
"learning_rate": 4.6200000000000005e-05,
"loss": 1.352,
"step": 462
},
{
"epoch": 9.198675496688741,
"grad_norm": 11.896257400512695,
"learning_rate": 4.69e-05,
"loss": 1.2096,
"step": 469
},
{
"epoch": 9.33774834437086,
"grad_norm": 14.927756309509277,
"learning_rate": 4.76e-05,
"loss": 1.3018,
"step": 476
},
{
"epoch": 9.47682119205298,
"grad_norm": 14.38377571105957,
"learning_rate": 4.83e-05,
"loss": 1.2997,
"step": 483
},
{
"epoch": 9.6158940397351,
"grad_norm": 10.836702346801758,
"learning_rate": 4.9e-05,
"loss": 1.2053,
"step": 490
},
{
"epoch": 9.754966887417218,
"grad_norm": 13.384648323059082,
"learning_rate": 4.97e-05,
"loss": 1.2461,
"step": 497
},
{
"epoch": 9.894039735099337,
"grad_norm": 12.859415054321289,
"learning_rate": 4.995555555555556e-05,
"loss": 1.2476,
"step": 504
},
{
"epoch": 10.0,
"eval_accuracy": 0.7295081967213115,
"eval_f1_macro": 0.7136993719988303,
"eval_f1_micro": 0.7295081967213115,
"eval_f1_weighted": 0.7185265127973471,
"eval_loss": 0.9676371812820435,
"eval_precision_macro": 0.7655571405718464,
"eval_precision_micro": 0.7295081967213115,
"eval_precision_weighted": 0.7658903327466492,
"eval_recall_macro": 0.7218452380952379,
"eval_recall_micro": 0.7295081967213115,
"eval_recall_weighted": 0.7295081967213115,
"eval_runtime": 64.0687,
"eval_samples_per_second": 19.042,
"eval_steps_per_second": 0.312,
"step": 510
},
{
"epoch": 10.019867549668874,
"grad_norm": 11.940890312194824,
"learning_rate": 4.987777777777778e-05,
"loss": 1.0087,
"step": 511
},
{
"epoch": 10.158940397350994,
"grad_norm": 11.370889663696289,
"learning_rate": 4.9800000000000004e-05,
"loss": 1.0759,
"step": 518
},
{
"epoch": 10.298013245033113,
"grad_norm": 11.712719917297363,
"learning_rate": 4.972222222222223e-05,
"loss": 1.0388,
"step": 525
},
{
"epoch": 10.437086092715232,
"grad_norm": 15.134650230407715,
"learning_rate": 4.964444444444445e-05,
"loss": 1.0933,
"step": 532
},
{
"epoch": 10.57615894039735,
"grad_norm": 11.481903076171875,
"learning_rate": 4.956666666666667e-05,
"loss": 1.0236,
"step": 539
},
{
"epoch": 10.71523178807947,
"grad_norm": 11.978276252746582,
"learning_rate": 4.948888888888889e-05,
"loss": 1.1232,
"step": 546
},
{
"epoch": 10.85430463576159,
"grad_norm": 12.34005355834961,
"learning_rate": 4.9411111111111114e-05,
"loss": 1.0067,
"step": 553
},
{
"epoch": 10.993377483443709,
"grad_norm": 11.154061317443848,
"learning_rate": 4.933333333333334e-05,
"loss": 1.1001,
"step": 560
},
{
"epoch": 11.0,
"eval_accuracy": 0.7385245901639345,
"eval_f1_macro": 0.7282043296830448,
"eval_f1_micro": 0.7385245901639345,
"eval_f1_weighted": 0.732015719256241,
"eval_loss": 0.8772674798965454,
"eval_precision_macro": 0.7795788517038517,
"eval_precision_micro": 0.7385245901639345,
"eval_precision_weighted": 0.7814253801753802,
"eval_recall_macro": 0.733672619047619,
"eval_recall_micro": 0.7385245901639345,
"eval_recall_weighted": 0.7385245901639345,
"eval_runtime": 60.2253,
"eval_samples_per_second": 20.257,
"eval_steps_per_second": 0.332,
"step": 561
},
{
"epoch": 11.119205298013245,
"grad_norm": 8.575409889221191,
"learning_rate": 4.925555555555556e-05,
"loss": 0.8726,
"step": 567
},
{
"epoch": 11.258278145695364,
"grad_norm": 12.448003768920898,
"learning_rate": 4.917777777777778e-05,
"loss": 0.9765,
"step": 574
},
{
"epoch": 11.397350993377483,
"grad_norm": 10.99142837524414,
"learning_rate": 4.91e-05,
"loss": 0.8438,
"step": 581
},
{
"epoch": 11.536423841059603,
"grad_norm": 9.985913276672363,
"learning_rate": 4.9022222222222224e-05,
"loss": 0.863,
"step": 588
},
{
"epoch": 11.675496688741722,
"grad_norm": 14.102209091186523,
"learning_rate": 4.894444444444445e-05,
"loss": 0.9674,
"step": 595
},
{
"epoch": 11.814569536423841,
"grad_norm": 10.937699317932129,
"learning_rate": 4.886666666666667e-05,
"loss": 0.9521,
"step": 602
},
{
"epoch": 11.95364238410596,
"grad_norm": 10.190333366394043,
"learning_rate": 4.878888888888889e-05,
"loss": 0.8804,
"step": 609
},
{
"epoch": 12.0,
"eval_accuracy": 0.759016393442623,
"eval_f1_macro": 0.7427500998456881,
"eval_f1_micro": 0.759016393442623,
"eval_f1_weighted": 0.7456563548213297,
"eval_loss": 0.8271353840827942,
"eval_precision_macro": 0.7684717300243616,
"eval_precision_micro": 0.759016393442623,
"eval_precision_weighted": 0.7719776994647571,
"eval_recall_macro": 0.756702380952381,
"eval_recall_micro": 0.759016393442623,
"eval_recall_weighted": 0.759016393442623,
"eval_runtime": 58.6516,
"eval_samples_per_second": 20.801,
"eval_steps_per_second": 0.341,
"step": 612
},
{
"epoch": 12.079470198675496,
"grad_norm": 14.1576509475708,
"learning_rate": 4.871111111111111e-05,
"loss": 0.719,
"step": 616
},
{
"epoch": 12.218543046357617,
"grad_norm": 11.829643249511719,
"learning_rate": 4.8633333333333334e-05,
"loss": 0.9113,
"step": 623
},
{
"epoch": 12.357615894039736,
"grad_norm": 9.620296478271484,
"learning_rate": 4.855555555555556e-05,
"loss": 0.8671,
"step": 630
},
{
"epoch": 12.496688741721854,
"grad_norm": 10.44937801361084,
"learning_rate": 4.847777777777778e-05,
"loss": 0.8422,
"step": 637
},
{
"epoch": 12.635761589403973,
"grad_norm": 7.808290958404541,
"learning_rate": 4.8400000000000004e-05,
"loss": 0.8018,
"step": 644
},
{
"epoch": 12.774834437086092,
"grad_norm": 9.790284156799316,
"learning_rate": 4.832222222222223e-05,
"loss": 0.8626,
"step": 651
},
{
"epoch": 12.913907284768213,
"grad_norm": 12.296673774719238,
"learning_rate": 4.824444444444445e-05,
"loss": 0.9596,
"step": 658
},
{
"epoch": 13.0,
"eval_accuracy": 0.7622950819672131,
"eval_f1_macro": 0.7541482304589116,
"eval_f1_micro": 0.7622950819672131,
"eval_f1_weighted": 0.7581034870800643,
"eval_loss": 0.8282718062400818,
"eval_precision_macro": 0.7943097392803276,
"eval_precision_micro": 0.7622950819672131,
"eval_precision_weighted": 0.7971667340748826,
"eval_recall_macro": 0.7580535714285713,
"eval_recall_micro": 0.7622950819672131,
"eval_recall_weighted": 0.7622950819672131,
"eval_runtime": 59.927,
"eval_samples_per_second": 20.358,
"eval_steps_per_second": 0.334,
"step": 663
},
{
"epoch": 13.039735099337749,
"grad_norm": 18.717695236206055,
"learning_rate": 4.8166666666666674e-05,
"loss": 0.7906,
"step": 665
},
{
"epoch": 13.178807947019868,
"grad_norm": 14.046932220458984,
"learning_rate": 4.808888888888889e-05,
"loss": 0.7326,
"step": 672
},
{
"epoch": 13.317880794701987,
"grad_norm": 11.162008285522461,
"learning_rate": 4.8011111111111114e-05,
"loss": 0.8299,
"step": 679
},
{
"epoch": 13.456953642384105,
"grad_norm": 9.34903335571289,
"learning_rate": 4.793333333333334e-05,
"loss": 0.7046,
"step": 686
},
{
"epoch": 13.596026490066226,
"grad_norm": 8.978596687316895,
"learning_rate": 4.785555555555556e-05,
"loss": 0.672,
"step": 693
},
{
"epoch": 13.735099337748345,
"grad_norm": 9.649175643920898,
"learning_rate": 4.7777777777777784e-05,
"loss": 0.7706,
"step": 700
},
{
"epoch": 13.874172185430464,
"grad_norm": 9.140443801879883,
"learning_rate": 4.77e-05,
"loss": 0.7734,
"step": 707
},
{
"epoch": 14.0,
"grad_norm": 6.996921062469482,
"learning_rate": 4.7622222222222224e-05,
"loss": 0.6202,
"step": 714
},
{
"epoch": 14.0,
"eval_accuracy": 0.7754098360655738,
"eval_f1_macro": 0.765805670364494,
"eval_f1_micro": 0.7754098360655738,
"eval_f1_weighted": 0.7695095891286827,
"eval_loss": 0.7957718372344971,
"eval_precision_macro": 0.8098741258741259,
"eval_precision_micro": 0.7754098360655738,
"eval_precision_weighted": 0.8099575401829501,
"eval_recall_macro": 0.769452380952381,
"eval_recall_micro": 0.7754098360655738,
"eval_recall_weighted": 0.7754098360655738,
"eval_runtime": 58.8988,
"eval_samples_per_second": 20.713,
"eval_steps_per_second": 0.34,
"step": 714
},
{
"epoch": 14.139072847682119,
"grad_norm": 11.548070907592773,
"learning_rate": 4.754444444444445e-05,
"loss": 0.7968,
"step": 721
},
{
"epoch": 14.278145695364238,
"grad_norm": 11.0925874710083,
"learning_rate": 4.746666666666667e-05,
"loss": 0.6864,
"step": 728
},
{
"epoch": 14.417218543046358,
"grad_norm": 9.538455963134766,
"learning_rate": 4.7388888888888894e-05,
"loss": 0.6766,
"step": 735
},
{
"epoch": 14.556291390728477,
"grad_norm": 7.995402812957764,
"learning_rate": 4.731111111111111e-05,
"loss": 0.7023,
"step": 742
},
{
"epoch": 14.695364238410596,
"grad_norm": 10.825759887695312,
"learning_rate": 4.7233333333333334e-05,
"loss": 0.6883,
"step": 749
},
{
"epoch": 14.834437086092715,
"grad_norm": 14.279191017150879,
"learning_rate": 4.715555555555556e-05,
"loss": 0.6533,
"step": 756
},
{
"epoch": 14.973509933774835,
"grad_norm": 8.562923431396484,
"learning_rate": 4.707777777777778e-05,
"loss": 0.6466,
"step": 763
},
{
"epoch": 15.0,
"eval_accuracy": 0.7967213114754098,
"eval_f1_macro": 0.7874462737947056,
"eval_f1_micro": 0.7967213114754098,
"eval_f1_weighted": 0.7923798470661948,
"eval_loss": 0.7445575594902039,
"eval_precision_macro": 0.8216799295475766,
"eval_precision_micro": 0.7967213114754098,
"eval_precision_weighted": 0.8259746225862427,
"eval_recall_macro": 0.7922261904761905,
"eval_recall_micro": 0.7967213114754098,
"eval_recall_weighted": 0.7967213114754098,
"eval_runtime": 59.8003,
"eval_samples_per_second": 20.401,
"eval_steps_per_second": 0.334,
"step": 765
},
{
"epoch": 15.099337748344372,
"grad_norm": 9.65889835357666,
"learning_rate": 4.7e-05,
"loss": 0.6024,
"step": 770
},
{
"epoch": 15.23841059602649,
"grad_norm": 8.170406341552734,
"learning_rate": 4.692222222222222e-05,
"loss": 0.5263,
"step": 777
},
{
"epoch": 15.37748344370861,
"grad_norm": 8.782620429992676,
"learning_rate": 4.6844444444444444e-05,
"loss": 0.552,
"step": 784
},
{
"epoch": 15.516556291390728,
"grad_norm": 11.878396034240723,
"learning_rate": 4.676666666666667e-05,
"loss": 0.6127,
"step": 791
},
{
"epoch": 15.655629139072847,
"grad_norm": 8.88171672821045,
"learning_rate": 4.668888888888889e-05,
"loss": 0.6756,
"step": 798
},
{
"epoch": 15.794701986754967,
"grad_norm": 11.983383178710938,
"learning_rate": 4.6611111111111114e-05,
"loss": 0.664,
"step": 805
},
{
"epoch": 15.933774834437086,
"grad_norm": 10.409689903259277,
"learning_rate": 4.653333333333334e-05,
"loss": 0.6436,
"step": 812
},
{
"epoch": 16.0,
"eval_accuracy": 0.7918032786885246,
"eval_f1_macro": 0.7815447427921685,
"eval_f1_micro": 0.7918032786885246,
"eval_f1_weighted": 0.7856156314459259,
"eval_loss": 0.7297011017799377,
"eval_precision_macro": 0.8101799866799867,
"eval_precision_micro": 0.7918032786885246,
"eval_precision_weighted": 0.8123722907329464,
"eval_recall_macro": 0.7866488095238096,
"eval_recall_micro": 0.7918032786885246,
"eval_recall_weighted": 0.7918032786885246,
"eval_runtime": 60.0895,
"eval_samples_per_second": 20.303,
"eval_steps_per_second": 0.333,
"step": 816
},
{
"epoch": 16.05960264900662,
"grad_norm": 10.903715133666992,
"learning_rate": 4.645555555555556e-05,
"loss": 0.5591,
"step": 819
},
{
"epoch": 16.198675496688743,
"grad_norm": 8.767610549926758,
"learning_rate": 4.6377777777777784e-05,
"loss": 0.5711,
"step": 826
},
{
"epoch": 16.337748344370862,
"grad_norm": 8.273555755615234,
"learning_rate": 4.630000000000001e-05,
"loss": 0.5811,
"step": 833
},
{
"epoch": 16.47682119205298,
"grad_norm": 12.013016700744629,
"learning_rate": 4.6222222222222224e-05,
"loss": 0.6443,
"step": 840
},
{
"epoch": 16.6158940397351,
"grad_norm": 7.874364376068115,
"learning_rate": 4.614444444444445e-05,
"loss": 0.5073,
"step": 847
},
{
"epoch": 16.75496688741722,
"grad_norm": 9.01498031616211,
"learning_rate": 4.606666666666667e-05,
"loss": 0.608,
"step": 854
},
{
"epoch": 16.894039735099337,
"grad_norm": 9.848909378051758,
"learning_rate": 4.5988888888888894e-05,
"loss": 0.5929,
"step": 861
},
{
"epoch": 17.0,
"eval_accuracy": 0.7959016393442623,
"eval_f1_macro": 0.7867938321138785,
"eval_f1_micro": 0.7959016393442623,
"eval_f1_weighted": 0.7917754148114372,
"eval_loss": 0.7077643871307373,
"eval_precision_macro": 0.8185556526806528,
"eval_precision_micro": 0.7959016393442623,
"eval_precision_weighted": 0.8217451378312034,
"eval_recall_macro": 0.7902916666666667,
"eval_recall_micro": 0.7959016393442623,
"eval_recall_weighted": 0.7959016393442623,
"eval_runtime": 59.7504,
"eval_samples_per_second": 20.418,
"eval_steps_per_second": 0.335,
"step": 867
},
{
"epoch": 17.019867549668874,
"grad_norm": 9.507264137268066,
"learning_rate": 4.591111111111112e-05,
"loss": 0.5247,
"step": 868
},
{
"epoch": 17.158940397350992,
"grad_norm": 7.274167537689209,
"learning_rate": 4.5833333333333334e-05,
"loss": 0.5212,
"step": 875
},
{
"epoch": 17.29801324503311,
"grad_norm": 8.040386199951172,
"learning_rate": 4.575555555555556e-05,
"loss": 0.4957,
"step": 882
},
{
"epoch": 17.437086092715234,
"grad_norm": 10.34827709197998,
"learning_rate": 4.567777777777778e-05,
"loss": 0.4938,
"step": 889
},
{
"epoch": 17.576158940397352,
"grad_norm": 9.062361717224121,
"learning_rate": 4.5600000000000004e-05,
"loss": 0.5341,
"step": 896
},
{
"epoch": 17.71523178807947,
"grad_norm": 7.889723777770996,
"learning_rate": 4.552222222222222e-05,
"loss": 0.5407,
"step": 903
},
{
"epoch": 17.85430463576159,
"grad_norm": 7.329662799835205,
"learning_rate": 4.5444444444444444e-05,
"loss": 0.5344,
"step": 910
},
{
"epoch": 17.99337748344371,
"grad_norm": 10.251781463623047,
"learning_rate": 4.536666666666667e-05,
"loss": 0.5108,
"step": 917
},
{
"epoch": 18.0,
"eval_accuracy": 0.8,
"eval_f1_macro": 0.7904420722323199,
"eval_f1_micro": 0.8,
"eval_f1_weighted": 0.794189161749749,
"eval_loss": 0.7119916081428528,
"eval_precision_macro": 0.8223463203463203,
"eval_precision_micro": 0.8,
"eval_precision_weighted": 0.8258165377427673,
"eval_recall_macro": 0.7962023809523809,
"eval_recall_micro": 0.8,
"eval_recall_weighted": 0.8,
"eval_runtime": 58.9812,
"eval_samples_per_second": 20.685,
"eval_steps_per_second": 0.339,
"step": 918
},
{
"epoch": 18.119205298013245,
"grad_norm": 7.837319374084473,
"learning_rate": 4.528888888888889e-05,
"loss": 0.4401,
"step": 924
},
{
"epoch": 18.258278145695364,
"grad_norm": 7.545521259307861,
"learning_rate": 4.5211111111111114e-05,
"loss": 0.4821,
"step": 931
},
{
"epoch": 18.397350993377483,
"grad_norm": 7.626832962036133,
"learning_rate": 4.513333333333333e-05,
"loss": 0.4991,
"step": 938
},
{
"epoch": 18.5364238410596,
"grad_norm": 7.265345573425293,
"learning_rate": 4.5055555555555554e-05,
"loss": 0.5936,
"step": 945
},
{
"epoch": 18.67549668874172,
"grad_norm": 6.648807525634766,
"learning_rate": 4.497777777777778e-05,
"loss": 0.4418,
"step": 952
},
{
"epoch": 18.814569536423843,
"grad_norm": 6.413826942443848,
"learning_rate": 4.49e-05,
"loss": 0.4185,
"step": 959
},
{
"epoch": 18.95364238410596,
"grad_norm": 9.378252029418945,
"learning_rate": 4.4822222222222224e-05,
"loss": 0.5109,
"step": 966
},
{
"epoch": 19.0,
"eval_accuracy": 0.8106557377049181,
"eval_f1_macro": 0.8023834074422309,
"eval_f1_micro": 0.8106557377049181,
"eval_f1_weighted": 0.8054703936104611,
"eval_loss": 0.671293318271637,
"eval_precision_macro": 0.8325211038961038,
"eval_precision_micro": 0.8106557377049181,
"eval_precision_weighted": 0.8349751023111679,
"eval_recall_macro": 0.8078333333333333,
"eval_recall_micro": 0.8106557377049181,
"eval_recall_weighted": 0.8106557377049181,
"eval_runtime": 60.0408,
"eval_samples_per_second": 20.32,
"eval_steps_per_second": 0.333,
"step": 969
},
{
"epoch": 19.079470198675498,
"grad_norm": 6.43688440322876,
"learning_rate": 4.474444444444445e-05,
"loss": 0.401,
"step": 973
},
{
"epoch": 19.218543046357617,
"grad_norm": 10.133489608764648,
"learning_rate": 4.466666666666667e-05,
"loss": 0.4449,
"step": 980
},
{
"epoch": 19.357615894039736,
"grad_norm": 9.007479667663574,
"learning_rate": 4.4588888888888894e-05,
"loss": 0.5457,
"step": 987
},
{
"epoch": 19.496688741721854,
"grad_norm": 10.912771224975586,
"learning_rate": 4.451111111111112e-05,
"loss": 0.5306,
"step": 994
},
{
"epoch": 19.635761589403973,
"grad_norm": 6.615180492401123,
"learning_rate": 4.443333333333334e-05,
"loss": 0.4925,
"step": 1001
},
{
"epoch": 19.774834437086092,
"grad_norm": 7.076197147369385,
"learning_rate": 4.435555555555556e-05,
"loss": 0.4787,
"step": 1008
},
{
"epoch": 19.91390728476821,
"grad_norm": 7.040290832519531,
"learning_rate": 4.427777777777778e-05,
"loss": 0.4809,
"step": 1015
},
{
"epoch": 20.0,
"eval_accuracy": 0.8139344262295082,
"eval_f1_macro": 0.8081211352716771,
"eval_f1_micro": 0.8139344262295082,
"eval_f1_weighted": 0.8116663019924579,
"eval_loss": 0.6667141914367676,
"eval_precision_macro": 0.8430578726828728,
"eval_precision_micro": 0.8139344262295082,
"eval_precision_weighted": 0.8445454568200469,
"eval_recall_macro": 0.8106190476190476,
"eval_recall_micro": 0.8139344262295082,
"eval_recall_weighted": 0.8139344262295082,
"eval_runtime": 58.7404,
"eval_samples_per_second": 20.769,
"eval_steps_per_second": 0.34,
"step": 1020
},
{
"epoch": 20.039735099337747,
"grad_norm": 9.023087501525879,
"learning_rate": 4.4200000000000004e-05,
"loss": 0.386,
"step": 1022
},
{
"epoch": 20.178807947019866,
"grad_norm": 7.4928178787231445,
"learning_rate": 4.412222222222223e-05,
"loss": 0.4569,
"step": 1029
},
{
"epoch": 20.31788079470199,
"grad_norm": 8.090821266174316,
"learning_rate": 4.404444444444445e-05,
"loss": 0.4778,
"step": 1036
},
{
"epoch": 20.456953642384107,
"grad_norm": 8.650497436523438,
"learning_rate": 4.396666666666667e-05,
"loss": 0.4786,
"step": 1043
},
{
"epoch": 20.596026490066226,
"grad_norm": 6.049080848693848,
"learning_rate": 4.388888888888889e-05,
"loss": 0.4975,
"step": 1050
},
{
"epoch": 20.735099337748345,
"grad_norm": 10.202515602111816,
"learning_rate": 4.3811111111111114e-05,
"loss": 0.4035,
"step": 1057
},
{
"epoch": 20.874172185430464,
"grad_norm": 7.0871429443359375,
"learning_rate": 4.373333333333334e-05,
"loss": 0.4274,
"step": 1064
},
{
"epoch": 21.0,
"grad_norm": 6.111388206481934,
"learning_rate": 4.3655555555555554e-05,
"loss": 0.3576,
"step": 1071
},
{
"epoch": 21.0,
"eval_accuracy": 0.8073770491803278,
"eval_f1_macro": 0.7980818380535872,
"eval_f1_micro": 0.8073770491803278,
"eval_f1_weighted": 0.8027800592784986,
"eval_loss": 0.6649746298789978,
"eval_precision_macro": 0.8290474247974248,
"eval_precision_micro": 0.8073770491803278,
"eval_precision_weighted": 0.8307658143313881,
"eval_recall_macro": 0.8019166666666666,
"eval_recall_micro": 0.8073770491803278,
"eval_recall_weighted": 0.8073770491803278,
"eval_runtime": 60.057,
"eval_samples_per_second": 20.314,
"eval_steps_per_second": 0.333,
"step": 1071
},
{
"epoch": 21.13907284768212,
"grad_norm": 9.059436798095703,
"learning_rate": 4.357777777777778e-05,
"loss": 0.4775,
"step": 1078
},
{
"epoch": 21.278145695364238,
"grad_norm": 9.497885704040527,
"learning_rate": 4.35e-05,
"loss": 0.4531,
"step": 1085
},
{
"epoch": 21.417218543046356,
"grad_norm": 10.471771240234375,
"learning_rate": 4.3422222222222224e-05,
"loss": 0.479,
"step": 1092
},
{
"epoch": 21.556291390728475,
"grad_norm": 6.627233505249023,
"learning_rate": 4.334444444444445e-05,
"loss": 0.4332,
"step": 1099
},
{
"epoch": 21.695364238410598,
"grad_norm": 9.046399116516113,
"learning_rate": 4.3266666666666664e-05,
"loss": 0.4767,
"step": 1106
},
{
"epoch": 21.834437086092716,
"grad_norm": 6.7745513916015625,
"learning_rate": 4.318888888888889e-05,
"loss": 0.5137,
"step": 1113
},
{
"epoch": 21.973509933774835,
"grad_norm": 8.061189651489258,
"learning_rate": 4.311111111111111e-05,
"loss": 0.4877,
"step": 1120
},
{
"epoch": 22.0,
"eval_accuracy": 0.8114754098360656,
"eval_f1_macro": 0.8045914526649821,
"eval_f1_micro": 0.8114754098360656,
"eval_f1_weighted": 0.8078734461991453,
"eval_loss": 0.6778721809387207,
"eval_precision_macro": 0.836376651126651,
"eval_precision_micro": 0.8114754098360656,
"eval_precision_weighted": 0.8366239998617048,
"eval_recall_macro": 0.806404761904762,
"eval_recall_micro": 0.8114754098360656,
"eval_recall_weighted": 0.8114754098360656,
"eval_runtime": 59.185,
"eval_samples_per_second": 20.613,
"eval_steps_per_second": 0.338,
"step": 1122
},
{
"epoch": 22.09933774834437,
"grad_norm": 9.460957527160645,
"learning_rate": 4.3033333333333334e-05,
"loss": 0.4915,
"step": 1127
},
{
"epoch": 22.23841059602649,
"grad_norm": 9.026511192321777,
"learning_rate": 4.295555555555556e-05,
"loss": 0.4157,
"step": 1134
},
{
"epoch": 22.37748344370861,
"grad_norm": 9.733258247375488,
"learning_rate": 4.287777777777778e-05,
"loss": 0.3564,
"step": 1141
},
{
"epoch": 22.516556291390728,
"grad_norm": 9.269991874694824,
"learning_rate": 4.2800000000000004e-05,
"loss": 0.4707,
"step": 1148
},
{
"epoch": 22.655629139072847,
"grad_norm": 7.8387041091918945,
"learning_rate": 4.272222222222223e-05,
"loss": 0.4902,
"step": 1155
},
{
"epoch": 22.794701986754966,
"grad_norm": 10.261953353881836,
"learning_rate": 4.264444444444445e-05,
"loss": 0.4656,
"step": 1162
},
{
"epoch": 22.933774834437084,
"grad_norm": 9.317761421203613,
"learning_rate": 4.2566666666666674e-05,
"loss": 0.4705,
"step": 1169
},
{
"epoch": 23.0,
"eval_accuracy": 0.8131147540983606,
"eval_f1_macro": 0.8073761565232153,
"eval_f1_micro": 0.8131147540983606,
"eval_f1_weighted": 0.8111283224168953,
"eval_loss": 0.6698673963546753,
"eval_precision_macro": 0.8399364801864801,
"eval_precision_micro": 0.8131147540983606,
"eval_precision_weighted": 0.8420730703722508,
"eval_recall_macro": 0.808672619047619,
"eval_recall_micro": 0.8131147540983606,
"eval_recall_weighted": 0.8131147540983606,
"eval_runtime": 59.9539,
"eval_samples_per_second": 20.349,
"eval_steps_per_second": 0.334,
"step": 1173
},
{
"epoch": 23.05960264900662,
"grad_norm": 9.809006690979004,
"learning_rate": 4.248888888888889e-05,
"loss": 0.3833,
"step": 1176
},
{
"epoch": 23.198675496688743,
"grad_norm": 8.9915132522583,
"learning_rate": 4.2411111111111114e-05,
"loss": 0.4552,
"step": 1183
},
{
"epoch": 23.337748344370862,
"grad_norm": 10.036259651184082,
"learning_rate": 4.233333333333334e-05,
"loss": 0.3869,
"step": 1190
},
{
"epoch": 23.47682119205298,
"grad_norm": 10.57496166229248,
"learning_rate": 4.225555555555556e-05,
"loss": 0.4003,
"step": 1197
},
{
"epoch": 23.6158940397351,
"grad_norm": 9.061355590820312,
"learning_rate": 4.217777777777778e-05,
"loss": 0.4654,
"step": 1204
},
{
"epoch": 23.75496688741722,
"grad_norm": 7.108461380004883,
"learning_rate": 4.21e-05,
"loss": 0.4085,
"step": 1211
},
{
"epoch": 23.894039735099337,
"grad_norm": 5.542710781097412,
"learning_rate": 4.2022222222222223e-05,
"loss": 0.4358,
"step": 1218
},
{
"epoch": 24.0,
"eval_accuracy": 0.8262295081967214,
"eval_f1_macro": 0.8156689398492805,
"eval_f1_micro": 0.8262295081967214,
"eval_f1_weighted": 0.8195565714293827,
"eval_loss": 0.6602770090103149,
"eval_precision_macro": 0.8476504329004328,
"eval_precision_micro": 0.8262295081967214,
"eval_precision_weighted": 0.849387256641355,
"eval_recall_macro": 0.8219166666666666,
"eval_recall_micro": 0.8262295081967214,
"eval_recall_weighted": 0.8262295081967214,
"eval_runtime": 58.671,
"eval_samples_per_second": 20.794,
"eval_steps_per_second": 0.341,
"step": 1224
}
],
"logging_steps": 7,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 7,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1899638530382496e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}