{ "best_metric": 0.6602770090103149, "best_model_checkpoint": "Model-Focalnet-Base-\\checkpoint-1224", "epoch": 24.0, "eval_steps": 7, "global_step": 1224, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1390728476821192, "grad_norm": 2.261648416519165, "learning_rate": 7.000000000000001e-07, "loss": 5.3125, "step": 7 }, { "epoch": 0.2781456953642384, "grad_norm": 2.211456537246704, "learning_rate": 1.4000000000000001e-06, "loss": 5.3234, "step": 14 }, { "epoch": 0.41721854304635764, "grad_norm": 2.1140072345733643, "learning_rate": 2.1000000000000002e-06, "loss": 5.308, "step": 21 }, { "epoch": 0.5562913907284768, "grad_norm": 2.2730369567871094, "learning_rate": 2.8000000000000003e-06, "loss": 5.3189, "step": 28 }, { "epoch": 0.695364238410596, "grad_norm": 2.2154030799865723, "learning_rate": 3.5000000000000004e-06, "loss": 5.2991, "step": 35 }, { "epoch": 0.8344370860927153, "grad_norm": 1.887474536895752, "learning_rate": 4.2000000000000004e-06, "loss": 5.3036, "step": 42 }, { "epoch": 0.9735099337748344, "grad_norm": 2.4402565956115723, "learning_rate": 4.9000000000000005e-06, "loss": 5.2965, "step": 49 }, { "epoch": 1.0, "eval_accuracy": 0.00819672131147541, "eval_f1_macro": 0.00337801133638995, "eval_f1_micro": 0.00819672131147541, "eval_f1_weighted": 0.0035373101357153293, "eval_loss": 5.291137218475342, "eval_precision_macro": 0.0028020831663437863, "eval_precision_micro": 0.00819672131147541, "eval_precision_weighted": 0.002911205169282775, "eval_recall_macro": 0.008023809523809523, "eval_recall_micro": 0.00819672131147541, "eval_recall_weighted": 0.00819672131147541, "eval_runtime": 65.9936, "eval_samples_per_second": 18.487, "eval_steps_per_second": 0.303, "step": 51 }, { "epoch": 1.099337748344371, "grad_norm": 2.17191219329834, "learning_rate": 5.600000000000001e-06, "loss": 4.7769, "step": 56 }, { "epoch": 1.23841059602649, "grad_norm": 2.5151634216308594, "learning_rate": 6.300000000000001e-06, "loss": 5.2913, "step": 63 }, { "epoch": 1.3774834437086092, "grad_norm": 2.353184938430786, "learning_rate": 7.000000000000001e-06, "loss": 5.2925, "step": 70 }, { "epoch": 1.5165562913907285, "grad_norm": 2.138894557952881, "learning_rate": 7.7e-06, "loss": 5.2627, "step": 77 }, { "epoch": 1.6556291390728477, "grad_norm": 2.234560012817383, "learning_rate": 8.400000000000001e-06, "loss": 5.2627, "step": 84 }, { "epoch": 1.794701986754967, "grad_norm": 2.210279703140259, "learning_rate": 9.100000000000001e-06, "loss": 5.2633, "step": 91 }, { "epoch": 1.9337748344370862, "grad_norm": 2.9447083473205566, "learning_rate": 9.800000000000001e-06, "loss": 5.2558, "step": 98 }, { "epoch": 2.0, "eval_accuracy": 0.01557377049180328, "eval_f1_macro": 0.006874468130470725, "eval_f1_micro": 0.01557377049180328, "eval_f1_weighted": 0.007013333548104455, "eval_loss": 5.22589635848999, "eval_precision_macro": 0.006156529662888035, "eval_precision_micro": 0.01557377049180328, "eval_precision_weighted": 0.006207445270776915, "eval_recall_macro": 0.01500595238095238, "eval_recall_micro": 0.01557377049180328, "eval_recall_weighted": 0.01557377049180328, "eval_runtime": 56.5165, "eval_samples_per_second": 21.587, "eval_steps_per_second": 0.354, "step": 102 }, { "epoch": 2.0596026490066226, "grad_norm": 2.3659451007843018, "learning_rate": 1.05e-05, "loss": 4.7295, "step": 105 }, { "epoch": 2.198675496688742, "grad_norm": 2.9902284145355225, "learning_rate": 1.1200000000000001e-05, "loss": 5.2129, "step": 112 }, { "epoch": 2.337748344370861, "grad_norm": 4.189450740814209, "learning_rate": 1.19e-05, "loss": 5.1816, "step": 119 }, { "epoch": 2.47682119205298, "grad_norm": 4.748580455780029, "learning_rate": 1.2600000000000001e-05, "loss": 5.1931, "step": 126 }, { "epoch": 2.6158940397350996, "grad_norm": 3.579268455505371, "learning_rate": 1.3300000000000001e-05, "loss": 5.164, "step": 133 }, { "epoch": 2.7549668874172184, "grad_norm": 4.054067611694336, "learning_rate": 1.4000000000000001e-05, "loss": 5.1483, "step": 140 }, { "epoch": 2.8940397350993377, "grad_norm": 3.84291410446167, "learning_rate": 1.47e-05, "loss": 5.1257, "step": 147 }, { "epoch": 3.0, "eval_accuracy": 0.051639344262295085, "eval_f1_macro": 0.03261693581398834, "eval_f1_micro": 0.051639344262295085, "eval_f1_weighted": 0.03516077511642792, "eval_loss": 5.062410354614258, "eval_precision_macro": 0.03239256187924794, "eval_precision_micro": 0.051639344262295085, "eval_precision_weighted": 0.034858832269066796, "eval_recall_macro": 0.047523809523809524, "eval_recall_micro": 0.051639344262295085, "eval_recall_weighted": 0.051639344262295085, "eval_runtime": 63.3821, "eval_samples_per_second": 19.248, "eval_steps_per_second": 0.316, "step": 153 }, { "epoch": 3.019867549668874, "grad_norm": 4.443902492523193, "learning_rate": 1.54e-05, "loss": 4.6257, "step": 154 }, { "epoch": 3.1589403973509933, "grad_norm": 5.521849155426025, "learning_rate": 1.6100000000000002e-05, "loss": 5.0162, "step": 161 }, { "epoch": 3.2980132450331126, "grad_norm": 6.407104969024658, "learning_rate": 1.6800000000000002e-05, "loss": 4.9824, "step": 168 }, { "epoch": 3.437086092715232, "grad_norm": 5.278021335601807, "learning_rate": 1.75e-05, "loss": 4.96, "step": 175 }, { "epoch": 3.576158940397351, "grad_norm": 7.5942182540893555, "learning_rate": 1.8200000000000002e-05, "loss": 4.898, "step": 182 }, { "epoch": 3.7152317880794703, "grad_norm": 6.050070285797119, "learning_rate": 1.8900000000000002e-05, "loss": 4.8774, "step": 189 }, { "epoch": 3.8543046357615895, "grad_norm": 6.589919567108154, "learning_rate": 1.9600000000000002e-05, "loss": 4.7924, "step": 196 }, { "epoch": 3.993377483443709, "grad_norm": 8.232624053955078, "learning_rate": 2.0300000000000002e-05, "loss": 4.6994, "step": 203 }, { "epoch": 4.0, "eval_accuracy": 0.11721311475409836, "eval_f1_macro": 0.08082684992031455, "eval_f1_micro": 0.11721311475409836, "eval_f1_weighted": 0.08625745944487533, "eval_loss": 4.516047477722168, "eval_precision_macro": 0.0960521320476185, "eval_precision_micro": 0.11721311475409836, "eval_precision_weighted": 0.10289070402055932, "eval_recall_macro": 0.1088095238095238, "eval_recall_micro": 0.11721311475409836, "eval_recall_weighted": 0.11721311475409836, "eval_runtime": 81.067, "eval_samples_per_second": 15.049, "eval_steps_per_second": 0.247, "step": 204 }, { "epoch": 4.119205298013245, "grad_norm": 7.674986362457275, "learning_rate": 2.1e-05, "loss": 4.1019, "step": 210 }, { "epoch": 4.258278145695364, "grad_norm": 7.877310276031494, "learning_rate": 2.1700000000000002e-05, "loss": 4.4153, "step": 217 }, { "epoch": 4.397350993377484, "grad_norm": 9.657820701599121, "learning_rate": 2.2400000000000002e-05, "loss": 4.2469, "step": 224 }, { "epoch": 4.5364238410596025, "grad_norm": 11.347479820251465, "learning_rate": 2.3100000000000002e-05, "loss": 4.1469, "step": 231 }, { "epoch": 4.675496688741722, "grad_norm": 12.215789794921875, "learning_rate": 2.38e-05, "loss": 4.0285, "step": 238 }, { "epoch": 4.814569536423841, "grad_norm": 10.887558937072754, "learning_rate": 2.45e-05, "loss": 4.0269, "step": 245 }, { "epoch": 4.95364238410596, "grad_norm": 12.850284576416016, "learning_rate": 2.5200000000000003e-05, "loss": 3.7643, "step": 252 }, { "epoch": 5.0, "eval_accuracy": 0.2680327868852459, "eval_f1_macro": 0.22529862929165922, "eval_f1_micro": 0.2680327868852459, "eval_f1_weighted": 0.23166885649403285, "eval_loss": 3.3234214782714844, "eval_precision_macro": 0.25987233164420576, "eval_precision_micro": 0.2680327868852459, "eval_precision_weighted": 0.26332140193762377, "eval_recall_macro": 0.2577738095238095, "eval_recall_micro": 0.2680327868852459, "eval_recall_weighted": 0.2680327868852459, "eval_runtime": 77.8338, "eval_samples_per_second": 15.674, "eval_steps_per_second": 0.257, "step": 255 }, { "epoch": 5.079470198675497, "grad_norm": 11.808965682983398, "learning_rate": 2.5900000000000003e-05, "loss": 3.1734, "step": 259 }, { "epoch": 5.218543046357616, "grad_norm": 17.217893600463867, "learning_rate": 2.6600000000000003e-05, "loss": 3.3163, "step": 266 }, { "epoch": 5.357615894039735, "grad_norm": 14.96292495727539, "learning_rate": 2.7300000000000003e-05, "loss": 3.3328, "step": 273 }, { "epoch": 5.496688741721854, "grad_norm": 11.553727149963379, "learning_rate": 2.8000000000000003e-05, "loss": 3.2036, "step": 280 }, { "epoch": 5.635761589403973, "grad_norm": 12.452818870544434, "learning_rate": 2.87e-05, "loss": 3.1867, "step": 287 }, { "epoch": 5.774834437086093, "grad_norm": 13.04163646697998, "learning_rate": 2.94e-05, "loss": 3.0558, "step": 294 }, { "epoch": 5.913907284768212, "grad_norm": 12.779662132263184, "learning_rate": 3.01e-05, "loss": 2.9603, "step": 301 }, { "epoch": 6.0, "eval_accuracy": 0.40327868852459015, "eval_f1_macro": 0.3522325245599723, "eval_f1_micro": 0.40327868852459015, "eval_f1_weighted": 0.36290227384056034, "eval_loss": 2.3593220710754395, "eval_precision_macro": 0.4013160035627141, "eval_precision_micro": 0.40327868852459015, "eval_precision_weighted": 0.40977942860114985, "eval_recall_macro": 0.38851190476190484, "eval_recall_micro": 0.40327868852459015, "eval_recall_weighted": 0.40327868852459015, "eval_runtime": 71.8317, "eval_samples_per_second": 16.984, "eval_steps_per_second": 0.278, "step": 306 }, { "epoch": 6.039735099337748, "grad_norm": 13.623518943786621, "learning_rate": 3.08e-05, "loss": 2.4678, "step": 308 }, { "epoch": 6.178807947019868, "grad_norm": 13.266014099121094, "learning_rate": 3.15e-05, "loss": 2.6213, "step": 315 }, { "epoch": 6.317880794701987, "grad_norm": 13.395142555236816, "learning_rate": 3.2200000000000003e-05, "loss": 2.4566, "step": 322 }, { "epoch": 6.456953642384106, "grad_norm": 13.428766250610352, "learning_rate": 3.29e-05, "loss": 2.3462, "step": 329 }, { "epoch": 6.596026490066225, "grad_norm": 11.362808227539062, "learning_rate": 3.3600000000000004e-05, "loss": 2.3357, "step": 336 }, { "epoch": 6.735099337748345, "grad_norm": 11.982301712036133, "learning_rate": 3.430000000000001e-05, "loss": 2.2728, "step": 343 }, { "epoch": 6.874172185430464, "grad_norm": 15.563032150268555, "learning_rate": 3.5e-05, "loss": 2.3091, "step": 350 }, { "epoch": 7.0, "grad_norm": 10.777310371398926, "learning_rate": 3.57e-05, "loss": 1.9475, "step": 357 }, { "epoch": 7.0, "eval_accuracy": 0.5336065573770492, "eval_f1_macro": 0.5010502512573436, "eval_f1_micro": 0.5336065573770492, "eval_f1_weighted": 0.5078295641241183, "eval_loss": 1.7169982194900513, "eval_precision_macro": 0.570199926363626, "eval_precision_micro": 0.5336065573770492, "eval_precision_weighted": 0.5742672096804716, "eval_recall_macro": 0.5233749999999999, "eval_recall_micro": 0.5336065573770492, "eval_recall_weighted": 0.5336065573770492, "eval_runtime": 63.8109, "eval_samples_per_second": 19.119, "eval_steps_per_second": 0.313, "step": 357 }, { "epoch": 7.139072847682119, "grad_norm": 12.829914093017578, "learning_rate": 3.6400000000000004e-05, "loss": 1.9122, "step": 364 }, { "epoch": 7.2781456953642385, "grad_norm": 15.254327774047852, "learning_rate": 3.71e-05, "loss": 1.9511, "step": 371 }, { "epoch": 7.417218543046357, "grad_norm": 13.248723030090332, "learning_rate": 3.7800000000000004e-05, "loss": 1.921, "step": 378 }, { "epoch": 7.556291390728477, "grad_norm": 14.405394554138184, "learning_rate": 3.85e-05, "loss": 1.8447, "step": 385 }, { "epoch": 7.695364238410596, "grad_norm": 13.432222366333008, "learning_rate": 3.9200000000000004e-05, "loss": 1.7079, "step": 392 }, { "epoch": 7.8344370860927155, "grad_norm": 13.591761589050293, "learning_rate": 3.99e-05, "loss": 1.7888, "step": 399 }, { "epoch": 7.973509933774834, "grad_norm": 12.760810852050781, "learning_rate": 4.0600000000000004e-05, "loss": 1.8494, "step": 406 }, { "epoch": 8.0, "eval_accuracy": 0.6360655737704918, "eval_f1_macro": 0.6127611312020431, "eval_f1_micro": 0.6360655737704918, "eval_f1_weighted": 0.6178432613234403, "eval_loss": 1.343964695930481, "eval_precision_macro": 0.6623227605727605, "eval_precision_micro": 0.6360655737704918, "eval_precision_weighted": 0.6653364258692127, "eval_recall_macro": 0.6304523809523809, "eval_recall_micro": 0.6360655737704918, "eval_recall_weighted": 0.6360655737704918, "eval_runtime": 79.4787, "eval_samples_per_second": 15.35, "eval_steps_per_second": 0.252, "step": 408 }, { "epoch": 8.099337748344372, "grad_norm": 11.729964256286621, "learning_rate": 4.13e-05, "loss": 1.4108, "step": 413 }, { "epoch": 8.23841059602649, "grad_norm": 12.144929885864258, "learning_rate": 4.2e-05, "loss": 1.5489, "step": 420 }, { "epoch": 8.37748344370861, "grad_norm": 13.483667373657227, "learning_rate": 4.27e-05, "loss": 1.5863, "step": 427 }, { "epoch": 8.516556291390728, "grad_norm": 16.043304443359375, "learning_rate": 4.3400000000000005e-05, "loss": 1.4405, "step": 434 }, { "epoch": 8.655629139072847, "grad_norm": 15.305998802185059, "learning_rate": 4.41e-05, "loss": 1.4753, "step": 441 }, { "epoch": 8.794701986754967, "grad_norm": 13.507715225219727, "learning_rate": 4.4800000000000005e-05, "loss": 1.4817, "step": 448 }, { "epoch": 8.933774834437086, "grad_norm": 13.252425193786621, "learning_rate": 4.55e-05, "loss": 1.5227, "step": 455 }, { "epoch": 9.0, "eval_accuracy": 0.6786885245901639, "eval_f1_macro": 0.6571807258516, "eval_f1_micro": 0.6786885245901639, "eval_f1_weighted": 0.6634537879698879, "eval_loss": 1.1470587253570557, "eval_precision_macro": 0.7084700165031047, "eval_precision_micro": 0.6786885245901639, "eval_precision_weighted": 0.7094984540397994, "eval_recall_macro": 0.6691130952380953, "eval_recall_micro": 0.6786885245901639, "eval_recall_weighted": 0.6786885245901639, "eval_runtime": 69.8414, "eval_samples_per_second": 17.468, "eval_steps_per_second": 0.286, "step": 459 }, { "epoch": 9.059602649006623, "grad_norm": 11.350573539733887, "learning_rate": 4.6200000000000005e-05, "loss": 1.352, "step": 462 }, { "epoch": 9.198675496688741, "grad_norm": 11.896257400512695, "learning_rate": 4.69e-05, "loss": 1.2096, "step": 469 }, { "epoch": 9.33774834437086, "grad_norm": 14.927756309509277, "learning_rate": 4.76e-05, "loss": 1.3018, "step": 476 }, { "epoch": 9.47682119205298, "grad_norm": 14.38377571105957, "learning_rate": 4.83e-05, "loss": 1.2997, "step": 483 }, { "epoch": 9.6158940397351, "grad_norm": 10.836702346801758, "learning_rate": 4.9e-05, "loss": 1.2053, "step": 490 }, { "epoch": 9.754966887417218, "grad_norm": 13.384648323059082, "learning_rate": 4.97e-05, "loss": 1.2461, "step": 497 }, { "epoch": 9.894039735099337, "grad_norm": 12.859415054321289, "learning_rate": 4.995555555555556e-05, "loss": 1.2476, "step": 504 }, { "epoch": 10.0, "eval_accuracy": 0.7295081967213115, "eval_f1_macro": 0.7136993719988303, "eval_f1_micro": 0.7295081967213115, "eval_f1_weighted": 0.7185265127973471, "eval_loss": 0.9676371812820435, "eval_precision_macro": 0.7655571405718464, "eval_precision_micro": 0.7295081967213115, "eval_precision_weighted": 0.7658903327466492, "eval_recall_macro": 0.7218452380952379, "eval_recall_micro": 0.7295081967213115, "eval_recall_weighted": 0.7295081967213115, "eval_runtime": 64.0687, "eval_samples_per_second": 19.042, "eval_steps_per_second": 0.312, "step": 510 }, { "epoch": 10.019867549668874, "grad_norm": 11.940890312194824, "learning_rate": 4.987777777777778e-05, "loss": 1.0087, "step": 511 }, { "epoch": 10.158940397350994, "grad_norm": 11.370889663696289, "learning_rate": 4.9800000000000004e-05, "loss": 1.0759, "step": 518 }, { "epoch": 10.298013245033113, "grad_norm": 11.712719917297363, "learning_rate": 4.972222222222223e-05, "loss": 1.0388, "step": 525 }, { "epoch": 10.437086092715232, "grad_norm": 15.134650230407715, "learning_rate": 4.964444444444445e-05, "loss": 1.0933, "step": 532 }, { "epoch": 10.57615894039735, "grad_norm": 11.481903076171875, "learning_rate": 4.956666666666667e-05, "loss": 1.0236, "step": 539 }, { "epoch": 10.71523178807947, "grad_norm": 11.978276252746582, "learning_rate": 4.948888888888889e-05, "loss": 1.1232, "step": 546 }, { "epoch": 10.85430463576159, "grad_norm": 12.34005355834961, "learning_rate": 4.9411111111111114e-05, "loss": 1.0067, "step": 553 }, { "epoch": 10.993377483443709, "grad_norm": 11.154061317443848, "learning_rate": 4.933333333333334e-05, "loss": 1.1001, "step": 560 }, { "epoch": 11.0, "eval_accuracy": 0.7385245901639345, "eval_f1_macro": 0.7282043296830448, "eval_f1_micro": 0.7385245901639345, "eval_f1_weighted": 0.732015719256241, "eval_loss": 0.8772674798965454, "eval_precision_macro": 0.7795788517038517, "eval_precision_micro": 0.7385245901639345, "eval_precision_weighted": 0.7814253801753802, "eval_recall_macro": 0.733672619047619, "eval_recall_micro": 0.7385245901639345, "eval_recall_weighted": 0.7385245901639345, "eval_runtime": 60.2253, "eval_samples_per_second": 20.257, "eval_steps_per_second": 0.332, "step": 561 }, { "epoch": 11.119205298013245, "grad_norm": 8.575409889221191, "learning_rate": 4.925555555555556e-05, "loss": 0.8726, "step": 567 }, { "epoch": 11.258278145695364, "grad_norm": 12.448003768920898, "learning_rate": 4.917777777777778e-05, "loss": 0.9765, "step": 574 }, { "epoch": 11.397350993377483, "grad_norm": 10.99142837524414, "learning_rate": 4.91e-05, "loss": 0.8438, "step": 581 }, { "epoch": 11.536423841059603, "grad_norm": 9.985913276672363, "learning_rate": 4.9022222222222224e-05, "loss": 0.863, "step": 588 }, { "epoch": 11.675496688741722, "grad_norm": 14.102209091186523, "learning_rate": 4.894444444444445e-05, "loss": 0.9674, "step": 595 }, { "epoch": 11.814569536423841, "grad_norm": 10.937699317932129, "learning_rate": 4.886666666666667e-05, "loss": 0.9521, "step": 602 }, { "epoch": 11.95364238410596, "grad_norm": 10.190333366394043, "learning_rate": 4.878888888888889e-05, "loss": 0.8804, "step": 609 }, { "epoch": 12.0, "eval_accuracy": 0.759016393442623, "eval_f1_macro": 0.7427500998456881, "eval_f1_micro": 0.759016393442623, "eval_f1_weighted": 0.7456563548213297, "eval_loss": 0.8271353840827942, "eval_precision_macro": 0.7684717300243616, "eval_precision_micro": 0.759016393442623, "eval_precision_weighted": 0.7719776994647571, "eval_recall_macro": 0.756702380952381, "eval_recall_micro": 0.759016393442623, "eval_recall_weighted": 0.759016393442623, "eval_runtime": 58.6516, "eval_samples_per_second": 20.801, "eval_steps_per_second": 0.341, "step": 612 }, { "epoch": 12.079470198675496, "grad_norm": 14.1576509475708, "learning_rate": 4.871111111111111e-05, "loss": 0.719, "step": 616 }, { "epoch": 12.218543046357617, "grad_norm": 11.829643249511719, "learning_rate": 4.8633333333333334e-05, "loss": 0.9113, "step": 623 }, { "epoch": 12.357615894039736, "grad_norm": 9.620296478271484, "learning_rate": 4.855555555555556e-05, "loss": 0.8671, "step": 630 }, { "epoch": 12.496688741721854, "grad_norm": 10.44937801361084, "learning_rate": 4.847777777777778e-05, "loss": 0.8422, "step": 637 }, { "epoch": 12.635761589403973, "grad_norm": 7.808290958404541, "learning_rate": 4.8400000000000004e-05, "loss": 0.8018, "step": 644 }, { "epoch": 12.774834437086092, "grad_norm": 9.790284156799316, "learning_rate": 4.832222222222223e-05, "loss": 0.8626, "step": 651 }, { "epoch": 12.913907284768213, "grad_norm": 12.296673774719238, "learning_rate": 4.824444444444445e-05, "loss": 0.9596, "step": 658 }, { "epoch": 13.0, "eval_accuracy": 0.7622950819672131, "eval_f1_macro": 0.7541482304589116, "eval_f1_micro": 0.7622950819672131, "eval_f1_weighted": 0.7581034870800643, "eval_loss": 0.8282718062400818, "eval_precision_macro": 0.7943097392803276, "eval_precision_micro": 0.7622950819672131, "eval_precision_weighted": 0.7971667340748826, "eval_recall_macro": 0.7580535714285713, "eval_recall_micro": 0.7622950819672131, "eval_recall_weighted": 0.7622950819672131, "eval_runtime": 59.927, "eval_samples_per_second": 20.358, "eval_steps_per_second": 0.334, "step": 663 }, { "epoch": 13.039735099337749, "grad_norm": 18.717695236206055, "learning_rate": 4.8166666666666674e-05, "loss": 0.7906, "step": 665 }, { "epoch": 13.178807947019868, "grad_norm": 14.046932220458984, "learning_rate": 4.808888888888889e-05, "loss": 0.7326, "step": 672 }, { "epoch": 13.317880794701987, "grad_norm": 11.162008285522461, "learning_rate": 4.8011111111111114e-05, "loss": 0.8299, "step": 679 }, { "epoch": 13.456953642384105, "grad_norm": 9.34903335571289, "learning_rate": 4.793333333333334e-05, "loss": 0.7046, "step": 686 }, { "epoch": 13.596026490066226, "grad_norm": 8.978596687316895, "learning_rate": 4.785555555555556e-05, "loss": 0.672, "step": 693 }, { "epoch": 13.735099337748345, "grad_norm": 9.649175643920898, "learning_rate": 4.7777777777777784e-05, "loss": 0.7706, "step": 700 }, { "epoch": 13.874172185430464, "grad_norm": 9.140443801879883, "learning_rate": 4.77e-05, "loss": 0.7734, "step": 707 }, { "epoch": 14.0, "grad_norm": 6.996921062469482, "learning_rate": 4.7622222222222224e-05, "loss": 0.6202, "step": 714 }, { "epoch": 14.0, "eval_accuracy": 0.7754098360655738, "eval_f1_macro": 0.765805670364494, "eval_f1_micro": 0.7754098360655738, "eval_f1_weighted": 0.7695095891286827, "eval_loss": 0.7957718372344971, "eval_precision_macro": 0.8098741258741259, "eval_precision_micro": 0.7754098360655738, "eval_precision_weighted": 0.8099575401829501, "eval_recall_macro": 0.769452380952381, "eval_recall_micro": 0.7754098360655738, "eval_recall_weighted": 0.7754098360655738, "eval_runtime": 58.8988, "eval_samples_per_second": 20.713, "eval_steps_per_second": 0.34, "step": 714 }, { "epoch": 14.139072847682119, "grad_norm": 11.548070907592773, "learning_rate": 4.754444444444445e-05, "loss": 0.7968, "step": 721 }, { "epoch": 14.278145695364238, "grad_norm": 11.0925874710083, "learning_rate": 4.746666666666667e-05, "loss": 0.6864, "step": 728 }, { "epoch": 14.417218543046358, "grad_norm": 9.538455963134766, "learning_rate": 4.7388888888888894e-05, "loss": 0.6766, "step": 735 }, { "epoch": 14.556291390728477, "grad_norm": 7.995402812957764, "learning_rate": 4.731111111111111e-05, "loss": 0.7023, "step": 742 }, { "epoch": 14.695364238410596, "grad_norm": 10.825759887695312, "learning_rate": 4.7233333333333334e-05, "loss": 0.6883, "step": 749 }, { "epoch": 14.834437086092715, "grad_norm": 14.279191017150879, "learning_rate": 4.715555555555556e-05, "loss": 0.6533, "step": 756 }, { "epoch": 14.973509933774835, "grad_norm": 8.562923431396484, "learning_rate": 4.707777777777778e-05, "loss": 0.6466, "step": 763 }, { "epoch": 15.0, "eval_accuracy": 0.7967213114754098, "eval_f1_macro": 0.7874462737947056, "eval_f1_micro": 0.7967213114754098, "eval_f1_weighted": 0.7923798470661948, "eval_loss": 0.7445575594902039, "eval_precision_macro": 0.8216799295475766, "eval_precision_micro": 0.7967213114754098, "eval_precision_weighted": 0.8259746225862427, "eval_recall_macro": 0.7922261904761905, "eval_recall_micro": 0.7967213114754098, "eval_recall_weighted": 0.7967213114754098, "eval_runtime": 59.8003, "eval_samples_per_second": 20.401, "eval_steps_per_second": 0.334, "step": 765 }, { "epoch": 15.099337748344372, "grad_norm": 9.65889835357666, "learning_rate": 4.7e-05, "loss": 0.6024, "step": 770 }, { "epoch": 15.23841059602649, "grad_norm": 8.170406341552734, "learning_rate": 4.692222222222222e-05, "loss": 0.5263, "step": 777 }, { "epoch": 15.37748344370861, "grad_norm": 8.782620429992676, "learning_rate": 4.6844444444444444e-05, "loss": 0.552, "step": 784 }, { "epoch": 15.516556291390728, "grad_norm": 11.878396034240723, "learning_rate": 4.676666666666667e-05, "loss": 0.6127, "step": 791 }, { "epoch": 15.655629139072847, "grad_norm": 8.88171672821045, "learning_rate": 4.668888888888889e-05, "loss": 0.6756, "step": 798 }, { "epoch": 15.794701986754967, "grad_norm": 11.983383178710938, "learning_rate": 4.6611111111111114e-05, "loss": 0.664, "step": 805 }, { "epoch": 15.933774834437086, "grad_norm": 10.409689903259277, "learning_rate": 4.653333333333334e-05, "loss": 0.6436, "step": 812 }, { "epoch": 16.0, "eval_accuracy": 0.7918032786885246, "eval_f1_macro": 0.7815447427921685, "eval_f1_micro": 0.7918032786885246, "eval_f1_weighted": 0.7856156314459259, "eval_loss": 0.7297011017799377, "eval_precision_macro": 0.8101799866799867, "eval_precision_micro": 0.7918032786885246, "eval_precision_weighted": 0.8123722907329464, "eval_recall_macro": 0.7866488095238096, "eval_recall_micro": 0.7918032786885246, "eval_recall_weighted": 0.7918032786885246, "eval_runtime": 60.0895, "eval_samples_per_second": 20.303, "eval_steps_per_second": 0.333, "step": 816 }, { "epoch": 16.05960264900662, "grad_norm": 10.903715133666992, "learning_rate": 4.645555555555556e-05, "loss": 0.5591, "step": 819 }, { "epoch": 16.198675496688743, "grad_norm": 8.767610549926758, "learning_rate": 4.6377777777777784e-05, "loss": 0.5711, "step": 826 }, { "epoch": 16.337748344370862, "grad_norm": 8.273555755615234, "learning_rate": 4.630000000000001e-05, "loss": 0.5811, "step": 833 }, { "epoch": 16.47682119205298, "grad_norm": 12.013016700744629, "learning_rate": 4.6222222222222224e-05, "loss": 0.6443, "step": 840 }, { "epoch": 16.6158940397351, "grad_norm": 7.874364376068115, "learning_rate": 4.614444444444445e-05, "loss": 0.5073, "step": 847 }, { "epoch": 16.75496688741722, "grad_norm": 9.01498031616211, "learning_rate": 4.606666666666667e-05, "loss": 0.608, "step": 854 }, { "epoch": 16.894039735099337, "grad_norm": 9.848909378051758, "learning_rate": 4.5988888888888894e-05, "loss": 0.5929, "step": 861 }, { "epoch": 17.0, "eval_accuracy": 0.7959016393442623, "eval_f1_macro": 0.7867938321138785, "eval_f1_micro": 0.7959016393442623, "eval_f1_weighted": 0.7917754148114372, "eval_loss": 0.7077643871307373, "eval_precision_macro": 0.8185556526806528, "eval_precision_micro": 0.7959016393442623, "eval_precision_weighted": 0.8217451378312034, "eval_recall_macro": 0.7902916666666667, "eval_recall_micro": 0.7959016393442623, "eval_recall_weighted": 0.7959016393442623, "eval_runtime": 59.7504, "eval_samples_per_second": 20.418, "eval_steps_per_second": 0.335, "step": 867 }, { "epoch": 17.019867549668874, "grad_norm": 9.507264137268066, "learning_rate": 4.591111111111112e-05, "loss": 0.5247, "step": 868 }, { "epoch": 17.158940397350992, "grad_norm": 7.274167537689209, "learning_rate": 4.5833333333333334e-05, "loss": 0.5212, "step": 875 }, { "epoch": 17.29801324503311, "grad_norm": 8.040386199951172, "learning_rate": 4.575555555555556e-05, "loss": 0.4957, "step": 882 }, { "epoch": 17.437086092715234, "grad_norm": 10.34827709197998, "learning_rate": 4.567777777777778e-05, "loss": 0.4938, "step": 889 }, { "epoch": 17.576158940397352, "grad_norm": 9.062361717224121, "learning_rate": 4.5600000000000004e-05, "loss": 0.5341, "step": 896 }, { "epoch": 17.71523178807947, "grad_norm": 7.889723777770996, "learning_rate": 4.552222222222222e-05, "loss": 0.5407, "step": 903 }, { "epoch": 17.85430463576159, "grad_norm": 7.329662799835205, "learning_rate": 4.5444444444444444e-05, "loss": 0.5344, "step": 910 }, { "epoch": 17.99337748344371, "grad_norm": 10.251781463623047, "learning_rate": 4.536666666666667e-05, "loss": 0.5108, "step": 917 }, { "epoch": 18.0, "eval_accuracy": 0.8, "eval_f1_macro": 0.7904420722323199, "eval_f1_micro": 0.8, "eval_f1_weighted": 0.794189161749749, "eval_loss": 0.7119916081428528, "eval_precision_macro": 0.8223463203463203, "eval_precision_micro": 0.8, "eval_precision_weighted": 0.8258165377427673, "eval_recall_macro": 0.7962023809523809, "eval_recall_micro": 0.8, "eval_recall_weighted": 0.8, "eval_runtime": 58.9812, "eval_samples_per_second": 20.685, "eval_steps_per_second": 0.339, "step": 918 }, { "epoch": 18.119205298013245, "grad_norm": 7.837319374084473, "learning_rate": 4.528888888888889e-05, "loss": 0.4401, "step": 924 }, { "epoch": 18.258278145695364, "grad_norm": 7.545521259307861, "learning_rate": 4.5211111111111114e-05, "loss": 0.4821, "step": 931 }, { "epoch": 18.397350993377483, "grad_norm": 7.626832962036133, "learning_rate": 4.513333333333333e-05, "loss": 0.4991, "step": 938 }, { "epoch": 18.5364238410596, "grad_norm": 7.265345573425293, "learning_rate": 4.5055555555555554e-05, "loss": 0.5936, "step": 945 }, { "epoch": 18.67549668874172, "grad_norm": 6.648807525634766, "learning_rate": 4.497777777777778e-05, "loss": 0.4418, "step": 952 }, { "epoch": 18.814569536423843, "grad_norm": 6.413826942443848, "learning_rate": 4.49e-05, "loss": 0.4185, "step": 959 }, { "epoch": 18.95364238410596, "grad_norm": 9.378252029418945, "learning_rate": 4.4822222222222224e-05, "loss": 0.5109, "step": 966 }, { "epoch": 19.0, "eval_accuracy": 0.8106557377049181, "eval_f1_macro": 0.8023834074422309, "eval_f1_micro": 0.8106557377049181, "eval_f1_weighted": 0.8054703936104611, "eval_loss": 0.671293318271637, "eval_precision_macro": 0.8325211038961038, "eval_precision_micro": 0.8106557377049181, "eval_precision_weighted": 0.8349751023111679, "eval_recall_macro": 0.8078333333333333, "eval_recall_micro": 0.8106557377049181, "eval_recall_weighted": 0.8106557377049181, "eval_runtime": 60.0408, "eval_samples_per_second": 20.32, "eval_steps_per_second": 0.333, "step": 969 }, { "epoch": 19.079470198675498, "grad_norm": 6.43688440322876, "learning_rate": 4.474444444444445e-05, "loss": 0.401, "step": 973 }, { "epoch": 19.218543046357617, "grad_norm": 10.133489608764648, "learning_rate": 4.466666666666667e-05, "loss": 0.4449, "step": 980 }, { "epoch": 19.357615894039736, "grad_norm": 9.007479667663574, "learning_rate": 4.4588888888888894e-05, "loss": 0.5457, "step": 987 }, { "epoch": 19.496688741721854, "grad_norm": 10.912771224975586, "learning_rate": 4.451111111111112e-05, "loss": 0.5306, "step": 994 }, { "epoch": 19.635761589403973, "grad_norm": 6.615180492401123, "learning_rate": 4.443333333333334e-05, "loss": 0.4925, "step": 1001 }, { "epoch": 19.774834437086092, "grad_norm": 7.076197147369385, "learning_rate": 4.435555555555556e-05, "loss": 0.4787, "step": 1008 }, { "epoch": 19.91390728476821, "grad_norm": 7.040290832519531, "learning_rate": 4.427777777777778e-05, "loss": 0.4809, "step": 1015 }, { "epoch": 20.0, "eval_accuracy": 0.8139344262295082, "eval_f1_macro": 0.8081211352716771, "eval_f1_micro": 0.8139344262295082, "eval_f1_weighted": 0.8116663019924579, "eval_loss": 0.6667141914367676, "eval_precision_macro": 0.8430578726828728, "eval_precision_micro": 0.8139344262295082, "eval_precision_weighted": 0.8445454568200469, "eval_recall_macro": 0.8106190476190476, "eval_recall_micro": 0.8139344262295082, "eval_recall_weighted": 0.8139344262295082, "eval_runtime": 58.7404, "eval_samples_per_second": 20.769, "eval_steps_per_second": 0.34, "step": 1020 }, { "epoch": 20.039735099337747, "grad_norm": 9.023087501525879, "learning_rate": 4.4200000000000004e-05, "loss": 0.386, "step": 1022 }, { "epoch": 20.178807947019866, "grad_norm": 7.4928178787231445, "learning_rate": 4.412222222222223e-05, "loss": 0.4569, "step": 1029 }, { "epoch": 20.31788079470199, "grad_norm": 8.090821266174316, "learning_rate": 4.404444444444445e-05, "loss": 0.4778, "step": 1036 }, { "epoch": 20.456953642384107, "grad_norm": 8.650497436523438, "learning_rate": 4.396666666666667e-05, "loss": 0.4786, "step": 1043 }, { "epoch": 20.596026490066226, "grad_norm": 6.049080848693848, "learning_rate": 4.388888888888889e-05, "loss": 0.4975, "step": 1050 }, { "epoch": 20.735099337748345, "grad_norm": 10.202515602111816, "learning_rate": 4.3811111111111114e-05, "loss": 0.4035, "step": 1057 }, { "epoch": 20.874172185430464, "grad_norm": 7.0871429443359375, "learning_rate": 4.373333333333334e-05, "loss": 0.4274, "step": 1064 }, { "epoch": 21.0, "grad_norm": 6.111388206481934, "learning_rate": 4.3655555555555554e-05, "loss": 0.3576, "step": 1071 }, { "epoch": 21.0, "eval_accuracy": 0.8073770491803278, "eval_f1_macro": 0.7980818380535872, "eval_f1_micro": 0.8073770491803278, "eval_f1_weighted": 0.8027800592784986, "eval_loss": 0.6649746298789978, "eval_precision_macro": 0.8290474247974248, "eval_precision_micro": 0.8073770491803278, "eval_precision_weighted": 0.8307658143313881, "eval_recall_macro": 0.8019166666666666, "eval_recall_micro": 0.8073770491803278, "eval_recall_weighted": 0.8073770491803278, "eval_runtime": 60.057, "eval_samples_per_second": 20.314, "eval_steps_per_second": 0.333, "step": 1071 }, { "epoch": 21.13907284768212, "grad_norm": 9.059436798095703, "learning_rate": 4.357777777777778e-05, "loss": 0.4775, "step": 1078 }, { "epoch": 21.278145695364238, "grad_norm": 9.497885704040527, "learning_rate": 4.35e-05, "loss": 0.4531, "step": 1085 }, { "epoch": 21.417218543046356, "grad_norm": 10.471771240234375, "learning_rate": 4.3422222222222224e-05, "loss": 0.479, "step": 1092 }, { "epoch": 21.556291390728475, "grad_norm": 6.627233505249023, "learning_rate": 4.334444444444445e-05, "loss": 0.4332, "step": 1099 }, { "epoch": 21.695364238410598, "grad_norm": 9.046399116516113, "learning_rate": 4.3266666666666664e-05, "loss": 0.4767, "step": 1106 }, { "epoch": 21.834437086092716, "grad_norm": 6.7745513916015625, "learning_rate": 4.318888888888889e-05, "loss": 0.5137, "step": 1113 }, { "epoch": 21.973509933774835, "grad_norm": 8.061189651489258, "learning_rate": 4.311111111111111e-05, "loss": 0.4877, "step": 1120 }, { "epoch": 22.0, "eval_accuracy": 0.8114754098360656, "eval_f1_macro": 0.8045914526649821, "eval_f1_micro": 0.8114754098360656, "eval_f1_weighted": 0.8078734461991453, "eval_loss": 0.6778721809387207, "eval_precision_macro": 0.836376651126651, "eval_precision_micro": 0.8114754098360656, "eval_precision_weighted": 0.8366239998617048, "eval_recall_macro": 0.806404761904762, "eval_recall_micro": 0.8114754098360656, "eval_recall_weighted": 0.8114754098360656, "eval_runtime": 59.185, "eval_samples_per_second": 20.613, "eval_steps_per_second": 0.338, "step": 1122 }, { "epoch": 22.09933774834437, "grad_norm": 9.460957527160645, "learning_rate": 4.3033333333333334e-05, "loss": 0.4915, "step": 1127 }, { "epoch": 22.23841059602649, "grad_norm": 9.026511192321777, "learning_rate": 4.295555555555556e-05, "loss": 0.4157, "step": 1134 }, { "epoch": 22.37748344370861, "grad_norm": 9.733258247375488, "learning_rate": 4.287777777777778e-05, "loss": 0.3564, "step": 1141 }, { "epoch": 22.516556291390728, "grad_norm": 9.269991874694824, "learning_rate": 4.2800000000000004e-05, "loss": 0.4707, "step": 1148 }, { "epoch": 22.655629139072847, "grad_norm": 7.8387041091918945, "learning_rate": 4.272222222222223e-05, "loss": 0.4902, "step": 1155 }, { "epoch": 22.794701986754966, "grad_norm": 10.261953353881836, "learning_rate": 4.264444444444445e-05, "loss": 0.4656, "step": 1162 }, { "epoch": 22.933774834437084, "grad_norm": 9.317761421203613, "learning_rate": 4.2566666666666674e-05, "loss": 0.4705, "step": 1169 }, { "epoch": 23.0, "eval_accuracy": 0.8131147540983606, "eval_f1_macro": 0.8073761565232153, "eval_f1_micro": 0.8131147540983606, "eval_f1_weighted": 0.8111283224168953, "eval_loss": 0.6698673963546753, "eval_precision_macro": 0.8399364801864801, "eval_precision_micro": 0.8131147540983606, "eval_precision_weighted": 0.8420730703722508, "eval_recall_macro": 0.808672619047619, "eval_recall_micro": 0.8131147540983606, "eval_recall_weighted": 0.8131147540983606, "eval_runtime": 59.9539, "eval_samples_per_second": 20.349, "eval_steps_per_second": 0.334, "step": 1173 }, { "epoch": 23.05960264900662, "grad_norm": 9.809006690979004, "learning_rate": 4.248888888888889e-05, "loss": 0.3833, "step": 1176 }, { "epoch": 23.198675496688743, "grad_norm": 8.9915132522583, "learning_rate": 4.2411111111111114e-05, "loss": 0.4552, "step": 1183 }, { "epoch": 23.337748344370862, "grad_norm": 10.036259651184082, "learning_rate": 4.233333333333334e-05, "loss": 0.3869, "step": 1190 }, { "epoch": 23.47682119205298, "grad_norm": 10.57496166229248, "learning_rate": 4.225555555555556e-05, "loss": 0.4003, "step": 1197 }, { "epoch": 23.6158940397351, "grad_norm": 9.061355590820312, "learning_rate": 4.217777777777778e-05, "loss": 0.4654, "step": 1204 }, { "epoch": 23.75496688741722, "grad_norm": 7.108461380004883, "learning_rate": 4.21e-05, "loss": 0.4085, "step": 1211 }, { "epoch": 23.894039735099337, "grad_norm": 5.542710781097412, "learning_rate": 4.2022222222222223e-05, "loss": 0.4358, "step": 1218 }, { "epoch": 24.0, "eval_accuracy": 0.8262295081967214, "eval_f1_macro": 0.8156689398492805, "eval_f1_micro": 0.8262295081967214, "eval_f1_weighted": 0.8195565714293827, "eval_loss": 0.6602770090103149, "eval_precision_macro": 0.8476504329004328, "eval_precision_micro": 0.8262295081967214, "eval_precision_weighted": 0.849387256641355, "eval_recall_macro": 0.8219166666666666, "eval_recall_micro": 0.8262295081967214, "eval_recall_weighted": 0.8262295081967214, "eval_runtime": 58.671, "eval_samples_per_second": 20.794, "eval_steps_per_second": 0.341, "step": 1224 } ], "logging_steps": 7, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 7, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1899638530382496e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }