| { | |
| "best_metric": 0.6602770090103149, | |
| "best_model_checkpoint": "Model-Focalnet-Base-\\checkpoint-1224", | |
| "epoch": 24.0, | |
| "eval_steps": 7, | |
| "global_step": 1224, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1390728476821192, | |
| "grad_norm": 2.261648416519165, | |
| "learning_rate": 7.000000000000001e-07, | |
| "loss": 5.3125, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.2781456953642384, | |
| "grad_norm": 2.211456537246704, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 5.3234, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.41721854304635764, | |
| "grad_norm": 2.1140072345733643, | |
| "learning_rate": 2.1000000000000002e-06, | |
| "loss": 5.308, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.5562913907284768, | |
| "grad_norm": 2.2730369567871094, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 5.3189, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.695364238410596, | |
| "grad_norm": 2.2154030799865723, | |
| "learning_rate": 3.5000000000000004e-06, | |
| "loss": 5.2991, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.8344370860927153, | |
| "grad_norm": 1.887474536895752, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 5.3036, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.9735099337748344, | |
| "grad_norm": 2.4402565956115723, | |
| "learning_rate": 4.9000000000000005e-06, | |
| "loss": 5.2965, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.00819672131147541, | |
| "eval_f1_macro": 0.00337801133638995, | |
| "eval_f1_micro": 0.00819672131147541, | |
| "eval_f1_weighted": 0.0035373101357153293, | |
| "eval_loss": 5.291137218475342, | |
| "eval_precision_macro": 0.0028020831663437863, | |
| "eval_precision_micro": 0.00819672131147541, | |
| "eval_precision_weighted": 0.002911205169282775, | |
| "eval_recall_macro": 0.008023809523809523, | |
| "eval_recall_micro": 0.00819672131147541, | |
| "eval_recall_weighted": 0.00819672131147541, | |
| "eval_runtime": 65.9936, | |
| "eval_samples_per_second": 18.487, | |
| "eval_steps_per_second": 0.303, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 1.099337748344371, | |
| "grad_norm": 2.17191219329834, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 4.7769, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.23841059602649, | |
| "grad_norm": 2.5151634216308594, | |
| "learning_rate": 6.300000000000001e-06, | |
| "loss": 5.2913, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.3774834437086092, | |
| "grad_norm": 2.353184938430786, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 5.2925, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.5165562913907285, | |
| "grad_norm": 2.138894557952881, | |
| "learning_rate": 7.7e-06, | |
| "loss": 5.2627, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.6556291390728477, | |
| "grad_norm": 2.234560012817383, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 5.2627, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.794701986754967, | |
| "grad_norm": 2.210279703140259, | |
| "learning_rate": 9.100000000000001e-06, | |
| "loss": 5.2633, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.9337748344370862, | |
| "grad_norm": 2.9447083473205566, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 5.2558, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.01557377049180328, | |
| "eval_f1_macro": 0.006874468130470725, | |
| "eval_f1_micro": 0.01557377049180328, | |
| "eval_f1_weighted": 0.007013333548104455, | |
| "eval_loss": 5.22589635848999, | |
| "eval_precision_macro": 0.006156529662888035, | |
| "eval_precision_micro": 0.01557377049180328, | |
| "eval_precision_weighted": 0.006207445270776915, | |
| "eval_recall_macro": 0.01500595238095238, | |
| "eval_recall_micro": 0.01557377049180328, | |
| "eval_recall_weighted": 0.01557377049180328, | |
| "eval_runtime": 56.5165, | |
| "eval_samples_per_second": 21.587, | |
| "eval_steps_per_second": 0.354, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 2.0596026490066226, | |
| "grad_norm": 2.3659451007843018, | |
| "learning_rate": 1.05e-05, | |
| "loss": 4.7295, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.198675496688742, | |
| "grad_norm": 2.9902284145355225, | |
| "learning_rate": 1.1200000000000001e-05, | |
| "loss": 5.2129, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 2.337748344370861, | |
| "grad_norm": 4.189450740814209, | |
| "learning_rate": 1.19e-05, | |
| "loss": 5.1816, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.47682119205298, | |
| "grad_norm": 4.748580455780029, | |
| "learning_rate": 1.2600000000000001e-05, | |
| "loss": 5.1931, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.6158940397350996, | |
| "grad_norm": 3.579268455505371, | |
| "learning_rate": 1.3300000000000001e-05, | |
| "loss": 5.164, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 2.7549668874172184, | |
| "grad_norm": 4.054067611694336, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 5.1483, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.8940397350993377, | |
| "grad_norm": 3.84291410446167, | |
| "learning_rate": 1.47e-05, | |
| "loss": 5.1257, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.051639344262295085, | |
| "eval_f1_macro": 0.03261693581398834, | |
| "eval_f1_micro": 0.051639344262295085, | |
| "eval_f1_weighted": 0.03516077511642792, | |
| "eval_loss": 5.062410354614258, | |
| "eval_precision_macro": 0.03239256187924794, | |
| "eval_precision_micro": 0.051639344262295085, | |
| "eval_precision_weighted": 0.034858832269066796, | |
| "eval_recall_macro": 0.047523809523809524, | |
| "eval_recall_micro": 0.051639344262295085, | |
| "eval_recall_weighted": 0.051639344262295085, | |
| "eval_runtime": 63.3821, | |
| "eval_samples_per_second": 19.248, | |
| "eval_steps_per_second": 0.316, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 3.019867549668874, | |
| "grad_norm": 4.443902492523193, | |
| "learning_rate": 1.54e-05, | |
| "loss": 4.6257, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 3.1589403973509933, | |
| "grad_norm": 5.521849155426025, | |
| "learning_rate": 1.6100000000000002e-05, | |
| "loss": 5.0162, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 3.2980132450331126, | |
| "grad_norm": 6.407104969024658, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 4.9824, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 3.437086092715232, | |
| "grad_norm": 5.278021335601807, | |
| "learning_rate": 1.75e-05, | |
| "loss": 4.96, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.576158940397351, | |
| "grad_norm": 7.5942182540893555, | |
| "learning_rate": 1.8200000000000002e-05, | |
| "loss": 4.898, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 3.7152317880794703, | |
| "grad_norm": 6.050070285797119, | |
| "learning_rate": 1.8900000000000002e-05, | |
| "loss": 4.8774, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 3.8543046357615895, | |
| "grad_norm": 6.589919567108154, | |
| "learning_rate": 1.9600000000000002e-05, | |
| "loss": 4.7924, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 3.993377483443709, | |
| "grad_norm": 8.232624053955078, | |
| "learning_rate": 2.0300000000000002e-05, | |
| "loss": 4.6994, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.11721311475409836, | |
| "eval_f1_macro": 0.08082684992031455, | |
| "eval_f1_micro": 0.11721311475409836, | |
| "eval_f1_weighted": 0.08625745944487533, | |
| "eval_loss": 4.516047477722168, | |
| "eval_precision_macro": 0.0960521320476185, | |
| "eval_precision_micro": 0.11721311475409836, | |
| "eval_precision_weighted": 0.10289070402055932, | |
| "eval_recall_macro": 0.1088095238095238, | |
| "eval_recall_micro": 0.11721311475409836, | |
| "eval_recall_weighted": 0.11721311475409836, | |
| "eval_runtime": 81.067, | |
| "eval_samples_per_second": 15.049, | |
| "eval_steps_per_second": 0.247, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 4.119205298013245, | |
| "grad_norm": 7.674986362457275, | |
| "learning_rate": 2.1e-05, | |
| "loss": 4.1019, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 4.258278145695364, | |
| "grad_norm": 7.877310276031494, | |
| "learning_rate": 2.1700000000000002e-05, | |
| "loss": 4.4153, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 4.397350993377484, | |
| "grad_norm": 9.657820701599121, | |
| "learning_rate": 2.2400000000000002e-05, | |
| "loss": 4.2469, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 4.5364238410596025, | |
| "grad_norm": 11.347479820251465, | |
| "learning_rate": 2.3100000000000002e-05, | |
| "loss": 4.1469, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 4.675496688741722, | |
| "grad_norm": 12.215789794921875, | |
| "learning_rate": 2.38e-05, | |
| "loss": 4.0285, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 4.814569536423841, | |
| "grad_norm": 10.887558937072754, | |
| "learning_rate": 2.45e-05, | |
| "loss": 4.0269, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 4.95364238410596, | |
| "grad_norm": 12.850284576416016, | |
| "learning_rate": 2.5200000000000003e-05, | |
| "loss": 3.7643, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.2680327868852459, | |
| "eval_f1_macro": 0.22529862929165922, | |
| "eval_f1_micro": 0.2680327868852459, | |
| "eval_f1_weighted": 0.23166885649403285, | |
| "eval_loss": 3.3234214782714844, | |
| "eval_precision_macro": 0.25987233164420576, | |
| "eval_precision_micro": 0.2680327868852459, | |
| "eval_precision_weighted": 0.26332140193762377, | |
| "eval_recall_macro": 0.2577738095238095, | |
| "eval_recall_micro": 0.2680327868852459, | |
| "eval_recall_weighted": 0.2680327868852459, | |
| "eval_runtime": 77.8338, | |
| "eval_samples_per_second": 15.674, | |
| "eval_steps_per_second": 0.257, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 5.079470198675497, | |
| "grad_norm": 11.808965682983398, | |
| "learning_rate": 2.5900000000000003e-05, | |
| "loss": 3.1734, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 5.218543046357616, | |
| "grad_norm": 17.217893600463867, | |
| "learning_rate": 2.6600000000000003e-05, | |
| "loss": 3.3163, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 5.357615894039735, | |
| "grad_norm": 14.96292495727539, | |
| "learning_rate": 2.7300000000000003e-05, | |
| "loss": 3.3328, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 5.496688741721854, | |
| "grad_norm": 11.553727149963379, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 3.2036, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 5.635761589403973, | |
| "grad_norm": 12.452818870544434, | |
| "learning_rate": 2.87e-05, | |
| "loss": 3.1867, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 5.774834437086093, | |
| "grad_norm": 13.04163646697998, | |
| "learning_rate": 2.94e-05, | |
| "loss": 3.0558, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 5.913907284768212, | |
| "grad_norm": 12.779662132263184, | |
| "learning_rate": 3.01e-05, | |
| "loss": 2.9603, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.40327868852459015, | |
| "eval_f1_macro": 0.3522325245599723, | |
| "eval_f1_micro": 0.40327868852459015, | |
| "eval_f1_weighted": 0.36290227384056034, | |
| "eval_loss": 2.3593220710754395, | |
| "eval_precision_macro": 0.4013160035627141, | |
| "eval_precision_micro": 0.40327868852459015, | |
| "eval_precision_weighted": 0.40977942860114985, | |
| "eval_recall_macro": 0.38851190476190484, | |
| "eval_recall_micro": 0.40327868852459015, | |
| "eval_recall_weighted": 0.40327868852459015, | |
| "eval_runtime": 71.8317, | |
| "eval_samples_per_second": 16.984, | |
| "eval_steps_per_second": 0.278, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 6.039735099337748, | |
| "grad_norm": 13.623518943786621, | |
| "learning_rate": 3.08e-05, | |
| "loss": 2.4678, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 6.178807947019868, | |
| "grad_norm": 13.266014099121094, | |
| "learning_rate": 3.15e-05, | |
| "loss": 2.6213, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 6.317880794701987, | |
| "grad_norm": 13.395142555236816, | |
| "learning_rate": 3.2200000000000003e-05, | |
| "loss": 2.4566, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 6.456953642384106, | |
| "grad_norm": 13.428766250610352, | |
| "learning_rate": 3.29e-05, | |
| "loss": 2.3462, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 6.596026490066225, | |
| "grad_norm": 11.362808227539062, | |
| "learning_rate": 3.3600000000000004e-05, | |
| "loss": 2.3357, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 6.735099337748345, | |
| "grad_norm": 11.982301712036133, | |
| "learning_rate": 3.430000000000001e-05, | |
| "loss": 2.2728, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 6.874172185430464, | |
| "grad_norm": 15.563032150268555, | |
| "learning_rate": 3.5e-05, | |
| "loss": 2.3091, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 10.777310371398926, | |
| "learning_rate": 3.57e-05, | |
| "loss": 1.9475, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.5336065573770492, | |
| "eval_f1_macro": 0.5010502512573436, | |
| "eval_f1_micro": 0.5336065573770492, | |
| "eval_f1_weighted": 0.5078295641241183, | |
| "eval_loss": 1.7169982194900513, | |
| "eval_precision_macro": 0.570199926363626, | |
| "eval_precision_micro": 0.5336065573770492, | |
| "eval_precision_weighted": 0.5742672096804716, | |
| "eval_recall_macro": 0.5233749999999999, | |
| "eval_recall_micro": 0.5336065573770492, | |
| "eval_recall_weighted": 0.5336065573770492, | |
| "eval_runtime": 63.8109, | |
| "eval_samples_per_second": 19.119, | |
| "eval_steps_per_second": 0.313, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 7.139072847682119, | |
| "grad_norm": 12.829914093017578, | |
| "learning_rate": 3.6400000000000004e-05, | |
| "loss": 1.9122, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 7.2781456953642385, | |
| "grad_norm": 15.254327774047852, | |
| "learning_rate": 3.71e-05, | |
| "loss": 1.9511, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 7.417218543046357, | |
| "grad_norm": 13.248723030090332, | |
| "learning_rate": 3.7800000000000004e-05, | |
| "loss": 1.921, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 7.556291390728477, | |
| "grad_norm": 14.405394554138184, | |
| "learning_rate": 3.85e-05, | |
| "loss": 1.8447, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 7.695364238410596, | |
| "grad_norm": 13.432222366333008, | |
| "learning_rate": 3.9200000000000004e-05, | |
| "loss": 1.7079, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 7.8344370860927155, | |
| "grad_norm": 13.591761589050293, | |
| "learning_rate": 3.99e-05, | |
| "loss": 1.7888, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 7.973509933774834, | |
| "grad_norm": 12.760810852050781, | |
| "learning_rate": 4.0600000000000004e-05, | |
| "loss": 1.8494, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6360655737704918, | |
| "eval_f1_macro": 0.6127611312020431, | |
| "eval_f1_micro": 0.6360655737704918, | |
| "eval_f1_weighted": 0.6178432613234403, | |
| "eval_loss": 1.343964695930481, | |
| "eval_precision_macro": 0.6623227605727605, | |
| "eval_precision_micro": 0.6360655737704918, | |
| "eval_precision_weighted": 0.6653364258692127, | |
| "eval_recall_macro": 0.6304523809523809, | |
| "eval_recall_micro": 0.6360655737704918, | |
| "eval_recall_weighted": 0.6360655737704918, | |
| "eval_runtime": 79.4787, | |
| "eval_samples_per_second": 15.35, | |
| "eval_steps_per_second": 0.252, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 8.099337748344372, | |
| "grad_norm": 11.729964256286621, | |
| "learning_rate": 4.13e-05, | |
| "loss": 1.4108, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 8.23841059602649, | |
| "grad_norm": 12.144929885864258, | |
| "learning_rate": 4.2e-05, | |
| "loss": 1.5489, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 8.37748344370861, | |
| "grad_norm": 13.483667373657227, | |
| "learning_rate": 4.27e-05, | |
| "loss": 1.5863, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 8.516556291390728, | |
| "grad_norm": 16.043304443359375, | |
| "learning_rate": 4.3400000000000005e-05, | |
| "loss": 1.4405, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 8.655629139072847, | |
| "grad_norm": 15.305998802185059, | |
| "learning_rate": 4.41e-05, | |
| "loss": 1.4753, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 8.794701986754967, | |
| "grad_norm": 13.507715225219727, | |
| "learning_rate": 4.4800000000000005e-05, | |
| "loss": 1.4817, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 8.933774834437086, | |
| "grad_norm": 13.252425193786621, | |
| "learning_rate": 4.55e-05, | |
| "loss": 1.5227, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.6786885245901639, | |
| "eval_f1_macro": 0.6571807258516, | |
| "eval_f1_micro": 0.6786885245901639, | |
| "eval_f1_weighted": 0.6634537879698879, | |
| "eval_loss": 1.1470587253570557, | |
| "eval_precision_macro": 0.7084700165031047, | |
| "eval_precision_micro": 0.6786885245901639, | |
| "eval_precision_weighted": 0.7094984540397994, | |
| "eval_recall_macro": 0.6691130952380953, | |
| "eval_recall_micro": 0.6786885245901639, | |
| "eval_recall_weighted": 0.6786885245901639, | |
| "eval_runtime": 69.8414, | |
| "eval_samples_per_second": 17.468, | |
| "eval_steps_per_second": 0.286, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 9.059602649006623, | |
| "grad_norm": 11.350573539733887, | |
| "learning_rate": 4.6200000000000005e-05, | |
| "loss": 1.352, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 9.198675496688741, | |
| "grad_norm": 11.896257400512695, | |
| "learning_rate": 4.69e-05, | |
| "loss": 1.2096, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 9.33774834437086, | |
| "grad_norm": 14.927756309509277, | |
| "learning_rate": 4.76e-05, | |
| "loss": 1.3018, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 9.47682119205298, | |
| "grad_norm": 14.38377571105957, | |
| "learning_rate": 4.83e-05, | |
| "loss": 1.2997, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 9.6158940397351, | |
| "grad_norm": 10.836702346801758, | |
| "learning_rate": 4.9e-05, | |
| "loss": 1.2053, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 9.754966887417218, | |
| "grad_norm": 13.384648323059082, | |
| "learning_rate": 4.97e-05, | |
| "loss": 1.2461, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 9.894039735099337, | |
| "grad_norm": 12.859415054321289, | |
| "learning_rate": 4.995555555555556e-05, | |
| "loss": 1.2476, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7295081967213115, | |
| "eval_f1_macro": 0.7136993719988303, | |
| "eval_f1_micro": 0.7295081967213115, | |
| "eval_f1_weighted": 0.7185265127973471, | |
| "eval_loss": 0.9676371812820435, | |
| "eval_precision_macro": 0.7655571405718464, | |
| "eval_precision_micro": 0.7295081967213115, | |
| "eval_precision_weighted": 0.7658903327466492, | |
| "eval_recall_macro": 0.7218452380952379, | |
| "eval_recall_micro": 0.7295081967213115, | |
| "eval_recall_weighted": 0.7295081967213115, | |
| "eval_runtime": 64.0687, | |
| "eval_samples_per_second": 19.042, | |
| "eval_steps_per_second": 0.312, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 10.019867549668874, | |
| "grad_norm": 11.940890312194824, | |
| "learning_rate": 4.987777777777778e-05, | |
| "loss": 1.0087, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 10.158940397350994, | |
| "grad_norm": 11.370889663696289, | |
| "learning_rate": 4.9800000000000004e-05, | |
| "loss": 1.0759, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 10.298013245033113, | |
| "grad_norm": 11.712719917297363, | |
| "learning_rate": 4.972222222222223e-05, | |
| "loss": 1.0388, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 10.437086092715232, | |
| "grad_norm": 15.134650230407715, | |
| "learning_rate": 4.964444444444445e-05, | |
| "loss": 1.0933, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 10.57615894039735, | |
| "grad_norm": 11.481903076171875, | |
| "learning_rate": 4.956666666666667e-05, | |
| "loss": 1.0236, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 10.71523178807947, | |
| "grad_norm": 11.978276252746582, | |
| "learning_rate": 4.948888888888889e-05, | |
| "loss": 1.1232, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 10.85430463576159, | |
| "grad_norm": 12.34005355834961, | |
| "learning_rate": 4.9411111111111114e-05, | |
| "loss": 1.0067, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 10.993377483443709, | |
| "grad_norm": 11.154061317443848, | |
| "learning_rate": 4.933333333333334e-05, | |
| "loss": 1.1001, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7385245901639345, | |
| "eval_f1_macro": 0.7282043296830448, | |
| "eval_f1_micro": 0.7385245901639345, | |
| "eval_f1_weighted": 0.732015719256241, | |
| "eval_loss": 0.8772674798965454, | |
| "eval_precision_macro": 0.7795788517038517, | |
| "eval_precision_micro": 0.7385245901639345, | |
| "eval_precision_weighted": 0.7814253801753802, | |
| "eval_recall_macro": 0.733672619047619, | |
| "eval_recall_micro": 0.7385245901639345, | |
| "eval_recall_weighted": 0.7385245901639345, | |
| "eval_runtime": 60.2253, | |
| "eval_samples_per_second": 20.257, | |
| "eval_steps_per_second": 0.332, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 11.119205298013245, | |
| "grad_norm": 8.575409889221191, | |
| "learning_rate": 4.925555555555556e-05, | |
| "loss": 0.8726, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 11.258278145695364, | |
| "grad_norm": 12.448003768920898, | |
| "learning_rate": 4.917777777777778e-05, | |
| "loss": 0.9765, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 11.397350993377483, | |
| "grad_norm": 10.99142837524414, | |
| "learning_rate": 4.91e-05, | |
| "loss": 0.8438, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 11.536423841059603, | |
| "grad_norm": 9.985913276672363, | |
| "learning_rate": 4.9022222222222224e-05, | |
| "loss": 0.863, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 11.675496688741722, | |
| "grad_norm": 14.102209091186523, | |
| "learning_rate": 4.894444444444445e-05, | |
| "loss": 0.9674, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 11.814569536423841, | |
| "grad_norm": 10.937699317932129, | |
| "learning_rate": 4.886666666666667e-05, | |
| "loss": 0.9521, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 11.95364238410596, | |
| "grad_norm": 10.190333366394043, | |
| "learning_rate": 4.878888888888889e-05, | |
| "loss": 0.8804, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.759016393442623, | |
| "eval_f1_macro": 0.7427500998456881, | |
| "eval_f1_micro": 0.759016393442623, | |
| "eval_f1_weighted": 0.7456563548213297, | |
| "eval_loss": 0.8271353840827942, | |
| "eval_precision_macro": 0.7684717300243616, | |
| "eval_precision_micro": 0.759016393442623, | |
| "eval_precision_weighted": 0.7719776994647571, | |
| "eval_recall_macro": 0.756702380952381, | |
| "eval_recall_micro": 0.759016393442623, | |
| "eval_recall_weighted": 0.759016393442623, | |
| "eval_runtime": 58.6516, | |
| "eval_samples_per_second": 20.801, | |
| "eval_steps_per_second": 0.341, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 12.079470198675496, | |
| "grad_norm": 14.1576509475708, | |
| "learning_rate": 4.871111111111111e-05, | |
| "loss": 0.719, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 12.218543046357617, | |
| "grad_norm": 11.829643249511719, | |
| "learning_rate": 4.8633333333333334e-05, | |
| "loss": 0.9113, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 12.357615894039736, | |
| "grad_norm": 9.620296478271484, | |
| "learning_rate": 4.855555555555556e-05, | |
| "loss": 0.8671, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 12.496688741721854, | |
| "grad_norm": 10.44937801361084, | |
| "learning_rate": 4.847777777777778e-05, | |
| "loss": 0.8422, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 12.635761589403973, | |
| "grad_norm": 7.808290958404541, | |
| "learning_rate": 4.8400000000000004e-05, | |
| "loss": 0.8018, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 12.774834437086092, | |
| "grad_norm": 9.790284156799316, | |
| "learning_rate": 4.832222222222223e-05, | |
| "loss": 0.8626, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 12.913907284768213, | |
| "grad_norm": 12.296673774719238, | |
| "learning_rate": 4.824444444444445e-05, | |
| "loss": 0.9596, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7622950819672131, | |
| "eval_f1_macro": 0.7541482304589116, | |
| "eval_f1_micro": 0.7622950819672131, | |
| "eval_f1_weighted": 0.7581034870800643, | |
| "eval_loss": 0.8282718062400818, | |
| "eval_precision_macro": 0.7943097392803276, | |
| "eval_precision_micro": 0.7622950819672131, | |
| "eval_precision_weighted": 0.7971667340748826, | |
| "eval_recall_macro": 0.7580535714285713, | |
| "eval_recall_micro": 0.7622950819672131, | |
| "eval_recall_weighted": 0.7622950819672131, | |
| "eval_runtime": 59.927, | |
| "eval_samples_per_second": 20.358, | |
| "eval_steps_per_second": 0.334, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 13.039735099337749, | |
| "grad_norm": 18.717695236206055, | |
| "learning_rate": 4.8166666666666674e-05, | |
| "loss": 0.7906, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 13.178807947019868, | |
| "grad_norm": 14.046932220458984, | |
| "learning_rate": 4.808888888888889e-05, | |
| "loss": 0.7326, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 13.317880794701987, | |
| "grad_norm": 11.162008285522461, | |
| "learning_rate": 4.8011111111111114e-05, | |
| "loss": 0.8299, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 13.456953642384105, | |
| "grad_norm": 9.34903335571289, | |
| "learning_rate": 4.793333333333334e-05, | |
| "loss": 0.7046, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 13.596026490066226, | |
| "grad_norm": 8.978596687316895, | |
| "learning_rate": 4.785555555555556e-05, | |
| "loss": 0.672, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 13.735099337748345, | |
| "grad_norm": 9.649175643920898, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 0.7706, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 13.874172185430464, | |
| "grad_norm": 9.140443801879883, | |
| "learning_rate": 4.77e-05, | |
| "loss": 0.7734, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 6.996921062469482, | |
| "learning_rate": 4.7622222222222224e-05, | |
| "loss": 0.6202, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7754098360655738, | |
| "eval_f1_macro": 0.765805670364494, | |
| "eval_f1_micro": 0.7754098360655738, | |
| "eval_f1_weighted": 0.7695095891286827, | |
| "eval_loss": 0.7957718372344971, | |
| "eval_precision_macro": 0.8098741258741259, | |
| "eval_precision_micro": 0.7754098360655738, | |
| "eval_precision_weighted": 0.8099575401829501, | |
| "eval_recall_macro": 0.769452380952381, | |
| "eval_recall_micro": 0.7754098360655738, | |
| "eval_recall_weighted": 0.7754098360655738, | |
| "eval_runtime": 58.8988, | |
| "eval_samples_per_second": 20.713, | |
| "eval_steps_per_second": 0.34, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 14.139072847682119, | |
| "grad_norm": 11.548070907592773, | |
| "learning_rate": 4.754444444444445e-05, | |
| "loss": 0.7968, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 14.278145695364238, | |
| "grad_norm": 11.0925874710083, | |
| "learning_rate": 4.746666666666667e-05, | |
| "loss": 0.6864, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 14.417218543046358, | |
| "grad_norm": 9.538455963134766, | |
| "learning_rate": 4.7388888888888894e-05, | |
| "loss": 0.6766, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 14.556291390728477, | |
| "grad_norm": 7.995402812957764, | |
| "learning_rate": 4.731111111111111e-05, | |
| "loss": 0.7023, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 14.695364238410596, | |
| "grad_norm": 10.825759887695312, | |
| "learning_rate": 4.7233333333333334e-05, | |
| "loss": 0.6883, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 14.834437086092715, | |
| "grad_norm": 14.279191017150879, | |
| "learning_rate": 4.715555555555556e-05, | |
| "loss": 0.6533, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 14.973509933774835, | |
| "grad_norm": 8.562923431396484, | |
| "learning_rate": 4.707777777777778e-05, | |
| "loss": 0.6466, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7967213114754098, | |
| "eval_f1_macro": 0.7874462737947056, | |
| "eval_f1_micro": 0.7967213114754098, | |
| "eval_f1_weighted": 0.7923798470661948, | |
| "eval_loss": 0.7445575594902039, | |
| "eval_precision_macro": 0.8216799295475766, | |
| "eval_precision_micro": 0.7967213114754098, | |
| "eval_precision_weighted": 0.8259746225862427, | |
| "eval_recall_macro": 0.7922261904761905, | |
| "eval_recall_micro": 0.7967213114754098, | |
| "eval_recall_weighted": 0.7967213114754098, | |
| "eval_runtime": 59.8003, | |
| "eval_samples_per_second": 20.401, | |
| "eval_steps_per_second": 0.334, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 15.099337748344372, | |
| "grad_norm": 9.65889835357666, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.6024, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 15.23841059602649, | |
| "grad_norm": 8.170406341552734, | |
| "learning_rate": 4.692222222222222e-05, | |
| "loss": 0.5263, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 15.37748344370861, | |
| "grad_norm": 8.782620429992676, | |
| "learning_rate": 4.6844444444444444e-05, | |
| "loss": 0.552, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 15.516556291390728, | |
| "grad_norm": 11.878396034240723, | |
| "learning_rate": 4.676666666666667e-05, | |
| "loss": 0.6127, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 15.655629139072847, | |
| "grad_norm": 8.88171672821045, | |
| "learning_rate": 4.668888888888889e-05, | |
| "loss": 0.6756, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 15.794701986754967, | |
| "grad_norm": 11.983383178710938, | |
| "learning_rate": 4.6611111111111114e-05, | |
| "loss": 0.664, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 15.933774834437086, | |
| "grad_norm": 10.409689903259277, | |
| "learning_rate": 4.653333333333334e-05, | |
| "loss": 0.6436, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7918032786885246, | |
| "eval_f1_macro": 0.7815447427921685, | |
| "eval_f1_micro": 0.7918032786885246, | |
| "eval_f1_weighted": 0.7856156314459259, | |
| "eval_loss": 0.7297011017799377, | |
| "eval_precision_macro": 0.8101799866799867, | |
| "eval_precision_micro": 0.7918032786885246, | |
| "eval_precision_weighted": 0.8123722907329464, | |
| "eval_recall_macro": 0.7866488095238096, | |
| "eval_recall_micro": 0.7918032786885246, | |
| "eval_recall_weighted": 0.7918032786885246, | |
| "eval_runtime": 60.0895, | |
| "eval_samples_per_second": 20.303, | |
| "eval_steps_per_second": 0.333, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 16.05960264900662, | |
| "grad_norm": 10.903715133666992, | |
| "learning_rate": 4.645555555555556e-05, | |
| "loss": 0.5591, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 16.198675496688743, | |
| "grad_norm": 8.767610549926758, | |
| "learning_rate": 4.6377777777777784e-05, | |
| "loss": 0.5711, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 16.337748344370862, | |
| "grad_norm": 8.273555755615234, | |
| "learning_rate": 4.630000000000001e-05, | |
| "loss": 0.5811, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 16.47682119205298, | |
| "grad_norm": 12.013016700744629, | |
| "learning_rate": 4.6222222222222224e-05, | |
| "loss": 0.6443, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 16.6158940397351, | |
| "grad_norm": 7.874364376068115, | |
| "learning_rate": 4.614444444444445e-05, | |
| "loss": 0.5073, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 16.75496688741722, | |
| "grad_norm": 9.01498031616211, | |
| "learning_rate": 4.606666666666667e-05, | |
| "loss": 0.608, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 16.894039735099337, | |
| "grad_norm": 9.848909378051758, | |
| "learning_rate": 4.5988888888888894e-05, | |
| "loss": 0.5929, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7959016393442623, | |
| "eval_f1_macro": 0.7867938321138785, | |
| "eval_f1_micro": 0.7959016393442623, | |
| "eval_f1_weighted": 0.7917754148114372, | |
| "eval_loss": 0.7077643871307373, | |
| "eval_precision_macro": 0.8185556526806528, | |
| "eval_precision_micro": 0.7959016393442623, | |
| "eval_precision_weighted": 0.8217451378312034, | |
| "eval_recall_macro": 0.7902916666666667, | |
| "eval_recall_micro": 0.7959016393442623, | |
| "eval_recall_weighted": 0.7959016393442623, | |
| "eval_runtime": 59.7504, | |
| "eval_samples_per_second": 20.418, | |
| "eval_steps_per_second": 0.335, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 17.019867549668874, | |
| "grad_norm": 9.507264137268066, | |
| "learning_rate": 4.591111111111112e-05, | |
| "loss": 0.5247, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 17.158940397350992, | |
| "grad_norm": 7.274167537689209, | |
| "learning_rate": 4.5833333333333334e-05, | |
| "loss": 0.5212, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 17.29801324503311, | |
| "grad_norm": 8.040386199951172, | |
| "learning_rate": 4.575555555555556e-05, | |
| "loss": 0.4957, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 17.437086092715234, | |
| "grad_norm": 10.34827709197998, | |
| "learning_rate": 4.567777777777778e-05, | |
| "loss": 0.4938, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 17.576158940397352, | |
| "grad_norm": 9.062361717224121, | |
| "learning_rate": 4.5600000000000004e-05, | |
| "loss": 0.5341, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 17.71523178807947, | |
| "grad_norm": 7.889723777770996, | |
| "learning_rate": 4.552222222222222e-05, | |
| "loss": 0.5407, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 17.85430463576159, | |
| "grad_norm": 7.329662799835205, | |
| "learning_rate": 4.5444444444444444e-05, | |
| "loss": 0.5344, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 17.99337748344371, | |
| "grad_norm": 10.251781463623047, | |
| "learning_rate": 4.536666666666667e-05, | |
| "loss": 0.5108, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8, | |
| "eval_f1_macro": 0.7904420722323199, | |
| "eval_f1_micro": 0.8, | |
| "eval_f1_weighted": 0.794189161749749, | |
| "eval_loss": 0.7119916081428528, | |
| "eval_precision_macro": 0.8223463203463203, | |
| "eval_precision_micro": 0.8, | |
| "eval_precision_weighted": 0.8258165377427673, | |
| "eval_recall_macro": 0.7962023809523809, | |
| "eval_recall_micro": 0.8, | |
| "eval_recall_weighted": 0.8, | |
| "eval_runtime": 58.9812, | |
| "eval_samples_per_second": 20.685, | |
| "eval_steps_per_second": 0.339, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 18.119205298013245, | |
| "grad_norm": 7.837319374084473, | |
| "learning_rate": 4.528888888888889e-05, | |
| "loss": 0.4401, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 18.258278145695364, | |
| "grad_norm": 7.545521259307861, | |
| "learning_rate": 4.5211111111111114e-05, | |
| "loss": 0.4821, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 18.397350993377483, | |
| "grad_norm": 7.626832962036133, | |
| "learning_rate": 4.513333333333333e-05, | |
| "loss": 0.4991, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 18.5364238410596, | |
| "grad_norm": 7.265345573425293, | |
| "learning_rate": 4.5055555555555554e-05, | |
| "loss": 0.5936, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 18.67549668874172, | |
| "grad_norm": 6.648807525634766, | |
| "learning_rate": 4.497777777777778e-05, | |
| "loss": 0.4418, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 18.814569536423843, | |
| "grad_norm": 6.413826942443848, | |
| "learning_rate": 4.49e-05, | |
| "loss": 0.4185, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 18.95364238410596, | |
| "grad_norm": 9.378252029418945, | |
| "learning_rate": 4.4822222222222224e-05, | |
| "loss": 0.5109, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8106557377049181, | |
| "eval_f1_macro": 0.8023834074422309, | |
| "eval_f1_micro": 0.8106557377049181, | |
| "eval_f1_weighted": 0.8054703936104611, | |
| "eval_loss": 0.671293318271637, | |
| "eval_precision_macro": 0.8325211038961038, | |
| "eval_precision_micro": 0.8106557377049181, | |
| "eval_precision_weighted": 0.8349751023111679, | |
| "eval_recall_macro": 0.8078333333333333, | |
| "eval_recall_micro": 0.8106557377049181, | |
| "eval_recall_weighted": 0.8106557377049181, | |
| "eval_runtime": 60.0408, | |
| "eval_samples_per_second": 20.32, | |
| "eval_steps_per_second": 0.333, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 19.079470198675498, | |
| "grad_norm": 6.43688440322876, | |
| "learning_rate": 4.474444444444445e-05, | |
| "loss": 0.401, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 19.218543046357617, | |
| "grad_norm": 10.133489608764648, | |
| "learning_rate": 4.466666666666667e-05, | |
| "loss": 0.4449, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 19.357615894039736, | |
| "grad_norm": 9.007479667663574, | |
| "learning_rate": 4.4588888888888894e-05, | |
| "loss": 0.5457, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 19.496688741721854, | |
| "grad_norm": 10.912771224975586, | |
| "learning_rate": 4.451111111111112e-05, | |
| "loss": 0.5306, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 19.635761589403973, | |
| "grad_norm": 6.615180492401123, | |
| "learning_rate": 4.443333333333334e-05, | |
| "loss": 0.4925, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 19.774834437086092, | |
| "grad_norm": 7.076197147369385, | |
| "learning_rate": 4.435555555555556e-05, | |
| "loss": 0.4787, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 19.91390728476821, | |
| "grad_norm": 7.040290832519531, | |
| "learning_rate": 4.427777777777778e-05, | |
| "loss": 0.4809, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8139344262295082, | |
| "eval_f1_macro": 0.8081211352716771, | |
| "eval_f1_micro": 0.8139344262295082, | |
| "eval_f1_weighted": 0.8116663019924579, | |
| "eval_loss": 0.6667141914367676, | |
| "eval_precision_macro": 0.8430578726828728, | |
| "eval_precision_micro": 0.8139344262295082, | |
| "eval_precision_weighted": 0.8445454568200469, | |
| "eval_recall_macro": 0.8106190476190476, | |
| "eval_recall_micro": 0.8139344262295082, | |
| "eval_recall_weighted": 0.8139344262295082, | |
| "eval_runtime": 58.7404, | |
| "eval_samples_per_second": 20.769, | |
| "eval_steps_per_second": 0.34, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 20.039735099337747, | |
| "grad_norm": 9.023087501525879, | |
| "learning_rate": 4.4200000000000004e-05, | |
| "loss": 0.386, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 20.178807947019866, | |
| "grad_norm": 7.4928178787231445, | |
| "learning_rate": 4.412222222222223e-05, | |
| "loss": 0.4569, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 20.31788079470199, | |
| "grad_norm": 8.090821266174316, | |
| "learning_rate": 4.404444444444445e-05, | |
| "loss": 0.4778, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 20.456953642384107, | |
| "grad_norm": 8.650497436523438, | |
| "learning_rate": 4.396666666666667e-05, | |
| "loss": 0.4786, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 20.596026490066226, | |
| "grad_norm": 6.049080848693848, | |
| "learning_rate": 4.388888888888889e-05, | |
| "loss": 0.4975, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 20.735099337748345, | |
| "grad_norm": 10.202515602111816, | |
| "learning_rate": 4.3811111111111114e-05, | |
| "loss": 0.4035, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 20.874172185430464, | |
| "grad_norm": 7.0871429443359375, | |
| "learning_rate": 4.373333333333334e-05, | |
| "loss": 0.4274, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 6.111388206481934, | |
| "learning_rate": 4.3655555555555554e-05, | |
| "loss": 0.3576, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.8073770491803278, | |
| "eval_f1_macro": 0.7980818380535872, | |
| "eval_f1_micro": 0.8073770491803278, | |
| "eval_f1_weighted": 0.8027800592784986, | |
| "eval_loss": 0.6649746298789978, | |
| "eval_precision_macro": 0.8290474247974248, | |
| "eval_precision_micro": 0.8073770491803278, | |
| "eval_precision_weighted": 0.8307658143313881, | |
| "eval_recall_macro": 0.8019166666666666, | |
| "eval_recall_micro": 0.8073770491803278, | |
| "eval_recall_weighted": 0.8073770491803278, | |
| "eval_runtime": 60.057, | |
| "eval_samples_per_second": 20.314, | |
| "eval_steps_per_second": 0.333, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 21.13907284768212, | |
| "grad_norm": 9.059436798095703, | |
| "learning_rate": 4.357777777777778e-05, | |
| "loss": 0.4775, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 21.278145695364238, | |
| "grad_norm": 9.497885704040527, | |
| "learning_rate": 4.35e-05, | |
| "loss": 0.4531, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 21.417218543046356, | |
| "grad_norm": 10.471771240234375, | |
| "learning_rate": 4.3422222222222224e-05, | |
| "loss": 0.479, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 21.556291390728475, | |
| "grad_norm": 6.627233505249023, | |
| "learning_rate": 4.334444444444445e-05, | |
| "loss": 0.4332, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 21.695364238410598, | |
| "grad_norm": 9.046399116516113, | |
| "learning_rate": 4.3266666666666664e-05, | |
| "loss": 0.4767, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 21.834437086092716, | |
| "grad_norm": 6.7745513916015625, | |
| "learning_rate": 4.318888888888889e-05, | |
| "loss": 0.5137, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 21.973509933774835, | |
| "grad_norm": 8.061189651489258, | |
| "learning_rate": 4.311111111111111e-05, | |
| "loss": 0.4877, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.8114754098360656, | |
| "eval_f1_macro": 0.8045914526649821, | |
| "eval_f1_micro": 0.8114754098360656, | |
| "eval_f1_weighted": 0.8078734461991453, | |
| "eval_loss": 0.6778721809387207, | |
| "eval_precision_macro": 0.836376651126651, | |
| "eval_precision_micro": 0.8114754098360656, | |
| "eval_precision_weighted": 0.8366239998617048, | |
| "eval_recall_macro": 0.806404761904762, | |
| "eval_recall_micro": 0.8114754098360656, | |
| "eval_recall_weighted": 0.8114754098360656, | |
| "eval_runtime": 59.185, | |
| "eval_samples_per_second": 20.613, | |
| "eval_steps_per_second": 0.338, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 22.09933774834437, | |
| "grad_norm": 9.460957527160645, | |
| "learning_rate": 4.3033333333333334e-05, | |
| "loss": 0.4915, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 22.23841059602649, | |
| "grad_norm": 9.026511192321777, | |
| "learning_rate": 4.295555555555556e-05, | |
| "loss": 0.4157, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 22.37748344370861, | |
| "grad_norm": 9.733258247375488, | |
| "learning_rate": 4.287777777777778e-05, | |
| "loss": 0.3564, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 22.516556291390728, | |
| "grad_norm": 9.269991874694824, | |
| "learning_rate": 4.2800000000000004e-05, | |
| "loss": 0.4707, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 22.655629139072847, | |
| "grad_norm": 7.8387041091918945, | |
| "learning_rate": 4.272222222222223e-05, | |
| "loss": 0.4902, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 22.794701986754966, | |
| "grad_norm": 10.261953353881836, | |
| "learning_rate": 4.264444444444445e-05, | |
| "loss": 0.4656, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 22.933774834437084, | |
| "grad_norm": 9.317761421203613, | |
| "learning_rate": 4.2566666666666674e-05, | |
| "loss": 0.4705, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.8131147540983606, | |
| "eval_f1_macro": 0.8073761565232153, | |
| "eval_f1_micro": 0.8131147540983606, | |
| "eval_f1_weighted": 0.8111283224168953, | |
| "eval_loss": 0.6698673963546753, | |
| "eval_precision_macro": 0.8399364801864801, | |
| "eval_precision_micro": 0.8131147540983606, | |
| "eval_precision_weighted": 0.8420730703722508, | |
| "eval_recall_macro": 0.808672619047619, | |
| "eval_recall_micro": 0.8131147540983606, | |
| "eval_recall_weighted": 0.8131147540983606, | |
| "eval_runtime": 59.9539, | |
| "eval_samples_per_second": 20.349, | |
| "eval_steps_per_second": 0.334, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 23.05960264900662, | |
| "grad_norm": 9.809006690979004, | |
| "learning_rate": 4.248888888888889e-05, | |
| "loss": 0.3833, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 23.198675496688743, | |
| "grad_norm": 8.9915132522583, | |
| "learning_rate": 4.2411111111111114e-05, | |
| "loss": 0.4552, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 23.337748344370862, | |
| "grad_norm": 10.036259651184082, | |
| "learning_rate": 4.233333333333334e-05, | |
| "loss": 0.3869, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 23.47682119205298, | |
| "grad_norm": 10.57496166229248, | |
| "learning_rate": 4.225555555555556e-05, | |
| "loss": 0.4003, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 23.6158940397351, | |
| "grad_norm": 9.061355590820312, | |
| "learning_rate": 4.217777777777778e-05, | |
| "loss": 0.4654, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 23.75496688741722, | |
| "grad_norm": 7.108461380004883, | |
| "learning_rate": 4.21e-05, | |
| "loss": 0.4085, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 23.894039735099337, | |
| "grad_norm": 5.542710781097412, | |
| "learning_rate": 4.2022222222222223e-05, | |
| "loss": 0.4358, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.8262295081967214, | |
| "eval_f1_macro": 0.8156689398492805, | |
| "eval_f1_micro": 0.8262295081967214, | |
| "eval_f1_weighted": 0.8195565714293827, | |
| "eval_loss": 0.6602770090103149, | |
| "eval_precision_macro": 0.8476504329004328, | |
| "eval_precision_micro": 0.8262295081967214, | |
| "eval_precision_weighted": 0.849387256641355, | |
| "eval_recall_macro": 0.8219166666666666, | |
| "eval_recall_micro": 0.8262295081967214, | |
| "eval_recall_weighted": 0.8262295081967214, | |
| "eval_runtime": 58.671, | |
| "eval_samples_per_second": 20.794, | |
| "eval_steps_per_second": 0.341, | |
| "step": 1224 | |
| } | |
| ], | |
| "logging_steps": 7, | |
| "max_steps": 5000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 7, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1899638530382496e+19, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |