{
  "best_metric": 0.3975684940814972,
  "best_model_checkpoint": "Model-Swin-Transformer-\\checkpoint-1275",
  "epoch": 25.0,
  "eval_steps": 7,
  "global_step": 1275,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1390728476821192,
      "grad_norm": 3.756645917892456,
      "learning_rate": 7.000000000000001e-07,
      "loss": 5.3644,
      "step": 7
    },
    {
      "epoch": 0.2781456953642384,
      "grad_norm": 3.558716297149658,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 5.3658,
      "step": 14
    },
    {
      "epoch": 0.41721854304635764,
      "grad_norm": 3.7727837562561035,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 5.3463,
      "step": 21
    },
    {
      "epoch": 0.5562913907284768,
      "grad_norm": 3.529733896255493,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 5.3343,
      "step": 28
    },
    {
      "epoch": 0.695364238410596,
      "grad_norm": 4.100412368774414,
      "learning_rate": 3.5000000000000004e-06,
      "loss": 5.3713,
      "step": 35
    },
    {
      "epoch": 0.8344370860927153,
      "grad_norm": 4.024387836456299,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 5.3528,
      "step": 42
    },
    {
      "epoch": 0.9735099337748344,
      "grad_norm": 3.3524274826049805,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 5.3086,
      "step": 49
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.00819672131147541,
      "eval_f1_macro": 0.004708259390712807,
      "eval_f1_micro": 0.00819672131147541,
      "eval_f1_weighted": 0.005191171978413603,
      "eval_loss": 5.3028178215026855,
      "eval_precision_macro": 0.004008928571428571,
      "eval_precision_micro": 0.00819672131147541,
      "eval_precision_weighted": 0.004471604215456675,
      "eval_recall_macro": 0.007458333333333333,
      "eval_recall_micro": 0.00819672131147541,
      "eval_recall_weighted": 0.00819672131147541,
      "eval_runtime": 18.3225,
      "eval_samples_per_second": 66.585,
      "eval_steps_per_second": 1.092,
      "step": 51
    },
    {
      "epoch": 1.099337748344371,
      "grad_norm": 3.333894729614258,
      "learning_rate": 5.600000000000001e-06,
      "loss": 4.7989,
      "step": 56
    },
    {
      "epoch": 1.23841059602649,
      "grad_norm": 3.226445198059082,
      "learning_rate": 6.300000000000001e-06,
      "loss": 5.2748,
      "step": 63
    },
    {
      "epoch": 1.3774834437086092,
      "grad_norm": 2.960792303085327,
      "learning_rate": 7.000000000000001e-06,
      "loss": 5.2799,
      "step": 70
    },
    {
      "epoch": 1.5165562913907285,
      "grad_norm": 2.967210054397583,
      "learning_rate": 7.7e-06,
      "loss": 5.2765,
      "step": 77
    },
    {
      "epoch": 1.6556291390728477,
      "grad_norm": 2.8297080993652344,
      "learning_rate": 8.400000000000001e-06,
      "loss": 5.2309,
      "step": 84
    },
    {
      "epoch": 1.794701986754967,
      "grad_norm": 3.001478433609009,
      "learning_rate": 9.100000000000001e-06,
      "loss": 5.2076,
      "step": 91
    },
    {
      "epoch": 1.9337748344370862,
      "grad_norm": 3.130455493927002,
      "learning_rate": 9.800000000000001e-06,
      "loss": 5.1799,
      "step": 98
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.031967213114754096,
      "eval_f1_macro": 0.02138895151975647,
      "eval_f1_micro": 0.031967213114754096,
      "eval_f1_weighted": 0.022752398833071827,
      "eval_loss": 5.114638805389404,
      "eval_precision_macro": 0.02439267581077926,
      "eval_precision_micro": 0.031967213114754096,
      "eval_precision_weighted": 0.026138166290088277,
      "eval_recall_macro": 0.030029761904761903,
      "eval_recall_micro": 0.031967213114754096,
      "eval_recall_weighted": 0.031967213114754096,
      "eval_runtime": 19.7575,
      "eval_samples_per_second": 61.749,
      "eval_steps_per_second": 1.012,
      "step": 102
    },
    {
      "epoch": 2.0596026490066226,
      "grad_norm": 3.9784674644470215,
      "learning_rate": 1.05e-05,
      "loss": 4.6593,
      "step": 105
    },
    {
      "epoch": 2.198675496688742,
      "grad_norm": 4.487362861633301,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 5.1011,
      "step": 112
    },
    {
      "epoch": 2.337748344370861,
      "grad_norm": 3.840144157409668,
      "learning_rate": 1.19e-05,
      "loss": 5.0346,
      "step": 119
    },
    {
      "epoch": 2.47682119205298,
      "grad_norm": 4.5351409912109375,
      "learning_rate": 1.2600000000000001e-05,
      "loss": 4.993,
      "step": 126
    },
    {
      "epoch": 2.6158940397350996,
      "grad_norm": 5.248815059661865,
      "learning_rate": 1.3300000000000001e-05,
      "loss": 4.9318,
      "step": 133
    },
    {
      "epoch": 2.7549668874172184,
      "grad_norm": 5.679067611694336,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 4.8848,
      "step": 140
    },
    {
      "epoch": 2.8940397350993377,
      "grad_norm": 5.753520488739014,
      "learning_rate": 1.47e-05,
      "loss": 4.7788,
      "step": 147
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.20163934426229507,
      "eval_f1_macro": 0.15865115752245537,
      "eval_f1_micro": 0.20163934426229507,
      "eval_f1_weighted": 0.16819957965187343,
      "eval_loss": 4.359624862670898,
      "eval_precision_macro": 0.2004175752293943,
      "eval_precision_micro": 0.20163934426229507,
      "eval_precision_weighted": 0.21056413625270873,
      "eval_recall_macro": 0.18860714285714286,
      "eval_recall_micro": 0.20163934426229507,
      "eval_recall_weighted": 0.20163934426229507,
      "eval_runtime": 16.799,
      "eval_samples_per_second": 72.623,
      "eval_steps_per_second": 1.191,
      "step": 153
    },
    {
      "epoch": 3.019867549668874,
      "grad_norm": 7.467461585998535,
      "learning_rate": 1.54e-05,
      "loss": 4.1979,
      "step": 154
    },
    {
      "epoch": 3.1589403973509933,
      "grad_norm": 8.049297332763672,
      "learning_rate": 1.6100000000000002e-05,
      "loss": 4.3989,
      "step": 161
    },
    {
      "epoch": 3.2980132450331126,
      "grad_norm": 15.375213623046875,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 4.2083,
      "step": 168
    },
    {
      "epoch": 3.437086092715232,
      "grad_norm": 9.002079963684082,
      "learning_rate": 1.75e-05,
      "loss": 4.0218,
      "step": 175
    },
    {
      "epoch": 3.576158940397351,
      "grad_norm": 9.430607795715332,
      "learning_rate": 1.8200000000000002e-05,
      "loss": 3.8887,
      "step": 182
    },
    {
      "epoch": 3.7152317880794703,
      "grad_norm": 8.839299201965332,
      "learning_rate": 1.8900000000000002e-05,
      "loss": 3.6825,
      "step": 189
    },
    {
      "epoch": 3.8543046357615895,
      "grad_norm": 10.325126647949219,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 3.5607,
      "step": 196
    },
    {
      "epoch": 3.993377483443709,
      "grad_norm": 10.668830871582031,
      "learning_rate": 2.0300000000000002e-05,
      "loss": 3.3596,
      "step": 203
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.4819672131147541,
      "eval_f1_macro": 0.4183671558365781,
      "eval_f1_micro": 0.4819672131147541,
      "eval_f1_weighted": 0.4325206736160847,
      "eval_loss": 2.7524967193603516,
      "eval_precision_macro": 0.4532485780774093,
      "eval_precision_micro": 0.4819672131147541,
      "eval_precision_weighted": 0.4607049565114263,
      "eval_recall_macro": 0.4596547619047619,
      "eval_recall_micro": 0.4819672131147541,
      "eval_recall_weighted": 0.4819672131147541,
      "eval_runtime": 21.0215,
      "eval_samples_per_second": 58.036,
      "eval_steps_per_second": 0.951,
      "step": 204
    },
    {
      "epoch": 4.119205298013245,
      "grad_norm": 14.15578842163086,
      "learning_rate": 2.1e-05,
      "loss": 2.792,
      "step": 210
    },
    {
      "epoch": 4.258278145695364,
      "grad_norm": 13.17058277130127,
      "learning_rate": 2.1700000000000002e-05,
      "loss": 2.9187,
      "step": 217
    },
    {
      "epoch": 4.397350993377484,
      "grad_norm": 17.613101959228516,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 2.8103,
      "step": 224
    },
    {
      "epoch": 4.5364238410596025,
      "grad_norm": 10.50324821472168,
      "learning_rate": 2.3100000000000002e-05,
      "loss": 2.6919,
      "step": 231
    },
    {
      "epoch": 4.675496688741722,
      "grad_norm": 9.514472007751465,
      "learning_rate": 2.38e-05,
      "loss": 2.4714,
      "step": 238
    },
    {
      "epoch": 4.814569536423841,
      "grad_norm": 9.27059268951416,
      "learning_rate": 2.45e-05,
      "loss": 2.4165,
      "step": 245
    },
    {
      "epoch": 4.95364238410596,
      "grad_norm": 14.103813171386719,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 2.2583,
      "step": 252
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6754098360655738,
      "eval_f1_macro": 0.6554426723087714,
      "eval_f1_micro": 0.6754098360655738,
      "eval_f1_weighted": 0.661180283037551,
      "eval_loss": 1.6013199090957642,
      "eval_precision_macro": 0.7148540158456954,
      "eval_precision_micro": 0.6754098360655738,
      "eval_precision_weighted": 0.7134497831635958,
      "eval_recall_macro": 0.6648869047619047,
      "eval_recall_micro": 0.6754098360655738,
      "eval_recall_weighted": 0.6754098360655738,
      "eval_runtime": 17.8695,
      "eval_samples_per_second": 68.273,
      "eval_steps_per_second": 1.119,
      "step": 255
    },
    {
      "epoch": 5.079470198675497,
      "grad_norm": 10.124486923217773,
      "learning_rate": 2.5900000000000003e-05,
      "loss": 1.7994,
      "step": 259
    },
    {
      "epoch": 5.218543046357616,
      "grad_norm": 9.2846040725708,
      "learning_rate": 2.6600000000000003e-05,
      "loss": 1.8769,
      "step": 266
    },
    {
      "epoch": 5.357615894039735,
      "grad_norm": 9.681060791015625,
      "learning_rate": 2.7300000000000003e-05,
      "loss": 1.8592,
      "step": 273
    },
    {
      "epoch": 5.496688741721854,
      "grad_norm": 10.057909965515137,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 1.7236,
      "step": 280
    },
    {
      "epoch": 5.635761589403973,
      "grad_norm": 11.661372184753418,
      "learning_rate": 2.87e-05,
      "loss": 1.7951,
      "step": 287
    },
    {
      "epoch": 5.774834437086093,
      "grad_norm": 9.344789505004883,
      "learning_rate": 2.94e-05,
      "loss": 1.6862,
      "step": 294
    },
    {
      "epoch": 5.913907284768212,
      "grad_norm": 9.517767906188965,
      "learning_rate": 3.01e-05,
      "loss": 1.5559,
      "step": 301
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7745901639344263,
      "eval_f1_macro": 0.7566554592638184,
      "eval_f1_micro": 0.7745901639344263,
      "eval_f1_weighted": 0.7613547559463157,
      "eval_loss": 1.02675461769104,
      "eval_precision_macro": 0.799015755078255,
      "eval_precision_micro": 0.7745901639344263,
      "eval_precision_weighted": 0.802561029589718,
      "eval_recall_macro": 0.767904761904762,
      "eval_recall_micro": 0.7745901639344263,
      "eval_recall_weighted": 0.7745901639344263,
      "eval_runtime": 20.701,
      "eval_samples_per_second": 58.934,
      "eval_steps_per_second": 0.966,
      "step": 306
    },
    {
      "epoch": 6.039735099337748,
      "grad_norm": 8.714813232421875,
      "learning_rate": 3.08e-05,
      "loss": 1.3741,
      "step": 308
    },
    {
      "epoch": 6.178807947019868,
      "grad_norm": 8.35527515411377,
      "learning_rate": 3.15e-05,
      "loss": 1.355,
      "step": 315
    },
    {
      "epoch": 6.317880794701987,
      "grad_norm": 10.128329277038574,
      "learning_rate": 3.2200000000000003e-05,
      "loss": 1.31,
      "step": 322
    },
    {
      "epoch": 6.456953642384106,
      "grad_norm": 8.156630516052246,
      "learning_rate": 3.29e-05,
      "loss": 1.2743,
      "step": 329
    },
    {
      "epoch": 6.596026490066225,
      "grad_norm": 8.779105186462402,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 1.3193,
      "step": 336
    },
    {
      "epoch": 6.735099337748345,
      "grad_norm": 11.118610382080078,
      "learning_rate": 3.430000000000001e-05,
      "loss": 1.2748,
      "step": 343
    },
    {
      "epoch": 6.874172185430464,
      "grad_norm": 8.063292503356934,
      "learning_rate": 3.5e-05,
      "loss": 1.2319,
      "step": 350
    },
    {
      "epoch": 7.0,
      "grad_norm": 7.127477169036865,
      "learning_rate": 3.57e-05,
      "loss": 1.0089,
      "step": 357
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8016393442622951,
      "eval_f1_macro": 0.7894899096002038,
      "eval_f1_micro": 0.8016393442622951,
      "eval_f1_weighted": 0.7904804760759438,
      "eval_loss": 0.7927541136741638,
      "eval_precision_macro": 0.8299506911217438,
      "eval_precision_micro": 0.8016393442622951,
      "eval_precision_weighted": 0.8283684745893546,
      "eval_recall_macro": 0.7991785714285714,
      "eval_recall_micro": 0.8016393442622951,
      "eval_recall_weighted": 0.8016393442622951,
      "eval_runtime": 19.3393,
      "eval_samples_per_second": 63.084,
      "eval_steps_per_second": 1.034,
      "step": 357
    },
    {
      "epoch": 7.139072847682119,
      "grad_norm": 9.581562995910645,
      "learning_rate": 3.6400000000000004e-05,
      "loss": 1.0836,
      "step": 364
    },
    {
      "epoch": 7.2781456953642385,
      "grad_norm": 11.660162925720215,
      "learning_rate": 3.71e-05,
      "loss": 0.9816,
      "step": 371
    },
    {
      "epoch": 7.417218543046357,
      "grad_norm": 10.361519813537598,
      "learning_rate": 3.7800000000000004e-05,
      "loss": 1.1386,
      "step": 378
    },
    {
      "epoch": 7.556291390728477,
      "grad_norm": 8.699442863464355,
      "learning_rate": 3.85e-05,
      "loss": 0.9964,
      "step": 385
    },
    {
      "epoch": 7.695364238410596,
      "grad_norm": 9.233181953430176,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 1.077,
      "step": 392
    },
    {
      "epoch": 7.8344370860927155,
      "grad_norm": 10.234973907470703,
      "learning_rate": 3.99e-05,
      "loss": 0.9844,
      "step": 399
    },
    {
      "epoch": 7.973509933774834,
      "grad_norm": 9.366828918457031,
      "learning_rate": 4.0600000000000004e-05,
      "loss": 1.0463,
      "step": 406
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8311475409836065,
      "eval_f1_macro": 0.8222988632697376,
      "eval_f1_micro": 0.8311475409836065,
      "eval_f1_weighted": 0.8253316645805044,
      "eval_loss": 0.6437082886695862,
      "eval_precision_macro": 0.8575361999111999,
      "eval_precision_micro": 0.8311475409836065,
      "eval_precision_weighted": 0.8598102466750007,
      "eval_recall_macro": 0.8266071428571429,
      "eval_recall_micro": 0.8311475409836065,
      "eval_recall_weighted": 0.8311475409836065,
      "eval_runtime": 16.6963,
      "eval_samples_per_second": 73.07,
      "eval_steps_per_second": 1.198,
      "step": 408
    },
    {
      "epoch": 8.099337748344372,
      "grad_norm": 8.740901947021484,
      "learning_rate": 4.13e-05,
      "loss": 0.8633,
      "step": 413
    },
    {
      "epoch": 8.23841059602649,
      "grad_norm": 9.237879753112793,
      "learning_rate": 4.2e-05,
      "loss": 0.978,
      "step": 420
    },
    {
      "epoch": 8.37748344370861,
      "grad_norm": 9.53095817565918,
      "learning_rate": 4.27e-05,
      "loss": 0.9132,
      "step": 427
    },
    {
      "epoch": 8.516556291390728,
      "grad_norm": 9.277670860290527,
      "learning_rate": 4.3400000000000005e-05,
      "loss": 0.8908,
      "step": 434
    },
    {
      "epoch": 8.655629139072847,
      "grad_norm": 7.736002445220947,
      "learning_rate": 4.41e-05,
      "loss": 0.9602,
      "step": 441
    },
    {
      "epoch": 8.794701986754967,
      "grad_norm": 9.285863876342773,
      "learning_rate": 4.4800000000000005e-05,
      "loss": 0.8773,
      "step": 448
    },
    {
      "epoch": 8.933774834437086,
      "grad_norm": 6.872951507568359,
      "learning_rate": 4.55e-05,
      "loss": 0.8551,
      "step": 455
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8467213114754099,
      "eval_f1_macro": 0.8380145271395273,
      "eval_f1_micro": 0.8467213114754099,
      "eval_f1_weighted": 0.8412076976650266,
      "eval_loss": 0.5831084847450256,
      "eval_precision_macro": 0.8702838485044367,
      "eval_precision_micro": 0.8467213114754099,
      "eval_precision_weighted": 0.8695735086995936,
      "eval_recall_macro": 0.8410952380952381,
      "eval_recall_micro": 0.8467213114754099,
      "eval_recall_weighted": 0.8467213114754099,
      "eval_runtime": 17.1069,
      "eval_samples_per_second": 71.316,
      "eval_steps_per_second": 1.169,
      "step": 459
    },
    {
      "epoch": 9.059602649006623,
      "grad_norm": 9.55445671081543,
      "learning_rate": 4.6200000000000005e-05,
      "loss": 0.7727,
      "step": 462
    },
    {
      "epoch": 9.198675496688741,
      "grad_norm": 10.598060607910156,
      "learning_rate": 4.69e-05,
      "loss": 0.7759,
      "step": 469
    },
    {
      "epoch": 9.33774834437086,
      "grad_norm": 9.53549861907959,
      "learning_rate": 4.76e-05,
      "loss": 0.8016,
      "step": 476
    },
    {
      "epoch": 9.47682119205298,
      "grad_norm": 8.146626472473145,
      "learning_rate": 4.83e-05,
      "loss": 0.779,
      "step": 483
    },
    {
      "epoch": 9.6158940397351,
      "grad_norm": 6.373037815093994,
      "learning_rate": 4.9e-05,
      "loss": 0.6726,
      "step": 490
    },
    {
      "epoch": 9.754966887417218,
      "grad_norm": 9.011489868164062,
      "learning_rate": 4.97e-05,
      "loss": 0.835,
      "step": 497
    },
    {
      "epoch": 9.894039735099337,
      "grad_norm": 8.79944133758545,
      "learning_rate": 4.995555555555556e-05,
      "loss": 0.7234,
      "step": 504
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8672131147540983,
      "eval_f1_macro": 0.8578000912486207,
      "eval_f1_micro": 0.8672131147540983,
      "eval_f1_weighted": 0.8622077336035869,
      "eval_loss": 0.5154255628585815,
      "eval_precision_macro": 0.8773138528138529,
      "eval_precision_micro": 0.8672131147540983,
      "eval_precision_weighted": 0.8793111737988787,
      "eval_recall_macro": 0.8614821428571429,
      "eval_recall_micro": 0.8672131147540983,
      "eval_recall_weighted": 0.8672131147540983,
      "eval_runtime": 18.8743,
      "eval_samples_per_second": 64.638,
      "eval_steps_per_second": 1.06,
      "step": 510
    },
    {
      "epoch": 10.019867549668874,
      "grad_norm": 12.6382474899292,
      "learning_rate": 4.987777777777778e-05,
      "loss": 0.6376,
      "step": 511
    },
    {
      "epoch": 10.158940397350994,
      "grad_norm": 7.875304222106934,
      "learning_rate": 4.9800000000000004e-05,
      "loss": 0.608,
      "step": 518
    },
    {
      "epoch": 10.298013245033113,
      "grad_norm": 8.565564155578613,
      "learning_rate": 4.972222222222223e-05,
      "loss": 0.7333,
      "step": 525
    },
    {
      "epoch": 10.437086092715232,
      "grad_norm": 8.355602264404297,
      "learning_rate": 4.964444444444445e-05,
      "loss": 0.6883,
      "step": 532
    },
    {
      "epoch": 10.57615894039735,
      "grad_norm": 8.657296180725098,
      "learning_rate": 4.956666666666667e-05,
      "loss": 0.7567,
      "step": 539
    },
    {
      "epoch": 10.71523178807947,
      "grad_norm": 7.550002098083496,
      "learning_rate": 4.948888888888889e-05,
      "loss": 0.7422,
      "step": 546
    },
    {
      "epoch": 10.85430463576159,
      "grad_norm": 7.08698844909668,
      "learning_rate": 4.9411111111111114e-05,
      "loss": 0.7045,
      "step": 553
    },
    {
      "epoch": 10.993377483443709,
      "grad_norm": 8.84117317199707,
      "learning_rate": 4.933333333333334e-05,
      "loss": 0.7177,
      "step": 560
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8745901639344262,
      "eval_f1_macro": 0.8674472995614636,
      "eval_f1_micro": 0.8745901639344262,
      "eval_f1_weighted": 0.869469718145237,
      "eval_loss": 0.4802148938179016,
      "eval_precision_macro": 0.8951194083694085,
      "eval_precision_micro": 0.8745901639344262,
      "eval_precision_weighted": 0.8939981489366735,
      "eval_recall_macro": 0.8713035714285715,
      "eval_recall_micro": 0.8745901639344262,
      "eval_recall_weighted": 0.8745901639344262,
      "eval_runtime": 17.7202,
      "eval_samples_per_second": 68.848,
      "eval_steps_per_second": 1.129,
      "step": 561
    },
    {
      "epoch": 11.119205298013245,
      "grad_norm": 9.191046714782715,
      "learning_rate": 4.925555555555556e-05,
      "loss": 0.5652,
      "step": 567
    },
    {
      "epoch": 11.258278145695364,
      "grad_norm": 6.099012851715088,
      "learning_rate": 4.917777777777778e-05,
      "loss": 0.6823,
      "step": 574
    },
    {
      "epoch": 11.397350993377483,
      "grad_norm": 7.644286632537842,
      "learning_rate": 4.91e-05,
      "loss": 0.5704,
      "step": 581
    },
    {
      "epoch": 11.536423841059603,
      "grad_norm": 7.208959579467773,
      "learning_rate": 4.9022222222222224e-05,
      "loss": 0.5889,
      "step": 588
    },
    {
      "epoch": 11.675496688741722,
      "grad_norm": 8.255132675170898,
      "learning_rate": 4.894444444444445e-05,
      "loss": 0.7361,
      "step": 595
    },
    {
      "epoch": 11.814569536423841,
      "grad_norm": 8.378829002380371,
      "learning_rate": 4.886666666666667e-05,
      "loss": 0.6723,
      "step": 602
    },
    {
      "epoch": 11.95364238410596,
      "grad_norm": 11.726705551147461,
      "learning_rate": 4.878888888888889e-05,
      "loss": 0.711,
      "step": 609
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8565573770491803,
      "eval_f1_macro": 0.8515204682228676,
      "eval_f1_micro": 0.8565573770491803,
      "eval_f1_weighted": 0.8536645233724359,
      "eval_loss": 0.4757900536060333,
      "eval_precision_macro": 0.8826355588855589,
      "eval_precision_micro": 0.8565573770491803,
      "eval_precision_weighted": 0.8834410489328521,
      "eval_recall_macro": 0.8537678571428572,
      "eval_recall_micro": 0.8565573770491803,
      "eval_recall_weighted": 0.8565573770491803,
      "eval_runtime": 18.3018,
      "eval_samples_per_second": 66.66,
      "eval_steps_per_second": 1.093,
      "step": 612
    },
    {
      "epoch": 12.079470198675496,
      "grad_norm": 7.42709493637085,
      "learning_rate": 4.871111111111111e-05,
      "loss": 0.5113,
      "step": 616
    },
    {
      "epoch": 12.218543046357617,
      "grad_norm": 9.922298431396484,
      "learning_rate": 4.8633333333333334e-05,
      "loss": 0.6113,
      "step": 623
    },
    {
      "epoch": 12.357615894039736,
      "grad_norm": 5.806549072265625,
      "learning_rate": 4.855555555555556e-05,
      "loss": 0.592,
      "step": 630
    },
    {
      "epoch": 12.496688741721854,
      "grad_norm": 8.837335586547852,
      "learning_rate": 4.847777777777778e-05,
      "loss": 0.604,
      "step": 637
    },
    {
      "epoch": 12.635761589403973,
      "grad_norm": 7.156353950500488,
      "learning_rate": 4.8400000000000004e-05,
      "loss": 0.5203,
      "step": 644
    },
    {
      "epoch": 12.774834437086092,
      "grad_norm": 8.29881477355957,
      "learning_rate": 4.832222222222223e-05,
      "loss": 0.6528,
      "step": 651
    },
    {
      "epoch": 12.913907284768213,
      "grad_norm": 7.490756511688232,
      "learning_rate": 4.824444444444445e-05,
      "loss": 0.5318,
      "step": 658
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8672131147540983,
      "eval_f1_macro": 0.8627146097366685,
      "eval_f1_micro": 0.8672131147540983,
      "eval_f1_weighted": 0.8659745527428748,
      "eval_loss": 0.4569399952888489,
      "eval_precision_macro": 0.8866749639249639,
      "eval_precision_micro": 0.8672131147540983,
      "eval_precision_weighted": 0.888172740283396,
      "eval_recall_macro": 0.8630297619047619,
      "eval_recall_micro": 0.8672131147540983,
      "eval_recall_weighted": 0.8672131147540983,
      "eval_runtime": 17.7039,
      "eval_samples_per_second": 68.911,
      "eval_steps_per_second": 1.13,
      "step": 663
    },
    {
      "epoch": 13.039735099337749,
      "grad_norm": 8.017422676086426,
      "learning_rate": 4.8166666666666674e-05,
      "loss": 0.4727,
      "step": 665
    },
    {
      "epoch": 13.178807947019868,
      "grad_norm": 7.218966007232666,
      "learning_rate": 4.808888888888889e-05,
      "loss": 0.5897,
      "step": 672
    },
    {
      "epoch": 13.317880794701987,
      "grad_norm": 10.478813171386719,
      "learning_rate": 4.8011111111111114e-05,
      "loss": 0.5473,
      "step": 679
    },
    {
      "epoch": 13.456953642384105,
      "grad_norm": 6.682877540588379,
      "learning_rate": 4.793333333333334e-05,
      "loss": 0.5479,
      "step": 686
    },
    {
      "epoch": 13.596026490066226,
      "grad_norm": 12.535813331604004,
      "learning_rate": 4.785555555555556e-05,
      "loss": 0.5458,
      "step": 693
    },
    {
      "epoch": 13.735099337748345,
      "grad_norm": 7.044444561004639,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 0.5401,
      "step": 700
    },
    {
      "epoch": 13.874172185430464,
      "grad_norm": 7.247359752655029,
      "learning_rate": 4.77e-05,
      "loss": 0.5912,
      "step": 707
    },
    {
      "epoch": 14.0,
      "grad_norm": 6.261186122894287,
      "learning_rate": 4.7622222222222224e-05,
      "loss": 0.5383,
      "step": 714
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8786885245901639,
      "eval_f1_macro": 0.87375893714129,
      "eval_f1_micro": 0.8786885245901639,
      "eval_f1_weighted": 0.8755628520466998,
      "eval_loss": 0.4514833092689514,
      "eval_precision_macro": 0.8954447203123673,
      "eval_precision_micro": 0.8786885245901639,
      "eval_precision_weighted": 0.8948432423143127,
      "eval_recall_macro": 0.8754642857142858,
      "eval_recall_micro": 0.8786885245901639,
      "eval_recall_weighted": 0.8786885245901639,
      "eval_runtime": 18.849,
      "eval_samples_per_second": 64.725,
      "eval_steps_per_second": 1.061,
      "step": 714
    },
    {
      "epoch": 14.139072847682119,
      "grad_norm": 8.038314819335938,
      "learning_rate": 4.754444444444445e-05,
      "loss": 0.4757,
      "step": 721
    },
    {
      "epoch": 14.278145695364238,
      "grad_norm": 6.933023929595947,
      "learning_rate": 4.746666666666667e-05,
      "loss": 0.5422,
      "step": 728
    },
    {
      "epoch": 14.417218543046358,
      "grad_norm": 5.427736282348633,
      "learning_rate": 4.7388888888888894e-05,
      "loss": 0.45,
      "step": 735
    },
    {
      "epoch": 14.556291390728477,
      "grad_norm": 7.140816688537598,
      "learning_rate": 4.731111111111111e-05,
      "loss": 0.4801,
      "step": 742
    },
    {
      "epoch": 14.695364238410596,
      "grad_norm": 14.195199966430664,
      "learning_rate": 4.7233333333333334e-05,
      "loss": 0.4627,
      "step": 749
    },
    {
      "epoch": 14.834437086092715,
      "grad_norm": 6.867580413818359,
      "learning_rate": 4.715555555555556e-05,
      "loss": 0.4988,
      "step": 756
    },
    {
      "epoch": 14.973509933774835,
      "grad_norm": 6.6102519035339355,
      "learning_rate": 4.707777777777778e-05,
      "loss": 0.4884,
      "step": 763
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8762295081967213,
      "eval_f1_macro": 0.8721245222748117,
      "eval_f1_micro": 0.8762295081967213,
      "eval_f1_weighted": 0.8740396988842932,
      "eval_loss": 0.4364243745803833,
      "eval_precision_macro": 0.8978099123099124,
      "eval_precision_micro": 0.8762295081967213,
      "eval_precision_weighted": 0.899852774729824,
      "eval_recall_macro": 0.8744226190476191,
      "eval_recall_micro": 0.8762295081967213,
      "eval_recall_weighted": 0.8762295081967213,
      "eval_runtime": 18.6981,
      "eval_samples_per_second": 65.247,
      "eval_steps_per_second": 1.07,
      "step": 765
    },
    {
      "epoch": 15.099337748344372,
      "grad_norm": 6.755874156951904,
      "learning_rate": 4.7e-05,
      "loss": 0.4067,
      "step": 770
    },
    {
      "epoch": 15.23841059602649,
      "grad_norm": 7.634614944458008,
      "learning_rate": 4.692222222222222e-05,
      "loss": 0.5742,
      "step": 777
    },
    {
      "epoch": 15.37748344370861,
      "grad_norm": 4.661431789398193,
      "learning_rate": 4.6844444444444444e-05,
      "loss": 0.4345,
      "step": 784
    },
    {
      "epoch": 15.516556291390728,
      "grad_norm": 18.19213104248047,
      "learning_rate": 4.676666666666667e-05,
      "loss": 0.3906,
      "step": 791
    },
    {
      "epoch": 15.655629139072847,
      "grad_norm": 9.202508926391602,
      "learning_rate": 4.668888888888889e-05,
      "loss": 0.4467,
      "step": 798
    },
    {
      "epoch": 15.794701986754967,
      "grad_norm": 5.7517619132995605,
      "learning_rate": 4.6611111111111114e-05,
      "loss": 0.4794,
      "step": 805
    },
    {
      "epoch": 15.933774834437086,
      "grad_norm": 7.352263927459717,
      "learning_rate": 4.653333333333334e-05,
      "loss": 0.5808,
      "step": 812
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8844262295081967,
      "eval_f1_macro": 0.8804055188447603,
      "eval_f1_micro": 0.8844262295081967,
      "eval_f1_weighted": 0.8828176785374927,
      "eval_loss": 0.42853277921676636,
      "eval_precision_macro": 0.9033134920634922,
      "eval_precision_micro": 0.8844262295081967,
      "eval_precision_weighted": 0.9039139344262296,
      "eval_recall_macro": 0.8815000000000001,
      "eval_recall_micro": 0.8844262295081967,
      "eval_recall_weighted": 0.8844262295081967,
      "eval_runtime": 16.8381,
      "eval_samples_per_second": 72.455,
      "eval_steps_per_second": 1.188,
      "step": 816
    },
    {
      "epoch": 16.05960264900662,
      "grad_norm": 7.281948089599609,
      "learning_rate": 4.645555555555556e-05,
      "loss": 0.4086,
      "step": 819
    },
    {
      "epoch": 16.198675496688743,
      "grad_norm": 6.114387512207031,
      "learning_rate": 4.6377777777777784e-05,
      "loss": 0.4764,
      "step": 826
    },
    {
      "epoch": 16.337748344370862,
      "grad_norm": 6.659070014953613,
      "learning_rate": 4.630000000000001e-05,
      "loss": 0.3795,
      "step": 833
    },
    {
      "epoch": 16.47682119205298,
      "grad_norm": 4.916147708892822,
      "learning_rate": 4.6222222222222224e-05,
      "loss": 0.4566,
      "step": 840
    },
    {
      "epoch": 16.6158940397351,
      "grad_norm": 4.47711706161499,
      "learning_rate": 4.614444444444445e-05,
      "loss": 0.4853,
      "step": 847
    },
    {
      "epoch": 16.75496688741722,
      "grad_norm": 3.844993829727173,
      "learning_rate": 4.606666666666667e-05,
      "loss": 0.4298,
      "step": 854
    },
    {
      "epoch": 16.894039735099337,
      "grad_norm": 6.387825012207031,
      "learning_rate": 4.5988888888888894e-05,
      "loss": 0.5004,
      "step": 861
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8762295081967213,
      "eval_f1_macro": 0.8698742597272009,
      "eval_f1_micro": 0.8762295081967213,
      "eval_f1_weighted": 0.8724110373447403,
      "eval_loss": 0.4314015805721283,
      "eval_precision_macro": 0.8928979076479078,
      "eval_precision_micro": 0.8762295081967213,
      "eval_precision_weighted": 0.8934799694840678,
      "eval_recall_macro": 0.8727380952380953,
      "eval_recall_micro": 0.8762295081967213,
      "eval_recall_weighted": 0.8762295081967213,
      "eval_runtime": 16.836,
      "eval_samples_per_second": 72.464,
      "eval_steps_per_second": 1.188,
      "step": 867
    },
    {
      "epoch": 17.019867549668874,
      "grad_norm": 27.180448532104492,
      "learning_rate": 4.591111111111112e-05,
      "loss": 0.3922,
      "step": 868
    },
    {
      "epoch": 17.158940397350992,
      "grad_norm": 7.212271690368652,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.4964,
      "step": 875
    },
    {
      "epoch": 17.29801324503311,
      "grad_norm": 6.941275119781494,
      "learning_rate": 4.575555555555556e-05,
      "loss": 0.4749,
      "step": 882
    },
    {
      "epoch": 17.437086092715234,
      "grad_norm": 5.599315166473389,
      "learning_rate": 4.567777777777778e-05,
      "loss": 0.4885,
      "step": 889
    },
    {
      "epoch": 17.576158940397352,
      "grad_norm": 5.130136489868164,
      "learning_rate": 4.5600000000000004e-05,
      "loss": 0.5264,
      "step": 896
    },
    {
      "epoch": 17.71523178807947,
      "grad_norm": 6.701182842254639,
      "learning_rate": 4.552222222222222e-05,
      "loss": 0.4466,
| "step": 903 | |
| }, | |
| { | |
| "epoch": 17.85430463576159, | |
| "grad_norm": 6.988078594207764, | |
| "learning_rate": 4.5444444444444444e-05, | |
| "loss": 0.3975, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 17.99337748344371, | |
| "grad_norm": 21.04884910583496, | |
| "learning_rate": 4.536666666666667e-05, | |
| "loss": 0.3537, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8827868852459017, | |
| "eval_f1_macro": 0.8796409929939343, | |
| "eval_f1_micro": 0.8827868852459017, | |
| "eval_f1_weighted": 0.8807563045280018, | |
| "eval_loss": 0.43711456656455994, | |
| "eval_precision_macro": 0.8961841630591632, | |
| "eval_precision_micro": 0.8827868852459017, | |
| "eval_precision_weighted": 0.8967877250254299, | |
| "eval_recall_macro": 0.8815892857142857, | |
| "eval_recall_micro": 0.8827868852459017, | |
| "eval_recall_weighted": 0.8827868852459017, | |
| "eval_runtime": 16.5565, | |
| "eval_samples_per_second": 73.687, | |
| "eval_steps_per_second": 1.208, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 18.119205298013245, | |
| "grad_norm": 7.078250408172607, | |
| "learning_rate": 4.528888888888889e-05, | |
| "loss": 0.406, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 18.258278145695364, | |
| "grad_norm": 5.898381233215332, | |
| "learning_rate": 4.5211111111111114e-05, | |
| "loss": 0.3619, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 18.397350993377483, | |
| "grad_norm": 7.024068355560303, | |
| "learning_rate": 4.513333333333333e-05, | |
| "loss": 0.4441, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 18.5364238410596, | |
| "grad_norm": 6.673207759857178, | |
| "learning_rate": 4.5055555555555554e-05, | |
| "loss": 0.3495, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 18.67549668874172, | |
| "grad_norm": 7.188521385192871, | |
| "learning_rate": 4.497777777777778e-05, | |
| "loss": 0.424, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 18.814569536423843, | |
| "grad_norm": 13.439776420593262, | |
| "learning_rate": 4.49e-05, | |
| "loss": 0.3988, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 18.95364238410596, | |
| "grad_norm": 6.0843305587768555, | |
| "learning_rate": 4.4822222222222224e-05, | |
| "loss": 0.4362, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8827868852459017, | |
| "eval_f1_macro": 0.8795757517766032, | |
| "eval_f1_micro": 0.8827868852459017, | |
| "eval_f1_weighted": 0.8811126749814384, | |
| "eval_loss": 0.41565409302711487, | |
| "eval_precision_macro": 0.9002148268398268, | |
| "eval_precision_micro": 0.8827868852459017, | |
| "eval_precision_weighted": 0.9000307820594705, | |
| "eval_recall_macro": 0.8807797619047619, | |
| "eval_recall_micro": 0.8827868852459017, | |
| "eval_recall_weighted": 0.8827868852459017, | |
| "eval_runtime": 20.7708, | |
| "eval_samples_per_second": 58.736, | |
| "eval_steps_per_second": 0.963, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 19.079470198675498, | |
| "grad_norm": 5.036433696746826, | |
| "learning_rate": 4.474444444444445e-05, | |
| "loss": 0.3053, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 19.218543046357617, | |
| "grad_norm": 6.565299034118652, | |
| "learning_rate": 4.466666666666667e-05, | |
| "loss": 0.3934, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 19.357615894039736, | |
| "grad_norm": 8.689690589904785, | |
| "learning_rate": 4.4588888888888894e-05, | |
| "loss": 0.4622, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 19.496688741721854, | |
| "grad_norm": 6.253081321716309, | |
| "learning_rate": 4.451111111111112e-05, | |
| "loss": 0.4095, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 19.635761589403973, | |
| "grad_norm": 5.1961846351623535, | |
| "learning_rate": 4.443333333333334e-05, | |
| "loss": 0.3806, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 19.774834437086092, | |
| "grad_norm": 7.494758129119873, | |
| "learning_rate": 4.435555555555556e-05, | |
| "loss": 0.4382, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 19.91390728476821, | |
| "grad_norm": 4.928430557250977, | |
| "learning_rate": 4.427777777777778e-05, | |
| "loss": 0.3672, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8885245901639345, | |
| "eval_f1_macro": 0.8857740416618127, | |
| "eval_f1_micro": 0.8885245901639345, | |
| "eval_f1_weighted": 0.8862372325718009, | |
| "eval_loss": 0.404880166053772, | |
| "eval_precision_macro": 0.9038582528582529, | |
| "eval_precision_micro": 0.8885245901639345, | |
| "eval_precision_weighted": 0.9033163921688513, | |
| "eval_recall_macro": 0.887125, | |
| "eval_recall_micro": 0.8885245901639345, | |
| "eval_recall_weighted": 0.8885245901639345, | |
| "eval_runtime": 31.922, | |
| "eval_samples_per_second": 38.218, | |
| "eval_steps_per_second": 0.627, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 20.039735099337747, | |
| "grad_norm": 4.6109299659729, | |
| "learning_rate": 4.4200000000000004e-05, | |
| "loss": 0.3098, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 20.178807947019866, | |
| "grad_norm": 9.729621887207031, | |
| "learning_rate": 4.412222222222223e-05, | |
| "loss": 0.3719, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 20.31788079470199, | |
| "grad_norm": 5.514610767364502, | |
| "learning_rate": 4.404444444444445e-05, | |
| "loss": 0.3623, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 20.456953642384107, | |
| "grad_norm": 4.57627534866333, | |
| "learning_rate": 4.396666666666667e-05, | |
| "loss": 0.3866, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 20.596026490066226, | |
| "grad_norm": 5.22489595413208, | |
| "learning_rate": 4.388888888888889e-05, | |
| "loss": 0.3288, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 20.735099337748345, | |
| "grad_norm": 5.026643753051758, | |
| "learning_rate": 4.3811111111111114e-05, | |
| "loss": 0.3729, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 20.874172185430464, | |
| "grad_norm": 5.927851676940918, | |
| "learning_rate": 4.373333333333334e-05, | |
| "loss": 0.3909, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 2.4257664680480957, | |
| "learning_rate": 4.3655555555555554e-05, | |
| "loss": 0.3431, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.8885245901639345, | |
| "eval_f1_macro": 0.8852515238029943, | |
| "eval_f1_micro": 0.8885245901639345, | |
| "eval_f1_weighted": 0.8881485842454984, | |
| "eval_loss": 0.4021802544593811, | |
| "eval_precision_macro": 0.9078284354534355, | |
| "eval_precision_micro": 0.8885245901639345, | |
| "eval_precision_weighted": 0.9098325740743773, | |
| "eval_recall_macro": 0.885482142857143, | |
| "eval_recall_micro": 0.8885245901639345, | |
| "eval_recall_weighted": 0.8885245901639345, | |
| "eval_runtime": 19.889, | |
| "eval_samples_per_second": 61.341, | |
| "eval_steps_per_second": 1.006, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 21.13907284768212, | |
| "grad_norm": 4.817611217498779, | |
| "learning_rate": 4.357777777777778e-05, | |
| "loss": 0.3287, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 21.278145695364238, | |
| "grad_norm": 5.901778697967529, | |
| "learning_rate": 4.35e-05, | |
| "loss": 0.3539, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 21.417218543046356, | |
| "grad_norm": 6.392418384552002, | |
| "learning_rate": 4.3422222222222224e-05, | |
| "loss": 0.3926, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 21.556291390728475, | |
| "grad_norm": 14.076611518859863, | |
| "learning_rate": 4.334444444444445e-05, | |
| "loss": 0.3784, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 21.695364238410598, | |
| "grad_norm": 8.352983474731445, | |
| "learning_rate": 4.3266666666666664e-05, | |
| "loss": 0.3291, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 21.834437086092716, | |
| "grad_norm": 8.554953575134277, | |
| "learning_rate": 4.318888888888889e-05, | |
| "loss": 0.3553, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 21.973509933774835, | |
| "grad_norm": 5.422201633453369, | |
| "learning_rate": 4.311111111111111e-05, | |
| "loss": 0.3301, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.8868852459016393, | |
| "eval_f1_macro": 0.8844045750001632, | |
| "eval_f1_micro": 0.8868852459016393, | |
| "eval_f1_weighted": 0.8863481245798025, | |
| "eval_loss": 0.43187016248703003, | |
| "eval_precision_macro": 0.905546176046176, | |
| "eval_precision_micro": 0.8868852459016393, | |
| "eval_precision_weighted": 0.9069275010053698, | |
| "eval_recall_macro": 0.884922619047619, | |
| "eval_recall_micro": 0.8868852459016393, | |
| "eval_recall_weighted": 0.8868852459016393, | |
| "eval_runtime": 16.9877, | |
| "eval_samples_per_second": 71.817, | |
| "eval_steps_per_second": 1.177, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 22.09933774834437, | |
| "grad_norm": 4.536903381347656, | |
| "learning_rate": 4.3033333333333334e-05, | |
| "loss": 0.3081, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 22.23841059602649, | |
| "grad_norm": 5.818119525909424, | |
| "learning_rate": 4.295555555555556e-05, | |
| "loss": 0.4447, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 22.37748344370861, | |
| "grad_norm": 6.355660438537598, | |
| "learning_rate": 4.287777777777778e-05, | |
| "loss": 0.2905, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 22.516556291390728, | |
| "grad_norm": 3.7836861610412598, | |
| "learning_rate": 4.2800000000000004e-05, | |
| "loss": 0.3363, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 22.655629139072847, | |
| "grad_norm": 7.677190780639648, | |
| "learning_rate": 4.272222222222223e-05, | |
| "loss": 0.335, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 22.794701986754966, | |
| "grad_norm": 9.610170364379883, | |
| "learning_rate": 4.264444444444445e-05, | |
| "loss": 0.3224, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 22.933774834437084, | |
| "grad_norm": 4.4913458824157715, | |
| "learning_rate": 4.2566666666666674e-05, | |
| "loss": 0.3594, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.8909836065573771, | |
| "eval_f1_macro": 0.8853569522153114, | |
| "eval_f1_micro": 0.8909836065573771, | |
| "eval_f1_weighted": 0.8877271844125241, | |
| "eval_loss": 0.415208101272583, | |
| "eval_precision_macro": 0.9054862637362637, | |
| "eval_precision_micro": 0.8909836065573771, | |
| "eval_precision_weighted": 0.9061880492003442, | |
| "eval_recall_macro": 0.8882023809523809, | |
| "eval_recall_micro": 0.8909836065573771, | |
| "eval_recall_weighted": 0.8909836065573771, | |
| "eval_runtime": 21.468, | |
| "eval_samples_per_second": 56.829, | |
| "eval_steps_per_second": 0.932, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 23.05960264900662, | |
| "grad_norm": 6.537961959838867, | |
| "learning_rate": 4.248888888888889e-05, | |
| "loss": 0.3416, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 23.198675496688743, | |
| "grad_norm": 4.195661544799805, | |
| "learning_rate": 4.2411111111111114e-05, | |
| "loss": 0.3493, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 23.337748344370862, | |
| "grad_norm": 6.090582370758057, | |
| "learning_rate": 4.233333333333334e-05, | |
| "loss": 0.2816, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 23.47682119205298, | |
| "grad_norm": 4.269461154937744, | |
| "learning_rate": 4.225555555555556e-05, | |
| "loss": 0.3461, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 23.6158940397351, | |
| "grad_norm": 5.240416049957275, | |
| "learning_rate": 4.217777777777778e-05, | |
| "loss": 0.3688, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 23.75496688741722, | |
| "grad_norm": 3.2008297443389893, | |
| "learning_rate": 4.21e-05, | |
| "loss": 0.2945, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 23.894039735099337, | |
| "grad_norm": 5.42747163772583, | |
| "learning_rate": 4.2022222222222223e-05, | |
| "loss": 0.365, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.8868852459016393, | |
| "eval_f1_macro": 0.8826740438354216, | |
| "eval_f1_micro": 0.8868852459016393, | |
| "eval_f1_weighted": 0.885225590303497, | |
| "eval_loss": 0.41284599900245667, | |
| "eval_precision_macro": 0.8989790764790766, | |
| "eval_precision_micro": 0.8868852459016393, | |
| "eval_precision_weighted": 0.90095879757765, | |
| "eval_recall_macro": 0.8842916666666667, | |
| "eval_recall_micro": 0.8868852459016393, | |
| "eval_recall_weighted": 0.8868852459016393, | |
| "eval_runtime": 18.0644, | |
| "eval_samples_per_second": 67.536, | |
| "eval_steps_per_second": 1.107, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 24.019867549668874, | |
| "grad_norm": 4.910553455352783, | |
| "learning_rate": 4.194444444444445e-05, | |
| "loss": 0.27, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 24.158940397350992, | |
| "grad_norm": 4.917506694793701, | |
| "learning_rate": 4.186666666666667e-05, | |
| "loss": 0.2662, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 24.29801324503311, | |
| "grad_norm": 6.041675090789795, | |
| "learning_rate": 4.178888888888889e-05, | |
| "loss": 0.3693, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 24.437086092715234, | |
| "grad_norm": 6.5168776512146, | |
| "learning_rate": 4.171111111111111e-05, | |
| "loss": 0.2868, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 24.576158940397352, | |
| "grad_norm": 3.36521315574646, | |
| "learning_rate": 4.1633333333333333e-05, | |
| "loss": 0.4133, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 24.71523178807947, | |
| "grad_norm": 4.277838230133057, | |
| "learning_rate": 4.155555555555556e-05, | |
| "loss": 0.2291, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 24.85430463576159, | |
| "grad_norm": 5.821409225463867, | |
| "learning_rate": 4.147777777777778e-05, | |
| "loss": 0.2927, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 24.99337748344371, | |
| "grad_norm": 6.532901763916016, | |
| "learning_rate": 4.14e-05, | |
| "loss": 0.3711, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.8885245901639345, | |
| "eval_f1_macro": 0.8860624318491966, | |
| "eval_f1_micro": 0.8885245901639345, | |
| "eval_f1_weighted": 0.8860652284338398, | |
| "eval_loss": 0.3975684940814972, | |
| "eval_precision_macro": 0.9022628066378067, | |
| "eval_precision_micro": 0.8885245901639345, | |
| "eval_precision_weighted": 0.901722671681688, | |
| "eval_recall_macro": 0.8880000000000001, | |
| "eval_recall_micro": 0.8885245901639345, | |
| "eval_recall_weighted": 0.8885245901639345, | |
| "eval_runtime": 19.8974, | |
| "eval_samples_per_second": 61.315, | |
| "eval_steps_per_second": 1.005, | |
| "step": 1275 | |
| } | |
| ], | |
| "logging_steps": 7, | |
| "max_steps": 5000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 7, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.44898430624727e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |