{ "best_metric": 0.8198928704505658, "best_model_checkpoint": "/root/kyivnotkiev/data/cl/model/benchmark_runs/deberta-v3-large_lr1e-05_seed456_weighted/checkpoint-1840", "epoch": 1.0, "eval_steps": 500, "global_step": 1840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02717391304347826, "grad_norm": 6.552072048187256, "learning_rate": 9.057971014492754e-07, "loss": 2.4236, "step": 50 }, { "epoch": 0.05434782608695652, "grad_norm": 5.586729049682617, "learning_rate": 1.8115942028985508e-06, "loss": 2.4231, "step": 100 }, { "epoch": 0.08152173913043478, "grad_norm": 3.997156858444214, "learning_rate": 2.6992753623188405e-06, "loss": 2.402, "step": 150 }, { "epoch": 0.10869565217391304, "grad_norm": 6.317577362060547, "learning_rate": 3.6050724637681163e-06, "loss": 2.3855, "step": 200 }, { "epoch": 0.1358695652173913, "grad_norm": 9.232672691345215, "learning_rate": 4.510869565217392e-06, "loss": 2.2186, "step": 250 }, { "epoch": 0.16304347826086957, "grad_norm": 9.297445297241211, "learning_rate": 5.416666666666667e-06, "loss": 1.9197, "step": 300 }, { "epoch": 0.19021739130434784, "grad_norm": 9.51098346710205, "learning_rate": 6.322463768115943e-06, "loss": 1.6219, "step": 350 }, { "epoch": 0.21739130434782608, "grad_norm": 8.97563648223877, "learning_rate": 7.228260869565218e-06, "loss": 1.1933, "step": 400 }, { "epoch": 0.24456521739130435, "grad_norm": 17.16935157775879, "learning_rate": 8.115942028985508e-06, "loss": 0.9993, "step": 450 }, { "epoch": 0.2717391304347826, "grad_norm": 19.718891143798828, "learning_rate": 9.021739130434784e-06, "loss": 0.8655, "step": 500 }, { "epoch": 0.29891304347826086, "grad_norm": 11.259733200073242, "learning_rate": 9.927536231884058e-06, "loss": 0.8272, "step": 550 }, { "epoch": 0.32608695652173914, "grad_norm": 24.362035751342773, "learning_rate": 9.909420289855073e-06, "loss": 0.6588, "step": 600 }, { "epoch": 0.3532608695652174, "grad_norm": 14.829962730407715, "learning_rate": 9.808776167471822e-06, "loss": 0.7243, "step": 650 }, { "epoch": 0.3804347826086957, "grad_norm": 21.006099700927734, "learning_rate": 9.710144927536233e-06, "loss": 0.7559, "step": 700 }, { "epoch": 0.4076086956521739, "grad_norm": 4.0826263427734375, "learning_rate": 9.60950080515298e-06, "loss": 0.7237, "step": 750 }, { "epoch": 0.43478260869565216, "grad_norm": 16.393157958984375, "learning_rate": 9.508856682769728e-06, "loss": 0.6039, "step": 800 }, { "epoch": 0.46195652173913043, "grad_norm": 15.008636474609375, "learning_rate": 9.408212560386473e-06, "loss": 0.6314, "step": 850 }, { "epoch": 0.4891304347826087, "grad_norm": 6.631000518798828, "learning_rate": 9.307568438003222e-06, "loss": 0.7266, "step": 900 }, { "epoch": 0.5163043478260869, "grad_norm": 9.905613899230957, "learning_rate": 9.206924315619969e-06, "loss": 0.6624, "step": 950 }, { "epoch": 0.5434782608695652, "grad_norm": 13.82048511505127, "learning_rate": 9.106280193236716e-06, "loss": 0.5971, "step": 1000 }, { "epoch": 0.5706521739130435, "grad_norm": 10.97307300567627, "learning_rate": 9.005636070853463e-06, "loss": 0.6621, "step": 1050 }, { "epoch": 0.5978260869565217, "grad_norm": 6.645138263702393, "learning_rate": 8.90499194847021e-06, "loss": 0.6195, "step": 1100 }, { "epoch": 0.625, "grad_norm": 19.04903221130371, "learning_rate": 8.804347826086957e-06, "loss": 0.5684, "step": 1150 }, { "epoch": 0.6521739130434783, "grad_norm": 3.2631289958953857, "learning_rate": 8.703703703703705e-06, "loss": 0.577, "step": 1200 }, { "epoch": 0.6793478260869565, "grad_norm": 5.16421365737915, "learning_rate": 8.603059581320452e-06, "loss": 0.5752, "step": 1250 }, { "epoch": 0.7065217391304348, "grad_norm": 14.548922538757324, "learning_rate": 8.502415458937199e-06, "loss": 0.5222, "step": 1300 }, { "epoch": 0.7336956521739131, "grad_norm": 35.01688003540039, "learning_rate": 8.401771336553946e-06, "loss": 0.7212, "step": 1350 }, { "epoch": 0.7608695652173914, "grad_norm": 12.004179000854492, "learning_rate": 8.301127214170693e-06, "loss": 0.5907, "step": 1400 }, { "epoch": 0.7880434782608695, "grad_norm": 17.919300079345703, "learning_rate": 8.20048309178744e-06, "loss": 0.5463, "step": 1450 }, { "epoch": 0.8152173913043478, "grad_norm": 42.389678955078125, "learning_rate": 8.099838969404187e-06, "loss": 0.5077, "step": 1500 }, { "epoch": 0.842391304347826, "grad_norm": 3.7581164836883545, "learning_rate": 7.999194847020934e-06, "loss": 0.5113, "step": 1550 }, { "epoch": 0.8695652173913043, "grad_norm": 19.534931182861328, "learning_rate": 7.898550724637682e-06, "loss": 0.552, "step": 1600 }, { "epoch": 0.8967391304347826, "grad_norm": 70.55868530273438, "learning_rate": 7.797906602254429e-06, "loss": 0.5187, "step": 1650 }, { "epoch": 0.9239130434782609, "grad_norm": 7.407011032104492, "learning_rate": 7.697262479871176e-06, "loss": 0.5304, "step": 1700 }, { "epoch": 0.9510869565217391, "grad_norm": 17.54728889465332, "learning_rate": 7.596618357487924e-06, "loss": 0.5928, "step": 1750 }, { "epoch": 0.9782608695652174, "grad_norm": 9.951366424560547, "learning_rate": 7.49597423510467e-06, "loss": 0.509, "step": 1800 }, { "epoch": 1.0, "eval_accuracy": 0.87822777928785, "eval_f1_macro": 0.8198928704505658, "eval_f1_weighted": 0.8776767663384684, "eval_loss": 0.5368145108222961, "eval_runtime": 20.0124, "eval_samples_per_second": 183.836, "eval_steps_per_second": 5.746, "step": 1840 } ], "logging_steps": 50, "max_steps": 5520, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.742962313493709e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }