| { |
| "best_metric": 0.8198928704505658, |
| "best_model_checkpoint": "/root/kyivnotkiev/data/cl/model/benchmark_runs/deberta-v3-large_lr1e-05_seed456_weighted/checkpoint-1840", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1840, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02717391304347826, |
| "grad_norm": 6.552072048187256, |
| "learning_rate": 9.057971014492754e-07, |
| "loss": 2.4236, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05434782608695652, |
| "grad_norm": 5.586729049682617, |
| "learning_rate": 1.8115942028985508e-06, |
| "loss": 2.4231, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08152173913043478, |
| "grad_norm": 3.997156858444214, |
| "learning_rate": 2.6992753623188405e-06, |
| "loss": 2.402, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10869565217391304, |
| "grad_norm": 6.317577362060547, |
| "learning_rate": 3.6050724637681163e-06, |
| "loss": 2.3855, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1358695652173913, |
| "grad_norm": 9.232672691345215, |
| "learning_rate": 4.510869565217392e-06, |
| "loss": 2.2186, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16304347826086957, |
| "grad_norm": 9.297445297241211, |
| "learning_rate": 5.416666666666667e-06, |
| "loss": 1.9197, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.19021739130434784, |
| "grad_norm": 9.51098346710205, |
| "learning_rate": 6.322463768115943e-06, |
| "loss": 1.6219, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21739130434782608, |
| "grad_norm": 8.97563648223877, |
| "learning_rate": 7.228260869565218e-06, |
| "loss": 1.1933, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24456521739130435, |
| "grad_norm": 17.16935157775879, |
| "learning_rate": 8.115942028985508e-06, |
| "loss": 0.9993, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2717391304347826, |
| "grad_norm": 19.718891143798828, |
| "learning_rate": 9.021739130434784e-06, |
| "loss": 0.8655, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.29891304347826086, |
| "grad_norm": 11.259733200073242, |
| "learning_rate": 9.927536231884058e-06, |
| "loss": 0.8272, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.32608695652173914, |
| "grad_norm": 24.362035751342773, |
| "learning_rate": 9.909420289855073e-06, |
| "loss": 0.6588, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3532608695652174, |
| "grad_norm": 14.829962730407715, |
| "learning_rate": 9.808776167471822e-06, |
| "loss": 0.7243, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3804347826086957, |
| "grad_norm": 21.006099700927734, |
| "learning_rate": 9.710144927536233e-06, |
| "loss": 0.7559, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4076086956521739, |
| "grad_norm": 4.0826263427734375, |
| "learning_rate": 9.60950080515298e-06, |
| "loss": 0.7237, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 16.393157958984375, |
| "learning_rate": 9.508856682769728e-06, |
| "loss": 0.6039, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.46195652173913043, |
| "grad_norm": 15.008636474609375, |
| "learning_rate": 9.408212560386473e-06, |
| "loss": 0.6314, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4891304347826087, |
| "grad_norm": 6.631000518798828, |
| "learning_rate": 9.307568438003222e-06, |
| "loss": 0.7266, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5163043478260869, |
| "grad_norm": 9.905613899230957, |
| "learning_rate": 9.206924315619969e-06, |
| "loss": 0.6624, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5434782608695652, |
| "grad_norm": 13.82048511505127, |
| "learning_rate": 9.106280193236716e-06, |
| "loss": 0.5971, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5706521739130435, |
| "grad_norm": 10.97307300567627, |
| "learning_rate": 9.005636070853463e-06, |
| "loss": 0.6621, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5978260869565217, |
| "grad_norm": 6.645138263702393, |
| "learning_rate": 8.90499194847021e-06, |
| "loss": 0.6195, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 19.04903221130371, |
| "learning_rate": 8.804347826086957e-06, |
| "loss": 0.5684, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6521739130434783, |
| "grad_norm": 3.2631289958953857, |
| "learning_rate": 8.703703703703705e-06, |
| "loss": 0.577, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6793478260869565, |
| "grad_norm": 5.16421365737915, |
| "learning_rate": 8.603059581320452e-06, |
| "loss": 0.5752, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7065217391304348, |
| "grad_norm": 14.548922538757324, |
| "learning_rate": 8.502415458937199e-06, |
| "loss": 0.5222, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7336956521739131, |
| "grad_norm": 35.01688003540039, |
| "learning_rate": 8.401771336553946e-06, |
| "loss": 0.7212, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7608695652173914, |
| "grad_norm": 12.004179000854492, |
| "learning_rate": 8.301127214170693e-06, |
| "loss": 0.5907, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7880434782608695, |
| "grad_norm": 17.919300079345703, |
| "learning_rate": 8.20048309178744e-06, |
| "loss": 0.5463, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8152173913043478, |
| "grad_norm": 42.389678955078125, |
| "learning_rate": 8.099838969404187e-06, |
| "loss": 0.5077, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.842391304347826, |
| "grad_norm": 3.7581164836883545, |
| "learning_rate": 7.999194847020934e-06, |
| "loss": 0.5113, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 19.534931182861328, |
| "learning_rate": 7.898550724637682e-06, |
| "loss": 0.552, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8967391304347826, |
| "grad_norm": 70.55868530273438, |
| "learning_rate": 7.797906602254429e-06, |
| "loss": 0.5187, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9239130434782609, |
| "grad_norm": 7.407011032104492, |
| "learning_rate": 7.697262479871176e-06, |
| "loss": 0.5304, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9510869565217391, |
| "grad_norm": 17.54728889465332, |
| "learning_rate": 7.596618357487924e-06, |
| "loss": 0.5928, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9782608695652174, |
| "grad_norm": 9.951366424560547, |
| "learning_rate": 7.49597423510467e-06, |
| "loss": 0.509, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.87822777928785, |
| "eval_f1_macro": 0.8198928704505658, |
| "eval_f1_weighted": 0.8776767663384684, |
| "eval_loss": 0.5368145108222961, |
| "eval_runtime": 20.0124, |
| "eval_samples_per_second": 183.836, |
| "eval_steps_per_second": 5.746, |
| "step": 1840 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 5520, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.742962313493709e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|