{
  "best_metric": 0.616822429906542,
  "best_model_checkpoint": "SW2-RHS-DA\\checkpoint-292",
  "epoch": 39.111111111111114,
  "eval_steps": 500,
  "global_step": 880,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.44,
      "learning_rate": 3.954545454545455e-05,
      "loss": 1.6539,
      "step": 10
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.909090909090909e-05,
      "loss": 1.302,
      "step": 20
    },
    {
      "epoch": 0.98,
      "eval_accuracy": 0.411214953271028,
      "eval_loss": 2.847358226776123,
      "eval_runtime": 2.4491,
      "eval_samples_per_second": 43.69,
      "eval_steps_per_second": 2.858,
      "step": 22
    },
    {
      "epoch": 1.33,
      "learning_rate": 3.863636363636364e-05,
      "loss": 0.8548,
      "step": 30
    },
    {
      "epoch": 1.78,
      "learning_rate": 3.818181818181819e-05,
      "loss": 0.6224,
      "step": 40
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.411214953271028,
      "eval_loss": 1.4649972915649414,
      "eval_runtime": 2.5381,
      "eval_samples_per_second": 42.157,
      "eval_steps_per_second": 2.758,
      "step": 45
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.772727272727273e-05,
      "loss": 0.4983,
      "step": 50
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.7272727272727276e-05,
      "loss": 0.3905,
      "step": 60
    },
    {
      "epoch": 2.98,
      "eval_accuracy": 0.411214953271028,
      "eval_loss": 1.486470103263855,
      "eval_runtime": 2.5569,
      "eval_samples_per_second": 41.847,
      "eval_steps_per_second": 2.738,
      "step": 67
    },
    {
      "epoch": 3.11,
      "learning_rate": 3.681818181818182e-05,
      "loss": 0.3143,
      "step": 70
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.2038,
      "step": 80
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.590909090909091e-05,
      "loss": 0.1416,
      "step": 90
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 0.9452531933784485,
      "eval_runtime": 2.8177,
      "eval_samples_per_second": 37.974,
      "eval_steps_per_second": 2.484,
      "step": 90
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.545454545454546e-05,
      "loss": 0.1267,
      "step": 100
    },
    {
      "epoch": 4.89,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 0.1116,
      "step": 110
    },
    {
      "epoch": 4.98,
      "eval_accuracy": 0.5514018691588785,
      "eval_loss": 0.9800727963447571,
      "eval_runtime": 2.4796,
      "eval_samples_per_second": 43.152,
      "eval_steps_per_second": 2.823,
      "step": 112
    },
    {
      "epoch": 5.33,
      "learning_rate": 3.454545454545455e-05,
      "loss": 0.1516,
      "step": 120
    },
    {
      "epoch": 5.78,
      "learning_rate": 3.409090909090909e-05,
      "loss": 0.0866,
      "step": 130
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6074766355140186,
      "eval_loss": 1.520142912864685,
      "eval_runtime": 2.5261,
      "eval_samples_per_second": 42.358,
      "eval_steps_per_second": 2.771,
      "step": 135
    },
    {
      "epoch": 6.22,
      "learning_rate": 3.363636363636364e-05,
      "loss": 0.1082,
      "step": 140
    },
    {
      "epoch": 6.67,
      "learning_rate": 3.318181818181819e-05,
      "loss": 0.0579,
      "step": 150
    },
    {
      "epoch": 6.98,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 1.723357915878296,
      "eval_runtime": 2.4956,
      "eval_samples_per_second": 42.876,
      "eval_steps_per_second": 2.805,
      "step": 157
    },
    {
      "epoch": 7.11,
      "learning_rate": 3.272727272727273e-05,
      "loss": 0.0679,
      "step": 160
    },
    {
      "epoch": 7.56,
      "learning_rate": 3.2272727272727276e-05,
      "loss": 0.1047,
      "step": 170
    },
    {
      "epoch": 8.0,
      "learning_rate": 3.181818181818182e-05,
      "loss": 0.0667,
      "step": 180
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 1.9649068117141724,
      "eval_runtime": 2.4861,
      "eval_samples_per_second": 43.04,
      "eval_steps_per_second": 2.816,
      "step": 180
    },
    {
      "epoch": 8.44,
      "learning_rate": 3.1363636363636365e-05,
      "loss": 0.0661,
      "step": 190
    },
    {
      "epoch": 8.89,
      "learning_rate": 3.090909090909091e-05,
      "loss": 0.0664,
      "step": 200
    },
    {
      "epoch": 8.98,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 1.9504597187042236,
      "eval_runtime": 2.4306,
      "eval_samples_per_second": 44.022,
      "eval_steps_per_second": 2.88,
      "step": 202
    },
    {
      "epoch": 9.33,
      "learning_rate": 3.0454545454545456e-05,
      "loss": 0.0647,
      "step": 210
    },
    {
      "epoch": 9.78,
      "learning_rate": 3.0000000000000004e-05,
      "loss": 0.0742,
      "step": 220
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 1.9448089599609375,
      "eval_runtime": 2.4836,
      "eval_samples_per_second": 43.083,
      "eval_steps_per_second": 2.819,
      "step": 225
    },
    {
      "epoch": 10.22,
      "learning_rate": 2.954545454545455e-05,
      "loss": 0.0588,
      "step": 230
    },
    {
      "epoch": 10.67,
      "learning_rate": 2.9090909090909093e-05,
      "loss": 0.0558,
      "step": 240
    },
    {
      "epoch": 10.98,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 1.9545286893844604,
      "eval_runtime": 2.5206,
      "eval_samples_per_second": 42.45,
      "eval_steps_per_second": 2.777,
      "step": 247
    },
    {
      "epoch": 11.11,
      "learning_rate": 2.863636363636364e-05,
      "loss": 0.0699,
      "step": 250
    },
    {
      "epoch": 11.56,
      "learning_rate": 2.8181818181818185e-05,
      "loss": 0.0564,
      "step": 260
    },
    {
      "epoch": 12.0,
      "learning_rate": 2.7727272727272732e-05,
      "loss": 0.0475,
      "step": 270
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.1516401767730713,
      "eval_runtime": 2.4679,
      "eval_samples_per_second": 43.356,
      "eval_steps_per_second": 2.836,
      "step": 270
    },
    {
      "epoch": 12.44,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.0408,
      "step": 280
    },
    {
      "epoch": 12.89,
      "learning_rate": 2.6818181818181817e-05,
      "loss": 0.114,
      "step": 290
    },
    {
      "epoch": 12.98,
      "eval_accuracy": 0.616822429906542,
      "eval_loss": 2.1001620292663574,
      "eval_runtime": 2.4814,
      "eval_samples_per_second": 43.122,
      "eval_steps_per_second": 2.821,
      "step": 292
    },
    {
      "epoch": 13.33,
      "learning_rate": 2.6363636363636365e-05,
      "loss": 0.0592,
      "step": 300
    },
    {
      "epoch": 13.78,
      "learning_rate": 2.590909090909091e-05,
      "loss": 0.051,
      "step": 310
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 2.2643392086029053,
      "eval_runtime": 2.4411,
      "eval_samples_per_second": 43.833,
      "eval_steps_per_second": 2.868,
      "step": 315
    },
    {
      "epoch": 14.22,
      "learning_rate": 2.5454545454545457e-05,
      "loss": 0.0575,
      "step": 320
    },
    {
      "epoch": 14.67,
      "learning_rate": 2.5e-05,
      "loss": 0.0318,
      "step": 330
    },
    {
      "epoch": 14.98,
      "eval_accuracy": 0.5981308411214953,
      "eval_loss": 2.346831798553467,
      "eval_runtime": 2.4771,
      "eval_samples_per_second": 43.196,
      "eval_steps_per_second": 2.826,
      "step": 337
    },
    {
      "epoch": 15.11,
      "learning_rate": 2.454545454545455e-05,
      "loss": 0.0273,
      "step": 340
    },
    {
      "epoch": 15.56,
      "learning_rate": 2.4090909090909093e-05,
      "loss": 0.0232,
      "step": 350
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.363636363636364e-05,
      "loss": 0.0673,
      "step": 360
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6074766355140186,
      "eval_loss": 2.334057092666626,
      "eval_runtime": 2.5831,
      "eval_samples_per_second": 41.422,
      "eval_steps_per_second": 2.71,
      "step": 360
    },
    {
      "epoch": 16.44,
      "learning_rate": 2.3181818181818185e-05,
      "loss": 0.0402,
      "step": 370
    },
    {
      "epoch": 16.89,
      "learning_rate": 2.2727272727272733e-05,
      "loss": 0.0566,
      "step": 380
    },
    {
      "epoch": 16.98,
      "eval_accuracy": 0.6074766355140186,
      "eval_loss": 2.319139003753662,
      "eval_runtime": 2.4296,
      "eval_samples_per_second": 44.04,
      "eval_steps_per_second": 2.881,
      "step": 382
    },
    {
      "epoch": 17.33,
      "learning_rate": 2.2272727272727274e-05,
      "loss": 0.0445,
      "step": 390
    },
    {
      "epoch": 17.78,
      "learning_rate": 2.1818181818181818e-05,
      "loss": 0.0543,
      "step": 400
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.616822429906542,
      "eval_loss": 2.280747413635254,
      "eval_runtime": 2.5226,
      "eval_samples_per_second": 42.417,
      "eval_steps_per_second": 2.775,
      "step": 405
    },
    {
      "epoch": 18.22,
      "learning_rate": 2.1363636363636365e-05,
      "loss": 0.0419,
      "step": 410
    },
    {
      "epoch": 18.67,
      "learning_rate": 2.090909090909091e-05,
      "loss": 0.0458,
      "step": 420
    },
    {
      "epoch": 18.98,
      "eval_accuracy": 0.616822429906542,
      "eval_loss": 2.2148241996765137,
      "eval_runtime": 2.5536,
      "eval_samples_per_second": 41.901,
      "eval_steps_per_second": 2.741,
      "step": 427
    },
    {
      "epoch": 19.11,
      "learning_rate": 2.0454545454545457e-05,
      "loss": 0.0255,
      "step": 430
    },
    {
      "epoch": 19.56,
      "learning_rate": 2e-05,
      "loss": 0.0306,
      "step": 440
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.9545454545454546e-05,
      "loss": 0.0421,
      "step": 450
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.5467782020568848,
      "eval_runtime": 2.4846,
      "eval_samples_per_second": 43.065,
      "eval_steps_per_second": 2.817,
      "step": 450
    },
    {
      "epoch": 20.44,
      "learning_rate": 1.9090909090909094e-05,
      "loss": 0.0509,
      "step": 460
    },
    {
      "epoch": 20.89,
      "learning_rate": 1.8636363636363638e-05,
      "loss": 0.0139,
      "step": 470
    },
    {
      "epoch": 20.98,
      "eval_accuracy": 0.616822429906542,
      "eval_loss": 2.2408316135406494,
      "eval_runtime": 2.5411,
      "eval_samples_per_second": 42.108,
      "eval_steps_per_second": 2.755,
      "step": 472
    },
    {
      "epoch": 21.33,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.0208,
      "step": 480
    },
    {
      "epoch": 21.78,
      "learning_rate": 1.772727272727273e-05,
      "loss": 0.012,
      "step": 490
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.568880796432495,
      "eval_runtime": 2.4491,
      "eval_samples_per_second": 43.69,
      "eval_steps_per_second": 2.858,
      "step": 495
    },
    {
      "epoch": 22.22,
      "learning_rate": 1.7272727272727274e-05,
      "loss": 0.0375,
      "step": 500
    },
    {
      "epoch": 22.67,
      "learning_rate": 1.681818181818182e-05,
      "loss": 0.017,
      "step": 510
    },
    {
      "epoch": 22.98,
      "eval_accuracy": 0.616822429906542,
      "eval_loss": 2.548487901687622,
      "eval_runtime": 2.5416,
      "eval_samples_per_second": 42.1,
      "eval_steps_per_second": 2.754,
      "step": 517
    },
    {
      "epoch": 23.11,
      "learning_rate": 1.6363636363636366e-05,
      "loss": 0.0293,
      "step": 520
    },
    {
      "epoch": 23.56,
      "learning_rate": 1.590909090909091e-05,
      "loss": 0.0482,
      "step": 530
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.5454545454545454e-05,
      "loss": 0.0529,
      "step": 540
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.6074766355140186,
      "eval_loss": 2.674633502960205,
      "eval_runtime": 2.5767,
      "eval_samples_per_second": 41.526,
      "eval_steps_per_second": 2.717,
      "step": 540
    },
    {
      "epoch": 24.44,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.0063,
      "step": 550
    },
    {
      "epoch": 24.89,
      "learning_rate": 1.4545454545454546e-05,
      "loss": 0.0414,
      "step": 560
    },
    {
      "epoch": 24.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.7693288326263428,
      "eval_runtime": 2.6239,
      "eval_samples_per_second": 40.778,
      "eval_steps_per_second": 2.668,
      "step": 562
    },
    {
      "epoch": 25.33,
      "learning_rate": 1.4090909090909092e-05,
      "loss": 0.0203,
      "step": 570
    },
    {
      "epoch": 25.78,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 0.0158,
      "step": 580
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.7446506023406982,
      "eval_runtime": 2.5651,
      "eval_samples_per_second": 41.714,
      "eval_steps_per_second": 2.729,
      "step": 585
    },
    {
      "epoch": 26.22,
      "learning_rate": 1.3181818181818183e-05,
      "loss": 0.0185,
      "step": 590
    },
    {
      "epoch": 26.67,
      "learning_rate": 1.2727272727272728e-05,
      "loss": 0.0205,
      "step": 600
    },
    {
      "epoch": 26.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.8566343784332275,
      "eval_runtime": 2.5211,
      "eval_samples_per_second": 42.442,
      "eval_steps_per_second": 2.777,
      "step": 607
    },
    {
      "epoch": 27.11,
      "learning_rate": 1.2272727272727274e-05,
      "loss": 0.04,
      "step": 610
    },
    {
      "epoch": 27.56,
      "learning_rate": 1.181818181818182e-05,
      "loss": 0.0133,
      "step": 620
    },
    {
      "epoch": 28.0,
      "learning_rate": 1.1363636363636366e-05,
      "loss": 0.0205,
      "step": 630
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.846874713897705,
      "eval_runtime": 2.4371,
      "eval_samples_per_second": 43.905,
      "eval_steps_per_second": 2.872,
      "step": 630
    },
    {
      "epoch": 28.44,
      "learning_rate": 1.0909090909090909e-05,
      "loss": 0.0211,
      "step": 640
    },
    {
      "epoch": 28.89,
      "learning_rate": 1.0454545454545455e-05,
      "loss": 0.006,
      "step": 650
    },
    {
      "epoch": 28.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 2.9508450031280518,
      "eval_runtime": 2.4616,
      "eval_samples_per_second": 43.468,
      "eval_steps_per_second": 2.844,
      "step": 652
    },
    {
      "epoch": 29.33,
      "learning_rate": 1e-05,
      "loss": 0.0198,
      "step": 660
    },
    {
      "epoch": 29.78,
      "learning_rate": 9.545454545454547e-06,
      "loss": 0.0061,
      "step": 670
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.0560247898101807,
      "eval_runtime": 2.6005,
      "eval_samples_per_second": 41.145,
      "eval_steps_per_second": 2.692,
      "step": 675
    },
    {
      "epoch": 30.22,
      "learning_rate": 9.090909090909091e-06,
      "loss": 0.0124,
      "step": 680
    },
    {
      "epoch": 30.67,
      "learning_rate": 8.636363636363637e-06,
      "loss": 0.0227,
      "step": 690
    },
    {
      "epoch": 30.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.0431346893310547,
      "eval_runtime": 2.5396,
      "eval_samples_per_second": 42.132,
      "eval_steps_per_second": 2.756,
      "step": 697
    },
    {
      "epoch": 31.11,
      "learning_rate": 8.181818181818183e-06,
      "loss": 0.0261,
      "step": 700
    },
    {
      "epoch": 31.56,
      "learning_rate": 7.727272727272727e-06,
      "loss": 0.0174,
      "step": 710
    },
    {
      "epoch": 32.0,
      "learning_rate": 7.272727272727273e-06,
      "loss": 0.034,
      "step": 720
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.0496559143066406,
      "eval_runtime": 2.4746,
      "eval_samples_per_second": 43.24,
      "eval_steps_per_second": 2.829,
      "step": 720
    },
    {
      "epoch": 32.44,
      "learning_rate": 6.818181818181818e-06,
      "loss": 0.0277,
      "step": 730
    },
    {
      "epoch": 32.89,
      "learning_rate": 6.363636363636364e-06,
      "loss": 0.0039,
      "step": 740
    },
    {
      "epoch": 32.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.0935721397399902,
      "eval_runtime": 2.5656,
      "eval_samples_per_second": 41.706,
      "eval_steps_per_second": 2.728,
      "step": 742
    },
    {
      "epoch": 33.33,
      "learning_rate": 5.90909090909091e-06,
      "loss": 0.02,
      "step": 750
    },
    {
      "epoch": 33.78,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 0.0031,
      "step": 760
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.1158316135406494,
      "eval_runtime": 2.4251,
      "eval_samples_per_second": 44.123,
      "eval_steps_per_second": 2.887,
      "step": 765
    },
    {
      "epoch": 34.22,
      "learning_rate": 5e-06,
      "loss": 0.0178,
      "step": 770
    },
    {
      "epoch": 34.67,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.0118,
      "step": 780
    },
    {
      "epoch": 34.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.109339714050293,
      "eval_runtime": 2.5796,
      "eval_samples_per_second": 41.479,
      "eval_steps_per_second": 2.714,
      "step": 787
    },
    {
      "epoch": 35.11,
      "learning_rate": 4.0909090909090915e-06,
      "loss": 0.0061,
      "step": 790
    },
    {
      "epoch": 35.56,
      "learning_rate": 3.6363636363636366e-06,
      "loss": 0.018,
      "step": 800
    },
    {
      "epoch": 36.0,
      "learning_rate": 3.181818181818182e-06,
      "loss": 0.0045,
      "step": 810
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.088502883911133,
      "eval_runtime": 2.4691,
      "eval_samples_per_second": 43.336,
      "eval_steps_per_second": 2.835,
      "step": 810
    },
    {
      "epoch": 36.44,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 0.0224,
      "step": 820
    },
    {
      "epoch": 36.89,
      "learning_rate": 2.2727272727272728e-06,
      "loss": 0.0168,
      "step": 830
    },
    {
      "epoch": 36.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.0813817977905273,
      "eval_runtime": 2.46,
      "eval_samples_per_second": 43.495,
      "eval_steps_per_second": 2.845,
      "step": 832
    },
    {
      "epoch": 37.33,
      "learning_rate": 1.8181818181818183e-06,
      "loss": 0.0106,
      "step": 840
    },
    {
      "epoch": 37.78,
      "learning_rate": 1.3636363636363636e-06,
      "loss": 0.0071,
      "step": 850
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.073270320892334,
      "eval_runtime": 2.8765,
      "eval_samples_per_second": 37.198,
      "eval_steps_per_second": 2.433,
      "step": 855
    },
    {
      "epoch": 38.22,
      "learning_rate": 9.090909090909091e-07,
      "loss": 0.0123,
      "step": 860
    },
    {
      "epoch": 38.67,
      "learning_rate": 4.5454545454545457e-07,
      "loss": 0.0238,
      "step": 870
    },
    {
      "epoch": 38.98,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.0825016498565674,
      "eval_runtime": 2.6096,
      "eval_samples_per_second": 41.002,
      "eval_steps_per_second": 2.682,
      "step": 877
    },
    {
      "epoch": 39.11,
      "learning_rate": 0.0,
      "loss": 0.0072,
      "step": 880
    },
    {
      "epoch": 39.11,
      "eval_accuracy": 0.5887850467289719,
      "eval_loss": 3.082833766937256,
      "eval_runtime": 2.596,
      "eval_samples_per_second": 41.217,
      "eval_steps_per_second": 2.696,
      "step": 880
    },
    {
      "epoch": 39.11,
      "step": 880,
      "total_flos": 1.8272580263466762e+18,
      "train_loss": 0.10380522197493437,
      "train_runtime": 1214.1801,
      "train_samples_per_second": 47.308,
      "train_steps_per_second": 0.725
    }
  ],
  "logging_steps": 10,
  "max_steps": 880,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 1.8272580263466762e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}