| { | |
| "best_metric": 0.6381103992462158, | |
| "best_model_checkpoint": "./vit-base-beans/checkpoint-1200", | |
| "epoch": 8.0, | |
| "global_step": 3120, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019935897435897437, | |
| "loss": 1.7678, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00019871794871794874, | |
| "loss": 1.7014, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001980769230769231, | |
| "loss": 1.4957, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00019743589743589744, | |
| "loss": 1.5043, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.44556677890011226, | |
| "eval_loss": 1.493240237236023, | |
| "eval_runtime": 59.3866, | |
| "eval_samples_per_second": 60.081, | |
| "eval_steps_per_second": 7.51, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019679487179487178, | |
| "loss": 1.3931, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019615384615384615, | |
| "loss": 1.3029, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001955128205128205, | |
| "loss": 1.1753, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019487179487179487, | |
| "loss": 1.3894, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.5297418630751964, | |
| "eval_loss": 1.2430940866470337, | |
| "eval_runtime": 38.1635, | |
| "eval_samples_per_second": 93.492, | |
| "eval_steps_per_second": 11.687, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00019423076923076924, | |
| "loss": 1.118, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001935897435897436, | |
| "loss": 1.1395, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00019294871794871797, | |
| "loss": 1.0946, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00019230769230769233, | |
| "loss": 1.208, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.6464646464646465, | |
| "eval_loss": 0.9881709218025208, | |
| "eval_runtime": 34.6718, | |
| "eval_samples_per_second": 102.908, | |
| "eval_steps_per_second": 12.863, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00019166666666666667, | |
| "loss": 0.9775, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019102564102564104, | |
| "loss": 1.0216, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00019038461538461538, | |
| "loss": 1.1942, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00018974358974358974, | |
| "loss": 0.9847, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.6599326599326599, | |
| "eval_loss": 0.950343132019043, | |
| "eval_runtime": 40.3536, | |
| "eval_samples_per_second": 88.418, | |
| "eval_steps_per_second": 11.052, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001891025641025641, | |
| "loss": 0.9707, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00018846153846153847, | |
| "loss": 1.1941, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00018782051282051283, | |
| "loss": 1.0309, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001871794871794872, | |
| "loss": 1.102, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.6374859708193041, | |
| "eval_loss": 0.9868837594985962, | |
| "eval_runtime": 32.1858, | |
| "eval_samples_per_second": 110.856, | |
| "eval_steps_per_second": 13.857, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00018653846153846154, | |
| "loss": 0.9899, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001858974358974359, | |
| "loss": 0.8926, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00018525641025641027, | |
| "loss": 0.9552, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 0.9109, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.6380471380471381, | |
| "eval_loss": 0.9887320399284363, | |
| "eval_runtime": 32.5312, | |
| "eval_samples_per_second": 109.679, | |
| "eval_steps_per_second": 13.71, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00018397435897435897, | |
| "loss": 0.832, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00018333333333333334, | |
| "loss": 0.9323, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001826923076923077, | |
| "loss": 1.0061, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00018205128205128207, | |
| "loss": 0.9627, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.6156004489337823, | |
| "eval_loss": 1.057395100593567, | |
| "eval_runtime": 34.1152, | |
| "eval_samples_per_second": 104.587, | |
| "eval_steps_per_second": 13.073, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00018141025641025643, | |
| "loss": 0.9613, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00018076923076923077, | |
| "loss": 0.8986, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00018012820512820513, | |
| "loss": 0.9344, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0001794871794871795, | |
| "loss": 0.7031, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.7087542087542088, | |
| "eval_loss": 0.813530683517456, | |
| "eval_runtime": 39.1342, | |
| "eval_samples_per_second": 91.173, | |
| "eval_steps_per_second": 11.397, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00017884615384615386, | |
| "loss": 0.7596, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00017820512820512823, | |
| "loss": 0.7207, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00017756410256410257, | |
| "loss": 0.8904, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00017692307692307693, | |
| "loss": 0.7605, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.7272727272727273, | |
| "eval_loss": 0.7620912790298462, | |
| "eval_runtime": 33.0054, | |
| "eval_samples_per_second": 108.103, | |
| "eval_steps_per_second": 13.513, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0001762820512820513, | |
| "loss": 0.8093, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00017564102564102566, | |
| "loss": 0.8198, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000175, | |
| "loss": 0.9515, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00017435897435897436, | |
| "loss": 0.8467, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_accuracy": 0.7244668911335578, | |
| "eval_loss": 0.7974632978439331, | |
| "eval_runtime": 33.3512, | |
| "eval_samples_per_second": 106.983, | |
| "eval_steps_per_second": 13.373, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00017371794871794873, | |
| "loss": 0.5829, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0001730769230769231, | |
| "loss": 0.5576, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00017243589743589746, | |
| "loss": 0.6035, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0001717948717948718, | |
| "loss": 0.6653, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_accuracy": 0.7317620650953984, | |
| "eval_loss": 0.7574185132980347, | |
| "eval_runtime": 32.4855, | |
| "eval_samples_per_second": 109.834, | |
| "eval_steps_per_second": 13.729, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00017115384615384616, | |
| "loss": 0.5067, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00017051282051282053, | |
| "loss": 0.4973, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00016987179487179486, | |
| "loss": 0.5721, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 0.5467, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_accuracy": 0.7244668911335578, | |
| "eval_loss": 0.7471520900726318, | |
| "eval_runtime": 32.6752, | |
| "eval_samples_per_second": 109.196, | |
| "eval_steps_per_second": 13.65, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.0001685897435897436, | |
| "loss": 0.5494, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00016794871794871796, | |
| "loss": 0.6158, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00016730769230769232, | |
| "loss": 0.5412, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 0.388, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_accuracy": 0.7716049382716049, | |
| "eval_loss": 0.6508304476737976, | |
| "eval_runtime": 36.0929, | |
| "eval_samples_per_second": 98.856, | |
| "eval_steps_per_second": 12.357, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00016602564102564105, | |
| "loss": 0.4511, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.0001653846153846154, | |
| "loss": 0.566, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00016474358974358976, | |
| "loss": 0.4476, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0001641025641025641, | |
| "loss": 0.4699, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_accuracy": 0.7244668911335578, | |
| "eval_loss": 0.773759663105011, | |
| "eval_runtime": 31.7272, | |
| "eval_samples_per_second": 112.459, | |
| "eval_steps_per_second": 14.057, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00016346153846153846, | |
| "loss": 0.402, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00016282051282051282, | |
| "loss": 0.425, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.0001621794871794872, | |
| "loss": 0.5171, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00016153846153846155, | |
| "loss": 0.5344, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_accuracy": 0.7328843995510662, | |
| "eval_loss": 0.7624912858009338, | |
| "eval_runtime": 32.007, | |
| "eval_samples_per_second": 111.476, | |
| "eval_steps_per_second": 13.934, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00016089743589743592, | |
| "loss": 0.4913, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00016025641025641028, | |
| "loss": 0.5042, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00015961538461538462, | |
| "loss": 0.6051, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00015897435897435896, | |
| "loss": 0.5753, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_accuracy": 0.7598204264870931, | |
| "eval_loss": 0.6743783950805664, | |
| "eval_runtime": 30.705, | |
| "eval_samples_per_second": 116.203, | |
| "eval_steps_per_second": 14.525, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00015833333333333332, | |
| "loss": 0.524, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.0001576923076923077, | |
| "loss": 0.5007, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00015705128205128205, | |
| "loss": 0.57, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00015641025641025642, | |
| "loss": 0.5533, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_accuracy": 0.734006734006734, | |
| "eval_loss": 0.7230738997459412, | |
| "eval_runtime": 33.3034, | |
| "eval_samples_per_second": 107.136, | |
| "eval_steps_per_second": 13.392, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00015576923076923078, | |
| "loss": 0.4399, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00015512820512820515, | |
| "loss": 0.453, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00015448717948717951, | |
| "loss": 0.4504, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 0.4244, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_accuracy": 0.7687991021324355, | |
| "eval_loss": 0.6673141717910767, | |
| "eval_runtime": 32.6948, | |
| "eval_samples_per_second": 109.131, | |
| "eval_steps_per_second": 13.641, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00015320512820512822, | |
| "loss": 0.46, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00015256410256410255, | |
| "loss": 0.4132, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00015192307692307692, | |
| "loss": 0.4849, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00015128205128205128, | |
| "loss": 0.5423, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_accuracy": 0.7413019079685746, | |
| "eval_loss": 0.7185856699943542, | |
| "eval_runtime": 35.9539, | |
| "eval_samples_per_second": 99.238, | |
| "eval_steps_per_second": 12.405, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.00015064102564102565, | |
| "loss": 0.3997, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.4744, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 0.00014935897435897438, | |
| "loss": 0.4717, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.00014871794871794872, | |
| "loss": 0.3384, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_accuracy": 0.7351290684624018, | |
| "eval_loss": 0.7269920706748962, | |
| "eval_runtime": 33.5897, | |
| "eval_samples_per_second": 106.223, | |
| "eval_steps_per_second": 13.278, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.00014807692307692308, | |
| "loss": 0.2602, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 0.00014743589743589745, | |
| "loss": 0.2345, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.00014679487179487178, | |
| "loss": 0.2135, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 0.00014615384615384615, | |
| "loss": 0.2797, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_accuracy": 0.7497194163860831, | |
| "eval_loss": 0.7742622494697571, | |
| "eval_runtime": 31.2996, | |
| "eval_samples_per_second": 113.995, | |
| "eval_steps_per_second": 14.249, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.00014551282051282051, | |
| "loss": 0.1862, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 0.00014487179487179488, | |
| "loss": 0.2334, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.00014423076923076924, | |
| "loss": 0.3361, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.0001435897435897436, | |
| "loss": 0.2939, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_accuracy": 0.7732884399551067, | |
| "eval_loss": 0.6985616087913513, | |
| "eval_runtime": 32.9047, | |
| "eval_samples_per_second": 108.435, | |
| "eval_steps_per_second": 13.554, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.00014294871794871795, | |
| "loss": 0.2846, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 0.0001423076923076923, | |
| "loss": 0.2555, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 0.00014166666666666668, | |
| "loss": 0.2629, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.00014102564102564104, | |
| "loss": 0.2288, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.7480359147025814, | |
| "eval_loss": 0.761025607585907, | |
| "eval_runtime": 34.1426, | |
| "eval_samples_per_second": 104.503, | |
| "eval_steps_per_second": 13.063, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.00014038461538461538, | |
| "loss": 0.2115, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.00013974358974358974, | |
| "loss": 0.2036, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.0001391025641025641, | |
| "loss": 0.2913, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.00013846153846153847, | |
| "loss": 0.2204, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_accuracy": 0.7570145903479237, | |
| "eval_loss": 0.7840890884399414, | |
| "eval_runtime": 35.7983, | |
| "eval_samples_per_second": 99.67, | |
| "eval_steps_per_second": 12.459, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.00013782051282051284, | |
| "loss": 0.2759, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.00013717948717948718, | |
| "loss": 0.3945, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.00013653846153846154, | |
| "loss": 0.3639, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.0001358974358974359, | |
| "loss": 0.4397, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 0.7789001122334456, | |
| "eval_loss": 0.6565700173377991, | |
| "eval_runtime": 31.6553, | |
| "eval_samples_per_second": 112.714, | |
| "eval_steps_per_second": 14.089, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.00013525641025641027, | |
| "loss": 0.2707, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.00013461538461538464, | |
| "loss": 0.3451, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.00013397435897435897, | |
| "loss": 0.3277, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.2219, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_accuracy": 0.7581369248035915, | |
| "eval_loss": 0.6900522112846375, | |
| "eval_runtime": 31.7845, | |
| "eval_samples_per_second": 112.256, | |
| "eval_steps_per_second": 14.032, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 0.0001326923076923077, | |
| "loss": 0.3174, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00013205128205128204, | |
| "loss": 0.189, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.0001314102564102564, | |
| "loss": 0.2619, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.00013076923076923077, | |
| "loss": 0.2297, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 0.7581369248035915, | |
| "eval_loss": 0.7095093131065369, | |
| "eval_runtime": 38.4561, | |
| "eval_samples_per_second": 92.781, | |
| "eval_steps_per_second": 11.598, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.00013012820512820514, | |
| "loss": 0.2201, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.0001294871794871795, | |
| "loss": 0.2261, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.00012884615384615387, | |
| "loss": 0.2095, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 0.00012820512820512823, | |
| "loss": 0.209, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_accuracy": 0.7738496071829405, | |
| "eval_loss": 0.7127913236618042, | |
| "eval_runtime": 32.9997, | |
| "eval_samples_per_second": 108.122, | |
| "eval_steps_per_second": 13.515, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.00012756410256410257, | |
| "loss": 0.3365, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.00012692307692307693, | |
| "loss": 0.2639, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 0.00012628205128205127, | |
| "loss": 0.1892, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.00012564102564102564, | |
| "loss": 0.1853, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 0.7710437710437711, | |
| "eval_loss": 0.6986069083213806, | |
| "eval_runtime": 32.2598, | |
| "eval_samples_per_second": 110.602, | |
| "eval_steps_per_second": 13.825, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.000125, | |
| "loss": 0.1807, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 0.00012435897435897437, | |
| "loss": 0.1554, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 0.00012371794871794873, | |
| "loss": 0.1845, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 0.1322, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "eval_accuracy": 0.7962962962962963, | |
| "eval_loss": 0.6381103992462158, | |
| "eval_runtime": 35.1257, | |
| "eval_samples_per_second": 101.578, | |
| "eval_steps_per_second": 12.697, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.00012243589743589746, | |
| "loss": 0.0598, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 0.00012179487179487179, | |
| "loss": 0.0853, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 0.00012115384615384615, | |
| "loss": 0.1616, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.00012051282051282052, | |
| "loss": 0.2603, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "eval_accuracy": 0.7682379349046016, | |
| "eval_loss": 0.7860389351844788, | |
| "eval_runtime": 35.648, | |
| "eval_samples_per_second": 100.09, | |
| "eval_steps_per_second": 12.511, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00011987179487179487, | |
| "loss": 0.1104, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 0.00011923076923076923, | |
| "loss": 0.0634, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.0001185897435897436, | |
| "loss": 0.0717, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.00011794871794871796, | |
| "loss": 0.1031, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "eval_accuracy": 0.7895622895622896, | |
| "eval_loss": 0.7322177886962891, | |
| "eval_runtime": 30.6139, | |
| "eval_samples_per_second": 116.549, | |
| "eval_steps_per_second": 14.569, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 0.00011730769230769231, | |
| "loss": 0.054, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.00011666666666666668, | |
| "loss": 0.0883, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 0.00011602564102564104, | |
| "loss": 0.1443, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 0.00011538461538461538, | |
| "loss": 0.0763, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "eval_accuracy": 0.7839506172839507, | |
| "eval_loss": 0.77773118019104, | |
| "eval_runtime": 31.4828, | |
| "eval_samples_per_second": 113.332, | |
| "eval_steps_per_second": 14.166, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 0.00011474358974358975, | |
| "loss": 0.1328, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 0.0001141025641025641, | |
| "loss": 0.0751, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 0.00011346153846153846, | |
| "loss": 0.1193, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 0.00011282051282051283, | |
| "loss": 0.1437, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "eval_accuracy": 0.7665544332210998, | |
| "eval_loss": 0.9293356537818909, | |
| "eval_runtime": 31.1681, | |
| "eval_samples_per_second": 114.476, | |
| "eval_steps_per_second": 14.31, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 0.00011217948717948718, | |
| "loss": 0.1805, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 0.00011153846153846154, | |
| "loss": 0.0931, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 0.00011089743589743591, | |
| "loss": 0.1227, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 0.00011025641025641027, | |
| "loss": 0.0818, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "eval_accuracy": 0.7940516273849607, | |
| "eval_loss": 0.796922504901886, | |
| "eval_runtime": 31.9044, | |
| "eval_samples_per_second": 111.834, | |
| "eval_steps_per_second": 13.979, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 0.00010961538461538463, | |
| "loss": 0.1193, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 0.00010897435897435896, | |
| "loss": 0.1237, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 0.00010833333333333333, | |
| "loss": 0.1074, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 0.0001076923076923077, | |
| "loss": 0.1355, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "eval_accuracy": 0.7716049382716049, | |
| "eval_loss": 0.8145824670791626, | |
| "eval_runtime": 37.6519, | |
| "eval_samples_per_second": 94.763, | |
| "eval_steps_per_second": 11.845, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 0.00010705128205128206, | |
| "loss": 0.0844, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 0.00010641025641025641, | |
| "loss": 0.1067, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 0.00010576923076923077, | |
| "loss": 0.0342, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 0.00010512820512820514, | |
| "loss": 0.0802, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_accuracy": 0.8075196408529742, | |
| "eval_loss": 0.6976819038391113, | |
| "eval_runtime": 32.2393, | |
| "eval_samples_per_second": 110.672, | |
| "eval_steps_per_second": 13.834, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 0.0001044871794871795, | |
| "loss": 0.1143, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 0.00010384615384615386, | |
| "loss": 0.1332, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 0.00010320512820512822, | |
| "loss": 0.069, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 0.00010256410256410256, | |
| "loss": 0.032, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "eval_accuracy": 0.7929292929292929, | |
| "eval_loss": 0.8349816203117371, | |
| "eval_runtime": 32.9637, | |
| "eval_samples_per_second": 108.24, | |
| "eval_steps_per_second": 13.53, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 0.00010192307692307692, | |
| "loss": 0.0722, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 0.00010128205128205129, | |
| "loss": 0.1383, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 0.00010064102564102564, | |
| "loss": 0.0861, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1836, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7968574635241302, | |
| "eval_loss": 0.775885820388794, | |
| "eval_runtime": 33.0589, | |
| "eval_samples_per_second": 107.928, | |
| "eval_steps_per_second": 13.491, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 9.935897435897437e-05, | |
| "loss": 0.0486, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 9.871794871794872e-05, | |
| "loss": 0.0369, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 9.807692307692307e-05, | |
| "loss": 0.0209, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 9.743589743589744e-05, | |
| "loss": 0.0391, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "eval_accuracy": 0.8013468013468014, | |
| "eval_loss": 0.8052350282669067, | |
| "eval_runtime": 32.0537, | |
| "eval_samples_per_second": 111.313, | |
| "eval_steps_per_second": 13.914, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 9.67948717948718e-05, | |
| "loss": 0.0354, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 9.615384615384617e-05, | |
| "loss": 0.0738, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 9.551282051282052e-05, | |
| "loss": 0.0275, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 9.487179487179487e-05, | |
| "loss": 0.0406, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "eval_accuracy": 0.7828282828282829, | |
| "eval_loss": 0.9251748919487, | |
| "eval_runtime": 31.6082, | |
| "eval_samples_per_second": 112.882, | |
| "eval_steps_per_second": 14.11, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 9.423076923076924e-05, | |
| "loss": 0.1377, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 9.35897435897436e-05, | |
| "loss": 0.0526, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 9.294871794871795e-05, | |
| "loss": 0.152, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 0.0488, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "eval_accuracy": 0.7901234567901234, | |
| "eval_loss": 0.8203706741333008, | |
| "eval_runtime": 31.8606, | |
| "eval_samples_per_second": 111.988, | |
| "eval_steps_per_second": 13.998, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 9.166666666666667e-05, | |
| "loss": 0.0372, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 9.102564102564103e-05, | |
| "loss": 0.0968, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 9.038461538461538e-05, | |
| "loss": 0.0147, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 8.974358974358975e-05, | |
| "loss": 0.016, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "eval_accuracy": 0.7794612794612794, | |
| "eval_loss": 0.9169319868087769, | |
| "eval_runtime": 33.724, | |
| "eval_samples_per_second": 105.8, | |
| "eval_steps_per_second": 13.225, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 8.910256410256411e-05, | |
| "loss": 0.0393, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 8.846153846153847e-05, | |
| "loss": 0.0258, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 8.782051282051283e-05, | |
| "loss": 0.009, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 8.717948717948718e-05, | |
| "loss": 0.0994, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "eval_accuracy": 0.8120089786756454, | |
| "eval_loss": 0.7930631637573242, | |
| "eval_runtime": 34.0477, | |
| "eval_samples_per_second": 104.794, | |
| "eval_steps_per_second": 13.099, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 8.653846153846155e-05, | |
| "loss": 0.0418, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 8.58974358974359e-05, | |
| "loss": 0.0793, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 8.525641025641026e-05, | |
| "loss": 0.0122, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 8.461538461538461e-05, | |
| "loss": 0.07, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "eval_accuracy": 0.8103254769921436, | |
| "eval_loss": 0.8245030045509338, | |
| "eval_runtime": 32.6583, | |
| "eval_samples_per_second": 109.252, | |
| "eval_steps_per_second": 13.657, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 8.397435897435898e-05, | |
| "loss": 0.0656, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 0.0364, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 8.26923076923077e-05, | |
| "loss": 0.0639, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 8.205128205128205e-05, | |
| "loss": 0.0088, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "eval_accuracy": 0.7985409652076318, | |
| "eval_loss": 0.907173752784729, | |
| "eval_runtime": 40.0812, | |
| "eval_samples_per_second": 89.019, | |
| "eval_steps_per_second": 11.127, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 8.141025641025641e-05, | |
| "loss": 0.0557, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 8.076923076923078e-05, | |
| "loss": 0.0548, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 8.012820512820514e-05, | |
| "loss": 0.0055, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 7.948717948717948e-05, | |
| "loss": 0.0085, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "eval_accuracy": 0.8114478114478114, | |
| "eval_loss": 0.7862226963043213, | |
| "eval_runtime": 33.9761, | |
| "eval_samples_per_second": 105.015, | |
| "eval_steps_per_second": 13.127, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 7.884615384615384e-05, | |
| "loss": 0.0423, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 7.820512820512821e-05, | |
| "loss": 0.0581, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 7.756410256410257e-05, | |
| "loss": 0.0043, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 0.083, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "eval_accuracy": 0.8198653198653199, | |
| "eval_loss": 0.7796981334686279, | |
| "eval_runtime": 32.3864, | |
| "eval_samples_per_second": 110.17, | |
| "eval_steps_per_second": 13.771, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 7.628205128205128e-05, | |
| "loss": 0.1308, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 7.564102564102564e-05, | |
| "loss": 0.0063, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.0132, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 7.435897435897436e-05, | |
| "loss": 0.0055, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "eval_accuracy": 0.8069584736251403, | |
| "eval_loss": 0.8723996877670288, | |
| "eval_runtime": 33.6185, | |
| "eval_samples_per_second": 106.132, | |
| "eval_steps_per_second": 13.267, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 7.371794871794872e-05, | |
| "loss": 0.0366, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 7.307692307692307e-05, | |
| "loss": 0.06, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 7.243589743589744e-05, | |
| "loss": 0.0065, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 7.17948717948718e-05, | |
| "loss": 0.0223, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "eval_accuracy": 0.8249158249158249, | |
| "eval_loss": 0.7518730163574219, | |
| "eval_runtime": 43.1922, | |
| "eval_samples_per_second": 82.607, | |
| "eval_steps_per_second": 10.326, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 7.115384615384616e-05, | |
| "loss": 0.0215, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 7.051282051282052e-05, | |
| "loss": 0.0494, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 6.987179487179487e-05, | |
| "loss": 0.0441, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 6.923076923076924e-05, | |
| "loss": 0.0042, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "eval_accuracy": 0.8232323232323232, | |
| "eval_loss": 0.7583897113800049, | |
| "eval_runtime": 31.893, | |
| "eval_samples_per_second": 111.874, | |
| "eval_steps_per_second": 13.984, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 6.858974358974359e-05, | |
| "loss": 0.0131, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 6.794871794871795e-05, | |
| "loss": 0.0037, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 6.730769230769232e-05, | |
| "loss": 0.02, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.0178, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_accuracy": 0.8080808080808081, | |
| "eval_loss": 0.8523600697517395, | |
| "eval_runtime": 32.5102, | |
| "eval_samples_per_second": 109.75, | |
| "eval_steps_per_second": 13.719, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 6.602564102564102e-05, | |
| "loss": 0.0035, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 6.538461538461539e-05, | |
| "loss": 0.0137, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 6.474358974358975e-05, | |
| "loss": 0.0033, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 6.410256410256412e-05, | |
| "loss": 0.0172, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "eval_accuracy": 0.8215488215488216, | |
| "eval_loss": 0.7729219794273376, | |
| "eval_runtime": 31.7305, | |
| "eval_samples_per_second": 112.447, | |
| "eval_steps_per_second": 14.056, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 6.346153846153847e-05, | |
| "loss": 0.0252, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 6.282051282051282e-05, | |
| "loss": 0.0034, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 6.217948717948718e-05, | |
| "loss": 0.0045, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": 0.0044, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "eval_accuracy": 0.808641975308642, | |
| "eval_loss": 0.8701183795928955, | |
| "eval_runtime": 31.0299, | |
| "eval_samples_per_second": 114.986, | |
| "eval_steps_per_second": 14.373, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 6.089743589743589e-05, | |
| "loss": 0.0373, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 6.025641025641026e-05, | |
| "loss": 0.0029, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 5.9615384615384616e-05, | |
| "loss": 0.0032, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 5.897435897435898e-05, | |
| "loss": 0.003, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "eval_accuracy": 0.8237934904601572, | |
| "eval_loss": 0.8060529828071594, | |
| "eval_runtime": 32.4421, | |
| "eval_samples_per_second": 109.98, | |
| "eval_steps_per_second": 13.748, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 5.833333333333334e-05, | |
| "loss": 0.0034, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 5.769230769230769e-05, | |
| "loss": 0.0025, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 5.705128205128205e-05, | |
| "loss": 0.0029, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 5.6410256410256414e-05, | |
| "loss": 0.0033, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "eval_accuracy": 0.8226711560044894, | |
| "eval_loss": 0.813855767250061, | |
| "eval_runtime": 32.378, | |
| "eval_samples_per_second": 110.198, | |
| "eval_steps_per_second": 13.775, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 5.576923076923077e-05, | |
| "loss": 0.0129, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 5.512820512820514e-05, | |
| "loss": 0.0079, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 5.448717948717948e-05, | |
| "loss": 0.0117, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 5.384615384615385e-05, | |
| "loss": 0.0023, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "eval_accuracy": 0.8164983164983165, | |
| "eval_loss": 0.8478395342826843, | |
| "eval_runtime": 35.321, | |
| "eval_samples_per_second": 101.016, | |
| "eval_steps_per_second": 12.627, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 5.3205128205128205e-05, | |
| "loss": 0.0075, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 5.256410256410257e-05, | |
| "loss": 0.0023, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 5.192307692307693e-05, | |
| "loss": 0.0058, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 5.128205128205128e-05, | |
| "loss": 0.003, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "eval_accuracy": 0.8120089786756454, | |
| "eval_loss": 0.8443180918693542, | |
| "eval_runtime": 32.2001, | |
| "eval_samples_per_second": 110.807, | |
| "eval_steps_per_second": 13.851, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 5.0641025641025644e-05, | |
| "loss": 0.0512, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0024, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 4.935897435897436e-05, | |
| "loss": 0.0022, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 4.871794871794872e-05, | |
| "loss": 0.031, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "eval_accuracy": 0.8047138047138047, | |
| "eval_loss": 0.9272196888923645, | |
| "eval_runtime": 33.2145, | |
| "eval_samples_per_second": 107.423, | |
| "eval_steps_per_second": 13.428, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 4.8076923076923084e-05, | |
| "loss": 0.0456, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 4.7435897435897435e-05, | |
| "loss": 0.0024, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 4.67948717948718e-05, | |
| "loss": 0.0023, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 4.615384615384616e-05, | |
| "loss": 0.0021, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "eval_accuracy": 0.8204264870931538, | |
| "eval_loss": 0.8369048237800598, | |
| "eval_runtime": 36.1395, | |
| "eval_samples_per_second": 98.728, | |
| "eval_steps_per_second": 12.341, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 4.5512820512820516e-05, | |
| "loss": 0.0021, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 4.4871794871794874e-05, | |
| "loss": 0.0026, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 4.423076923076923e-05, | |
| "loss": 0.0021, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 4.358974358974359e-05, | |
| "loss": 0.0019, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "eval_accuracy": 0.8209876543209876, | |
| "eval_loss": 0.828059196472168, | |
| "eval_runtime": 32.1149, | |
| "eval_samples_per_second": 111.101, | |
| "eval_steps_per_second": 13.888, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 4.294871794871795e-05, | |
| "loss": 0.0024, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 4.230769230769231e-05, | |
| "loss": 0.0019, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.002, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 4.1025641025641023e-05, | |
| "loss": 0.0019, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "eval_accuracy": 0.8198653198653199, | |
| "eval_loss": 0.834019124507904, | |
| "eval_runtime": 35.8439, | |
| "eval_samples_per_second": 99.543, | |
| "eval_steps_per_second": 12.443, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 4.038461538461539e-05, | |
| "loss": 0.002, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 3.974358974358974e-05, | |
| "loss": 0.0019, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 3.9102564102564105e-05, | |
| "loss": 0.0018, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 3.846153846153846e-05, | |
| "loss": 0.0018, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "eval_accuracy": 0.8204264870931538, | |
| "eval_loss": 0.8339148759841919, | |
| "eval_runtime": 34.0288, | |
| "eval_samples_per_second": 104.853, | |
| "eval_steps_per_second": 13.107, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 3.782051282051282e-05, | |
| "loss": 0.0018, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 3.717948717948718e-05, | |
| "loss": 0.0018, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 3.653846153846154e-05, | |
| "loss": 0.0018, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 3.58974358974359e-05, | |
| "loss": 0.002, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "eval_accuracy": 0.8209876543209876, | |
| "eval_loss": 0.8293877840042114, | |
| "eval_runtime": 33.9846, | |
| "eval_samples_per_second": 104.989, | |
| "eval_steps_per_second": 13.124, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 3.525641025641026e-05, | |
| "loss": 0.0017, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 3.461538461538462e-05, | |
| "loss": 0.0017, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 3.397435897435898e-05, | |
| "loss": 0.0018, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0017, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "eval_accuracy": 0.8209876543209876, | |
| "eval_loss": 0.8266403079032898, | |
| "eval_runtime": 36.0162, | |
| "eval_samples_per_second": 99.067, | |
| "eval_steps_per_second": 12.383, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 3.269230769230769e-05, | |
| "loss": 0.002, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 3.205128205128206e-05, | |
| "loss": 0.0017, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 3.141025641025641e-05, | |
| "loss": 0.0017, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 0.0017, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "eval_accuracy": 0.8226711560044894, | |
| "eval_loss": 0.8177886009216309, | |
| "eval_runtime": 36.3329, | |
| "eval_samples_per_second": 98.203, | |
| "eval_steps_per_second": 12.275, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 3.012820512820513e-05, | |
| "loss": 0.0016, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 2.948717948717949e-05, | |
| "loss": 0.0016, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 2.8846153846153845e-05, | |
| "loss": 0.0016, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 2.8205128205128207e-05, | |
| "loss": 0.0017, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "eval_accuracy": 0.824354657687991, | |
| "eval_loss": 0.8174560070037842, | |
| "eval_runtime": 32.3063, | |
| "eval_samples_per_second": 110.443, | |
| "eval_steps_per_second": 13.805, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 2.756410256410257e-05, | |
| "loss": 0.0017, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 2.6923076923076923e-05, | |
| "loss": 0.0017, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 2.6282051282051285e-05, | |
| "loss": 0.0017, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 2.564102564102564e-05, | |
| "loss": 0.0017, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "eval_accuracy": 0.8237934904601572, | |
| "eval_loss": 0.8166114687919617, | |
| "eval_runtime": 33.2936, | |
| "eval_samples_per_second": 107.168, | |
| "eval_steps_per_second": 13.396, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0015, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 2.435897435897436e-05, | |
| "loss": 0.0016, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 2.3717948717948718e-05, | |
| "loss": 0.0016, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 2.307692307692308e-05, | |
| "loss": 0.0015, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "eval_accuracy": 0.824354657687991, | |
| "eval_loss": 0.8176218271255493, | |
| "eval_runtime": 32.3011, | |
| "eval_samples_per_second": 110.46, | |
| "eval_steps_per_second": 13.808, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 2.2435897435897437e-05, | |
| "loss": 0.0015, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 2.1794871794871795e-05, | |
| "loss": 0.0015, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 2.1153846153846154e-05, | |
| "loss": 0.0015, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 2.0512820512820512e-05, | |
| "loss": 0.0015, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "eval_accuracy": 0.824354657687991, | |
| "eval_loss": 0.8185549378395081, | |
| "eval_runtime": 32.7126, | |
| "eval_samples_per_second": 109.071, | |
| "eval_steps_per_second": 13.634, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.987179487179487e-05, | |
| "loss": 0.0015, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.923076923076923e-05, | |
| "loss": 0.0015, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.858974358974359e-05, | |
| "loss": 0.0015, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.794871794871795e-05, | |
| "loss": 0.0015, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "eval_accuracy": 0.8249158249158249, | |
| "eval_loss": 0.819684624671936, | |
| "eval_runtime": 35.8925, | |
| "eval_samples_per_second": 99.408, | |
| "eval_steps_per_second": 12.426, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.730769230769231e-05, | |
| "loss": 0.0016, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0015, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 1.602564102564103e-05, | |
| "loss": 0.0015, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "loss": 0.0015, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "eval_accuracy": 0.8249158249158249, | |
| "eval_loss": 0.8204275965690613, | |
| "eval_runtime": 31.8471, | |
| "eval_samples_per_second": 112.035, | |
| "eval_steps_per_second": 14.004, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 1.4743589743589745e-05, | |
| "loss": 0.0015, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.4102564102564104e-05, | |
| "loss": 0.0015, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.3461538461538462e-05, | |
| "loss": 0.0015, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 1.282051282051282e-05, | |
| "loss": 0.0015, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "eval_accuracy": 0.8249158249158249, | |
| "eval_loss": 0.8213893175125122, | |
| "eval_runtime": 30.8496, | |
| "eval_samples_per_second": 115.658, | |
| "eval_steps_per_second": 14.457, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 1.217948717948718e-05, | |
| "loss": 0.0015, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 1.153846153846154e-05, | |
| "loss": 0.0015, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 1.0897435897435898e-05, | |
| "loss": 0.0015, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.0256410256410256e-05, | |
| "loss": 0.0015, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "eval_accuracy": 0.8249158249158249, | |
| "eval_loss": 0.8220140933990479, | |
| "eval_runtime": 31.6074, | |
| "eval_samples_per_second": 112.885, | |
| "eval_steps_per_second": 14.111, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 9.615384615384616e-06, | |
| "loss": 0.0016, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 8.974358974358976e-06, | |
| "loss": 0.0014, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0015, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 0.0015, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "eval_accuracy": 0.8254769921436588, | |
| "eval_loss": 0.8220102787017822, | |
| "eval_runtime": 30.2325, | |
| "eval_samples_per_second": 118.019, | |
| "eval_steps_per_second": 14.752, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 7.051282051282052e-06, | |
| "loss": 0.0014, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 6.41025641025641e-06, | |
| "loss": 0.0014, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 5.76923076923077e-06, | |
| "loss": 0.0015, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 5.128205128205128e-06, | |
| "loss": 0.0015, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "eval_accuracy": 0.8254769921436588, | |
| "eval_loss": 0.8217305541038513, | |
| "eval_runtime": 38.9983, | |
| "eval_samples_per_second": 91.491, | |
| "eval_steps_per_second": 11.436, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 4.487179487179488e-06, | |
| "loss": 0.0014, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 3.846153846153847e-06, | |
| "loss": 0.0015, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 3.205128205128205e-06, | |
| "loss": 0.0014, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 2.564102564102564e-06, | |
| "loss": 0.0014, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "eval_accuracy": 0.8254769921436588, | |
| "eval_loss": 0.8224795460700989, | |
| "eval_runtime": 31.9073, | |
| "eval_samples_per_second": 111.824, | |
| "eval_steps_per_second": 13.978, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 0.0014, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 1.282051282051282e-06, | |
| "loss": 0.0015, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 6.41025641025641e-07, | |
| "loss": 0.0014, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0014, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8260381593714927, | |
| "eval_loss": 0.8224756717681885, | |
| "eval_runtime": 30.7985, | |
| "eval_samples_per_second": 115.85, | |
| "eval_steps_per_second": 14.481, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 3120, | |
| "total_flos": 3.8629591832685773e+18, | |
| "train_loss": 0.2533179052472592, | |
| "train_runtime": 4957.8804, | |
| "train_samples_per_second": 10.069, | |
| "train_steps_per_second": 0.629 | |
| } | |
| ], | |
| "max_steps": 3120, | |
| "num_train_epochs": 8, | |
| "total_flos": 3.8629591832685773e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |