{
  "best_metric": 0.74,
  "best_model_checkpoint": "swinv2-base-patch4-window16-256-finetuned-eurosat/checkpoint-1446",
  "epoch": 29.657794676806084,
  "eval_steps": 500,
  "global_step": 1950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "grad_norm": 10.112361907958984,
      "learning_rate": 2.564102564102564e-06,
      "loss": 6.2407,
      "step": 10
    },
    {
      "epoch": 0.3,
      "grad_norm": 8.693501472473145,
      "learning_rate": 5.128205128205128e-06,
      "loss": 6.2346,
      "step": 20
    },
    {
      "epoch": 0.46,
      "grad_norm": 31.615943908691406,
      "learning_rate": 7.692307692307694e-06,
      "loss": 6.2382,
      "step": 30
    },
    {
      "epoch": 0.61,
      "grad_norm": 4.278875827789307,
      "learning_rate": 1.0256410256410256e-05,
      "loss": 6.2402,
      "step": 40
    },
    {
      "epoch": 0.76,
      "grad_norm": 5.3635783195495605,
      "learning_rate": 1.282051282051282e-05,
      "loss": 6.2228,
      "step": 50
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.80220890045166,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 6.2125,
      "step": 60
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.0,
      "eval_loss": 6.20700216293335,
      "eval_runtime": 9.9729,
      "eval_samples_per_second": 35.095,
      "eval_steps_per_second": 3.008,
      "step": 65
    },
    {
      "epoch": 1.06,
      "grad_norm": 4.892258167266846,
      "learning_rate": 1.794871794871795e-05,
      "loss": 6.197,
      "step": 70
    },
    {
      "epoch": 1.22,
      "grad_norm": 5.695966720581055,
      "learning_rate": 2.0512820512820512e-05,
      "loss": 6.161,
      "step": 80
    },
    {
      "epoch": 1.37,
      "grad_norm": 5.43502950668335,
      "learning_rate": 2.307692307692308e-05,
      "loss": 6.1137,
      "step": 90
    },
    {
      "epoch": 1.52,
      "grad_norm": 10.649894714355469,
      "learning_rate": 2.564102564102564e-05,
      "loss": 6.1187,
      "step": 100
    },
    {
      "epoch": 1.67,
      "grad_norm": 6.482412815093994,
      "learning_rate": 2.8205128205128207e-05,
      "loss": 6.0495,
      "step": 110
    },
    {
      "epoch": 1.83,
      "grad_norm": 20.60057830810547,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 6.0249,
      "step": 120
    },
    {
      "epoch": 1.98,
      "grad_norm": 10.727930068969727,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 5.9584,
      "step": 130
    },
    {
      "epoch": 1.99,
      "eval_accuracy": 0.054285714285714284,
      "eval_loss": 5.894497871398926,
      "eval_runtime": 8.019,
      "eval_samples_per_second": 43.646,
      "eval_steps_per_second": 3.741,
      "step": 131
    },
    {
      "epoch": 2.13,
      "grad_norm": 9.321084976196289,
      "learning_rate": 3.58974358974359e-05,
      "loss": 5.6353,
      "step": 140
    },
    {
      "epoch": 2.28,
      "grad_norm": 15.699618339538574,
      "learning_rate": 3.846153846153846e-05,
      "loss": 5.4549,
      "step": 150
    },
    {
      "epoch": 2.43,
      "grad_norm": 13.360058784484863,
      "learning_rate": 4.1025641025641023e-05,
      "loss": 5.1768,
      "step": 160
    },
    {
      "epoch": 2.59,
      "grad_norm": 14.356024742126465,
      "learning_rate": 4.358974358974359e-05,
      "loss": 5.08,
      "step": 170
    },
    {
      "epoch": 2.74,
      "grad_norm": 12.42605209350586,
      "learning_rate": 4.615384615384616e-05,
      "loss": 4.8681,
      "step": 180
    },
    {
      "epoch": 2.89,
      "grad_norm": 13.120949745178223,
      "learning_rate": 4.871794871794872e-05,
      "loss": 4.7047,
      "step": 190
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.3028571428571429,
      "eval_loss": 4.368250370025635,
      "eval_runtime": 8.0905,
      "eval_samples_per_second": 43.26,
      "eval_steps_per_second": 3.708,
      "step": 197
    },
    {
      "epoch": 3.04,
      "grad_norm": 14.149765014648438,
      "learning_rate": 4.985754985754986e-05,
      "loss": 4.2241,
      "step": 200
    },
    {
      "epoch": 3.19,
      "grad_norm": 12.356833457946777,
      "learning_rate": 4.9572649572649575e-05,
      "loss": 3.6145,
      "step": 210
    },
    {
      "epoch": 3.35,
      "grad_norm": 13.141170501708984,
      "learning_rate": 4.928774928774929e-05,
      "loss": 3.3265,
      "step": 220
    },
    {
      "epoch": 3.5,
      "grad_norm": 11.219949722290039,
      "learning_rate": 4.9002849002849004e-05,
      "loss": 3.1353,
      "step": 230
    },
    {
      "epoch": 3.65,
      "grad_norm": 12.410125732421875,
      "learning_rate": 4.871794871794872e-05,
      "loss": 2.9184,
      "step": 240
    },
    {
      "epoch": 3.8,
      "grad_norm": 12.346807479858398,
      "learning_rate": 4.8433048433048433e-05,
      "loss": 2.6721,
      "step": 250
    },
    {
      "epoch": 3.95,
      "grad_norm": 11.234230995178223,
      "learning_rate": 4.814814814814815e-05,
      "loss": 2.7217,
      "step": 260
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5457142857142857,
      "eval_loss": 2.71696400642395,
      "eval_runtime": 8.017,
      "eval_samples_per_second": 43.657,
      "eval_steps_per_second": 3.742,
      "step": 263
    },
    {
      "epoch": 4.11,
      "grad_norm": 9.922853469848633,
      "learning_rate": 4.786324786324787e-05,
      "loss": 1.9846,
      "step": 270
    },
    {
      "epoch": 4.26,
      "grad_norm": 10.338376998901367,
      "learning_rate": 4.7578347578347584e-05,
      "loss": 1.7283,
      "step": 280
    },
    {
      "epoch": 4.41,
      "grad_norm": 10.503661155700684,
      "learning_rate": 4.72934472934473e-05,
      "loss": 1.7337,
      "step": 290
    },
    {
      "epoch": 4.56,
      "grad_norm": 9.319334983825684,
      "learning_rate": 4.700854700854701e-05,
      "loss": 1.6741,
      "step": 300
    },
    {
      "epoch": 4.71,
      "grad_norm": 9.088035583496094,
      "learning_rate": 4.672364672364672e-05,
      "loss": 1.5406,
      "step": 310
    },
    {
      "epoch": 4.87,
      "grad_norm": 8.811790466308594,
      "learning_rate": 4.643874643874644e-05,
      "loss": 1.6097,
      "step": 320
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.6314285714285715,
      "eval_loss": 2.015495538711548,
      "eval_runtime": 7.9582,
      "eval_samples_per_second": 43.98,
      "eval_steps_per_second": 3.77,
      "step": 328
    },
    {
      "epoch": 5.02,
      "grad_norm": 9.573624610900879,
      "learning_rate": 4.615384615384616e-05,
      "loss": 1.4472,
      "step": 330
    },
    {
      "epoch": 5.17,
      "grad_norm": 9.028331756591797,
      "learning_rate": 4.586894586894587e-05,
      "loss": 0.9959,
      "step": 340
    },
    {
      "epoch": 5.32,
      "grad_norm": 8.725614547729492,
      "learning_rate": 4.558404558404559e-05,
      "loss": 0.945,
      "step": 350
    },
    {
      "epoch": 5.48,
      "grad_norm": 10.008910179138184,
      "learning_rate": 4.52991452991453e-05,
      "loss": 0.9564,
      "step": 360
    },
    {
      "epoch": 5.63,
      "grad_norm": 9.664880752563477,
      "learning_rate": 4.501424501424502e-05,
      "loss": 0.9423,
      "step": 370
    },
    {
      "epoch": 5.78,
      "grad_norm": 7.615637302398682,
      "learning_rate": 4.472934472934473e-05,
      "loss": 0.9333,
      "step": 380
    },
    {
      "epoch": 5.93,
      "grad_norm": 9.067399978637695,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.8932,
      "step": 390
    },
    {
      "epoch": 5.99,
      "eval_accuracy": 0.6742857142857143,
      "eval_loss": 1.70182204246521,
      "eval_runtime": 8.0389,
      "eval_samples_per_second": 43.538,
      "eval_steps_per_second": 3.732,
      "step": 394
    },
    {
      "epoch": 6.08,
      "grad_norm": 5.935275554656982,
      "learning_rate": 4.415954415954416e-05,
      "loss": 0.7379,
      "step": 400
    },
    {
      "epoch": 6.24,
      "grad_norm": 7.257266521453857,
      "learning_rate": 4.3874643874643876e-05,
      "loss": 0.5602,
      "step": 410
    },
    {
      "epoch": 6.39,
      "grad_norm": 9.825379371643066,
      "learning_rate": 4.358974358974359e-05,
      "loss": 0.5144,
      "step": 420
    },
    {
      "epoch": 6.54,
      "grad_norm": 6.920632362365723,
      "learning_rate": 4.3304843304843306e-05,
      "loss": 0.5718,
      "step": 430
    },
    {
      "epoch": 6.69,
      "grad_norm": 7.798554420471191,
      "learning_rate": 4.301994301994302e-05,
      "loss": 0.515,
      "step": 440
    },
    {
      "epoch": 6.84,
      "grad_norm": 6.575021266937256,
      "learning_rate": 4.2735042735042735e-05,
      "loss": 0.5472,
      "step": 450
    },
    {
      "epoch": 7.0,
      "grad_norm": 5.641183853149414,
      "learning_rate": 4.2450142450142457e-05,
      "loss": 0.5734,
      "step": 460
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7057142857142857,
      "eval_loss": 1.5249170064926147,
      "eval_runtime": 8.0507,
      "eval_samples_per_second": 43.474,
      "eval_steps_per_second": 3.726,
      "step": 460
    },
    {
      "epoch": 7.15,
      "grad_norm": 3.5870327949523926,
      "learning_rate": 4.216524216524217e-05,
      "loss": 0.3342,
      "step": 470
    },
    {
      "epoch": 7.3,
      "grad_norm": 4.1048479080200195,
      "learning_rate": 4.1880341880341886e-05,
      "loss": 0.3382,
      "step": 480
    },
    {
      "epoch": 7.45,
      "grad_norm": 6.017439842224121,
      "learning_rate": 4.15954415954416e-05,
      "loss": 0.3804,
      "step": 490
    },
    {
      "epoch": 7.6,
      "grad_norm": 5.106074333190918,
      "learning_rate": 4.131054131054131e-05,
      "loss": 0.3606,
      "step": 500
    },
    {
      "epoch": 7.76,
      "grad_norm": 5.5891900062561035,
      "learning_rate": 4.1025641025641023e-05,
      "loss": 0.3295,
      "step": 510
    },
    {
      "epoch": 7.91,
      "grad_norm": 4.079031944274902,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.324,
      "step": 520
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7085714285714285,
      "eval_loss": 1.4846410751342773,
      "eval_runtime": 7.9798,
      "eval_samples_per_second": 43.861,
      "eval_steps_per_second": 3.76,
      "step": 526
    },
    {
      "epoch": 8.06,
      "grad_norm": 3.212510824203491,
      "learning_rate": 4.045584045584046e-05,
      "loss": 0.2964,
      "step": 530
    },
    {
      "epoch": 8.21,
      "grad_norm": 5.004084587097168,
      "learning_rate": 4.0170940170940174e-05,
      "loss": 0.2145,
      "step": 540
    },
    {
      "epoch": 8.37,
      "grad_norm": 4.74351167678833,
      "learning_rate": 3.988603988603989e-05,
      "loss": 0.2206,
      "step": 550
    },
    {
      "epoch": 8.52,
      "grad_norm": 5.272638320922852,
      "learning_rate": 3.9601139601139604e-05,
      "loss": 0.2131,
      "step": 560
    },
    {
      "epoch": 8.67,
      "grad_norm": 3.062843084335327,
      "learning_rate": 3.931623931623932e-05,
      "loss": 0.2447,
      "step": 570
    },
    {
      "epoch": 8.82,
      "grad_norm": 3.7355995178222656,
      "learning_rate": 3.903133903133903e-05,
      "loss": 0.213,
      "step": 580
    },
    {
      "epoch": 8.97,
      "grad_norm": 3.62921404838562,
      "learning_rate": 3.874643874643875e-05,
      "loss": 0.2195,
      "step": 590
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.7114285714285714,
      "eval_loss": 1.4269201755523682,
      "eval_runtime": 8.004,
      "eval_samples_per_second": 43.728,
      "eval_steps_per_second": 3.748,
      "step": 591
    },
    {
      "epoch": 9.13,
      "grad_norm": 2.647521734237671,
      "learning_rate": 3.846153846153846e-05,
      "loss": 0.1677,
      "step": 600
    },
    {
      "epoch": 9.28,
      "grad_norm": 4.363504409790039,
      "learning_rate": 3.817663817663818e-05,
      "loss": 0.1513,
      "step": 610
    },
    {
      "epoch": 9.43,
      "grad_norm": 2.5766873359680176,
      "learning_rate": 3.789173789173789e-05,
      "loss": 0.1684,
      "step": 620
    },
    {
      "epoch": 9.58,
      "grad_norm": 3.8854830265045166,
      "learning_rate": 3.760683760683761e-05,
      "loss": 0.1552,
      "step": 630
    },
    {
      "epoch": 9.73,
      "grad_norm": 6.697465896606445,
      "learning_rate": 3.732193732193732e-05,
      "loss": 0.188,
      "step": 640
    },
    {
      "epoch": 9.89,
      "grad_norm": 3.860522985458374,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.1679,
      "step": 650
    },
    {
      "epoch": 9.99,
      "eval_accuracy": 0.7171428571428572,
      "eval_loss": 1.4169081449508667,
      "eval_runtime": 8.0283,
      "eval_samples_per_second": 43.596,
      "eval_steps_per_second": 3.737,
      "step": 657
    },
    {
      "epoch": 10.04,
      "grad_norm": 4.154173374176025,
      "learning_rate": 3.675213675213676e-05,
      "loss": 0.1645,
      "step": 660
    },
    {
      "epoch": 10.19,
      "grad_norm": 1.8003276586532593,
      "learning_rate": 3.646723646723647e-05,
      "loss": 0.105,
      "step": 670
    },
    {
      "epoch": 10.34,
      "grad_norm": 4.1917619705200195,
      "learning_rate": 3.618233618233619e-05,
      "loss": 0.149,
      "step": 680
    },
    {
      "epoch": 10.49,
      "grad_norm": 3.338636636734009,
      "learning_rate": 3.58974358974359e-05,
      "loss": 0.1287,
      "step": 690
    },
    {
      "epoch": 10.65,
      "grad_norm": 1.6283141374588013,
      "learning_rate": 3.561253561253561e-05,
      "loss": 0.1458,
      "step": 700
    },
    {
      "epoch": 10.8,
      "grad_norm": 2.769218921661377,
      "learning_rate": 3.5327635327635325e-05,
      "loss": 0.1394,
      "step": 710
    },
    {
      "epoch": 10.95,
      "grad_norm": 3.2028868198394775,
      "learning_rate": 3.504273504273504e-05,
      "loss": 0.1277,
      "step": 720
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7057142857142857,
      "eval_loss": 1.404009222984314,
      "eval_runtime": 8.031,
      "eval_samples_per_second": 43.581,
      "eval_steps_per_second": 3.736,
      "step": 723
    },
    {
      "epoch": 11.1,
      "grad_norm": 1.2642875909805298,
      "learning_rate": 3.475783475783476e-05,
      "loss": 0.1187,
      "step": 730
    },
    {
      "epoch": 11.25,
      "grad_norm": 1.5215080976486206,
      "learning_rate": 3.4472934472934476e-05,
      "loss": 0.0854,
      "step": 740
    },
    {
      "epoch": 11.41,
      "grad_norm": 2.877058982849121,
      "learning_rate": 3.418803418803419e-05,
      "loss": 0.1105,
      "step": 750
    },
    {
      "epoch": 11.56,
      "grad_norm": 5.0010552406311035,
      "learning_rate": 3.3903133903133905e-05,
      "loss": 0.0912,
      "step": 760
    },
    {
      "epoch": 11.71,
      "grad_norm": 5.7503981590271,
      "learning_rate": 3.361823361823362e-05,
      "loss": 0.1264,
      "step": 770
    },
    {
      "epoch": 11.86,
      "grad_norm": 3.2310426235198975,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.1238,
      "step": 780
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7285714285714285,
      "eval_loss": 1.4007512331008911,
      "eval_runtime": 8.0356,
      "eval_samples_per_second": 43.556,
      "eval_steps_per_second": 3.733,
      "step": 789
    },
    {
      "epoch": 12.02,
      "grad_norm": 1.719030737876892,
      "learning_rate": 3.304843304843305e-05,
      "loss": 0.0817,
      "step": 790
    },
    {
      "epoch": 12.17,
      "grad_norm": 3.475520610809326,
      "learning_rate": 3.2763532763532764e-05,
      "loss": 0.0765,
      "step": 800
    },
    {
      "epoch": 12.32,
      "grad_norm": 3.978292226791382,
      "learning_rate": 3.247863247863248e-05,
      "loss": 0.0874,
      "step": 810
    },
    {
      "epoch": 12.47,
      "grad_norm": 1.6397371292114258,
      "learning_rate": 3.2193732193732194e-05,
      "loss": 0.1348,
      "step": 820
    },
    {
      "epoch": 12.62,
      "grad_norm": 0.9705621600151062,
      "learning_rate": 3.190883190883191e-05,
      "loss": 0.057,
      "step": 830
    },
    {
      "epoch": 12.78,
      "grad_norm": 3.8919146060943604,
      "learning_rate": 3.162393162393162e-05,
      "loss": 0.085,
      "step": 840
    },
    {
      "epoch": 12.93,
      "grad_norm": 1.4797801971435547,
      "learning_rate": 3.133903133903134e-05,
      "loss": 0.088,
      "step": 850
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.7114285714285714,
      "eval_loss": 1.3840457201004028,
      "eval_runtime": 7.9781,
      "eval_samples_per_second": 43.87,
      "eval_steps_per_second": 3.76,
      "step": 854
    },
    {
      "epoch": 13.08,
      "grad_norm": 2.244473695755005,
      "learning_rate": 3.105413105413106e-05,
      "loss": 0.0673,
      "step": 860
    },
    {
      "epoch": 13.23,
      "grad_norm": 1.467897653579712,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 0.0523,
      "step": 870
    },
    {
      "epoch": 13.38,
      "grad_norm": 2.4079532623291016,
      "learning_rate": 3.0484330484330486e-05,
      "loss": 0.0752,
      "step": 880
    },
    {
      "epoch": 13.54,
      "grad_norm": 3.189384698867798,
      "learning_rate": 3.01994301994302e-05,
      "loss": 0.0559,
      "step": 890
    },
    {
      "epoch": 13.69,
      "grad_norm": 2.8496036529541016,
      "learning_rate": 2.9914529914529915e-05,
      "loss": 0.0688,
      "step": 900
    },
    {
      "epoch": 13.84,
      "grad_norm": 0.6937215328216553,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.073,
      "step": 910
    },
    {
      "epoch": 13.99,
      "grad_norm": 1.4593366384506226,
      "learning_rate": 2.9344729344729345e-05,
      "loss": 0.0834,
      "step": 920
    },
    {
      "epoch": 13.99,
      "eval_accuracy": 0.72,
      "eval_loss": 1.3873815536499023,
      "eval_runtime": 8.1063,
      "eval_samples_per_second": 43.176,
      "eval_steps_per_second": 3.701,
      "step": 920
    },
    {
      "epoch": 14.14,
      "grad_norm": 0.6792957186698914,
      "learning_rate": 2.9059829059829063e-05,
      "loss": 0.0434,
      "step": 930
    },
    {
      "epoch": 14.3,
      "grad_norm": 1.9660212993621826,
      "learning_rate": 2.8774928774928778e-05,
      "loss": 0.0457,
      "step": 940
    },
    {
      "epoch": 14.45,
      "grad_norm": 1.9186339378356934,
      "learning_rate": 2.8490028490028492e-05,
      "loss": 0.0485,
      "step": 950
    },
    {
      "epoch": 14.6,
      "grad_norm": 1.0086941719055176,
      "learning_rate": 2.8205128205128207e-05,
      "loss": 0.0472,
      "step": 960
    },
    {
      "epoch": 14.75,
      "grad_norm": 2.760943651199341,
      "learning_rate": 2.7920227920227922e-05,
      "loss": 0.0733,
      "step": 970
    },
    {
      "epoch": 14.9,
      "grad_norm": 0.8688881993293762,
      "learning_rate": 2.7635327635327633e-05,
      "loss": 0.0813,
      "step": 980
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7257142857142858,
      "eval_loss": 1.3705737590789795,
      "eval_runtime": 8.1151,
      "eval_samples_per_second": 43.13,
      "eval_steps_per_second": 3.697,
      "step": 986
    },
    {
      "epoch": 15.06,
      "grad_norm": 0.6380533576011658,
      "learning_rate": 2.7350427350427355e-05,
      "loss": 0.0466,
      "step": 990
    },
    {
      "epoch": 15.21,
      "grad_norm": 6.788400650024414,
      "learning_rate": 2.706552706552707e-05,
      "loss": 0.044,
      "step": 1000
    },
    {
      "epoch": 15.36,
      "grad_norm": 2.104766607284546,
      "learning_rate": 2.6780626780626784e-05,
      "loss": 0.0723,
      "step": 1010
    },
    {
      "epoch": 15.51,
      "grad_norm": 1.0589812994003296,
      "learning_rate": 2.64957264957265e-05,
      "loss": 0.0628,
      "step": 1020
    },
    {
      "epoch": 15.67,
      "grad_norm": 1.543593168258667,
      "learning_rate": 2.621082621082621e-05,
      "loss": 0.0485,
      "step": 1030
    },
    {
      "epoch": 15.82,
      "grad_norm": 2.2463526725769043,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.0442,
      "step": 1040
    },
    {
      "epoch": 15.97,
      "grad_norm": 5.468172550201416,
      "learning_rate": 2.564102564102564e-05,
      "loss": 0.0423,
      "step": 1050
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7228571428571429,
      "eval_loss": 1.3519986867904663,
      "eval_runtime": 8.094,
      "eval_samples_per_second": 43.242,
      "eval_steps_per_second": 3.706,
      "step": 1052
    },
    {
      "epoch": 16.12,
      "grad_norm": 2.093841791152954,
      "learning_rate": 2.535612535612536e-05,
      "loss": 0.0532,
      "step": 1060
    },
    {
      "epoch": 16.27,
      "grad_norm": 0.7975372672080994,
      "learning_rate": 2.5071225071225073e-05,
      "loss": 0.0273,
      "step": 1070
    },
    {
      "epoch": 16.43,
      "grad_norm": 6.552361965179443,
      "learning_rate": 2.4786324786324787e-05,
      "loss": 0.0643,
      "step": 1080
    },
    {
      "epoch": 16.58,
      "grad_norm": 1.8863351345062256,
      "learning_rate": 2.4501424501424502e-05,
      "loss": 0.0345,
      "step": 1090
    },
    {
      "epoch": 16.73,
      "grad_norm": 0.8653244376182556,
      "learning_rate": 2.4216524216524217e-05,
      "loss": 0.0502,
      "step": 1100
    },
    {
      "epoch": 16.88,
      "grad_norm": 0.7265773415565491,
      "learning_rate": 2.3931623931623935e-05,
      "loss": 0.067,
      "step": 1110
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.7228571428571429,
      "eval_loss": 1.3108690977096558,
      "eval_runtime": 8.0282,
      "eval_samples_per_second": 43.597,
      "eval_steps_per_second": 3.737,
      "step": 1117
    },
    {
      "epoch": 17.03,
      "grad_norm": 0.5706465244293213,
      "learning_rate": 2.364672364672365e-05,
      "loss": 0.0456,
      "step": 1120
    },
    {
      "epoch": 17.19,
      "grad_norm": 0.4868156313896179,
      "learning_rate": 2.336182336182336e-05,
      "loss": 0.0239,
      "step": 1130
    },
    {
      "epoch": 17.34,
      "grad_norm": 0.2969132661819458,
      "learning_rate": 2.307692307692308e-05,
      "loss": 0.0258,
      "step": 1140
    },
    {
      "epoch": 17.49,
      "grad_norm": 0.7196402549743652,
      "learning_rate": 2.2792022792022794e-05,
      "loss": 0.0307,
      "step": 1150
    },
    {
      "epoch": 17.64,
      "grad_norm": 0.6792505383491516,
      "learning_rate": 2.250712250712251e-05,
      "loss": 0.0357,
      "step": 1160
    },
    {
      "epoch": 17.79,
      "grad_norm": 1.3564707040786743,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.0447,
      "step": 1170
    },
    {
      "epoch": 17.95,
      "grad_norm": 0.7506925463676453,
      "learning_rate": 2.1937321937321938e-05,
      "loss": 0.0438,
      "step": 1180
    },
    {
      "epoch": 17.99,
      "eval_accuracy": 0.7171428571428572,
      "eval_loss": 1.3395991325378418,
      "eval_runtime": 7.9804,
      "eval_samples_per_second": 43.857,
      "eval_steps_per_second": 3.759,
      "step": 1183
    },
    {
      "epoch": 18.1,
      "grad_norm": 0.2639639377593994,
      "learning_rate": 2.1652421652421653e-05,
      "loss": 0.0364,
      "step": 1190
    },
    {
      "epoch": 18.25,
      "grad_norm": 0.6512497067451477,
      "learning_rate": 2.1367521367521368e-05,
      "loss": 0.035,
      "step": 1200
    },
    {
      "epoch": 18.4,
      "grad_norm": 0.36454707384109497,
      "learning_rate": 2.1082621082621086e-05,
      "loss": 0.031,
      "step": 1210
    },
    {
      "epoch": 18.56,
      "grad_norm": 1.9671510457992554,
      "learning_rate": 2.07977207977208e-05,
      "loss": 0.0365,
      "step": 1220
    },
    {
      "epoch": 18.71,
      "grad_norm": 2.5179057121276855,
      "learning_rate": 2.0512820512820512e-05,
      "loss": 0.0343,
      "step": 1230
    },
    {
      "epoch": 18.86,
      "grad_norm": 0.5848199725151062,
      "learning_rate": 2.022792022792023e-05,
      "loss": 0.0399,
      "step": 1240
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7257142857142858,
      "eval_loss": 1.3867747783660889,
      "eval_runtime": 7.995,
      "eval_samples_per_second": 43.778,
      "eval_steps_per_second": 3.752,
      "step": 1249
    },
    {
      "epoch": 19.01,
      "grad_norm": 1.6354899406433105,
      "learning_rate": 1.9943019943019945e-05,
      "loss": 0.0488,
      "step": 1250
    },
    {
      "epoch": 19.16,
      "grad_norm": 4.593708038330078,
      "learning_rate": 1.965811965811966e-05,
      "loss": 0.0326,
      "step": 1260
    },
    {
      "epoch": 19.32,
      "grad_norm": 0.5004624128341675,
      "learning_rate": 1.9373219373219374e-05,
      "loss": 0.0312,
      "step": 1270
    },
    {
      "epoch": 19.47,
      "grad_norm": 3.982077121734619,
      "learning_rate": 1.908831908831909e-05,
      "loss": 0.0367,
      "step": 1280
    },
    {
      "epoch": 19.62,
      "grad_norm": 1.31514573097229,
      "learning_rate": 1.8803418803418804e-05,
      "loss": 0.0288,
      "step": 1290
    },
    {
      "epoch": 19.77,
      "grad_norm": 2.477193593978882,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.0188,
      "step": 1300
    },
    {
      "epoch": 19.92,
      "grad_norm": 1.13873291015625,
      "learning_rate": 1.8233618233618236e-05,
      "loss": 0.022,
      "step": 1310
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7257142857142858,
      "eval_loss": 1.3571245670318604,
      "eval_runtime": 7.9825,
      "eval_samples_per_second": 43.846,
      "eval_steps_per_second": 3.758,
      "step": 1315
    },
    {
      "epoch": 20.08,
      "grad_norm": 0.1975400298833847,
      "learning_rate": 1.794871794871795e-05,
      "loss": 0.016,
      "step": 1320
    },
    {
      "epoch": 20.23,
      "grad_norm": 2.610684871673584,
      "learning_rate": 1.7663817663817662e-05,
      "loss": 0.0364,
      "step": 1330
    },
    {
      "epoch": 20.38,
      "grad_norm": 2.5552616119384766,
      "learning_rate": 1.737891737891738e-05,
      "loss": 0.0209,
      "step": 1340
    },
    {
      "epoch": 20.53,
      "grad_norm": 1.8163336515426636,
      "learning_rate": 1.7094017094017095e-05,
      "loss": 0.014,
      "step": 1350
    },
    {
      "epoch": 20.68,
      "grad_norm": 2.3455891609191895,
      "learning_rate": 1.680911680911681e-05,
      "loss": 0.015,
      "step": 1360
    },
    {
      "epoch": 20.84,
      "grad_norm": 1.0087167024612427,
      "learning_rate": 1.6524216524216525e-05,
      "loss": 0.021,
      "step": 1370
    },
    {
      "epoch": 20.99,
      "grad_norm": 4.435824394226074,
      "learning_rate": 1.623931623931624e-05,
      "loss": 0.0326,
      "step": 1380
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.7342857142857143,
      "eval_loss": 1.316083550453186,
      "eval_runtime": 8.0694,
      "eval_samples_per_second": 43.374,
      "eval_steps_per_second": 3.718,
      "step": 1380
    },
    {
      "epoch": 21.14,
      "grad_norm": 2.11207914352417,
      "learning_rate": 1.5954415954415954e-05,
      "loss": 0.0249,
      "step": 1390
    },
    {
      "epoch": 21.29,
      "grad_norm": 0.3664344251155853,
      "learning_rate": 1.566951566951567e-05,
      "loss": 0.0168,
      "step": 1400
    },
    {
      "epoch": 21.44,
      "grad_norm": 2.1651501655578613,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 0.0269,
      "step": 1410
    },
    {
      "epoch": 21.6,
      "grad_norm": 6.236063480377197,
      "learning_rate": 1.50997150997151e-05,
      "loss": 0.0266,
      "step": 1420
    },
    {
      "epoch": 21.75,
      "grad_norm": 0.4216400980949402,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.0276,
      "step": 1430
    },
    {
      "epoch": 21.9,
      "grad_norm": 0.34464436769485474,
      "learning_rate": 1.4529914529914531e-05,
      "loss": 0.0217,
      "step": 1440
    },
    {
      "epoch": 21.99,
      "eval_accuracy": 0.74,
      "eval_loss": 1.3431659936904907,
      "eval_runtime": 8.2814,
      "eval_samples_per_second": 42.263,
      "eval_steps_per_second": 3.623,
      "step": 1446
    },
    {
      "epoch": 22.05,
      "grad_norm": 0.550115168094635,
      "learning_rate": 1.4245014245014246e-05,
      "loss": 0.0116,
      "step": 1450
    },
    {
      "epoch": 22.21,
      "grad_norm": 0.7523086071014404,
      "learning_rate": 1.3960113960113961e-05,
      "loss": 0.0167,
      "step": 1460
    },
    {
      "epoch": 22.36,
      "grad_norm": 0.4303203821182251,
      "learning_rate": 1.3675213675213677e-05,
      "loss": 0.0152,
      "step": 1470
    },
    {
      "epoch": 22.51,
      "grad_norm": 0.9599018096923828,
      "learning_rate": 1.3390313390313392e-05,
      "loss": 0.0129,
      "step": 1480
    },
    {
      "epoch": 22.66,
      "grad_norm": 0.6038946509361267,
      "learning_rate": 1.3105413105413105e-05,
      "loss": 0.0153,
      "step": 1490
    },
    {
      "epoch": 22.81,
      "grad_norm": 2.5680289268493652,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.0302,
      "step": 1500
    },
    {
      "epoch": 22.97,
      "grad_norm": 0.7856467366218567,
      "learning_rate": 1.2535612535612536e-05,
      "loss": 0.0185,
      "step": 1510
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7342857142857143,
      "eval_loss": 1.3489614725112915,
      "eval_runtime": 8.0906,
      "eval_samples_per_second": 43.26,
      "eval_steps_per_second": 3.708,
      "step": 1512
    },
    {
      "epoch": 23.12,
      "grad_norm": 0.6607487201690674,
      "learning_rate": 1.2250712250712251e-05,
      "loss": 0.014,
      "step": 1520
    },
    {
      "epoch": 23.27,
      "grad_norm": 0.14532317221164703,
      "learning_rate": 1.1965811965811967e-05,
      "loss": 0.02,
      "step": 1530
    },
    {
      "epoch": 23.42,
      "grad_norm": 0.3423649072647095,
      "learning_rate": 1.168091168091168e-05,
      "loss": 0.0156,
      "step": 1540
    },
    {
      "epoch": 23.57,
      "grad_norm": 0.15258215367794037,
      "learning_rate": 1.1396011396011397e-05,
      "loss": 0.0087,
      "step": 1550
    },
    {
      "epoch": 23.73,
      "grad_norm": 0.20266969501972198,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.0257,
      "step": 1560
    },
    {
      "epoch": 23.88,
      "grad_norm": 0.46567222476005554,
      "learning_rate": 1.0826210826210826e-05,
      "loss": 0.0247,
      "step": 1570
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7285714285714285,
      "eval_loss": 1.3712286949157715,
      "eval_runtime": 8.0686,
      "eval_samples_per_second": 43.378,
      "eval_steps_per_second": 3.718,
      "step": 1578
    },
    {
      "epoch": 24.03,
      "grad_norm": 0.43167567253112793,
      "learning_rate": 1.0541310541310543e-05,
      "loss": 0.0151,
      "step": 1580
    },
    {
      "epoch": 24.18,
      "grad_norm": 0.3076987862586975,
      "learning_rate": 1.0256410256410256e-05,
      "loss": 0.0145,
      "step": 1590
    },
    {
      "epoch": 24.33,
      "grad_norm": 0.28051629662513733,
      "learning_rate": 9.971509971509972e-06,
      "loss": 0.0068,
      "step": 1600
    },
    {
      "epoch": 24.49,
      "grad_norm": 0.17808012664318085,
      "learning_rate": 9.686609686609687e-06,
      "loss": 0.015,
      "step": 1610
    },
    {
      "epoch": 24.64,
      "grad_norm": 0.46903499960899353,
      "learning_rate": 9.401709401709402e-06,
      "loss": 0.0111,
      "step": 1620
    },
    {
      "epoch": 24.79,
      "grad_norm": 3.1560771465301514,
      "learning_rate": 9.116809116809118e-06,
      "loss": 0.0198,
      "step": 1630
    },
    {
      "epoch": 24.94,
      "grad_norm": 1.1795072555541992,
      "learning_rate": 8.831908831908831e-06,
      "loss": 0.0147,
      "step": 1640
    },
    {
      "epoch": 24.99,
      "eval_accuracy": 0.7285714285714285,
      "eval_loss": 1.3384881019592285,
      "eval_runtime": 8.045,
      "eval_samples_per_second": 43.505,
      "eval_steps_per_second": 3.729,
      "step": 1643
    },
    {
      "epoch": 25.1,
      "grad_norm": 2.324568748474121,
      "learning_rate": 8.547008547008548e-06,
      "loss": 0.0147,
      "step": 1650
    },
    {
      "epoch": 25.25,
      "grad_norm": 0.6252849102020264,
      "learning_rate": 8.262108262108262e-06,
      "loss": 0.0141,
      "step": 1660
    },
    {
      "epoch": 25.4,
      "grad_norm": 2.523175001144409,
      "learning_rate": 7.977207977207977e-06,
      "loss": 0.0288,
      "step": 1670
    },
    {
      "epoch": 25.55,
      "grad_norm": 0.6321514844894409,
      "learning_rate": 7.692307692307694e-06,
      "loss": 0.0151,
      "step": 1680
    },
    {
      "epoch": 25.7,
      "grad_norm": 0.1425185650587082,
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.0093, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 25.86, | |
| "grad_norm": 0.6362813115119934, | |
| "learning_rate": 7.122507122507123e-06, | |
| "loss": 0.0164, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 25.99, | |
| "eval_accuracy": 0.7228571428571429, | |
| "eval_loss": 1.352995753288269, | |
| "eval_runtime": 7.9452, | |
| "eval_samples_per_second": 44.052, | |
| "eval_steps_per_second": 3.776, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 26.01, | |
| "grad_norm": 0.11444679647684097, | |
| "learning_rate": 6.837606837606839e-06, | |
| "loss": 0.0198, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 26.16, | |
| "grad_norm": 0.34033504128456116, | |
| "learning_rate": 6.5527065527065525e-06, | |
| "loss": 0.013, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 26.31, | |
| "grad_norm": 1.7793394327163696, | |
| "learning_rate": 6.267806267806268e-06, | |
| "loss": 0.0122, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 26.46, | |
| "grad_norm": 0.11746495217084885, | |
| "learning_rate": 5.982905982905984e-06, | |
| "loss": 0.0153, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 26.62, | |
| "grad_norm": 4.355152606964111, | |
| "learning_rate": 5.6980056980056985e-06, | |
| "loss": 0.0153, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 26.77, | |
| "grad_norm": 0.5570241808891296, | |
| "learning_rate": 5.413105413105413e-06, | |
| "loss": 0.013, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 26.92, | |
| "grad_norm": 0.22895778715610504, | |
| "learning_rate": 5.128205128205128e-06, | |
| "loss": 0.0148, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7257142857142858, | |
| "eval_loss": 1.3564364910125732, | |
| "eval_runtime": 8.0323, | |
| "eval_samples_per_second": 43.574, | |
| "eval_steps_per_second": 3.735, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 27.07, | |
| "grad_norm": 1.6692248582839966, | |
| "learning_rate": 4.8433048433048435e-06, | |
| "loss": 0.0217, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 27.22, | |
| "grad_norm": 0.4036758542060852, | |
| "learning_rate": 4.558404558404559e-06, | |
| "loss": 0.0068, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 27.38, | |
| "grad_norm": 0.1422310322523117, | |
| "learning_rate": 4.273504273504274e-06, | |
| "loss": 0.0086, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 27.53, | |
| "grad_norm": 0.36455395817756653, | |
| "learning_rate": 3.988603988603989e-06, | |
| "loss": 0.0097, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 27.68, | |
| "grad_norm": 2.0207414627075195, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.009, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 27.83, | |
| "grad_norm": 0.2137887328863144, | |
| "learning_rate": 3.4188034188034193e-06, | |
| "loss": 0.0073, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 27.98, | |
| "grad_norm": 1.0078092813491821, | |
| "learning_rate": 3.133903133903134e-06, | |
| "loss": 0.0095, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7228571428571429, | |
| "eval_loss": 1.3562867641448975, | |
| "eval_runtime": 8.0528, | |
| "eval_samples_per_second": 43.463, | |
| "eval_steps_per_second": 3.725, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 28.14, | |
| "grad_norm": 0.11777978390455246, | |
| "learning_rate": 2.8490028490028492e-06, | |
| "loss": 0.0076, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 28.29, | |
| "grad_norm": 0.4021410644054413, | |
| "learning_rate": 2.564102564102564e-06, | |
| "loss": 0.0091, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "grad_norm": 0.19985055923461914, | |
| "learning_rate": 2.2792022792022796e-06, | |
| "loss": 0.0091, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 28.59, | |
| "grad_norm": 0.30899757146835327, | |
| "learning_rate": 1.9943019943019943e-06, | |
| "loss": 0.0096, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 28.75, | |
| "grad_norm": 0.1285697966814041, | |
| "learning_rate": 1.7094017094017097e-06, | |
| "loss": 0.0108, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "grad_norm": 1.3066548109054565, | |
| "learning_rate": 1.4245014245014246e-06, | |
| "loss": 0.0105, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.7171428571428572, | |
| "eval_loss": 1.3570489883422852, | |
| "eval_runtime": 8.0496, | |
| "eval_samples_per_second": 43.481, | |
| "eval_steps_per_second": 3.727, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 29.05, | |
| "grad_norm": 0.1782771348953247, | |
| "learning_rate": 1.1396011396011398e-06, | |
| "loss": 0.0109, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 29.2, | |
| "grad_norm": 0.0780392736196518, | |
| "learning_rate": 8.547008547008548e-07, | |
| "loss": 0.0058, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 29.35, | |
| "grad_norm": 1.1122561693191528, | |
| "learning_rate": 5.698005698005699e-07, | |
| "loss": 0.012, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 29.51, | |
| "grad_norm": 0.21714162826538086, | |
| "learning_rate": 2.8490028490028494e-07, | |
| "loss": 0.0088, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 29.66, | |
| "grad_norm": 0.504612922668457, | |
| "learning_rate": 0.0, | |
| "loss": 0.0105, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 29.66, | |
| "eval_accuracy": 0.7171428571428572, | |
| "eval_loss": 1.3564331531524658, | |
| "eval_runtime": 8.0797, | |
| "eval_samples_per_second": 43.319, | |
| "eval_steps_per_second": 3.713, | |
| "step": 1950 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1950, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "total_flos": 5.87953618460352e+18, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |