| { |
| "best_metric": 0.8068181818181818, |
| "best_model_checkpoint": "2024_08_16_swinv2-base-patch4-window8-256/checkpoint-1131", |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 2610, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11494252873563218, |
| "grad_norm": 2.4547059535980225, |
| "learning_rate": 3.831417624521073e-07, |
| "loss": 0.7176, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.22988505747126436, |
| "grad_norm": 9.423152923583984, |
| "learning_rate": 7.662835249042146e-07, |
| "loss": 0.7432, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3448275862068966, |
| "grad_norm": 9.08972454071045, |
| "learning_rate": 1.1494252873563219e-06, |
| "loss": 0.7233, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.45977011494252873, |
| "grad_norm": 12.878296852111816, |
| "learning_rate": 1.5325670498084292e-06, |
| "loss": 0.7788, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5747126436781609, |
| "grad_norm": 6.728691577911377, |
| "learning_rate": 1.9157088122605367e-06, |
| "loss": 0.7239, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 8.155921936035156, |
| "learning_rate": 2.2988505747126437e-06, |
| "loss": 0.7309, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8045977011494253, |
| "grad_norm": 3.8686904907226562, |
| "learning_rate": 2.6819923371647512e-06, |
| "loss": 0.6971, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9195402298850575, |
| "grad_norm": 19.639543533325195, |
| "learning_rate": 3.0651340996168583e-06, |
| "loss": 0.6942, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6818181818181818, |
| "eval_loss": 0.6796370148658752, |
| "eval_runtime": 31.4085, |
| "eval_samples_per_second": 2.802, |
| "eval_steps_per_second": 2.802, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.0344827586206897, |
| "grad_norm": 5.551658630371094, |
| "learning_rate": 3.448275862068966e-06, |
| "loss": 0.7127, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.1494252873563218, |
| "grad_norm": 13.254731178283691, |
| "learning_rate": 3.831417624521073e-06, |
| "loss": 0.6705, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.264367816091954, |
| "grad_norm": 2.0981507301330566, |
| "learning_rate": 4.214559386973181e-06, |
| "loss": 0.648, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.3793103448275863, |
| "grad_norm": 3.42364764213562, |
| "learning_rate": 4.5977011494252875e-06, |
| "loss": 0.5994, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.4942528735632183, |
| "grad_norm": 6.003719806671143, |
| "learning_rate": 4.980842911877395e-06, |
| "loss": 0.6737, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.6091954022988506, |
| "grad_norm": 5.974655628204346, |
| "learning_rate": 5.3639846743295025e-06, |
| "loss": 0.6538, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.7241379310344827, |
| "grad_norm": 14.554160118103027, |
| "learning_rate": 5.747126436781609e-06, |
| "loss": 0.6732, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.839080459770115, |
| "grad_norm": 20.19268226623535, |
| "learning_rate": 6.130268199233717e-06, |
| "loss": 0.622, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.9540229885057472, |
| "grad_norm": 7.188900947570801, |
| "learning_rate": 6.513409961685824e-06, |
| "loss": 0.6661, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7045454545454546, |
| "eval_loss": 0.5972675085067749, |
| "eval_runtime": 30.8716, |
| "eval_samples_per_second": 2.851, |
| "eval_steps_per_second": 2.851, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.0689655172413794, |
| "grad_norm": 5.064095497131348, |
| "learning_rate": 6.896551724137932e-06, |
| "loss": 0.4822, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.1839080459770113, |
| "grad_norm": 7.63368558883667, |
| "learning_rate": 7.279693486590039e-06, |
| "loss": 0.7185, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.2988505747126435, |
| "grad_norm": 6.4705305099487305, |
| "learning_rate": 7.662835249042147e-06, |
| "loss": 0.5569, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.413793103448276, |
| "grad_norm": 5.615090847015381, |
| "learning_rate": 8.045977011494253e-06, |
| "loss": 0.6903, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.528735632183908, |
| "grad_norm": 3.7187066078186035, |
| "learning_rate": 8.429118773946362e-06, |
| "loss": 0.6982, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.6436781609195403, |
| "grad_norm": 9.921281814575195, |
| "learning_rate": 8.812260536398468e-06, |
| "loss": 0.7645, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.7586206896551726, |
| "grad_norm": 9.287795066833496, |
| "learning_rate": 9.195402298850575e-06, |
| "loss": 0.6746, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.873563218390805, |
| "grad_norm": 9.319347381591797, |
| "learning_rate": 9.578544061302683e-06, |
| "loss": 0.5795, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.9885057471264367, |
| "grad_norm": 5.477638244628906, |
| "learning_rate": 9.96168582375479e-06, |
| "loss": 0.5666, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7045454545454546, |
| "eval_loss": 0.584419846534729, |
| "eval_runtime": 32.2691, |
| "eval_samples_per_second": 2.727, |
| "eval_steps_per_second": 2.727, |
| "step": 261 |
| }, |
| { |
| "epoch": 3.103448275862069, |
| "grad_norm": 7.59515380859375, |
| "learning_rate": 9.96168582375479e-06, |
| "loss": 0.6473, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.218390804597701, |
| "grad_norm": 17.690759658813477, |
| "learning_rate": 9.919114516815667e-06, |
| "loss": 0.6716, |
| "step": 280 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 17.594921112060547, |
| "learning_rate": 9.876543209876543e-06, |
| "loss": 0.6721, |
| "step": 290 |
| }, |
| { |
| "epoch": 3.4482758620689653, |
| "grad_norm": 9.295193672180176, |
| "learning_rate": 9.833971902937422e-06, |
| "loss": 0.4501, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.5632183908045976, |
| "grad_norm": 14.351675987243652, |
| "learning_rate": 9.791400595998298e-06, |
| "loss": 0.5773, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.67816091954023, |
| "grad_norm": 8.47179126739502, |
| "learning_rate": 9.748829289059175e-06, |
| "loss": 0.6971, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.793103448275862, |
| "grad_norm": 9.863703727722168, |
| "learning_rate": 9.706257982120052e-06, |
| "loss": 0.656, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.9080459770114944, |
| "grad_norm": 10.022062301635742, |
| "learning_rate": 9.663686675180928e-06, |
| "loss": 0.6134, |
| "step": 340 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7045454545454546, |
| "eval_loss": 0.5798187851905823, |
| "eval_runtime": 28.7377, |
| "eval_samples_per_second": 3.062, |
| "eval_steps_per_second": 3.062, |
| "step": 348 |
| }, |
| { |
| "epoch": 4.022988505747127, |
| "grad_norm": 3.447582244873047, |
| "learning_rate": 9.621115368241805e-06, |
| "loss": 0.706, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.137931034482759, |
| "grad_norm": 12.809653282165527, |
| "learning_rate": 9.578544061302683e-06, |
| "loss": 0.6176, |
| "step": 360 |
| }, |
| { |
| "epoch": 4.252873563218391, |
| "grad_norm": 2.6797659397125244, |
| "learning_rate": 9.53597275436356e-06, |
| "loss": 0.4304, |
| "step": 370 |
| }, |
| { |
| "epoch": 4.3678160919540225, |
| "grad_norm": 10.38992691040039, |
| "learning_rate": 9.493401447424437e-06, |
| "loss": 0.6554, |
| "step": 380 |
| }, |
| { |
| "epoch": 4.482758620689655, |
| "grad_norm": 10.206748008728027, |
| "learning_rate": 9.450830140485315e-06, |
| "loss": 0.6586, |
| "step": 390 |
| }, |
| { |
| "epoch": 4.597701149425287, |
| "grad_norm": 8.086981773376465, |
| "learning_rate": 9.408258833546192e-06, |
| "loss": 0.6302, |
| "step": 400 |
| }, |
| { |
| "epoch": 4.712643678160919, |
| "grad_norm": 4.448511123657227, |
| "learning_rate": 9.365687526607068e-06, |
| "loss": 0.6168, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.827586206896552, |
| "grad_norm": 21.668298721313477, |
| "learning_rate": 9.323116219667945e-06, |
| "loss": 0.8768, |
| "step": 420 |
| }, |
| { |
| "epoch": 4.942528735632184, |
| "grad_norm": 7.051491737365723, |
| "learning_rate": 9.280544912728822e-06, |
| "loss": 0.5207, |
| "step": 430 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.6931818181818182, |
| "eval_loss": 0.5816909670829773, |
| "eval_runtime": 32.2089, |
| "eval_samples_per_second": 2.732, |
| "eval_steps_per_second": 2.732, |
| "step": 435 |
| }, |
| { |
| "epoch": 5.057471264367816, |
| "grad_norm": 22.34807014465332, |
| "learning_rate": 9.237973605789698e-06, |
| "loss": 0.6565, |
| "step": 440 |
| }, |
| { |
| "epoch": 5.172413793103448, |
| "grad_norm": 6.780879974365234, |
| "learning_rate": 9.195402298850575e-06, |
| "loss": 0.6252, |
| "step": 450 |
| }, |
| { |
| "epoch": 5.287356321839081, |
| "grad_norm": 6.310813903808594, |
| "learning_rate": 9.152830991911452e-06, |
| "loss": 0.567, |
| "step": 460 |
| }, |
| { |
| "epoch": 5.402298850574713, |
| "grad_norm": 4.648807048797607, |
| "learning_rate": 9.110259684972328e-06, |
| "loss": 0.5762, |
| "step": 470 |
| }, |
| { |
| "epoch": 5.517241379310345, |
| "grad_norm": 3.909266233444214, |
| "learning_rate": 9.067688378033207e-06, |
| "loss": 0.4264, |
| "step": 480 |
| }, |
| { |
| "epoch": 5.6321839080459775, |
| "grad_norm": 8.082531929016113, |
| "learning_rate": 9.025117071094083e-06, |
| "loss": 0.6777, |
| "step": 490 |
| }, |
| { |
| "epoch": 5.747126436781609, |
| "grad_norm": 14.604034423828125, |
| "learning_rate": 8.98254576415496e-06, |
| "loss": 0.7608, |
| "step": 500 |
| }, |
| { |
| "epoch": 5.862068965517241, |
| "grad_norm": 17.84528350830078, |
| "learning_rate": 8.939974457215838e-06, |
| "loss": 0.7654, |
| "step": 510 |
| }, |
| { |
| "epoch": 5.977011494252873, |
| "grad_norm": 13.81564998626709, |
| "learning_rate": 8.897403150276715e-06, |
| "loss": 0.75, |
| "step": 520 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.7159090909090909, |
| "eval_loss": 0.5488123297691345, |
| "eval_runtime": 30.0538, |
| "eval_samples_per_second": 2.928, |
| "eval_steps_per_second": 2.928, |
| "step": 522 |
| }, |
| { |
| "epoch": 6.091954022988506, |
| "grad_norm": 11.913701057434082, |
| "learning_rate": 8.854831843337592e-06, |
| "loss": 0.7151, |
| "step": 530 |
| }, |
| { |
| "epoch": 6.206896551724138, |
| "grad_norm": 10.623793601989746, |
| "learning_rate": 8.812260536398468e-06, |
| "loss": 0.5189, |
| "step": 540 |
| }, |
| { |
| "epoch": 6.32183908045977, |
| "grad_norm": 6.790339946746826, |
| "learning_rate": 8.769689229459345e-06, |
| "loss": 0.5661, |
| "step": 550 |
| }, |
| { |
| "epoch": 6.436781609195402, |
| "grad_norm": 13.506879806518555, |
| "learning_rate": 8.727117922520222e-06, |
| "loss": 0.6536, |
| "step": 560 |
| }, |
| { |
| "epoch": 6.551724137931035, |
| "grad_norm": 9.31980037689209, |
| "learning_rate": 8.684546615581098e-06, |
| "loss": 0.5059, |
| "step": 570 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 8.148804664611816, |
| "learning_rate": 8.641975308641975e-06, |
| "loss": 0.7024, |
| "step": 580 |
| }, |
| { |
| "epoch": 6.781609195402299, |
| "grad_norm": 19.33616828918457, |
| "learning_rate": 8.599404001702853e-06, |
| "loss": 0.5221, |
| "step": 590 |
| }, |
| { |
| "epoch": 6.896551724137931, |
| "grad_norm": 13.092130661010742, |
| "learning_rate": 8.55683269476373e-06, |
| "loss": 0.4155, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.7045454545454546, |
| "eval_loss": 0.5373237133026123, |
| "eval_runtime": 28.4584, |
| "eval_samples_per_second": 3.092, |
| "eval_steps_per_second": 3.092, |
| "step": 609 |
| }, |
| { |
| "epoch": 7.011494252873563, |
| "grad_norm": 6.196229934692383, |
| "learning_rate": 8.514261387824607e-06, |
| "loss": 0.6034, |
| "step": 610 |
| }, |
| { |
| "epoch": 7.126436781609195, |
| "grad_norm": 20.647804260253906, |
| "learning_rate": 8.471690080885483e-06, |
| "loss": 0.5305, |
| "step": 620 |
| }, |
| { |
| "epoch": 7.241379310344827, |
| "grad_norm": 5.7887444496154785, |
| "learning_rate": 8.429118773946362e-06, |
| "loss": 0.5569, |
| "step": 630 |
| }, |
| { |
| "epoch": 7.35632183908046, |
| "grad_norm": 5.177017688751221, |
| "learning_rate": 8.386547467007238e-06, |
| "loss": 0.7564, |
| "step": 640 |
| }, |
| { |
| "epoch": 7.471264367816092, |
| "grad_norm": 5.901717662811279, |
| "learning_rate": 8.343976160068115e-06, |
| "loss": 0.5897, |
| "step": 650 |
| }, |
| { |
| "epoch": 7.586206896551724, |
| "grad_norm": 7.752755641937256, |
| "learning_rate": 8.301404853128992e-06, |
| "loss": 0.6331, |
| "step": 660 |
| }, |
| { |
| "epoch": 7.7011494252873565, |
| "grad_norm": 11.76085376739502, |
| "learning_rate": 8.258833546189868e-06, |
| "loss": 0.7341, |
| "step": 670 |
| }, |
| { |
| "epoch": 7.816091954022989, |
| "grad_norm": 5.332805156707764, |
| "learning_rate": 8.216262239250745e-06, |
| "loss": 0.5145, |
| "step": 680 |
| }, |
| { |
| "epoch": 7.931034482758621, |
| "grad_norm": 4.825902938842773, |
| "learning_rate": 8.173690932311623e-06, |
| "loss": 0.5122, |
| "step": 690 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.7386363636363636, |
| "eval_loss": 0.5056865811347961, |
| "eval_runtime": 27.6759, |
| "eval_samples_per_second": 3.18, |
| "eval_steps_per_second": 3.18, |
| "step": 696 |
| }, |
| { |
| "epoch": 8.045977011494253, |
| "grad_norm": 16.281993865966797, |
| "learning_rate": 8.1311196253725e-06, |
| "loss": 0.7392, |
| "step": 700 |
| }, |
| { |
| "epoch": 8.160919540229886, |
| "grad_norm": 7.009737014770508, |
| "learning_rate": 8.088548318433377e-06, |
| "loss": 0.4734, |
| "step": 710 |
| }, |
| { |
| "epoch": 8.275862068965518, |
| "grad_norm": 15.383010864257812, |
| "learning_rate": 8.045977011494253e-06, |
| "loss": 0.5816, |
| "step": 720 |
| }, |
| { |
| "epoch": 8.39080459770115, |
| "grad_norm": 9.5961332321167, |
| "learning_rate": 8.00340570455513e-06, |
| "loss": 0.6211, |
| "step": 730 |
| }, |
| { |
| "epoch": 8.505747126436782, |
| "grad_norm": 23.88817024230957, |
| "learning_rate": 7.960834397616007e-06, |
| "loss": 0.6137, |
| "step": 740 |
| }, |
| { |
| "epoch": 8.620689655172415, |
| "grad_norm": 3.827619791030884, |
| "learning_rate": 7.918263090676885e-06, |
| "loss": 0.5017, |
| "step": 750 |
| }, |
| { |
| "epoch": 8.735632183908045, |
| "grad_norm": 8.33129596710205, |
| "learning_rate": 7.875691783737762e-06, |
| "loss": 0.7234, |
| "step": 760 |
| }, |
| { |
| "epoch": 8.850574712643677, |
| "grad_norm": 21.81205177307129, |
| "learning_rate": 7.833120476798638e-06, |
| "loss": 0.5398, |
| "step": 770 |
| }, |
| { |
| "epoch": 8.96551724137931, |
| "grad_norm": 18.47542381286621, |
| "learning_rate": 7.790549169859515e-06, |
| "loss": 0.722, |
| "step": 780 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.7045454545454546, |
| "eval_loss": 0.49514129757881165, |
| "eval_runtime": 31.6681, |
| "eval_samples_per_second": 2.779, |
| "eval_steps_per_second": 2.779, |
| "step": 783 |
| }, |
| { |
| "epoch": 9.080459770114942, |
| "grad_norm": 4.60050630569458, |
| "learning_rate": 7.747977862920393e-06, |
| "loss": 0.5549, |
| "step": 790 |
| }, |
| { |
| "epoch": 9.195402298850574, |
| "grad_norm": 18.596378326416016, |
| "learning_rate": 7.70540655598127e-06, |
| "loss": 0.6108, |
| "step": 800 |
| }, |
| { |
| "epoch": 9.310344827586206, |
| "grad_norm": 7.222095012664795, |
| "learning_rate": 7.662835249042147e-06, |
| "loss": 0.4888, |
| "step": 810 |
| }, |
| { |
| "epoch": 9.425287356321839, |
| "grad_norm": 11.702841758728027, |
| "learning_rate": 7.620263942103023e-06, |
| "loss": 0.5889, |
| "step": 820 |
| }, |
| { |
| "epoch": 9.540229885057471, |
| "grad_norm": 13.947061538696289, |
| "learning_rate": 7.5776926351639e-06, |
| "loss": 0.4435, |
| "step": 830 |
| }, |
| { |
| "epoch": 9.655172413793103, |
| "grad_norm": 12.971500396728516, |
| "learning_rate": 7.535121328224777e-06, |
| "loss": 0.5966, |
| "step": 840 |
| }, |
| { |
| "epoch": 9.770114942528735, |
| "grad_norm": 3.2461862564086914, |
| "learning_rate": 7.492550021285654e-06, |
| "loss": 0.4836, |
| "step": 850 |
| }, |
| { |
| "epoch": 9.885057471264368, |
| "grad_norm": 21.08283042907715, |
| "learning_rate": 7.449978714346531e-06, |
| "loss": 0.6623, |
| "step": 860 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 11.169931411743164, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.5301, |
| "step": 870 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7727272727272727, |
| "eval_loss": 0.534870445728302, |
| "eval_runtime": 28.2427, |
| "eval_samples_per_second": 3.116, |
| "eval_steps_per_second": 3.116, |
| "step": 870 |
| }, |
| { |
| "epoch": 10.114942528735632, |
| "grad_norm": 13.018521308898926, |
| "learning_rate": 7.364836100468284e-06, |
| "loss": 0.553, |
| "step": 880 |
| }, |
| { |
| "epoch": 10.229885057471265, |
| "grad_norm": 3.460719347000122, |
| "learning_rate": 7.3222647935291625e-06, |
| "loss": 0.6001, |
| "step": 890 |
| }, |
| { |
| "epoch": 10.344827586206897, |
| "grad_norm": 17.81122398376465, |
| "learning_rate": 7.279693486590039e-06, |
| "loss": 0.568, |
| "step": 900 |
| }, |
| { |
| "epoch": 10.459770114942529, |
| "grad_norm": 10.0086669921875, |
| "learning_rate": 7.237122179650916e-06, |
| "loss": 0.5404, |
| "step": 910 |
| }, |
| { |
| "epoch": 10.574712643678161, |
| "grad_norm": 6.889206409454346, |
| "learning_rate": 7.194550872711793e-06, |
| "loss": 0.6149, |
| "step": 920 |
| }, |
| { |
| "epoch": 10.689655172413794, |
| "grad_norm": 5.562385082244873, |
| "learning_rate": 7.15197956577267e-06, |
| "loss": 0.5564, |
| "step": 930 |
| }, |
| { |
| "epoch": 10.804597701149426, |
| "grad_norm": 11.118896484375, |
| "learning_rate": 7.109408258833547e-06, |
| "loss": 0.7635, |
| "step": 940 |
| }, |
| { |
| "epoch": 10.919540229885058, |
| "grad_norm": 11.55297565460205, |
| "learning_rate": 7.066836951894423e-06, |
| "loss": 0.5881, |
| "step": 950 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.4795035421848297, |
| "eval_runtime": 26.9434, |
| "eval_samples_per_second": 3.266, |
| "eval_steps_per_second": 3.266, |
| "step": 957 |
| }, |
| { |
| "epoch": 11.03448275862069, |
| "grad_norm": 13.425756454467773, |
| "learning_rate": 7.0242656449553e-06, |
| "loss": 0.5024, |
| "step": 960 |
| }, |
| { |
| "epoch": 11.149425287356323, |
| "grad_norm": 6.85365104675293, |
| "learning_rate": 6.9816943380161775e-06, |
| "loss": 0.6155, |
| "step": 970 |
| }, |
| { |
| "epoch": 11.264367816091955, |
| "grad_norm": 5.217472553253174, |
| "learning_rate": 6.939123031077054e-06, |
| "loss": 0.5833, |
| "step": 980 |
| }, |
| { |
| "epoch": 11.379310344827585, |
| "grad_norm": 11.186458587646484, |
| "learning_rate": 6.896551724137932e-06, |
| "loss": 0.5411, |
| "step": 990 |
| }, |
| { |
| "epoch": 11.494252873563218, |
| "grad_norm": 4.962896823883057, |
| "learning_rate": 6.853980417198809e-06, |
| "loss": 0.5027, |
| "step": 1000 |
| }, |
| { |
| "epoch": 11.60919540229885, |
| "grad_norm": 3.186685562133789, |
| "learning_rate": 6.811409110259686e-06, |
| "loss": 0.4292, |
| "step": 1010 |
| }, |
| { |
| "epoch": 11.724137931034482, |
| "grad_norm": 18.477642059326172, |
| "learning_rate": 6.7688378033205625e-06, |
| "loss": 0.6719, |
| "step": 1020 |
| }, |
| { |
| "epoch": 11.839080459770114, |
| "grad_norm": 8.846318244934082, |
| "learning_rate": 6.726266496381439e-06, |
| "loss": 0.6331, |
| "step": 1030 |
| }, |
| { |
| "epoch": 11.954022988505747, |
| "grad_norm": 7.125520706176758, |
| "learning_rate": 6.683695189442317e-06, |
| "loss": 0.5295, |
| "step": 1040 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.4843308627605438, |
| "eval_runtime": 29.3019, |
| "eval_samples_per_second": 3.003, |
| "eval_steps_per_second": 3.003, |
| "step": 1044 |
| }, |
| { |
| "epoch": 12.068965517241379, |
| "grad_norm": 10.659321784973145, |
| "learning_rate": 6.641123882503193e-06, |
| "loss": 0.72, |
| "step": 1050 |
| }, |
| { |
| "epoch": 12.183908045977011, |
| "grad_norm": 11.522878646850586, |
| "learning_rate": 6.59855257556407e-06, |
| "loss": 0.4383, |
| "step": 1060 |
| }, |
| { |
| "epoch": 12.298850574712644, |
| "grad_norm": 9.809093475341797, |
| "learning_rate": 6.555981268624947e-06, |
| "loss": 0.5691, |
| "step": 1070 |
| }, |
| { |
| "epoch": 12.413793103448276, |
| "grad_norm": 12.34288215637207, |
| "learning_rate": 6.513409961685824e-06, |
| "loss": 0.5763, |
| "step": 1080 |
| }, |
| { |
| "epoch": 12.528735632183908, |
| "grad_norm": 16.13566017150879, |
| "learning_rate": 6.470838654746702e-06, |
| "loss": 0.6894, |
| "step": 1090 |
| }, |
| { |
| "epoch": 12.64367816091954, |
| "grad_norm": 8.791162490844727, |
| "learning_rate": 6.428267347807578e-06, |
| "loss": 0.6589, |
| "step": 1100 |
| }, |
| { |
| "epoch": 12.758620689655173, |
| "grad_norm": 16.64786720275879, |
| "learning_rate": 6.385696040868455e-06, |
| "loss": 0.4528, |
| "step": 1110 |
| }, |
| { |
| "epoch": 12.873563218390805, |
| "grad_norm": 12.981882095336914, |
| "learning_rate": 6.3431247339293325e-06, |
| "loss": 0.4846, |
| "step": 1120 |
| }, |
| { |
| "epoch": 12.988505747126437, |
| "grad_norm": 7.877754211425781, |
| "learning_rate": 6.300553426990209e-06, |
| "loss": 0.6252, |
| "step": 1130 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.45293185114860535, |
| "eval_runtime": 28.0459, |
| "eval_samples_per_second": 3.138, |
| "eval_steps_per_second": 3.138, |
| "step": 1131 |
| }, |
| { |
| "epoch": 13.10344827586207, |
| "grad_norm": 14.569070816040039, |
| "learning_rate": 6.257982120051086e-06, |
| "loss": 0.697, |
| "step": 1140 |
| }, |
| { |
| "epoch": 13.218390804597702, |
| "grad_norm": 12.369892120361328, |
| "learning_rate": 6.2154108131119625e-06, |
| "loss": 0.5658, |
| "step": 1150 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 2.994307518005371, |
| "learning_rate": 6.17283950617284e-06, |
| "loss": 0.5566, |
| "step": 1160 |
| }, |
| { |
| "epoch": 13.448275862068966, |
| "grad_norm": 11.81618881225586, |
| "learning_rate": 6.130268199233717e-06, |
| "loss": 0.4821, |
| "step": 1170 |
| }, |
| { |
| "epoch": 13.563218390804598, |
| "grad_norm": 4.969367027282715, |
| "learning_rate": 6.087696892294594e-06, |
| "loss": 0.5838, |
| "step": 1180 |
| }, |
| { |
| "epoch": 13.678160919540229, |
| "grad_norm": 4.62045955657959, |
| "learning_rate": 6.045125585355472e-06, |
| "loss": 0.441, |
| "step": 1190 |
| }, |
| { |
| "epoch": 13.793103448275861, |
| "grad_norm": 18.332008361816406, |
| "learning_rate": 6.002554278416348e-06, |
| "loss": 0.4801, |
| "step": 1200 |
| }, |
| { |
| "epoch": 13.908045977011493, |
| "grad_norm": 41.304527282714844, |
| "learning_rate": 5.959982971477225e-06, |
| "loss": 0.9347, |
| "step": 1210 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.46699514985084534, |
| "eval_runtime": 31.9139, |
| "eval_samples_per_second": 2.757, |
| "eval_steps_per_second": 2.757, |
| "step": 1218 |
| }, |
| { |
| "epoch": 14.022988505747126, |
| "grad_norm": 12.79311752319336, |
| "learning_rate": 5.917411664538102e-06, |
| "loss": 0.5884, |
| "step": 1220 |
| }, |
| { |
| "epoch": 14.137931034482758, |
| "grad_norm": 6.272464275360107, |
| "learning_rate": 5.874840357598979e-06, |
| "loss": 0.6561, |
| "step": 1230 |
| }, |
| { |
| "epoch": 14.25287356321839, |
| "grad_norm": 22.40076446533203, |
| "learning_rate": 5.832269050659856e-06, |
| "loss": 0.5073, |
| "step": 1240 |
| }, |
| { |
| "epoch": 14.367816091954023, |
| "grad_norm": 4.779148578643799, |
| "learning_rate": 5.7896977437207325e-06, |
| "loss": 0.5587, |
| "step": 1250 |
| }, |
| { |
| "epoch": 14.482758620689655, |
| "grad_norm": 15.301518440246582, |
| "learning_rate": 5.747126436781609e-06, |
| "loss": 0.3783, |
| "step": 1260 |
| }, |
| { |
| "epoch": 14.597701149425287, |
| "grad_norm": 21.429903030395508, |
| "learning_rate": 5.704555129842486e-06, |
| "loss": 0.6206, |
| "step": 1270 |
| }, |
| { |
| "epoch": 14.71264367816092, |
| "grad_norm": 10.15910530090332, |
| "learning_rate": 5.661983822903364e-06, |
| "loss": 0.4738, |
| "step": 1280 |
| }, |
| { |
| "epoch": 14.827586206896552, |
| "grad_norm": 10.499754905700684, |
| "learning_rate": 5.619412515964241e-06, |
| "loss": 0.7927, |
| "step": 1290 |
| }, |
| { |
| "epoch": 14.942528735632184, |
| "grad_norm": 19.29802703857422, |
| "learning_rate": 5.5768412090251175e-06, |
| "loss": 0.5375, |
| "step": 1300 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.44678133726119995, |
| "eval_runtime": 31.3219, |
| "eval_samples_per_second": 2.81, |
| "eval_steps_per_second": 2.81, |
| "step": 1305 |
| }, |
| { |
| "epoch": 15.057471264367816, |
| "grad_norm": 10.79425048828125, |
| "learning_rate": 5.534269902085995e-06, |
| "loss": 0.5573, |
| "step": 1310 |
| }, |
| { |
| "epoch": 15.172413793103448, |
| "grad_norm": 2.8401529788970947, |
| "learning_rate": 5.491698595146872e-06, |
| "loss": 0.5767, |
| "step": 1320 |
| }, |
| { |
| "epoch": 15.28735632183908, |
| "grad_norm": 19.042869567871094, |
| "learning_rate": 5.449127288207748e-06, |
| "loss": 0.3683, |
| "step": 1330 |
| }, |
| { |
| "epoch": 15.402298850574713, |
| "grad_norm": 5.155872821807861, |
| "learning_rate": 5.406555981268625e-06, |
| "loss": 0.6842, |
| "step": 1340 |
| }, |
| { |
| "epoch": 15.517241379310345, |
| "grad_norm": 3.896230936050415, |
| "learning_rate": 5.3639846743295025e-06, |
| "loss": 0.4031, |
| "step": 1350 |
| }, |
| { |
| "epoch": 15.632183908045977, |
| "grad_norm": 8.773719787597656, |
| "learning_rate": 5.321413367390379e-06, |
| "loss": 0.5851, |
| "step": 1360 |
| }, |
| { |
| "epoch": 15.74712643678161, |
| "grad_norm": 6.082934856414795, |
| "learning_rate": 5.278842060451256e-06, |
| "loss": 0.6512, |
| "step": 1370 |
| }, |
| { |
| "epoch": 15.862068965517242, |
| "grad_norm": 17.237377166748047, |
| "learning_rate": 5.236270753512134e-06, |
| "loss": 0.5887, |
| "step": 1380 |
| }, |
| { |
| "epoch": 15.977011494252874, |
| "grad_norm": 7.741724014282227, |
| "learning_rate": 5.193699446573011e-06, |
| "loss": 0.4811, |
| "step": 1390 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.7840909090909091, |
| "eval_loss": 0.4911503493785858, |
| "eval_runtime": 31.5224, |
| "eval_samples_per_second": 2.792, |
| "eval_steps_per_second": 2.792, |
| "step": 1392 |
| }, |
| { |
| "epoch": 16.091954022988507, |
| "grad_norm": 7.818603992462158, |
| "learning_rate": 5.1511281396338875e-06, |
| "loss": 0.5285, |
| "step": 1400 |
| }, |
| { |
| "epoch": 16.20689655172414, |
| "grad_norm": 12.869338989257812, |
| "learning_rate": 5.108556832694764e-06, |
| "loss": 0.5544, |
| "step": 1410 |
| }, |
| { |
| "epoch": 16.32183908045977, |
| "grad_norm": 3.5446457862854004, |
| "learning_rate": 5.065985525755641e-06, |
| "loss": 0.5059, |
| "step": 1420 |
| }, |
| { |
| "epoch": 16.436781609195403, |
| "grad_norm": 17.69426918029785, |
| "learning_rate": 5.023414218816518e-06, |
| "loss": 0.4846, |
| "step": 1430 |
| }, |
| { |
| "epoch": 16.551724137931036, |
| "grad_norm": 22.48843002319336, |
| "learning_rate": 4.980842911877395e-06, |
| "loss": 0.5729, |
| "step": 1440 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 20.473051071166992, |
| "learning_rate": 4.938271604938272e-06, |
| "loss": 0.5739, |
| "step": 1450 |
| }, |
| { |
| "epoch": 16.7816091954023, |
| "grad_norm": 17.0089168548584, |
| "learning_rate": 4.895700297999149e-06, |
| "loss": 0.7637, |
| "step": 1460 |
| }, |
| { |
| "epoch": 16.896551724137932, |
| "grad_norm": 33.70309829711914, |
| "learning_rate": 4.853128991060026e-06, |
| "loss": 0.5728, |
| "step": 1470 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.4636143743991852, |
| "eval_runtime": 32.5312, |
| "eval_samples_per_second": 2.705, |
| "eval_steps_per_second": 2.705, |
| "step": 1479 |
| }, |
| { |
| "epoch": 17.011494252873565, |
| "grad_norm": 24.18787384033203, |
| "learning_rate": 4.8105576841209025e-06, |
| "loss": 0.6222, |
| "step": 1480 |
| }, |
| { |
| "epoch": 17.126436781609197, |
| "grad_norm": 16.18516731262207, |
| "learning_rate": 4.76798637718178e-06, |
| "loss": 0.7393, |
| "step": 1490 |
| }, |
| { |
| "epoch": 17.24137931034483, |
| "grad_norm": 12.046422958374023, |
| "learning_rate": 4.7254150702426575e-06, |
| "loss": 0.5402, |
| "step": 1500 |
| }, |
| { |
| "epoch": 17.35632183908046, |
| "grad_norm": 10.905467987060547, |
| "learning_rate": 4.682843763303534e-06, |
| "loss": 0.4967, |
| "step": 1510 |
| }, |
| { |
| "epoch": 17.47126436781609, |
| "grad_norm": 26.959226608276367, |
| "learning_rate": 4.640272456364411e-06, |
| "loss": 0.4604, |
| "step": 1520 |
| }, |
| { |
| "epoch": 17.586206896551722, |
| "grad_norm": 2.2738327980041504, |
| "learning_rate": 4.5977011494252875e-06, |
| "loss": 0.3532, |
| "step": 1530 |
| }, |
| { |
| "epoch": 17.701149425287355, |
| "grad_norm": 8.469172477722168, |
| "learning_rate": 4.555129842486164e-06, |
| "loss": 0.4417, |
| "step": 1540 |
| }, |
| { |
| "epoch": 17.816091954022987, |
| "grad_norm": 54.75737762451172, |
| "learning_rate": 4.512558535547042e-06, |
| "loss": 0.5184, |
| "step": 1550 |
| }, |
| { |
| "epoch": 17.93103448275862, |
| "grad_norm": 12.986672401428223, |
| "learning_rate": 4.469987228607919e-06, |
| "loss": 0.7997, |
| "step": 1560 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.4630754590034485, |
| "eval_runtime": 28.2027, |
| "eval_samples_per_second": 3.12, |
| "eval_steps_per_second": 3.12, |
| "step": 1566 |
| }, |
| { |
| "epoch": 18.04597701149425, |
| "grad_norm": 7.84487771987915, |
| "learning_rate": 4.427415921668796e-06, |
| "loss": 0.5311, |
| "step": 1570 |
| }, |
| { |
| "epoch": 18.160919540229884, |
| "grad_norm": 9.586400032043457, |
| "learning_rate": 4.3848446147296725e-06, |
| "loss": 0.8135, |
| "step": 1580 |
| }, |
| { |
| "epoch": 18.275862068965516, |
| "grad_norm": 17.631561279296875, |
| "learning_rate": 4.342273307790549e-06, |
| "loss": 0.5206, |
| "step": 1590 |
| }, |
| { |
| "epoch": 18.39080459770115, |
| "grad_norm": 11.604460716247559, |
| "learning_rate": 4.299702000851427e-06, |
| "loss": 0.706, |
| "step": 1600 |
| }, |
| { |
| "epoch": 18.50574712643678, |
| "grad_norm": 19.73044204711914, |
| "learning_rate": 4.257130693912303e-06, |
| "loss": 0.5515, |
| "step": 1610 |
| }, |
| { |
| "epoch": 18.620689655172413, |
| "grad_norm": 8.63882064819336, |
| "learning_rate": 4.214559386973181e-06, |
| "loss": 0.5516, |
| "step": 1620 |
| }, |
| { |
| "epoch": 18.735632183908045, |
| "grad_norm": 3.5959954261779785, |
| "learning_rate": 4.1719880800340575e-06, |
| "loss": 0.4174, |
| "step": 1630 |
| }, |
| { |
| "epoch": 18.850574712643677, |
| "grad_norm": 20.151514053344727, |
| "learning_rate": 4.129416773094934e-06, |
| "loss": 0.3138, |
| "step": 1640 |
| }, |
| { |
| "epoch": 18.96551724137931, |
| "grad_norm": 32.77308654785156, |
| "learning_rate": 4.086845466155812e-06, |
| "loss": 0.4473, |
| "step": 1650 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.4785434901714325, |
| "eval_runtime": 29.4548, |
| "eval_samples_per_second": 2.988, |
| "eval_steps_per_second": 2.988, |
| "step": 1653 |
| }, |
| { |
| "epoch": 19.080459770114942, |
| "grad_norm": 22.082836151123047, |
| "learning_rate": 4.044274159216688e-06, |
| "loss": 0.4738, |
| "step": 1660 |
| }, |
| { |
| "epoch": 19.195402298850574, |
| "grad_norm": 4.156625747680664, |
| "learning_rate": 4.001702852277565e-06, |
| "loss": 0.3854, |
| "step": 1670 |
| }, |
| { |
| "epoch": 19.310344827586206, |
| "grad_norm": 30.028457641601562, |
| "learning_rate": 3.9591315453384425e-06, |
| "loss": 0.8582, |
| "step": 1680 |
| }, |
| { |
| "epoch": 19.42528735632184, |
| "grad_norm": 7.054291248321533, |
| "learning_rate": 3.916560238399319e-06, |
| "loss": 0.5451, |
| "step": 1690 |
| }, |
| { |
| "epoch": 19.54022988505747, |
| "grad_norm": 21.516067504882812, |
| "learning_rate": 3.873988931460197e-06, |
| "loss": 0.545, |
| "step": 1700 |
| }, |
| { |
| "epoch": 19.655172413793103, |
| "grad_norm": 22.307510375976562, |
| "learning_rate": 3.831417624521073e-06, |
| "loss": 0.3196, |
| "step": 1710 |
| }, |
| { |
| "epoch": 19.770114942528735, |
| "grad_norm": 41.9045295715332, |
| "learning_rate": 3.78884631758195e-06, |
| "loss": 0.6646, |
| "step": 1720 |
| }, |
| { |
| "epoch": 19.885057471264368, |
| "grad_norm": 21.9637508392334, |
| "learning_rate": 3.746275010642827e-06, |
| "loss": 0.5936, |
| "step": 1730 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 3.250168800354004, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 0.4999, |
| "step": 1740 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.5162312984466553, |
| "eval_runtime": 31.0481, |
| "eval_samples_per_second": 2.834, |
| "eval_steps_per_second": 2.834, |
| "step": 1740 |
| }, |
| { |
| "epoch": 20.114942528735632, |
| "grad_norm": 8.3432035446167, |
| "learning_rate": 3.6611323967645812e-06, |
| "loss": 0.4216, |
| "step": 1750 |
| }, |
| { |
| "epoch": 20.229885057471265, |
| "grad_norm": 12.663610458374023, |
| "learning_rate": 3.618561089825458e-06, |
| "loss": 0.2574, |
| "step": 1760 |
| }, |
| { |
| "epoch": 20.344827586206897, |
| "grad_norm": 13.331006050109863, |
| "learning_rate": 3.575989782886335e-06, |
| "loss": 0.5718, |
| "step": 1770 |
| }, |
| { |
| "epoch": 20.45977011494253, |
| "grad_norm": 42.82801818847656, |
| "learning_rate": 3.5334184759472117e-06, |
| "loss": 0.5883, |
| "step": 1780 |
| }, |
| { |
| "epoch": 20.57471264367816, |
| "grad_norm": 17.807729721069336, |
| "learning_rate": 3.4908471690080887e-06, |
| "loss": 0.5726, |
| "step": 1790 |
| }, |
| { |
| "epoch": 20.689655172413794, |
| "grad_norm": 1.3207107782363892, |
| "learning_rate": 3.448275862068966e-06, |
| "loss": 0.76, |
| "step": 1800 |
| }, |
| { |
| "epoch": 20.804597701149426, |
| "grad_norm": 21.988597869873047, |
| "learning_rate": 3.405704555129843e-06, |
| "loss": 0.5896, |
| "step": 1810 |
| }, |
| { |
| "epoch": 20.919540229885058, |
| "grad_norm": 3.045391082763672, |
| "learning_rate": 3.3631332481907196e-06, |
| "loss": 0.4572, |
| "step": 1820 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.5742480158805847, |
| "eval_runtime": 30.886, |
| "eval_samples_per_second": 2.849, |
| "eval_steps_per_second": 2.849, |
| "step": 1827 |
| }, |
| { |
| "epoch": 21.03448275862069, |
| "grad_norm": 13.482502937316895, |
| "learning_rate": 3.3205619412515967e-06, |
| "loss": 0.7056, |
| "step": 1830 |
| }, |
| { |
| "epoch": 21.149425287356323, |
| "grad_norm": 2.777024745941162, |
| "learning_rate": 3.2779906343124733e-06, |
| "loss": 0.6454, |
| "step": 1840 |
| }, |
| { |
| "epoch": 21.264367816091955, |
| "grad_norm": 18.692609786987305, |
| "learning_rate": 3.235419327373351e-06, |
| "loss": 0.5329, |
| "step": 1850 |
| }, |
| { |
| "epoch": 21.379310344827587, |
| "grad_norm": 18.411115646362305, |
| "learning_rate": 3.1928480204342275e-06, |
| "loss": 0.4315, |
| "step": 1860 |
| }, |
| { |
| "epoch": 21.49425287356322, |
| "grad_norm": 18.076215744018555, |
| "learning_rate": 3.1502767134951046e-06, |
| "loss": 0.4398, |
| "step": 1870 |
| }, |
| { |
| "epoch": 21.60919540229885, |
| "grad_norm": 21.456092834472656, |
| "learning_rate": 3.1077054065559812e-06, |
| "loss": 0.7566, |
| "step": 1880 |
| }, |
| { |
| "epoch": 21.724137931034484, |
| "grad_norm": 13.617415428161621, |
| "learning_rate": 3.0651340996168583e-06, |
| "loss": 0.3964, |
| "step": 1890 |
| }, |
| { |
| "epoch": 21.839080459770116, |
| "grad_norm": 1.4688551425933838, |
| "learning_rate": 3.022562792677736e-06, |
| "loss": 0.6579, |
| "step": 1900 |
| }, |
| { |
| "epoch": 21.95402298850575, |
| "grad_norm": 5.6041998863220215, |
| "learning_rate": 2.9799914857386125e-06, |
| "loss": 0.2571, |
| "step": 1910 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.5181425213813782, |
| "eval_runtime": 29.7097, |
| "eval_samples_per_second": 2.962, |
| "eval_steps_per_second": 2.962, |
| "step": 1914 |
| }, |
| { |
| "epoch": 22.06896551724138, |
| "grad_norm": 27.159460067749023, |
| "learning_rate": 2.9374201787994896e-06, |
| "loss": 0.6265, |
| "step": 1920 |
| }, |
| { |
| "epoch": 22.183908045977013, |
| "grad_norm": 19.524272918701172, |
| "learning_rate": 2.8948488718603662e-06, |
| "loss": 0.6449, |
| "step": 1930 |
| }, |
| { |
| "epoch": 22.298850574712645, |
| "grad_norm": 33.63201141357422, |
| "learning_rate": 2.852277564921243e-06, |
| "loss": 0.8293, |
| "step": 1940 |
| }, |
| { |
| "epoch": 22.413793103448278, |
| "grad_norm": 9.787166595458984, |
| "learning_rate": 2.8097062579821204e-06, |
| "loss": 0.3664, |
| "step": 1950 |
| }, |
| { |
| "epoch": 22.52873563218391, |
| "grad_norm": 16.723251342773438, |
| "learning_rate": 2.7671349510429975e-06, |
| "loss": 0.7581, |
| "step": 1960 |
| }, |
| { |
| "epoch": 22.64367816091954, |
| "grad_norm": 18.71817970275879, |
| "learning_rate": 2.724563644103874e-06, |
| "loss": 0.4767, |
| "step": 1970 |
| }, |
| { |
| "epoch": 22.75862068965517, |
| "grad_norm": 12.063042640686035, |
| "learning_rate": 2.6819923371647512e-06, |
| "loss": 0.4303, |
| "step": 1980 |
| }, |
| { |
| "epoch": 22.873563218390803, |
| "grad_norm": 18.196271896362305, |
| "learning_rate": 2.639421030225628e-06, |
| "loss": 0.5769, |
| "step": 1990 |
| }, |
| { |
| "epoch": 22.988505747126435, |
| "grad_norm": 28.725513458251953, |
| "learning_rate": 2.5968497232865054e-06, |
| "loss": 0.5085, |
| "step": 2000 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.493715763092041, |
| "eval_runtime": 28.3428, |
| "eval_samples_per_second": 3.105, |
| "eval_steps_per_second": 3.105, |
| "step": 2001 |
| }, |
| { |
| "epoch": 23.103448275862068, |
| "grad_norm": 10.256953239440918, |
| "learning_rate": 2.554278416347382e-06, |
| "loss": 0.8933, |
| "step": 2010 |
| }, |
| { |
| "epoch": 23.2183908045977, |
| "grad_norm": 13.527802467346191, |
| "learning_rate": 2.511707109408259e-06, |
| "loss": 0.4102, |
| "step": 2020 |
| }, |
| { |
| "epoch": 23.333333333333332, |
| "grad_norm": 7.302803993225098, |
| "learning_rate": 2.469135802469136e-06, |
| "loss": 0.3981, |
| "step": 2030 |
| }, |
| { |
| "epoch": 23.448275862068964, |
| "grad_norm": 17.27892303466797, |
| "learning_rate": 2.426564495530013e-06, |
| "loss": 0.5047, |
| "step": 2040 |
| }, |
| { |
| "epoch": 23.563218390804597, |
| "grad_norm": 7.622089385986328, |
| "learning_rate": 2.38399318859089e-06, |
| "loss": 0.4652, |
| "step": 2050 |
| }, |
| { |
| "epoch": 23.67816091954023, |
| "grad_norm": 68.21133422851562, |
| "learning_rate": 2.341421881651767e-06, |
| "loss": 0.3951, |
| "step": 2060 |
| }, |
| { |
| "epoch": 23.79310344827586, |
| "grad_norm": 11.866617202758789, |
| "learning_rate": 2.2988505747126437e-06, |
| "loss": 0.6791, |
| "step": 2070 |
| }, |
| { |
| "epoch": 23.908045977011493, |
| "grad_norm": 23.61830711364746, |
| "learning_rate": 2.256279267773521e-06, |
| "loss": 0.7698, |
| "step": 2080 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.47640088200569153, |
| "eval_runtime": 29.1906, |
| "eval_samples_per_second": 3.015, |
| "eval_steps_per_second": 3.015, |
| "step": 2088 |
| }, |
| { |
| "epoch": 24.022988505747126, |
| "grad_norm": 12.897066116333008, |
| "learning_rate": 2.213707960834398e-06, |
| "loss": 0.4819, |
| "step": 2090 |
| }, |
| { |
| "epoch": 24.137931034482758, |
| "grad_norm": 13.089888572692871, |
| "learning_rate": 2.1711366538952746e-06, |
| "loss": 0.3243, |
| "step": 2100 |
| }, |
| { |
| "epoch": 24.25287356321839, |
| "grad_norm": 28.530624389648438, |
| "learning_rate": 2.1285653469561517e-06, |
| "loss": 0.4375, |
| "step": 2110 |
| }, |
| { |
| "epoch": 24.367816091954023, |
| "grad_norm": 17.30528450012207, |
| "learning_rate": 2.0859940400170287e-06, |
| "loss": 0.6018, |
| "step": 2120 |
| }, |
| { |
| "epoch": 24.482758620689655, |
| "grad_norm": 13.4981050491333, |
| "learning_rate": 2.043422733077906e-06, |
| "loss": 0.4703, |
| "step": 2130 |
| }, |
| { |
| "epoch": 24.597701149425287, |
| "grad_norm": 26.453964233398438, |
| "learning_rate": 2.0008514261387825e-06, |
| "loss": 0.4469, |
| "step": 2140 |
| }, |
| { |
| "epoch": 24.71264367816092, |
| "grad_norm": 7.190347194671631, |
| "learning_rate": 1.9582801191996596e-06, |
| "loss": 0.3658, |
| "step": 2150 |
| }, |
| { |
| "epoch": 24.82758620689655, |
| "grad_norm": 33.62263870239258, |
| "learning_rate": 1.9157088122605367e-06, |
| "loss": 0.886, |
| "step": 2160 |
| }, |
| { |
| "epoch": 24.942528735632184, |
| "grad_norm": 30.23230743408203, |
| "learning_rate": 1.8731375053214135e-06, |
| "loss": 0.558, |
| "step": 2170 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.47421351075172424, |
| "eval_runtime": 29.9363, |
| "eval_samples_per_second": 2.94, |
| "eval_steps_per_second": 2.94, |
| "step": 2175 |
| }, |
| { |
| "epoch": 25.057471264367816, |
| "grad_norm": 9.926159858703613, |
| "learning_rate": 1.8305661983822906e-06, |
| "loss": 0.5362, |
| "step": 2180 |
| }, |
| { |
| "epoch": 25.17241379310345, |
| "grad_norm": 18.617950439453125, |
| "learning_rate": 1.7879948914431675e-06, |
| "loss": 0.4583, |
| "step": 2190 |
| }, |
| { |
| "epoch": 25.28735632183908, |
| "grad_norm": 10.60520076751709, |
| "learning_rate": 1.7454235845040444e-06, |
| "loss": 0.4264, |
| "step": 2200 |
| }, |
| { |
| "epoch": 25.402298850574713, |
| "grad_norm": 44.09375762939453, |
| "learning_rate": 1.7028522775649215e-06, |
| "loss": 0.5265, |
| "step": 2210 |
| }, |
| { |
| "epoch": 25.517241379310345, |
| "grad_norm": 9.441521644592285, |
| "learning_rate": 1.6602809706257983e-06, |
| "loss": 0.3548, |
| "step": 2220 |
| }, |
| { |
| "epoch": 25.632183908045977, |
| "grad_norm": 26.522968292236328, |
| "learning_rate": 1.6177096636866754e-06, |
| "loss": 0.495, |
| "step": 2230 |
| }, |
| { |
| "epoch": 25.74712643678161, |
| "grad_norm": 3.981255531311035, |
| "learning_rate": 1.5751383567475523e-06, |
| "loss": 0.2129, |
| "step": 2240 |
| }, |
| { |
| "epoch": 25.862068965517242, |
| "grad_norm": 14.197047233581543, |
| "learning_rate": 1.5325670498084292e-06, |
| "loss": 0.8719, |
| "step": 2250 |
| }, |
| { |
| "epoch": 25.977011494252874, |
| "grad_norm": 32.87205505371094, |
| "learning_rate": 1.4899957428693062e-06, |
| "loss": 0.5462, |
| "step": 2260 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.7840909090909091, |
| "eval_loss": 0.5320395827293396, |
| "eval_runtime": 29.7505, |
| "eval_samples_per_second": 2.958, |
| "eval_steps_per_second": 2.958, |
| "step": 2262 |
| }, |
| { |
| "epoch": 26.091954022988507, |
| "grad_norm": 26.975196838378906, |
| "learning_rate": 1.4474244359301831e-06, |
| "loss": 0.3323, |
| "step": 2270 |
| }, |
| { |
| "epoch": 26.20689655172414, |
| "grad_norm": 8.834298133850098, |
| "learning_rate": 1.4048531289910602e-06, |
| "loss": 0.5116, |
| "step": 2280 |
| }, |
| { |
| "epoch": 26.32183908045977, |
| "grad_norm": 3.3154473304748535, |
| "learning_rate": 1.362281822051937e-06, |
| "loss": 0.3695, |
| "step": 2290 |
| }, |
| { |
| "epoch": 26.436781609195403, |
| "grad_norm": 20.8482666015625, |
| "learning_rate": 1.319710515112814e-06, |
| "loss": 0.5002, |
| "step": 2300 |
| }, |
| { |
| "epoch": 26.551724137931036, |
| "grad_norm": 27.408367156982422, |
| "learning_rate": 1.277139208173691e-06, |
| "loss": 0.7037, |
| "step": 2310 |
| }, |
| { |
| "epoch": 26.666666666666668, |
| "grad_norm": 22.604793548583984, |
| "learning_rate": 1.234567901234568e-06, |
| "loss": 0.4372, |
| "step": 2320 |
| }, |
| { |
| "epoch": 26.7816091954023, |
| "grad_norm": 15.776363372802734, |
| "learning_rate": 1.191996594295445e-06, |
| "loss": 0.5509, |
| "step": 2330 |
| }, |
| { |
| "epoch": 26.896551724137932, |
| "grad_norm": 10.753557205200195, |
| "learning_rate": 1.1494252873563219e-06, |
| "loss": 0.5218, |
| "step": 2340 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.7840909090909091, |
| "eval_loss": 0.5298219919204712, |
| "eval_runtime": 29.2599, |
| "eval_samples_per_second": 3.008, |
| "eval_steps_per_second": 3.008, |
| "step": 2349 |
| }, |
| { |
| "epoch": 27.011494252873565, |
| "grad_norm": 60.020790100097656, |
| "learning_rate": 1.106853980417199e-06, |
| "loss": 0.4956, |
| "step": 2350 |
| }, |
| { |
| "epoch": 27.126436781609197, |
| "grad_norm": 15.225430488586426, |
| "learning_rate": 1.0642826734780758e-06, |
| "loss": 0.3614, |
| "step": 2360 |
| }, |
| { |
| "epoch": 27.24137931034483, |
| "grad_norm": 15.540806770324707, |
| "learning_rate": 1.021711366538953e-06, |
| "loss": 0.6815, |
| "step": 2370 |
| }, |
| { |
| "epoch": 27.35632183908046, |
| "grad_norm": 28.59101676940918, |
| "learning_rate": 9.791400595998298e-07, |
| "loss": 0.4379, |
| "step": 2380 |
| }, |
| { |
| "epoch": 27.47126436781609, |
| "grad_norm": 32.09828186035156, |
| "learning_rate": 9.365687526607068e-07, |
| "loss": 0.6162, |
| "step": 2390 |
| }, |
| { |
| "epoch": 27.586206896551722, |
| "grad_norm": 34.99262237548828, |
| "learning_rate": 8.939974457215837e-07, |
| "loss": 0.6311, |
| "step": 2400 |
| }, |
| { |
| "epoch": 27.701149425287355, |
| "grad_norm": 31.068824768066406, |
| "learning_rate": 8.514261387824607e-07, |
| "loss": 0.3782, |
| "step": 2410 |
| }, |
| { |
| "epoch": 27.816091954022987, |
| "grad_norm": 5.51350212097168, |
| "learning_rate": 8.088548318433377e-07, |
| "loss": 0.4784, |
| "step": 2420 |
| }, |
| { |
| "epoch": 27.93103448275862, |
| "grad_norm": 5.18095588684082, |
| "learning_rate": 7.662835249042146e-07, |
| "loss": 0.5228, |
| "step": 2430 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.7954545454545454, |
| "eval_loss": 0.5181891918182373, |
| "eval_runtime": 28.0972, |
| "eval_samples_per_second": 3.132, |
| "eval_steps_per_second": 3.132, |
| "step": 2436 |
| }, |
| { |
| "epoch": 28.04597701149425, |
| "grad_norm": 19.53252410888672, |
| "learning_rate": 7.237122179650916e-07, |
| "loss": 0.3631, |
| "step": 2440 |
| }, |
| { |
| "epoch": 28.160919540229884, |
| "grad_norm": 3.3147804737091064, |
| "learning_rate": 6.811409110259685e-07, |
| "loss": 0.418, |
| "step": 2450 |
| }, |
| { |
| "epoch": 28.275862068965516, |
| "grad_norm": 15.383197784423828, |
| "learning_rate": 6.385696040868455e-07, |
| "loss": 0.5001, |
| "step": 2460 |
| }, |
| { |
| "epoch": 28.39080459770115, |
| "grad_norm": 12.98304271697998, |
| "learning_rate": 5.959982971477225e-07, |
| "loss": 0.3665, |
| "step": 2470 |
| }, |
| { |
| "epoch": 28.50574712643678, |
| "grad_norm": 20.850807189941406, |
| "learning_rate": 5.534269902085995e-07, |
| "loss": 0.6822, |
| "step": 2480 |
| }, |
| { |
| "epoch": 28.620689655172413, |
| "grad_norm": 40.53119659423828, |
| "learning_rate": 5.108556832694765e-07, |
| "loss": 0.6982, |
| "step": 2490 |
| }, |
| { |
| "epoch": 28.735632183908045, |
| "grad_norm": 12.433013916015625, |
| "learning_rate": 4.682843763303534e-07, |
| "loss": 0.3787, |
| "step": 2500 |
| }, |
| { |
| "epoch": 28.850574712643677, |
| "grad_norm": 14.456171035766602, |
| "learning_rate": 4.2571306939123036e-07, |
| "loss": 0.6071, |
| "step": 2510 |
| }, |
| { |
| "epoch": 28.96551724137931, |
| "grad_norm": 17.83165168762207, |
| "learning_rate": 3.831417624521073e-07, |
| "loss": 0.5787, |
| "step": 2520 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.5103618502616882, |
| "eval_runtime": 33.9854, |
| "eval_samples_per_second": 2.589, |
| "eval_steps_per_second": 2.589, |
| "step": 2523 |
| }, |
| { |
| "epoch": 29.080459770114942, |
| "grad_norm": 1.877414584159851, |
| "learning_rate": 3.4057045551298427e-07, |
| "loss": 0.5482, |
| "step": 2530 |
| }, |
| { |
| "epoch": 29.195402298850574, |
| "grad_norm": 21.962305068969727, |
| "learning_rate": 2.9799914857386125e-07, |
| "loss": 0.4888, |
| "step": 2540 |
| }, |
| { |
| "epoch": 29.310344827586206, |
| "grad_norm": 19.235275268554688, |
| "learning_rate": 2.5542784163473823e-07, |
| "loss": 0.3151, |
| "step": 2550 |
| }, |
| { |
| "epoch": 29.42528735632184, |
| "grad_norm": 11.410493850708008, |
| "learning_rate": 2.1285653469561518e-07, |
| "loss": 0.6639, |
| "step": 2560 |
| }, |
| { |
| "epoch": 29.54022988505747, |
| "grad_norm": 2.77133846282959, |
| "learning_rate": 1.7028522775649214e-07, |
| "loss": 0.4953, |
| "step": 2570 |
| }, |
| { |
| "epoch": 29.655172413793103, |
| "grad_norm": 16.913911819458008, |
| "learning_rate": 1.2771392081736911e-07, |
| "loss": 0.7018, |
| "step": 2580 |
| }, |
| { |
| "epoch": 29.770114942528735, |
| "grad_norm": 25.734962463378906, |
| "learning_rate": 8.514261387824607e-08, |
| "loss": 0.6178, |
| "step": 2590 |
| }, |
| { |
| "epoch": 29.885057471264368, |
| "grad_norm": 1.1237534284591675, |
| "learning_rate": 4.2571306939123034e-08, |
| "loss": 0.2741, |
| "step": 2600 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 19.100055694580078, |
| "learning_rate": 0.0, |
| "loss": 0.7511, |
| "step": 2610 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.8068181818181818, |
| "eval_loss": 0.5151580572128296, |
| "eval_runtime": 30.6349, |
| "eval_samples_per_second": 2.873, |
| "eval_steps_per_second": 2.873, |
| "step": 2610 |
| }, |
| { |
| "epoch": 30.0, |
| "step": 2610, |
| "total_flos": 1.0701682205471539e+18, |
| "train_loss": 0.5718246315630916, |
| "train_runtime": 5890.3645, |
| "train_samples_per_second": 1.772, |
| "train_steps_per_second": 0.443 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2610, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0701682205471539e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|