{ "best_global_step": 37500, "best_metric": 2.956035614013672, "best_model_checkpoint": "/home/u111169/wrkdir/mgh/aav/checkpoints/esm-2_8m-thermo_final_0_2_valid/checkpoint-37500", "epoch": 297.6221335992024, "eval_steps": 100, "global_step": 37500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07976071784646062, "grad_norm": 573.4181518554688, "learning_rate": 6e-08, "loss": 68.7784, "step": 10 }, { "epoch": 0.15952143569292124, "grad_norm": 634.6896362304688, "learning_rate": 1.6e-07, "loss": 69.1553, "step": 20 }, { "epoch": 0.23928215353938184, "grad_norm": 715.6723022460938, "learning_rate": 2.5e-07, "loss": 65.9578, "step": 30 }, { "epoch": 0.3190428713858425, "grad_norm": 547.8534545898438, "learning_rate": 3.5e-07, "loss": 67.8186, "step": 40 }, { "epoch": 0.3988035892323031, "grad_norm": 683.612548828125, "learning_rate": 4.5e-07, "loss": 60.4619, "step": 50 }, { "epoch": 0.4785643070787637, "grad_norm": 555.7923583984375, "learning_rate": 4.999992104320635e-07, "loss": 62.6769, "step": 60 }, { "epoch": 0.5583250249252243, "grad_norm": 1337.35595703125, "learning_rate": 4.999928939184958e-07, "loss": 59.9945, "step": 70 }, { "epoch": 0.638085742771685, "grad_norm": 843.6145629882812, "learning_rate": 4.999818085657911e-07, "loss": 55.8398, "step": 80 }, { "epoch": 0.7178464606181456, "grad_norm": 228.58128356933594, "learning_rate": 4.999634912480267e-07, "loss": 49.1213, "step": 90 }, { "epoch": 0.7976071784646062, "grad_norm": 701.4474487304688, "learning_rate": 4.999388583191803e-07, "loss": 54.0468, "step": 100 }, { "epoch": 0.7976071784646062, "eval_loss": 6.189545154571533, "eval_mae": 1.8340115547180176, "eval_mse": 6.189545154571533, "eval_r2": -0.8648797273635864, "eval_rmse": 2.487879650339126, "eval_runtime": 9.8346, "eval_samples_per_second": 407.846, "eval_steps_per_second": 12.812, "step": 100 }, { "epoch": 0.8773678963110668, "grad_norm": 479.60687255859375, 
"learning_rate": 4.999079104016307e-07, "loss": 48.8717, "step": 110 }, { "epoch": 0.9571286141575274, "grad_norm": 171.86424255371094, "learning_rate": 4.99870648277312e-07, "loss": 46.4531, "step": 120 }, { "epoch": 1.0319042871385842, "grad_norm": 294.8586730957031, "learning_rate": 4.998270728876944e-07, "loss": 46.563, "step": 130 }, { "epoch": 1.111665004985045, "grad_norm": 288.3103332519531, "learning_rate": 4.997771853337591e-07, "loss": 47.1538, "step": 140 }, { "epoch": 1.1914257228315055, "grad_norm": 342.15130615234375, "learning_rate": 4.997209868759719e-07, "loss": 47.3056, "step": 150 }, { "epoch": 1.271186440677966, "grad_norm": 403.61468505859375, "learning_rate": 4.996584789342507e-07, "loss": 48.6825, "step": 160 }, { "epoch": 1.3509471585244266, "grad_norm": 587.3318481445312, "learning_rate": 4.995896630879293e-07, "loss": 43.8702, "step": 170 }, { "epoch": 1.4307078763708874, "grad_norm": 380.1310729980469, "learning_rate": 4.995145410757182e-07, "loss": 40.8683, "step": 180 }, { "epoch": 1.510468594217348, "grad_norm": 415.7288818359375, "learning_rate": 4.994331147956603e-07, "loss": 41.5049, "step": 190 }, { "epoch": 1.5902293120638085, "grad_norm": 236.02073669433594, "learning_rate": 4.993453863050829e-07, "loss": 39.8648, "step": 200 }, { "epoch": 1.5902293120638085, "eval_loss": 5.156213283538818, "eval_mae": 1.6833100318908691, "eval_mse": 5.156213760375977, "eval_r2": -0.5535420179367065, "eval_rmse": 2.2707297858565156, "eval_runtime": 9.5334, "eval_samples_per_second": 420.733, "eval_steps_per_second": 13.217, "step": 200 }, { "epoch": 1.6699900299102692, "grad_norm": 161.90733337402344, "learning_rate": 4.992513578205457e-07, "loss": 38.1656, "step": 210 }, { "epoch": 1.7497507477567298, "grad_norm": 197.21080017089844, "learning_rate": 4.991510317177851e-07, "loss": 41.23, "step": 220 }, { "epoch": 1.8295114656031903, "grad_norm": 457.8110656738281, "learning_rate": 4.990444105316537e-07, "loss": 36.2166, "step": 230 }, { 
"epoch": 1.909272183449651, "grad_norm": 281.78924560546875, "learning_rate": 4.989314969560569e-07, "loss": 39.6083, "step": 240 }, { "epoch": 1.9890329012961117, "grad_norm": 248.00437927246094, "learning_rate": 4.988122938438841e-07, "loss": 36.4834, "step": 250 }, { "epoch": 2.0638085742771684, "grad_norm": 286.20037841796875, "learning_rate": 4.986868042069371e-07, "loss": 32.0955, "step": 260 }, { "epoch": 2.143569292123629, "grad_norm": 315.5536193847656, "learning_rate": 4.985550312158541e-07, "loss": 35.8951, "step": 270 }, { "epoch": 2.22333000997009, "grad_norm": 406.2935791015625, "learning_rate": 4.984169782000289e-07, "loss": 33.277, "step": 280 }, { "epoch": 2.30309072781655, "grad_norm": 347.6233215332031, "learning_rate": 4.982726486475276e-07, "loss": 31.5701, "step": 290 }, { "epoch": 2.382851445663011, "grad_norm": 145.79922485351562, "learning_rate": 4.98122046205e-07, "loss": 32.1125, "step": 300 }, { "epoch": 2.382851445663011, "eval_loss": 3.720351219177246, "eval_mae": 1.4502124786376953, "eval_mse": 3.720351457595825, "eval_r2": -0.12092375755310059, "eval_rmse": 1.928821261184101, "eval_runtime": 9.5934, "eval_samples_per_second": 418.101, "eval_steps_per_second": 13.134, "step": 300 }, { "epoch": 2.4626121635094718, "grad_norm": 172.51040649414062, "learning_rate": 4.979651746775878e-07, "loss": 28.2744, "step": 310 }, { "epoch": 2.542372881355932, "grad_norm": 184.6218719482422, "learning_rate": 4.978020380288278e-07, "loss": 27.2333, "step": 320 }, { "epoch": 2.622133599202393, "grad_norm": 84.76827239990234, "learning_rate": 4.976326403805526e-07, "loss": 31.0476, "step": 330 }, { "epoch": 2.701894317048853, "grad_norm": 125.47854614257812, "learning_rate": 4.974569860127857e-07, "loss": 28.7382, "step": 340 }, { "epoch": 2.781655034895314, "grad_norm": 164.69036865234375, "learning_rate": 4.972750793636339e-07, "loss": 26.0502, "step": 350 }, { "epoch": 2.8614157527417747, "grad_norm": 76.86050415039062, "learning_rate": 
4.970869250291753e-07, "loss": 30.6326, "step": 360 }, { "epoch": 2.9411764705882355, "grad_norm": 90.23853302001953, "learning_rate": 4.968925277633422e-07, "loss": 30.7144, "step": 370 }, { "epoch": 3.015952143569292, "grad_norm": 106.75017547607422, "learning_rate": 4.966918924778022e-07, "loss": 25.1257, "step": 380 }, { "epoch": 3.0957128614157527, "grad_norm": 147.7263641357422, "learning_rate": 4.964850242418333e-07, "loss": 28.2507, "step": 390 }, { "epoch": 3.1754735792622135, "grad_norm": 94.30587768554688, "learning_rate": 4.962719282821961e-07, "loss": 29.4349, "step": 400 }, { "epoch": 3.1754735792622135, "eval_loss": 3.5484604835510254, "eval_mae": 1.4172353744506836, "eval_mse": 3.5484604835510254, "eval_r2": -0.06913375854492188, "eval_rmse": 1.883735778592907, "eval_runtime": 9.2043, "eval_samples_per_second": 435.777, "eval_steps_per_second": 13.689, "step": 400 }, { "epoch": 3.255234297108674, "grad_norm": 136.9176788330078, "learning_rate": 4.960526099830015e-07, "loss": 28.2686, "step": 410 }, { "epoch": 3.3349950149551346, "grad_norm": 35.45146560668945, "learning_rate": 4.958270748855751e-07, "loss": 29.1841, "step": 420 }, { "epoch": 3.4147557328015954, "grad_norm": 157.3170928955078, "learning_rate": 4.955953286883171e-07, "loss": 28.8563, "step": 430 }, { "epoch": 3.4945164506480557, "grad_norm": 134.45327758789062, "learning_rate": 4.953573772465578e-07, "loss": 29.6298, "step": 440 }, { "epoch": 3.5742771684945165, "grad_norm": 55.60561752319336, "learning_rate": 4.951132265724103e-07, "loss": 27.7373, "step": 450 }, { "epoch": 3.6540378863409773, "grad_norm": 128.3103485107422, "learning_rate": 4.948628828346182e-07, "loss": 29.6246, "step": 460 }, { "epoch": 3.7337986041874376, "grad_norm": 115.28073120117188, "learning_rate": 4.946063523584002e-07, "loss": 27.5998, "step": 470 }, { "epoch": 3.8135593220338984, "grad_norm": 52.74937057495117, "learning_rate": 4.943436416252895e-07, "loss": 26.0449, "step": 480 }, { "epoch": 
3.8933200398803587, "grad_norm": 200.8792724609375, "learning_rate": 4.940747572729705e-07, "loss": 27.9922, "step": 490 }, { "epoch": 3.9730807577268195, "grad_norm": 82.11042022705078, "learning_rate": 4.937997060951117e-07, "loss": 28.0655, "step": 500 }, { "epoch": 3.9730807577268195, "eval_loss": 3.4706692695617676, "eval_mae": 1.3996766805648804, "eval_mse": 3.4706692695617676, "eval_r2": -0.04569566249847412, "eval_rmse": 1.8629732337212384, "eval_runtime": 9.2717, "eval_samples_per_second": 432.606, "eval_steps_per_second": 13.59, "step": 500 }, { "epoch": 4.047856430707876, "grad_norm": 37.33889389038086, "learning_rate": 4.935184950411928e-07, "loss": 26.7186, "step": 510 }, { "epoch": 4.127617148554337, "grad_norm": 50.46540069580078, "learning_rate": 4.932311312163299e-07, "loss": 28.5682, "step": 520 }, { "epoch": 4.2073778664007975, "grad_norm": 61.525657653808594, "learning_rate": 4.929376218810962e-07, "loss": 27.7542, "step": 530 }, { "epoch": 4.287138584247258, "grad_norm": 69.10831451416016, "learning_rate": 4.926379744513378e-07, "loss": 31.3671, "step": 540 }, { "epoch": 4.366899302093719, "grad_norm": 65.49845123291016, "learning_rate": 4.923321964979867e-07, "loss": 25.1761, "step": 550 }, { "epoch": 4.44666001994018, "grad_norm": 77.61808776855469, "learning_rate": 4.920202957468699e-07, "loss": 26.5478, "step": 560 }, { "epoch": 4.526420737786641, "grad_norm": 73.5521240234375, "learning_rate": 4.917022800785133e-07, "loss": 26.3101, "step": 570 }, { "epoch": 4.6061814556331, "grad_norm": 88.35464477539062, "learning_rate": 4.913781575279436e-07, "loss": 27.0035, "step": 580 }, { "epoch": 4.685942173479561, "grad_norm": 43.666046142578125, "learning_rate": 4.910479362844847e-07, "loss": 26.8132, "step": 590 }, { "epoch": 4.765702891326022, "grad_norm": 67.6700439453125, "learning_rate": 4.907116246915507e-07, "loss": 30.1273, "step": 600 }, { "epoch": 4.765702891326022, "eval_loss": 3.4153451919555664, "eval_mae": 1.3900654315948486, 
"eval_mse": 3.4153451919555664, "eval_r2": -0.02902674674987793, "eval_rmse": 1.8480652564115712, "eval_runtime": 9.5716, "eval_samples_per_second": 419.053, "eval_steps_per_second": 13.164, "step": 600 }, { "epoch": 4.845463609172483, "grad_norm": 78.9489974975586, "learning_rate": 4.903692312464353e-07, "loss": 27.8143, "step": 610 }, { "epoch": 4.9252243270189435, "grad_norm": 94.63184356689453, "learning_rate": 4.900207646000974e-07, "loss": 27.2606, "step": 620 }, { "epoch": 5.0, "grad_norm": 58.72568130493164, "learning_rate": 4.896662335569419e-07, "loss": 24.7743, "step": 630 }, { "epoch": 5.079760717846461, "grad_norm": 67.03953552246094, "learning_rate": 4.893056470745975e-07, "loss": 28.5772, "step": 640 }, { "epoch": 5.1595214356929215, "grad_norm": 69.72952270507812, "learning_rate": 4.889390142636909e-07, "loss": 27.8049, "step": 650 }, { "epoch": 5.239282153539381, "grad_norm": 48.28046798706055, "learning_rate": 4.885663443876158e-07, "loss": 26.902, "step": 660 }, { "epoch": 5.319042871385842, "grad_norm": 86.15617370605469, "learning_rate": 4.88187646862299e-07, "loss": 28.749, "step": 670 }, { "epoch": 5.398803589232303, "grad_norm": 65.24939727783203, "learning_rate": 4.878029312559633e-07, "loss": 25.8315, "step": 680 }, { "epoch": 5.478564307078764, "grad_norm": 34.22309112548828, "learning_rate": 4.874122072888844e-07, "loss": 29.6742, "step": 690 }, { "epoch": 5.5583250249252245, "grad_norm": 136.18731689453125, "learning_rate": 4.870154848331468e-07, "loss": 28.3412, "step": 700 }, { "epoch": 5.5583250249252245, "eval_loss": 3.371689796447754, "eval_mae": 1.392737865447998, "eval_mse": 3.371690034866333, "eval_r2": -0.01587367057800293, "eval_rmse": 1.8362162276993232, "eval_runtime": 9.2257, "eval_samples_per_second": 434.766, "eval_steps_per_second": 13.658, "step": 700 }, { "epoch": 5.638085742771685, "grad_norm": 99.80322265625, "learning_rate": 4.866127739123931e-07, "loss": 27.3187, "step": 710 }, { "epoch": 5.717846460618146, 
"grad_norm": 62.02863311767578, "learning_rate": 4.862040847015712e-07, "loss": 25.038, "step": 720 }, { "epoch": 5.797607178464606, "grad_norm": 68.26268768310547, "learning_rate": 4.857894275266779e-07, "loss": 26.4793, "step": 730 }, { "epoch": 5.877367896311067, "grad_norm": 51.290000915527344, "learning_rate": 4.853688128644969e-07, "loss": 27.7466, "step": 740 }, { "epoch": 5.9571286141575275, "grad_norm": 56.944007873535156, "learning_rate": 4.849422513423343e-07, "loss": 24.5587, "step": 750 }, { "epoch": 6.031904287138584, "grad_norm": 53.164737701416016, "learning_rate": 4.845097537377511e-07, "loss": 24.6128, "step": 760 }, { "epoch": 6.111665004985045, "grad_norm": 37.63719177246094, "learning_rate": 4.840713309782894e-07, "loss": 25.3815, "step": 770 }, { "epoch": 6.1914257228315055, "grad_norm": 84.40767669677734, "learning_rate": 4.836269941411977e-07, "loss": 27.9501, "step": 780 }, { "epoch": 6.271186440677966, "grad_norm": 40.62728500366211, "learning_rate": 4.831767544531501e-07, "loss": 26.8201, "step": 790 }, { "epoch": 6.350947158524427, "grad_norm": 91.77393341064453, "learning_rate": 4.827206232899628e-07, "loss": 27.7182, "step": 800 }, { "epoch": 6.350947158524427, "eval_loss": 3.340036630630493, "eval_mae": 1.385007619857788, "eval_mse": 3.340036630630493, "eval_r2": -0.006336688995361328, "eval_rmse": 1.8275767099168487, "eval_runtime": 9.255, "eval_samples_per_second": 433.389, "eval_steps_per_second": 13.614, "step": 800 }, { "epoch": 6.430707876370887, "grad_norm": 25.9021053314209, "learning_rate": 4.822586121763069e-07, "loss": 26.24, "step": 810 }, { "epoch": 6.510468594217348, "grad_norm": 97.57054901123047, "learning_rate": 4.817907327854172e-07, "loss": 27.2087, "step": 820 }, { "epoch": 6.5902293120638085, "grad_norm": 36.019039154052734, "learning_rate": 4.813169969387971e-07, "loss": 27.6542, "step": 830 }, { "epoch": 6.669990029910269, "grad_norm": 34.63457489013672, "learning_rate": 4.8083741660592e-07, "loss": 27.4211, 
"step": 840 }, { "epoch": 6.74975074775673, "grad_norm": 51.112457275390625, "learning_rate": 4.80352003903927e-07, "loss": 27.1675, "step": 850 }, { "epoch": 6.829511465603191, "grad_norm": 42.236328125, "learning_rate": 4.798607710973204e-07, "loss": 25.7901, "step": 860 }, { "epoch": 6.909272183449651, "grad_norm": 81.07854461669922, "learning_rate": 4.793637305976542e-07, "loss": 26.0094, "step": 870 }, { "epoch": 6.989032901296111, "grad_norm": 22.95639991760254, "learning_rate": 4.788608949632202e-07, "loss": 27.9139, "step": 880 }, { "epoch": 7.063808574277169, "grad_norm": 70.43840026855469, "learning_rate": 4.78352276898731e-07, "loss": 25.0272, "step": 890 }, { "epoch": 7.14356929212363, "grad_norm": 140.94937133789062, "learning_rate": 4.77837889254999e-07, "loss": 27.7522, "step": 900 }, { "epoch": 7.14356929212363, "eval_loss": 3.3163321018218994, "eval_mae": 1.3711785078048706, "eval_mse": 3.3163323402404785, "eval_r2": 0.0008053183555603027, "eval_rmse": 1.8210799928175803, "eval_runtime": 9.2647, "eval_samples_per_second": 432.931, "eval_steps_per_second": 13.6, "step": 900 }, { "epoch": 7.2233300099700894, "grad_norm": 24.217979431152344, "learning_rate": 4.77317745028611e-07, "loss": 28.0138, "step": 910 }, { "epoch": 7.30309072781655, "grad_norm": 70.4435806274414, "learning_rate": 4.767918573616011e-07, "loss": 27.8576, "step": 920 }, { "epoch": 7.382851445663011, "grad_norm": 177.81341552734375, "learning_rate": 4.7626023954111744e-07, "loss": 27.0952, "step": 930 }, { "epoch": 7.462612163509472, "grad_norm": 32.845672607421875, "learning_rate": 4.7572290499908714e-07, "loss": 25.4676, "step": 940 }, { "epoch": 7.5423728813559325, "grad_norm": 54.082908630371094, "learning_rate": 4.751798673118768e-07, "loss": 24.4115, "step": 950 }, { "epoch": 7.622133599202392, "grad_norm": 59.80043029785156, "learning_rate": 4.7463114019994945e-07, "loss": 25.2318, "step": 960 }, { "epoch": 7.701894317048853, "grad_norm": 58.63386154174805, "learning_rate": 
4.7407673752751787e-07, "loss": 27.5051, "step": 970 }, { "epoch": 7.781655034895314, "grad_norm": 55.40766906738281, "learning_rate": 4.7351667330219427e-07, "loss": 28.0926, "step": 980 }, { "epoch": 7.861415752741775, "grad_norm": 113.65251922607422, "learning_rate": 4.7295096167463644e-07, "loss": 25.48, "step": 990 }, { "epoch": 7.9411764705882355, "grad_norm": 101.72802734375, "learning_rate": 4.723796169381903e-07, "loss": 26.3553, "step": 1000 }, { "epoch": 7.9411764705882355, "eval_loss": 3.298834800720215, "eval_mae": 1.3671762943267822, "eval_mse": 3.2988345623016357, "eval_r2": 0.006077289581298828, "eval_rmse": 1.8162694079628263, "eval_runtime": 9.6354, "eval_samples_per_second": 416.278, "eval_steps_per_second": 13.077, "step": 1000 }, { "epoch": 8.015952143569292, "grad_norm": 28.65477752685547, "learning_rate": 4.7180265352852846e-07, "loss": 25.2742, "step": 1010 }, { "epoch": 8.095712861415752, "grad_norm": 77.14867401123047, "learning_rate": 4.7122008602328584e-07, "loss": 26.0271, "step": 1020 }, { "epoch": 8.175473579262214, "grad_norm": 62.96834182739258, "learning_rate": 4.706319291416911e-07, "loss": 26.7356, "step": 1030 }, { "epoch": 8.255234297108673, "grad_norm": 65.69111633300781, "learning_rate": 4.700381977441949e-07, "loss": 27.3675, "step": 1040 }, { "epoch": 8.334995014955135, "grad_norm": 69.17853546142578, "learning_rate": 4.694389068320943e-07, "loss": 26.3667, "step": 1050 }, { "epoch": 8.414755732801595, "grad_norm": 24.640756607055664, "learning_rate": 4.6883407154715384e-07, "loss": 25.1328, "step": 1060 }, { "epoch": 8.494516450648057, "grad_norm": 164.16966247558594, "learning_rate": 4.6822370717122294e-07, "loss": 26.6883, "step": 1070 }, { "epoch": 8.574277168494516, "grad_norm": 55.141292572021484, "learning_rate": 4.676078291258497e-07, "loss": 26.6146, "step": 1080 }, { "epoch": 8.654037886340976, "grad_norm": 56.901947021484375, "learning_rate": 4.6698645297189145e-07, "loss": 27.9764, "step": 1090 }, { "epoch": 
8.733798604187438, "grad_norm": 35.33796310424805, "learning_rate": 4.663595944091213e-07, "loss": 26.8944, "step": 1100 }, { "epoch": 8.733798604187438, "eval_loss": 3.2831168174743652, "eval_mae": 1.371146321296692, "eval_mse": 3.2831168174743652, "eval_r2": 0.010812938213348389, "eval_rmse": 1.8119373105806849, "eval_runtime": 9.6483, "eval_samples_per_second": 415.723, "eval_steps_per_second": 13.059, "step": 1100 }, { "epoch": 8.813559322033898, "grad_norm": 55.841068267822266, "learning_rate": 4.6572726927583185e-07, "loss": 27.0192, "step": 1110 }, { "epoch": 8.89332003988036, "grad_norm": 70.44389343261719, "learning_rate": 4.650894935484346e-07, "loss": 25.9054, "step": 1120 }, { "epoch": 8.97308075772682, "grad_norm": 37.819854736328125, "learning_rate": 4.6444628334105673e-07, "loss": 25.7866, "step": 1130 }, { "epoch": 9.047856430707876, "grad_norm": 118.53936767578125, "learning_rate": 4.6379765490513345e-07, "loss": 22.7737, "step": 1140 }, { "epoch": 9.127617148554338, "grad_norm": 39.31532287597656, "learning_rate": 4.631436246289978e-07, "loss": 27.3392, "step": 1150 }, { "epoch": 9.207377866400797, "grad_norm": 46.137168884277344, "learning_rate": 4.6248420903746646e-07, "loss": 27.0129, "step": 1160 }, { "epoch": 9.28713858424726, "grad_norm": 49.3219108581543, "learning_rate": 4.618194247914221e-07, "loss": 27.1411, "step": 1170 }, { "epoch": 9.366899302093719, "grad_norm": 80.34762573242188, "learning_rate": 4.6114928868739263e-07, "loss": 25.0363, "step": 1180 }, { "epoch": 9.446660019940179, "grad_norm": 65.08316040039062, "learning_rate": 4.6047381765712675e-07, "loss": 27.1349, "step": 1190 }, { "epoch": 9.52642073778664, "grad_norm": 54.11983871459961, "learning_rate": 4.59793028767166e-07, "loss": 26.5226, "step": 1200 }, { "epoch": 9.52642073778664, "eval_loss": 3.269491195678711, "eval_mae": 1.3665425777435303, "eval_mse": 3.269491195678711, "eval_r2": 0.014918327331542969, "eval_rmse": 1.8081734418132325, "eval_runtime": 9.2453, 
"eval_samples_per_second": 433.842, "eval_steps_per_second": 13.629, "step": 1200 }, { "epoch": 9.6061814556331, "grad_norm": 99.34823608398438, "learning_rate": 4.591069392184138e-07, "loss": 26.6684, "step": 1210 }, { "epoch": 9.685942173479562, "grad_norm": 73.60244750976562, "learning_rate": 4.5841556634570073e-07, "loss": 26.9203, "step": 1220 }, { "epoch": 9.765702891326022, "grad_norm": 43.70383071899414, "learning_rate": 4.5771892761734633e-07, "loss": 26.2737, "step": 1230 }, { "epoch": 9.845463609172482, "grad_norm": 25.8046817779541, "learning_rate": 4.570170406347182e-07, "loss": 24.5509, "step": 1240 }, { "epoch": 9.925224327018944, "grad_norm": 92.3177490234375, "learning_rate": 4.5630992313178697e-07, "loss": 25.4429, "step": 1250 }, { "epoch": 10.0, "grad_norm": 52.23972702026367, "learning_rate": 4.5559759297467825e-07, "loss": 25.9577, "step": 1260 }, { "epoch": 10.07976071784646, "grad_norm": 41.57015609741211, "learning_rate": 4.548800681612212e-07, "loss": 26.6287, "step": 1270 }, { "epoch": 10.159521435692922, "grad_norm": 81.64158630371094, "learning_rate": 4.541573668204941e-07, "loss": 26.0703, "step": 1280 }, { "epoch": 10.239282153539381, "grad_norm": 27.914758682250977, "learning_rate": 4.5342950721236584e-07, "loss": 26.6378, "step": 1290 }, { "epoch": 10.319042871385843, "grad_norm": 53.98166275024414, "learning_rate": 4.526965077270348e-07, "loss": 26.146, "step": 1300 }, { "epoch": 10.319042871385843, "eval_loss": 3.2577223777770996, "eval_mae": 1.3596596717834473, "eval_mse": 3.2577223777770996, "eval_r2": 0.01846414804458618, "eval_rmse": 1.8049161691826852, "eval_runtime": 9.2569, "eval_samples_per_second": 433.299, "eval_steps_per_second": 13.611, "step": 1300 }, { "epoch": 10.398803589232303, "grad_norm": 123.96611785888672, "learning_rate": 4.5195838688456434e-07, "loss": 29.7088, "step": 1310 }, { "epoch": 10.478564307078763, "grad_norm": 62.53553009033203, "learning_rate": 4.5121516333441466e-07, "loss": 25.1537, "step": 1320 
}, { "epoch": 10.558325024925225, "grad_norm": 75.32501220703125, "learning_rate": 4.5046685585497146e-07, "loss": 26.2433, "step": 1330 }, { "epoch": 10.638085742771684, "grad_norm": 49.98992156982422, "learning_rate": 4.4971348335307203e-07, "loss": 26.079, "step": 1340 }, { "epoch": 10.717846460618146, "grad_norm": 137.5787811279297, "learning_rate": 4.4895506486352686e-07, "loss": 24.1009, "step": 1350 }, { "epoch": 10.797607178464606, "grad_norm": 75.4178237915039, "learning_rate": 4.4819161954863927e-07, "loss": 24.9365, "step": 1360 }, { "epoch": 10.877367896311068, "grad_norm": 130.9047088623047, "learning_rate": 4.4742316669772096e-07, "loss": 26.0591, "step": 1370 }, { "epoch": 10.957128614157527, "grad_norm": 48.49380874633789, "learning_rate": 4.466497257266046e-07, "loss": 27.9292, "step": 1380 }, { "epoch": 11.031904287138584, "grad_norm": 28.311349868774414, "learning_rate": 4.4587131617715346e-07, "loss": 23.6428, "step": 1390 }, { "epoch": 11.111665004985046, "grad_norm": 140.38153076171875, "learning_rate": 4.4508795771676767e-07, "loss": 25.9676, "step": 1400 }, { "epoch": 11.111665004985046, "eval_loss": 3.247952938079834, "eval_mae": 1.3569178581237793, "eval_mse": 3.247952938079834, "eval_r2": 0.021407663822174072, "eval_rmse": 1.8022077954774898, "eval_runtime": 9.3541, "eval_samples_per_second": 428.795, "eval_steps_per_second": 13.47, "step": 1400 }, { "epoch": 11.191425722831505, "grad_norm": 30.292320251464844, "learning_rate": 4.4429967013788706e-07, "loss": 27.9688, "step": 1410 }, { "epoch": 11.271186440677965, "grad_norm": 57.311614990234375, "learning_rate": 4.4350647335749126e-07, "loss": 25.5213, "step": 1420 }, { "epoch": 11.350947158524427, "grad_norm": 72.53961944580078, "learning_rate": 4.4270838741659653e-07, "loss": 26.5346, "step": 1430 }, { "epoch": 11.430707876370887, "grad_norm": 91.33097839355469, "learning_rate": 4.419054324797492e-07, "loss": 25.9467, "step": 1440 }, { "epoch": 11.510468594217349, "grad_norm": 
49.6580810546875, "learning_rate": 4.4109762883451637e-07, "loss": 25.582, "step": 1450 }, { "epoch": 11.590229312063808, "grad_norm": 55.276771545410156, "learning_rate": 4.402849968909732e-07, "loss": 25.4052, "step": 1460 }, { "epoch": 11.66999002991027, "grad_norm": 33.58572769165039, "learning_rate": 4.394675571811873e-07, "loss": 26.9084, "step": 1470 }, { "epoch": 11.74975074775673, "grad_norm": 119.92269134521484, "learning_rate": 4.386453303587002e-07, "loss": 26.1702, "step": 1480 }, { "epoch": 11.82951146560319, "grad_norm": 41.54932403564453, "learning_rate": 4.3781833719800474e-07, "loss": 26.7313, "step": 1490 }, { "epoch": 11.909272183449652, "grad_norm": 38.98570251464844, "learning_rate": 4.369865985940212e-07, "loss": 25.5662, "step": 1500 }, { "epoch": 11.909272183449652, "eval_loss": 3.238109588623047, "eval_mae": 1.3584039211273193, "eval_mse": 3.238109588623047, "eval_r2": 0.024373412132263184, "eval_rmse": 1.7994748091104378, "eval_runtime": 9.6443, "eval_samples_per_second": 415.893, "eval_steps_per_second": 13.065, "step": 1500 }, { "epoch": 11.989032901296111, "grad_norm": 82.44010162353516, "learning_rate": 4.361501355615685e-07, "loss": 27.1074, "step": 1510 }, { "epoch": 12.063808574277168, "grad_norm": 84.16273498535156, "learning_rate": 4.3530896923483387e-07, "loss": 24.2903, "step": 1520 }, { "epoch": 12.14356929212363, "grad_norm": 96.39720153808594, "learning_rate": 4.3446312086683846e-07, "loss": 25.0496, "step": 1530 }, { "epoch": 12.22333000997009, "grad_norm": 34.487491607666016, "learning_rate": 4.3361261182890054e-07, "loss": 26.8909, "step": 1540 }, { "epoch": 12.303090727816551, "grad_norm": 38.0224609375, "learning_rate": 4.3275746361009547e-07, "loss": 26.0252, "step": 1550 }, { "epoch": 12.382851445663011, "grad_norm": 21.54315948486328, "learning_rate": 4.3189769781671283e-07, "loss": 24.2614, "step": 1560 }, { "epoch": 12.46261216350947, "grad_norm": 49.494964599609375, "learning_rate": 4.3103333617171035e-07, "loss": 
26.9778, "step": 1570 }, { "epoch": 12.542372881355933, "grad_norm": 22.109155654907227, "learning_rate": 4.3016440051416537e-07, "loss": 24.0684, "step": 1580 }, { "epoch": 12.622133599202392, "grad_norm": 37.458683013916016, "learning_rate": 4.292909127987226e-07, "loss": 26.7605, "step": 1590 }, { "epoch": 12.701894317048854, "grad_norm": 192.13340759277344, "learning_rate": 4.2841289509503977e-07, "loss": 26.3182, "step": 1600 }, { "epoch": 12.701894317048854, "eval_loss": 3.2306623458862305, "eval_mae": 1.35684072971344, "eval_mse": 3.2306623458862305, "eval_r2": 0.026617228984832764, "eval_rmse": 1.797404335670255, "eval_runtime": 9.2307, "eval_samples_per_second": 434.528, "eval_steps_per_second": 13.65, "step": 1600 }, { "epoch": 12.781655034895314, "grad_norm": 102.10505676269531, "learning_rate": 4.2753036958723e-07, "loss": 26.7662, "step": 1610 }, { "epoch": 12.861415752741774, "grad_norm": 29.1319637298584, "learning_rate": 4.2664335857330093e-07, "loss": 26.8907, "step": 1620 }, { "epoch": 12.941176470588236, "grad_norm": 39.71030044555664, "learning_rate": 4.2575188446459187e-07, "loss": 27.072, "step": 1630 }, { "epoch": 13.015952143569292, "grad_norm": 95.07027435302734, "learning_rate": 4.248559697852071e-07, "loss": 23.2089, "step": 1640 }, { "epoch": 13.095712861415752, "grad_norm": 36.529666900634766, "learning_rate": 4.239556371714471e-07, "loss": 26.037, "step": 1650 }, { "epoch": 13.175473579262214, "grad_norm": 47.9398307800293, "learning_rate": 4.2305090937123636e-07, "loss": 25.8954, "step": 1660 }, { "epoch": 13.255234297108673, "grad_norm": 71.37847900390625, "learning_rate": 4.221418092435488e-07, "loss": 23.7725, "step": 1670 }, { "epoch": 13.334995014955135, "grad_norm": 41.88365173339844, "learning_rate": 4.2122835975783005e-07, "loss": 25.5059, "step": 1680 }, { "epoch": 13.414755732801595, "grad_norm": 34.54059600830078, "learning_rate": 4.203105839934173e-07, "loss": 27.994, "step": 1690 }, { "epoch": 13.494516450648057, 
"grad_norm": 116.55763244628906, "learning_rate": 4.1938850513895626e-07, "loss": 26.5336, "step": 1700 }, { "epoch": 13.494516450648057, "eval_loss": 3.223775625228882, "eval_mae": 1.355072021484375, "eval_mse": 3.223775863647461, "eval_r2": 0.02869206666946411, "eval_rmse": 1.7954876395139736, "eval_runtime": 9.2588, "eval_samples_per_second": 433.21, "eval_steps_per_second": 13.609, "step": 1700 }, { "epoch": 13.574277168494516, "grad_norm": 68.4130630493164, "learning_rate": 4.1846214649181455e-07, "loss": 28.2323, "step": 1710 }, { "epoch": 13.654037886340976, "grad_norm": 50.52378845214844, "learning_rate": 4.175315314574942e-07, "loss": 26.1683, "step": 1720 }, { "epoch": 13.733798604187438, "grad_norm": 109.47400665283203, "learning_rate": 4.1659668354903934e-07, "loss": 25.7688, "step": 1730 }, { "epoch": 13.813559322033898, "grad_norm": 51.831729888916016, "learning_rate": 4.1565762638644255e-07, "loss": 25.1936, "step": 1740 }, { "epoch": 13.89332003988036, "grad_norm": 97.07423400878906, "learning_rate": 4.1471438369604795e-07, "loss": 25.2574, "step": 1750 }, { "epoch": 13.97308075772682, "grad_norm": 194.64306640625, "learning_rate": 4.13766979309952e-07, "loss": 25.8043, "step": 1760 }, { "epoch": 14.047856430707876, "grad_norm": 166.36204528808594, "learning_rate": 4.128154371654007e-07, "loss": 24.2276, "step": 1770 }, { "epoch": 14.127617148554338, "grad_norm": 28.835357666015625, "learning_rate": 4.1185978130418566e-07, "loss": 27.2479, "step": 1780 }, { "epoch": 14.207377866400797, "grad_norm": 161.6560821533203, "learning_rate": 4.109000358720358e-07, "loss": 27.8549, "step": 1790 }, { "epoch": 14.28713858424726, "grad_norm": 42.74077606201172, "learning_rate": 4.0993622511800794e-07, "loss": 25.8464, "step": 1800 }, { "epoch": 14.28713858424726, "eval_loss": 3.2174415588378906, "eval_mae": 1.352028727531433, "eval_mse": 3.2174415588378906, "eval_r2": 0.03060060739517212, "eval_rmse": 1.7937228210729468, "eval_runtime": 9.2733, 
"eval_samples_per_second": 432.53, "eval_steps_per_second": 13.587, "step": 1800 }, { "epoch": 14.366899302093719, "grad_norm": 94.52295684814453, "learning_rate": 4.08968373393874e-07, "loss": 26.218, "step": 1810 }, { "epoch": 14.446660019940179, "grad_norm": 102.33257293701172, "learning_rate": 4.0799650515350516e-07, "loss": 25.0329, "step": 1820 }, { "epoch": 14.52642073778664, "grad_norm": 77.26675415039062, "learning_rate": 4.070206449522547e-07, "loss": 26.2714, "step": 1830 }, { "epoch": 14.6061814556331, "grad_norm": 57.858577728271484, "learning_rate": 4.0604081744633745e-07, "loss": 25.818, "step": 1840 }, { "epoch": 14.685942173479562, "grad_norm": 53.28114318847656, "learning_rate": 4.050570473922064e-07, "loss": 26.48, "step": 1850 }, { "epoch": 14.765702891326022, "grad_norm": 93.24755096435547, "learning_rate": 4.040693596459277e-07, "loss": 26.9171, "step": 1860 }, { "epoch": 14.845463609172482, "grad_norm": 58.55867004394531, "learning_rate": 4.0307777916255237e-07, "loss": 26.2595, "step": 1870 }, { "epoch": 14.925224327018944, "grad_norm": 125.5550765991211, "learning_rate": 4.0208233099548573e-07, "loss": 23.0974, "step": 1880 }, { "epoch": 15.0, "grad_norm": 29.170337677001953, "learning_rate": 4.0108304029585474e-07, "loss": 23.3274, "step": 1890 }, { "epoch": 15.07976071784646, "grad_norm": 55.78076171875, "learning_rate": 4.000799323118722e-07, "loss": 26.2063, "step": 1900 }, { "epoch": 15.07976071784646, "eval_loss": 3.2104992866516113, "eval_mae": 1.3506470918655396, "eval_mse": 3.2104992866516113, "eval_r2": 0.03269225358963013, "eval_rmse": 1.7917866186160705, "eval_runtime": 9.5781, "eval_samples_per_second": 418.77, "eval_steps_per_second": 13.155, "step": 1900 }, { "epoch": 15.159521435692922, "grad_norm": 40.96506118774414, "learning_rate": 3.990730323881988e-07, "loss": 23.7108, "step": 1910 }, { "epoch": 15.239282153539381, "grad_norm": 68.11751556396484, "learning_rate": 3.9806236596530316e-07, "loss": 26.1233, "step": 1920 }, 
{ "epoch": 15.319042871385843, "grad_norm": 76.66336822509766, "learning_rate": 3.9704795857881856e-07, "loss": 26.4697, "step": 1930 }, { "epoch": 15.398803589232303, "grad_norm": 160.65145874023438, "learning_rate": 3.9602983585889794e-07, "loss": 24.5925, "step": 1940 }, { "epoch": 15.478564307078763, "grad_norm": 33.02718734741211, "learning_rate": 3.950080235295666e-07, "loss": 24.7061, "step": 1950 }, { "epoch": 15.558325024925225, "grad_norm": 119.47199249267578, "learning_rate": 3.939825474080717e-07, "loss": 29.3217, "step": 1960 }, { "epoch": 15.638085742771684, "grad_norm": 72.96931457519531, "learning_rate": 3.9295343340423043e-07, "loss": 28.4587, "step": 1970 }, { "epoch": 15.717846460618146, "grad_norm": 57.194244384765625, "learning_rate": 3.9192070751977526e-07, "loss": 26.4959, "step": 1980 }, { "epoch": 15.797607178464606, "grad_norm": 34.6958122253418, "learning_rate": 3.908843958476968e-07, "loss": 25.5749, "step": 1990 }, { "epoch": 15.877367896311068, "grad_norm": 28.211673736572266, "learning_rate": 3.8984452457158466e-07, "loss": 25.7622, "step": 2000 }, { "epoch": 15.877367896311068, "eval_loss": 3.2040445804595947, "eval_mae": 1.3519315719604492, "eval_mse": 3.204044818878174, "eval_r2": 0.0346369743347168, "eval_rmse": 1.789984586212455, "eval_runtime": 9.1533, "eval_samples_per_second": 438.202, "eval_steps_per_second": 13.766, "step": 2000 }, { "epoch": 15.957128614157527, "grad_norm": 40.19672775268555, "learning_rate": 3.88801119964966e-07, "loss": 23.7187, "step": 2010 }, { "epoch": 16.031904287138584, "grad_norm": 125.82440185546875, "learning_rate": 3.877542083906414e-07, "loss": 23.9347, "step": 2020 }, { "epoch": 16.111665004985046, "grad_norm": 47.25736618041992, "learning_rate": 3.8670381630001914e-07, "loss": 26.4004, "step": 2030 }, { "epoch": 16.191425722831504, "grad_norm": 150.1745147705078, "learning_rate": 3.856499702324466e-07, "loss": 26.9673, "step": 2040 }, { "epoch": 16.271186440677965, "grad_norm": 
135.0413055419922, "learning_rate": 3.845926968145398e-07, "loss": 26.9109, "step": 2050 }, { "epoch": 16.350947158524427, "grad_norm": 100.4754638671875, "learning_rate": 3.8353202275951054e-07, "loss": 24.002, "step": 2060 }, { "epoch": 16.43070787637089, "grad_norm": 95.51912689208984, "learning_rate": 3.8246797486649185e-07, "loss": 26.8029, "step": 2070 }, { "epoch": 16.510468594217347, "grad_norm": 38.787960052490234, "learning_rate": 3.8140058001986043e-07, "loss": 26.6201, "step": 2080 }, { "epoch": 16.59022931206381, "grad_norm": 155.7447967529297, "learning_rate": 3.803298651885575e-07, "loss": 26.3985, "step": 2090 }, { "epoch": 16.66999002991027, "grad_norm": 85.08961486816406, "learning_rate": 3.7925585742540766e-07, "loss": 24.1255, "step": 2100 }, { "epoch": 16.66999002991027, "eval_loss": 3.1988422870635986, "eval_mae": 1.3528438806533813, "eval_mse": 3.1988422870635986, "eval_r2": 0.03620445728302002, "eval_rmse": 1.788530762123928, "eval_runtime": 9.2596, "eval_samples_per_second": 433.172, "eval_steps_per_second": 13.608, "step": 2100 }, { "epoch": 16.74975074775673, "grad_norm": 74.64730834960938, "learning_rate": 3.781785838664349e-07, "loss": 25.9132, "step": 2110 }, { "epoch": 16.82951146560319, "grad_norm": 106.97097778320312, "learning_rate": 3.770980717301775e-07, "loss": 24.2703, "step": 2120 }, { "epoch": 16.90927218344965, "grad_norm": 78.0794448852539, "learning_rate": 3.760143483169999e-07, "loss": 27.0184, "step": 2130 }, { "epoch": 16.989032901296113, "grad_norm": 28.421470642089844, "learning_rate": 3.749274410084032e-07, "loss": 24.9771, "step": 2140 }, { "epoch": 17.063808574277168, "grad_norm": 106.12614440917969, "learning_rate": 3.7383737726633313e-07, "loss": 21.3464, "step": 2150 }, { "epoch": 17.14356929212363, "grad_norm": 93.90852355957031, "learning_rate": 3.7274418463248636e-07, "loss": 24.7134, "step": 2160 }, { "epoch": 17.22333000997009, "grad_norm": 177.743896484375, "learning_rate": 3.7164789072761453e-07, "loss": 
24.8612, "step": 2170 }, { "epoch": 17.30309072781655, "grad_norm": 204.92091369628906, "learning_rate": 3.7054852325082645e-07, "loss": 26.3242, "step": 2180 }, { "epoch": 17.38285144566301, "grad_norm": 44.62697219848633, "learning_rate": 3.6944610997888817e-07, "loss": 24.804, "step": 2190 }, { "epoch": 17.462612163509473, "grad_norm": 38.966094970703125, "learning_rate": 3.6834067876552117e-07, "loss": 26.5188, "step": 2200 }, { "epoch": 17.462612163509473, "eval_loss": 3.194345235824585, "eval_mae": 1.3451436758041382, "eval_mse": 3.194345235824585, "eval_r2": 0.0375593900680542, "eval_rmse": 1.7872731284906023, "eval_runtime": 9.2905, "eval_samples_per_second": 431.733, "eval_steps_per_second": 13.562, "step": 2200 }, { "epoch": 17.54237288135593, "grad_norm": 51.10791015625, "learning_rate": 3.6723225754069874e-07, "loss": 25.2638, "step": 2210 }, { "epoch": 17.622133599202392, "grad_norm": 140.89447021484375, "learning_rate": 3.6612087430994015e-07, "loss": 26.0489, "step": 2220 }, { "epoch": 17.701894317048854, "grad_norm": 90.82955932617188, "learning_rate": 3.650065571536031e-07, "loss": 26.7658, "step": 2230 }, { "epoch": 17.781655034895316, "grad_norm": 72.15864562988281, "learning_rate": 3.638893342261742e-07, "loss": 26.9759, "step": 2240 }, { "epoch": 17.861415752741774, "grad_norm": 208.88856506347656, "learning_rate": 3.6276923375555777e-07, "loss": 26.9895, "step": 2250 }, { "epoch": 17.941176470588236, "grad_norm": 80.82164001464844, "learning_rate": 3.6164628404236253e-07, "loss": 27.1941, "step": 2260 }, { "epoch": 18.015952143569294, "grad_norm": 28.38602638244629, "learning_rate": 3.605205134591864e-07, "loss": 24.3996, "step": 2270 }, { "epoch": 18.095712861415752, "grad_norm": 59.99515914916992, "learning_rate": 3.593919504498999e-07, "loss": 26.4284, "step": 2280 }, { "epoch": 18.175473579262214, "grad_norm": 51.018592834472656, "learning_rate": 3.582606235289273e-07, "loss": 26.4741, "step": 2290 }, { "epoch": 18.255234297108675, 
"grad_norm": 66.35299682617188, "learning_rate": 3.5712656128052634e-07, "loss": 25.902, "step": 2300 }, { "epoch": 18.255234297108675, "eval_loss": 3.1882619857788086, "eval_mae": 1.3470134735107422, "eval_mse": 3.1882619857788086, "eval_r2": 0.03939223289489746, "eval_rmse": 1.7855704930858396, "eval_runtime": 9.6616, "eval_samples_per_second": 415.149, "eval_steps_per_second": 13.041, "step": 2300 }, { "epoch": 18.334995014955133, "grad_norm": 100.36279296875, "learning_rate": 3.559897923580657e-07, "loss": 22.7732, "step": 2310 }, { "epoch": 18.414755732801595, "grad_norm": 51.83559036254883, "learning_rate": 3.548503454833015e-07, "loss": 26.4812, "step": 2320 }, { "epoch": 18.494516450648057, "grad_norm": 34.32014083862305, "learning_rate": 3.5370824944565115e-07, "loss": 25.8285, "step": 2330 }, { "epoch": 18.57427716849452, "grad_norm": 104.95442962646484, "learning_rate": 3.525635331014664e-07, "loss": 24.9534, "step": 2340 }, { "epoch": 18.654037886340976, "grad_norm": 62.482845306396484, "learning_rate": 3.5141622537330364e-07, "loss": 25.4369, "step": 2350 }, { "epoch": 18.733798604187438, "grad_norm": 203.57952880859375, "learning_rate": 3.502663552491939e-07, "loss": 25.9546, "step": 2360 }, { "epoch": 18.8135593220339, "grad_norm": 27.640735626220703, "learning_rate": 3.4911395178190995e-07, "loss": 25.982, "step": 2370 }, { "epoch": 18.893320039880358, "grad_norm": 98.8842544555664, "learning_rate": 3.4795904408823217e-07, "loss": 26.3763, "step": 2380 }, { "epoch": 18.97308075772682, "grad_norm": 144.08743286132812, "learning_rate": 3.468016613482132e-07, "loss": 26.1877, "step": 2390 }, { "epoch": 19.047856430707878, "grad_norm": 31.12162971496582, "learning_rate": 3.4564183280444063e-07, "loss": 25.5131, "step": 2400 }, { "epoch": 19.047856430707878, "eval_loss": 3.1840460300445557, "eval_mae": 1.3496177196502686, "eval_mse": 3.1840460300445557, "eval_r2": 0.04066246747970581, "eval_rmse": 1.7843895398831937, "eval_runtime": 9.6718, 
"eval_samples_per_second": 414.711, "eval_steps_per_second": 13.028, "step": 2400 }, { "epoch": 19.127617148554336, "grad_norm": 161.33517456054688, "learning_rate": 3.444795877612978e-07, "loss": 28.1758, "step": 2410 }, { "epoch": 19.207377866400797, "grad_norm": 78.51399230957031, "learning_rate": 3.4331495558422387e-07, "loss": 25.6685, "step": 2420 }, { "epoch": 19.28713858424726, "grad_norm": 49.65394592285156, "learning_rate": 3.4214796569897165e-07, "loss": 24.6879, "step": 2430 }, { "epoch": 19.366899302093717, "grad_norm": 77.83604431152344, "learning_rate": 3.4097864759086386e-07, "loss": 23.8251, "step": 2440 }, { "epoch": 19.44666001994018, "grad_norm": 97.642822265625, "learning_rate": 3.398070308040487e-07, "loss": 26.196, "step": 2450 }, { "epoch": 19.52642073778664, "grad_norm": 111.97957611083984, "learning_rate": 3.386331449407529e-07, "loss": 27.0219, "step": 2460 }, { "epoch": 19.606181455633102, "grad_norm": 43.619384765625, "learning_rate": 3.3745701966053407e-07, "loss": 26.6193, "step": 2470 }, { "epoch": 19.68594217347956, "grad_norm": 83.98363494873047, "learning_rate": 3.3627868467953137e-07, "loss": 23.5233, "step": 2480 }, { "epoch": 19.765702891326022, "grad_norm": 29.117719650268555, "learning_rate": 3.3509816976971436e-07, "loss": 24.8931, "step": 2490 }, { "epoch": 19.845463609172484, "grad_norm": 51.961605072021484, "learning_rate": 3.3391550475813087e-07, "loss": 24.3229, "step": 2500 }, { "epoch": 19.845463609172484, "eval_loss": 3.180414915084839, "eval_mae": 1.3414126634597778, "eval_mse": 3.180414915084839, "eval_r2": 0.041756510734558105, "eval_rmse": 1.7833717826311033, "eval_runtime": 9.2352, "eval_samples_per_second": 434.317, "eval_steps_per_second": 13.643, "step": 2500 }, { "epoch": 19.92522432701894, "grad_norm": 93.1182632446289, "learning_rate": 3.3273071952615374e-07, "loss": 27.2673, "step": 2510 }, { "epoch": 20.0, "grad_norm": 72.23329162597656, "learning_rate": 3.315438440087255e-07, "loss": 23.8389, "step": 
2520 }, { "epoch": 20.07976071784646, "grad_norm": 141.3828887939453, "learning_rate": 3.30354908193602e-07, "loss": 26.6774, "step": 2530 }, { "epoch": 20.15952143569292, "grad_norm": 17.179548263549805, "learning_rate": 3.291639421205951e-07, "loss": 24.757, "step": 2540 }, { "epoch": 20.23928215353938, "grad_norm": 21.29429817199707, "learning_rate": 3.2797097588081324e-07, "loss": 26.0694, "step": 2550 }, { "epoch": 20.319042871385843, "grad_norm": 39.92965316772461, "learning_rate": 3.2677603961590126e-07, "loss": 26.8808, "step": 2560 }, { "epoch": 20.398803589232305, "grad_norm": 71.05562591552734, "learning_rate": 3.255791635172791e-07, "loss": 24.223, "step": 2570 }, { "epoch": 20.478564307078763, "grad_norm": 64.88768005371094, "learning_rate": 3.2438037782537875e-07, "loss": 26.9973, "step": 2580 }, { "epoch": 20.558325024925225, "grad_norm": 20.247940063476562, "learning_rate": 3.231797128288801e-07, "loss": 24.806, "step": 2590 }, { "epoch": 20.638085742771686, "grad_norm": 36.828975677490234, "learning_rate": 3.2197719886394616e-07, "loss": 25.024, "step": 2600 }, { "epoch": 20.638085742771686, "eval_loss": 3.1747312545776367, "eval_mae": 1.3484524488449097, "eval_mse": 3.1747310161590576, "eval_r2": 0.04346907138824463, "eval_rmse": 1.7817774878359693, "eval_runtime": 9.278, "eval_samples_per_second": 432.315, "eval_steps_per_second": 13.581, "step": 2600 }, { "epoch": 20.717846460618144, "grad_norm": 77.34175872802734, "learning_rate": 3.207728663134558e-07, "loss": 26.2727, "step": 2610 }, { "epoch": 20.797607178464606, "grad_norm": 35.301483154296875, "learning_rate": 3.1956674560623677e-07, "loss": 26.4504, "step": 2620 }, { "epoch": 20.877367896311068, "grad_norm": 107.94976043701172, "learning_rate": 3.183588672162965e-07, "loss": 25.7567, "step": 2630 }, { "epoch": 20.957128614157526, "grad_norm": 156.19349670410156, "learning_rate": 3.171492616620524e-07, "loss": 24.9266, "step": 2640 }, { "epoch": 21.031904287138584, "grad_norm": 
52.308631896972656, "learning_rate": 3.159379595055605e-07, "loss": 23.7358, "step": 2650 }, { "epoch": 21.111665004985046, "grad_norm": 82.55281829833984, "learning_rate": 3.1472499135174356e-07, "loss": 25.9235, "step": 2660 }, { "epoch": 21.191425722831504, "grad_norm": 69.1916732788086, "learning_rate": 3.135103878476176e-07, "loss": 24.3697, "step": 2670 }, { "epoch": 21.271186440677965, "grad_norm": 46.42279815673828, "learning_rate": 3.122941796815175e-07, "loss": 27.117, "step": 2680 }, { "epoch": 21.350947158524427, "grad_norm": 140.99227905273438, "learning_rate": 3.110763975823219e-07, "loss": 24.7398, "step": 2690 }, { "epoch": 21.43070787637089, "grad_norm": 52.34705352783203, "learning_rate": 3.0985707231867647e-07, "loss": 25.9564, "step": 2700 }, { "epoch": 21.43070787637089, "eval_loss": 3.1698925495147705, "eval_mae": 1.3458428382873535, "eval_mse": 3.1698925495147705, "eval_r2": 0.04492682218551636, "eval_rmse": 1.7804192061182587, "eval_runtime": 9.6939, "eval_samples_per_second": 413.764, "eval_steps_per_second": 12.998, "step": 2700 }, { "epoch": 21.510468594217347, "grad_norm": 20.247802734375, "learning_rate": 3.08636234698217e-07, "loss": 26.2885, "step": 2710 }, { "epoch": 21.59022931206381, "grad_norm": 39.07258987426758, "learning_rate": 3.074139155667903e-07, "loss": 25.4281, "step": 2720 }, { "epoch": 21.66999002991027, "grad_norm": 34.06509017944336, "learning_rate": 3.061901458076755e-07, "loss": 26.7851, "step": 2730 }, { "epoch": 21.74975074775673, "grad_norm": 90.38104248046875, "learning_rate": 3.049649563408035e-07, "loss": 24.766, "step": 2740 }, { "epoch": 21.82951146560319, "grad_norm": 115.99075317382812, "learning_rate": 3.037383781219755e-07, "loss": 26.1506, "step": 2750 }, { "epoch": 21.90927218344965, "grad_norm": 36.51300811767578, "learning_rate": 3.0251044214208114e-07, "loss": 24.8899, "step": 2760 }, { "epoch": 21.989032901296113, "grad_norm": 138.48594665527344, "learning_rate": 3.012811794263157e-07, "loss": 
25.5732, "step": 2770 }, { "epoch": 22.063808574277168, "grad_norm": 39.69606399536133, "learning_rate": 3.000506210333956e-07, "loss": 24.5779, "step": 2780 }, { "epoch": 22.14356929212363, "grad_norm": 130.00613403320312, "learning_rate": 2.988187980547744e-07, "loss": 24.9146, "step": 2790 }, { "epoch": 22.22333000997009, "grad_norm": 224.0641326904297, "learning_rate": 2.975857416138564e-07, "loss": 27.6134, "step": 2800 }, { "epoch": 22.22333000997009, "eval_loss": 3.166327476501465, "eval_mae": 1.3399311304092407, "eval_mse": 3.1663272380828857, "eval_r2": 0.04600107669830322, "eval_rmse": 1.7794176682507359, "eval_runtime": 9.242, "eval_samples_per_second": 433.999, "eval_steps_per_second": 13.633, "step": 2800 }, { "epoch": 22.30309072781655, "grad_norm": 52.53582000732422, "learning_rate": 2.9635148286521113e-07, "loss": 25.1255, "step": 2810 }, { "epoch": 22.38285144566301, "grad_norm": 132.7754364013672, "learning_rate": 2.9511605299378536e-07, "loss": 26.1952, "step": 2820 }, { "epoch": 22.462612163509473, "grad_norm": 38.92604446411133, "learning_rate": 2.93879483214116e-07, "loss": 23.0946, "step": 2830 }, { "epoch": 22.54237288135593, "grad_norm": 32.08353805541992, "learning_rate": 2.92641804769541e-07, "loss": 25.6264, "step": 2840 }, { "epoch": 22.622133599202392, "grad_norm": 93.6625747680664, "learning_rate": 2.914030489314098e-07, "loss": 24.4846, "step": 2850 }, { "epoch": 22.701894317048854, "grad_norm": 70.16485595703125, "learning_rate": 2.901632469982934e-07, "loss": 24.4818, "step": 2860 }, { "epoch": 22.781655034895316, "grad_norm": 36.66552734375, "learning_rate": 2.8892243029519373e-07, "loss": 26.8278, "step": 2870 }, { "epoch": 22.861415752741774, "grad_norm": 34.02045440673828, "learning_rate": 2.876806301727519e-07, "loss": 27.2244, "step": 2880 }, { "epoch": 22.941176470588236, "grad_norm": 136.84756469726562, "learning_rate": 2.864378780064564e-07, "loss": 27.2033, "step": 2890 }, { "epoch": 23.015952143569294, "grad_norm": 
82.451171875, "learning_rate": 2.8519420519585004e-07, "loss": 21.5234, "step": 2900 }, { "epoch": 23.015952143569294, "eval_loss": 3.1617584228515625, "eval_mae": 1.339278221130371, "eval_mse": 3.1617584228515625, "eval_r2": 0.047377586364746094, "eval_rmse": 1.7781334097450514, "eval_runtime": 9.3218, "eval_samples_per_second": 430.281, "eval_steps_per_second": 13.517, "step": 2900 }, { "epoch": 23.095712861415752, "grad_norm": 115.3865966796875, "learning_rate": 2.839496431637367e-07, "loss": 25.5581, "step": 2910 }, { "epoch": 23.175473579262214, "grad_norm": 81.9321517944336, "learning_rate": 2.827042233553877e-07, "loss": 24.1154, "step": 2920 }, { "epoch": 23.255234297108675, "grad_norm": 226.57139587402344, "learning_rate": 2.814579772377467e-07, "loss": 26.7607, "step": 2930 }, { "epoch": 23.334995014955133, "grad_norm": 73.78599548339844, "learning_rate": 2.802109362986353e-07, "loss": 24.8509, "step": 2940 }, { "epoch": 23.414755732801595, "grad_norm": 77.19922637939453, "learning_rate": 2.7896313204595703e-07, "loss": 26.0284, "step": 2950 }, { "epoch": 23.494516450648057, "grad_norm": 218.84909057617188, "learning_rate": 2.777145960069015e-07, "loss": 26.3203, "step": 2960 }, { "epoch": 23.57427716849452, "grad_norm": 51.319217681884766, "learning_rate": 2.764653597271476e-07, "loss": 25.4633, "step": 2970 }, { "epoch": 23.654037886340976, "grad_norm": 65.45964050292969, "learning_rate": 2.752154547700667e-07, "loss": 26.4497, "step": 2980 }, { "epoch": 23.733798604187438, "grad_norm": 101.2373275756836, "learning_rate": 2.739649127159248e-07, "loss": 23.2934, "step": 2990 }, { "epoch": 23.8135593220339, "grad_norm": 92.46833801269531, "learning_rate": 2.7271376516108545e-07, "loss": 26.7957, "step": 3000 }, { "epoch": 23.8135593220339, "eval_loss": 3.157956838607788, "eval_mae": 1.3402751684188843, "eval_mse": 3.157956838607788, "eval_r2": 0.04852306842803955, "eval_rmse": 1.7770641064991965, "eval_runtime": 9.9161, "eval_samples_per_second": 404.494, 
"eval_steps_per_second": 12.707, "step": 3000 }, { "epoch": 23.893320039880358, "grad_norm": 82.04583740234375, "learning_rate": 2.7146204371721024e-07, "loss": 26.716, "step": 3010 }, { "epoch": 23.97308075772682, "grad_norm": 126.5661849975586, "learning_rate": 2.702097800104612e-07, "loss": 25.6039, "step": 3020 }, { "epoch": 24.047856430707878, "grad_norm": 124.41060638427734, "learning_rate": 2.6895700568070086e-07, "loss": 23.313, "step": 3030 }, { "epoch": 24.127617148554336, "grad_norm": 153.29127502441406, "learning_rate": 2.6770375238069356e-07, "loss": 26.2654, "step": 3040 }, { "epoch": 24.207377866400797, "grad_norm": 74.55110168457031, "learning_rate": 2.664500517753049e-07, "loss": 25.8805, "step": 3050 }, { "epoch": 24.28713858424726, "grad_norm": 190.80921936035156, "learning_rate": 2.6519593554070273e-07, "loss": 26.5513, "step": 3060 }, { "epoch": 24.366899302093717, "grad_norm": 38.84217071533203, "learning_rate": 2.6394143536355595e-07, "loss": 25.2968, "step": 3070 }, { "epoch": 24.44666001994018, "grad_norm": 30.24015998840332, "learning_rate": 2.626865829402341e-07, "loss": 25.6536, "step": 3080 }, { "epoch": 24.52642073778664, "grad_norm": 68.18790435791016, "learning_rate": 2.614314099760068e-07, "loss": 26.5879, "step": 3090 }, { "epoch": 24.606181455633102, "grad_norm": 130.46934509277344, "learning_rate": 2.601759481842426e-07, "loss": 25.9434, "step": 3100 }, { "epoch": 24.606181455633102, "eval_loss": 3.155501127243042, "eval_mae": 1.3429107666015625, "eval_mse": 3.155501127243042, "eval_r2": 0.049262940883636475, "eval_rmse": 1.7763730259275619, "eval_runtime": 9.2949, "eval_samples_per_second": 431.528, "eval_steps_per_second": 13.556, "step": 3100 }, { "epoch": 24.68594217347956, "grad_norm": 145.48199462890625, "learning_rate": 2.5892022928560715e-07, "loss": 26.4445, "step": 3110 }, { "epoch": 24.765702891326022, "grad_norm": 65.02415466308594, "learning_rate": 2.576642850072627e-07, "loss": 26.3289, "step": 3120 }, { "epoch": 
24.845463609172484, "grad_norm": 83.2958984375, "learning_rate": 2.564081470820657e-07, "loss": 24.1421, "step": 3130 }, { "epoch": 24.92522432701894, "grad_norm": 28.578340530395508, "learning_rate": 2.551518472477654e-07, "loss": 24.9301, "step": 3140 }, { "epoch": 25.0, "grad_norm": 8.585793495178223, "learning_rate": 2.5389541724620184e-07, "loss": 21.1339, "step": 3150 }, { "epoch": 25.07976071784646, "grad_norm": 77.31856536865234, "learning_rate": 2.526388888225039e-07, "loss": 23.9865, "step": 3160 }, { "epoch": 25.15952143569292, "grad_norm": 150.84129333496094, "learning_rate": 2.5138229372428715e-07, "loss": 28.0155, "step": 3170 }, { "epoch": 25.23928215353938, "grad_norm": 23.556215286254883, "learning_rate": 2.5012566370085186e-07, "loss": 24.9378, "step": 3180 }, { "epoch": 25.319042871385843, "grad_norm": 183.71372985839844, "learning_rate": 2.4886903050238065e-07, "loss": 25.0401, "step": 3190 }, { "epoch": 25.398803589232305, "grad_norm": 242.62158203125, "learning_rate": 2.4761242587913633e-07, "loss": 26.1274, "step": 3200 }, { "epoch": 25.398803589232305, "eval_loss": 3.1510255336761475, "eval_mae": 1.340527892112732, "eval_mse": 3.1510257720947266, "eval_r2": 0.05061131715774536, "eval_rmse": 1.775112889957911, "eval_runtime": 9.3595, "eval_samples_per_second": 428.548, "eval_steps_per_second": 13.462, "step": 3200 }, { "epoch": 25.478564307078763, "grad_norm": 49.5938835144043, "learning_rate": 2.4635588158065996e-07, "loss": 24.9653, "step": 3210 }, { "epoch": 25.558325024925225, "grad_norm": 72.53789520263672, "learning_rate": 2.450994293549681e-07, "loss": 24.6724, "step": 3220 }, { "epoch": 25.638085742771686, "grad_norm": 61.71761703491211, "learning_rate": 2.438431009477512e-07, "loss": 25.9375, "step": 3230 }, { "epoch": 25.717846460618144, "grad_norm": 119.82528686523438, "learning_rate": 2.425869281015711e-07, "loss": 25.5312, "step": 3240 }, { "epoch": 25.797607178464606, "grad_norm": 134.8277587890625, "learning_rate": 
2.413309425550595e-07, "loss": 27.2442, "step": 3250 }, { "epoch": 25.877367896311068, "grad_norm": 87.44007110595703, "learning_rate": 2.400751760421156e-07, "loss": 25.8056, "step": 3260 }, { "epoch": 25.957128614157526, "grad_norm": 62.196311950683594, "learning_rate": 2.3881966029110437e-07, "loss": 26.1153, "step": 3270 }, { "epoch": 26.031904287138584, "grad_norm": 125.53699493408203, "learning_rate": 2.3756442702405522e-07, "loss": 22.7786, "step": 3280 }, { "epoch": 26.111665004985046, "grad_norm": 97.53251647949219, "learning_rate": 2.3630950795585997e-07, "loss": 25.4978, "step": 3290 }, { "epoch": 26.191425722831504, "grad_norm": 307.3661804199219, "learning_rate": 2.350549347934723e-07, "loss": 26.4714, "step": 3300 }, { "epoch": 26.191425722831504, "eval_loss": 3.1477982997894287, "eval_mae": 1.3374989032745361, "eval_mse": 3.1477982997894287, "eval_r2": 0.051583707332611084, "eval_rmse": 1.7742035677422783, "eval_runtime": 9.9848, "eval_samples_per_second": 401.71, "eval_steps_per_second": 12.619, "step": 3300 }, { "epoch": 26.271186440677965, "grad_norm": 195.30360412597656, "learning_rate": 2.338007392351057e-07, "loss": 26.7384, "step": 3310 }, { "epoch": 26.350947158524427, "grad_norm": 118.61621856689453, "learning_rate": 2.3254695296943337e-07, "loss": 26.8368, "step": 3320 }, { "epoch": 26.43070787637089, "grad_norm": 67.60140991210938, "learning_rate": 2.3129360767478738e-07, "loss": 24.9358, "step": 3330 }, { "epoch": 26.510468594217347, "grad_norm": 60.42992401123047, "learning_rate": 2.300407350183577e-07, "loss": 23.5213, "step": 3340 }, { "epoch": 26.59022931206381, "grad_norm": 64.20198822021484, "learning_rate": 2.2878836665539304e-07, "loss": 25.2233, "step": 3350 }, { "epoch": 26.66999002991027, "grad_norm": 130.40182495117188, "learning_rate": 2.2753653422840017e-07, "loss": 23.0435, "step": 3360 }, { "epoch": 26.74975074775673, "grad_norm": 171.5955352783203, "learning_rate": 2.2628526936634513e-07, "loss": 27.6001, "step": 3370 }, 
{ "epoch": 26.82951146560319, "grad_norm": 43.720558166503906, "learning_rate": 2.2503460368385376e-07, "loss": 25.7998, "step": 3380 }, { "epoch": 26.90927218344965, "grad_norm": 55.378265380859375, "learning_rate": 2.2378456878041273e-07, "loss": 26.9747, "step": 3390 }, { "epoch": 26.989032901296113, "grad_norm": 60.05023193359375, "learning_rate": 2.225351962395716e-07, "loss": 22.6912, "step": 3400 }, { "epoch": 26.989032901296113, "eval_loss": 3.1460859775543213, "eval_mae": 1.3359055519104004, "eval_mse": 3.146085739135742, "eval_r2": 0.05209970474243164, "eval_rmse": 1.773720874076793, "eval_runtime": 9.3107, "eval_samples_per_second": 430.797, "eval_steps_per_second": 13.533, "step": 3400 }, { "epoch": 27.063808574277168, "grad_norm": 164.95831298828125, "learning_rate": 2.2128651762814442e-07, "loss": 25.3825, "step": 3410 }, { "epoch": 27.14356929212363, "grad_norm": 133.99693298339844, "learning_rate": 2.2003856449541235e-07, "loss": 25.5549, "step": 3420 }, { "epoch": 27.22333000997009, "grad_norm": 98.84943389892578, "learning_rate": 2.1879136837232675e-07, "loss": 26.5847, "step": 3430 }, { "epoch": 27.30309072781655, "grad_norm": 142.6348114013672, "learning_rate": 2.1754496077071186e-07, "loss": 26.7426, "step": 3440 }, { "epoch": 27.38285144566301, "grad_norm": 79.17008972167969, "learning_rate": 2.162993731824694e-07, "loss": 27.3471, "step": 3450 }, { "epoch": 27.462612163509473, "grad_norm": 51.78144836425781, "learning_rate": 2.150546370787823e-07, "loss": 23.8934, "step": 3460 }, { "epoch": 27.54237288135593, "grad_norm": 250.04104614257812, "learning_rate": 2.1381078390931987e-07, "loss": 24.5301, "step": 3470 }, { "epoch": 27.622133599202392, "grad_norm": 111.69416046142578, "learning_rate": 2.1256784510144288e-07, "loss": 23.3404, "step": 3480 }, { "epoch": 27.701894317048854, "grad_norm": 27.399375915527344, "learning_rate": 2.1132585205941006e-07, "loss": 25.2206, "step": 3490 }, { "epoch": 27.781655034895316, "grad_norm": 
111.54031372070312, "learning_rate": 2.1008483616358422e-07, "loss": 23.6541, "step": 3500 }, { "epoch": 27.781655034895316, "eval_loss": 3.142723560333252, "eval_mae": 1.3382033109664917, "eval_mse": 3.142723798751831, "eval_r2": 0.053112685680389404, "eval_rmse": 1.7727729123471598, "eval_runtime": 9.9648, "eval_samples_per_second": 402.517, "eval_steps_per_second": 12.645, "step": 3500 }, { "epoch": 27.861415752741774, "grad_norm": 124.3917465209961, "learning_rate": 2.0884482876963915e-07, "loss": 26.0723, "step": 3510 }, { "epoch": 27.941176470588236, "grad_norm": 86.09922790527344, "learning_rate": 2.07605861207768e-07, "loss": 25.5361, "step": 3520 }, { "epoch": 28.015952143569294, "grad_norm": 218.16212463378906, "learning_rate": 2.063679647818913e-07, "loss": 25.9154, "step": 3530 }, { "epoch": 28.095712861415752, "grad_norm": 134.55062866210938, "learning_rate": 2.0513117076886604e-07, "loss": 26.8341, "step": 3540 }, { "epoch": 28.175473579262214, "grad_norm": 173.00433349609375, "learning_rate": 2.0389551041769575e-07, "loss": 23.9771, "step": 3550 }, { "epoch": 28.255234297108675, "grad_norm": 102.30731964111328, "learning_rate": 2.026610149487403e-07, "loss": 24.7714, "step": 3560 }, { "epoch": 28.334995014955133, "grad_norm": 58.39441680908203, "learning_rate": 2.0142771555292793e-07, "loss": 27.4694, "step": 3570 }, { "epoch": 28.414755732801595, "grad_norm": 72.3934326171875, "learning_rate": 2.0019564339096635e-07, "loss": 25.1937, "step": 3580 }, { "epoch": 28.494516450648057, "grad_norm": 81.90557098388672, "learning_rate": 1.9896482959255598e-07, "loss": 26.5238, "step": 3590 }, { "epoch": 28.57427716849452, "grad_norm": 78.32896423339844, "learning_rate": 1.9773530525560345e-07, "loss": 22.4524, "step": 3600 }, { "epoch": 28.57427716849452, "eval_loss": 3.140214443206787, "eval_mae": 1.3380094766616821, "eval_mse": 3.140214443206787, "eval_r2": 0.05386871099472046, "eval_rmse": 1.7720650222852397, "eval_runtime": 9.2577, 
"eval_samples_per_second": 433.26, "eval_steps_per_second": 13.61, "step": 3600 }, { "epoch": 28.654037886340976, "grad_norm": 238.5843963623047, "learning_rate": 1.9650710144543527e-07, "loss": 26.2189, "step": 3610 }, { "epoch": 28.733798604187438, "grad_norm": 152.3238067626953, "learning_rate": 1.952802491940138e-07, "loss": 25.3015, "step": 3620 }, { "epoch": 28.8135593220339, "grad_norm": 68.33666229248047, "learning_rate": 1.9405477949915233e-07, "loss": 23.9232, "step": 3630 }, { "epoch": 28.893320039880358, "grad_norm": 164.30014038085938, "learning_rate": 1.9283072332373251e-07, "loss": 25.088, "step": 3640 }, { "epoch": 28.97308075772682, "grad_norm": 45.72752380371094, "learning_rate": 1.9160811159492183e-07, "loss": 26.5676, "step": 3650 }, { "epoch": 29.047856430707878, "grad_norm": 187.55999755859375, "learning_rate": 1.90386975203392e-07, "loss": 22.3533, "step": 3660 }, { "epoch": 29.127617148554336, "grad_norm": 90.50538635253906, "learning_rate": 1.8916734500253906e-07, "loss": 25.7678, "step": 3670 }, { "epoch": 29.207377866400797, "grad_norm": 135.21221923828125, "learning_rate": 1.8794925180770287e-07, "loss": 25.1531, "step": 3680 }, { "epoch": 29.28713858424726, "grad_norm": 108.64622497558594, "learning_rate": 1.8673272639538973e-07, "loss": 24.7013, "step": 3690 }, { "epoch": 29.366899302093717, "grad_norm": 94.64742279052734, "learning_rate": 1.8551779950249353e-07, "loss": 27.9527, "step": 3700 }, { "epoch": 29.366899302093717, "eval_loss": 3.140504837036133, "eval_mae": 1.331715703010559, "eval_mse": 3.140505075454712, "eval_r2": 0.053781211376190186, "eval_rmse": 1.7721470242208213, "eval_runtime": 9.3146, "eval_samples_per_second": 430.615, "eval_steps_per_second": 13.527, "step": 3700 }, { "epoch": 29.44666001994018, "grad_norm": 78.31409454345703, "learning_rate": 1.843045018255201e-07, "loss": 26.9881, "step": 3710 }, { "epoch": 29.52642073778664, "grad_norm": 194.01712036132812, "learning_rate": 1.8309286401981133e-07, "loss": 
24.7781, "step": 3720 }, { "epoch": 29.606181455633102, "grad_norm": 154.0265655517578, "learning_rate": 1.8188291669877026e-07, "loss": 25.0849, "step": 3730 }, { "epoch": 29.68594217347956, "grad_norm": 32.36880874633789, "learning_rate": 1.8067469043308835e-07, "loss": 23.4199, "step": 3740 }, { "epoch": 29.765702891326022, "grad_norm": 122.14185333251953, "learning_rate": 1.7946821574997228e-07, "loss": 26.3242, "step": 3750 }, { "epoch": 29.845463609172484, "grad_norm": 159.14520263671875, "learning_rate": 1.7826352313237315e-07, "loss": 25.3263, "step": 3760 }, { "epoch": 29.92522432701894, "grad_norm": 118.12110137939453, "learning_rate": 1.770606430182161e-07, "loss": 24.5008, "step": 3770 }, { "epoch": 30.0, "grad_norm": 62.7591438293457, "learning_rate": 1.758596057996314e-07, "loss": 25.0837, "step": 3780 }, { "epoch": 30.07976071784646, "grad_norm": 43.26075744628906, "learning_rate": 1.746604418221866e-07, "loss": 26.6551, "step": 3790 }, { "epoch": 30.15952143569292, "grad_norm": 110.1336898803711, "learning_rate": 1.7346318138411922e-07, "loss": 24.0462, "step": 3800 }, { "epoch": 30.15952143569292, "eval_loss": 3.136179208755493, "eval_mae": 1.335479974746704, "eval_mse": 3.136179208755493, "eval_r2": 0.05508452653884888, "eval_rmse": 1.7709260878860793, "eval_runtime": 9.9855, "eval_samples_per_second": 401.681, "eval_steps_per_second": 12.618, "step": 3800 }, { "epoch": 30.23928215353938, "grad_norm": 49.133968353271484, "learning_rate": 1.7226785473557232e-07, "loss": 25.7876, "step": 3810 }, { "epoch": 30.319042871385843, "grad_norm": 46.56238555908203, "learning_rate": 1.71074492077829e-07, "loss": 26.4264, "step": 3820 }, { "epoch": 30.398803589232305, "grad_norm": 52.70458221435547, "learning_rate": 1.6988312356255024e-07, "loss": 27.3161, "step": 3830 }, { "epoch": 30.478564307078763, "grad_norm": 45.012142181396484, "learning_rate": 1.6869377929101281e-07, "loss": 24.2407, "step": 3840 }, { "epoch": 30.558325024925225, "grad_norm": 
208.66177368164062, "learning_rate": 1.6750648931334826e-07, "loss": 25.4931, "step": 3850 }, { "epoch": 30.638085742771686, "grad_norm": 68.44234466552734, "learning_rate": 1.6632128362778463e-07, "loss": 23.7536, "step": 3860 }, { "epoch": 30.717846460618144, "grad_norm": 161.40869140625, "learning_rate": 1.6513819217988745e-07, "loss": 25.3655, "step": 3870 }, { "epoch": 30.797607178464606, "grad_norm": 251.0177764892578, "learning_rate": 1.63957244861804e-07, "loss": 24.2239, "step": 3880 }, { "epoch": 30.877367896311068, "grad_norm": 60.6568489074707, "learning_rate": 1.6277847151150752e-07, "loss": 25.3857, "step": 3890 }, { "epoch": 30.957128614157526, "grad_norm": 103.65233612060547, "learning_rate": 1.6160190191204342e-07, "loss": 26.6993, "step": 3900 }, { "epoch": 30.957128614157526, "eval_loss": 3.1340854167938232, "eval_mae": 1.3334490060806274, "eval_mse": 3.134085178375244, "eval_r2": 0.05571550130844116, "eval_rmse": 1.770334764493779, "eval_runtime": 9.2742, "eval_samples_per_second": 432.49, "eval_steps_per_second": 13.586, "step": 3900 }, { "epoch": 31.031904287138584, "grad_norm": 233.1190185546875, "learning_rate": 1.604275657907771e-07, "loss": 22.8734, "step": 3910 }, { "epoch": 31.111665004985046, "grad_norm": 189.4170684814453, "learning_rate": 1.5925549281864225e-07, "loss": 24.9487, "step": 3920 }, { "epoch": 31.191425722831504, "grad_norm": 67.26908874511719, "learning_rate": 1.5808571260939192e-07, "loss": 25.9776, "step": 3930 }, { "epoch": 31.271186440677965, "grad_norm": 58.05997848510742, "learning_rate": 1.5691825471884957e-07, "loss": 26.7025, "step": 3940 }, { "epoch": 31.350947158524427, "grad_norm": 31.262191772460938, "learning_rate": 1.5575314864416272e-07, "loss": 23.673, "step": 3950 }, { "epoch": 31.43070787637089, "grad_norm": 154.3089141845703, "learning_rate": 1.5459042382305793e-07, "loss": 27.3652, "step": 3960 }, { "epoch": 31.510468594217347, "grad_norm": 48.65864181518555, "learning_rate": 1.5343010963309608e-07, 
"loss": 25.3723, "step": 3970 }, { "epoch": 31.59022931206381, "grad_norm": 122.0504150390625, "learning_rate": 1.5227223539093136e-07, "loss": 24.5209, "step": 3980 }, { "epoch": 31.66999002991027, "grad_norm": 42.498260498046875, "learning_rate": 1.5111683035156937e-07, "loss": 23.8867, "step": 3990 }, { "epoch": 31.74975074775673, "grad_norm": 65.39444732666016, "learning_rate": 1.4996392370762884e-07, "loss": 25.3385, "step": 4000 }, { "epoch": 31.74975074775673, "eval_loss": 3.13254976272583, "eval_mae": 1.3360387086868286, "eval_mse": 3.132550001144409, "eval_r2": 0.05617797374725342, "eval_rmse": 1.7699011275052652, "eval_runtime": 9.3296, "eval_samples_per_second": 429.924, "eval_steps_per_second": 13.505, "step": 4000 }, { "epoch": 31.82951146560319, "grad_norm": 39.13220977783203, "learning_rate": 1.4881354458860368e-07, "loss": 26.1535, "step": 4010 }, { "epoch": 31.90927218344965, "grad_norm": 89.6006088256836, "learning_rate": 1.4766572206012676e-07, "loss": 25.305, "step": 4020 }, { "epoch": 31.989032901296113, "grad_norm": 115.49569702148438, "learning_rate": 1.4652048512323618e-07, "loss": 26.6158, "step": 4030 }, { "epoch": 32.06380857427717, "grad_norm": 71.1096420288086, "learning_rate": 1.4537786271364166e-07, "loss": 25.0504, "step": 4040 }, { "epoch": 32.14356929212363, "grad_norm": 67.03511047363281, "learning_rate": 1.4423788370099443e-07, "loss": 26.0869, "step": 4050 }, { "epoch": 32.22333000997009, "grad_norm": 91.05354309082031, "learning_rate": 1.43100576888157e-07, "loss": 24.8662, "step": 4060 }, { "epoch": 32.30309072781655, "grad_norm": 88.30418395996094, "learning_rate": 1.4196597101047572e-07, "loss": 25.5, "step": 4070 }, { "epoch": 32.38285144566301, "grad_norm": 133.8420867919922, "learning_rate": 1.4083409473505493e-07, "loss": 26.1354, "step": 4080 }, { "epoch": 32.46261216350947, "grad_norm": 126.43040466308594, "learning_rate": 1.3970497666003225e-07, "loss": 23.7797, "step": 4090 }, { "epoch": 32.54237288135593, 
"grad_norm": 273.7431335449219, "learning_rate": 1.3857864531385638e-07, "loss": 23.6945, "step": 4100 }, { "epoch": 32.54237288135593, "eval_loss": 3.132538318634033, "eval_mae": 1.3303931951522827, "eval_mse": 3.132538318634033, "eval_r2": 0.056181490421295166, "eval_rmse": 1.7698978271736572, "eval_runtime": 9.9463, "eval_samples_per_second": 403.265, "eval_steps_per_second": 12.668, "step": 4100 }, { "epoch": 32.62213359920239, "grad_norm": 40.9385871887207, "learning_rate": 1.37455129154566e-07, "loss": 24.5797, "step": 4110 }, { "epoch": 32.701894317048854, "grad_norm": 126.75995635986328, "learning_rate": 1.363344565690711e-07, "loss": 26.4947, "step": 4120 }, { "epoch": 32.781655034895316, "grad_norm": 218.6414794921875, "learning_rate": 1.3521665587243563e-07, "loss": 24.3611, "step": 4130 }, { "epoch": 32.86141575274178, "grad_norm": 29.135433197021484, "learning_rate": 1.3410175530716163e-07, "loss": 26.2829, "step": 4140 }, { "epoch": 32.94117647058823, "grad_norm": 88.4239501953125, "learning_rate": 1.3298978304247638e-07, "loss": 25.2017, "step": 4150 }, { "epoch": 33.015952143569294, "grad_norm": 73.62918090820312, "learning_rate": 1.318807671736201e-07, "loss": 25.3726, "step": 4160 }, { "epoch": 33.095712861415755, "grad_norm": 112.5821304321289, "learning_rate": 1.3077473572113645e-07, "loss": 26.48, "step": 4170 }, { "epoch": 33.17547357926221, "grad_norm": 121.62039947509766, "learning_rate": 1.2967171663016456e-07, "loss": 23.0178, "step": 4180 }, { "epoch": 33.25523429710867, "grad_norm": 65.80647277832031, "learning_rate": 1.285717377697324e-07, "loss": 25.4537, "step": 4190 }, { "epoch": 33.33499501495513, "grad_norm": 264.9825134277344, "learning_rate": 1.2747482693205356e-07, "loss": 26.105, "step": 4200 }, { "epoch": 33.33499501495513, "eval_loss": 3.1293115615844727, "eval_mae": 1.3327357769012451, "eval_mse": 3.1293118000030518, "eval_r2": 0.05715364217758179, "eval_rmse": 1.7689860937845305, "eval_runtime": 9.2653, 
"eval_samples_per_second": 432.905, "eval_steps_per_second": 13.599, "step": 4200 }, { "epoch": 33.414755732801595, "grad_norm": 54.90214157104492, "learning_rate": 1.263810118318242e-07, "loss": 25.4921, "step": 4210 }, { "epoch": 33.49451645064806, "grad_norm": 65.57184600830078, "learning_rate": 1.252903201055232e-07, "loss": 24.6091, "step": 4220 }, { "epoch": 33.57427716849452, "grad_norm": 46.68421936035156, "learning_rate": 1.242027793107142e-07, "loss": 26.6366, "step": 4230 }, { "epoch": 33.65403788634098, "grad_norm": 157.0302734375, "learning_rate": 1.2311841692534843e-07, "loss": 26.8544, "step": 4240 }, { "epoch": 33.733798604187434, "grad_norm": 58.255130767822266, "learning_rate": 1.2203726034707158e-07, "loss": 26.418, "step": 4250 }, { "epoch": 33.813559322033896, "grad_norm": 99.26273345947266, "learning_rate": 1.2095933689253047e-07, "loss": 24.3008, "step": 4260 }, { "epoch": 33.89332003988036, "grad_norm": 158.2358856201172, "learning_rate": 1.198846737966838e-07, "loss": 25.159, "step": 4270 }, { "epoch": 33.97308075772682, "grad_norm": 197.09414672851562, "learning_rate": 1.1881329821211325e-07, "loss": 23.8238, "step": 4280 }, { "epoch": 34.047856430707874, "grad_norm": 129.14163208007812, "learning_rate": 1.1774523720833806e-07, "loss": 23.8336, "step": 4290 }, { "epoch": 34.127617148554336, "grad_norm": 72.48125457763672, "learning_rate": 1.1668051777113089e-07, "loss": 26.3832, "step": 4300 }, { "epoch": 34.127617148554336, "eval_loss": 3.128072738647461, "eval_mae": 1.3328605890274048, "eval_mse": 3.128072500228882, "eval_r2": 0.057527005672454834, "eval_rmse": 1.7686357737614835, "eval_runtime": 9.3433, "eval_samples_per_second": 429.292, "eval_steps_per_second": 13.486, "step": 4300 }, { "epoch": 34.2073778664008, "grad_norm": 198.8756561279297, "learning_rate": 1.1561916680183554e-07, "loss": 24.7351, "step": 4310 }, { "epoch": 34.28713858424726, "grad_norm": 112.67052459716797, "learning_rate": 1.1456121111668809e-07, "loss": 
27.0009, "step": 4320 }, { "epoch": 34.36689930209372, "grad_norm": 48.918434143066406, "learning_rate": 1.1350667744613868e-07, "loss": 25.6622, "step": 4330 }, { "epoch": 34.44666001994018, "grad_norm": 70.8290786743164, "learning_rate": 1.1245559243417624e-07, "loss": 25.7665, "step": 4340 }, { "epoch": 34.52642073778664, "grad_norm": 154.0522003173828, "learning_rate": 1.1140798263765583e-07, "loss": 25.922, "step": 4350 }, { "epoch": 34.6061814556331, "grad_norm": 102.49356842041016, "learning_rate": 1.1036387452562681e-07, "loss": 26.0428, "step": 4360 }, { "epoch": 34.68594217347956, "grad_norm": 101.10346221923828, "learning_rate": 1.0932329447866495e-07, "loss": 24.2896, "step": 4370 }, { "epoch": 34.76570289132602, "grad_norm": 101.76194763183594, "learning_rate": 1.0828626878820501e-07, "loss": 24.0958, "step": 4380 }, { "epoch": 34.845463609172484, "grad_norm": 187.35472106933594, "learning_rate": 1.0725282365587729e-07, "loss": 24.7648, "step": 4390 }, { "epoch": 34.925224327018945, "grad_norm": 77.70819854736328, "learning_rate": 1.062229851928449e-07, "loss": 24.5196, "step": 4400 }, { "epoch": 34.925224327018945, "eval_loss": 3.127197504043579, "eval_mae": 1.3322991132736206, "eval_mse": 3.127197504043579, "eval_r2": 0.05779063701629639, "eval_rmse": 1.7683883917407903, "eval_runtime": 9.9349, "eval_samples_per_second": 403.728, "eval_steps_per_second": 12.683, "step": 4400 }, { "epoch": 35.0, "grad_norm": 67.70870208740234, "learning_rate": 1.051967794191446e-07, "loss": 24.2208, "step": 4410 }, { "epoch": 35.07976071784646, "grad_norm": 219.30227661132812, "learning_rate": 1.041742322630291e-07, "loss": 27.2756, "step": 4420 }, { "epoch": 35.15952143569292, "grad_norm": 84.1348876953125, "learning_rate": 1.0315536956031182e-07, "loss": 25.4429, "step": 4430 }, { "epoch": 35.239282153539385, "grad_norm": 99.25320434570312, "learning_rate": 1.0214021705371453e-07, "loss": 23.5892, "step": 4440 }, { "epoch": 35.31904287138584, "grad_norm": 
42.8789176940918, "learning_rate": 1.0112880039221652e-07, "loss": 27.1501, "step": 4450 }, { "epoch": 35.3988035892323, "grad_norm": 52.440799713134766, "learning_rate": 1.0012114513040665e-07, "loss": 24.3347, "step": 4460 }, { "epoch": 35.47856430707876, "grad_norm": 59.711448669433594, "learning_rate": 9.911727672783802e-08, "loss": 24.4259, "step": 4470 }, { "epoch": 35.558325024925225, "grad_norm": 104.88539123535156, "learning_rate": 9.811722054838412e-08, "loss": 26.4102, "step": 4480 }, { "epoch": 35.638085742771686, "grad_norm": 134.7513427734375, "learning_rate": 9.712100185959862e-08, "loss": 25.6119, "step": 4490 }, { "epoch": 35.71784646061815, "grad_norm": 183.24839782714844, "learning_rate": 9.61286458320763e-08, "loss": 24.3452, "step": 4500 }, { "epoch": 35.71784646061815, "eval_loss": 3.1262333393096924, "eval_mae": 1.333138108253479, "eval_mse": 3.1262331008911133, "eval_r2": 0.058081209659576416, "eval_rmse": 1.7681156921681096, "eval_runtime": 9.0519, "eval_samples_per_second": 443.114, "eval_steps_per_second": 13.92, "step": 4500 }, { "epoch": 35.79760717846461, "grad_norm": 215.09974670410156, "learning_rate": 9.51401775388177e-08, "loss": 24.9939, "step": 4510 }, { "epoch": 35.877367896311064, "grad_norm": 102.00492858886719, "learning_rate": 9.41556219545953e-08, "loss": 25.3147, "step": 4520 }, { "epoch": 35.957128614157526, "grad_norm": 96.8487319946289, "learning_rate": 9.317500395532235e-08, "loss": 25.4636, "step": 4530 }, { "epoch": 36.03190428713859, "grad_norm": 68.97376251220703, "learning_rate": 9.219834831742482e-08, "loss": 22.8502, "step": 4540 }, { "epoch": 36.11166500498504, "grad_norm": 51.5819206237793, "learning_rate": 9.122567971721484e-08, "loss": 25.1066, "step": 4550 }, { "epoch": 36.191425722831504, "grad_norm": 137.3096466064453, "learning_rate": 9.025702273026787e-08, "loss": 25.7935, "step": 4560 }, { "epoch": 36.271186440677965, "grad_norm": 303.2586669921875, "learning_rate": 8.929240183080106e-08, "loss": 
23.7943, "step": 4570 }, { "epoch": 36.35094715852443, "grad_norm": 298.3479309082031, "learning_rate": 8.833184139105534e-08, "loss": 25.0741, "step": 4580 }, { "epoch": 36.43070787637089, "grad_norm": 202.3203125, "learning_rate": 8.737536568067974e-08, "loss": 23.5568, "step": 4590 }, { "epoch": 36.51046859421735, "grad_norm": 118.80555725097656, "learning_rate": 8.642299886611759e-08, "loss": 25.5861, "step": 4600 }, { "epoch": 36.51046859421735, "eval_loss": 3.1254942417144775, "eval_mae": 1.3321858644485474, "eval_mse": 3.1254942417144775, "eval_r2": 0.0583038330078125, "eval_rmse": 1.7679067401066373, "eval_runtime": 9.063, "eval_samples_per_second": 442.571, "eval_steps_per_second": 13.903, "step": 4600 }, { "epoch": 36.59022931206381, "grad_norm": 145.08285522460938, "learning_rate": 8.54747650099967e-08, "loss": 27.5149, "step": 4610 }, { "epoch": 36.66999002991027, "grad_norm": 164.43328857421875, "learning_rate": 8.453068807052064e-08, "loss": 26.1562, "step": 4620 }, { "epoch": 36.74975074775673, "grad_norm": 46.407039642333984, "learning_rate": 8.359079190086402e-08, "loss": 26.4529, "step": 4630 }, { "epoch": 36.82951146560319, "grad_norm": 102.09172058105469, "learning_rate": 8.265510024856951e-08, "loss": 25.0347, "step": 4640 }, { "epoch": 36.90927218344965, "grad_norm": 119.56255340576172, "learning_rate": 8.172363675494772e-08, "loss": 25.3944, "step": 4650 }, { "epoch": 36.98903290129611, "grad_norm": 220.11642456054688, "learning_rate": 8.079642495448033e-08, "loss": 25.6366, "step": 4660 }, { "epoch": 37.06380857427717, "grad_norm": 172.42779541015625, "learning_rate": 7.987348827422483e-08, "loss": 25.4652, "step": 4670 }, { "epoch": 37.14356929212363, "grad_norm": 95.17620086669922, "learning_rate": 7.895485003322327e-08, "loss": 25.5158, "step": 4680 }, { "epoch": 37.22333000997009, "grad_norm": 81.66320037841797, "learning_rate": 7.804053344191255e-08, "loss": 25.609, "step": 4690 }, { "epoch": 37.30309072781655, "grad_norm": 
112.18912506103516, "learning_rate": 7.713056160153813e-08, "loss": 24.5701, "step": 4700 }, { "epoch": 37.30309072781655, "eval_loss": 3.1239125728607178, "eval_mae": 1.332130789756775, "eval_mse": 3.1239125728607178, "eval_r2": 0.05878037214279175, "eval_rmse": 1.7674593553631488, "eval_runtime": 9.0645, "eval_samples_per_second": 442.496, "eval_steps_per_second": 13.9, "step": 4700 }, { "epoch": 37.38285144566301, "grad_norm": 103.58297729492188, "learning_rate": 7.622495750357072e-08, "loss": 25.5507, "step": 4710 }, { "epoch": 37.46261216350947, "grad_norm": 39.11042022705078, "learning_rate": 7.53237440291247e-08, "loss": 25.7191, "step": 4720 }, { "epoch": 37.54237288135593, "grad_norm": 199.56143188476562, "learning_rate": 7.44269439483807e-08, "loss": 25.2238, "step": 4730 }, { "epoch": 37.62213359920239, "grad_norm": 168.63783264160156, "learning_rate": 7.353457992000961e-08, "loss": 24.6829, "step": 4740 }, { "epoch": 37.701894317048854, "grad_norm": 55.98255157470703, "learning_rate": 7.264667449060074e-08, "loss": 25.5242, "step": 4750 }, { "epoch": 37.781655034895316, "grad_norm": 53.036888122558594, "learning_rate": 7.176325009409175e-08, "loss": 25.344, "step": 4760 }, { "epoch": 37.86141575274178, "grad_norm": 265.2929382324219, "learning_rate": 7.088432905120176e-08, "loss": 24.8157, "step": 4770 }, { "epoch": 37.94117647058823, "grad_norm": 120.75648498535156, "learning_rate": 7.000993356886778e-08, "loss": 23.7128, "step": 4780 }, { "epoch": 38.015952143569294, "grad_norm": 171.9890899658203, "learning_rate": 6.914008573968317e-08, "loss": 26.0064, "step": 4790 }, { "epoch": 38.095712861415755, "grad_norm": 53.58652114868164, "learning_rate": 6.827480754133993e-08, "loss": 24.0923, "step": 4800 }, { "epoch": 38.095712861415755, "eval_loss": 3.123528003692627, "eval_mae": 1.3319824934005737, "eval_mse": 3.123528003692627, "eval_r2": 0.05889630317687988, "eval_rmse": 1.767350560498009, "eval_runtime": 9.0553, "eval_samples_per_second": 442.943, 
"eval_steps_per_second": 13.914, "step": 4800 }, { "epoch": 38.17547357926221, "grad_norm": 96.7684097290039, "learning_rate": 6.741412083607301e-08, "loss": 25.6224, "step": 4810 }, { "epoch": 38.25523429710867, "grad_norm": 328.3621826171875, "learning_rate": 6.655804737010795e-08, "loss": 26.9059, "step": 4820 }, { "epoch": 38.33499501495513, "grad_norm": 89.13323211669922, "learning_rate": 6.570660877311193e-08, "loss": 24.2715, "step": 4830 }, { "epoch": 38.414755732801595, "grad_norm": 157.61807250976562, "learning_rate": 6.485982655764649e-08, "loss": 25.0064, "step": 4840 }, { "epoch": 38.49451645064806, "grad_norm": 151.26788330078125, "learning_rate": 6.401772211862468e-08, "loss": 23.3381, "step": 4850 }, { "epoch": 38.57427716849452, "grad_norm": 57.38077926635742, "learning_rate": 6.31803167327703e-08, "loss": 23.812, "step": 4860 }, { "epoch": 38.65403788634098, "grad_norm": 203.0427703857422, "learning_rate": 6.23476315580799e-08, "loss": 26.5058, "step": 4870 }, { "epoch": 38.733798604187434, "grad_norm": 163.02796936035156, "learning_rate": 6.151968763328888e-08, "loss": 26.1078, "step": 4880 }, { "epoch": 38.813559322033896, "grad_norm": 108.4669418334961, "learning_rate": 6.069650587733932e-08, "loss": 24.9732, "step": 4890 }, { "epoch": 38.89332003988036, "grad_norm": 362.93994140625, "learning_rate": 5.987810708885196e-08, "loss": 26.611, "step": 4900 }, { "epoch": 38.89332003988036, "eval_loss": 3.122894525527954, "eval_mae": 1.3315399885177612, "eval_mse": 3.122894525527954, "eval_r2": 0.059087157249450684, "eval_rmse": 1.7671713345139894, "eval_runtime": 9.0533, "eval_samples_per_second": 443.044, "eval_steps_per_second": 13.918, "step": 4900 }, { "epoch": 38.97308075772682, "grad_norm": 93.97080993652344, "learning_rate": 5.906451194560019e-08, "loss": 25.4004, "step": 4910 }, { "epoch": 39.047856430707874, "grad_norm": 147.77755737304688, "learning_rate": 5.825574100398811e-08, "loss": 23.691, "step": 4920 }, { "epoch": 39.127617148554336, 
"grad_norm": 246.35997009277344, "learning_rate": 5.7451814698530665e-08, "loss": 25.2164, "step": 4930 }, { "epoch": 39.2073778664008, "grad_norm": 153.21450805664062, "learning_rate": 5.6652753341337623e-08, "loss": 24.0185, "step": 4940 }, { "epoch": 39.28713858424726, "grad_norm": 143.73780822753906, "learning_rate": 5.58585771216005e-08, "loss": 25.8635, "step": 4950 }, { "epoch": 39.36689930209372, "grad_norm": 58.399566650390625, "learning_rate": 5.506930610508201e-08, "loss": 25.9007, "step": 4960 }, { "epoch": 39.44666001994018, "grad_norm": 26.310766220092773, "learning_rate": 5.4284960233609536e-08, "loss": 23.4994, "step": 4970 }, { "epoch": 39.52642073778664, "grad_norm": 146.9088897705078, "learning_rate": 5.350555932457107e-08, "loss": 26.3982, "step": 4980 }, { "epoch": 39.6061814556331, "grad_norm": 132.27772521972656, "learning_rate": 5.273112307041436e-08, "loss": 25.3715, "step": 4990 }, { "epoch": 39.68594217347956, "grad_norm": 55.70192337036133, "learning_rate": 5.1961671038149745e-08, "loss": 27.1358, "step": 5000 }, { "epoch": 39.68594217347956, "eval_loss": 3.1226961612701416, "eval_mae": 1.3344401121139526, "eval_mse": 3.1226959228515625, "eval_r2": 0.0591469407081604, "eval_rmse": 1.767115141367863, "eval_runtime": 9.0408, "eval_samples_per_second": 443.654, "eval_steps_per_second": 13.937, "step": 5000 }, { "epoch": 39.76570289132602, "grad_norm": 45.21358871459961, "learning_rate": 5.119722266885534e-08, "loss": 25.5138, "step": 5010 }, { "epoch": 39.845463609172484, "grad_norm": 101.61859130859375, "learning_rate": 5.043779727718622e-08, "loss": 23.5845, "step": 5020 }, { "epoch": 39.925224327018945, "grad_norm": 57.091800689697266, "learning_rate": 4.968341405088602e-08, "loss": 26.287, "step": 5030 }, { "epoch": 40.0, "grad_norm": 21.503507614135742, "learning_rate": 4.8934092050302636e-08, "loss": 25.0861, "step": 5040 }, { "epoch": 40.07976071784646, "grad_norm": 46.6530647277832, "learning_rate": 4.818985020790603e-08, "loss": 
26.3137, "step": 5050 }, { "epoch": 40.15952143569292, "grad_norm": 110.19184112548828, "learning_rate": 4.7450707327810534e-08, "loss": 25.7188, "step": 5060 }, { "epoch": 40.239282153539385, "grad_norm": 114.34848022460938, "learning_rate": 4.6716682085299135e-08, "loss": 24.9849, "step": 5070 }, { "epoch": 40.31904287138584, "grad_norm": 76.02941131591797, "learning_rate": 4.598779302635203e-08, "loss": 25.1661, "step": 5080 }, { "epoch": 40.3988035892323, "grad_norm": 161.83319091796875, "learning_rate": 4.5264058567177906e-08, "loss": 24.9227, "step": 5090 }, { "epoch": 40.47856430707876, "grad_norm": 110.09339904785156, "learning_rate": 4.454549699374874e-08, "loss": 23.4286, "step": 5100 }, { "epoch": 40.47856430707876, "eval_loss": 3.1224095821380615, "eval_mae": 1.3299893140792847, "eval_mse": 3.1224091053009033, "eval_r2": 0.05923336744308472, "eval_rmse": 1.76703398532708, "eval_runtime": 9.0409, "eval_samples_per_second": 443.649, "eval_steps_per_second": 13.937, "step": 5100 }, { "epoch": 40.558325024925225, "grad_norm": 51.97368621826172, "learning_rate": 4.383212646133741e-08, "loss": 24.7696, "step": 5110 }, { "epoch": 40.638085742771686, "grad_norm": 203.3778533935547, "learning_rate": 4.319454622188645e-08, "loss": 24.8723, "step": 5120 }, { "epoch": 40.71784646061815, "grad_norm": 96.84310150146484, "learning_rate": 4.2491088215078766e-08, "loss": 26.5034, "step": 5130 }, { "epoch": 40.79760717846461, "grad_norm": 56.1543083190918, "learning_rate": 4.17928731562566e-08, "loss": 24.4977, "step": 5140 }, { "epoch": 40.877367896311064, "grad_norm": 65.11670684814453, "learning_rate": 4.109991868661525e-08, "loss": 27.2082, "step": 5150 }, { "epoch": 40.957128614157526, "grad_norm": 77.25297546386719, "learning_rate": 4.041224231443538e-08, "loss": 24.6315, "step": 5160 }, { "epoch": 41.03190428713859, "grad_norm": 134.2405548095703, "learning_rate": 3.972986141464027e-08, "loss": 26.111, "step": 5170 }, { "epoch": 41.11166500498504, "grad_norm": 
53.60357666015625, "learning_rate": 3.905279322835728e-08, "loss": 25.429, "step": 5180 }, { "epoch": 41.191425722831504, "grad_norm": 84.80567932128906, "learning_rate": 3.838105486248186e-08, "loss": 23.1782, "step": 5190 }, { "epoch": 41.271186440677965, "grad_norm": 85.67137908935547, "learning_rate": 3.77146632892458e-08, "loss": 26.1872, "step": 5200 }, { "epoch": 41.271186440677965, "eval_loss": 3.1218793392181396, "eval_mae": 1.3309307098388672, "eval_mse": 3.1218795776367188, "eval_r2": 0.05939292907714844, "eval_rmse": 1.7668841438070348, "eval_runtime": 9.0453, "eval_samples_per_second": 443.435, "eval_steps_per_second": 13.93, "step": 5200 }, { "epoch": 41.35094715852443, "grad_norm": 66.26146697998047, "learning_rate": 3.705363534578779e-08, "loss": 24.4168, "step": 5210 }, { "epoch": 41.43070787637089, "grad_norm": 84.41664123535156, "learning_rate": 3.639798773372854e-08, "loss": 24.8912, "step": 5220 }, { "epoch": 41.51046859421735, "grad_norm": 154.61509704589844, "learning_rate": 3.574773701874853e-08, "loss": 25.9521, "step": 5230 }, { "epoch": 41.59022931206381, "grad_norm": 180.2846221923828, "learning_rate": 3.510289963016946e-08, "loss": 25.1476, "step": 5240 }, { "epoch": 41.66999002991027, "grad_norm": 39.9274787902832, "learning_rate": 3.446349186053929e-08, "loss": 24.6444, "step": 5250 }, { "epoch": 41.74975074775673, "grad_norm": 60.37506103515625, "learning_rate": 3.382952986522039e-08, "loss": 28.0159, "step": 5260 }, { "epoch": 41.82951146560319, "grad_norm": 220.5933380126953, "learning_rate": 3.320102966198155e-08, "loss": 23.8454, "step": 5270 }, { "epoch": 41.90927218344965, "grad_norm": 68.86772155761719, "learning_rate": 3.257800713059325e-08, "loss": 25.9979, "step": 5280 }, { "epoch": 41.98903290129611, "grad_norm": 271.44146728515625, "learning_rate": 3.1960478012426215e-08, "loss": 25.2016, "step": 5290 }, { "epoch": 42.06380857427717, "grad_norm": 168.9261016845703, "learning_rate": 3.134845791005391e-08, "loss": 23.1833, 
"step": 5300 }, { "epoch": 42.06380857427717, "eval_loss": 3.1214566230773926, "eval_mae": 1.3316025733947754, "eval_mse": 3.1214566230773926, "eval_r2": 0.059520423412323, "eval_rmse": 1.7667644503660902, "eval_runtime": 9.0542, "eval_samples_per_second": 442.999, "eval_steps_per_second": 13.916, "step": 5300 }, { "epoch": 42.14356929212363, "grad_norm": 194.90185546875, "learning_rate": 3.074196228685833e-08, "loss": 22.7641, "step": 5310 }, { "epoch": 42.22333000997009, "grad_norm": 67.69960021972656, "learning_rate": 3.0141006466639166e-08, "loss": 25.6798, "step": 5320 }, { "epoch": 42.30309072781655, "grad_norm": 37.08260726928711, "learning_rate": 2.954560563322686e-08, "loss": 26.3616, "step": 5330 }, { "epoch": 42.38285144566301, "grad_norm": 79.85238647460938, "learning_rate": 2.8955774830098534e-08, "loss": 25.6113, "step": 5340 }, { "epoch": 42.46261216350947, "grad_norm": 93.74951934814453, "learning_rate": 2.8371528959998475e-08, "loss": 26.3547, "step": 5350 }, { "epoch": 42.54237288135593, "grad_norm": 133.0259552001953, "learning_rate": 2.7792882784561046e-08, "loss": 25.1082, "step": 5360 }, { "epoch": 42.62213359920239, "grad_norm": 161.1022491455078, "learning_rate": 2.721985092393822e-08, "loss": 25.5645, "step": 5370 }, { "epoch": 42.701894317048854, "grad_norm": 104.63832092285156, "learning_rate": 2.66524478564297e-08, "loss": 26.045, "step": 5380 }, { "epoch": 42.781655034895316, "grad_norm": 153.9829559326172, "learning_rate": 2.60906879181175e-08, "loss": 26.1566, "step": 5390 }, { "epoch": 42.86141575274178, "grad_norm": 88.28377532958984, "learning_rate": 2.5534585302503576e-08, "loss": 25.2555, "step": 5400 }, { "epoch": 42.86141575274178, "eval_loss": 3.1208252906799316, "eval_mae": 1.331380009651184, "eval_mse": 3.1208252906799316, "eval_r2": 0.05971062183380127, "eval_rmse": 1.766585772239755, "eval_runtime": 9.0542, "eval_samples_per_second": 443.0, "eval_steps_per_second": 13.916, "step": 5400 }, { "epoch": 42.94117647058823, 
"grad_norm": 147.16366577148438, "learning_rate": 2.498415406015114e-08, "loss": 24.5054, "step": 5410 }, { "epoch": 43.015952143569294, "grad_norm": 234.27850341796875, "learning_rate": 2.4439408098329754e-08, "loss": 24.1556, "step": 5420 }, { "epoch": 43.095712861415755, "grad_norm": 66.06194305419922, "learning_rate": 2.3900361180663907e-08, "loss": 24.8309, "step": 5430 }, { "epoch": 43.17547357926221, "grad_norm": 100.03466796875, "learning_rate": 2.336702692678527e-08, "loss": 23.5658, "step": 5440 }, { "epoch": 43.25523429710867, "grad_norm": 102.8028564453125, "learning_rate": 2.2839418811988697e-08, "loss": 27.2123, "step": 5450 }, { "epoch": 43.33499501495513, "grad_norm": 141.64247131347656, "learning_rate": 2.2317550166891435e-08, "loss": 25.9267, "step": 5460 }, { "epoch": 43.414755732801595, "grad_norm": 185.2597198486328, "learning_rate": 2.1801434177096762e-08, "loss": 24.6855, "step": 5470 }, { "epoch": 43.49451645064806, "grad_norm": 198.41571044921875, "learning_rate": 2.1291083882860355e-08, "loss": 26.7005, "step": 5480 }, { "epoch": 43.57427716849452, "grad_norm": 55.69032669067383, "learning_rate": 2.0786512178761207e-08, "loss": 22.5448, "step": 5490 }, { "epoch": 43.65403788634098, "grad_norm": 88.44397735595703, "learning_rate": 2.0287731813375702e-08, "loss": 25.6887, "step": 5500 }, { "epoch": 43.65403788634098, "eval_loss": 3.1209118366241455, "eval_mae": 1.3314346075057983, "eval_mse": 3.1209118366241455, "eval_r2": 0.05968451499938965, "eval_rmse": 1.7666102673267088, "eval_runtime": 9.0592, "eval_samples_per_second": 442.753, "eval_steps_per_second": 13.908, "step": 5500 }, { "epoch": 43.733798604187434, "grad_norm": 84.4006118774414, "learning_rate": 1.9794755388955363e-08, "loss": 26.3546, "step": 5510 }, { "epoch": 43.813559322033896, "grad_norm": 121.09300231933594, "learning_rate": 1.9307595361108654e-08, "loss": 24.7965, "step": 5520 }, { "epoch": 43.89332003988036, "grad_norm": 55.71806716918945, "learning_rate": 
1.882626403848611e-08, "loss": 24.5852, "step": 5530 }, { "epoch": 43.97308075772682, "grad_norm": 51.23670959472656, "learning_rate": 1.8350773582469502e-08, "loss": 27.0388, "step": 5540 }, { "epoch": 44.047856430707874, "grad_norm": 179.18077087402344, "learning_rate": 1.7881136006864388e-08, "loss": 22.9556, "step": 5550 }, { "epoch": 44.127617148554336, "grad_norm": 59.81156921386719, "learning_rate": 1.7417363177596733e-08, "loss": 27.1492, "step": 5560 }, { "epoch": 44.2073778664008, "grad_norm": 61.35225296020508, "learning_rate": 1.6959466812413086e-08, "loss": 25.5271, "step": 5570 }, { "epoch": 44.28713858424726, "grad_norm": 25.337446212768555, "learning_rate": 1.6507458480584297e-08, "loss": 24.9083, "step": 5580 }, { "epoch": 44.36689930209372, "grad_norm": 127.35221862792969, "learning_rate": 1.6061349602613573e-08, "loss": 24.5705, "step": 5590 }, { "epoch": 44.44666001994018, "grad_norm": 141.15887451171875, "learning_rate": 1.562115144994755e-08, "loss": 24.4761, "step": 5600 }, { "epoch": 44.44666001994018, "eval_loss": 3.120940923690796, "eval_mae": 1.331071376800537, "eval_mse": 3.120941162109375, "eval_r2": 0.05967569351196289, "eval_rmse": 1.7666185672378107, "eval_runtime": 14.0638, "eval_samples_per_second": 285.2, "eval_steps_per_second": 8.959, "step": 5600 }, { "epoch": 44.52642073778664, "grad_norm": 270.5105895996094, "learning_rate": 1.51868751446918e-08, "loss": 25.0175, "step": 5610 }, { "epoch": 44.6061814556331, "grad_norm": 158.59524536132812, "learning_rate": 1.4758531659329747e-08, "loss": 25.5007, "step": 5620 }, { "epoch": 44.68594217347956, "grad_norm": 108.44390869140625, "learning_rate": 1.433613181644519e-08, "loss": 26.3937, "step": 5630 }, { "epoch": 44.76570289132602, "grad_norm": 67.45941162109375, "learning_rate": 1.3919686288449305e-08, "loss": 25.3727, "step": 5640 }, { "epoch": 44.845463609172484, "grad_norm": 173.1921844482422, "learning_rate": 1.3509205597310586e-08, "loss": 24.0026, "step": 5650 }, { "epoch": 
44.925224327018945, "grad_norm": 332.9158630371094, "learning_rate": 1.3104700114289185e-08, "loss": 25.2033, "step": 5660 }, { "epoch": 45.0, "grad_norm": 18.381704330444336, "learning_rate": 1.2706180059674915e-08, "loss": 25.1115, "step": 5670 }, { "epoch": 45.07976071784646, "grad_norm": 36.26863098144531, "learning_rate": 1.231365550252883e-08, "loss": 24.8773, "step": 5680 }, { "epoch": 45.15952143569292, "grad_norm": 150.63861083984375, "learning_rate": 1.1927136360429108e-08, "loss": 24.7349, "step": 5690 }, { "epoch": 45.239282153539385, "grad_norm": 135.55755615234375, "learning_rate": 1.154663239922013e-08, "loss": 25.1996, "step": 5700 }, { "epoch": 45.239282153539385, "eval_loss": 3.120323896408081, "eval_mae": 1.3315786123275757, "eval_mse": 3.120323896408081, "eval_r2": 0.05986166000366211, "eval_rmse": 1.7664438560022453, "eval_runtime": 9.0365, "eval_samples_per_second": 443.865, "eval_steps_per_second": 13.943, "step": 5700 }, { "epoch": 45.31904287138584, "grad_norm": 92.68675994873047, "learning_rate": 1.1172153232766073e-08, "loss": 26.8737, "step": 5710 }, { "epoch": 45.3988035892323, "grad_norm": 75.78144073486328, "learning_rate": 1.080370832270769e-08, "loss": 24.521, "step": 5720 }, { "epoch": 45.47856430707876, "grad_norm": 30.703096389770508, "learning_rate": 1.0441306978223513e-08, "loss": 25.99, "step": 5730 }, { "epoch": 45.558325024925225, "grad_norm": 132.82154846191406, "learning_rate": 1.0084958355794537e-08, "loss": 26.4817, "step": 5740 }, { "epoch": 45.638085742771686, "grad_norm": 111.67121887207031, "learning_rate": 9.734671458972749e-09, "loss": 26.3786, "step": 5750 }, { "epoch": 45.71784646061815, "grad_norm": 74.11526489257812, "learning_rate": 9.390455138153924e-09, "loss": 24.5637, "step": 5760 }, { "epoch": 45.79760717846461, "grad_norm": 64.91019439697266, "learning_rate": 9.052318090353728e-09, "loss": 23.873, "step": 5770 }, { "epoch": 45.877367896311064, "grad_norm": 99.60811614990234, "learning_rate": 
8.720268858988105e-09, "loss": 24.5365, "step": 5780 }, { "epoch": 45.957128614157526, "grad_norm": 116.65836334228516, "learning_rate": 8.394315833657544e-09, "loss": 24.941, "step": 5790 }, { "epoch": 46.03190428713859, "grad_norm": 65.44205474853516, "learning_rate": 8.074467249934796e-09, "loss": 25.0614, "step": 5800 }, { "epoch": 46.03190428713859, "eval_loss": 3.120579481124878, "eval_mae": 1.3315255641937256, "eval_mse": 3.120579481124878, "eval_r2": 0.059784650802612305, "eval_rmse": 1.7665161989421092, "eval_runtime": 9.0413, "eval_samples_per_second": 443.631, "eval_steps_per_second": 13.936, "step": 5800 }, { "epoch": 46.11166500498504, "grad_norm": 87.28318786621094, "learning_rate": 7.760731189157156e-09, "loss": 25.5226, "step": 5810 }, { "epoch": 46.191425722831504, "grad_norm": 251.4473114013672, "learning_rate": 7.453115578221897e-09, "loss": 26.1362, "step": 5820 }, { "epoch": 46.271186440677965, "grad_norm": 58.065940856933594, "learning_rate": 7.15162818938636e-09, "loss": 25.5341, "step": 5830 }, { "epoch": 46.35094715852443, "grad_norm": 78.00777435302734, "learning_rate": 6.8562766400712676e-09, "loss": 25.3617, "step": 5840 }, { "epoch": 46.43070787637089, "grad_norm": 62.271507263183594, "learning_rate": 6.56706839266849e-09, "loss": 25.0898, "step": 5850 }, { "epoch": 46.51046859421735, "grad_norm": 45.634952545166016, "learning_rate": 6.284010754352448e-09, "loss": 24.7997, "step": 5860 }, { "epoch": 46.59022931206381, "grad_norm": 89.99065399169922, "learning_rate": 6.007110876895261e-09, "loss": 24.4887, "step": 5870 }, { "epoch": 46.66999002991027, "grad_norm": 37.37367248535156, "learning_rate": 5.736375756486444e-09, "loss": 25.5751, "step": 5880 }, { "epoch": 46.74975074775673, "grad_norm": 123.9768295288086, "learning_rate": 5.471812233555834e-09, "loss": 24.7693, "step": 5890 }, { "epoch": 46.82951146560319, "grad_norm": 130.57916259765625, "learning_rate": 5.2134269926008005e-09, "loss": 24.1577, "step": 5900 }, { "epoch": 
46.82951146560319, "eval_loss": 3.1201043128967285, "eval_mae": 1.3313286304473877, "eval_mse": 3.1201043128967285, "eval_r2": 0.05992782115936279, "eval_rmse": 1.766381700793101, "eval_runtime": 9.0588, "eval_samples_per_second": 442.774, "eval_steps_per_second": 13.909, "step": 5900 }, { "epoch": 46.90927218344965, "grad_norm": 136.50294494628906, "learning_rate": 4.961226562017584e-09, "loss": 27.04, "step": 5910 }, { "epoch": 46.98903290129611, "grad_norm": 162.00277709960938, "learning_rate": 4.71521731393601e-09, "loss": 25.3749, "step": 5920 }, { "epoch": 47.06380857427717, "grad_norm": 155.60572814941406, "learning_rate": 4.4754054640587515e-09, "loss": 25.0107, "step": 5930 }, { "epoch": 47.14356929212363, "grad_norm": 141.03631591796875, "learning_rate": 4.241797071504127e-09, "loss": 26.8127, "step": 5940 }, { "epoch": 47.22333000997009, "grad_norm": 51.73516082763672, "learning_rate": 4.014398038653111e-09, "loss": 24.7251, "step": 5950 }, { "epoch": 47.30309072781655, "grad_norm": 256.9667663574219, "learning_rate": 3.793214111000198e-09, "loss": 26.5637, "step": 5960 }, { "epoch": 47.38285144566301, "grad_norm": 47.89490509033203, "learning_rate": 3.57825087700811e-09, "loss": 25.4085, "step": 5970 }, { "epoch": 47.46261216350947, "grad_norm": 144.0662078857422, "learning_rate": 3.369513767966764e-09, "loss": 24.9812, "step": 5980 }, { "epoch": 47.54237288135593, "grad_norm": 59.69084167480469, "learning_rate": 3.167008057855941e-09, "loss": 24.4607, "step": 5990 }, { "epoch": 47.62213359920239, "grad_norm": 57.48725509643555, "learning_rate": 2.97073886321203e-09, "loss": 23.9325, "step": 6000 }, { "epoch": 47.62213359920239, "eval_loss": 3.1205005645751953, "eval_mae": 1.3313699960708618, "eval_mse": 3.1205005645751953, "eval_r2": 0.059808433055877686, "eval_rmse": 1.7664938620259045, "eval_runtime": 9.0615, "eval_samples_per_second": 442.643, "eval_steps_per_second": 13.905, "step": 6000 }, { "epoch": 47.701894317048854, "grad_norm": 
173.4394073486328, "learning_rate": 2.780711142998854e-09, "loss": 24.7271, "step": 6010 }, { "epoch": 47.781655034895316, "grad_norm": 82.74398803710938, "learning_rate": 2.596929698482242e-09, "loss": 26.1529, "step": 6020 }, { "epoch": 47.86141575274178, "grad_norm": 130.01808166503906, "learning_rate": 2.419399173108849e-09, "loss": 24.504, "step": 6030 }, { "epoch": 47.94117647058823, "grad_norm": 43.647499084472656, "learning_rate": 2.2481240523886668e-09, "loss": 25.6795, "step": 6040 }, { "epoch": 48.015952143569294, "grad_norm": 56.47761535644531, "learning_rate": 2.0831086637819182e-09, "loss": 23.4324, "step": 6050 }, { "epoch": 48.095712861415755, "grad_norm": 155.72174072265625, "learning_rate": 1.9243571765895083e-09, "loss": 25.0166, "step": 6060 }, { "epoch": 48.17547357926221, "grad_norm": 111.24287414550781, "learning_rate": 1.7718736018478008e-09, "loss": 24.6729, "step": 6070 }, { "epoch": 48.25523429710867, "grad_norm": 64.62834167480469, "learning_rate": 1.6256617922272287e-09, "loss": 27.8513, "step": 6080 }, { "epoch": 48.33499501495513, "grad_norm": 78.91862487792969, "learning_rate": 1.4857254419349818e-09, "loss": 26.6801, "step": 6090 }, { "epoch": 48.414755732801595, "grad_norm": 120.07483673095703, "learning_rate": 1.352068086621666e-09, "loss": 26.0575, "step": 6100 }, { "epoch": 48.414755732801595, "eval_loss": 3.1204397678375244, "eval_mae": 1.331369161605835, "eval_mse": 3.1204397678375244, "eval_r2": 0.05982673168182373, "eval_rmse": 1.7664766536350047, "eval_runtime": 9.0446, "eval_samples_per_second": 443.468, "eval_steps_per_second": 13.931, "step": 6100 }, { "epoch": 48.49451645064806, "grad_norm": 249.7754669189453, "learning_rate": 1.2246931032919017e-09, "loss": 23.2525, "step": 6110 }, { "epoch": 48.57427716849452, "grad_norm": 73.22221374511719, "learning_rate": 1.103603710219142e-09, "loss": 26.0894, "step": 6120 }, { "epoch": 48.65403788634098, "grad_norm": 118.6893310546875, "learning_rate": 9.888029668642105e-10, 
"loss": 24.5071, "step": 6130 }, { "epoch": 48.733798604187434, "grad_norm": 47.15996170043945, "learning_rate": 8.802937737980298e-10, "loss": 24.6825, "step": 6140 }, { "epoch": 48.813559322033896, "grad_norm": 129.1663055419922, "learning_rate": 7.780788726284848e-10, "loss": 25.1016, "step": 6150 }, { "epoch": 48.89332003988036, "grad_norm": 158.0771942138672, "learning_rate": 6.821608459309513e-10, "loss": 26.7324, "step": 6160 }, { "epoch": 48.97308075772682, "grad_norm": 100.31987762451172, "learning_rate": 5.925421171831535e-10, "loss": 23.5741, "step": 6170 }, { "epoch": 49.047856430707874, "grad_norm": 143.13925170898438, "learning_rate": 5.092249507038793e-10, "loss": 23.3466, "step": 6180 }, { "epoch": 49.127617148554336, "grad_norm": 145.27874755859375, "learning_rate": 4.322114515958597e-10, "loss": 28.8833, "step": 6190 }, { "epoch": 49.2073778664008, "grad_norm": 60.93461608886719, "learning_rate": 3.6150356569250584e-10, "loss": 25.5008, "step": 6200 }, { "epoch": 49.2073778664008, "eval_loss": 3.1204824447631836, "eval_mae": 1.3314094543457031, "eval_mse": 3.1204826831817627, "eval_r2": 0.059813857078552246, "eval_rmse": 1.766488800751865, "eval_runtime": 9.0554, "eval_samples_per_second": 442.938, "eval_steps_per_second": 13.914, "step": 6200 }, { "epoch": 49.28713858424726, "grad_norm": 143.7784423828125, "learning_rate": 2.9710307950867044e-10, "loss": 26.3828, "step": 6210 }, { "epoch": 49.36689930209372, "grad_norm": 141.80580139160156, "learning_rate": 2.390116201957393e-10, "loss": 26.8702, "step": 6220 }, { "epoch": 49.44666001994018, "grad_norm": 56.256038665771484, "learning_rate": 1.8723065550030337e-10, "loss": 26.5281, "step": 6230 }, { "epoch": 49.52642073778664, "grad_norm": 117.0438232421875, "learning_rate": 1.4176149372713274e-10, "loss": 24.9371, "step": 6240 }, { "epoch": 49.6061814556331, "grad_norm": 50.82411575317383, "learning_rate": 1.0260528370617527e-10, "loss": 23.6784, "step": 6250 }, { "epoch": 49.68594217347956, 
"grad_norm": 38.99748229980469, "learning_rate": 6.976301476346869e-11, "loss": 23.8564, "step": 6260 }, { "epoch": 49.76570289132602, "grad_norm": 101.93575286865234, "learning_rate": 4.323551669621617e-11, "loss": 25.8426, "step": 6270 }, { "epoch": 49.845463609172484, "grad_norm": 90.21463012695312, "learning_rate": 2.3023459751719822e-11, "loss": 22.9579, "step": 6280 }, { "epoch": 49.925224327018945, "grad_norm": 92.85131072998047, "learning_rate": 9.127354610505289e-12, "loss": 26.2049, "step": 6290 }, { "epoch": 50.0, "grad_norm": 53.79079055786133, "learning_rate": 1.5475523734709283e-12, "loss": 20.9661, "step": 6300 }, { "epoch": 50.0, "eval_loss": 3.120378017425537, "eval_mae": 1.3313935995101929, "eval_mse": 3.120378017425537, "eval_r2": 0.05984538793563843, "eval_rmse": 1.7664591751369565, "eval_runtime": 9.0621, "eval_samples_per_second": 442.611, "eval_steps_per_second": 13.904, "step": 6300 }, { "epoch": 50.07976071784646, "grad_norm": 76.04975891113281, "learning_rate": 4.2753687772899825e-07, "loss": 22.6056, "step": 6310 }, { "epoch": 50.15952143569292, "grad_norm": 184.59645080566406, "learning_rate": 4.273168737213282e-07, "loss": 25.5011, "step": 6320 }, { "epoch": 50.239282153539385, "grad_norm": 184.94509887695312, "learning_rate": 4.270965930362009e-07, "loss": 25.8012, "step": 6330 }, { "epoch": 50.31904287138584, "grad_norm": 49.95936584472656, "learning_rate": 4.2687603601733256e-07, "loss": 26.2851, "step": 6340 }, { "epoch": 50.3988035892323, "grad_norm": 116.41128540039062, "learning_rate": 4.266552030088708e-07, "loss": 25.493, "step": 6350 }, { "epoch": 50.47856430707876, "grad_norm": 122.41911315917969, "learning_rate": 4.2643409435539355e-07, "loss": 24.7171, "step": 6360 }, { "epoch": 50.558325024925225, "grad_norm": 143.13278198242188, "learning_rate": 4.2621271040190934e-07, "loss": 25.5837, "step": 6370 }, { "epoch": 50.638085742771686, "grad_norm": 49.593753814697266, "learning_rate": 4.259910514938556e-07, "loss": 24.2556, 
"step": 6380 }, { "epoch": 50.71784646061815, "grad_norm": 336.3773498535156, "learning_rate": 4.2576911797709936e-07, "loss": 27.6305, "step": 6390 }, { "epoch": 50.79760717846461, "grad_norm": 80.11558532714844, "learning_rate": 4.255469101979359e-07, "loss": 25.54, "step": 6400 }, { "epoch": 50.79760717846461, "eval_loss": 3.118216037750244, "eval_mae": 1.333853840827942, "eval_mse": 3.118216037750244, "eval_r2": 0.06049680709838867, "eval_rmse": 1.7658471161882174, "eval_runtime": 9.1405, "eval_samples_per_second": 438.817, "eval_steps_per_second": 13.785, "step": 6400 }, { "epoch": 50.877367896311064, "grad_norm": 62.02965545654297, "learning_rate": 4.2532442850308846e-07, "loss": 23.936, "step": 6410 }, { "epoch": 50.957128614157526, "grad_norm": 92.97734069824219, "learning_rate": 4.2510167323970767e-07, "loss": 26.374, "step": 6420 }, { "epoch": 51.03190428713859, "grad_norm": 93.20958709716797, "learning_rate": 4.2487864475537115e-07, "loss": 24.8878, "step": 6430 }, { "epoch": 51.11166500498504, "grad_norm": 34.5943603515625, "learning_rate": 4.246553433980826e-07, "loss": 24.1878, "step": 6440 }, { "epoch": 51.191425722831504, "grad_norm": 58.39374923706055, "learning_rate": 4.2443176951627183e-07, "loss": 26.4275, "step": 6450 }, { "epoch": 51.271186440677965, "grad_norm": 172.89662170410156, "learning_rate": 4.2420792345879354e-07, "loss": 25.2616, "step": 6460 }, { "epoch": 51.35094715852443, "grad_norm": 86.6563720703125, "learning_rate": 4.239838055749273e-07, "loss": 26.9444, "step": 6470 }, { "epoch": 51.43070787637089, "grad_norm": 29.669885635375977, "learning_rate": 4.2375941621437687e-07, "loss": 24.9411, "step": 6480 }, { "epoch": 51.51046859421735, "grad_norm": 64.25094604492188, "learning_rate": 4.2353475572726955e-07, "loss": 22.4844, "step": 6490 }, { "epoch": 51.59022931206381, "grad_norm": 65.33932495117188, "learning_rate": 4.2330982446415557e-07, "loss": 25.193, "step": 6500 }, { "epoch": 51.59022931206381, "eval_loss": 
3.115053653717041, "eval_mae": 1.3340669870376587, "eval_mse": 3.115053415298462, "eval_r2": 0.06144958734512329, "eval_rmse": 1.7649513917664876, "eval_runtime": 9.103, "eval_samples_per_second": 440.624, "eval_steps_per_second": 13.842, "step": 6500 }, { "epoch": 51.66999002991027, "grad_norm": 31.864547729492188, "learning_rate": 4.23084622776008e-07, "loss": 25.3146, "step": 6510 }, { "epoch": 51.74975074775673, "grad_norm": 90.17521667480469, "learning_rate": 4.228591510142214e-07, "loss": 26.0056, "step": 6520 }, { "epoch": 51.82951146560319, "grad_norm": 81.95307922363281, "learning_rate": 4.226334095306122e-07, "loss": 25.6727, "step": 6530 }, { "epoch": 51.90927218344965, "grad_norm": 328.4442138671875, "learning_rate": 4.224073986774175e-07, "loss": 25.8714, "step": 6540 }, { "epoch": 51.98903290129611, "grad_norm": 150.25486755371094, "learning_rate": 4.221811188072945e-07, "loss": 24.787, "step": 6550 }, { "epoch": 52.06380857427717, "grad_norm": 53.980255126953125, "learning_rate": 4.219545702733205e-07, "loss": 24.6424, "step": 6560 }, { "epoch": 52.14356929212363, "grad_norm": 63.03770446777344, "learning_rate": 4.217277534289919e-07, "loss": 23.153, "step": 6570 }, { "epoch": 52.22333000997009, "grad_norm": 62.21890640258789, "learning_rate": 4.2150066862822374e-07, "loss": 27.1061, "step": 6580 }, { "epoch": 52.30309072781655, "grad_norm": 62.28493118286133, "learning_rate": 4.212733162253491e-07, "loss": 25.3507, "step": 6590 }, { "epoch": 52.38285144566301, "grad_norm": 96.04239654541016, "learning_rate": 4.210456965751187e-07, "loss": 24.8468, "step": 6600 }, { "epoch": 52.38285144566301, "eval_loss": 3.11200213432312, "eval_mae": 1.3248567581176758, "eval_mse": 3.11200213432312, "eval_r2": 0.06236898899078369, "eval_rmse": 1.7640867706332135, "eval_runtime": 9.0953, "eval_samples_per_second": 440.996, "eval_steps_per_second": 13.853, "step": 6600 }, { "epoch": 52.46261216350947, "grad_norm": 303.4186096191406, "learning_rate": 
4.208178100327002e-07, "loss": 23.6352, "step": 6610 }, { "epoch": 52.54237288135593, "grad_norm": 66.25389099121094, "learning_rate": 4.205896569536779e-07, "loss": 26.611, "step": 6620 }, { "epoch": 52.62213359920239, "grad_norm": 38.68503952026367, "learning_rate": 4.203612376940516e-07, "loss": 25.7687, "step": 6630 }, { "epoch": 52.701894317048854, "grad_norm": 391.4928894042969, "learning_rate": 4.201325526102369e-07, "loss": 26.5341, "step": 6640 }, { "epoch": 52.781655034895316, "grad_norm": 51.631919860839844, "learning_rate": 4.1990360205906386e-07, "loss": 25.1104, "step": 6650 }, { "epoch": 52.86141575274178, "grad_norm": 208.6623992919922, "learning_rate": 4.1967438639777676e-07, "loss": 25.5387, "step": 6660 }, { "epoch": 52.94117647058823, "grad_norm": 90.05245971679688, "learning_rate": 4.194449059840338e-07, "loss": 25.5684, "step": 6670 }, { "epoch": 53.015952143569294, "grad_norm": 96.74432373046875, "learning_rate": 4.192151611759061e-07, "loss": 21.4243, "step": 6680 }, { "epoch": 53.095712861415755, "grad_norm": 258.4862060546875, "learning_rate": 4.1898515233187737e-07, "loss": 26.9519, "step": 6690 }, { "epoch": 53.17547357926221, "grad_norm": 66.87359619140625, "learning_rate": 4.187548798108431e-07, "loss": 25.9816, "step": 6700 }, { "epoch": 53.17547357926221, "eval_loss": 3.109802722930908, "eval_mae": 1.3243112564086914, "eval_mse": 3.109802722930908, "eval_r2": 0.06303161382675171, "eval_rmse": 1.7634632751863328, "eval_runtime": 9.0994, "eval_samples_per_second": 440.8, "eval_steps_per_second": 13.847, "step": 6700 }, { "epoch": 53.25523429710867, "grad_norm": 322.5094909667969, "learning_rate": 4.185243439721108e-07, "loss": 25.8368, "step": 6710 }, { "epoch": 53.33499501495513, "grad_norm": 75.15873718261719, "learning_rate": 4.1829354517539804e-07, "loss": 24.9007, "step": 6720 }, { "epoch": 53.414755732801595, "grad_norm": 85.91637420654297, "learning_rate": 4.1806248378083344e-07, "loss": 24.7186, "step": 6730 }, { "epoch": 
53.49451645064806, "grad_norm": 248.0203857421875, "learning_rate": 4.178311601489549e-07, "loss": 24.6466, "step": 6740 }, { "epoch": 53.57427716849452, "grad_norm": 68.95528411865234, "learning_rate": 4.1759957464070967e-07, "loss": 25.2751, "step": 6750 }, { "epoch": 53.65403788634098, "grad_norm": 150.67889404296875, "learning_rate": 4.173677276174536e-07, "loss": 24.6001, "step": 6760 }, { "epoch": 53.733798604187434, "grad_norm": 169.43186950683594, "learning_rate": 4.171356194409506e-07, "loss": 27.6572, "step": 6770 }, { "epoch": 53.813559322033896, "grad_norm": 97.16729736328125, "learning_rate": 4.1690325047337206e-07, "loss": 24.71, "step": 6780 }, { "epoch": 53.89332003988036, "grad_norm": 53.780696868896484, "learning_rate": 4.166706210772963e-07, "loss": 24.9079, "step": 6790 }, { "epoch": 53.97308075772682, "grad_norm": 291.2892761230469, "learning_rate": 4.1643773161570805e-07, "loss": 23.6774, "step": 6800 }, { "epoch": 53.97308075772682, "eval_loss": 3.1034927368164062, "eval_mae": 1.3314313888549805, "eval_mse": 3.1034929752349854, "eval_r2": 0.06493276357650757, "eval_rmse": 1.7616733452132904, "eval_runtime": 9.1214, "eval_samples_per_second": 439.733, "eval_steps_per_second": 13.814, "step": 6800 }, { "epoch": 54.047856430707874, "grad_norm": 231.18553161621094, "learning_rate": 4.162045824519977e-07, "loss": 25.4095, "step": 6810 }, { "epoch": 54.127617148554336, "grad_norm": 223.30455017089844, "learning_rate": 4.1597117394996117e-07, "loss": 27.0908, "step": 6820 }, { "epoch": 54.2073778664008, "grad_norm": 106.4852066040039, "learning_rate": 4.1573750647379857e-07, "loss": 25.5567, "step": 6830 }, { "epoch": 54.28713858424726, "grad_norm": 215.9276580810547, "learning_rate": 4.155035803881145e-07, "loss": 24.7604, "step": 6840 }, { "epoch": 54.36689930209372, "grad_norm": 135.57931518554688, "learning_rate": 4.1526939605791695e-07, "loss": 25.3572, "step": 6850 }, { "epoch": 54.44666001994018, "grad_norm": 117.22618103027344, 
"learning_rate": 4.1503495384861674e-07, "loss": 24.2714, "step": 6860 }, { "epoch": 54.52642073778664, "grad_norm": 114.61555480957031, "learning_rate": 4.148002541260274e-07, "loss": 25.8058, "step": 6870 }, { "epoch": 54.6061814556331, "grad_norm": 119.7732162475586, "learning_rate": 4.145652972563638e-07, "loss": 22.5095, "step": 6880 }, { "epoch": 54.68594217347956, "grad_norm": 135.6663360595703, "learning_rate": 4.143300836062425e-07, "loss": 26.633, "step": 6890 }, { "epoch": 54.76570289132602, "grad_norm": 99.55790710449219, "learning_rate": 4.140946135426805e-07, "loss": 24.2669, "step": 6900 }, { "epoch": 54.76570289132602, "eval_loss": 3.099317789077759, "eval_mae": 1.330242395401001, "eval_mse": 3.099317789077759, "eval_r2": 0.06619071960449219, "eval_rmse": 1.7604879406226441, "eval_runtime": 9.076, "eval_samples_per_second": 441.932, "eval_steps_per_second": 13.883, "step": 6900 }, { "epoch": 54.845463609172484, "grad_norm": 70.54505920410156, "learning_rate": 4.1385888743309495e-07, "loss": 25.2153, "step": 6910 }, { "epoch": 54.925224327018945, "grad_norm": 105.45303344726562, "learning_rate": 4.1362290564530244e-07, "loss": 25.4413, "step": 6920 }, { "epoch": 55.0, "grad_norm": 109.5523681640625, "learning_rate": 4.133866685475187e-07, "loss": 22.7176, "step": 6930 }, { "epoch": 55.07976071784646, "grad_norm": 120.8057861328125, "learning_rate": 4.131501765083576e-07, "loss": 23.361, "step": 6940 }, { "epoch": 55.15952143569292, "grad_norm": 89.11048126220703, "learning_rate": 4.12913429896831e-07, "loss": 24.928, "step": 6950 }, { "epoch": 55.239282153539385, "grad_norm": 123.72354888916016, "learning_rate": 4.126764290823479e-07, "loss": 25.3552, "step": 6960 }, { "epoch": 55.31904287138584, "grad_norm": 71.4775161743164, "learning_rate": 4.1243917443471385e-07, "loss": 25.8639, "step": 6970 }, { "epoch": 55.3988035892323, "grad_norm": 179.25799560546875, "learning_rate": 4.122016663241307e-07, "loss": 25.6156, "step": 6980 }, { "epoch": 
55.47856430707876, "grad_norm": 33.811058044433594, "learning_rate": 4.119639051211957e-07, "loss": 24.9542, "step": 6990 }, { "epoch": 55.558325024925225, "grad_norm": 171.1689910888672, "learning_rate": 4.117258911969009e-07, "loss": 25.5526, "step": 7000 }, { "epoch": 55.558325024925225, "eval_loss": 3.0987918376922607, "eval_mae": 1.3330014944076538, "eval_mse": 3.0987918376922607, "eval_r2": 0.06634920835494995, "eval_rmse": 1.760338557690611, "eval_runtime": 9.0725, "eval_samples_per_second": 442.105, "eval_steps_per_second": 13.888, "step": 7000 }, { "epoch": 55.638085742771686, "grad_norm": 92.38328552246094, "learning_rate": 4.114876249226327e-07, "loss": 26.2241, "step": 7010 }, { "epoch": 55.71784646061815, "grad_norm": 348.17333984375, "learning_rate": 4.112491066701715e-07, "loss": 27.2409, "step": 7020 }, { "epoch": 55.79760717846461, "grad_norm": 254.02542114257812, "learning_rate": 4.110103368116906e-07, "loss": 24.6501, "step": 7030 }, { "epoch": 55.877367896311064, "grad_norm": 219.8361358642578, "learning_rate": 4.1077131571975595e-07, "loss": 26.0709, "step": 7040 }, { "epoch": 55.957128614157526, "grad_norm": 98.12411499023438, "learning_rate": 4.1053204376732557e-07, "loss": 24.2047, "step": 7050 }, { "epoch": 56.03190428713859, "grad_norm": 366.5306396484375, "learning_rate": 4.1029252132774907e-07, "loss": 23.0486, "step": 7060 }, { "epoch": 56.11166500498504, "grad_norm": 176.13414001464844, "learning_rate": 4.100527487747665e-07, "loss": 25.716, "step": 7070 }, { "epoch": 56.191425722831504, "grad_norm": 84.32244873046875, "learning_rate": 4.0981272648250854e-07, "loss": 27.4791, "step": 7080 }, { "epoch": 56.271186440677965, "grad_norm": 140.40318298339844, "learning_rate": 4.0957245482549543e-07, "loss": 24.0629, "step": 7090 }, { "epoch": 56.35094715852443, "grad_norm": 234.8149871826172, "learning_rate": 4.0933193417863644e-07, "loss": 25.9714, "step": 7100 }, { "epoch": 56.35094715852443, "eval_loss": 3.095909833908081, "eval_mae": 
1.3204256296157837, "eval_mse": 3.09591007232666, "eval_r2": 0.06721740961074829, "eval_rmse": 1.7595198414131794, "eval_runtime": 9.086, "eval_samples_per_second": 441.45, "eval_steps_per_second": 13.868, "step": 7100 }, { "epoch": 56.43070787637089, "grad_norm": 65.00196838378906, "learning_rate": 4.0909116491722943e-07, "loss": 23.4879, "step": 7110 }, { "epoch": 56.51046859421735, "grad_norm": 197.07687377929688, "learning_rate": 4.088501474169603e-07, "loss": 25.5584, "step": 7120 }, { "epoch": 56.59022931206381, "grad_norm": 60.80377197265625, "learning_rate": 4.0860888205390197e-07, "loss": 26.0502, "step": 7130 }, { "epoch": 56.66999002991027, "grad_norm": 110.6761703491211, "learning_rate": 4.083673692045145e-07, "loss": 25.3108, "step": 7140 }, { "epoch": 56.74975074775673, "grad_norm": 116.65326690673828, "learning_rate": 4.0814979635070945e-07, "loss": 23.8434, "step": 7150 }, { "epoch": 56.82951146560319, "grad_norm": 84.38224029541016, "learning_rate": 4.079078143158259e-07, "loss": 23.5383, "step": 7160 }, { "epoch": 56.90927218344965, "grad_norm": 151.78848266601562, "learning_rate": 4.076655858885284e-07, "loss": 28.0889, "step": 7170 }, { "epoch": 56.98903290129611, "grad_norm": 271.9244689941406, "learning_rate": 4.0742311144677964e-07, "loss": 22.9197, "step": 7180 }, { "epoch": 57.06380857427717, "grad_norm": 108.05704498291016, "learning_rate": 4.071803913689259e-07, "loss": 21.7839, "step": 7190 }, { "epoch": 57.14356929212363, "grad_norm": 219.2751922607422, "learning_rate": 4.0693742603369703e-07, "loss": 26.7117, "step": 7200 }, { "epoch": 57.14356929212363, "eval_loss": 3.090022087097168, "eval_mae": 1.3286364078521729, "eval_mse": 3.0900213718414307, "eval_r2": 0.06899166107177734, "eval_rmse": 1.7578456621220848, "eval_runtime": 9.0823, "eval_samples_per_second": 441.628, "eval_steps_per_second": 13.873, "step": 7200 }, { "epoch": 57.22333000997009, "grad_norm": 154.8135986328125, "learning_rate": 4.0669421582020544e-07, "loss": 
26.9373, "step": 7210 }, { "epoch": 57.30309072781655, "grad_norm": 94.78240203857422, "learning_rate": 4.0645076110794566e-07, "loss": 24.1204, "step": 7220 }, { "epoch": 57.38285144566301, "grad_norm": 147.9627227783203, "learning_rate": 4.062070622767938e-07, "loss": 25.5562, "step": 7230 }, { "epoch": 57.46261216350947, "grad_norm": 209.6161651611328, "learning_rate": 4.0596311970700665e-07, "loss": 24.3868, "step": 7240 }, { "epoch": 57.54237288135593, "grad_norm": 72.75938415527344, "learning_rate": 4.057189337792216e-07, "loss": 25.9237, "step": 7250 }, { "epoch": 57.62213359920239, "grad_norm": 241.09524536132812, "learning_rate": 4.0547450487445556e-07, "loss": 24.6371, "step": 7260 }, { "epoch": 57.701894317048854, "grad_norm": 136.7520294189453, "learning_rate": 4.0522983337410477e-07, "loss": 25.3354, "step": 7270 }, { "epoch": 57.781655034895316, "grad_norm": 203.94277954101562, "learning_rate": 4.0498491965994377e-07, "loss": 24.1224, "step": 7280 }, { "epoch": 57.86141575274178, "grad_norm": 61.92915725708008, "learning_rate": 4.047397641141252e-07, "loss": 25.7716, "step": 7290 }, { "epoch": 57.94117647058823, "grad_norm": 59.817298889160156, "learning_rate": 4.0449436711917897e-07, "loss": 24.8604, "step": 7300 }, { "epoch": 57.94117647058823, "eval_loss": 3.088762044906616, "eval_mae": 1.318792462348938, "eval_mse": 3.088761806488037, "eval_r2": 0.06937116384506226, "eval_rmse": 1.7574873559966333, "eval_runtime": 9.0877, "eval_samples_per_second": 441.366, "eval_steps_per_second": 13.865, "step": 7300 }, { "epoch": 58.015952143569294, "grad_norm": 94.05938720703125, "learning_rate": 4.042487290580118e-07, "loss": 23.8257, "step": 7310 }, { "epoch": 58.095712861415755, "grad_norm": 54.836669921875, "learning_rate": 4.040028503139065e-07, "loss": 27.0251, "step": 7320 }, { "epoch": 58.17547357926221, "grad_norm": 150.6589813232422, "learning_rate": 4.037567312705216e-07, "loss": 24.3402, "step": 7330 }, { "epoch": 58.25523429710867, "grad_norm": 
400.803955078125, "learning_rate": 4.035103723118901e-07, "loss": 23.3259, "step": 7340 }, { "epoch": 58.33499501495513, "grad_norm": 128.96749877929688, "learning_rate": 4.032637738224201e-07, "loss": 24.5398, "step": 7350 }, { "epoch": 58.414755732801595, "grad_norm": 235.96865844726562, "learning_rate": 4.030169361868927e-07, "loss": 23.7088, "step": 7360 }, { "epoch": 58.49451645064806, "grad_norm": 75.51648712158203, "learning_rate": 4.0276985979046276e-07, "loss": 27.9453, "step": 7370 }, { "epoch": 58.57427716849452, "grad_norm": 84.60974884033203, "learning_rate": 4.025225450186573e-07, "loss": 24.5951, "step": 7380 }, { "epoch": 58.65403788634098, "grad_norm": 157.3654022216797, "learning_rate": 4.0227499225737537e-07, "loss": 25.4845, "step": 7390 }, { "epoch": 58.733798604187434, "grad_norm": 164.58987426757812, "learning_rate": 4.020272018928876e-07, "loss": 21.6683, "step": 7400 }, { "epoch": 58.733798604187434, "eval_loss": 3.0835020542144775, "eval_mae": 1.3220032453536987, "eval_mse": 3.0835020542144775, "eval_r2": 0.07095593214035034, "eval_rmse": 1.7559903343169283, "eval_runtime": 9.0819, "eval_samples_per_second": 441.648, "eval_steps_per_second": 13.874, "step": 7400 }, { "epoch": 58.813559322033896, "grad_norm": 92.88033294677734, "learning_rate": 4.0177917431183503e-07, "loss": 27.4979, "step": 7410 }, { "epoch": 58.89332003988036, "grad_norm": 404.1705322265625, "learning_rate": 4.0153090990122906e-07, "loss": 23.6773, "step": 7420 }, { "epoch": 58.97308075772682, "grad_norm": 193.15248107910156, "learning_rate": 4.0128240904845056e-07, "loss": 26.0743, "step": 7430 }, { "epoch": 59.047856430707874, "grad_norm": 395.1861877441406, "learning_rate": 4.0103367214124927e-07, "loss": 26.1012, "step": 7440 }, { "epoch": 59.127617148554336, "grad_norm": 56.67768859863281, "learning_rate": 4.007846995677434e-07, "loss": 25.2291, "step": 7450 }, { "epoch": 59.2073778664008, "grad_norm": 194.84588623046875, "learning_rate": 4.005354917164189e-07, 
"loss": 24.8622, "step": 7460 }, { "epoch": 59.28713858424726, "grad_norm": 164.9954833984375, "learning_rate": 4.0028604897612855e-07, "loss": 24.4034, "step": 7470 }, { "epoch": 59.36689930209372, "grad_norm": 91.07857513427734, "learning_rate": 4.0003637173609195e-07, "loss": 25.145, "step": 7480 }, { "epoch": 59.44666001994018, "grad_norm": 255.81103515625, "learning_rate": 3.9978646038589453e-07, "loss": 26.5347, "step": 7490 }, { "epoch": 59.52642073778664, "grad_norm": 91.14227294921875, "learning_rate": 3.995363153154869e-07, "loss": 25.1552, "step": 7500 }, { "epoch": 59.52642073778664, "eval_loss": 3.0817205905914307, "eval_mae": 1.3204900026321411, "eval_mse": 3.0817203521728516, "eval_r2": 0.07149273157119751, "eval_rmse": 1.7554829398694969, "eval_runtime": 9.1094, "eval_samples_per_second": 440.315, "eval_steps_per_second": 13.832, "step": 7500 }, { "epoch": 59.6061814556331, "grad_norm": 278.22918701171875, "learning_rate": 3.9928593691518453e-07, "loss": 26.0224, "step": 7510 }, { "epoch": 59.68594217347956, "grad_norm": 78.10284423828125, "learning_rate": 3.990353255756667e-07, "loss": 23.5015, "step": 7520 }, { "epoch": 59.76570289132602, "grad_norm": 85.6133041381836, "learning_rate": 3.987844816879765e-07, "loss": 22.773, "step": 7530 }, { "epoch": 59.845463609172484, "grad_norm": 130.89801025390625, "learning_rate": 3.9853340564351965e-07, "loss": 25.6739, "step": 7540 }, { "epoch": 59.925224327018945, "grad_norm": 308.4039306640625, "learning_rate": 3.982820978340642e-07, "loss": 24.2176, "step": 7550 }, { "epoch": 60.0, "grad_norm": 216.79660034179688, "learning_rate": 3.9803055865173974e-07, "loss": 25.2403, "step": 7560 }, { "epoch": 60.07976071784646, "grad_norm": 84.96922302246094, "learning_rate": 3.97778788489037e-07, "loss": 25.8891, "step": 7570 }, { "epoch": 60.15952143569292, "grad_norm": 169.84339904785156, "learning_rate": 3.975267877388071e-07, "loss": 24.7238, "step": 7580 }, { "epoch": 60.239282153539385, "grad_norm": 
49.10818862915039, "learning_rate": 3.972745567942609e-07, "loss": 26.6454, "step": 7590 }, { "epoch": 60.31904287138584, "grad_norm": 108.38031005859375, "learning_rate": 3.9702209604896853e-07, "loss": 24.8128, "step": 7600 }, { "epoch": 60.31904287138584, "eval_loss": 3.0798652172088623, "eval_mae": 1.3183189630508423, "eval_mse": 3.0798652172088623, "eval_r2": 0.07205164432525635, "eval_rmse": 1.754954477246878, "eval_runtime": 9.1086, "eval_samples_per_second": 440.353, "eval_steps_per_second": 13.833, "step": 7600 }, { "epoch": 60.3988035892323, "grad_norm": 38.540428161621094, "learning_rate": 3.967694058968586e-07, "loss": 26.0966, "step": 7610 }, { "epoch": 60.47856430707876, "grad_norm": 88.73666381835938, "learning_rate": 3.965164867322177e-07, "loss": 22.8718, "step": 7620 }, { "epoch": 60.558325024925225, "grad_norm": 106.24761199951172, "learning_rate": 3.9626333894968986e-07, "loss": 26.8745, "step": 7630 }, { "epoch": 60.638085742771686, "grad_norm": 251.55950927734375, "learning_rate": 3.960099629442757e-07, "loss": 24.539, "step": 7640 }, { "epoch": 60.71784646061815, "grad_norm": 125.47510528564453, "learning_rate": 3.957563591113319e-07, "loss": 24.5211, "step": 7650 }, { "epoch": 60.79760717846461, "grad_norm": 29.564306259155273, "learning_rate": 3.9550252784657097e-07, "loss": 24.385, "step": 7660 }, { "epoch": 60.877367896311064, "grad_norm": 111.65841674804688, "learning_rate": 3.9524846954605985e-07, "loss": 24.8668, "step": 7670 }, { "epoch": 60.957128614157526, "grad_norm": 300.87506103515625, "learning_rate": 3.9499418460622e-07, "loss": 25.2645, "step": 7680 }, { "epoch": 61.03190428713859, "grad_norm": 47.71473693847656, "learning_rate": 3.9473967342382657e-07, "loss": 24.3245, "step": 7690 }, { "epoch": 61.11166500498504, "grad_norm": 217.0494384765625, "learning_rate": 3.944849363960075e-07, "loss": 24.9409, "step": 7700 }, { "epoch": 61.11166500498504, "eval_loss": 3.0771336555480957, "eval_mae": 1.3210324048995972, "eval_mse": 
3.0771336555480957, "eval_r2": 0.07287466526031494, "eval_rmse": 1.754176061730434, "eval_runtime": 9.1043, "eval_samples_per_second": 440.562, "eval_steps_per_second": 13.84, "step": 7700 }, { "epoch": 61.191425722831504, "grad_norm": 127.66291809082031, "learning_rate": 3.942299739202434e-07, "loss": 25.6905, "step": 7710 }, { "epoch": 61.271186440677965, "grad_norm": 89.4798812866211, "learning_rate": 3.9397478639436633e-07, "loss": 23.6006, "step": 7720 }, { "epoch": 61.35094715852443, "grad_norm": 42.74026870727539, "learning_rate": 3.937193742165598e-07, "loss": 25.0052, "step": 7730 }, { "epoch": 61.43070787637089, "grad_norm": 358.45074462890625, "learning_rate": 3.9346373778535776e-07, "loss": 25.0835, "step": 7740 }, { "epoch": 61.51046859421735, "grad_norm": 209.05079650878906, "learning_rate": 3.9320787749964397e-07, "loss": 26.9053, "step": 7750 }, { "epoch": 61.59022931206381, "grad_norm": 123.0264663696289, "learning_rate": 3.929517937586516e-07, "loss": 24.7298, "step": 7760 }, { "epoch": 61.66999002991027, "grad_norm": 431.9937744140625, "learning_rate": 3.926954869619625e-07, "loss": 24.2174, "step": 7770 }, { "epoch": 61.74975074775673, "grad_norm": 199.51316833496094, "learning_rate": 3.9243895750950664e-07, "loss": 23.6582, "step": 7780 }, { "epoch": 61.82951146560319, "grad_norm": 459.22894287109375, "learning_rate": 3.9218220580156116e-07, "loss": 24.6488, "step": 7790 }, { "epoch": 61.90927218344965, "grad_norm": 65.73253631591797, "learning_rate": 3.9192523223875006e-07, "loss": 25.5879, "step": 7800 }, { "epoch": 61.90927218344965, "eval_loss": 3.0825397968292236, "eval_mae": 1.3149493932724, "eval_mse": 3.0825395584106445, "eval_r2": 0.07124590873718262, "eval_rmse": 1.7557162522488208, "eval_runtime": 9.0657, "eval_samples_per_second": 442.435, "eval_steps_per_second": 13.898, "step": 7800 }, { "epoch": 61.98903290129611, "grad_norm": 253.05636596679688, "learning_rate": 3.916680372220438e-07, "loss": 25.8713, "step": 7810 }, { "epoch": 
62.06380857427717, "grad_norm": 163.3178253173828, "learning_rate": 3.914106211527581e-07, "loss": 24.4312, "step": 7820 }, { "epoch": 62.14356929212363, "grad_norm": 317.12725830078125, "learning_rate": 3.911529844325536e-07, "loss": 26.369, "step": 7830 }, { "epoch": 62.22333000997009, "grad_norm": 71.54997253417969, "learning_rate": 3.908951274634355e-07, "loss": 25.9222, "step": 7840 }, { "epoch": 62.30309072781655, "grad_norm": 171.40684509277344, "learning_rate": 3.9063705064775225e-07, "loss": 25.9971, "step": 7850 }, { "epoch": 62.38285144566301, "grad_norm": 223.0928192138672, "learning_rate": 3.903787543881958e-07, "loss": 24.2405, "step": 7860 }, { "epoch": 62.46261216350947, "grad_norm": 304.8570556640625, "learning_rate": 3.901202390878001e-07, "loss": 24.8631, "step": 7870 }, { "epoch": 62.54237288135593, "grad_norm": 351.0543212890625, "learning_rate": 3.898615051499412e-07, "loss": 26.5244, "step": 7880 }, { "epoch": 62.62213359920239, "grad_norm": 60.6923828125, "learning_rate": 3.89602552978336e-07, "loss": 25.2301, "step": 7890 }, { "epoch": 62.701894317048854, "grad_norm": 103.87010955810547, "learning_rate": 3.893433829770423e-07, "loss": 22.7804, "step": 7900 }, { "epoch": 62.701894317048854, "eval_loss": 3.075007200241089, "eval_mae": 1.3185298442840576, "eval_mse": 3.075007200241089, "eval_r2": 0.07351541519165039, "eval_rmse": 1.7535698446999735, "eval_runtime": 9.0851, "eval_samples_per_second": 441.493, "eval_steps_per_second": 13.869, "step": 7900 }, { "epoch": 62.781655034895316, "grad_norm": 148.22166442871094, "learning_rate": 3.8908399555045746e-07, "loss": 24.4337, "step": 7910 }, { "epoch": 62.86141575274178, "grad_norm": 56.29420471191406, "learning_rate": 3.888243911033182e-07, "loss": 24.513, "step": 7920 }, { "epoch": 62.94117647058823, "grad_norm": 693.5757446289062, "learning_rate": 3.8856457004069987e-07, "loss": 25.2368, "step": 7930 }, { "epoch": 63.015952143569294, "grad_norm": 330.87225341796875, "learning_rate": 
3.8830453276801603e-07, "loss": 22.621, "step": 7940 }, { "epoch": 63.095712861415755, "grad_norm": 104.25527954101562, "learning_rate": 3.8804427969101714e-07, "loss": 24.8392, "step": 7950 }, { "epoch": 63.17547357926221, "grad_norm": 59.79703140258789, "learning_rate": 3.8778381121579086e-07, "loss": 24.2671, "step": 7960 }, { "epoch": 63.25523429710867, "grad_norm": 186.5470733642578, "learning_rate": 3.8752312774876075e-07, "loss": 25.6232, "step": 7970 }, { "epoch": 63.33499501495513, "grad_norm": 264.2695617675781, "learning_rate": 3.8726222969668563e-07, "loss": 25.2552, "step": 7980 }, { "epoch": 63.414755732801595, "grad_norm": 68.55497741699219, "learning_rate": 3.8700111746665966e-07, "loss": 23.5333, "step": 7990 }, { "epoch": 63.49451645064806, "grad_norm": 189.8446044921875, "learning_rate": 3.8673979146611065e-07, "loss": 25.5833, "step": 8000 }, { "epoch": 63.49451645064806, "eval_loss": 3.0853989124298096, "eval_mae": 1.312778115272522, "eval_mse": 3.0853989124298096, "eval_r2": 0.07038438320159912, "eval_rmse": 1.756530361943627, "eval_runtime": 9.0969, "eval_samples_per_second": 440.922, "eval_steps_per_second": 13.851, "step": 8000 }, { "epoch": 63.57427716849452, "grad_norm": 71.05472564697266, "learning_rate": 3.864782521028003e-07, "loss": 25.5786, "step": 8010 }, { "epoch": 63.65403788634098, "grad_norm": 308.1141662597656, "learning_rate": 3.862164997848231e-07, "loss": 24.9246, "step": 8020 }, { "epoch": 63.733798604187434, "grad_norm": 233.69918823242188, "learning_rate": 3.8595453492060593e-07, "loss": 26.6499, "step": 8030 }, { "epoch": 63.813559322033896, "grad_norm": 284.7898864746094, "learning_rate": 3.856923579189072e-07, "loss": 22.7096, "step": 8040 }, { "epoch": 63.89332003988036, "grad_norm": 46.84286880493164, "learning_rate": 3.854299691888164e-07, "loss": 26.9125, "step": 8050 }, { "epoch": 63.97308075772682, "grad_norm": 390.2908630371094, "learning_rate": 3.8516736913975333e-07, "loss": 26.2214, "step": 8060 }, { "epoch": 
64.04785643070788, "grad_norm": 244.6831512451172, "learning_rate": 3.8490455818146763e-07, "loss": 22.5445, "step": 8070 }, { "epoch": 64.12761714855434, "grad_norm": 119.25518035888672, "learning_rate": 3.84641536724038e-07, "loss": 23.0345, "step": 8080 }, { "epoch": 64.2073778664008, "grad_norm": 76.47522735595703, "learning_rate": 3.843783051778714e-07, "loss": 25.3076, "step": 8090 }, { "epoch": 64.28713858424726, "grad_norm": 264.4443664550781, "learning_rate": 3.841148639537029e-07, "loss": 25.8505, "step": 8100 }, { "epoch": 64.28713858424726, "eval_loss": 3.0770719051361084, "eval_mae": 1.3144989013671875, "eval_mse": 3.0770719051361084, "eval_r2": 0.07289326190948486, "eval_rmse": 1.7541584606688496, "eval_runtime": 9.0998, "eval_samples_per_second": 440.78, "eval_steps_per_second": 13.847, "step": 8100 }, { "epoch": 64.36689930209371, "grad_norm": 404.09783935546875, "learning_rate": 3.8385121346259465e-07, "loss": 23.8394, "step": 8110 }, { "epoch": 64.44666001994018, "grad_norm": 122.11978149414062, "learning_rate": 3.8358735411593514e-07, "loss": 25.0254, "step": 8120 }, { "epoch": 64.52642073778664, "grad_norm": 140.75172424316406, "learning_rate": 3.8332328632543903e-07, "loss": 26.4085, "step": 8130 }, { "epoch": 64.6061814556331, "grad_norm": 115.68319702148438, "learning_rate": 3.8305901050314604e-07, "loss": 25.4538, "step": 8140 }, { "epoch": 64.68594217347956, "grad_norm": 91.7467041015625, "learning_rate": 3.827945270614205e-07, "loss": 26.4521, "step": 8150 }, { "epoch": 64.76570289132601, "grad_norm": 192.41465759277344, "learning_rate": 3.825298364129509e-07, "loss": 24.4931, "step": 8160 }, { "epoch": 64.84546360917248, "grad_norm": 183.51048278808594, "learning_rate": 3.822649389707487e-07, "loss": 24.4489, "step": 8170 }, { "epoch": 64.92522432701894, "grad_norm": 93.11408233642578, "learning_rate": 3.819998351481484e-07, "loss": 24.1044, "step": 8180 }, { "epoch": 65.0, "grad_norm": 95.1280746459961, "learning_rate": 
3.8173452535880625e-07, "loss": 23.9528, "step": 8190 }, { "epoch": 65.07976071784645, "grad_norm": 113.3239974975586, "learning_rate": 3.814690100167e-07, "loss": 24.4158, "step": 8200 }, { "epoch": 65.07976071784645, "eval_loss": 3.0694262981414795, "eval_mae": 1.322281002998352, "eval_mse": 3.0694262981414795, "eval_r2": 0.07519686222076416, "eval_rmse": 1.751977824671728, "eval_runtime": 9.0964, "eval_samples_per_second": 440.944, "eval_steps_per_second": 13.852, "step": 8200 }, { "epoch": 65.15952143569292, "grad_norm": 130.2467041015625, "learning_rate": 3.812032895361282e-07, "loss": 25.5138, "step": 8210 }, { "epoch": 65.23928215353938, "grad_norm": 43.810115814208984, "learning_rate": 3.809373643317093e-07, "loss": 27.3361, "step": 8220 }, { "epoch": 65.31904287138585, "grad_norm": 40.04730224609375, "learning_rate": 3.806712348183813e-07, "loss": 24.0978, "step": 8230 }, { "epoch": 65.3988035892323, "grad_norm": 110.29949188232422, "learning_rate": 3.8040490141140113e-07, "loss": 23.6178, "step": 8240 }, { "epoch": 65.47856430707877, "grad_norm": 298.765625, "learning_rate": 3.801383645263437e-07, "loss": 24.6665, "step": 8250 }, { "epoch": 65.55832502492522, "grad_norm": 292.2880859375, "learning_rate": 3.7987162457910127e-07, "loss": 24.6888, "step": 8260 }, { "epoch": 65.63808574277168, "grad_norm": 221.99192810058594, "learning_rate": 3.796046819858834e-07, "loss": 25.0066, "step": 8270 }, { "epoch": 65.71784646061815, "grad_norm": 441.9041442871094, "learning_rate": 3.7933753716321533e-07, "loss": 25.6156, "step": 8280 }, { "epoch": 65.7976071784646, "grad_norm": 149.19468688964844, "learning_rate": 3.790701905279383e-07, "loss": 24.9847, "step": 8290 }, { "epoch": 65.87736789631107, "grad_norm": 74.34953308105469, "learning_rate": 3.7880264249720805e-07, "loss": 25.2725, "step": 8300 }, { "epoch": 65.87736789631107, "eval_loss": 3.069687843322754, "eval_mae": 1.3177374601364136, "eval_mse": 3.069688081741333, "eval_r2": 0.07511800527572632, 
"eval_rmse": 1.7520525339559123, "eval_runtime": 9.0827, "eval_samples_per_second": 441.611, "eval_steps_per_second": 13.873, "step": 8300 }, { "epoch": 65.95712861415753, "grad_norm": 209.19964599609375, "learning_rate": 3.78534893488495e-07, "loss": 25.0926, "step": 8310 }, { "epoch": 66.03190428713859, "grad_norm": 314.91265869140625, "learning_rate": 3.782669439195828e-07, "loss": 22.914, "step": 8320 }, { "epoch": 66.11166500498504, "grad_norm": 370.3639221191406, "learning_rate": 3.7799879420856825e-07, "loss": 25.2253, "step": 8330 }, { "epoch": 66.19142572283151, "grad_norm": 121.62500762939453, "learning_rate": 3.777304447738602e-07, "loss": 26.6113, "step": 8340 }, { "epoch": 66.27118644067797, "grad_norm": 178.5107421875, "learning_rate": 3.774618960341796e-07, "loss": 23.7175, "step": 8350 }, { "epoch": 66.35094715852442, "grad_norm": 282.17498779296875, "learning_rate": 3.7719314840855786e-07, "loss": 27.1242, "step": 8360 }, { "epoch": 66.43070787637089, "grad_norm": 77.5106430053711, "learning_rate": 3.7692420231633707e-07, "loss": 23.654, "step": 8370 }, { "epoch": 66.51046859421734, "grad_norm": 481.6351013183594, "learning_rate": 3.766550581771689e-07, "loss": 24.4763, "step": 8380 }, { "epoch": 66.59022931206381, "grad_norm": 571.66650390625, "learning_rate": 3.76385716411014e-07, "loss": 24.758, "step": 8390 }, { "epoch": 66.66999002991027, "grad_norm": 385.8674011230469, "learning_rate": 3.761161774381414e-07, "loss": 24.5836, "step": 8400 }, { "epoch": 66.66999002991027, "eval_loss": 3.07013201713562, "eval_mae": 1.3153153657913208, "eval_mse": 3.070132255554199, "eval_r2": 0.07498413324356079, "eval_rmse": 1.7521792875029083, "eval_runtime": 9.061, "eval_samples_per_second": 442.668, "eval_steps_per_second": 13.906, "step": 8400 }, { "epoch": 66.74975074775674, "grad_norm": 127.92801666259766, "learning_rate": 3.758464416791279e-07, "loss": 25.1251, "step": 8410 }, { "epoch": 66.82951146560319, "grad_norm": 459.4199523925781, "learning_rate": 
3.755765095548574e-07, "loss": 25.6292, "step": 8420 }, { "epoch": 66.90927218344964, "grad_norm": 123.8630142211914, "learning_rate": 3.7530638148652e-07, "loss": 25.9062, "step": 8430 }, { "epoch": 66.98903290129611, "grad_norm": 66.76941680908203, "learning_rate": 3.7503605789561176e-07, "loss": 22.7821, "step": 8440 }, { "epoch": 67.06380857427718, "grad_norm": 114.29212188720703, "learning_rate": 3.7476553920393364e-07, "loss": 23.4223, "step": 8450 }, { "epoch": 67.14356929212363, "grad_norm": 115.19644927978516, "learning_rate": 3.7449482583359116e-07, "loss": 26.318, "step": 8460 }, { "epoch": 67.22333000997008, "grad_norm": 61.220420837402344, "learning_rate": 3.7422391820699367e-07, "loss": 23.5731, "step": 8470 }, { "epoch": 67.30309072781655, "grad_norm": 76.07804107666016, "learning_rate": 3.7395281674685334e-07, "loss": 24.1083, "step": 8480 }, { "epoch": 67.38285144566301, "grad_norm": 206.1667938232422, "learning_rate": 3.736815218761851e-07, "loss": 25.1364, "step": 8490 }, { "epoch": 67.46261216350948, "grad_norm": 109.1873550415039, "learning_rate": 3.734100340183055e-07, "loss": 24.0216, "step": 8500 }, { "epoch": 67.46261216350948, "eval_loss": 3.066004514694214, "eval_mae": 1.3180415630340576, "eval_mse": 3.066004514694214, "eval_r2": 0.07622784376144409, "eval_rmse": 1.7510010036245593, "eval_runtime": 9.0849, "eval_samples_per_second": 441.501, "eval_steps_per_second": 13.869, "step": 8500 }, { "epoch": 67.54237288135593, "grad_norm": 313.42364501953125, "learning_rate": 3.731383535968323e-07, "loss": 25.2754, "step": 8510 }, { "epoch": 67.6221335992024, "grad_norm": 327.2785339355469, "learning_rate": 3.728664810356836e-07, "loss": 23.376, "step": 8520 }, { "epoch": 67.70189431704885, "grad_norm": 128.79061889648438, "learning_rate": 3.7259441675907753e-07, "loss": 25.4697, "step": 8530 }, { "epoch": 67.78165503489531, "grad_norm": 306.5848693847656, "learning_rate": 3.7232216119153114e-07, "loss": 25.0118, "step": 8540 }, { "epoch": 
67.86141575274178, "grad_norm": 230.59901428222656, "learning_rate": 3.720497147578601e-07, "loss": 24.424, "step": 8550 }, { "epoch": 67.94117647058823, "grad_norm": 385.51251220703125, "learning_rate": 3.717770778831777e-07, "loss": 26.901, "step": 8560 }, { "epoch": 68.0159521435693, "grad_norm": 107.0721206665039, "learning_rate": 3.715042509928948e-07, "loss": 25.78, "step": 8570 }, { "epoch": 68.09571286141575, "grad_norm": 263.0525817871094, "learning_rate": 3.712312345127183e-07, "loss": 23.3912, "step": 8580 }, { "epoch": 68.17547357926222, "grad_norm": 170.27700805664062, "learning_rate": 3.709580288686511e-07, "loss": 25.7638, "step": 8590 }, { "epoch": 68.25523429710867, "grad_norm": 190.2525634765625, "learning_rate": 3.7068463448699144e-07, "loss": 26.8153, "step": 8600 }, { "epoch": 68.25523429710867, "eval_loss": 3.0627195835113525, "eval_mae": 1.320117473602295, "eval_mse": 3.0627195835113525, "eval_r2": 0.07721757888793945, "eval_rmse": 1.7500627370215482, "eval_runtime": 9.084, "eval_samples_per_second": 441.543, "eval_steps_per_second": 13.87, "step": 8600 }, { "epoch": 68.33499501495514, "grad_norm": 91.61470031738281, "learning_rate": 3.704110517943319e-07, "loss": 22.9137, "step": 8610 }, { "epoch": 68.4147557328016, "grad_norm": 75.98213958740234, "learning_rate": 3.701372812175586e-07, "loss": 23.1728, "step": 8620 }, { "epoch": 68.49451645064805, "grad_norm": 267.9504089355469, "learning_rate": 3.698633231838514e-07, "loss": 23.4502, "step": 8630 }, { "epoch": 68.57427716849452, "grad_norm": 85.68649291992188, "learning_rate": 3.6958917812068224e-07, "loss": 26.7422, "step": 8640 }, { "epoch": 68.65403788634097, "grad_norm": 296.14923095703125, "learning_rate": 3.6931484645581524e-07, "loss": 23.2774, "step": 8650 }, { "epoch": 68.73379860418744, "grad_norm": 160.84161376953125, "learning_rate": 3.690403286173051e-07, "loss": 25.5654, "step": 8660 }, { "epoch": 68.8135593220339, "grad_norm": 243.06686401367188, "learning_rate": 
3.6876562503349777e-07, "loss": 26.0291, "step": 8670 }, { "epoch": 68.89332003988036, "grad_norm": 92.43791198730469, "learning_rate": 3.684907361330284e-07, "loss": 25.0494, "step": 8680 }, { "epoch": 68.97308075772682, "grad_norm": 315.2274475097656, "learning_rate": 3.6821566234482164e-07, "loss": 25.2462, "step": 8690 }, { "epoch": 69.04785643070788, "grad_norm": 377.940673828125, "learning_rate": 3.679404040980906e-07, "loss": 26.6367, "step": 8700 }, { "epoch": 69.04785643070788, "eval_loss": 3.0620410442352295, "eval_mae": 1.3219993114471436, "eval_mse": 3.0620410442352295, "eval_r2": 0.07742202281951904, "eval_rmse": 1.7498688648682306, "eval_runtime": 9.0786, "eval_samples_per_second": 441.809, "eval_steps_per_second": 13.879, "step": 8700 }, { "epoch": 69.12761714855434, "grad_norm": 118.64423370361328, "learning_rate": 3.676649618223361e-07, "loss": 23.4573, "step": 8710 }, { "epoch": 69.2073778664008, "grad_norm": 484.994384765625, "learning_rate": 3.67389335947346e-07, "loss": 25.0094, "step": 8720 }, { "epoch": 69.28713858424726, "grad_norm": 293.2475891113281, "learning_rate": 3.671135269031951e-07, "loss": 26.0007, "step": 8730 }, { "epoch": 69.36689930209371, "grad_norm": 196.64553833007812, "learning_rate": 3.668375351202435e-07, "loss": 24.5714, "step": 8740 }, { "epoch": 69.44666001994018, "grad_norm": 341.39593505859375, "learning_rate": 3.665613610291367e-07, "loss": 24.8776, "step": 8750 }, { "epoch": 69.52642073778664, "grad_norm": 79.00114440917969, "learning_rate": 3.662850050608046e-07, "loss": 23.8761, "step": 8760 }, { "epoch": 69.6061814556331, "grad_norm": 102.5728759765625, "learning_rate": 3.660084676464611e-07, "loss": 24.649, "step": 8770 }, { "epoch": 69.68594217347956, "grad_norm": 297.4561462402344, "learning_rate": 3.657317492176027e-07, "loss": 26.3547, "step": 8780 }, { "epoch": 69.76570289132601, "grad_norm": 214.3977508544922, "learning_rate": 3.6545485020600884e-07, "loss": 25.4936, "step": 8790 }, { "epoch": 
69.84546360917248, "grad_norm": 221.43826293945312, "learning_rate": 3.6517777104374063e-07, "loss": 26.0423, "step": 8800 }, { "epoch": 69.84546360917248, "eval_loss": 3.061256170272827, "eval_mae": 1.322737216949463, "eval_mse": 3.061255931854248, "eval_r2": 0.07765859365463257, "eval_rmse": 1.7496445158529341, "eval_runtime": 9.0972, "eval_samples_per_second": 440.906, "eval_steps_per_second": 13.85, "step": 8800 }, { "epoch": 69.92522432701894, "grad_norm": 119.96556091308594, "learning_rate": 3.6490051216314024e-07, "loss": 22.6416, "step": 8810 }, { "epoch": 70.0, "grad_norm": 139.15509033203125, "learning_rate": 3.6462307399683e-07, "loss": 23.221, "step": 8820 }, { "epoch": 70.07976071784645, "grad_norm": 473.70220947265625, "learning_rate": 3.6437322671565705e-07, "loss": 27.2323, "step": 8830 }, { "epoch": 70.15952143569292, "grad_norm": 51.05868911743164, "learning_rate": 3.6409544909937506e-07, "loss": 24.7301, "step": 8840 }, { "epoch": 70.23928215353938, "grad_norm": 139.17239379882812, "learning_rate": 3.638174934535683e-07, "loss": 22.8306, "step": 8850 }, { "epoch": 70.31904287138585, "grad_norm": 389.852783203125, "learning_rate": 3.635393602119468e-07, "loss": 23.017, "step": 8860 }, { "epoch": 70.3988035892323, "grad_norm": 489.2827453613281, "learning_rate": 3.6326104980849716e-07, "loss": 24.0965, "step": 8870 }, { "epoch": 70.47856430707877, "grad_norm": 314.7647705078125, "learning_rate": 3.6298256267748296e-07, "loss": 26.8876, "step": 8880 }, { "epoch": 70.55832502492522, "grad_norm": 408.3915710449219, "learning_rate": 3.627038992534431e-07, "loss": 25.819, "step": 8890 }, { "epoch": 70.63808574277168, "grad_norm": 126.84314727783203, "learning_rate": 3.6242505997119174e-07, "loss": 26.666, "step": 8900 }, { "epoch": 70.63808574277168, "eval_loss": 3.0710322856903076, "eval_mae": 1.3310539722442627, "eval_mse": 3.0710322856903076, "eval_r2": 0.07471299171447754, "eval_rmse": 1.752436100315874, "eval_runtime": 9.1199, 
"eval_samples_per_second": 439.808, "eval_steps_per_second": 13.816, "step": 8900 }, { "epoch": 70.71784646061815, "grad_norm": 358.9391174316406, "learning_rate": 3.621460452658176e-07, "loss": 25.3601, "step": 8910 }, { "epoch": 70.7976071784646, "grad_norm": 293.7834167480469, "learning_rate": 3.6186685557268286e-07, "loss": 26.088, "step": 8920 }, { "epoch": 70.87736789631107, "grad_norm": 43.07698440551758, "learning_rate": 3.615874913274228e-07, "loss": 24.3503, "step": 8930 }, { "epoch": 70.95712861415753, "grad_norm": 155.59963989257812, "learning_rate": 3.613079529659452e-07, "loss": 22.833, "step": 8940 }, { "epoch": 71.03190428713859, "grad_norm": 97.04302978515625, "learning_rate": 3.6102824092442933e-07, "loss": 22.8683, "step": 8950 }, { "epoch": 71.11166500498504, "grad_norm": 115.6922836303711, "learning_rate": 3.6074835563932576e-07, "loss": 25.648, "step": 8960 }, { "epoch": 71.19142572283151, "grad_norm": 118.38312530517578, "learning_rate": 3.60468297547355e-07, "loss": 25.867, "step": 8970 }, { "epoch": 71.27118644067797, "grad_norm": 113.87028503417969, "learning_rate": 3.601880670855075e-07, "loss": 24.057, "step": 8980 }, { "epoch": 71.35094715852442, "grad_norm": 132.84255981445312, "learning_rate": 3.599076646910424e-07, "loss": 22.2716, "step": 8990 }, { "epoch": 71.43070787637089, "grad_norm": 122.4183120727539, "learning_rate": 3.5962709080148743e-07, "loss": 25.7429, "step": 9000 }, { "epoch": 71.43070787637089, "eval_loss": 3.058762550354004, "eval_mae": 1.3227847814559937, "eval_mse": 3.058762550354004, "eval_r2": 0.07840979099273682, "eval_rmse": 1.748931831248435, "eval_runtime": 9.0988, "eval_samples_per_second": 440.826, "eval_steps_per_second": 13.848, "step": 9000 }, { "epoch": 71.51046859421734, "grad_norm": 62.26708221435547, "learning_rate": 3.5934634585463774e-07, "loss": 22.6924, "step": 9010 }, { "epoch": 71.59022931206381, "grad_norm": 422.3989562988281, "learning_rate": 3.590654302885555e-07, "loss": 26.4849, "step": 
9020 }, { "epoch": 71.66999002991027, "grad_norm": 486.0423278808594, "learning_rate": 3.587843445415687e-07, "loss": 25.1107, "step": 9030 }, { "epoch": 71.74975074775674, "grad_norm": 322.81256103515625, "learning_rate": 3.5850308905227143e-07, "loss": 24.4619, "step": 9040 }, { "epoch": 71.82951146560319, "grad_norm": 120.65798950195312, "learning_rate": 3.5822166425952236e-07, "loss": 25.5674, "step": 9050 }, { "epoch": 71.90927218344964, "grad_norm": 301.5791015625, "learning_rate": 3.5794007060244443e-07, "loss": 26.1748, "step": 9060 }, { "epoch": 71.98903290129611, "grad_norm": 84.74629211425781, "learning_rate": 3.576583085204238e-07, "loss": 24.3629, "step": 9070 }, { "epoch": 72.06380857427718, "grad_norm": 330.45648193359375, "learning_rate": 3.5737637845310993e-07, "loss": 24.3095, "step": 9080 }, { "epoch": 72.14356929212363, "grad_norm": 46.20743942260742, "learning_rate": 3.570942808404138e-07, "loss": 23.7282, "step": 9090 }, { "epoch": 72.22333000997008, "grad_norm": 608.0449829101562, "learning_rate": 3.5681201612250837e-07, "loss": 23.4555, "step": 9100 }, { "epoch": 72.22333000997008, "eval_loss": 3.056166410446167, "eval_mae": 1.316311001777649, "eval_mse": 3.056166410446167, "eval_r2": 0.07919204235076904, "eval_rmse": 1.7481894664040758, "eval_runtime": 9.0777, "eval_samples_per_second": 441.853, "eval_steps_per_second": 13.88, "step": 9100 }, { "epoch": 72.30309072781655, "grad_norm": 306.81744384765625, "learning_rate": 3.5652958473982696e-07, "loss": 25.5228, "step": 9110 }, { "epoch": 72.38285144566301, "grad_norm": 222.90760803222656, "learning_rate": 3.562469871330632e-07, "loss": 25.3907, "step": 9120 }, { "epoch": 72.46261216350948, "grad_norm": 258.46002197265625, "learning_rate": 3.559642237431698e-07, "loss": 25.5679, "step": 9130 }, { "epoch": 72.54237288135593, "grad_norm": 330.5743103027344, "learning_rate": 3.5568129501135856e-07, "loss": 23.9446, "step": 9140 }, { "epoch": 72.6221335992024, "grad_norm": 368.6435546875, 
"learning_rate": 3.553982013790989e-07, "loss": 24.0174, "step": 9150 }, { "epoch": 72.70189431704885, "grad_norm": 69.02025604248047, "learning_rate": 3.5511494328811767e-07, "loss": 24.9693, "step": 9160 }, { "epoch": 72.78165503489531, "grad_norm": 279.7677001953125, "learning_rate": 3.548315211803984e-07, "loss": 27.9484, "step": 9170 }, { "epoch": 72.86141575274178, "grad_norm": 102.18354797363281, "learning_rate": 3.545479354981804e-07, "loss": 24.234, "step": 9180 }, { "epoch": 72.94117647058823, "grad_norm": 181.75677490234375, "learning_rate": 3.542641866839585e-07, "loss": 24.5007, "step": 9190 }, { "epoch": 73.0159521435693, "grad_norm": 593.4639282226562, "learning_rate": 3.5398027518048164e-07, "loss": 24.3578, "step": 9200 }, { "epoch": 73.0159521435693, "eval_loss": 3.0623042583465576, "eval_mae": 1.310591220855713, "eval_mse": 3.0623042583465576, "eval_r2": 0.077342689037323, "eval_rmse": 1.749944072919634, "eval_runtime": 9.0868, "eval_samples_per_second": 441.41, "eval_steps_per_second": 13.866, "step": 9200 }, { "epoch": 73.09571286141575, "grad_norm": 263.2080993652344, "learning_rate": 3.536962014307531e-07, "loss": 26.6369, "step": 9210 }, { "epoch": 73.17547357926222, "grad_norm": 169.20558166503906, "learning_rate": 3.534119658780288e-07, "loss": 26.397, "step": 9220 }, { "epoch": 73.25523429710867, "grad_norm": 147.1641082763672, "learning_rate": 3.531275689658176e-07, "loss": 24.3629, "step": 9230 }, { "epoch": 73.33499501495514, "grad_norm": 129.83119201660156, "learning_rate": 3.5284301113787996e-07, "loss": 25.1567, "step": 9240 }, { "epoch": 73.4147557328016, "grad_norm": 364.77545166015625, "learning_rate": 3.525582928382273e-07, "loss": 27.498, "step": 9250 }, { "epoch": 73.49451645064805, "grad_norm": 280.6644592285156, "learning_rate": 3.5227341451112156e-07, "loss": 24.1225, "step": 9260 }, { "epoch": 73.57427716849452, "grad_norm": 337.0046691894531, "learning_rate": 3.519883766010744e-07, "loss": 22.7654, "step": 9270 }, { 
"epoch": 73.65403788634097, "grad_norm": 85.38751220703125, "learning_rate": 3.5170317955284644e-07, "loss": 25.1829, "step": 9280 }, { "epoch": 73.73379860418744, "grad_norm": 293.2549133300781, "learning_rate": 3.514178238114466e-07, "loss": 25.0107, "step": 9290 }, { "epoch": 73.8135593220339, "grad_norm": 478.9608459472656, "learning_rate": 3.511323098221315e-07, "loss": 23.8991, "step": 9300 }, { "epoch": 73.8135593220339, "eval_loss": 3.056279182434082, "eval_mae": 1.3124663829803467, "eval_mse": 3.056279420852661, "eval_r2": 0.0791580080986023, "eval_rmse": 1.7482217882330209, "eval_runtime": 9.0865, "eval_samples_per_second": 441.424, "eval_steps_per_second": 13.867, "step": 9300 }, { "epoch": 73.89332003988036, "grad_norm": 317.2913513183594, "learning_rate": 3.508466380304046e-07, "loss": 23.5169, "step": 9310 }, { "epoch": 73.97308075772682, "grad_norm": 217.1324005126953, "learning_rate": 3.505608088820155e-07, "loss": 23.1523, "step": 9320 }, { "epoch": 74.04785643070788, "grad_norm": 103.88399505615234, "learning_rate": 3.502748228229596e-07, "loss": 21.5205, "step": 9330 }, { "epoch": 74.12761714855434, "grad_norm": 106.8578109741211, "learning_rate": 3.499886802994768e-07, "loss": 23.9702, "step": 9340 }, { "epoch": 74.2073778664008, "grad_norm": 567.977294921875, "learning_rate": 3.497023817580516e-07, "loss": 27.3229, "step": 9350 }, { "epoch": 74.28713858424726, "grad_norm": 101.11739349365234, "learning_rate": 3.4941592764541127e-07, "loss": 25.0238, "step": 9360 }, { "epoch": 74.36689930209371, "grad_norm": 84.21052551269531, "learning_rate": 3.491293184085266e-07, "loss": 23.8544, "step": 9370 }, { "epoch": 74.44666001994018, "grad_norm": 391.4809265136719, "learning_rate": 3.488425544946098e-07, "loss": 24.1024, "step": 9380 }, { "epoch": 74.52642073778664, "grad_norm": 82.73574829101562, "learning_rate": 3.485556363511148e-07, "loss": 27.272, "step": 9390 }, { "epoch": 74.6061814556331, "grad_norm": 104.74907684326172, "learning_rate": 
3.4826856442573603e-07, "loss": 23.9074, "step": 9400 }, { "epoch": 74.6061814556331, "eval_loss": 3.0523128509521484, "eval_mae": 1.320926308631897, "eval_mse": 3.0523128509521484, "eval_r2": 0.0803530216217041, "eval_rmse": 1.7470869614739126, "eval_runtime": 9.097, "eval_samples_per_second": 440.914, "eval_steps_per_second": 13.851, "step": 9400 }, { "epoch": 74.68594217347956, "grad_norm": 50.035621643066406, "learning_rate": 3.4798133916640794e-07, "loss": 24.9706, "step": 9410 }, { "epoch": 74.76570289132601, "grad_norm": 165.34947204589844, "learning_rate": 3.4769396102130413e-07, "loss": 25.6866, "step": 9420 }, { "epoch": 74.84546360917248, "grad_norm": 184.37969970703125, "learning_rate": 3.474064304388368e-07, "loss": 24.5739, "step": 9430 }, { "epoch": 74.92522432701894, "grad_norm": 276.6813659667969, "learning_rate": 3.4711874786765614e-07, "loss": 24.6645, "step": 9440 }, { "epoch": 75.0, "grad_norm": 88.17737579345703, "learning_rate": 3.468309137566494e-07, "loss": 22.8896, "step": 9450 }, { "epoch": 75.07976071784645, "grad_norm": 123.72151184082031, "learning_rate": 3.465429285549401e-07, "loss": 25.006, "step": 9460 }, { "epoch": 75.15952143569292, "grad_norm": 141.5162811279297, "learning_rate": 3.46254792711888e-07, "loss": 24.1427, "step": 9470 }, { "epoch": 75.23928215353938, "grad_norm": 191.08383178710938, "learning_rate": 3.4596650667708727e-07, "loss": 25.8205, "step": 9480 }, { "epoch": 75.31904287138585, "grad_norm": 177.44964599609375, "learning_rate": 3.4567807090036696e-07, "loss": 25.1382, "step": 9490 }, { "epoch": 75.3988035892323, "grad_norm": 38.57638931274414, "learning_rate": 3.453894858317895e-07, "loss": 26.9749, "step": 9500 }, { "epoch": 75.3988035892323, "eval_loss": 3.0497334003448486, "eval_mae": 1.314713478088379, "eval_mse": 3.0497334003448486, "eval_r2": 0.0811302661895752, "eval_rmse": 1.7463485907300549, "eval_runtime": 9.1004, "eval_samples_per_second": 440.749, "eval_steps_per_second": 13.846, "step": 9500 }, { 
"epoch": 75.47856430707877, "grad_norm": 314.9055480957031, "learning_rate": 3.451007519216506e-07, "loss": 24.3219, "step": 9510 }, { "epoch": 75.55832502492522, "grad_norm": 411.2994384765625, "learning_rate": 3.448118696204775e-07, "loss": 24.8036, "step": 9520 }, { "epoch": 75.63808574277168, "grad_norm": 74.12139129638672, "learning_rate": 3.445228393790298e-07, "loss": 24.0938, "step": 9530 }, { "epoch": 75.71784646061815, "grad_norm": 609.5748901367188, "learning_rate": 3.442336616482975e-07, "loss": 25.024, "step": 9540 }, { "epoch": 75.7976071784646, "grad_norm": 200.36746215820312, "learning_rate": 3.439443368795009e-07, "loss": 24.2689, "step": 9550 }, { "epoch": 75.87736789631107, "grad_norm": 208.6078338623047, "learning_rate": 3.436548655240894e-07, "loss": 24.5859, "step": 9560 }, { "epoch": 75.95712861415753, "grad_norm": 691.5971069335938, "learning_rate": 3.433652480337417e-07, "loss": 23.3654, "step": 9570 }, { "epoch": 76.03190428713859, "grad_norm": 252.24432373046875, "learning_rate": 3.43075484860364e-07, "loss": 24.1112, "step": 9580 }, { "epoch": 76.11166500498504, "grad_norm": 83.96273040771484, "learning_rate": 3.427855764560901e-07, "loss": 24.871, "step": 9590 }, { "epoch": 76.19142572283151, "grad_norm": 276.2619323730469, "learning_rate": 3.4249552327328027e-07, "loss": 25.5983, "step": 9600 }, { "epoch": 76.19142572283151, "eval_loss": 3.0511128902435303, "eval_mae": 1.3213589191436768, "eval_mse": 3.0511128902435303, "eval_r2": 0.08071458339691162, "eval_rmse": 1.7467435101478208, "eval_runtime": 9.1002, "eval_samples_per_second": 440.759, "eval_steps_per_second": 13.846, "step": 9600 }, { "epoch": 76.27118644067797, "grad_norm": 399.31427001953125, "learning_rate": 3.4220532576452085e-07, "loss": 25.2765, "step": 9610 }, { "epoch": 76.35094715852442, "grad_norm": 464.6283264160156, "learning_rate": 3.419149843826231e-07, "loss": 25.7047, "step": 9620 }, { "epoch": 76.43070787637089, "grad_norm": 209.1724090576172, "learning_rate": 
3.4162449958062316e-07, "loss": 22.4275, "step": 9630 }, { "epoch": 76.51046859421734, "grad_norm": 285.52593994140625, "learning_rate": 3.413338718117806e-07, "loss": 25.4166, "step": 9640 }, { "epoch": 76.59022931206381, "grad_norm": 255.88510131835938, "learning_rate": 3.410431015295784e-07, "loss": 24.787, "step": 9650 }, { "epoch": 76.66999002991027, "grad_norm": 137.2398223876953, "learning_rate": 3.4075218918772154e-07, "loss": 24.8588, "step": 9660 }, { "epoch": 76.74975074775674, "grad_norm": 371.6817626953125, "learning_rate": 3.404611352401369e-07, "loss": 22.7635, "step": 9670 }, { "epoch": 76.82951146560319, "grad_norm": 58.25032424926758, "learning_rate": 3.4016994014097233e-07, "loss": 24.8311, "step": 9680 }, { "epoch": 76.90927218344964, "grad_norm": 227.07810974121094, "learning_rate": 3.3987860434459583e-07, "loss": 24.4209, "step": 9690 }, { "epoch": 76.98903290129611, "grad_norm": 142.50608825683594, "learning_rate": 3.3958712830559487e-07, "loss": 26.006, "step": 9700 }, { "epoch": 76.98903290129611, "eval_loss": 3.0453178882598877, "eval_mae": 1.3163361549377441, "eval_mse": 3.0453178882598877, "eval_r2": 0.08246058225631714, "eval_rmse": 1.7450839201195705, "eval_runtime": 9.0755, "eval_samples_per_second": 441.961, "eval_steps_per_second": 13.884, "step": 9700 }, { "epoch": 77.06380857427718, "grad_norm": 285.72149658203125, "learning_rate": 3.3929551247877606e-07, "loss": 25.6414, "step": 9710 }, { "epoch": 77.14356929212363, "grad_norm": 255.40960693359375, "learning_rate": 3.3900375731916374e-07, "loss": 22.5476, "step": 9720 }, { "epoch": 77.22333000997008, "grad_norm": 208.32432556152344, "learning_rate": 3.3871186328199995e-07, "loss": 25.8574, "step": 9730 }, { "epoch": 77.30309072781655, "grad_norm": 108.06330871582031, "learning_rate": 3.384198308227434e-07, "loss": 24.569, "step": 9740 }, { "epoch": 77.38285144566301, "grad_norm": 337.4876403808594, "learning_rate": 3.3812766039706846e-07, "loss": 25.0633, "step": 9750 }, { 
"epoch": 77.46261216350948, "grad_norm": 154.1691436767578, "learning_rate": 3.3783535246086516e-07, "loss": 24.416, "step": 9760 }, { "epoch": 77.54237288135593, "grad_norm": 136.4617156982422, "learning_rate": 3.3754290747023796e-07, "loss": 22.9575, "step": 9770 }, { "epoch": 77.6221335992024, "grad_norm": 164.31016540527344, "learning_rate": 3.3725032588150517e-07, "loss": 27.5521, "step": 9780 }, { "epoch": 77.70189431704885, "grad_norm": 422.6942138671875, "learning_rate": 3.3695760815119816e-07, "loss": 26.1196, "step": 9790 }, { "epoch": 77.78165503489531, "grad_norm": 227.1068878173828, "learning_rate": 3.36664754736061e-07, "loss": 24.7913, "step": 9800 }, { "epoch": 77.78165503489531, "eval_loss": 3.0548555850982666, "eval_mae": 1.3078426122665405, "eval_mse": 3.0548558235168457, "eval_r2": 0.079586923122406, "eval_rmse": 1.7478145849937416, "eval_runtime": 9.0735, "eval_samples_per_second": 442.054, "eval_steps_per_second": 13.887, "step": 9800 }, { "epoch": 77.86141575274178, "grad_norm": 448.2280578613281, "learning_rate": 3.3637176609304905e-07, "loss": 23.4003, "step": 9810 }, { "epoch": 77.94117647058823, "grad_norm": 374.3587951660156, "learning_rate": 3.3607864267932913e-07, "loss": 23.7702, "step": 9820 }, { "epoch": 78.0159521435693, "grad_norm": 439.8951721191406, "learning_rate": 3.3578538495227796e-07, "loss": 22.7544, "step": 9830 }, { "epoch": 78.09571286141575, "grad_norm": 292.9596862792969, "learning_rate": 3.354919933694822e-07, "loss": 26.3637, "step": 9840 }, { "epoch": 78.17547357926222, "grad_norm": 216.87107849121094, "learning_rate": 3.35198468388737e-07, "loss": 21.3863, "step": 9850 }, { "epoch": 78.25523429710867, "grad_norm": 498.3560791015625, "learning_rate": 3.3490481046804594e-07, "loss": 24.6341, "step": 9860 }, { "epoch": 78.33499501495514, "grad_norm": 280.01220703125, "learning_rate": 3.3461102006561985e-07, "loss": 25.052, "step": 9870 }, { "epoch": 78.4147557328016, "grad_norm": 182.12396240234375, "learning_rate": 
3.343170976398765e-07, "loss": 22.8939, "step": 9880 }, { "epoch": 78.49451645064805, "grad_norm": 413.0950622558594, "learning_rate": 3.340230436494393e-07, "loss": 24.4764, "step": 9890 }, { "epoch": 78.57427716849452, "grad_norm": 204.4065704345703, "learning_rate": 3.337288585531375e-07, "loss": 26.2159, "step": 9900 }, { "epoch": 78.57427716849452, "eval_loss": 3.047255754470825, "eval_mae": 1.3226542472839355, "eval_mse": 3.047255754470825, "eval_r2": 0.08187675476074219, "eval_rmse": 1.745639067639936, "eval_runtime": 9.09, "eval_samples_per_second": 441.252, "eval_steps_per_second": 13.861, "step": 9900 }, { "epoch": 78.65403788634097, "grad_norm": 152.0811309814453, "learning_rate": 3.3343454281000427e-07, "loss": 25.7904, "step": 9910 }, { "epoch": 78.73379860418744, "grad_norm": 128.9251708984375, "learning_rate": 3.3314009687927717e-07, "loss": 26.1647, "step": 9920 }, { "epoch": 78.8135593220339, "grad_norm": 136.84832763671875, "learning_rate": 3.328455212203966e-07, "loss": 25.1437, "step": 9930 }, { "epoch": 78.89332003988036, "grad_norm": 158.07907104492188, "learning_rate": 3.325508162930056e-07, "loss": 23.8966, "step": 9940 }, { "epoch": 78.97308075772682, "grad_norm": 363.46539306640625, "learning_rate": 3.3225598255694854e-07, "loss": 25.4471, "step": 9950 }, { "epoch": 79.04785643070788, "grad_norm": 80.3748779296875, "learning_rate": 3.319610204722713e-07, "loss": 23.815, "step": 9960 }, { "epoch": 79.12761714855434, "grad_norm": 132.05052185058594, "learning_rate": 3.3166593049921947e-07, "loss": 23.0523, "step": 9970 }, { "epoch": 79.2073778664008, "grad_norm": 59.55046081542969, "learning_rate": 3.313707130982387e-07, "loss": 24.2984, "step": 9980 }, { "epoch": 79.28713858424726, "grad_norm": 404.2379150390625, "learning_rate": 3.310753687299732e-07, "loss": 22.0736, "step": 9990 }, { "epoch": 79.36689930209371, "grad_norm": 548.7941284179688, "learning_rate": 3.3077989785526536e-07, "loss": 24.7664, "step": 10000 }, { "epoch": 
79.36689930209371, "eval_loss": 3.0421335697174072, "eval_mae": 1.3189619779586792, "eval_mse": 3.0421340465545654, "eval_r2": 0.08341985940933228, "eval_rmse": 1.7441714498737118, "eval_runtime": 9.0942, "eval_samples_per_second": 441.052, "eval_steps_per_second": 13.855, "step": 10000 }, { "epoch": 79.44666001994018, "grad_norm": 132.6288299560547, "learning_rate": 3.3048430093515483e-07, "loss": 24.042, "step": 10010 }, { "epoch": 79.52642073778664, "grad_norm": 94.08177185058594, "learning_rate": 3.301885784308782e-07, "loss": 22.9365, "step": 10020 }, { "epoch": 79.6061814556331, "grad_norm": 486.14080810546875, "learning_rate": 3.298927308038678e-07, "loss": 24.6133, "step": 10030 }, { "epoch": 79.68594217347956, "grad_norm": 149.77662658691406, "learning_rate": 3.295967585157512e-07, "loss": 25.6675, "step": 10040 }, { "epoch": 79.76570289132601, "grad_norm": 269.9638671875, "learning_rate": 3.2930066202835065e-07, "loss": 27.9391, "step": 10050 }, { "epoch": 79.84546360917248, "grad_norm": 112.71063995361328, "learning_rate": 3.2900444180368213e-07, "loss": 27.5189, "step": 10060 }, { "epoch": 79.92522432701894, "grad_norm": 312.2144775390625, "learning_rate": 3.287080983039545e-07, "loss": 24.9859, "step": 10070 }, { "epoch": 80.0, "grad_norm": 385.76629638671875, "learning_rate": 3.284116319915693e-07, "loss": 22.5648, "step": 10080 }, { "epoch": 80.07976071784645, "grad_norm": 402.8841857910156, "learning_rate": 3.2811504332911944e-07, "loss": 23.9249, "step": 10090 }, { "epoch": 80.15952143569292, "grad_norm": 354.6750183105469, "learning_rate": 3.278183327793889e-07, "loss": 24.9814, "step": 10100 }, { "epoch": 80.15952143569292, "eval_loss": 3.04233717918396, "eval_mae": 1.3198978900909424, "eval_mse": 3.04233717918396, "eval_r2": 0.08335870504379272, "eval_rmse": 1.7442296807427513, "eval_runtime": 9.0921, "eval_samples_per_second": 441.155, "eval_steps_per_second": 13.858, "step": 10100 }, { "epoch": 80.23928215353938, "grad_norm": 
125.52257537841797, "learning_rate": 3.2752150080535167e-07, "loss": 24.5656, "step": 10110 }, { "epoch": 80.31904287138585, "grad_norm": 440.5180969238281, "learning_rate": 3.2722454787017147e-07, "loss": 24.8485, "step": 10120 }, { "epoch": 80.3988035892323, "grad_norm": 243.70953369140625, "learning_rate": 3.269274744372005e-07, "loss": 24.074, "step": 10130 }, { "epoch": 80.47856430707877, "grad_norm": 169.30943298339844, "learning_rate": 3.2663028096997906e-07, "loss": 24.8247, "step": 10140 }, { "epoch": 80.55832502492522, "grad_norm": 444.5276184082031, "learning_rate": 3.2633296793223493e-07, "loss": 25.5621, "step": 10150 }, { "epoch": 80.63808574277168, "grad_norm": 171.6515655517578, "learning_rate": 3.2603553578788223e-07, "loss": 23.3385, "step": 10160 }, { "epoch": 80.71784646061815, "grad_norm": 193.34361267089844, "learning_rate": 3.257379850010209e-07, "loss": 27.2977, "step": 10170 }, { "epoch": 80.7976071784646, "grad_norm": 62.21059036254883, "learning_rate": 3.2544031603593624e-07, "loss": 23.6639, "step": 10180 }, { "epoch": 80.87736789631107, "grad_norm": 452.9521179199219, "learning_rate": 3.2514252935709774e-07, "loss": 24.8742, "step": 10190 }, { "epoch": 80.95712861415753, "grad_norm": 547.2555541992188, "learning_rate": 3.248446254291588e-07, "loss": 24.5573, "step": 10200 }, { "epoch": 80.95712861415753, "eval_loss": 3.0389792919158936, "eval_mae": 1.3104017972946167, "eval_mse": 3.0389792919158936, "eval_r2": 0.0843704342842102, "eval_rmse": 1.7432668447245514, "eval_runtime": 9.0821, "eval_samples_per_second": 441.64, "eval_steps_per_second": 13.873, "step": 10200 }, { "epoch": 81.03190428713859, "grad_norm": 432.891845703125, "learning_rate": 3.2454660471695536e-07, "loss": 23.64, "step": 10210 }, { "epoch": 81.11166500498504, "grad_norm": 121.47529602050781, "learning_rate": 3.242484676855061e-07, "loss": 26.8345, "step": 10220 }, { "epoch": 81.19142572283151, "grad_norm": 951.72412109375, "learning_rate": 3.2395021480001083e-07, 
"loss": 25.3862, "step": 10230 }, { "epoch": 81.27118644067797, "grad_norm": 143.4444122314453, "learning_rate": 3.236518465258502e-07, "loss": 23.5563, "step": 10240 }, { "epoch": 81.35094715852442, "grad_norm": 130.21266174316406, "learning_rate": 3.2335336332858506e-07, "loss": 22.0906, "step": 10250 }, { "epoch": 81.43070787637089, "grad_norm": 232.81362915039062, "learning_rate": 3.230547656739554e-07, "loss": 25.2647, "step": 10260 }, { "epoch": 81.51046859421734, "grad_norm": 588.8156127929688, "learning_rate": 3.227560540278798e-07, "loss": 25.4037, "step": 10270 }, { "epoch": 81.59022931206381, "grad_norm": 302.1634216308594, "learning_rate": 3.2245722885645496e-07, "loss": 24.214, "step": 10280 }, { "epoch": 81.66999002991027, "grad_norm": 87.46845245361328, "learning_rate": 3.2215829062595445e-07, "loss": 24.0789, "step": 10290 }, { "epoch": 81.74975074775674, "grad_norm": 280.2633361816406, "learning_rate": 3.2185923980282835e-07, "loss": 23.065, "step": 10300 }, { "epoch": 81.74975074775674, "eval_loss": 3.0380895137786865, "eval_mae": 1.309239149093628, "eval_mse": 3.0380895137786865, "eval_r2": 0.08463847637176514, "eval_rmse": 1.7430116218140046, "eval_runtime": 9.0729, "eval_samples_per_second": 442.087, "eval_steps_per_second": 13.888, "step": 10300 }, { "epoch": 81.82951146560319, "grad_norm": 113.61293029785156, "learning_rate": 3.2156007685370247e-07, "loss": 24.1365, "step": 10310 }, { "epoch": 81.90927218344964, "grad_norm": 299.0325927734375, "learning_rate": 3.2126080224537745e-07, "loss": 26.3868, "step": 10320 }, { "epoch": 81.98903290129611, "grad_norm": 180.27159118652344, "learning_rate": 3.2096141644482834e-07, "loss": 25.2489, "step": 10330 }, { "epoch": 82.06380857427718, "grad_norm": 373.5345458984375, "learning_rate": 3.206619199192034e-07, "loss": 24.5, "step": 10340 }, { "epoch": 82.14356929212363, "grad_norm": 415.2379455566406, "learning_rate": 3.203623131358241e-07, "loss": 23.3937, "step": 10350 }, { "epoch": 
82.22333000997008, "grad_norm": 296.4977722167969, "learning_rate": 3.2006259656218346e-07, "loss": 25.374, "step": 10360 }, { "epoch": 82.30309072781655, "grad_norm": 94.92182159423828, "learning_rate": 3.1976277066594624e-07, "loss": 25.1925, "step": 10370 }, { "epoch": 82.38285144566301, "grad_norm": 91.66964721679688, "learning_rate": 3.194628359149475e-07, "loss": 24.9719, "step": 10380 }, { "epoch": 82.46261216350948, "grad_norm": 351.6086120605469, "learning_rate": 3.1916279277719225e-07, "loss": 23.6032, "step": 10390 }, { "epoch": 82.54237288135593, "grad_norm": 298.90740966796875, "learning_rate": 3.1886264172085467e-07, "loss": 25.0727, "step": 10400 }, { "epoch": 82.54237288135593, "eval_loss": 3.036163806915283, "eval_mae": 1.3075165748596191, "eval_mse": 3.036163806915283, "eval_r2": 0.08521872758865356, "eval_rmse": 1.7424591263255742, "eval_runtime": 9.072, "eval_samples_per_second": 442.131, "eval_steps_per_second": 13.889, "step": 10400 }, { "epoch": 82.6221335992024, "grad_norm": 515.6729125976562, "learning_rate": 3.185623832142771e-07, "loss": 25.3068, "step": 10410 }, { "epoch": 82.70189431704885, "grad_norm": 76.53244018554688, "learning_rate": 3.182620177259699e-07, "loss": 25.9312, "step": 10420 }, { "epoch": 82.78165503489531, "grad_norm": 468.8350830078125, "learning_rate": 3.179615457246102e-07, "loss": 23.6445, "step": 10430 }, { "epoch": 82.86141575274178, "grad_norm": 167.1678924560547, "learning_rate": 3.1766096767904113e-07, "loss": 23.0468, "step": 10440 }, { "epoch": 82.94117647058823, "grad_norm": 134.3607177734375, "learning_rate": 3.1736028405827163e-07, "loss": 25.7779, "step": 10450 }, { "epoch": 83.0159521435693, "grad_norm": 102.10858154296875, "learning_rate": 3.1705949533147515e-07, "loss": 21.8969, "step": 10460 }, { "epoch": 83.09571286141575, "grad_norm": 204.12930297851562, "learning_rate": 3.167586019679891e-07, "loss": 23.8331, "step": 10470 }, { "epoch": 83.17547357926222, "grad_norm": 258.52239990234375, 
"learning_rate": 3.1645760443731444e-07, "loss": 24.0841, "step": 10480 }, { "epoch": 83.25523429710867, "grad_norm": 129.42250061035156, "learning_rate": 3.161565032091144e-07, "loss": 25.0963, "step": 10490 }, { "epoch": 83.33499501495514, "grad_norm": 234.62936401367188, "learning_rate": 3.1585529875321406e-07, "loss": 25.2912, "step": 10500 }, { "epoch": 83.33499501495514, "eval_loss": 3.034513473510742, "eval_mae": 1.3175091743469238, "eval_mse": 3.034513473510742, "eval_r2": 0.08571594953536987, "eval_rmse": 1.7419854975029907, "eval_runtime": 9.0687, "eval_samples_per_second": 442.289, "eval_steps_per_second": 13.894, "step": 10500 }, { "epoch": 83.4147557328016, "grad_norm": 234.0610809326172, "learning_rate": 3.155539915395997e-07, "loss": 24.0329, "step": 10510 }, { "epoch": 83.49451645064805, "grad_norm": 201.95191955566406, "learning_rate": 3.1525258203841783e-07, "loss": 25.5342, "step": 10520 }, { "epoch": 83.57427716849452, "grad_norm": 485.1669921875, "learning_rate": 3.149510707199745e-07, "loss": 23.8518, "step": 10530 }, { "epoch": 83.65403788634097, "grad_norm": 212.77342224121094, "learning_rate": 3.1464945805473487e-07, "loss": 26.4807, "step": 10540 }, { "epoch": 83.73379860418744, "grad_norm": 329.3431091308594, "learning_rate": 3.1434774451332206e-07, "loss": 26.6428, "step": 10550 }, { "epoch": 83.8135593220339, "grad_norm": 158.35450744628906, "learning_rate": 3.1404593056651654e-07, "loss": 23.2614, "step": 10560 }, { "epoch": 83.89332003988036, "grad_norm": 67.29667663574219, "learning_rate": 3.1374401668525554e-07, "loss": 22.8227, "step": 10570 }, { "epoch": 83.97308075772682, "grad_norm": 208.1637725830078, "learning_rate": 3.134420033406322e-07, "loss": 24.3643, "step": 10580 }, { "epoch": 84.04785643070788, "grad_norm": 71.72771453857422, "learning_rate": 3.1313989100389496e-07, "loss": 22.6355, "step": 10590 }, { "epoch": 84.12761714855434, "grad_norm": 177.875, "learning_rate": 3.1283768014644653e-07, "loss": 26.4483, "step": 
10600 }, { "epoch": 84.12761714855434, "eval_loss": 3.030277967453003, "eval_mae": 1.312751054763794, "eval_mse": 3.030277967453003, "eval_r2": 0.08699202537536621, "eval_rmse": 1.7407693607864894, "eval_runtime": 9.0905, "eval_samples_per_second": 441.231, "eval_steps_per_second": 13.861, "step": 10600 }, { "epoch": 84.2073778664008, "grad_norm": 70.78500366210938, "learning_rate": 3.1253537123984363e-07, "loss": 23.6658, "step": 10610 }, { "epoch": 84.28713858424726, "grad_norm": 414.3121643066406, "learning_rate": 3.122329647557955e-07, "loss": 27.3614, "step": 10620 }, { "epoch": 84.36689930209371, "grad_norm": 101.00165557861328, "learning_rate": 3.1193046116616425e-07, "loss": 25.9633, "step": 10630 }, { "epoch": 84.44666001994018, "grad_norm": 600.6487426757812, "learning_rate": 3.116278609429629e-07, "loss": 25.1654, "step": 10640 }, { "epoch": 84.52642073778664, "grad_norm": 221.32412719726562, "learning_rate": 3.1132516455835576e-07, "loss": 24.6665, "step": 10650 }, { "epoch": 84.6061814556331, "grad_norm": 91.79109954833984, "learning_rate": 3.1102237248465686e-07, "loss": 24.0079, "step": 10660 }, { "epoch": 84.68594217347956, "grad_norm": 486.3948669433594, "learning_rate": 3.107194851943297e-07, "loss": 23.8476, "step": 10670 }, { "epoch": 84.76570289132601, "grad_norm": 155.642333984375, "learning_rate": 3.104165031599861e-07, "loss": 21.3082, "step": 10680 }, { "epoch": 84.84546360917248, "grad_norm": 319.09063720703125, "learning_rate": 3.1011342685438625e-07, "loss": 22.9344, "step": 10690 }, { "epoch": 84.92522432701894, "grad_norm": 218.4431915283203, "learning_rate": 3.0981025675043675e-07, "loss": 25.8942, "step": 10700 }, { "epoch": 84.92522432701894, "eval_loss": 3.028160810470581, "eval_mae": 1.3118433952331543, "eval_mse": 3.028160810470581, "eval_r2": 0.08762991428375244, "eval_rmse": 1.7401611449720917, "eval_runtime": 9.0799, "eval_samples_per_second": 441.745, "eval_steps_per_second": 13.877, "step": 10700 }, { "epoch": 85.0, 
"grad_norm": 211.14683532714844, "learning_rate": 3.095069933211912e-07, "loss": 23.4049, "step": 10710 }, { "epoch": 85.07976071784645, "grad_norm": 242.6691436767578, "learning_rate": 3.092036370398484e-07, "loss": 25.8015, "step": 10720 }, { "epoch": 85.15952143569292, "grad_norm": 625.7755737304688, "learning_rate": 3.0890018837975215e-07, "loss": 24.2249, "step": 10730 }, { "epoch": 85.23928215353938, "grad_norm": 193.0255584716797, "learning_rate": 3.0859664781439037e-07, "loss": 23.4788, "step": 10740 }, { "epoch": 85.31904287138585, "grad_norm": 272.1832275390625, "learning_rate": 3.082930158173945e-07, "loss": 25.8466, "step": 10750 }, { "epoch": 85.3988035892323, "grad_norm": 408.71661376953125, "learning_rate": 3.079892928625385e-07, "loss": 25.5229, "step": 10760 }, { "epoch": 85.47856430707877, "grad_norm": 426.63787841796875, "learning_rate": 3.076854794237384e-07, "loss": 25.0958, "step": 10770 }, { "epoch": 85.55832502492522, "grad_norm": 184.2642364501953, "learning_rate": 3.0738157597505127e-07, "loss": 23.289, "step": 10780 }, { "epoch": 85.63808574277168, "grad_norm": 945.2476196289062, "learning_rate": 3.070775829906747e-07, "loss": 23.4057, "step": 10790 }, { "epoch": 85.71784646061815, "grad_norm": 553.4732666015625, "learning_rate": 3.06773500944946e-07, "loss": 24.8294, "step": 10800 }, { "epoch": 85.71784646061815, "eval_loss": 3.0269877910614014, "eval_mae": 1.3075300455093384, "eval_mse": 3.0269877910614014, "eval_r2": 0.08798336982727051, "eval_rmse": 1.7398240689970355, "eval_runtime": 9.0905, "eval_samples_per_second": 441.23, "eval_steps_per_second": 13.861, "step": 10800 }, { "epoch": 85.7976071784646, "grad_norm": 112.29386138916016, "learning_rate": 3.064693303123415e-07, "loss": 24.8945, "step": 10810 }, { "epoch": 85.87736789631107, "grad_norm": 720.1716918945312, "learning_rate": 3.061650715674755e-07, "loss": 24.4036, "step": 10820 }, { "epoch": 85.95712861415753, "grad_norm": 325.7308349609375, "learning_rate": 
3.058911637534923e-07, "loss": 24.3189, "step": 10830 }, { "epoch": 86.03190428713859, "grad_norm": 230.79962158203125, "learning_rate": 3.055867389033845e-07, "loss": 21.9985, "step": 10840 }, { "epoch": 86.11166500498504, "grad_norm": 496.41143798828125, "learning_rate": 3.052822273181721e-07, "loss": 26.2874, "step": 10850 }, { "epoch": 86.19142572283151, "grad_norm": 704.2967529296875, "learning_rate": 3.049776294730017e-07, "loss": 24.9313, "step": 10860 }, { "epoch": 86.27118644067797, "grad_norm": 557.0463256835938, "learning_rate": 3.0467294584315436e-07, "loss": 24.8474, "step": 10870 }, { "epoch": 86.35094715852442, "grad_norm": 339.0418395996094, "learning_rate": 3.043681769040449e-07, "loss": 25.2613, "step": 10880 }, { "epoch": 86.43070787637089, "grad_norm": 437.7363586425781, "learning_rate": 3.040633231312216e-07, "loss": 23.9345, "step": 10890 }, { "epoch": 86.51046859421734, "grad_norm": 398.3165283203125, "learning_rate": 3.0375838500036484e-07, "loss": 24.2951, "step": 10900 }, { "epoch": 86.51046859421734, "eval_loss": 3.0261216163635254, "eval_mae": 1.307518720626831, "eval_mse": 3.0261216163635254, "eval_r2": 0.08824437856674194, "eval_rmse": 1.7395751252428064, "eval_runtime": 9.1038, "eval_samples_per_second": 440.583, "eval_steps_per_second": 13.84, "step": 10900 }, { "epoch": 86.59022931206381, "grad_norm": 95.45066833496094, "learning_rate": 3.0345336298728646e-07, "loss": 23.1709, "step": 10910 }, { "epoch": 86.66999002991027, "grad_norm": 105.39664459228516, "learning_rate": 3.031482575679297e-07, "loss": 24.337, "step": 10920 }, { "epoch": 86.74975074775674, "grad_norm": 149.99327087402344, "learning_rate": 3.028430692183675e-07, "loss": 22.713, "step": 10930 }, { "epoch": 86.82951146560319, "grad_norm": 363.6456604003906, "learning_rate": 3.025377984148023e-07, "loss": 24.9756, "step": 10940 }, { "epoch": 86.90927218344964, "grad_norm": 639.7531127929688, "learning_rate": 3.0223244563356535e-07, "loss": 24.3099, "step": 10950 }, { 
"epoch": 86.98903290129611, "grad_norm": 609.5497436523438, "learning_rate": 3.0192701135111576e-07, "loss": 26.0549, "step": 10960 }, { "epoch": 87.06380857427718, "grad_norm": 71.53561401367188, "learning_rate": 3.0162149604403957e-07, "loss": 26.1172, "step": 10970 }, { "epoch": 87.14356929212363, "grad_norm": 281.8210754394531, "learning_rate": 3.0131590018904955e-07, "loss": 24.8328, "step": 10980 }, { "epoch": 87.22333000997008, "grad_norm": 528.9259033203125, "learning_rate": 3.010102242629841e-07, "loss": 23.5778, "step": 10990 }, { "epoch": 87.30309072781655, "grad_norm": 221.66729736328125, "learning_rate": 3.007044687428064e-07, "loss": 22.4509, "step": 11000 }, { "epoch": 87.30309072781655, "eval_loss": 3.0431015491485596, "eval_mae": 1.3019126653671265, "eval_mse": 3.0431013107299805, "eval_r2": 0.08312851190567017, "eval_rmse": 1.7444487125536194, "eval_runtime": 9.0783, "eval_samples_per_second": 441.823, "eval_steps_per_second": 13.879, "step": 11000 }, { "epoch": 87.38285144566301, "grad_norm": 143.44786071777344, "learning_rate": 3.003986341056038e-07, "loss": 25.9225, "step": 11010 }, { "epoch": 87.46261216350948, "grad_norm": 234.01194763183594, "learning_rate": 3.000927208285876e-07, "loss": 24.0189, "step": 11020 }, { "epoch": 87.54237288135593, "grad_norm": 247.8142852783203, "learning_rate": 2.997867293890912e-07, "loss": 24.129, "step": 11030 }, { "epoch": 87.6221335992024, "grad_norm": 198.8553924560547, "learning_rate": 2.994806602645702e-07, "loss": 24.8126, "step": 11040 }, { "epoch": 87.70189431704885, "grad_norm": 483.2439880371094, "learning_rate": 2.991745139326014e-07, "loss": 23.9214, "step": 11050 }, { "epoch": 87.78165503489531, "grad_norm": 314.6399841308594, "learning_rate": 2.988682908708822e-07, "loss": 23.0557, "step": 11060 }, { "epoch": 87.86141575274178, "grad_norm": 471.4364929199219, "learning_rate": 2.985619915572293e-07, "loss": 25.7889, "step": 11070 }, { "epoch": 87.94117647058823, "grad_norm": 498.03887939453125, 
"learning_rate": 2.9825561646957914e-07, "loss": 25.6226, "step": 11080 }, { "epoch": 88.0159521435693, "grad_norm": 281.46551513671875, "learning_rate": 2.9794916608598563e-07, "loss": 21.588, "step": 11090 }, { "epoch": 88.09571286141575, "grad_norm": 768.0538940429688, "learning_rate": 2.9764264088462057e-07, "loss": 23.5801, "step": 11100 }, { "epoch": 88.09571286141575, "eval_loss": 3.024217128753662, "eval_mae": 1.3123266696929932, "eval_mse": 3.024217128753662, "eval_r2": 0.08881819248199463, "eval_rmse": 1.7390276388699697, "eval_runtime": 9.0429, "eval_samples_per_second": 443.554, "eval_steps_per_second": 13.934, "step": 11100 }, { "epoch": 88.17547357926222, "grad_norm": 146.67355346679688, "learning_rate": 2.9733604134377237e-07, "loss": 26.1583, "step": 11110 }, { "epoch": 88.25523429710867, "grad_norm": 307.28668212890625, "learning_rate": 2.970293679418456e-07, "loss": 26.4266, "step": 11120 }, { "epoch": 88.33499501495514, "grad_norm": 218.9013214111328, "learning_rate": 2.967226211573598e-07, "loss": 24.7804, "step": 11130 }, { "epoch": 88.4147557328016, "grad_norm": 410.3785705566406, "learning_rate": 2.964158014689493e-07, "loss": 23.9603, "step": 11140 }, { "epoch": 88.49451645064805, "grad_norm": 278.336669921875, "learning_rate": 2.961089093553619e-07, "loss": 26.3096, "step": 11150 }, { "epoch": 88.57427716849452, "grad_norm": 621.4981689453125, "learning_rate": 2.9580194529545886e-07, "loss": 23.0465, "step": 11160 }, { "epoch": 88.65403788634097, "grad_norm": 165.18826293945312, "learning_rate": 2.95494909768213e-07, "loss": 23.998, "step": 11170 }, { "epoch": 88.73379860418744, "grad_norm": 293.3717041015625, "learning_rate": 2.9518780325270943e-07, "loss": 23.3783, "step": 11180 }, { "epoch": 88.8135593220339, "grad_norm": 271.273193359375, "learning_rate": 2.948806262281435e-07, "loss": 24.6496, "step": 11190 }, { "epoch": 88.89332003988036, "grad_norm": 340.6837158203125, "learning_rate": 2.9457337917382074e-07, "loss": 25.3883, "step": 
11200 }, { "epoch": 88.89332003988036, "eval_loss": 3.0350711345672607, "eval_mae": 1.3221527338027954, "eval_mse": 3.03507137298584, "eval_r2": 0.08554786443710327, "eval_rmse": 1.7421456233581163, "eval_runtime": 9.137, "eval_samples_per_second": 438.983, "eval_steps_per_second": 13.79, "step": 11200 }, { "epoch": 88.97308075772682, "grad_norm": 373.01025390625, "learning_rate": 2.9426606256915596e-07, "loss": 22.5712, "step": 11210 }, { "epoch": 89.04785643070788, "grad_norm": 82.71126556396484, "learning_rate": 2.9395867689367247e-07, "loss": 26.1899, "step": 11220 }, { "epoch": 89.12761714855434, "grad_norm": 524.1490478515625, "learning_rate": 2.936512226270015e-07, "loss": 25.1711, "step": 11230 }, { "epoch": 89.2073778664008, "grad_norm": 611.0742797851562, "learning_rate": 2.9334370024888087e-07, "loss": 24.6458, "step": 11240 }, { "epoch": 89.28713858424726, "grad_norm": 149.1772003173828, "learning_rate": 2.9303611023915536e-07, "loss": 23.5635, "step": 11250 }, { "epoch": 89.36689930209371, "grad_norm": 138.20068359375, "learning_rate": 2.9272845307777467e-07, "loss": 24.7863, "step": 11260 }, { "epoch": 89.44666001994018, "grad_norm": 285.6581115722656, "learning_rate": 2.9242072924479355e-07, "loss": 25.1381, "step": 11270 }, { "epoch": 89.52642073778664, "grad_norm": 210.21249389648438, "learning_rate": 2.921129392203707e-07, "loss": 26.7203, "step": 11280 }, { "epoch": 89.6061814556331, "grad_norm": 69.21088409423828, "learning_rate": 2.9180508348476836e-07, "loss": 23.4501, "step": 11290 }, { "epoch": 89.68594217347956, "grad_norm": 387.38494873046875, "learning_rate": 2.914971625183509e-07, "loss": 23.5194, "step": 11300 }, { "epoch": 89.68594217347956, "eval_loss": 3.022109031677246, "eval_mae": 1.3071075677871704, "eval_mse": 3.022109270095825, "eval_r2": 0.08945322036743164, "eval_rmse": 1.7384214880447795, "eval_runtime": 9.0702, "eval_samples_per_second": 442.218, "eval_steps_per_second": 13.892, "step": 11300 }, { "epoch": 89.76570289132601, 
"grad_norm": 171.12950134277344, "learning_rate": 2.9118917680158463e-07, "loss": 23.5455, "step": 11310 }, { "epoch": 89.84546360917248, "grad_norm": 144.1996612548828, "learning_rate": 2.90881126815037e-07, "loss": 22.2914, "step": 11320 }, { "epoch": 89.92522432701894, "grad_norm": 207.52590942382812, "learning_rate": 2.905730130393757e-07, "loss": 23.5734, "step": 11330 }, { "epoch": 90.0, "grad_norm": 225.84323120117188, "learning_rate": 2.9026483595536795e-07, "loss": 23.3279, "step": 11340 }, { "epoch": 90.07976071784645, "grad_norm": 454.1545104980469, "learning_rate": 2.8995659604387967e-07, "loss": 24.7852, "step": 11350 }, { "epoch": 90.15952143569292, "grad_norm": 412.3381042480469, "learning_rate": 2.8964829378587484e-07, "loss": 26.0871, "step": 11360 }, { "epoch": 90.23928215353938, "grad_norm": 111.9667739868164, "learning_rate": 2.8933992966241484e-07, "loss": 24.6989, "step": 11370 }, { "epoch": 90.31904287138585, "grad_norm": 1320.287841796875, "learning_rate": 2.8903150415465735e-07, "loss": 24.0967, "step": 11380 }, { "epoch": 90.3988035892323, "grad_norm": 128.83531188964844, "learning_rate": 2.887230177438563e-07, "loss": 24.6157, "step": 11390 }, { "epoch": 90.47856430707877, "grad_norm": 792.4503784179688, "learning_rate": 2.8841447091135985e-07, "loss": 25.8294, "step": 11400 }, { "epoch": 90.47856430707877, "eval_loss": 3.02121639251709, "eval_mae": 1.3112235069274902, "eval_mse": 3.0212161540985107, "eval_r2": 0.0897223949432373, "eval_rmse": 1.7381645935004288, "eval_runtime": 9.0986, "eval_samples_per_second": 440.837, "eval_steps_per_second": 13.848, "step": 11400 }, { "epoch": 90.55832502492522, "grad_norm": 716.572509765625, "learning_rate": 2.881058641386113e-07, "loss": 24.2307, "step": 11410 }, { "epoch": 90.63808574277168, "grad_norm": 1163.026611328125, "learning_rate": 2.8779719790714695e-07, "loss": 24.2046, "step": 11420 }, { "epoch": 90.71784646061815, "grad_norm": 120.25177001953125, "learning_rate": 
2.8748847269859604e-07, "loss": 24.3986, "step": 11430 }, { "epoch": 90.7976071784646, "grad_norm": 140.4076385498047, "learning_rate": 2.871796889946799e-07, "loss": 23.751, "step": 11440 }, { "epoch": 90.87736789631107, "grad_norm": 162.21217346191406, "learning_rate": 2.868708472772109e-07, "loss": 23.8611, "step": 11450 }, { "epoch": 90.95712861415753, "grad_norm": 423.1454162597656, "learning_rate": 2.865619480280922e-07, "loss": 23.9691, "step": 11460 }, { "epoch": 91.03190428713859, "grad_norm": 100.34803771972656, "learning_rate": 2.862529917293165e-07, "loss": 21.8766, "step": 11470 }, { "epoch": 91.11166500498504, "grad_norm": 195.40789794921875, "learning_rate": 2.8594397886296575e-07, "loss": 24.3027, "step": 11480 }, { "epoch": 91.19142572283151, "grad_norm": 97.23872375488281, "learning_rate": 2.8563490991121006e-07, "loss": 23.9407, "step": 11490 }, { "epoch": 91.27118644067797, "grad_norm": 614.5779418945312, "learning_rate": 2.8532578535630683e-07, "loss": 22.0744, "step": 11500 }, { "epoch": 91.27118644067797, "eval_loss": 3.0190556049346924, "eval_mae": 1.3079864978790283, "eval_mse": 3.0190556049346924, "eval_r2": 0.09037333726882935, "eval_rmse": 1.7375429793057473, "eval_runtime": 9.0572, "eval_samples_per_second": 442.854, "eval_steps_per_second": 13.912, "step": 11500 }, { "epoch": 91.35094715852442, "grad_norm": 434.12109375, "learning_rate": 2.8501660568060073e-07, "loss": 25.5922, "step": 11510 }, { "epoch": 91.43070787637089, "grad_norm": 52.7608642578125, "learning_rate": 2.8470737136652186e-07, "loss": 24.937, "step": 11520 }, { "epoch": 91.51046859421734, "grad_norm": 124.67076110839844, "learning_rate": 2.84398082896586e-07, "loss": 23.3723, "step": 11530 }, { "epoch": 91.59022931206381, "grad_norm": 158.92762756347656, "learning_rate": 2.8408874075339333e-07, "loss": 24.7098, "step": 11540 }, { "epoch": 91.66999002991027, "grad_norm": 210.74998474121094, "learning_rate": 2.8377934541962766e-07, "loss": 24.8712, "step": 11550 }, { 
"epoch": 91.74975074775674, "grad_norm": 453.7698974609375, "learning_rate": 2.834698973780558e-07, "loss": 23.5147, "step": 11560 }, { "epoch": 91.82951146560319, "grad_norm": 74.43639373779297, "learning_rate": 2.831603971115271e-07, "loss": 22.7803, "step": 11570 }, { "epoch": 91.90927218344964, "grad_norm": 875.6751098632812, "learning_rate": 2.8285084510297196e-07, "loss": 26.8268, "step": 11580 }, { "epoch": 91.98903290129611, "grad_norm": 164.56968688964844, "learning_rate": 2.8254124183540185e-07, "loss": 26.4578, "step": 11590 }, { "epoch": 92.06380857427718, "grad_norm": 126.27661895751953, "learning_rate": 2.82231587791908e-07, "loss": 22.2847, "step": 11600 }, { "epoch": 92.06380857427718, "eval_loss": 3.0181376934051514, "eval_mae": 1.30584716796875, "eval_mse": 3.0181376934051514, "eval_r2": 0.09064990282058716, "eval_rmse": 1.7372788185565238, "eval_runtime": 9.0573, "eval_samples_per_second": 442.849, "eval_steps_per_second": 13.911, "step": 11600 }, { "epoch": 92.14356929212363, "grad_norm": 81.6953353881836, "learning_rate": 2.819218834556611e-07, "loss": 25.436, "step": 11610 }, { "epoch": 92.22333000997008, "grad_norm": 546.46044921875, "learning_rate": 2.8161212930991e-07, "loss": 24.1348, "step": 11620 }, { "epoch": 92.30309072781655, "grad_norm": 314.1280212402344, "learning_rate": 2.8130232583798163e-07, "loss": 25.676, "step": 11630 }, { "epoch": 92.38285144566301, "grad_norm": 1007.1983642578125, "learning_rate": 2.809924735232797e-07, "loss": 24.7171, "step": 11640 }, { "epoch": 92.46261216350948, "grad_norm": 263.078125, "learning_rate": 2.806825728492841e-07, "loss": 23.8003, "step": 11650 }, { "epoch": 92.54237288135593, "grad_norm": 202.86151123046875, "learning_rate": 2.803726242995501e-07, "loss": 23.4182, "step": 11660 }, { "epoch": 92.6221335992024, "grad_norm": 133.31997680664062, "learning_rate": 2.8006262835770803e-07, "loss": 23.3144, "step": 11670 }, { "epoch": 92.70189431704885, "grad_norm": 346.8004150390625, 
"learning_rate": 2.797525855074618e-07, "loss": 23.841, "step": 11680 }, { "epoch": 92.78165503489531, "grad_norm": 271.6881103515625, "learning_rate": 2.7944249623258865e-07, "loss": 24.4543, "step": 11690 }, { "epoch": 92.86141575274178, "grad_norm": 335.6261901855469, "learning_rate": 2.7913236101693825e-07, "loss": 25.058, "step": 11700 }, { "epoch": 92.86141575274178, "eval_loss": 3.0201170444488525, "eval_mae": 1.3131814002990723, "eval_mse": 3.0201170444488525, "eval_r2": 0.0900534987449646, "eval_rmse": 1.7378483951279675, "eval_runtime": 9.0569, "eval_samples_per_second": 442.868, "eval_steps_per_second": 13.912, "step": 11700 }, { "epoch": 92.94117647058823, "grad_norm": 81.703857421875, "learning_rate": 2.7882218034443205e-07, "loss": 25.6484, "step": 11710 }, { "epoch": 93.0159521435693, "grad_norm": 325.6644592285156, "learning_rate": 2.7851195469906224e-07, "loss": 22.4518, "step": 11720 }, { "epoch": 93.09571286141575, "grad_norm": 160.6322784423828, "learning_rate": 2.7820168456489137e-07, "loss": 25.4646, "step": 11730 }, { "epoch": 93.17547357926222, "grad_norm": 157.33323669433594, "learning_rate": 2.778913704260512e-07, "loss": 23.0021, "step": 11740 }, { "epoch": 93.25523429710867, "grad_norm": 486.8304748535156, "learning_rate": 2.775810127667425e-07, "loss": 24.9866, "step": 11750 }, { "epoch": 93.33499501495514, "grad_norm": 686.371826171875, "learning_rate": 2.772706120712333e-07, "loss": 26.729, "step": 11760 }, { "epoch": 93.4147557328016, "grad_norm": 607.1654663085938, "learning_rate": 2.7696016882385965e-07, "loss": 25.4789, "step": 11770 }, { "epoch": 93.49451645064805, "grad_norm": 944.3500366210938, "learning_rate": 2.766496835090234e-07, "loss": 23.3535, "step": 11780 }, { "epoch": 93.57427716849452, "grad_norm": 238.7883758544922, "learning_rate": 2.763391566111921e-07, "loss": 22.0236, "step": 11790 }, { "epoch": 93.65403788634097, "grad_norm": 242.50460815429688, "learning_rate": 2.7602858861489836e-07, "loss": 25.0649, "step": 
11800 }, { "epoch": 93.65403788634097, "eval_loss": 3.016071081161499, "eval_mae": 1.3057998418807983, "eval_mse": 3.016071319580078, "eval_r2": 0.09127247333526611, "eval_rmse": 1.7366840010721807, "eval_runtime": 9.0577, "eval_samples_per_second": 442.826, "eval_steps_per_second": 13.911, "step": 11800 }, { "epoch": 93.73379860418744, "grad_norm": 610.6392822265625, "learning_rate": 2.757179800047388e-07, "loss": 25.7441, "step": 11810 }, { "epoch": 93.8135593220339, "grad_norm": 132.06114196777344, "learning_rate": 2.7540733126537347e-07, "loss": 23.5077, "step": 11820 }, { "epoch": 93.89332003988036, "grad_norm": 398.90533447265625, "learning_rate": 2.75096642881525e-07, "loss": 26.1888, "step": 11830 }, { "epoch": 93.97308075772682, "grad_norm": 99.74539184570312, "learning_rate": 2.747859153379779e-07, "loss": 22.6881, "step": 11840 }, { "epoch": 94.04785643070788, "grad_norm": 413.5274963378906, "learning_rate": 2.744751491195778e-07, "loss": 22.3016, "step": 11850 }, { "epoch": 94.12761714855434, "grad_norm": 133.99134826660156, "learning_rate": 2.741643447112306e-07, "loss": 24.7051, "step": 11860 }, { "epoch": 94.2073778664008, "grad_norm": 280.4942321777344, "learning_rate": 2.7385350259790174e-07, "loss": 24.2716, "step": 11870 }, { "epoch": 94.28713858424726, "grad_norm": 116.86431884765625, "learning_rate": 2.7354262326461583e-07, "loss": 21.2392, "step": 11880 }, { "epoch": 94.36689930209371, "grad_norm": 579.6485595703125, "learning_rate": 2.732317071964551e-07, "loss": 26.4136, "step": 11890 }, { "epoch": 94.44666001994018, "grad_norm": 457.6625671386719, "learning_rate": 2.7292075487855953e-07, "loss": 23.9248, "step": 11900 }, { "epoch": 94.44666001994018, "eval_loss": 3.0183889865875244, "eval_mae": 1.309451699256897, "eval_mse": 3.0183889865875244, "eval_r2": 0.09057420492172241, "eval_rmse": 1.737351140842727, "eval_runtime": 9.0629, "eval_samples_per_second": 442.573, "eval_steps_per_second": 13.903, "step": 11900 }, { "epoch": 
94.52642073778664, "grad_norm": 76.22840881347656, "learning_rate": 2.726097667961253e-07, "loss": 21.309, "step": 11910 }, { "epoch": 94.6061814556331, "grad_norm": 411.46966552734375, "learning_rate": 2.7229874343440457e-07, "loss": 24.1257, "step": 11920 }, { "epoch": 94.68594217347956, "grad_norm": 240.69602966308594, "learning_rate": 2.719876852787045e-07, "loss": 24.3583, "step": 11930 }, { "epoch": 94.76570289132601, "grad_norm": 134.96070861816406, "learning_rate": 2.716765928143867e-07, "loss": 25.1843, "step": 11940 }, { "epoch": 94.84546360917248, "grad_norm": 162.1640167236328, "learning_rate": 2.7136546652686597e-07, "loss": 27.6058, "step": 11950 }, { "epoch": 94.92522432701894, "grad_norm": 493.3135070800781, "learning_rate": 2.710543069016103e-07, "loss": 25.1438, "step": 11960 }, { "epoch": 95.0, "grad_norm": 204.4506378173828, "learning_rate": 2.707431144241394e-07, "loss": 22.6437, "step": 11970 }, { "epoch": 95.07976071784645, "grad_norm": 404.43487548828125, "learning_rate": 2.7043188958002433e-07, "loss": 22.4641, "step": 11980 }, { "epoch": 95.15952143569292, "grad_norm": 255.8114013671875, "learning_rate": 2.701206328548866e-07, "loss": 24.3847, "step": 11990 }, { "epoch": 95.23928215353938, "grad_norm": 99.701416015625, "learning_rate": 2.698093447343977e-07, "loss": 24.5863, "step": 12000 }, { "epoch": 95.23928215353938, "eval_loss": 3.018806219100952, "eval_mae": 1.302077293395996, "eval_mse": 3.018806219100952, "eval_r2": 0.09044843912124634, "eval_rmse": 1.7374712138913129, "eval_runtime": 9.0712, "eval_samples_per_second": 442.166, "eval_steps_per_second": 13.89, "step": 12000 }, { "epoch": 95.31904287138585, "grad_norm": 299.7443542480469, "learning_rate": 2.694980257042778e-07, "loss": 26.4086, "step": 12010 }, { "epoch": 95.3988035892323, "grad_norm": 141.34422302246094, "learning_rate": 2.691866762502954e-07, "loss": 24.9383, "step": 12020 }, { "epoch": 95.47856430707877, "grad_norm": 125.73936462402344, "learning_rate": 
2.688752968582666e-07, "loss": 23.8145, "step": 12030 }, { "epoch": 95.55832502492522, "grad_norm": 325.127685546875, "learning_rate": 2.6856388801405417e-07, "loss": 22.7498, "step": 12040 }, { "epoch": 95.63808574277168, "grad_norm": 281.2471008300781, "learning_rate": 2.6825245020356656e-07, "loss": 23.1331, "step": 12050 }, { "epoch": 95.71784646061815, "grad_norm": 340.7892761230469, "learning_rate": 2.679409839127579e-07, "loss": 23.1226, "step": 12060 }, { "epoch": 95.7976071784646, "grad_norm": 173.33863830566406, "learning_rate": 2.676294896276264e-07, "loss": 26.4354, "step": 12070 }, { "epoch": 95.87736789631107, "grad_norm": 119.07532501220703, "learning_rate": 2.67317967834214e-07, "loss": 24.2303, "step": 12080 }, { "epoch": 95.95712861415753, "grad_norm": 769.8329467773438, "learning_rate": 2.670064190186056e-07, "loss": 25.4267, "step": 12090 }, { "epoch": 96.03190428713859, "grad_norm": 285.7257080078125, "learning_rate": 2.666948436669284e-07, "loss": 22.7673, "step": 12100 }, { "epoch": 96.03190428713859, "eval_loss": 3.018017292022705, "eval_mae": 1.3130460977554321, "eval_mse": 3.018017292022705, "eval_r2": 0.0906861424446106, "eval_rmse": 1.7372441659198932, "eval_runtime": 9.0712, "eval_samples_per_second": 442.169, "eval_steps_per_second": 13.89, "step": 12100 }, { "epoch": 96.11166500498504, "grad_norm": 663.724365234375, "learning_rate": 2.663832422653508e-07, "loss": 23.2171, "step": 12110 }, { "epoch": 96.19142572283151, "grad_norm": 966.6635131835938, "learning_rate": 2.66071615300082e-07, "loss": 23.5348, "step": 12120 }, { "epoch": 96.27118644067797, "grad_norm": 242.81092834472656, "learning_rate": 2.6575996325737095e-07, "loss": 24.815, "step": 12130 }, { "epoch": 96.35094715852442, "grad_norm": 310.6782531738281, "learning_rate": 2.654482866235059e-07, "loss": 24.0334, "step": 12140 }, { "epoch": 96.43070787637089, "grad_norm": 231.919921875, "learning_rate": 2.6513658588481316e-07, "loss": 22.9641, "step": 12150 }, { "epoch": 
96.51046859421734, "grad_norm": 422.7674560546875, "learning_rate": 2.6482486152765707e-07, "loss": 26.6106, "step": 12160 }, { "epoch": 96.59022931206381, "grad_norm": 187.1031036376953, "learning_rate": 2.6451311403843856e-07, "loss": 24.2406, "step": 12170 }, { "epoch": 96.66999002991027, "grad_norm": 338.5788269042969, "learning_rate": 2.6420134390359473e-07, "loss": 22.6762, "step": 12180 }, { "epoch": 96.74975074775674, "grad_norm": 251.15664672851562, "learning_rate": 2.6388955160959794e-07, "loss": 26.2033, "step": 12190 }, { "epoch": 96.82951146560319, "grad_norm": 271.2379455566406, "learning_rate": 2.6357773764295527e-07, "loss": 26.1962, "step": 12200 }, { "epoch": 96.82951146560319, "eval_loss": 3.0130105018615723, "eval_mae": 1.3046283721923828, "eval_mse": 3.0130105018615723, "eval_r2": 0.09219461679458618, "eval_rmse": 1.735802552671695, "eval_runtime": 9.0648, "eval_samples_per_second": 442.482, "eval_steps_per_second": 13.9, "step": 12200 }, { "epoch": 96.90927218344964, "grad_norm": 424.63641357421875, "learning_rate": 2.632659024902074e-07, "loss": 24.5048, "step": 12210 }, { "epoch": 96.98903290129611, "grad_norm": 644.2691650390625, "learning_rate": 2.629540466379284e-07, "loss": 23.5517, "step": 12220 }, { "epoch": 97.06380857427718, "grad_norm": 154.794921875, "learning_rate": 2.6264217057272414e-07, "loss": 21.9214, "step": 12230 }, { "epoch": 97.14356929212363, "grad_norm": 594.4990234375, "learning_rate": 2.6233027478123266e-07, "loss": 24.9934, "step": 12240 }, { "epoch": 97.22333000997008, "grad_norm": 130.84840393066406, "learning_rate": 2.6201835975012215e-07, "loss": 26.1297, "step": 12250 }, { "epoch": 97.30309072781655, "grad_norm": 130.3658447265625, "learning_rate": 2.617064259660914e-07, "loss": 24.8638, "step": 12260 }, { "epoch": 97.38285144566301, "grad_norm": 263.8542175292969, "learning_rate": 2.613944739158679e-07, "loss": 24.361, "step": 12270 }, { "epoch": 97.46261216350948, "grad_norm": 865.0307006835938, 
"learning_rate": 2.610825040862081e-07, "loss": 24.5309, "step": 12280 }, { "epoch": 97.54237288135593, "grad_norm": 265.0056457519531, "learning_rate": 2.6077051696389596e-07, "loss": 21.9346, "step": 12290 }, { "epoch": 97.6221335992024, "grad_norm": 288.1367492675781, "learning_rate": 2.604585130357425e-07, "loss": 24.2231, "step": 12300 }, { "epoch": 97.6221335992024, "eval_loss": 3.013296365737915, "eval_mae": 1.3062692880630493, "eval_mse": 3.013295888900757, "eval_r2": 0.09210866689682007, "eval_rmse": 1.735884756802927, "eval_runtime": 9.062, "eval_samples_per_second": 442.62, "eval_steps_per_second": 13.904, "step": 12300 }, { "epoch": 97.70189431704885, "grad_norm": 181.19395446777344, "learning_rate": 2.601464927885848e-07, "loss": 22.9514, "step": 12310 }, { "epoch": 97.78165503489531, "grad_norm": 434.2189025878906, "learning_rate": 2.5983445670928584e-07, "loss": 24.1628, "step": 12320 }, { "epoch": 97.86141575274178, "grad_norm": 972.9837036132812, "learning_rate": 2.595224052847327e-07, "loss": 25.6507, "step": 12330 }, { "epoch": 97.94117647058823, "grad_norm": 75.59383392333984, "learning_rate": 2.5921033900183686e-07, "loss": 24.7835, "step": 12340 }, { "epoch": 98.0159521435693, "grad_norm": 193.68312072753906, "learning_rate": 2.5889825834753275e-07, "loss": 22.2126, "step": 12350 }, { "epoch": 98.09571286141575, "grad_norm": 606.6470336914062, "learning_rate": 2.5858616380877744e-07, "loss": 26.1878, "step": 12360 }, { "epoch": 98.17547357926222, "grad_norm": 162.39686584472656, "learning_rate": 2.582740558725495e-07, "loss": 25.0336, "step": 12370 }, { "epoch": 98.25523429710867, "grad_norm": 252.45741271972656, "learning_rate": 2.579619350258482e-07, "loss": 23.3831, "step": 12380 }, { "epoch": 98.33499501495514, "grad_norm": 379.183837890625, "learning_rate": 2.576498017556936e-07, "loss": 25.4473, "step": 12390 }, { "epoch": 98.4147557328016, "grad_norm": 163.36947631835938, "learning_rate": 2.5733765654912444e-07, "loss": 23.0562, "step": 
12400 }, { "epoch": 98.4147557328016, "eval_loss": 3.018754482269287, "eval_mae": 1.313119888305664, "eval_mse": 3.018754243850708, "eval_r2": 0.09046405553817749, "eval_rmse": 1.737456256672584, "eval_runtime": 9.0448, "eval_samples_per_second": 443.459, "eval_steps_per_second": 13.931, "step": 12400 }, { "epoch": 98.49451645064805, "grad_norm": 354.3284912109375, "learning_rate": 2.5702549989319846e-07, "loss": 25.1572, "step": 12410 }, { "epoch": 98.57427716849452, "grad_norm": 224.61061096191406, "learning_rate": 2.5671333227499114e-07, "loss": 22.5498, "step": 12420 }, { "epoch": 98.65403788634097, "grad_norm": 147.78477478027344, "learning_rate": 2.564011541815953e-07, "loss": 24.1911, "step": 12430 }, { "epoch": 98.73379860418744, "grad_norm": 600.3642578125, "learning_rate": 2.560889661001197e-07, "loss": 22.6389, "step": 12440 }, { "epoch": 98.8135593220339, "grad_norm": 491.30615234375, "learning_rate": 2.5577676851768897e-07, "loss": 24.767, "step": 12450 }, { "epoch": 98.89332003988036, "grad_norm": 287.7174377441406, "learning_rate": 2.554645619214426e-07, "loss": 25.9285, "step": 12460 }, { "epoch": 98.97308075772682, "grad_norm": 302.0243835449219, "learning_rate": 2.5515234679853384e-07, "loss": 25.2223, "step": 12470 }, { "epoch": 99.04785643070788, "grad_norm": 507.0435791015625, "learning_rate": 2.548401236361296e-07, "loss": 22.9346, "step": 12480 }, { "epoch": 99.12761714855434, "grad_norm": 666.8696899414062, "learning_rate": 2.545278929214092e-07, "loss": 23.28, "step": 12490 }, { "epoch": 99.2073778664008, "grad_norm": 985.0255737304688, "learning_rate": 2.5421565514156363e-07, "loss": 24.0838, "step": 12500 }, { "epoch": 99.2073778664008, "eval_loss": 3.0152266025543213, "eval_mae": 1.3043137788772583, "eval_mse": 3.0152266025543213, "eval_r2": 0.09152692556381226, "eval_rmse": 1.7364407857898068, "eval_runtime": 9.0555, "eval_samples_per_second": 442.934, "eval_steps_per_second": 13.914, "step": 12500 }, { "epoch": 99.28713858424726, 
"grad_norm": 222.13906860351562, "learning_rate": 2.53903410783795e-07, "loss": 25.2658, "step": 12510 }, { "epoch": 99.36689930209371, "grad_norm": 152.34921264648438, "learning_rate": 2.535911603353158e-07, "loss": 24.5804, "step": 12520 }, { "epoch": 99.44666001994018, "grad_norm": 160.31858825683594, "learning_rate": 2.5327890428334787e-07, "loss": 23.144, "step": 12530 }, { "epoch": 99.52642073778664, "grad_norm": 447.6943359375, "learning_rate": 2.529666431151216e-07, "loss": 24.772, "step": 12540 }, { "epoch": 99.6061814556331, "grad_norm": 331.9676513671875, "learning_rate": 2.52654377317876e-07, "loss": 26.1398, "step": 12550 }, { "epoch": 99.68594217347956, "grad_norm": 339.2489929199219, "learning_rate": 2.5234210737885657e-07, "loss": 24.3045, "step": 12560 }, { "epoch": 99.76570289132601, "grad_norm": 315.4028625488281, "learning_rate": 2.5202983378531574e-07, "loss": 22.0449, "step": 12570 }, { "epoch": 99.84546360917248, "grad_norm": 145.0469207763672, "learning_rate": 2.5171755702451145e-07, "loss": 24.9994, "step": 12580 }, { "epoch": 99.92522432701894, "grad_norm": 112.26505279541016, "learning_rate": 2.514052775837068e-07, "loss": 24.0593, "step": 12590 }, { "epoch": 100.0, "grad_norm": 175.2232208251953, "learning_rate": 2.510929959501686e-07, "loss": 22.7972, "step": 12600 }, { "epoch": 100.0, "eval_loss": 3.0114493370056152, "eval_mae": 1.3021821975708008, "eval_mse": 3.0114493370056152, "eval_r2": 0.09266507625579834, "eval_rmse": 1.7353527990024435, "eval_runtime": 9.0557, "eval_samples_per_second": 442.926, "eval_steps_per_second": 13.914, "step": 12600 }, { "epoch": 100.07976071784645, "grad_norm": 133.75238037109375, "learning_rate": 2.507807126111676e-07, "loss": 24.7391, "step": 12610 }, { "epoch": 100.15952143569292, "grad_norm": 530.0465698242188, "learning_rate": 2.504684280539769e-07, "loss": 24.52, "step": 12620 }, { "epoch": 100.23928215353938, "grad_norm": 147.8439483642578, "learning_rate": 2.501561427658718e-07, "loss": 
23.4067, "step": 12630 }, { "epoch": 100.31904287138585, "grad_norm": 270.1204528808594, "learning_rate": 2.498438572341282e-07, "loss": 24.5654, "step": 12640 }, { "epoch": 100.3988035892323, "grad_norm": 371.5059814453125, "learning_rate": 2.49531571946023e-07, "loss": 23.2518, "step": 12650 }, { "epoch": 100.47856430707877, "grad_norm": 1076.48388671875, "learning_rate": 2.4921928738883245e-07, "loss": 25.6973, "step": 12660 }, { "epoch": 100.55832502492522, "grad_norm": 365.20404052734375, "learning_rate": 2.4890700404983143e-07, "loss": 25.2121, "step": 12670 }, { "epoch": 100.63808574277168, "grad_norm": 402.4895324707031, "learning_rate": 2.4859472241629325e-07, "loss": 23.8951, "step": 12680 }, { "epoch": 100.71784646061815, "grad_norm": 89.0658950805664, "learning_rate": 2.4828244297548847e-07, "loss": 24.1582, "step": 12690 }, { "epoch": 100.7976071784646, "grad_norm": 401.564208984375, "learning_rate": 2.4797016621468424e-07, "loss": 24.6082, "step": 12700 }, { "epoch": 100.7976071784646, "eval_loss": 3.0105321407318115, "eval_mae": 1.3034520149230957, "eval_mse": 3.0105321407318115, "eval_r2": 0.09294140338897705, "eval_rmse": 1.7350885109215066, "eval_runtime": 9.0681, "eval_samples_per_second": 442.319, "eval_steps_per_second": 13.895, "step": 12700 }, { "epoch": 100.87736789631107, "grad_norm": 164.41128540039062, "learning_rate": 2.476578926211434e-07, "loss": 24.2216, "step": 12710 }, { "epoch": 100.95712861415753, "grad_norm": 408.20245361328125, "learning_rate": 2.47345622682124e-07, "loss": 22.3349, "step": 12720 }, { "epoch": 101.03190428713859, "grad_norm": 675.1469116210938, "learning_rate": 2.4703335688487833e-07, "loss": 23.9997, "step": 12730 }, { "epoch": 101.11166500498504, "grad_norm": 650.2384033203125, "learning_rate": 2.467210957166522e-07, "loss": 26.1559, "step": 12740 }, { "epoch": 101.19142572283151, "grad_norm": 754.37939453125, "learning_rate": 2.464088396646842e-07, "loss": 22.9195, "step": 12750 }, { "epoch": 
101.27118644067797, "grad_norm": 78.88389587402344, "learning_rate": 2.46096589216205e-07, "loss": 25.7293, "step": 12760 }, { "epoch": 101.35094715852442, "grad_norm": 853.6570434570312, "learning_rate": 2.4578434485843634e-07, "loss": 25.0469, "step": 12770 }, { "epoch": 101.43070787637089, "grad_norm": 112.20699310302734, "learning_rate": 2.4547210707859085e-07, "loss": 24.1414, "step": 12780 }, { "epoch": 101.51046859421734, "grad_norm": 456.0918884277344, "learning_rate": 2.451598763638704e-07, "loss": 22.4086, "step": 12790 }, { "epoch": 101.59022931206381, "grad_norm": 497.4643859863281, "learning_rate": 2.4484765320146613e-07, "loss": 25.9025, "step": 12800 }, { "epoch": 101.59022931206381, "eval_loss": 3.0093250274658203, "eval_mae": 1.304242730140686, "eval_mse": 3.0093250274658203, "eval_r2": 0.09330511093139648, "eval_rmse": 1.7347406225328963, "eval_runtime": 9.0622, "eval_samples_per_second": 442.609, "eval_steps_per_second": 13.904, "step": 12800 }, { "epoch": 101.66999002991027, "grad_norm": 251.6475372314453, "learning_rate": 2.445354380785574e-07, "loss": 25.1064, "step": 12810 }, { "epoch": 101.74975074775674, "grad_norm": 1158.7481689453125, "learning_rate": 2.4422323148231106e-07, "loss": 23.5001, "step": 12820 }, { "epoch": 101.82951146560319, "grad_norm": 301.5332336425781, "learning_rate": 2.439110338998803e-07, "loss": 23.0398, "step": 12830 }, { "epoch": 101.90927218344964, "grad_norm": 755.46826171875, "learning_rate": 2.435988458184047e-07, "loss": 24.2049, "step": 12840 }, { "epoch": 101.98903290129611, "grad_norm": 554.9736938476562, "learning_rate": 2.433178850710021e-07, "loss": 22.9534, "step": 12850 }, { "epoch": 102.06380857427718, "grad_norm": 902.829345703125, "learning_rate": 2.430057163833565e-07, "loss": 21.9611, "step": 12860 }, { "epoch": 102.14356929212363, "grad_norm": 263.4960021972656, "learning_rate": 2.426935586092837e-07, "loss": 26.8969, "step": 12870 }, { "epoch": 102.22333000997008, "grad_norm": 
396.06732177734375, "learning_rate": 2.4238141223586095e-07, "loss": 24.1553, "step": 12880 }, { "epoch": 102.30309072781655, "grad_norm": 95.65509033203125, "learning_rate": 2.420692777501478e-07, "loss": 26.1854, "step": 12890 }, { "epoch": 102.38285144566301, "grad_norm": 586.4658203125, "learning_rate": 2.4175715563918506e-07, "loss": 24.511, "step": 12900 }, { "epoch": 102.38285144566301, "eval_loss": 3.0080695152282715, "eval_mae": 1.3039594888687134, "eval_mse": 3.0080695152282715, "eval_r2": 0.09368336200714111, "eval_rmse": 1.734378711593368, "eval_runtime": 9.0565, "eval_samples_per_second": 442.885, "eval_steps_per_second": 13.913, "step": 12900 }, { "epoch": 102.46261216350948, "grad_norm": 553.6513671875, "learning_rate": 2.414450463899944e-07, "loss": 21.9434, "step": 12910 }, { "epoch": 102.54237288135593, "grad_norm": 198.5731201171875, "learning_rate": 2.4113295048957736e-07, "loss": 26.5607, "step": 12920 }, { "epoch": 102.6221335992024, "grad_norm": 322.967041015625, "learning_rate": 2.4082086842491485e-07, "loss": 22.1324, "step": 12930 }, { "epoch": 102.70189431704885, "grad_norm": 430.9654846191406, "learning_rate": 2.4050880068296575e-07, "loss": 21.6502, "step": 12940 }, { "epoch": 102.78165503489531, "grad_norm": 337.82489013671875, "learning_rate": 2.401967477506669e-07, "loss": 23.6025, "step": 12950 }, { "epoch": 102.86141575274178, "grad_norm": 136.6009521484375, "learning_rate": 2.398847101149321e-07, "loss": 25.4659, "step": 12960 }, { "epoch": 102.94117647058823, "grad_norm": 982.8609619140625, "learning_rate": 2.3957268826265124e-07, "loss": 25.8761, "step": 12970 }, { "epoch": 103.0159521435693, "grad_norm": 431.97381591796875, "learning_rate": 2.3926068268068915e-07, "loss": 20.8754, "step": 12980 }, { "epoch": 103.09571286141575, "grad_norm": 145.51699829101562, "learning_rate": 2.3894869385588595e-07, "loss": 24.1687, "step": 12990 }, { "epoch": 103.17547357926222, "grad_norm": 159.59939575195312, "learning_rate": 
2.3863672227505515e-07, "loss": 24.4449, "step": 13000 }, { "epoch": 103.17547357926222, "eval_loss": 3.012988805770874, "eval_mae": 1.3111497163772583, "eval_mse": 3.012989044189453, "eval_r2": 0.09220117330551147, "eval_rmse": 1.7357963717525893, "eval_runtime": 9.0501, "eval_samples_per_second": 443.198, "eval_steps_per_second": 13.922, "step": 13000 }, { "epoch": 103.25523429710867, "grad_norm": 113.45722961425781, "learning_rate": 2.383247684249836e-07, "loss": 26.0927, "step": 13010 }, { "epoch": 103.33499501495514, "grad_norm": 269.3424987792969, "learning_rate": 2.380128327924303e-07, "loss": 26.8863, "step": 13020 }, { "epoch": 103.4147557328016, "grad_norm": 143.92015075683594, "learning_rate": 2.377009158641259e-07, "loss": 23.3071, "step": 13030 }, { "epoch": 103.49451645064805, "grad_norm": 653.4163208007812, "learning_rate": 2.3738901812677184e-07, "loss": 23.7985, "step": 13040 }, { "epoch": 103.57427716849452, "grad_norm": 617.9258422851562, "learning_rate": 2.3707714006703978e-07, "loss": 24.1426, "step": 13050 }, { "epoch": 103.65403788634097, "grad_norm": 101.50848388671875, "learning_rate": 2.3676528217157034e-07, "loss": 22.7891, "step": 13060 }, { "epoch": 103.73379860418744, "grad_norm": 145.66314697265625, "learning_rate": 2.3645344492697303e-07, "loss": 25.2242, "step": 13070 }, { "epoch": 103.8135593220339, "grad_norm": 667.2296752929688, "learning_rate": 2.3614162881982492e-07, "loss": 21.9651, "step": 13080 }, { "epoch": 103.89332003988036, "grad_norm": 245.80865478515625, "learning_rate": 2.3582983433667023e-07, "loss": 24.2691, "step": 13090 }, { "epoch": 103.97308075772682, "grad_norm": 120.25707244873047, "learning_rate": 2.3551806196401925e-07, "loss": 24.1889, "step": 13100 }, { "epoch": 103.97308075772682, "eval_loss": 3.008435010910034, "eval_mae": 1.3024288415908813, "eval_mse": 3.008435010910034, "eval_r2": 0.09357321262359619, "eval_rmse": 1.734484076291862, "eval_runtime": 9.0327, "eval_samples_per_second": 444.052, 
"eval_steps_per_second": 13.949, "step": 13100 }, { "epoch": 104.04785643070788, "grad_norm": 273.62322998046875, "learning_rate": 2.3520631218834785e-07, "loss": 22.3676, "step": 13110 }, { "epoch": 104.12761714855434, "grad_norm": 240.7616424560547, "learning_rate": 2.348945854960969e-07, "loss": 25.3213, "step": 13120 }, { "epoch": 104.2073778664008, "grad_norm": 803.4730224609375, "learning_rate": 2.34582882373671e-07, "loss": 26.0542, "step": 13130 }, { "epoch": 104.28713858424726, "grad_norm": 475.0230712890625, "learning_rate": 2.3427120330743772e-07, "loss": 23.7392, "step": 13140 }, { "epoch": 104.36689930209371, "grad_norm": 298.427490234375, "learning_rate": 2.3395954878372765e-07, "loss": 24.3703, "step": 13150 }, { "epoch": 104.44666001994018, "grad_norm": 445.6641845703125, "learning_rate": 2.3364791928883273e-07, "loss": 25.0728, "step": 13160 }, { "epoch": 104.52642073778664, "grad_norm": 1277.1727294921875, "learning_rate": 2.3333631530900598e-07, "loss": 22.9316, "step": 13170 }, { "epoch": 104.6061814556331, "grad_norm": 218.64808654785156, "learning_rate": 2.3302473733046046e-07, "loss": 23.0187, "step": 13180 }, { "epoch": 104.68594217347956, "grad_norm": 415.3343505859375, "learning_rate": 2.3271318583936882e-07, "loss": 22.822, "step": 13190 }, { "epoch": 104.76570289132601, "grad_norm": 667.0796508789062, "learning_rate": 2.3240166132186228e-07, "loss": 24.5512, "step": 13200 }, { "epoch": 104.76570289132601, "eval_loss": 3.019092559814453, "eval_mae": 1.2983890771865845, "eval_mse": 3.019092559814453, "eval_r2": 0.09036219120025635, "eval_rmse": 1.7375536135079266, "eval_runtime": 9.0355, "eval_samples_per_second": 443.914, "eval_steps_per_second": 13.945, "step": 13200 }, { "epoch": 104.84546360917248, "grad_norm": 70.65787506103516, "learning_rate": 2.3209016426403012e-07, "loss": 22.9797, "step": 13210 }, { "epoch": 104.92522432701894, "grad_norm": 702.3170776367188, "learning_rate": 2.3177869515191846e-07, "loss": 26.5521, "step": 13220 
}, { "epoch": 105.0, "grad_norm": 385.10113525390625, "learning_rate": 2.3146725447153015e-07, "loss": 22.5486, "step": 13230 }, { "epoch": 105.07976071784645, "grad_norm": 162.52066040039062, "learning_rate": 2.311558427088235e-07, "loss": 25.1436, "step": 13240 }, { "epoch": 105.15952143569292, "grad_norm": 175.56246948242188, "learning_rate": 2.308444603497117e-07, "loss": 23.1313, "step": 13250 }, { "epoch": 105.23928215353938, "grad_norm": 135.90574645996094, "learning_rate": 2.3053310788006232e-07, "loss": 23.9574, "step": 13260 }, { "epoch": 105.31904287138585, "grad_norm": 741.7534790039062, "learning_rate": 2.3022178578569577e-07, "loss": 22.7555, "step": 13270 }, { "epoch": 105.3988035892323, "grad_norm": 619.00341796875, "learning_rate": 2.2991049455238543e-07, "loss": 25.2691, "step": 13280 }, { "epoch": 105.47856430707877, "grad_norm": 187.55543518066406, "learning_rate": 2.2959923466585662e-07, "loss": 24.2961, "step": 13290 }, { "epoch": 105.55832502492522, "grad_norm": 225.87303161621094, "learning_rate": 2.2928800661178556e-07, "loss": 24.1411, "step": 13300 }, { "epoch": 105.55832502492522, "eval_loss": 3.007848024368286, "eval_mae": 1.307215929031372, "eval_mse": 3.007847785949707, "eval_r2": 0.09375017881393433, "eval_rmse": 1.7343147885979946, "eval_runtime": 9.0519, "eval_samples_per_second": 443.109, "eval_steps_per_second": 13.92, "step": 13300 }, { "epoch": 105.63808574277168, "grad_norm": 872.1564331054688, "learning_rate": 2.289768108757986e-07, "loss": 25.6431, "step": 13310 }, { "epoch": 105.71784646061815, "grad_norm": 203.1824493408203, "learning_rate": 2.2866564794347216e-07, "loss": 25.1792, "step": 13320 }, { "epoch": 105.7976071784646, "grad_norm": 400.51959228515625, "learning_rate": 2.2835451830033105e-07, "loss": 24.4278, "step": 13330 }, { "epoch": 105.87736789631107, "grad_norm": 103.01773071289062, "learning_rate": 2.2804342243184841e-07, "loss": 22.2797, "step": 13340 }, { "epoch": 105.95712861415753, "grad_norm": 
441.1045837402344, "learning_rate": 2.2773236082344443e-07, "loss": 23.2526, "step": 13350 }, { "epoch": 106.03190428713859, "grad_norm": 530.3433837890625, "learning_rate": 2.2742133396048602e-07, "loss": 22.9374, "step": 13360 }, { "epoch": 106.11166500498504, "grad_norm": 233.91416931152344, "learning_rate": 2.2711034232828585e-07, "loss": 24.1659, "step": 13370 }, { "epoch": 106.19142572283151, "grad_norm": 145.86288452148438, "learning_rate": 2.2679938641210166e-07, "loss": 23.9056, "step": 13380 }, { "epoch": 106.27118644067797, "grad_norm": 311.06671142578125, "learning_rate": 2.2648846669713525e-07, "loss": 24.7324, "step": 13390 }, { "epoch": 106.35094715852442, "grad_norm": 700.2940673828125, "learning_rate": 2.2617758366853216e-07, "loss": 25.6364, "step": 13400 }, { "epoch": 106.35094715852442, "eval_loss": 3.0100624561309814, "eval_mae": 1.3105032444000244, "eval_mse": 3.0100626945495605, "eval_r2": 0.09308284521102905, "eval_rmse": 1.7349532254644677, "eval_runtime": 9.0428, "eval_samples_per_second": 443.557, "eval_steps_per_second": 13.934, "step": 13400 }, { "epoch": 106.43070787637089, "grad_norm": 1145.089111328125, "learning_rate": 2.2586673781138056e-07, "loss": 23.707, "step": 13410 }, { "epoch": 106.51046859421734, "grad_norm": 577.6820068359375, "learning_rate": 2.2555592961071085e-07, "loss": 23.0162, "step": 13420 }, { "epoch": 106.59022931206381, "grad_norm": 449.773193359375, "learning_rate": 2.2524515955149423e-07, "loss": 25.7381, "step": 13430 }, { "epoch": 106.66999002991027, "grad_norm": 265.3964538574219, "learning_rate": 2.249344281186427e-07, "loss": 22.8916, "step": 13440 }, { "epoch": 106.74975074775674, "grad_norm": 185.62965393066406, "learning_rate": 2.2462373579700806e-07, "loss": 26.5899, "step": 13450 }, { "epoch": 106.82951146560319, "grad_norm": 283.2608642578125, "learning_rate": 2.2431308307138094e-07, "loss": 25.1778, "step": 13460 }, { "epoch": 106.90927218344964, "grad_norm": 156.2736053466797, "learning_rate": 
2.2400247042649004e-07, "loss": 23.2291, "step": 13470 }, { "epoch": 106.98903290129611, "grad_norm": 154.14137268066406, "learning_rate": 2.236918983470018e-07, "loss": 22.8961, "step": 13480 }, { "epoch": 107.06380857427718, "grad_norm": 113.89803314208984, "learning_rate": 2.2338136731751924e-07, "loss": 24.3879, "step": 13490 }, { "epoch": 107.14356929212363, "grad_norm": 179.24110412597656, "learning_rate": 2.2307087782258139e-07, "loss": 22.0681, "step": 13500 }, { "epoch": 107.14356929212363, "eval_loss": 3.004845142364502, "eval_mae": 1.3042222261428833, "eval_mse": 3.004845142364502, "eval_r2": 0.0946548581123352, "eval_rmse": 1.7334489154181907, "eval_runtime": 9.0518, "eval_samples_per_second": 443.117, "eval_steps_per_second": 13.92, "step": 13500 }, { "epoch": 107.22333000997008, "grad_norm": 345.2991943359375, "learning_rate": 2.2276043034666225e-07, "loss": 23.8587, "step": 13510 }, { "epoch": 107.30309072781655, "grad_norm": 252.33543395996094, "learning_rate": 2.2245002537417053e-07, "loss": 24.27, "step": 13520 }, { "epoch": 107.38285144566301, "grad_norm": 220.6058349609375, "learning_rate": 2.221396633894485e-07, "loss": 23.0785, "step": 13530 }, { "epoch": 107.46261216350948, "grad_norm": 570.069091796875, "learning_rate": 2.2182934487677137e-07, "loss": 25.1232, "step": 13540 }, { "epoch": 107.54237288135593, "grad_norm": 559.7032470703125, "learning_rate": 2.2151907032034643e-07, "loss": 24.3478, "step": 13550 }, { "epoch": 107.6221335992024, "grad_norm": 784.8019409179688, "learning_rate": 2.2120884020431259e-07, "loss": 26.435, "step": 13560 }, { "epoch": 107.70189431704885, "grad_norm": 526.9161987304688, "learning_rate": 2.2089865501273915e-07, "loss": 23.47, "step": 13570 }, { "epoch": 107.78165503489531, "grad_norm": 127.85801696777344, "learning_rate": 2.2058851522962563e-07, "loss": 22.3902, "step": 13580 }, { "epoch": 107.86141575274178, "grad_norm": 259.3059997558594, "learning_rate": 2.2027842133890032e-07, "loss": 25.3022, "step": 
13590 }, { "epoch": 107.94117647058823, "grad_norm": 161.84365844726562, "learning_rate": 2.1996837382442016e-07, "loss": 24.8369, "step": 13600 }, { "epoch": 107.94117647058823, "eval_loss": 3.0051534175872803, "eval_mae": 1.299922227859497, "eval_mse": 3.0051534175872803, "eval_r2": 0.09456199407577515, "eval_rmse": 1.7335378327533784, "eval_runtime": 9.0565, "eval_samples_per_second": 442.888, "eval_steps_per_second": 13.913, "step": 13600 }, { "epoch": 108.0159521435693, "grad_norm": 451.6736145019531, "learning_rate": 2.196583731699697e-07, "loss": 21.9785, "step": 13610 }, { "epoch": 108.09571286141575, "grad_norm": 244.07496643066406, "learning_rate": 2.193484198592605e-07, "loss": 24.9731, "step": 13620 }, { "epoch": 108.17547357926222, "grad_norm": 86.17716217041016, "learning_rate": 2.1903851437592973e-07, "loss": 23.5088, "step": 13630 }, { "epoch": 108.25523429710867, "grad_norm": 351.5981140136719, "learning_rate": 2.1872865720354056e-07, "loss": 25.4712, "step": 13640 }, { "epoch": 108.33499501495514, "grad_norm": 787.5492553710938, "learning_rate": 2.1841884882558038e-07, "loss": 23.8798, "step": 13650 }, { "epoch": 108.4147557328016, "grad_norm": 407.19866943359375, "learning_rate": 2.1810908972546065e-07, "loss": 23.8723, "step": 13660 }, { "epoch": 108.49451645064805, "grad_norm": 136.1316375732422, "learning_rate": 2.177993803865158e-07, "loss": 25.1518, "step": 13670 }, { "epoch": 108.57427716849452, "grad_norm": 170.29415893554688, "learning_rate": 2.174897212920027e-07, "loss": 23.5138, "step": 13680 }, { "epoch": 108.65403788634097, "grad_norm": 222.90199279785156, "learning_rate": 2.1718011292509974e-07, "loss": 24.3218, "step": 13690 }, { "epoch": 108.73379860418744, "grad_norm": 196.23448181152344, "learning_rate": 2.1687055576890632e-07, "loss": 23.3596, "step": 13700 }, { "epoch": 108.73379860418744, "eval_loss": 3.0122363567352295, "eval_mae": 1.2974759340286255, "eval_mse": 3.0122363567352295, "eval_r2": 0.09242790937423706, 
"eval_rmse": 1.7355795449172675, "eval_runtime": 9.1022, "eval_samples_per_second": 440.661, "eval_steps_per_second": 13.843, "step": 13700 }, { "epoch": 108.8135593220339, "grad_norm": 845.6932983398438, "learning_rate": 2.1656105030644167e-07, "loss": 25.9375, "step": 13710 }, { "epoch": 108.89332003988036, "grad_norm": 167.6203155517578, "learning_rate": 2.1625159702064465e-07, "loss": 22.899, "step": 13720 }, { "epoch": 108.97308075772682, "grad_norm": 1029.1893310546875, "learning_rate": 2.1594219639437244e-07, "loss": 22.4538, "step": 13730 }, { "epoch": 109.04785643070788, "grad_norm": 602.4310913085938, "learning_rate": 2.1563284891040034e-07, "loss": 24.0515, "step": 13740 }, { "epoch": 109.12761714855434, "grad_norm": 740.744873046875, "learning_rate": 2.1532355505142044e-07, "loss": 24.1448, "step": 13750 }, { "epoch": 109.2073778664008, "grad_norm": 124.3176498413086, "learning_rate": 2.1501431530004133e-07, "loss": 25.6922, "step": 13760 }, { "epoch": 109.28713858424726, "grad_norm": 1032.07421875, "learning_rate": 2.1470513013878705e-07, "loss": 23.7145, "step": 13770 }, { "epoch": 109.36689930209371, "grad_norm": 714.7402954101562, "learning_rate": 2.1439600005009674e-07, "loss": 26.4051, "step": 13780 }, { "epoch": 109.44666001994018, "grad_norm": 478.6904296875, "learning_rate": 2.1408692551632322e-07, "loss": 23.6819, "step": 13790 }, { "epoch": 109.52642073778664, "grad_norm": 928.071044921875, "learning_rate": 2.137779070197328e-07, "loss": 22.9517, "step": 13800 }, { "epoch": 109.52642073778664, "eval_loss": 3.0034821033477783, "eval_mae": 1.3013429641723633, "eval_mse": 3.0034821033477783, "eval_r2": 0.09506553411483765, "eval_rmse": 1.7330557127074069, "eval_runtime": 9.1203, "eval_samples_per_second": 439.788, "eval_steps_per_second": 13.815, "step": 13800 }, { "epoch": 109.6061814556331, "grad_norm": 193.4709014892578, "learning_rate": 2.1346894504250446e-07, "loss": 24.7091, "step": 13810 }, { "epoch": 109.68594217347956, "grad_norm": 
656.5834350585938, "learning_rate": 2.1316004006672888e-07, "loss": 22.5456, "step": 13820 }, { "epoch": 109.76570289132601, "grad_norm": 1204.993408203125, "learning_rate": 2.1285119257440784e-07, "loss": 25.7378, "step": 13830 }, { "epoch": 109.84546360917248, "grad_norm": 492.1981201171875, "learning_rate": 2.125424030474533e-07, "loss": 24.8542, "step": 13840 }, { "epoch": 109.92522432701894, "grad_norm": 255.29493713378906, "learning_rate": 2.1223367196768688e-07, "loss": 22.1661, "step": 13850 }, { "epoch": 110.0, "grad_norm": 339.08758544921875, "learning_rate": 2.1192499981683904e-07, "loss": 21.9219, "step": 13860 }, { "epoch": 110.07976071784645, "grad_norm": 869.034423828125, "learning_rate": 2.1161638707654828e-07, "loss": 23.117, "step": 13870 }, { "epoch": 110.15952143569292, "grad_norm": 211.4332733154297, "learning_rate": 2.1130783422836025e-07, "loss": 24.1192, "step": 13880 }, { "epoch": 110.23928215353938, "grad_norm": 340.9952697753906, "learning_rate": 2.1099934175372725e-07, "loss": 23.43, "step": 13890 }, { "epoch": 110.31904287138585, "grad_norm": 476.6876220703125, "learning_rate": 2.1069091013400742e-07, "loss": 23.6174, "step": 13900 }, { "epoch": 110.31904287138585, "eval_loss": 3.0029051303863525, "eval_mae": 1.2983053922653198, "eval_mse": 3.0029051303863525, "eval_r2": 0.09523940086364746, "eval_rmse": 1.7328892435428043, "eval_runtime": 9.0663, "eval_samples_per_second": 442.409, "eval_steps_per_second": 13.898, "step": 13900 }, { "epoch": 110.3988035892323, "grad_norm": 759.9443969726562, "learning_rate": 2.1038253985046405e-07, "loss": 24.5152, "step": 13910 }, { "epoch": 110.47856430707877, "grad_norm": 207.55154418945312, "learning_rate": 2.1007423138426435e-07, "loss": 24.6316, "step": 13920 }, { "epoch": 110.55832502492522, "grad_norm": 635.3157348632812, "learning_rate": 2.0976598521647933e-07, "loss": 26.1485, "step": 13930 }, { "epoch": 110.63808574277168, "grad_norm": 457.217041015625, "learning_rate": 
2.0945780182808293e-07, "loss": 23.8358, "step": 13940 }, { "epoch": 110.71784646061815, "grad_norm": 196.58578491210938, "learning_rate": 2.0914968169995091e-07, "loss": 24.8853, "step": 13950 }, { "epoch": 110.7976071784646, "grad_norm": 198.74093627929688, "learning_rate": 2.088416253128602e-07, "loss": 24.7008, "step": 13960 }, { "epoch": 110.87736789631107, "grad_norm": 609.0301513671875, "learning_rate": 2.0853363314748866e-07, "loss": 23.6362, "step": 13970 }, { "epoch": 110.95712861415753, "grad_norm": 733.3483276367188, "learning_rate": 2.0822570568441362e-07, "loss": 22.5534, "step": 13980 }, { "epoch": 111.03190428713859, "grad_norm": 212.95008850097656, "learning_rate": 2.0791784340411162e-07, "loss": 25.2944, "step": 13990 }, { "epoch": 111.11166500498504, "grad_norm": 151.1733856201172, "learning_rate": 2.0761004678695737e-07, "loss": 26.0677, "step": 14000 }, { "epoch": 111.11166500498504, "eval_loss": 3.0067334175109863, "eval_mae": 1.3094115257263184, "eval_mse": 3.0067334175109863, "eval_r2": 0.0940859317779541, "eval_rmse": 1.7339934883127406, "eval_runtime": 9.0922, "eval_samples_per_second": 441.147, "eval_steps_per_second": 13.858, "step": 14000 }, { "epoch": 111.19142572283151, "grad_norm": 947.1033935546875, "learning_rate": 2.0730231631322325e-07, "loss": 22.3109, "step": 14010 }, { "epoch": 111.27118644067797, "grad_norm": 829.9580688476562, "learning_rate": 2.069946524630783e-07, "loss": 24.7575, "step": 14020 }, { "epoch": 111.35094715852442, "grad_norm": 245.0848388671875, "learning_rate": 2.066870557165878e-07, "loss": 26.3338, "step": 14030 }, { "epoch": 111.43070787637089, "grad_norm": 375.2764892578125, "learning_rate": 2.06379526553712e-07, "loss": 24.0418, "step": 14040 }, { "epoch": 111.51046859421734, "grad_norm": 151.50494384765625, "learning_rate": 2.06072065454306e-07, "loss": 22.7789, "step": 14050 }, { "epoch": 111.59022931206381, "grad_norm": 825.5809936523438, "learning_rate": 2.057646728981186e-07, "loss": 24.1213, 
"step": 14060 }, { "epoch": 111.66999002991027, "grad_norm": 328.1300048828125, "learning_rate": 2.0545734936479166e-07, "loss": 24.0528, "step": 14070 }, { "epoch": 111.74975074775674, "grad_norm": 421.9128723144531, "learning_rate": 2.0515009533385924e-07, "loss": 23.4848, "step": 14080 }, { "epoch": 111.82951146560319, "grad_norm": 530.5689086914062, "learning_rate": 2.0484291128474698e-07, "loss": 23.4212, "step": 14090 }, { "epoch": 111.90927218344964, "grad_norm": 478.70452880859375, "learning_rate": 2.045357976967715e-07, "loss": 23.7455, "step": 14100 }, { "epoch": 111.90927218344964, "eval_loss": 3.0043723583221436, "eval_mae": 1.299153208732605, "eval_mse": 3.0043723583221436, "eval_r2": 0.09479731321334839, "eval_rmse": 1.7333125391348623, "eval_runtime": 9.0905, "eval_samples_per_second": 441.228, "eval_steps_per_second": 13.861, "step": 14100 }, { "epoch": 111.98903290129611, "grad_norm": 353.66131591796875, "learning_rate": 2.0422875504913946e-07, "loss": 22.9223, "step": 14110 }, { "epoch": 112.06380857427718, "grad_norm": 609.3904418945312, "learning_rate": 2.0392178382094632e-07, "loss": 23.194, "step": 14120 }, { "epoch": 112.14356929212363, "grad_norm": 857.0011596679688, "learning_rate": 2.0361488449117682e-07, "loss": 22.9318, "step": 14130 }, { "epoch": 112.22333000997008, "grad_norm": 675.8402709960938, "learning_rate": 2.0330805753870306e-07, "loss": 22.2699, "step": 14140 }, { "epoch": 112.30309072781655, "grad_norm": 543.199951171875, "learning_rate": 2.0300130344228444e-07, "loss": 25.7679, "step": 14150 }, { "epoch": 112.38285144566301, "grad_norm": 442.4114685058594, "learning_rate": 2.0269462268056642e-07, "loss": 26.0135, "step": 14160 }, { "epoch": 112.46261216350948, "grad_norm": 256.444580078125, "learning_rate": 2.0238801573208028e-07, "loss": 21.2013, "step": 14170 }, { "epoch": 112.54237288135593, "grad_norm": 509.817138671875, "learning_rate": 2.02081483075242e-07, "loss": 23.3253, "step": 14180 }, { "epoch": 112.6221335992024, 
"grad_norm": 580.7782592773438, "learning_rate": 2.017750251883517e-07, "loss": 26.1034, "step": 14190 }, { "epoch": 112.70189431704885, "grad_norm": 255.4623260498047, "learning_rate": 2.0146864254959276e-07, "loss": 24.2658, "step": 14200 }, { "epoch": 112.70189431704885, "eval_loss": 3.004279851913452, "eval_mae": 1.2976210117340088, "eval_mse": 3.004279851913452, "eval_r2": 0.09482520818710327, "eval_rmse": 1.7332858540683507, "eval_runtime": 9.0935, "eval_samples_per_second": 441.083, "eval_steps_per_second": 13.856, "step": 14200 }, { "epoch": 112.78165503489531, "grad_norm": 419.1800537109375, "learning_rate": 2.0116233563703117e-07, "loss": 25.155, "step": 14210 }, { "epoch": 112.86141575274178, "grad_norm": 174.2738800048828, "learning_rate": 2.0085610492861484e-07, "loss": 23.2744, "step": 14220 }, { "epoch": 112.94117647058823, "grad_norm": 141.05087280273438, "learning_rate": 2.0054995090217266e-07, "loss": 24.9201, "step": 14230 }, { "epoch": 113.0159521435693, "grad_norm": 350.5917053222656, "learning_rate": 2.0024387403541394e-07, "loss": 22.327, "step": 14240 }, { "epoch": 113.09571286141575, "grad_norm": 748.1488037109375, "learning_rate": 1.999378748059275e-07, "loss": 24.5792, "step": 14250 }, { "epoch": 113.17547357926222, "grad_norm": 350.0252685546875, "learning_rate": 1.9963195369118112e-07, "loss": 23.2496, "step": 14260 }, { "epoch": 113.25523429710867, "grad_norm": 167.78985595703125, "learning_rate": 1.9932611116852088e-07, "loss": 26.3902, "step": 14270 }, { "epoch": 113.33499501495514, "grad_norm": 418.8966064453125, "learning_rate": 1.990203477151697e-07, "loss": 25.5467, "step": 14280 }, { "epoch": 113.4147557328016, "grad_norm": 363.3697814941406, "learning_rate": 1.9871466380822752e-07, "loss": 22.481, "step": 14290 }, { "epoch": 113.49451645064805, "grad_norm": 301.104736328125, "learning_rate": 1.9840905992467018e-07, "loss": 21.3924, "step": 14300 }, { "epoch": 113.49451645064805, "eval_loss": 3.0020875930786133, "eval_mae": 
1.3055925369262695, "eval_mse": 3.0020875930786133, "eval_r2": 0.09548568725585938, "eval_rmse": 1.7326533389800203, "eval_runtime": 9.0853, "eval_samples_per_second": 441.484, "eval_steps_per_second": 13.869, "step": 14300 }, { "epoch": 113.57427716849452, "grad_norm": 490.4180603027344, "learning_rate": 1.9810353654134852e-07, "loss": 23.7989, "step": 14310 }, { "epoch": 113.65403788634097, "grad_norm": 273.2963562011719, "learning_rate": 1.977980941349877e-07, "loss": 24.4324, "step": 14320 }, { "epoch": 113.73379860418744, "grad_norm": 916.4816284179688, "learning_rate": 1.974927331821867e-07, "loss": 24.3051, "step": 14330 }, { "epoch": 113.8135593220339, "grad_norm": 641.0733642578125, "learning_rate": 1.9718745415941735e-07, "loss": 25.1196, "step": 14340 }, { "epoch": 113.89332003988036, "grad_norm": 709.544677734375, "learning_rate": 1.9688225754302362e-07, "loss": 24.2441, "step": 14350 }, { "epoch": 113.97308075772682, "grad_norm": 720.6399536132812, "learning_rate": 1.9657714380922086e-07, "loss": 24.4085, "step": 14360 }, { "epoch": 114.04785643070788, "grad_norm": 205.85848999023438, "learning_rate": 1.962721134340951e-07, "loss": 22.0088, "step": 14370 }, { "epoch": 114.12761714855434, "grad_norm": 115.24569702148438, "learning_rate": 1.9596716689360244e-07, "loss": 25.6115, "step": 14380 }, { "epoch": 114.2073778664008, "grad_norm": 688.999267578125, "learning_rate": 1.95662304663568e-07, "loss": 24.6186, "step": 14390 }, { "epoch": 114.28713858424726, "grad_norm": 570.3909301757812, "learning_rate": 1.9535752721968535e-07, "loss": 23.2304, "step": 14400 }, { "epoch": 114.28713858424726, "eval_loss": 3.0144083499908447, "eval_mae": 1.2939565181732178, "eval_mse": 3.0144081115722656, "eval_r2": 0.09177356958389282, "eval_rmse": 1.7362050891447893, "eval_runtime": 9.0826, "eval_samples_per_second": 441.612, "eval_steps_per_second": 13.873, "step": 14400 }, { "epoch": 114.36689930209371, "grad_norm": 666.518798828125, "learning_rate": 
1.950528350375159e-07, "loss": 25.2675, "step": 14410 }, { "epoch": 114.44666001994018, "grad_norm": 186.58453369140625, "learning_rate": 1.9474822859248784e-07, "loss": 24.6354, "step": 14420 }, { "epoch": 114.52642073778664, "grad_norm": 414.18603515625, "learning_rate": 1.9444370835989585e-07, "loss": 25.7347, "step": 14430 }, { "epoch": 114.6061814556331, "grad_norm": 576.7401733398438, "learning_rate": 1.9413927481489988e-07, "loss": 24.0868, "step": 14440 }, { "epoch": 114.68594217347956, "grad_norm": 627.0114135742188, "learning_rate": 1.9383492843252448e-07, "loss": 24.3032, "step": 14450 }, { "epoch": 114.76570289132601, "grad_norm": 218.20523071289062, "learning_rate": 1.9353066968765851e-07, "loss": 22.7358, "step": 14460 }, { "epoch": 114.84546360917248, "grad_norm": 161.6814422607422, "learning_rate": 1.9322649905505396e-07, "loss": 23.4808, "step": 14470 }, { "epoch": 114.92522432701894, "grad_norm": 273.088134765625, "learning_rate": 1.929224170093253e-07, "loss": 22.8789, "step": 14480 }, { "epoch": 115.0, "grad_norm": 774.2197265625, "learning_rate": 1.926184240249487e-07, "loss": 21.4673, "step": 14490 }, { "epoch": 115.07976071784645, "grad_norm": 367.72430419921875, "learning_rate": 1.9231452057626157e-07, "loss": 24.946, "step": 14500 }, { "epoch": 115.07976071784645, "eval_loss": 3.001230001449585, "eval_mae": 1.3005801439285278, "eval_mse": 3.001230001449585, "eval_r2": 0.0957440733909607, "eval_rmse": 1.7324058420155437, "eval_runtime": 9.0947, "eval_samples_per_second": 441.027, "eval_steps_per_second": 13.854, "step": 14500 }, { "epoch": 115.15952143569292, "grad_norm": 261.6848449707031, "learning_rate": 1.9201070713746146e-07, "loss": 24.619, "step": 14510 }, { "epoch": 115.23928215353938, "grad_norm": 253.65447998046875, "learning_rate": 1.917069841826055e-07, "loss": 24.864, "step": 14520 }, { "epoch": 115.31904287138585, "grad_norm": 247.41769409179688, "learning_rate": 1.9140335218560963e-07, "loss": 23.7294, "step": 14530 }, { 
"epoch": 115.3988035892323, "grad_norm": 742.1896362304688, "learning_rate": 1.9109981162024788e-07, "loss": 22.7592, "step": 14540 }, { "epoch": 115.47856430707877, "grad_norm": 325.4307861328125, "learning_rate": 1.907963629601516e-07, "loss": 25.2781, "step": 14550 }, { "epoch": 115.55832502492522, "grad_norm": 158.41632080078125, "learning_rate": 1.904930066788088e-07, "loss": 23.2735, "step": 14560 }, { "epoch": 115.63808574277168, "grad_norm": 434.5899353027344, "learning_rate": 1.9018974324956323e-07, "loss": 23.4698, "step": 14570 }, { "epoch": 115.71784646061815, "grad_norm": 507.632080078125, "learning_rate": 1.8988657314561375e-07, "loss": 23.6647, "step": 14580 }, { "epoch": 115.7976071784646, "grad_norm": 140.050048828125, "learning_rate": 1.895834968400138e-07, "loss": 23.897, "step": 14590 }, { "epoch": 115.87736789631107, "grad_norm": 319.8278503417969, "learning_rate": 1.8928051480567042e-07, "loss": 23.4679, "step": 14600 }, { "epoch": 115.87736789631107, "eval_loss": 2.999934196472168, "eval_mae": 1.3039181232452393, "eval_mse": 2.999934196472168, "eval_r2": 0.0961344838142395, "eval_rmse": 1.7320318116224562, "eval_runtime": 9.0889, "eval_samples_per_second": 441.307, "eval_steps_per_second": 13.863, "step": 14600 }, { "epoch": 115.95712861415753, "grad_norm": 186.12600708007812, "learning_rate": 1.8897762751534317e-07, "loss": 24.3203, "step": 14610 }, { "epoch": 116.03190428713859, "grad_norm": 747.21435546875, "learning_rate": 1.8867483544164422e-07, "loss": 22.5211, "step": 14620 }, { "epoch": 116.11166500498504, "grad_norm": 534.8453979492188, "learning_rate": 1.8837213905703702e-07, "loss": 24.2478, "step": 14630 }, { "epoch": 116.19142572283151, "grad_norm": 704.918212890625, "learning_rate": 1.880695388338358e-07, "loss": 23.4607, "step": 14640 }, { "epoch": 116.27118644067797, "grad_norm": 923.6112060546875, "learning_rate": 1.8776703524420445e-07, "loss": 25.0206, "step": 14650 }, { "epoch": 116.35094715852442, "grad_norm": 
232.6576385498047, "learning_rate": 1.874646287601564e-07, "loss": 25.6268, "step": 14660 }, { "epoch": 116.43070787637089, "grad_norm": 414.2278137207031, "learning_rate": 1.8716231985355342e-07, "loss": 21.9105, "step": 14670 }, { "epoch": 116.51046859421734, "grad_norm": 250.12484741210938, "learning_rate": 1.8686010899610504e-07, "loss": 24.9453, "step": 14680 }, { "epoch": 116.59022931206381, "grad_norm": 214.85546875, "learning_rate": 1.865579966593678e-07, "loss": 23.202, "step": 14690 }, { "epoch": 116.66999002991027, "grad_norm": 456.5260009765625, "learning_rate": 1.8625598331474451e-07, "loss": 24.1828, "step": 14700 }, { "epoch": 116.66999002991027, "eval_loss": 3.0012688636779785, "eval_mae": 1.2985434532165527, "eval_mse": 3.0012688636779785, "eval_r2": 0.09573233127593994, "eval_rmse": 1.7324170582391465, "eval_runtime": 9.1014, "eval_samples_per_second": 440.7, "eval_steps_per_second": 13.844, "step": 14700 }, { "epoch": 116.74975074775674, "grad_norm": 446.82501220703125, "learning_rate": 1.8595406943348352e-07, "loss": 25.0955, "step": 14710 }, { "epoch": 116.82951146560319, "grad_norm": 665.4845581054688, "learning_rate": 1.85652255486678e-07, "loss": 23.4462, "step": 14720 }, { "epoch": 116.90927218344964, "grad_norm": 176.94802856445312, "learning_rate": 1.853505419452651e-07, "loss": 25.5434, "step": 14730 }, { "epoch": 116.98903290129611, "grad_norm": 1226.932373046875, "learning_rate": 1.8504892928002548e-07, "loss": 23.575, "step": 14740 }, { "epoch": 117.06380857427718, "grad_norm": 566.0380859375, "learning_rate": 1.8474741796158215e-07, "loss": 22.8273, "step": 14750 }, { "epoch": 117.14356929212363, "grad_norm": 85.70375061035156, "learning_rate": 1.8444600846040035e-07, "loss": 24.948, "step": 14760 }, { "epoch": 117.22333000997008, "grad_norm": 285.765625, "learning_rate": 1.8414470124678597e-07, "loss": 24.4736, "step": 14770 }, { "epoch": 117.30309072781655, "grad_norm": 481.25933837890625, "learning_rate": 1.8384349679088558e-07, 
"loss": 25.229, "step": 14780 }, { "epoch": 117.38285144566301, "grad_norm": 679.0187377929688, "learning_rate": 1.8354239556268552e-07, "loss": 23.8175, "step": 14790 }, { "epoch": 117.46261216350948, "grad_norm": 844.68115234375, "learning_rate": 1.832413980320109e-07, "loss": 23.7498, "step": 14800 }, { "epoch": 117.46261216350948, "eval_loss": 2.9960711002349854, "eval_mae": 1.2975517511367798, "eval_mse": 2.9960711002349854, "eval_r2": 0.09729844331741333, "eval_rmse": 1.730916260318501, "eval_runtime": 9.0922, "eval_samples_per_second": 441.145, "eval_steps_per_second": 13.858, "step": 14800 }, { "epoch": 117.54237288135593, "grad_norm": 260.93499755859375, "learning_rate": 1.829405046685249e-07, "loss": 23.3996, "step": 14810 }, { "epoch": 117.6221335992024, "grad_norm": 917.9327392578125, "learning_rate": 1.8263971594172837e-07, "loss": 25.7007, "step": 14820 }, { "epoch": 117.70189431704885, "grad_norm": 245.7829132080078, "learning_rate": 1.8233903232095882e-07, "loss": 22.9093, "step": 14830 }, { "epoch": 117.78165503489531, "grad_norm": 128.6542205810547, "learning_rate": 1.8203845427538985e-07, "loss": 23.2522, "step": 14840 }, { "epoch": 117.86141575274178, "grad_norm": 222.97190856933594, "learning_rate": 1.817680246888133e-07, "loss": 22.5512, "step": 14850 }, { "epoch": 117.94117647058823, "grad_norm": 342.2579345703125, "learning_rate": 1.8146764852810764e-07, "loss": 25.3012, "step": 14860 }, { "epoch": 118.0159521435693, "grad_norm": 458.2828369140625, "learning_rate": 1.8116737930227148e-07, "loss": 22.3981, "step": 14870 }, { "epoch": 118.09571286141575, "grad_norm": 263.3362121582031, "learning_rate": 1.8086721747983166e-07, "loss": 24.4831, "step": 14880 }, { "epoch": 118.17547357926222, "grad_norm": 675.1715087890625, "learning_rate": 1.8056716352914753e-07, "loss": 24.5227, "step": 14890 }, { "epoch": 118.25523429710867, "grad_norm": 270.53472900390625, "learning_rate": 1.8026721791841003e-07, "loss": 24.3978, "step": 14900 }, { "epoch": 
118.25523429710867, "eval_loss": 2.9966397285461426, "eval_mae": 1.299818515777588, "eval_mse": 2.9966397285461426, "eval_r2": 0.09712713956832886, "eval_rmse": 1.7310805089729775, "eval_runtime": 9.1, "eval_samples_per_second": 440.772, "eval_steps_per_second": 13.846, "step": 14900 }, { "epoch": 118.33499501495514, "grad_norm": 407.41754150390625, "learning_rate": 1.7996738111564135e-07, "loss": 24.0754, "step": 14910 }, { "epoch": 118.4147557328016, "grad_norm": 626.1751708984375, "learning_rate": 1.7966765358869333e-07, "loss": 23.3016, "step": 14920 }, { "epoch": 118.49451645064805, "grad_norm": 599.4439086914062, "learning_rate": 1.7936803580524782e-07, "loss": 23.8976, "step": 14930 }, { "epoch": 118.57427716849452, "grad_norm": 399.52374267578125, "learning_rate": 1.790685282328152e-07, "loss": 23.325, "step": 14940 }, { "epoch": 118.65403788634097, "grad_norm": 684.7321166992188, "learning_rate": 1.7876913133873396e-07, "loss": 24.2629, "step": 14950 }, { "epoch": 118.73379860418744, "grad_norm": 535.8107299804688, "learning_rate": 1.784698455901698e-07, "loss": 23.974, "step": 14960 }, { "epoch": 118.8135593220339, "grad_norm": 91.9757080078125, "learning_rate": 1.7817067145411507e-07, "loss": 22.9046, "step": 14970 }, { "epoch": 118.89332003988036, "grad_norm": 530.356689453125, "learning_rate": 1.7787160939738795e-07, "loss": 23.6471, "step": 14980 }, { "epoch": 118.97308075772682, "grad_norm": 458.6141052246094, "learning_rate": 1.7757265988663172e-07, "loss": 25.7141, "step": 14990 }, { "epoch": 119.04785643070788, "grad_norm": 1084.7567138671875, "learning_rate": 1.772738233883141e-07, "loss": 24.3464, "step": 15000 }, { "epoch": 119.04785643070788, "eval_loss": 2.9981815814971924, "eval_mae": 1.3042938709259033, "eval_mse": 2.9981815814971924, "eval_r2": 0.09666258096694946, "eval_rmse": 1.7315257957931762, "eval_runtime": 9.0853, "eval_samples_per_second": 441.483, "eval_steps_per_second": 13.869, "step": 15000 }, { "epoch": 119.12761714855434, 
"grad_norm": 122.95572662353516, "learning_rate": 1.7697510036872633e-07, "loss": 22.7137, "step": 15010 }, { "epoch": 119.2073778664008, "grad_norm": 285.8971862792969, "learning_rate": 1.766764912939828e-07, "loss": 24.7054, "step": 15020 }, { "epoch": 119.28713858424726, "grad_norm": 178.3037872314453, "learning_rate": 1.7637799663001996e-07, "loss": 22.655, "step": 15030 }, { "epoch": 119.36689930209371, "grad_norm": 458.0035705566406, "learning_rate": 1.760796168425957e-07, "loss": 25.3129, "step": 15040 }, { "epoch": 119.44666001994018, "grad_norm": 243.63291931152344, "learning_rate": 1.7578135239728874e-07, "loss": 24.8409, "step": 15050 }, { "epoch": 119.52642073778664, "grad_norm": 301.6819152832031, "learning_rate": 1.7548320375949782e-07, "loss": 27.2402, "step": 15060 }, { "epoch": 119.6061814556331, "grad_norm": 568.8759155273438, "learning_rate": 1.7518517139444112e-07, "loss": 24.0735, "step": 15070 }, { "epoch": 119.68594217347956, "grad_norm": 701.4216918945312, "learning_rate": 1.74887255767155e-07, "loss": 21.9961, "step": 15080 }, { "epoch": 119.76570289132601, "grad_norm": 497.3916320800781, "learning_rate": 1.7458945734249403e-07, "loss": 22.3489, "step": 15090 }, { "epoch": 119.84546360917248, "grad_norm": 642.3511962890625, "learning_rate": 1.7429177658512982e-07, "loss": 22.4557, "step": 15100 }, { "epoch": 119.84546360917248, "eval_loss": 3.0010323524475098, "eval_mae": 1.2982264757156372, "eval_mse": 3.0010321140289307, "eval_r2": 0.09580367803573608, "eval_rmse": 1.7323487276033456, "eval_runtime": 9.1132, "eval_samples_per_second": 440.133, "eval_steps_per_second": 13.826, "step": 15100 }, { "epoch": 119.92522432701894, "grad_norm": 551.1233520507812, "learning_rate": 1.739942139595504e-07, "loss": 26.5307, "step": 15110 }, { "epoch": 120.0, "grad_norm": 235.44761657714844, "learning_rate": 1.7369676993005915e-07, "loss": 20.6391, "step": 15120 }, { "epoch": 120.07976071784645, "grad_norm": 1141.905029296875, "learning_rate": 
1.733994449607749e-07, "loss": 22.791, "step": 15130 }, { "epoch": 120.15952143569292, "grad_norm": 582.3103637695312, "learning_rate": 1.731022395156304e-07, "loss": 24.694, "step": 15140 }, { "epoch": 120.23928215353938, "grad_norm": 262.1140441894531, "learning_rate": 1.7280515405837203e-07, "loss": 23.0256, "step": 15150 }, { "epoch": 120.31904287138585, "grad_norm": 274.61083984375, "learning_rate": 1.7250818905255876e-07, "loss": 22.8228, "step": 15160 }, { "epoch": 120.3988035892323, "grad_norm": 195.396484375, "learning_rate": 1.7221134496156176e-07, "loss": 25.1282, "step": 15170 }, { "epoch": 120.47856430707877, "grad_norm": 929.0553588867188, "learning_rate": 1.7191462224856356e-07, "loss": 27.0083, "step": 15180 }, { "epoch": 120.55832502492522, "grad_norm": 284.4768981933594, "learning_rate": 1.7161802137655722e-07, "loss": 24.9673, "step": 15190 }, { "epoch": 120.63808574277168, "grad_norm": 919.1542358398438, "learning_rate": 1.713215428083457e-07, "loss": 23.3769, "step": 15200 }, { "epoch": 120.63808574277168, "eval_loss": 3.0014636516571045, "eval_mae": 1.2961853742599487, "eval_mse": 3.0014636516571045, "eval_r2": 0.09567362070083618, "eval_rmse": 1.7324732758854045, "eval_runtime": 9.0886, "eval_samples_per_second": 441.324, "eval_steps_per_second": 13.864, "step": 15200 }, { "epoch": 120.71784646061815, "grad_norm": 329.38873291015625, "learning_rate": 1.710251870065411e-07, "loss": 24.515, "step": 15210 }, { "epoch": 120.7976071784646, "grad_norm": 557.4713134765625, "learning_rate": 1.70728954433564e-07, "loss": 23.7734, "step": 15220 }, { "epoch": 120.87736789631107, "grad_norm": 415.6596374511719, "learning_rate": 1.7043284555164285e-07, "loss": 23.814, "step": 15230 }, { "epoch": 120.95712861415753, "grad_norm": 133.07821655273438, "learning_rate": 1.701368608228127e-07, "loss": 23.5502, "step": 15240 }, { "epoch": 121.03190428713859, "grad_norm": 182.43716430664062, "learning_rate": 1.6984100070891514e-07, "loss": 21.9608, "step": 15250 
}, { "epoch": 121.11166500498504, "grad_norm": 180.09335327148438, "learning_rate": 1.695452656715974e-07, "loss": 24.405, "step": 15260 }, { "epoch": 121.19142572283151, "grad_norm": 366.22259521484375, "learning_rate": 1.6924965617231142e-07, "loss": 22.4355, "step": 15270 }, { "epoch": 121.27118644067797, "grad_norm": 109.32711791992188, "learning_rate": 1.6895417267231327e-07, "loss": 25.3911, "step": 15280 }, { "epoch": 121.35094715852442, "grad_norm": 182.69952392578125, "learning_rate": 1.6865881563266235e-07, "loss": 23.423, "step": 15290 }, { "epoch": 121.43070787637089, "grad_norm": 882.4513549804688, "learning_rate": 1.6836358551422085e-07, "loss": 23.9485, "step": 15300 }, { "epoch": 121.43070787637089, "eval_loss": 2.995577335357666, "eval_mae": 1.3027689456939697, "eval_mse": 2.995577335357666, "eval_r2": 0.0974472165107727, "eval_rmse": 1.7307736233712558, "eval_runtime": 9.0979, "eval_samples_per_second": 440.873, "eval_steps_per_second": 13.849, "step": 15300 }, { "epoch": 121.51046859421734, "grad_norm": 252.78250122070312, "learning_rate": 1.6806848277765284e-07, "loss": 22.1597, "step": 15310 }, { "epoch": 121.59022931206381, "grad_norm": 1262.2432861328125, "learning_rate": 1.6777350788342377e-07, "loss": 23.4783, "step": 15320 }, { "epoch": 121.66999002991027, "grad_norm": 442.42547607421875, "learning_rate": 1.6747866129179938e-07, "loss": 26.3166, "step": 15330 }, { "epoch": 121.74975074775674, "grad_norm": 246.87246704101562, "learning_rate": 1.6718394346284538e-07, "loss": 25.5064, "step": 15340 }, { "epoch": 121.82951146560319, "grad_norm": 1119.2406005859375, "learning_rate": 1.6688935485642646e-07, "loss": 25.2066, "step": 15350 }, { "epoch": 121.90927218344964, "grad_norm": 326.2505798339844, "learning_rate": 1.665948959322059e-07, "loss": 23.6661, "step": 15360 }, { "epoch": 121.98903290129611, "grad_norm": 148.08853149414062, "learning_rate": 1.663005671496443e-07, "loss": 23.4266, "step": 15370 }, { "epoch": 122.06380857427718, 
"grad_norm": 1457.82958984375, "learning_rate": 1.6600636896799936e-07, "loss": 22.0045, "step": 15380 }, { "epoch": 122.14356929212363, "grad_norm": 116.29139709472656, "learning_rate": 1.6571230184632507e-07, "loss": 24.5288, "step": 15390 }, { "epoch": 122.22333000997008, "grad_norm": 959.0142822265625, "learning_rate": 1.6541836624347095e-07, "loss": 24.791, "step": 15400 }, { "epoch": 122.22333000997008, "eval_loss": 2.9969351291656494, "eval_mae": 1.2997649908065796, "eval_mse": 2.9969353675842285, "eval_r2": 0.09703803062438965, "eval_rmse": 1.7311658983425675, "eval_runtime": 9.0986, "eval_samples_per_second": 440.835, "eval_steps_per_second": 13.848, "step": 15400 }, { "epoch": 122.30309072781655, "grad_norm": 147.13011169433594, "learning_rate": 1.6512456261808089e-07, "loss": 23.8236, "step": 15410 }, { "epoch": 122.38285144566301, "grad_norm": 85.25444793701172, "learning_rate": 1.6483089142859335e-07, "loss": 25.0383, "step": 15420 }, { "epoch": 122.46261216350948, "grad_norm": 730.5651245117188, "learning_rate": 1.6453735313324e-07, "loss": 23.6549, "step": 15430 }, { "epoch": 122.54237288135593, "grad_norm": 222.33164978027344, "learning_rate": 1.6424394819004504e-07, "loss": 23.545, "step": 15440 }, { "epoch": 122.6221335992024, "grad_norm": 675.6375732421875, "learning_rate": 1.6395067705682458e-07, "loss": 23.9993, "step": 15450 }, { "epoch": 122.70189431704885, "grad_norm": 397.6210632324219, "learning_rate": 1.636575401911861e-07, "loss": 24.0422, "step": 15460 }, { "epoch": 122.78165503489531, "grad_norm": 399.5688171386719, "learning_rate": 1.6336453805052742e-07, "loss": 22.0093, "step": 15470 }, { "epoch": 122.86141575274178, "grad_norm": 166.28970336914062, "learning_rate": 1.630716710920363e-07, "loss": 24.2535, "step": 15480 }, { "epoch": 122.94117647058823, "grad_norm": 581.7630004882812, "learning_rate": 1.6277893977268934e-07, "loss": 25.0971, "step": 15490 }, { "epoch": 123.0159521435693, "grad_norm": 343.17578125, "learning_rate": 
1.6248634454925168e-07, "loss": 22.5816, "step": 15500 }, { "epoch": 123.0159521435693, "eval_loss": 2.9995081424713135, "eval_mae": 1.3060561418533325, "eval_mse": 2.9995081424713135, "eval_r2": 0.0962628722190857, "eval_rmse": 1.7319088147103223, "eval_runtime": 9.1165, "eval_samples_per_second": 439.969, "eval_steps_per_second": 13.821, "step": 15500 }, { "epoch": 123.09571286141575, "grad_norm": 189.19554138183594, "learning_rate": 1.6219388587827603e-07, "loss": 22.5818, "step": 15510 }, { "epoch": 123.17547357926222, "grad_norm": 288.20843505859375, "learning_rate": 1.619015642161022e-07, "loss": 24.2205, "step": 15520 }, { "epoch": 123.25523429710867, "grad_norm": 649.839111328125, "learning_rate": 1.6160938001885583e-07, "loss": 22.5111, "step": 15530 }, { "epoch": 123.33499501495514, "grad_norm": 294.1289978027344, "learning_rate": 1.6131733374244844e-07, "loss": 26.4326, "step": 15540 }, { "epoch": 123.4147557328016, "grad_norm": 309.74951171875, "learning_rate": 1.6102542584257608e-07, "loss": 25.1714, "step": 15550 }, { "epoch": 123.49451645064805, "grad_norm": 566.5947265625, "learning_rate": 1.607336567747193e-07, "loss": 25.061, "step": 15560 }, { "epoch": 123.57427716849452, "grad_norm": 135.1038360595703, "learning_rate": 1.6044202699414138e-07, "loss": 22.7417, "step": 15570 }, { "epoch": 123.65403788634097, "grad_norm": 704.9916381835938, "learning_rate": 1.6015053695588875e-07, "loss": 24.8618, "step": 15580 }, { "epoch": 123.73379860418744, "grad_norm": 155.9027557373047, "learning_rate": 1.5985918711478974e-07, "loss": 23.0754, "step": 15590 }, { "epoch": 123.8135593220339, "grad_norm": 139.53753662109375, "learning_rate": 1.5956797792545384e-07, "loss": 25.0568, "step": 15600 }, { "epoch": 123.8135593220339, "eval_loss": 2.999448299407959, "eval_mae": 1.2963074445724487, "eval_mse": 2.999448299407959, "eval_r2": 0.09628087282180786, "eval_rmse": 1.731891538003451, "eval_runtime": 9.1006, "eval_samples_per_second": 440.741, 
"eval_steps_per_second": 13.845, "step": 15600 }, { "epoch": 123.89332003988036, "grad_norm": 343.701416015625, "learning_rate": 1.5927690984227095e-07, "loss": 23.1298, "step": 15610 }, { "epoch": 123.97308075772682, "grad_norm": 455.2835388183594, "learning_rate": 1.589859833194109e-07, "loss": 22.7114, "step": 15620 }, { "epoch": 124.04785643070788, "grad_norm": 767.1268920898438, "learning_rate": 1.5869519881082277e-07, "loss": 22.7328, "step": 15630 }, { "epoch": 124.12761714855434, "grad_norm": 378.0350036621094, "learning_rate": 1.5840455677023392e-07, "loss": 24.4979, "step": 15640 }, { "epoch": 124.2073778664008, "grad_norm": 588.4114990234375, "learning_rate": 1.5811405765114933e-07, "loss": 22.3417, "step": 15650 }, { "epoch": 124.28713858424726, "grad_norm": 118.65570831298828, "learning_rate": 1.5782370190685113e-07, "loss": 23.0127, "step": 15660 }, { "epoch": 124.36689930209371, "grad_norm": 1166.2757568359375, "learning_rate": 1.575334899903976e-07, "loss": 23.5448, "step": 15670 }, { "epoch": 124.44666001994018, "grad_norm": 186.2198028564453, "learning_rate": 1.5724342235462273e-07, "loss": 22.2049, "step": 15680 }, { "epoch": 124.52642073778664, "grad_norm": 953.7238159179688, "learning_rate": 1.5695349945213525e-07, "loss": 24.5845, "step": 15690 }, { "epoch": 124.6061814556331, "grad_norm": 590.2252807617188, "learning_rate": 1.5666372173531817e-07, "loss": 25.8507, "step": 15700 }, { "epoch": 124.6061814556331, "eval_loss": 2.9973418712615967, "eval_mae": 1.2974629402160645, "eval_mse": 2.9973418712615967, "eval_r2": 0.096915602684021, "eval_rmse": 1.7312833018491216, "eval_runtime": 9.1042, "eval_samples_per_second": 440.567, "eval_steps_per_second": 13.84, "step": 15700 }, { "epoch": 124.68594217347956, "grad_norm": 581.5036010742188, "learning_rate": 1.5637408965632782e-07, "loss": 24.9864, "step": 15710 }, { "epoch": 124.76570289132601, "grad_norm": 982.7798461914062, "learning_rate": 1.5608460366709358e-07, "loss": 23.1186, "step": 15720 
}, { "epoch": 124.84546360917248, "grad_norm": 523.9783325195312, "learning_rate": 1.557952642193164e-07, "loss": 25.3386, "step": 15730 }, { "epoch": 124.92522432701894, "grad_norm": 323.17022705078125, "learning_rate": 1.555060717644689e-07, "loss": 24.6167, "step": 15740 }, { "epoch": 125.0, "grad_norm": 173.55587768554688, "learning_rate": 1.552170267537944e-07, "loss": 22.7146, "step": 15750 }, { "epoch": 125.07976071784645, "grad_norm": 261.3866882324219, "learning_rate": 1.5492812963830604e-07, "loss": 24.0152, "step": 15760 }, { "epoch": 125.15952143569292, "grad_norm": 348.5075988769531, "learning_rate": 1.5463938086878599e-07, "loss": 22.3543, "step": 15770 }, { "epoch": 125.23928215353938, "grad_norm": 430.7974853515625, "learning_rate": 1.543507808957853e-07, "loss": 24.7551, "step": 15780 }, { "epoch": 125.31904287138585, "grad_norm": 265.10382080078125, "learning_rate": 1.540623301696226e-07, "loss": 23.8209, "step": 15790 }, { "epoch": 125.3988035892323, "grad_norm": 460.537841796875, "learning_rate": 1.5377402914038384e-07, "loss": 22.5712, "step": 15800 }, { "epoch": 125.3988035892323, "eval_loss": 2.99674391746521, "eval_mae": 1.2961981296539307, "eval_mse": 2.99674391746521, "eval_r2": 0.09709572792053223, "eval_rmse": 1.7311106023201435, "eval_runtime": 9.0932, "eval_samples_per_second": 441.097, "eval_steps_per_second": 13.856, "step": 15800 }, { "epoch": 125.47856430707877, "grad_norm": 145.07672119140625, "learning_rate": 1.5348587825792114e-07, "loss": 22.8911, "step": 15810 }, { "epoch": 125.55832502492522, "grad_norm": 355.4983825683594, "learning_rate": 1.5319787797185257e-07, "loss": 23.2123, "step": 15820 }, { "epoch": 125.63808574277168, "grad_norm": 122.14202117919922, "learning_rate": 1.5291002873156105e-07, "loss": 23.9367, "step": 15830 }, { "epoch": 125.71784646061815, "grad_norm": 623.5941162109375, "learning_rate": 1.5262233098619398e-07, "loss": 22.5786, "step": 15840 }, { "epoch": 125.7976071784646, "grad_norm": 
1107.0355224609375, "learning_rate": 1.523347851846622e-07, "loss": 27.1192, "step": 15850 }, { "epoch": 125.87736789631107, "grad_norm": 371.8562927246094, "learning_rate": 1.5204739177563953e-07, "loss": 25.3864, "step": 15860 }, { "epoch": 125.95712861415753, "grad_norm": 474.218505859375, "learning_rate": 1.5176015120756203e-07, "loss": 24.8445, "step": 15870 }, { "epoch": 126.03190428713859, "grad_norm": 1185.120849609375, "learning_rate": 1.514730639286273e-07, "loss": 23.3745, "step": 15880 }, { "epoch": 126.11166500498504, "grad_norm": 113.61933135986328, "learning_rate": 1.511861303867938e-07, "loss": 25.0345, "step": 15890 }, { "epoch": 126.19142572283151, "grad_norm": 493.3686828613281, "learning_rate": 1.5089935102977973e-07, "loss": 23.1973, "step": 15900 }, { "epoch": 126.19142572283151, "eval_loss": 2.9987268447875977, "eval_mae": 1.2949258089065552, "eval_mse": 2.9987266063690186, "eval_r2": 0.09649831056594849, "eval_rmse": 1.731683171474799, "eval_runtime": 9.0753, "eval_samples_per_second": 441.967, "eval_steps_per_second": 13.884, "step": 15900 }, { "epoch": 126.27118644067797, "grad_norm": 353.9967041015625, "learning_rate": 1.5061272630506324e-07, "loss": 24.2967, "step": 15910 }, { "epoch": 126.35094715852442, "grad_norm": 487.5097961425781, "learning_rate": 1.503262566598809e-07, "loss": 23.7637, "step": 15920 }, { "epoch": 126.43070787637089, "grad_norm": 207.82666015625, "learning_rate": 1.500399425412274e-07, "loss": 24.2266, "step": 15930 }, { "epoch": 126.51046859421734, "grad_norm": 996.2344360351562, "learning_rate": 1.497537843958546e-07, "loss": 23.555, "step": 15940 }, { "epoch": 126.59022931206381, "grad_norm": 193.35317993164062, "learning_rate": 1.4946778267027112e-07, "loss": 24.4272, "step": 15950 }, { "epoch": 126.66999002991027, "grad_norm": 614.0579833984375, "learning_rate": 1.4918193781074153e-07, "loss": 23.7424, "step": 15960 }, { "epoch": 126.74975074775674, "grad_norm": 408.7231750488281, "learning_rate": 
1.4889625026328563e-07, "loss": 24.5934, "step": 15970 }, { "epoch": 126.82951146560319, "grad_norm": 658.59814453125, "learning_rate": 1.4861072047367758e-07, "loss": 22.9295, "step": 15980 }, { "epoch": 126.90927218344964, "grad_norm": 465.7789611816406, "learning_rate": 1.4832534888744557e-07, "loss": 22.9432, "step": 15990 }, { "epoch": 126.98903290129611, "grad_norm": 424.2307434082031, "learning_rate": 1.4804013594987093e-07, "loss": 23.4441, "step": 16000 }, { "epoch": 126.98903290129611, "eval_loss": 2.9949264526367188, "eval_mae": 1.297987461090088, "eval_mse": 2.9949264526367188, "eval_r2": 0.09764325618743896, "eval_rmse": 1.730585580847338, "eval_runtime": 9.0821, "eval_samples_per_second": 441.64, "eval_steps_per_second": 13.873, "step": 16000 }, { "epoch": 127.06380857427718, "grad_norm": 611.9707641601562, "learning_rate": 1.4775508210598743e-07, "loss": 22.922, "step": 16010 }, { "epoch": 127.14356929212363, "grad_norm": 321.8793640136719, "learning_rate": 1.4747018780058042e-07, "loss": 24.6874, "step": 16020 }, { "epoch": 127.22333000997008, "grad_norm": 452.5003662109375, "learning_rate": 1.471854534781865e-07, "loss": 24.2103, "step": 16030 }, { "epoch": 127.30309072781655, "grad_norm": 291.26043701171875, "learning_rate": 1.469008795830926e-07, "loss": 24.3956, "step": 16040 }, { "epoch": 127.38285144566301, "grad_norm": 988.60693359375, "learning_rate": 1.4661646655933552e-07, "loss": 24.6791, "step": 16050 }, { "epoch": 127.46261216350948, "grad_norm": 131.3463134765625, "learning_rate": 1.4633221485070052e-07, "loss": 22.7122, "step": 16060 }, { "epoch": 127.54237288135593, "grad_norm": 216.421875, "learning_rate": 1.4604812490072159e-07, "loss": 24.6799, "step": 16070 }, { "epoch": 127.6221335992024, "grad_norm": 265.0110168457031, "learning_rate": 1.4576419715268026e-07, "loss": 23.6442, "step": 16080 }, { "epoch": 127.70189431704885, "grad_norm": 202.32069396972656, "learning_rate": 1.4548043204960496e-07, "loss": 25.1982, "step": 16090 
}, { "epoch": 127.78165503489531, "grad_norm": 834.6726684570312, "learning_rate": 1.4519683003427018e-07, "loss": 23.8063, "step": 16100 }, { "epoch": 127.78165503489531, "eval_loss": 2.9989752769470215, "eval_mae": 1.2963449954986572, "eval_mse": 2.9989752769470215, "eval_r2": 0.0964234471321106, "eval_rmse": 1.7317549702388677, "eval_runtime": 9.1158, "eval_samples_per_second": 440.007, "eval_steps_per_second": 13.822, "step": 16100 }, { "epoch": 127.86141575274178, "grad_norm": 438.2342529296875, "learning_rate": 1.4491339154919595e-07, "loss": 24.8793, "step": 16110 }, { "epoch": 127.94117647058823, "grad_norm": 500.1650695800781, "learning_rate": 1.446301170366474e-07, "loss": 22.3995, "step": 16120 }, { "epoch": 128.01595214356928, "grad_norm": 660.446533203125, "learning_rate": 1.4434700693863377e-07, "loss": 22.4161, "step": 16130 }, { "epoch": 128.09571286141576, "grad_norm": 554.826416015625, "learning_rate": 1.440640616969072e-07, "loss": 24.9374, "step": 16140 }, { "epoch": 128.17547357926222, "grad_norm": 274.38031005859375, "learning_rate": 1.437812817529632e-07, "loss": 24.6947, "step": 16150 }, { "epoch": 128.25523429710867, "grad_norm": 464.1123046875, "learning_rate": 1.4349866754803936e-07, "loss": 24.7641, "step": 16160 }, { "epoch": 128.33499501495513, "grad_norm": 204.35543823242188, "learning_rate": 1.432162195231143e-07, "loss": 24.2712, "step": 16170 }, { "epoch": 128.4147557328016, "grad_norm": 322.74456787109375, "learning_rate": 1.4293393811890737e-07, "loss": 24.4455, "step": 16180 }, { "epoch": 128.49451645064806, "grad_norm": 171.8980255126953, "learning_rate": 1.4265182377587833e-07, "loss": 23.6119, "step": 16190 }, { "epoch": 128.57427716849452, "grad_norm": 227.2103271484375, "learning_rate": 1.4236987693422577e-07, "loss": 22.7084, "step": 16200 }, { "epoch": 128.57427716849452, "eval_loss": 2.9964678287506104, "eval_mae": 1.3037374019622803, "eval_mse": 2.9964678287506104, "eval_r2": 0.09717893600463867, "eval_rmse": 
1.731030857249694, "eval_runtime": 9.0864, "eval_samples_per_second": 441.428, "eval_steps_per_second": 13.867, "step": 16200 }, { "epoch": 128.65403788634097, "grad_norm": 286.4161376953125, "learning_rate": 1.4208809803388739e-07, "loss": 23.3087, "step": 16210 }, { "epoch": 128.73379860418743, "grad_norm": 116.77186584472656, "learning_rate": 1.4180648751453843e-07, "loss": 23.4167, "step": 16220 }, { "epoch": 128.8135593220339, "grad_norm": 98.98394775390625, "learning_rate": 1.4152504581559156e-07, "loss": 24.9745, "step": 16230 }, { "epoch": 128.89332003988036, "grad_norm": 324.8134460449219, "learning_rate": 1.4124377337619617e-07, "loss": 23.0645, "step": 16240 }, { "epoch": 128.97308075772682, "grad_norm": 120.70247650146484, "learning_rate": 1.4096267063523735e-07, "loss": 23.2501, "step": 16250 }, { "epoch": 129.04785643070787, "grad_norm": 342.9710998535156, "learning_rate": 1.4068173803133531e-07, "loss": 21.5063, "step": 16260 }, { "epoch": 129.12761714855435, "grad_norm": 173.40664672851562, "learning_rate": 1.4040097600284522e-07, "loss": 23.2956, "step": 16270 }, { "epoch": 129.2073778664008, "grad_norm": 1194.4302978515625, "learning_rate": 1.4012038498785557e-07, "loss": 24.5195, "step": 16280 }, { "epoch": 129.28713858424726, "grad_norm": 308.144775390625, "learning_rate": 1.398399654241884e-07, "loss": 22.852, "step": 16290 }, { "epoch": 129.3668993020937, "grad_norm": 166.0808868408203, "learning_rate": 1.3955971774939807e-07, "loss": 24.0121, "step": 16300 }, { "epoch": 129.3668993020937, "eval_loss": 2.9947915077209473, "eval_mae": 1.2989612817764282, "eval_mse": 2.9947915077209473, "eval_r2": 0.09768396615982056, "eval_rmse": 1.7305465921843732, "eval_runtime": 9.0693, "eval_samples_per_second": 442.262, "eval_steps_per_second": 13.893, "step": 16300 }, { "epoch": 129.44666001994017, "grad_norm": 304.95184326171875, "learning_rate": 1.3927964240077055e-07, "loss": 26.7386, "step": 16310 }, { "epoch": 129.52642073778665, "grad_norm": 
172.49710083007812, "learning_rate": 1.3899973981532326e-07, "loss": 22.5228, "step": 16320 }, { "epoch": 129.6061814556331, "grad_norm": 427.22314453125, "learning_rate": 1.3872001042980408e-07, "loss": 22.4321, "step": 16330 }, { "epoch": 129.68594217347956, "grad_norm": 335.5365295410156, "learning_rate": 1.3844045468069004e-07, "loss": 23.5267, "step": 16340 }, { "epoch": 129.76570289132601, "grad_norm": 206.60768127441406, "learning_rate": 1.3816107300418783e-07, "loss": 25.8498, "step": 16350 }, { "epoch": 129.84546360917247, "grad_norm": 352.3175354003906, "learning_rate": 1.378818658362325e-07, "loss": 26.3323, "step": 16360 }, { "epoch": 129.92522432701895, "grad_norm": 115.40145111083984, "learning_rate": 1.3760283361248647e-07, "loss": 24.2572, "step": 16370 }, { "epoch": 130.0, "grad_norm": 190.1355438232422, "learning_rate": 1.3732397676833934e-07, "loss": 20.9339, "step": 16380 }, { "epoch": 130.07976071784645, "grad_norm": 381.51068115234375, "learning_rate": 1.370452957389072e-07, "loss": 24.8343, "step": 16390 }, { "epoch": 130.1595214356929, "grad_norm": 565.8461303710938, "learning_rate": 1.367667909590315e-07, "loss": 27.1709, "step": 16400 }, { "epoch": 130.1595214356929, "eval_loss": 2.9923336505889893, "eval_mae": 1.3002452850341797, "eval_mse": 2.9923336505889893, "eval_r2": 0.09842449426651001, "eval_rmse": 1.7298363074548382, "eval_runtime": 9.0968, "eval_samples_per_second": 440.924, "eval_steps_per_second": 13.851, "step": 16400 }, { "epoch": 130.2392821535394, "grad_norm": 690.071533203125, "learning_rate": 1.3648846286327912e-07, "loss": 25.4686, "step": 16410 }, { "epoch": 130.31904287138585, "grad_norm": 202.82559204101562, "learning_rate": 1.362103118859408e-07, "loss": 22.9148, "step": 16420 }, { "epoch": 130.3988035892323, "grad_norm": 350.946533203125, "learning_rate": 1.3593233846103102e-07, "loss": 22.784, "step": 16430 }, { "epoch": 130.47856430707876, "grad_norm": 132.73452758789062, "learning_rate": 1.3565454302228757e-07, 
"loss": 26.8174, "step": 16440 }, { "epoch": 130.5583250249252, "grad_norm": 538.4484252929688, "learning_rate": 1.3537692600316997e-07, "loss": 22.1968, "step": 16450 }, { "epoch": 130.6380857427717, "grad_norm": 918.4005737304688, "learning_rate": 1.3509948783685985e-07, "loss": 22.7618, "step": 16460 }, { "epoch": 130.71784646061815, "grad_norm": 326.4291687011719, "learning_rate": 1.348222289562593e-07, "loss": 22.8103, "step": 16470 }, { "epoch": 130.7976071784646, "grad_norm": 255.4975128173828, "learning_rate": 1.345451497939911e-07, "loss": 25.1671, "step": 16480 }, { "epoch": 130.87736789631106, "grad_norm": 483.3714294433594, "learning_rate": 1.3426825078239727e-07, "loss": 22.1233, "step": 16490 }, { "epoch": 130.95712861415754, "grad_norm": 643.0153198242188, "learning_rate": 1.3399153235353895e-07, "loss": 23.1952, "step": 16500 }, { "epoch": 130.95712861415754, "eval_loss": 3.0047690868377686, "eval_mae": 1.2939373254776, "eval_mse": 3.0047690868377686, "eval_r2": 0.09467774629592896, "eval_rmse": 1.7334269776479678, "eval_runtime": 9.0875, "eval_samples_per_second": 441.374, "eval_steps_per_second": 13.865, "step": 16500 }, { "epoch": 131.0319042871386, "grad_norm": 323.6212158203125, "learning_rate": 1.3371499493919535e-07, "loss": 22.6509, "step": 16510 }, { "epoch": 131.11166500498504, "grad_norm": 479.4410705566406, "learning_rate": 1.3343863897086328e-07, "loss": 23.9574, "step": 16520 }, { "epoch": 131.1914257228315, "grad_norm": 190.98963928222656, "learning_rate": 1.3316246487975653e-07, "loss": 23.5103, "step": 16530 }, { "epoch": 131.27118644067798, "grad_norm": 318.1726989746094, "learning_rate": 1.3288647309680497e-07, "loss": 23.319, "step": 16540 }, { "epoch": 131.35094715852443, "grad_norm": 867.248046875, "learning_rate": 1.3261066405265397e-07, "loss": 24.4538, "step": 16550 }, { "epoch": 131.4307078763709, "grad_norm": 776.1854858398438, "learning_rate": 1.32335038177664e-07, "loss": 22.9098, "step": 16560 }, { "epoch": 
131.51046859421734, "grad_norm": 262.22979736328125, "learning_rate": 1.3205959590190937e-07, "loss": 22.832, "step": 16570 }, { "epoch": 131.5902293120638, "grad_norm": 1199.579833984375, "learning_rate": 1.3178433765517833e-07, "loss": 25.2951, "step": 16580 }, { "epoch": 131.66999002991028, "grad_norm": 213.65872192382812, "learning_rate": 1.3150926386697164e-07, "loss": 22.2523, "step": 16590 }, { "epoch": 131.74975074775674, "grad_norm": 587.2889404296875, "learning_rate": 1.3123437496650224e-07, "loss": 24.5088, "step": 16600 }, { "epoch": 131.74975074775674, "eval_loss": 2.9919657707214355, "eval_mae": 1.3000578880310059, "eval_mse": 2.9919657707214355, "eval_r2": 0.09853535890579224, "eval_rmse": 1.729729970464013, "eval_runtime": 9.0844, "eval_samples_per_second": 441.528, "eval_steps_per_second": 13.87, "step": 16600 }, { "epoch": 131.8295114656032, "grad_norm": 673.372314453125, "learning_rate": 1.3095967138269478e-07, "loss": 24.6808, "step": 16610 }, { "epoch": 131.90927218344964, "grad_norm": 602.1070556640625, "learning_rate": 1.306851535441849e-07, "loss": 26.5808, "step": 16620 }, { "epoch": 131.9890329012961, "grad_norm": 168.7585906982422, "learning_rate": 1.3041082187931774e-07, "loss": 22.9096, "step": 16630 }, { "epoch": 132.06380857427718, "grad_norm": 326.1978759765625, "learning_rate": 1.3013667681614855e-07, "loss": 22.6725, "step": 16640 }, { "epoch": 132.14356929212363, "grad_norm": 503.9088134765625, "learning_rate": 1.298627187824414e-07, "loss": 23.5713, "step": 16650 }, { "epoch": 132.22333000997008, "grad_norm": 710.6509399414062, "learning_rate": 1.2958894820566824e-07, "loss": 22.3871, "step": 16660 }, { "epoch": 132.30309072781654, "grad_norm": 299.3223571777344, "learning_rate": 1.293153655130085e-07, "loss": 27.3263, "step": 16670 }, { "epoch": 132.38285144566302, "grad_norm": 118.80657958984375, "learning_rate": 1.2904197113134884e-07, "loss": 24.3728, "step": 16680 }, { "epoch": 132.46261216350948, "grad_norm": 
922.7802124023438, "learning_rate": 1.2876876548728167e-07, "loss": 23.758, "step": 16690 }, { "epoch": 132.54237288135593, "grad_norm": 142.9518585205078, "learning_rate": 1.2849574900710518e-07, "loss": 23.1453, "step": 16700 }, { "epoch": 132.54237288135593, "eval_loss": 2.992398738861084, "eval_mae": 1.295898199081421, "eval_mse": 2.992398738861084, "eval_r2": 0.0984048843383789, "eval_rmse": 1.7298551207719923, "eval_runtime": 9.1062, "eval_samples_per_second": 440.471, "eval_steps_per_second": 13.837, "step": 16700 }, { "epoch": 132.62213359920239, "grad_norm": 365.12811279296875, "learning_rate": 1.2822292211682229e-07, "loss": 21.4491, "step": 16710 }, { "epoch": 132.70189431704884, "grad_norm": 153.51211547851562, "learning_rate": 1.279502852421399e-07, "loss": 23.5518, "step": 16720 }, { "epoch": 132.78165503489532, "grad_norm": 169.6097412109375, "learning_rate": 1.276778388084689e-07, "loss": 23.0115, "step": 16730 }, { "epoch": 132.86141575274178, "grad_norm": 417.13189697265625, "learning_rate": 1.2740558324092253e-07, "loss": 25.8207, "step": 16740 }, { "epoch": 132.94117647058823, "grad_norm": 302.108154296875, "learning_rate": 1.2713351896431634e-07, "loss": 23.1809, "step": 16750 }, { "epoch": 133.01595214356928, "grad_norm": 101.93446350097656, "learning_rate": 1.2686164640316773e-07, "loss": 23.1577, "step": 16760 }, { "epoch": 133.09571286141576, "grad_norm": 449.4652099609375, "learning_rate": 1.2658996598169446e-07, "loss": 23.7753, "step": 16770 }, { "epoch": 133.17547357926222, "grad_norm": 599.4085693359375, "learning_rate": 1.263184781238149e-07, "loss": 23.5495, "step": 16780 }, { "epoch": 133.25523429710867, "grad_norm": 615.1130981445312, "learning_rate": 1.260471832531467e-07, "loss": 27.3507, "step": 16790 }, { "epoch": 133.33499501495513, "grad_norm": 333.2358093261719, "learning_rate": 1.2577608179300636e-07, "loss": 24.9712, "step": 16800 }, { "epoch": 133.33499501495513, "eval_loss": 2.9936790466308594, "eval_mae": 
1.3027596473693848, "eval_mse": 2.9936790466308594, "eval_r2": 0.09801912307739258, "eval_rmse": 1.7302251433356468, "eval_runtime": 9.0992, "eval_samples_per_second": 440.807, "eval_steps_per_second": 13.847, "step": 16800 }, { "epoch": 133.4147557328016, "grad_norm": 647.0711059570312, "learning_rate": 1.2550517416640874e-07, "loss": 22.4761, "step": 16810 }, { "epoch": 133.49451645064806, "grad_norm": 369.3785400390625, "learning_rate": 1.2523446079606641e-07, "loss": 22.5223, "step": 16820 }, { "epoch": 133.57427716849452, "grad_norm": 680.6244506835938, "learning_rate": 1.2496394210438827e-07, "loss": 22.5881, "step": 16830 }, { "epoch": 133.65403788634097, "grad_norm": 258.2887878417969, "learning_rate": 1.2469361851347996e-07, "loss": 23.0046, "step": 16840 }, { "epoch": 133.73379860418743, "grad_norm": null, "learning_rate": 1.244234904451426e-07, "loss": 24.1551, "step": 16850 }, { "epoch": 133.8135593220339, "grad_norm": 177.69448852539062, "learning_rate": 1.241805427038098e-07, "loss": 24.7674, "step": 16860 }, { "epoch": 133.89332003988036, "grad_norm": 532.2387084960938, "learning_rate": 1.2391078728932575e-07, "loss": 25.262, "step": 16870 }, { "epoch": 133.97308075772682, "grad_norm": 595.5781860351562, "learning_rate": 1.236412286189081e-07, "loss": 23.8183, "step": 16880 }, { "epoch": 134.04785643070787, "grad_norm": 557.8573608398438, "learning_rate": 1.2337186711316429e-07, "loss": 24.132, "step": 16890 }, { "epoch": 134.12761714855435, "grad_norm": 487.1327209472656, "learning_rate": 1.2310270319239403e-07, "loss": 23.2251, "step": 16900 }, { "epoch": 134.12761714855435, "eval_loss": 2.9906692504882812, "eval_mae": 1.2998462915420532, "eval_mse": 2.9906692504882812, "eval_r2": 0.09892600774765015, "eval_rmse": 1.729355154526762, "eval_runtime": 9.0997, "eval_samples_per_second": 440.782, "eval_steps_per_second": 13.847, "step": 16900 }, { "epoch": 134.2073778664008, "grad_norm": 404.20843505859375, "learning_rate": 1.228337372765891e-07,
"loss": 23.15, "step": 16910 }, { "epoch": 134.28713858424726, "grad_norm": 351.0567321777344, "learning_rate": 1.225649697854319e-07, "loss": 24.2283, "step": 16920 }, { "epoch": 134.3668993020937, "grad_norm": 334.9957580566406, "learning_rate": 1.2229640113829562e-07, "loss": 23.772, "step": 16930 }, { "epoch": 134.44666001994017, "grad_norm": 325.6705627441406, "learning_rate": 1.2202803175424286e-07, "loss": 22.0053, "step": 16940 }, { "epoch": 134.52642073778665, "grad_norm": 649.3629150390625, "learning_rate": 1.2175986205202532e-07, "loss": 25.0228, "step": 16950 }, { "epoch": 134.6061814556331, "grad_norm": 406.55242919921875, "learning_rate": 1.214918924500833e-07, "loss": 22.5482, "step": 16960 }, { "epoch": 134.68594217347956, "grad_norm": 550.158935546875, "learning_rate": 1.2122412336654503e-07, "loss": 22.7849, "step": 16970 }, { "epoch": 134.76570289132601, "grad_norm": 379.20513916015625, "learning_rate": 1.209565552192252e-07, "loss": 25.6224, "step": 16980 }, { "epoch": 134.84546360917247, "grad_norm": 259.1018981933594, "learning_rate": 1.2068918842562553e-07, "loss": 24.6256, "step": 16990 }, { "epoch": 134.92522432701895, "grad_norm": 107.85888671875, "learning_rate": 1.204220234029336e-07, "loss": 25.328, "step": 17000 }, { "epoch": 134.92522432701895, "eval_loss": 2.992455244064331, "eval_mae": 1.2952169179916382, "eval_mse": 2.992455244064331, "eval_r2": 0.09838783740997314, "eval_rmse": 1.7298714530462462, "eval_runtime": 9.09, "eval_samples_per_second": 441.255, "eval_steps_per_second": 13.861, "step": 17000 }, { "epoch": 135.0, "grad_norm": 210.70713806152344, "learning_rate": 1.2015506056802182e-07, "loss": 21.6844, "step": 17010 }, { "epoch": 135.07976071784645, "grad_norm": 236.12600708007812, "learning_rate": 1.198883003374472e-07, "loss": 22.9484, "step": 17020 }, { "epoch": 135.1595214356929, "grad_norm": 418.2464599609375, "learning_rate": 1.1962174312745088e-07, "loss": 26.0478, "step": 17030 }, { "epoch": 135.2392821535394, 
"grad_norm": 370.0785827636719, "learning_rate": 1.1935538935395679e-07, "loss": 22.035, "step": 17040 }, { "epoch": 135.31904287138585, "grad_norm": 756.2055053710938, "learning_rate": 1.1908923943257185e-07, "loss": 24.213, "step": 17050 }, { "epoch": 135.3988035892323, "grad_norm": 1857.18408203125, "learning_rate": 1.1882329377858461e-07, "loss": 23.3173, "step": 17060 }, { "epoch": 135.47856430707876, "grad_norm": 541.4359741210938, "learning_rate": 1.1855755280696489e-07, "loss": 24.9646, "step": 17070 }, { "epoch": 135.5583250249252, "grad_norm": 381.83001708984375, "learning_rate": 1.1829201693236335e-07, "loss": 23.9757, "step": 17080 }, { "epoch": 135.6380857427717, "grad_norm": 181.71661376953125, "learning_rate": 1.1802668656911064e-07, "loss": 24.3972, "step": 17090 }, { "epoch": 135.71784646061815, "grad_norm": 664.7344970703125, "learning_rate": 1.1776156213121621e-07, "loss": 23.7074, "step": 17100 }, { "epoch": 135.71784646061815, "eval_loss": 2.9938080310821533, "eval_mae": 1.2958862781524658, "eval_mse": 2.9938080310821533, "eval_r2": 0.09798026084899902, "eval_rmse": 1.730262416826463, "eval_runtime": 9.0841, "eval_samples_per_second": 441.541, "eval_steps_per_second": 13.87, "step": 17100 }, { "epoch": 135.7976071784646, "grad_norm": 146.8472442626953, "learning_rate": 1.1749664403236884e-07, "loss": 23.579, "step": 17110 }, { "epoch": 135.87736789631106, "grad_norm": 507.60577392578125, "learning_rate": 1.1723193268593512e-07, "loss": 24.4998, "step": 17120 }, { "epoch": 135.95712861415754, "grad_norm": 139.34336853027344, "learning_rate": 1.1696742850495891e-07, "loss": 23.7718, "step": 17130 }, { "epoch": 136.0319042871386, "grad_norm": 365.88885498046875, "learning_rate": 1.1670313190216089e-07, "loss": 21.9757, "step": 17140 }, { "epoch": 136.11166500498504, "grad_norm": 432.8541259765625, "learning_rate": 1.1643904328993795e-07, "loss": 24.0741, "step": 17150 }, { "epoch": 136.1914257228315, "grad_norm": 517.509765625, "learning_rate": 
1.1617516308036224e-07, "loss": 25.0548, "step": 17160 }, { "epoch": 136.27118644067798, "grad_norm": 965.6638793945312, "learning_rate": 1.1591149168518097e-07, "loss": 21.6349, "step": 17170 }, { "epoch": 136.35094715852443, "grad_norm": 460.10772705078125, "learning_rate": 1.1564802951581537e-07, "loss": 25.4607, "step": 17180 }, { "epoch": 136.4307078763709, "grad_norm": 679.1116943359375, "learning_rate": 1.153847769833601e-07, "loss": 23.2777, "step": 17190 }, { "epoch": 136.51046859421734, "grad_norm": 374.9620666503906, "learning_rate": 1.1512173449858304e-07, "loss": 24.2546, "step": 17200 }, { "epoch": 136.51046859421734, "eval_loss": 2.9912867546081543, "eval_mae": 1.3019976615905762, "eval_mse": 2.9912867546081543, "eval_r2": 0.09873998165130615, "eval_rmse": 1.7295336812586664, "eval_runtime": 9.0801, "eval_samples_per_second": 441.735, "eval_steps_per_second": 13.877, "step": 17200 }, { "epoch": 136.5902293120638, "grad_norm": 767.04736328125, "learning_rate": 1.1485890247192401e-07, "loss": 22.6815, "step": 17210 }, { "epoch": 136.66999002991028, "grad_norm": 258.2825012207031, "learning_rate": 1.1459628131349447e-07, "loss": 23.3092, "step": 17220 }, { "epoch": 136.74975074775674, "grad_norm": 148.56033325195312, "learning_rate": 1.1433387143307712e-07, "loss": 25.3104, "step": 17230 }, { "epoch": 136.8295114656032, "grad_norm": 154.46519470214844, "learning_rate": 1.1407167324012465e-07, "loss": 25.4671, "step": 17240 }, { "epoch": 136.90927218344964, "grad_norm": 138.0871124267578, "learning_rate": 1.1380968714375974e-07, "loss": 23.5792, "step": 17250 }, { "epoch": 136.9890329012961, "grad_norm": 355.7171630859375, "learning_rate": 1.1354791355277393e-07, "loss": 23.2389, "step": 17260 }, { "epoch": 137.06380857427718, "grad_norm": 228.95948791503906, "learning_rate": 1.132863528756271e-07, "loss": 22.5726, "step": 17270 }, { "epoch": 137.14356929212363, "grad_norm": 168.3987274169922, "learning_rate": 1.1302500552044714e-07, "loss": 22.259, 
"step": 17280 }, { "epoch": 137.22333000997008, "grad_norm": 229.8258819580078, "learning_rate": 1.1276387189502917e-07, "loss": 22.3476, "step": 17290 }, { "epoch": 137.30309072781654, "grad_norm": 993.53466796875, "learning_rate": 1.1250295240683422e-07, "loss": 24.0292, "step": 17300 }, { "epoch": 137.30309072781654, "eval_loss": 2.989678382873535, "eval_mae": 1.2974820137023926, "eval_mse": 2.989678382873535, "eval_r2": 0.09922456741333008, "eval_rmse": 1.7290686460848035, "eval_runtime": 9.0973, "eval_samples_per_second": 440.901, "eval_steps_per_second": 13.85, "step": 17300 }, { "epoch": 137.38285144566302, "grad_norm": 200.87583923339844, "learning_rate": 1.1224224746298983e-07, "loss": 25.8856, "step": 17310 }, { "epoch": 137.46261216350948, "grad_norm": 846.8623657226562, "learning_rate": 1.119817574702886e-07, "loss": 24.2075, "step": 17320 }, { "epoch": 137.54237288135593, "grad_norm": 763.580810546875, "learning_rate": 1.1172148283518748e-07, "loss": 22.0138, "step": 17330 }, { "epoch": 137.62213359920239, "grad_norm": 218.8740234375, "learning_rate": 1.1146142396380775e-07, "loss": 23.3127, "step": 17340 }, { "epoch": 137.70189431704884, "grad_norm": 393.57061767578125, "learning_rate": 1.112015812619338e-07, "loss": 24.1285, "step": 17350 }, { "epoch": 137.78165503489532, "grad_norm": 405.6858825683594, "learning_rate": 1.1094195513501262e-07, "loss": 24.1266, "step": 17360 }, { "epoch": 137.86141575274178, "grad_norm": 251.8614044189453, "learning_rate": 1.106825459881536e-07, "loss": 25.0081, "step": 17370 }, { "epoch": 137.94117647058823, "grad_norm": 253.51295471191406, "learning_rate": 1.1042335422612729e-07, "loss": 24.7537, "step": 17380 }, { "epoch": 138.01595214356928, "grad_norm": 524.7293701171875, "learning_rate": 1.1016438025336502e-07, "loss": 23.3051, "step": 17390 }, { "epoch": 138.09571286141576, "grad_norm": 1051.2930908203125, "learning_rate": 1.0990562447395863e-07, "loss": 23.7153, "step": 17400 }, { "epoch": 138.09571286141576, 
"eval_loss": 2.991021156311035, "eval_mae": 1.2980806827545166, "eval_mse": 2.991020917892456, "eval_r2": 0.09881997108459473, "eval_rmse": 1.7294568274150286, "eval_runtime": 9.1055, "eval_samples_per_second": 440.505, "eval_steps_per_second": 13.838, "step": 17400 }, { "epoch": 138.17547357926222, "grad_norm": 1116.9071044921875, "learning_rate": 1.09647087291659e-07, "loss": 22.9059, "step": 17410 }, { "epoch": 138.25523429710867, "grad_norm": 268.7237548828125, "learning_rate": 1.0938876910987646e-07, "loss": 23.7649, "step": 17420 }, { "epoch": 138.33499501495513, "grad_norm": 501.5276184082031, "learning_rate": 1.091306703316792e-07, "loss": 25.158, "step": 17430 }, { "epoch": 138.4147557328016, "grad_norm": 416.10491943359375, "learning_rate": 1.0887279135979311e-07, "loss": 25.7373, "step": 17440 }, { "epoch": 138.49451645064806, "grad_norm": 473.6330871582031, "learning_rate": 1.0861513259660132e-07, "loss": 24.9958, "step": 17450 }, { "epoch": 138.57427716849452, "grad_norm": 683.8408203125, "learning_rate": 1.0835769444414344e-07, "loss": 23.4249, "step": 17460 }, { "epoch": 138.65403788634097, "grad_norm": 564.5418090820312, "learning_rate": 1.0810047730411425e-07, "loss": 22.257, "step": 17470 }, { "epoch": 138.73379860418743, "grad_norm": 1422.2911376953125, "learning_rate": 1.0784348157786429e-07, "loss": 23.0322, "step": 17480 }, { "epoch": 138.8135593220339, "grad_norm": 518.5892333984375, "learning_rate": 1.0758670766639846e-07, "loss": 24.717, "step": 17490 }, { "epoch": 138.89332003988036, "grad_norm": 437.5633544921875, "learning_rate": 1.073301559703754e-07, "loss": 23.9585, "step": 17500 }, { "epoch": 138.89332003988036, "eval_loss": 2.9898457527160645, "eval_mae": 1.298498511314392, "eval_mse": 2.9898459911346436, "eval_r2": 0.09917402267456055, "eval_rmse": 1.7291171131923493, "eval_runtime": 9.1, "eval_samples_per_second": 440.769, "eval_steps_per_second": 13.846, "step": 17500 }, { "epoch": 138.97308075772682, "grad_norm": 
623.111572265625, "learning_rate": 1.0707382689010702e-07, "loss": 22.954, "step": 17510 }, { "epoch": 139.04785643070787, "grad_norm": 375.7094421386719, "learning_rate": 1.0681772082555812e-07, "loss": 22.7863, "step": 17520 }, { "epoch": 139.12761714855435, "grad_norm": 550.0701293945312, "learning_rate": 1.065618381763452e-07, "loss": 24.1999, "step": 17530 }, { "epoch": 139.2073778664008, "grad_norm": 768.4359741210938, "learning_rate": 1.0630617934173644e-07, "loss": 22.5755, "step": 17540 }, { "epoch": 139.28713858424726, "grad_norm": 1316.3155517578125, "learning_rate": 1.0605074472065056e-07, "loss": 27.1047, "step": 17550 }, { "epoch": 139.3668993020937, "grad_norm": 213.6504669189453, "learning_rate": 1.0579553471165647e-07, "loss": 23.0379, "step": 17560 }, { "epoch": 139.44666001994017, "grad_norm": 363.3310852050781, "learning_rate": 1.0554054971297271e-07, "loss": 22.4877, "step": 17570 }, { "epoch": 139.52642073778665, "grad_norm": 393.981689453125, "learning_rate": 1.0528579012246686e-07, "loss": 25.587, "step": 17580 }, { "epoch": 139.6061814556331, "grad_norm": 402.7592468261719, "learning_rate": 1.0503125633765422e-07, "loss": 21.617, "step": 17590 }, { "epoch": 139.68594217347956, "grad_norm": 1331.46728515625, "learning_rate": 1.0477694875569831e-07, "loss": 24.8125, "step": 17600 }, { "epoch": 139.68594217347956, "eval_loss": 2.9967098236083984, "eval_mae": 1.3076661825180054, "eval_mse": 2.9967100620269775, "eval_r2": 0.09710592031478882, "eval_rmse": 1.7311008237612786, "eval_runtime": 9.0864, "eval_samples_per_second": 441.43, "eval_steps_per_second": 13.867, "step": 17600 }, { "epoch": 139.76570289132601, "grad_norm": 1280.6114501953125, "learning_rate": 1.0452286777340957e-07, "loss": 22.8116, "step": 17610 }, { "epoch": 139.84546360917247, "grad_norm": 479.7171630859375, "learning_rate": 1.0426901378724476e-07, "loss": 23.9819, "step": 17620 }, { "epoch": 139.92522432701895, "grad_norm": 1034.206298828125, "learning_rate": 
1.0401538719330632e-07, "loss": 24.7589, "step": 17630 }, { "epoch": 140.0, "grad_norm": 569.5013427734375, "learning_rate": 1.0376198838734229e-07, "loss": 22.1108, "step": 17640 }, { "epoch": 140.07976071784645, "grad_norm": 302.1639709472656, "learning_rate": 1.0350881776474482e-07, "loss": 23.5557, "step": 17650 }, { "epoch": 140.1595214356929, "grad_norm": 673.09326171875, "learning_rate": 1.0325587572055042e-07, "loss": 22.7825, "step": 17660 }, { "epoch": 140.2392821535394, "grad_norm": 493.1293640136719, "learning_rate": 1.0300316264943861e-07, "loss": 24.1157, "step": 17670 }, { "epoch": 140.31904287138585, "grad_norm": 282.7809753417969, "learning_rate": 1.0275067894573173e-07, "loss": 24.3361, "step": 17680 }, { "epoch": 140.3988035892323, "grad_norm": 1053.279052734375, "learning_rate": 1.024984250033944e-07, "loss": 24.004, "step": 17690 }, { "epoch": 140.47856430707876, "grad_norm": 756.5474243164062, "learning_rate": 1.0224640121603248e-07, "loss": 21.715, "step": 17700 }, { "epoch": 140.47856430707876, "eval_loss": 2.9895496368408203, "eval_mae": 1.296020746231079, "eval_mse": 2.9895496368408203, "eval_r2": 0.0992632508277893, "eval_rmse": 1.7290314158050513, "eval_runtime": 9.0911, "eval_samples_per_second": 441.199, "eval_steps_per_second": 13.86, "step": 17700 }, { "epoch": 140.5583250249252, "grad_norm": 758.3074340820312, "learning_rate": 1.0199460797689274e-07, "loss": 23.4764, "step": 17710 }, { "epoch": 140.6380857427717, "grad_norm": 194.74085998535156, "learning_rate": 1.0174304567886246e-07, "loss": 23.0024, "step": 17720 }, { "epoch": 140.71784646061815, "grad_norm": 564.7682495117188, "learning_rate": 1.0149171471446824e-07, "loss": 26.9969, "step": 17730 }, { "epoch": 140.7976071784646, "grad_norm": 430.6932678222656, "learning_rate": 1.0124061547587603e-07, "loss": 23.3746, "step": 17740 }, { "epoch": 140.87736789631106, "grad_norm": 475.7787170410156, "learning_rate": 1.0098974835488996e-07, "loss": 24.8886, "step": 17750 }, { 
"epoch": 140.95712861415754, "grad_norm": 1146.912841796875, "learning_rate": 1.0073911374295196e-07, "loss": 25.6257, "step": 17760 }, { "epoch": 141.0319042871386, "grad_norm": 535.0670776367188, "learning_rate": 1.0048871203114137e-07, "loss": 23.0122, "step": 17770 }, { "epoch": 141.11166500498504, "grad_norm": 176.4267120361328, "learning_rate": 1.002385436101742e-07, "loss": 22.0339, "step": 17780 }, { "epoch": 141.1914257228315, "grad_norm": 1003.4795532226562, "learning_rate": 9.998860887040189e-08, "loss": 25.6698, "step": 17790 }, { "epoch": 141.27118644067798, "grad_norm": 579.8351440429688, "learning_rate": 9.97389082018118e-08, "loss": 22.7872, "step": 17800 }, { "epoch": 141.27118644067798, "eval_loss": 2.9897937774658203, "eval_mae": 1.297013282775879, "eval_mse": 2.9897942543029785, "eval_r2": 0.0991896390914917, "eval_rmse": 1.7291021526511898, "eval_runtime": 9.0748, "eval_samples_per_second": 441.995, "eval_steps_per_second": 13.885, "step": 17800 }, { "epoch": 141.35094715852443, "grad_norm": 669.0322265625, "learning_rate": 9.948944199402596e-08, "loss": 25.5954, "step": 17810 }, { "epoch": 141.4307078763709, "grad_norm": 266.249267578125, "learning_rate": 9.924021063630042e-08, "loss": 24.4365, "step": 17820 }, { "epoch": 141.51046859421734, "grad_norm": 787.0618896484375, "learning_rate": 9.899121451752474e-08, "loss": 23.2357, "step": 17830 }, { "epoch": 141.5902293120638, "grad_norm": 1211.522705078125, "learning_rate": 9.874245402622167e-08, "loss": 22.2488, "step": 17840 }, { "epoch": 141.66999002991028, "grad_norm": 712.261474609375, "learning_rate": 9.849392955054601e-08, "loss": 23.9956, "step": 17850 }, { "epoch": 141.74975074775674, "grad_norm": 92.70515441894531, "learning_rate": 9.824564147828457e-08, "loss": 22.3979, "step": 17860 }, { "epoch": 141.8295114656032, "grad_norm": 221.4393768310547, "learning_rate": 9.799759019685507e-08, "loss": 22.6697, "step": 17870 }, { "epoch": 141.90927218344964, "grad_norm": 320.49127197265625, 
"learning_rate": 9.774977609330575e-08, "loss": 24.4215, "step": 17880 }, { "epoch": 141.9890329012961, "grad_norm": 216.5183868408203, "learning_rate": 9.750219955431504e-08, "loss": 25.499, "step": 17890 }, { "epoch": 142.06380857427718, "grad_norm": 1004.3078002929688, "learning_rate": 9.725486096619037e-08, "loss": 22.1796, "step": 17900 }, { "epoch": 142.06380857427718, "eval_loss": 3.0014090538024902, "eval_mae": 1.3100292682647705, "eval_mse": 3.0014090538024902, "eval_r2": 0.09569013118743896, "eval_rmse": 1.73245751861409, "eval_runtime": 9.0857, "eval_samples_per_second": 441.461, "eval_steps_per_second": 13.868, "step": 17900 }, { "epoch": 142.14356929212363, "grad_norm": 897.8434448242188, "learning_rate": 9.700776071486794e-08, "loss": 23.5536, "step": 17910 }, { "epoch": 142.22333000997008, "grad_norm": 592.3159790039062, "learning_rate": 9.676089918591226e-08, "loss": 25.0882, "step": 17920 }, { "epoch": 142.30309072781654, "grad_norm": 381.72515869140625, "learning_rate": 9.65142767645151e-08, "loss": 23.2826, "step": 17930 }, { "epoch": 142.38285144566302, "grad_norm": 295.14013671875, "learning_rate": 9.626789383549527e-08, "loss": 25.3821, "step": 17940 }, { "epoch": 142.46261216350948, "grad_norm": 551.1314697265625, "learning_rate": 9.602175078329808e-08, "loss": 21.8017, "step": 17950 }, { "epoch": 142.54237288135593, "grad_norm": 599.72509765625, "learning_rate": 9.577584799199395e-08, "loss": 23.8833, "step": 17960 }, { "epoch": 142.62213359920239, "grad_norm": 599.3013916015625, "learning_rate": 9.5530185845279e-08, "loss": 25.899, "step": 17970 }, { "epoch": 142.70189431704884, "grad_norm": 288.3550109863281, "learning_rate": 9.52847647264737e-08, "loss": 22.9422, "step": 17980 }, { "epoch": 142.78165503489532, "grad_norm": 1174.3599853515625, "learning_rate": 9.503958501852233e-08, "loss": 21.962, "step": 17990 }, { "epoch": 142.86141575274178, "grad_norm": 667.4810791015625, "learning_rate": 9.479464710399243e-08, "loss": 24.851, "step": 
18000 }, { "epoch": 142.86141575274178, "eval_loss": 2.9901320934295654, "eval_mae": 1.2980480194091797, "eval_mse": 2.9901320934295654, "eval_r2": 0.09908783435821533, "eval_rmse": 1.7291998419585763, "eval_runtime": 9.0739, "eval_samples_per_second": 442.036, "eval_steps_per_second": 13.886, "step": 18000 }, { "epoch": 142.94117647058823, "grad_norm": 338.58465576171875, "learning_rate": 9.454995136507451e-08, "loss": 23.9936, "step": 18010 }, { "epoch": 143.01595214356928, "grad_norm": 1124.7645263671875, "learning_rate": 9.430549818358088e-08, "loss": 23.218, "step": 18020 }, { "epoch": 143.09571286141576, "grad_norm": 153.89315795898438, "learning_rate": 9.406128794094576e-08, "loss": 25.5854, "step": 18030 }, { "epoch": 143.17547357926222, "grad_norm": 217.41473388671875, "learning_rate": 9.381732101822393e-08, "loss": 22.5631, "step": 18040 }, { "epoch": 143.25523429710867, "grad_norm": 642.3425903320312, "learning_rate": 9.357359779609058e-08, "loss": 23.2905, "step": 18050 }, { "epoch": 143.33499501495513, "grad_norm": 984.7265014648438, "learning_rate": 9.333011865484086e-08, "loss": 24.2562, "step": 18060 }, { "epoch": 143.4147557328016, "grad_norm": 294.946533203125, "learning_rate": 9.308688397438901e-08, "loss": 24.3051, "step": 18070 }, { "epoch": 143.49451645064806, "grad_norm": 652.6814575195312, "learning_rate": 9.284389413426746e-08, "loss": 23.6761, "step": 18080 }, { "epoch": 143.57427716849452, "grad_norm": 382.2814636230469, "learning_rate": 9.260114951362702e-08, "loss": 22.0659, "step": 18090 }, { "epoch": 143.65403788634097, "grad_norm": 193.34925842285156, "learning_rate": 9.23586504912358e-08, "loss": 24.6004, "step": 18100 }, { "epoch": 143.65403788634097, "eval_loss": 2.9914941787719727, "eval_mae": 1.2961755990982056, "eval_mse": 2.9914941787719727, "eval_r2": 0.09867745637893677, "eval_rmse": 1.7295936455630185, "eval_runtime": 9.0808, "eval_samples_per_second": 441.702, "eval_steps_per_second": 13.875, "step": 18100 }, { "epoch": 
143.73379860418743, "grad_norm": 255.50914001464844, "learning_rate": 9.211639744547858e-08, "loss": 21.8854, "step": 18110 }, { "epoch": 143.8135593220339, "grad_norm": 798.5938110351562, "learning_rate": 9.187439075435624e-08, "loss": 25.0725, "step": 18120 }, { "epoch": 143.89332003988036, "grad_norm": 554.2741088867188, "learning_rate": 9.163263079548556e-08, "loss": 25.6367, "step": 18130 }, { "epoch": 143.97308075772682, "grad_norm": 278.55767822265625, "learning_rate": 9.139111794609799e-08, "loss": 24.5959, "step": 18140 }, { "epoch": 144.04785643070787, "grad_norm": 290.1928405761719, "learning_rate": 9.114985258303972e-08, "loss": 20.7683, "step": 18150 }, { "epoch": 144.12761714855435, "grad_norm": 957.6152954101562, "learning_rate": 9.090883508277058e-08, "loss": 24.8539, "step": 18160 }, { "epoch": 144.2073778664008, "grad_norm": 201.96681213378906, "learning_rate": 9.066806582136357e-08, "loss": 23.961, "step": 18170 }, { "epoch": 144.28713858424726, "grad_norm": 442.1460876464844, "learning_rate": 9.042754517450463e-08, "loss": 22.6299, "step": 18180 }, { "epoch": 144.3668993020937, "grad_norm": 326.3381042480469, "learning_rate": 9.018727351749153e-08, "loss": 21.6338, "step": 18190 }, { "epoch": 144.44666001994017, "grad_norm": 411.69830322265625, "learning_rate": 8.994725122523353e-08, "loss": 22.0807, "step": 18200 }, { "epoch": 144.44666001994017, "eval_loss": 2.989143133163452, "eval_mae": 1.2954165935516357, "eval_mse": 2.989143133163452, "eval_r2": 0.09938579797744751, "eval_rmse": 1.7289138593820839, "eval_runtime": 9.07, "eval_samples_per_second": 442.228, "eval_steps_per_second": 13.892, "step": 18200 }, { "epoch": 144.52642073778665, "grad_norm": 568.3926391601562, "learning_rate": 8.9707478672251e-08, "loss": 25.0817, "step": 18210 }, { "epoch": 144.6061814556331, "grad_norm": 781.808349609375, "learning_rate": 8.94679562326744e-08, "loss": 24.5619, "step": 18220 }, { "epoch": 144.68594217347956, "grad_norm": 268.1374816894531, 
"learning_rate": 8.922868428024408e-08, "loss": 24.0362, "step": 18230 }, { "epoch": 144.76570289132601, "grad_norm": 390.1956787109375, "learning_rate": 8.89896631883095e-08, "loss": 25.5777, "step": 18240 }, { "epoch": 144.84546360917247, "grad_norm": 286.0571594238281, "learning_rate": 8.87508933298285e-08, "loss": 25.1571, "step": 18250 }, { "epoch": 144.92522432701895, "grad_norm": 972.77294921875, "learning_rate": 8.851237507736722e-08, "loss": 22.4625, "step": 18260 }, { "epoch": 145.0, "grad_norm": 560.763427734375, "learning_rate": 8.827410880309918e-08, "loss": 23.7497, "step": 18270 }, { "epoch": 145.07976071784645, "grad_norm": 493.7109680175781, "learning_rate": 8.803609487880431e-08, "loss": 22.5282, "step": 18280 }, { "epoch": 145.1595214356929, "grad_norm": 1222.901611328125, "learning_rate": 8.779833367586922e-08, "loss": 23.7649, "step": 18290 }, { "epoch": 145.2392821535394, "grad_norm": 182.56158447265625, "learning_rate": 8.756082556528612e-08, "loss": 25.1346, "step": 18300 }, { "epoch": 145.2392821535394, "eval_loss": 2.9885616302490234, "eval_mae": 1.2971998453140259, "eval_mse": 2.9885613918304443, "eval_r2": 0.09956103563308716, "eval_rmse": 1.7287456122375102, "eval_runtime": 9.0854, "eval_samples_per_second": 441.479, "eval_steps_per_second": 13.868, "step": 18300 }, { "epoch": 145.31904287138585, "grad_norm": 313.9729309082031, "learning_rate": 8.732357091765219e-08, "loss": 24.3856, "step": 18310 }, { "epoch": 145.3988035892323, "grad_norm": 387.6947937011719, "learning_rate": 8.7086570103169e-08, "loss": 22.9606, "step": 18320 }, { "epoch": 145.47856430707876, "grad_norm": 251.2086639404297, "learning_rate": 8.684982349164244e-08, "loss": 22.8897, "step": 18330 }, { "epoch": 145.5583250249252, "grad_norm": 218.4263916015625, "learning_rate": 8.661333145248129e-08, "loss": 24.4742, "step": 18340 }, { "epoch": 145.6380857427717, "grad_norm": 355.86907958984375, "learning_rate": 8.637709435469756e-08, "loss": 23.6665, "step": 18350 }, { 
"epoch": 145.71784646061815, "grad_norm": 143.9767303466797, "learning_rate": 8.61411125669051e-08, "loss": 23.7774, "step": 18360 }, { "epoch": 145.7976071784646, "grad_norm": 142.5809783935547, "learning_rate": 8.590538645731948e-08, "loss": 24.6402, "step": 18370 }, { "epoch": 145.87736789631106, "grad_norm": 287.8326110839844, "learning_rate": 8.56699163937575e-08, "loss": 24.4677, "step": 18380 }, { "epoch": 145.95712861415754, "grad_norm": 124.76634216308594, "learning_rate": 8.543470274363623e-08, "loss": 24.3025, "step": 18390 }, { "epoch": 146.0319042871386, "grad_norm": 717.8211669921875, "learning_rate": 8.519974587397263e-08, "loss": 21.0815, "step": 18400 }, { "epoch": 146.0319042871386, "eval_loss": 2.9937267303466797, "eval_mae": 1.29371178150177, "eval_mse": 2.9937262535095215, "eval_r2": 0.09800493717193604, "eval_rmse": 1.730238785113061, "eval_runtime": 9.0846, "eval_samples_per_second": 441.518, "eval_steps_per_second": 13.87, "step": 18400 }, { "epoch": 146.11166500498504, "grad_norm": 151.96337890625, "learning_rate": 8.496504615138325e-08, "loss": 24.258, "step": 18410 }, { "epoch": 146.1914257228315, "grad_norm": 306.97119140625, "learning_rate": 8.473060394208304e-08, "loss": 25.4368, "step": 18420 }, { "epoch": 146.27118644067798, "grad_norm": 157.916259765625, "learning_rate": 8.44964196118855e-08, "loss": 22.126, "step": 18430 }, { "epoch": 146.35094715852443, "grad_norm": 317.0219421386719, "learning_rate": 8.426249352620144e-08, "loss": 23.0198, "step": 18440 }, { "epoch": 146.4307078763709, "grad_norm": 213.1689910888672, "learning_rate": 8.402882605003884e-08, "loss": 26.4119, "step": 18450 }, { "epoch": 146.51046859421734, "grad_norm": 183.77806091308594, "learning_rate": 8.379541754800215e-08, "loss": 23.2543, "step": 18460 }, { "epoch": 146.5902293120638, "grad_norm": 148.0422821044922, "learning_rate": 8.356226838429198e-08, "loss": 23.2553, "step": 18470 }, { "epoch": 146.66999002991028, "grad_norm": 533.5089111328125, 
"learning_rate": 8.33293789227037e-08, "loss": 24.2542, "step": 18480 }, { "epoch": 146.74975074775674, "grad_norm": 433.2427673339844, "learning_rate": 8.309674952662791e-08, "loss": 24.8082, "step": 18490 }, { "epoch": 146.8295114656032, "grad_norm": 509.19036865234375, "learning_rate": 8.286438055904943e-08, "loss": 24.1001, "step": 18500 }, { "epoch": 146.8295114656032, "eval_loss": 2.9880919456481934, "eval_mae": 1.2967747449874878, "eval_mse": 2.9880919456481934, "eval_r2": 0.09970253705978394, "eval_rmse": 1.7286098303689568, "eval_runtime": 9.0561, "eval_samples_per_second": 442.906, "eval_steps_per_second": 13.913, "step": 18500 }, { "epoch": 146.90927218344964, "grad_norm": 518.4598388671875, "learning_rate": 8.263227238254639e-08, "loss": 23.1411, "step": 18510 }, { "epoch": 146.9890329012961, "grad_norm": 241.46841430664062, "learning_rate": 8.240042535929035e-08, "loss": 22.9866, "step": 18520 }, { "epoch": 147.06380857427718, "grad_norm": 152.85427856445312, "learning_rate": 8.216883985104514e-08, "loss": 22.618, "step": 18530 }, { "epoch": 147.14356929212363, "grad_norm": 417.3365783691406, "learning_rate": 8.193751621916655e-08, "loss": 22.4225, "step": 18540 }, { "epoch": 147.22333000997008, "grad_norm": 257.4353332519531, "learning_rate": 8.170645482460187e-08, "loss": 23.5731, "step": 18550 }, { "epoch": 147.30309072781654, "grad_norm": 996.5963745117188, "learning_rate": 8.14756560278893e-08, "loss": 23.9687, "step": 18560 }, { "epoch": 147.38285144566302, "grad_norm": 633.5903930664062, "learning_rate": 8.124512018915685e-08, "loss": 24.7502, "step": 18570 }, { "epoch": 147.46261216350948, "grad_norm": 803.2030029296875, "learning_rate": 8.101484766812264e-08, "loss": 24.6082, "step": 18580 }, { "epoch": 147.54237288135593, "grad_norm": 143.506591796875, "learning_rate": 8.078483882409387e-08, "loss": 25.6971, "step": 18590 }, { "epoch": 147.62213359920239, "grad_norm": 147.65036010742188, "learning_rate": 8.055509401596619e-08, "loss": 
21.8963, "step": 18600 }, { "epoch": 147.62213359920239, "eval_loss": 2.990255117416382, "eval_mae": 1.295224905014038, "eval_mse": 2.9902548789978027, "eval_r2": 0.09905081987380981, "eval_rmse": 1.7292353451736413, "eval_runtime": 9.0673, "eval_samples_per_second": 442.36, "eval_steps_per_second": 13.896, "step": 18600 }, { "epoch": 147.70189431704884, "grad_norm": 811.623046875, "learning_rate": 8.032561360222318e-08, "loss": 24.3758, "step": 18610 }, { "epoch": 147.78165503489532, "grad_norm": 271.70947265625, "learning_rate": 8.009639794093617e-08, "loss": 24.3177, "step": 18620 }, { "epoch": 147.86141575274178, "grad_norm": 147.8336181640625, "learning_rate": 7.986744738976303e-08, "loss": 23.4661, "step": 18630 }, { "epoch": 147.94117647058823, "grad_norm": 1055.819580078125, "learning_rate": 7.963876230594835e-08, "loss": 22.1426, "step": 18640 }, { "epoch": 148.01595214356928, "grad_norm": 908.6998291015625, "learning_rate": 7.941034304632214e-08, "loss": 23.6974, "step": 18650 }, { "epoch": 148.09571286141576, "grad_norm": 292.4888610839844, "learning_rate": 7.918218996729973e-08, "loss": 22.3105, "step": 18660 }, { "epoch": 148.17547357926222, "grad_norm": 280.0393981933594, "learning_rate": 7.89543034248813e-08, "loss": 22.5675, "step": 18670 }, { "epoch": 148.25523429710867, "grad_norm": 578.9780883789062, "learning_rate": 7.872668377465094e-08, "loss": 22.3394, "step": 18680 }, { "epoch": 148.33499501495513, "grad_norm": 352.46331787109375, "learning_rate": 7.849933137177625e-08, "loss": 22.8286, "step": 18690 }, { "epoch": 148.4147557328016, "grad_norm": 263.3887939453125, "learning_rate": 7.827224657100809e-08, "loss": 23.9573, "step": 18700 }, { "epoch": 148.4147557328016, "eval_loss": 2.989132881164551, "eval_mae": 1.2960878610610962, "eval_mse": 2.989132881164551, "eval_r2": 0.09938883781433105, "eval_rmse": 1.7289108945126554, "eval_runtime": 9.0662, "eval_samples_per_second": 442.414, "eval_steps_per_second": 13.898, "step": 18700 }, { "epoch": 
148.49451645064806, "grad_norm": 498.36920166015625, "learning_rate": 7.804542972667943e-08, "loss": 23.0484, "step": 18710 }, { "epoch": 148.57427716849452, "grad_norm": 172.67092895507812, "learning_rate": 7.781888119270552e-08, "loss": 22.4251, "step": 18720 }, { "epoch": 148.65403788634097, "grad_norm": 711.1735229492188, "learning_rate": 7.759260132258258e-08, "loss": 24.8158, "step": 18730 }, { "epoch": 148.73379860418743, "grad_norm": 180.4683380126953, "learning_rate": 7.736659046938776e-08, "loss": 26.0731, "step": 18740 }, { "epoch": 148.8135593220339, "grad_norm": 219.04393005371094, "learning_rate": 7.714084898577853e-08, "loss": 24.9301, "step": 18750 }, { "epoch": 148.89332003988036, "grad_norm": 310.49652099609375, "learning_rate": 7.691537722399208e-08, "loss": 26.3574, "step": 18760 }, { "epoch": 148.97308075772682, "grad_norm": 840.8336181640625, "learning_rate": 7.669017553584439e-08, "loss": 25.9603, "step": 18770 }, { "epoch": 149.04785643070787, "grad_norm": 538.75634765625, "learning_rate": 7.646524427273044e-08, "loss": 22.1064, "step": 18780 }, { "epoch": 149.12761714855435, "grad_norm": 635.2152709960938, "learning_rate": 7.624058378562312e-08, "loss": 23.8754, "step": 18790 }, { "epoch": 149.2073778664008, "grad_norm": 474.3734130859375, "learning_rate": 7.601619442507271e-08, "loss": 23.7222, "step": 18800 }, { "epoch": 149.2073778664008, "eval_loss": 2.988734245300293, "eval_mae": 1.2976834774017334, "eval_mse": 2.988734483718872, "eval_r2": 0.09950888156890869, "eval_rmse": 1.7287956743695514, "eval_runtime": 9.084, "eval_samples_per_second": 441.548, "eval_steps_per_second": 13.871, "step": 18800 }, { "epoch": 149.28713858424726, "grad_norm": 629.4293823242188, "learning_rate": 7.579207654120645e-08, "loss": 24.2276, "step": 18810 }, { "epoch": 149.3668993020937, "grad_norm": 448.8154602050781, "learning_rate": 7.556823048372817e-08, "loss": 24.7479, "step": 18820 }, { "epoch": 149.44666001994017, "grad_norm": 151.45706176757812, 
"learning_rate": 7.53446566019173e-08, "loss": 24.5128, "step": 18830 }, { "epoch": 149.52642073778665, "grad_norm": 187.52345275878906, "learning_rate": 7.512135524462881e-08, "loss": 22.5111, "step": 18840 }, { "epoch": 149.6061814556331, "grad_norm": 450.91326904296875, "learning_rate": 7.489832676029232e-08, "loss": 23.3607, "step": 18850 }, { "epoch": 149.68594217347956, "grad_norm": 212.0082244873047, "learning_rate": 7.46755714969115e-08, "loss": 23.701, "step": 18860 }, { "epoch": 149.76570289132601, "grad_norm": 790.0916137695312, "learning_rate": 7.447532565106768e-08, "loss": 24.3561, "step": 18870 }, { "epoch": 149.84546360917247, "grad_norm": 492.77972412109375, "learning_rate": 7.425309046472627e-08, "loss": 23.1056, "step": 18880 }, { "epoch": 149.92522432701895, "grad_norm": 1054.5626220703125, "learning_rate": 7.40311295061389e-08, "loss": 22.2082, "step": 18890 }, { "epoch": 150.0, "grad_norm": 734.5057983398438, "learning_rate": 7.380944312164386e-08, "loss": 23.3368, "step": 18900 }, { "epoch": 150.0, "eval_loss": 2.988919496536255, "eval_mae": 1.2994049787521362, "eval_mse": 2.988919734954834, "eval_r2": 0.0994531512260437, "eval_rmse": 1.728849251656961, "eval_runtime": 9.0753, "eval_samples_per_second": 441.97, "eval_steps_per_second": 13.884, "step": 18900 }, { "epoch": 150.07976071784645, "grad_norm": 494.13604736328125, "learning_rate": 7.358803165715091e-08, "loss": 23.7358, "step": 18910 }, { "epoch": 150.1595214356929, "grad_norm": 604.9675903320312, "learning_rate": 7.336689545814051e-08, "loss": 23.2456, "step": 18920 }, { "epoch": 150.2392821535394, "grad_norm": 1062.545654296875, "learning_rate": 7.3146034869664e-08, "loss": 23.7062, "step": 18930 }, { "epoch": 150.31904287138585, "grad_norm": 198.54518127441406, "learning_rate": 7.292545023634264e-08, "loss": 24.4768, "step": 18940 }, { "epoch": 150.3988035892323, "grad_norm": 414.9964904785156, "learning_rate": 7.270514190236691e-08, "loss": 24.0219, "step": 18950 }, { "epoch": 
150.47856430707876, "grad_norm": 433.3045349121094, "learning_rate": 7.248511021149614e-08, "loss": 22.1332, "step": 18960 }, { "epoch": 150.5583250249252, "grad_norm": 282.57342529296875, "learning_rate": 7.226535550705831e-08, "loss": 22.157, "step": 18970 }, { "epoch": 150.6380857427717, "grad_norm": 497.9869384765625, "learning_rate": 7.204587813194885e-08, "loss": 23.2071, "step": 18980 }, { "epoch": 150.71784646061815, "grad_norm": 345.4354553222656, "learning_rate": 7.182667842863072e-08, "loss": 23.0731, "step": 18990 }, { "epoch": 150.7976071784646, "grad_norm": 1432.7169189453125, "learning_rate": 7.160775673913347e-08, "loss": 24.0156, "step": 19000 }, { "epoch": 150.7976071784646, "eval_loss": 2.988027572631836, "eval_mae": 1.294617772102356, "eval_mse": 2.9880270957946777, "eval_r2": 0.09972202777862549, "eval_rmse": 1.728591072461812, "eval_runtime": 9.0673, "eval_samples_per_second": 442.36, "eval_steps_per_second": 13.896, "step": 19000 }, { "epoch": 150.87736789631106, "grad_norm": 461.5654296875, "learning_rate": 7.138911340505277e-08, "loss": 26.2207, "step": 19010 }, { "epoch": 150.95712861415754, "grad_norm": 546.268798828125, "learning_rate": 7.117074876755028e-08, "loss": 25.6184, "step": 19020 }, { "epoch": 151.0319042871386, "grad_norm": 317.89990234375, "learning_rate": 7.095266316735246e-08, "loss": 21.8385, "step": 19030 }, { "epoch": 151.11166500498504, "grad_norm": 225.2183380126953, "learning_rate": 7.073485694475042e-08, "loss": 22.523, "step": 19040 }, { "epoch": 151.1914257228315, "grad_norm": 1056.931884765625, "learning_rate": 7.051733043959959e-08, "loss": 25.8547, "step": 19050 }, { "epoch": 151.27118644067798, "grad_norm": 271.0970458984375, "learning_rate": 7.030008399131859e-08, "loss": 24.099, "step": 19060 }, { "epoch": 151.35094715852443, "grad_norm": 183.71864318847656, "learning_rate": 7.008311793888943e-08, "loss": 25.2921, "step": 19070 }, { "epoch": 151.4307078763709, "grad_norm": 432.2311096191406, "learning_rate": 
6.98664326208562e-08, "loss": 22.6419, "step": 19080 }, { "epoch": 151.51046859421734, "grad_norm": 131.0139923095703, "learning_rate": 6.965002837532536e-08, "loss": 26.2508, "step": 19090 }, { "epoch": 151.5902293120638, "grad_norm": 782.22900390625, "learning_rate": 6.943390553996436e-08, "loss": 23.513, "step": 19100 }, { "epoch": 151.5902293120638, "eval_loss": 2.9875853061676025, "eval_mae": 1.2965117692947388, "eval_mse": 2.9875853061676025, "eval_r2": 0.09985512495040894, "eval_rmse": 1.7284632788021859, "eval_runtime": 9.0747, "eval_samples_per_second": 441.998, "eval_steps_per_second": 13.885, "step": 19100 }, { "epoch": 151.66999002991028, "grad_norm": 759.2440185546875, "learning_rate": 6.921806445200198e-08, "loss": 22.4186, "step": 19110 }, { "epoch": 151.74975074775674, "grad_norm": 289.4839172363281, "learning_rate": 6.900250544822705e-08, "loss": 25.331, "step": 19120 }, { "epoch": 151.8295114656032, "grad_norm": 499.3775329589844, "learning_rate": 6.878722886498836e-08, "loss": 24.3565, "step": 19130 }, { "epoch": 151.90927218344964, "grad_norm": 393.87469482421875, "learning_rate": 6.85722350381941e-08, "loss": 22.3572, "step": 19140 }, { "epoch": 151.9890329012961, "grad_norm": 187.20021057128906, "learning_rate": 6.835752430331116e-08, "loss": 22.3619, "step": 19150 }, { "epoch": 152.06380857427718, "grad_norm": 673.76953125, "learning_rate": 6.814309699536466e-08, "loss": 22.2599, "step": 19160 }, { "epoch": 152.14356929212363, "grad_norm": 531.2612915039062, "learning_rate": 6.792895344893768e-08, "loss": 24.8074, "step": 19170 }, { "epoch": 152.22333000997008, "grad_norm": 632.0092163085938, "learning_rate": 6.771509399817024e-08, "loss": 25.183, "step": 19180 }, { "epoch": 152.30309072781654, "grad_norm": 1241.568115234375, "learning_rate": 6.750151897675941e-08, "loss": 23.1038, "step": 19190 }, { "epoch": 152.38285144566302, "grad_norm": 1045.9813232421875, "learning_rate": 6.728822871795817e-08, "loss": 22.1253, "step": 19200 }, { 
"epoch": 152.38285144566302, "eval_loss": 2.9882588386535645, "eval_mae": 1.2969865798950195, "eval_mse": 2.9882588386535645, "eval_r2": 0.0996522307395935, "eval_rmse": 1.728658103458739, "eval_runtime": 9.0569, "eval_samples_per_second": 442.869, "eval_steps_per_second": 13.912, "step": 19200 }, { "epoch": 152.46261216350948, "grad_norm": 290.79876708984375, "learning_rate": 6.70752235545752e-08, "loss": 23.7293, "step": 19210 }, { "epoch": 152.54237288135593, "grad_norm": 307.24053955078125, "learning_rate": 6.686250381897448e-08, "loss": 23.989, "step": 19220 }, { "epoch": 152.62213359920239, "grad_norm": 719.582763671875, "learning_rate": 6.665006984307467e-08, "loss": 22.8119, "step": 19230 }, { "epoch": 152.70189431704884, "grad_norm": 954.9166259765625, "learning_rate": 6.643792195834813e-08, "loss": 23.2559, "step": 19240 }, { "epoch": 152.78165503489532, "grad_norm": 157.11846923828125, "learning_rate": 6.622606049582122e-08, "loss": 25.1447, "step": 19250 }, { "epoch": 152.86141575274178, "grad_norm": 376.54119873046875, "learning_rate": 6.601448578607336e-08, "loss": 24.9452, "step": 19260 }, { "epoch": 152.94117647058823, "grad_norm": 445.1767578125, "learning_rate": 6.580319815923629e-08, "loss": 24.1425, "step": 19270 }, { "epoch": 153.01595214356928, "grad_norm": 590.1676025390625, "learning_rate": 6.559219794499396e-08, "loss": 22.0261, "step": 19280 }, { "epoch": 153.09571286141576, "grad_norm": 406.55609130859375, "learning_rate": 6.538148547258191e-08, "loss": 24.1728, "step": 19290 }, { "epoch": 153.17547357926222, "grad_norm": 123.7415771484375, "learning_rate": 6.517106107078652e-08, "loss": 24.9433, "step": 19300 }, { "epoch": 153.17547357926222, "eval_loss": 2.9871866703033447, "eval_mae": 1.2974798679351807, "eval_mse": 2.9871866703033447, "eval_r2": 0.09997528791427612, "eval_rmse": 1.7283479598458595, "eval_runtime": 9.0817, "eval_samples_per_second": 441.655, "eval_steps_per_second": 13.874, "step": 19300 }, { "epoch": 
153.25523429710867, "grad_norm": 321.2386779785156, "learning_rate": 6.496092506794487e-08, "loss": 24.3306, "step": 19310 }, { "epoch": 153.33499501495513, "grad_norm": 681.6568603515625, "learning_rate": 6.475107779194392e-08, "loss": 26.5892, "step": 19320 }, { "epoch": 153.4147557328016, "grad_norm": 405.7754821777344, "learning_rate": 6.454151957022005e-08, "loss": 22.8697, "step": 19330 }, { "epoch": 153.49451645064806, "grad_norm": 145.44203186035156, "learning_rate": 6.433225072975884e-08, "loss": 23.3981, "step": 19340 }, { "epoch": 153.57427716849452, "grad_norm": 438.10137939453125, "learning_rate": 6.41232715970941e-08, "loss": 23.5633, "step": 19350 }, { "epoch": 153.65403788634097, "grad_norm": 933.510009765625, "learning_rate": 6.391458249830766e-08, "loss": 23.3836, "step": 19360 }, { "epoch": 153.73379860418743, "grad_norm": 486.6509094238281, "learning_rate": 6.370618375902891e-08, "loss": 22.858, "step": 19370 }, { "epoch": 153.8135593220339, "grad_norm": 1071.4417724609375, "learning_rate": 6.3498075704434e-08, "loss": 22.2621, "step": 19380 }, { "epoch": 153.89332003988036, "grad_norm": 136.1456298828125, "learning_rate": 6.329025865924561e-08, "loss": 23.795, "step": 19390 }, { "epoch": 153.97308075772682, "grad_norm": 863.8829345703125, "learning_rate": 6.30827329477325e-08, "loss": 23.1745, "step": 19400 }, { "epoch": 153.97308075772682, "eval_loss": 2.988656520843506, "eval_mae": 1.3007216453552246, "eval_mse": 2.9886562824249268, "eval_r2": 0.09953248500823975, "eval_rmse": 1.7287730569467257, "eval_runtime": 9.0795, "eval_samples_per_second": 441.766, "eval_steps_per_second": 13.877, "step": 19400 }, { "epoch": 154.04785643070787, "grad_norm": 736.192626953125, "learning_rate": 6.287549889370836e-08, "loss": 22.2077, "step": 19410 }, { "epoch": 154.12761714855435, "grad_norm": 219.8618927001953, "learning_rate": 6.266855682053226e-08, "loss": 24.9694, "step": 19420 }, { "epoch": 154.2073778664008, "grad_norm": 583.2711181640625, 
"learning_rate": 6.24619070511076e-08, "loss": 27.841, "step": 19430 }, { "epoch": 154.28713858424726, "grad_norm": 660.341552734375, "learning_rate": 6.225554990788151e-08, "loss": 24.4646, "step": 19440 }, { "epoch": 154.3668993020937, "grad_norm": 145.15283203125, "learning_rate": 6.204948571284455e-08, "loss": 23.7078, "step": 19450 }, { "epoch": 154.44666001994017, "grad_norm": 456.0956115722656, "learning_rate": 6.184371478753034e-08, "loss": 22.9063, "step": 19460 }, { "epoch": 154.52642073778665, "grad_norm": 570.6836547851562, "learning_rate": 6.163823745301466e-08, "loss": 23.8765, "step": 19470 }, { "epoch": 154.6061814556331, "grad_norm": 139.70950317382812, "learning_rate": 6.14330540299155e-08, "loss": 23.5889, "step": 19480 }, { "epoch": 154.68594217347956, "grad_norm": 708.5938720703125, "learning_rate": 6.122816483839194e-08, "loss": 23.7718, "step": 19490 }, { "epoch": 154.76570289132601, "grad_norm": 1011.361328125, "learning_rate": 6.1023570198144e-08, "loss": 23.5985, "step": 19500 }, { "epoch": 154.76570289132601, "eval_loss": 2.988107442855835, "eval_mae": 1.2986818552017212, "eval_mse": 2.988107442855835, "eval_r2": 0.09969782829284668, "eval_rmse": 1.7286143129269278, "eval_runtime": 9.0787, "eval_samples_per_second": 441.801, "eval_steps_per_second": 13.879, "step": 19500 }, { "epoch": 154.84546360917247, "grad_norm": 203.33657836914062, "learning_rate": 6.081927042841233e-08, "loss": 22.9083, "step": 19510 }, { "epoch": 154.92522432701895, "grad_norm": 678.0839233398438, "learning_rate": 6.061526584797724e-08, "loss": 22.9988, "step": 19520 }, { "epoch": 155.0, "grad_norm": 339.16741943359375, "learning_rate": 6.04115567751584e-08, "loss": 20.8858, "step": 19530 }, { "epoch": 155.07976071784645, "grad_norm": 198.96624755859375, "learning_rate": 6.020814352781469e-08, "loss": 23.7149, "step": 19540 }, { "epoch": 155.1595214356929, "grad_norm": 102.24024963378906, "learning_rate": 6.000502642334305e-08, "loss": 23.6419, "step": 19550 }, { 
"epoch": 155.2392821535394, "grad_norm": 699.8232421875, "learning_rate": 5.980220577867859e-08, "loss": 22.9911, "step": 19560 }, { "epoch": 155.31904287138585, "grad_norm": 709.4566650390625, "learning_rate": 5.959968191029363e-08, "loss": 23.0959, "step": 19570 }, { "epoch": 155.3988035892323, "grad_norm": 254.96136474609375, "learning_rate": 5.939745513419761e-08, "loss": 25.32, "step": 19580 }, { "epoch": 155.47856430707876, "grad_norm": 195.00021362304688, "learning_rate": 5.9195525765936226e-08, "loss": 25.8602, "step": 19590 }, { "epoch": 155.5583250249252, "grad_norm": 591.7045288085938, "learning_rate": 5.899389412059128e-08, "loss": 22.6448, "step": 19600 }, { "epoch": 155.5583250249252, "eval_loss": 2.986642360687256, "eval_mae": 1.2976558208465576, "eval_mse": 2.986642360687256, "eval_r2": 0.10013926029205322, "eval_rmse": 1.7281904873847835, "eval_runtime": 9.0698, "eval_samples_per_second": 442.235, "eval_steps_per_second": 13.892, "step": 19600 }, { "epoch": 155.6380857427717, "grad_norm": 704.7317504882812, "learning_rate": 5.8792560512779864e-08, "loss": 23.3964, "step": 19610 }, { "epoch": 155.71784646061815, "grad_norm": 186.69126892089844, "learning_rate": 5.8591525256654064e-08, "loss": 23.1593, "step": 19620 }, { "epoch": 155.7976071784646, "grad_norm": 120.61994171142578, "learning_rate": 5.8390788665900525e-08, "loss": 21.7643, "step": 19630 }, { "epoch": 155.87736789631106, "grad_norm": 1568.0780029296875, "learning_rate": 5.8190351053739816e-08, "loss": 25.4865, "step": 19640 }, { "epoch": 155.95712861415754, "grad_norm": 371.3355407714844, "learning_rate": 5.7990212732925843e-08, "loss": 24.1773, "step": 19650 }, { "epoch": 156.0319042871386, "grad_norm": 231.3843231201172, "learning_rate": 5.779037401574582e-08, "loss": 23.05, "step": 19660 }, { "epoch": 156.11166500498504, "grad_norm": 308.94384765625, "learning_rate": 5.759083521401914e-08, "loss": 25.9338, "step": 19670 }, { "epoch": 156.1914257228315, "grad_norm": 
1007.2675170898438, "learning_rate": 5.739159663909754e-08, "loss": 22.6109, "step": 19680 }, { "epoch": 156.27118644067798, "grad_norm": 337.06707763671875, "learning_rate": 5.719265860186406e-08, "loss": 24.2056, "step": 19690 }, { "epoch": 156.35094715852443, "grad_norm": 229.7677001953125, "learning_rate": 5.699402141273277e-08, "loss": 24.7657, "step": 19700 }, { "epoch": 156.35094715852443, "eval_loss": 2.9869258403778076, "eval_mae": 1.298068642616272, "eval_mse": 2.9869258403778076, "eval_r2": 0.10005384683609009, "eval_rmse": 1.7282725017710048, "eval_runtime": 9.0728, "eval_samples_per_second": 442.092, "eval_steps_per_second": 13.888, "step": 19700 }, { "epoch": 156.4307078763709, "grad_norm": 316.57904052734375, "learning_rate": 5.6795685381648525e-08, "loss": 23.1404, "step": 19710 }, { "epoch": 156.51046859421734, "grad_norm": 271.0904235839844, "learning_rate": 5.659765081808629e-08, "loss": 24.78, "step": 19720 }, { "epoch": 156.5902293120638, "grad_norm": 284.07196044921875, "learning_rate": 5.639991803105018e-08, "loss": 22.9677, "step": 19730 }, { "epoch": 156.66999002991028, "grad_norm": 276.0367736816406, "learning_rate": 5.62024873290739e-08, "loss": 22.588, "step": 19740 }, { "epoch": 156.74975074775674, "grad_norm": 96.85620880126953, "learning_rate": 5.6005359020219745e-08, "loss": 23.1715, "step": 19750 }, { "epoch": 156.8295114656032, "grad_norm": 719.615478515625, "learning_rate": 5.580853341207795e-08, "loss": 23.5608, "step": 19760 }, { "epoch": 156.90927218344964, "grad_norm": 189.48875427246094, "learning_rate": 5.561201081176648e-08, "loss": 23.8051, "step": 19770 }, { "epoch": 156.9890329012961, "grad_norm": 208.35182189941406, "learning_rate": 5.541579152593073e-08, "loss": 24.8919, "step": 19780 }, { "epoch": 157.06380857427718, "grad_norm": 410.5036926269531, "learning_rate": 5.521987586074248e-08, "loss": 20.6443, "step": 19790 }, { "epoch": 157.14356929212363, "grad_norm": 713.73095703125, "learning_rate": 
5.5024264121900087e-08, "loss": 24.4717, "step": 19800 }, { "epoch": 157.14356929212363, "eval_loss": 2.9873814582824707, "eval_mae": 1.2965357303619385, "eval_mse": 2.9873812198638916, "eval_r2": 0.09991669654846191, "eval_rmse": 1.7284042408718776, "eval_runtime": 9.072, "eval_samples_per_second": 442.13, "eval_steps_per_second": 13.889, "step": 19800 }, { "epoch": 157.22333000997008, "grad_norm": 355.75177001953125, "learning_rate": 5.482895661462744e-08, "loss": 25.0636, "step": 19810 }, { "epoch": 157.30309072781654, "grad_norm": 200.91207885742188, "learning_rate": 5.463395364367371e-08, "loss": 23.233, "step": 19820 }, { "epoch": 157.38285144566302, "grad_norm": 839.2678833007812, "learning_rate": 5.4439255513313104e-08, "loss": 25.1534, "step": 19830 }, { "epoch": 157.46261216350948, "grad_norm": 794.5970458984375, "learning_rate": 5.424486252734395e-08, "loss": 23.7456, "step": 19840 }, { "epoch": 157.54237288135593, "grad_norm": 797.7697143554688, "learning_rate": 5.405077498908847e-08, "loss": 22.2091, "step": 19850 }, { "epoch": 157.62213359920239, "grad_norm": 429.3815002441406, "learning_rate": 5.385699320139248e-08, "loss": 24.0605, "step": 19860 }, { "epoch": 157.70189431704884, "grad_norm": 488.4406433105469, "learning_rate": 5.36635174666244e-08, "loss": 22.0634, "step": 19870 }, { "epoch": 157.78165503489532, "grad_norm": 588.324951171875, "learning_rate": 5.3470348086675386e-08, "loss": 26.5165, "step": 19880 }, { "epoch": 157.86141575274178, "grad_norm": 530.62646484375, "learning_rate": 5.327748536295842e-08, "loss": 22.664, "step": 19890 }, { "epoch": 157.94117647058823, "grad_norm": 289.956787109375, "learning_rate": 5.308492959640795e-08, "loss": 23.8528, "step": 19900 }, { "epoch": 157.94117647058823, "eval_loss": 2.9881298542022705, "eval_mae": 1.2951724529266357, "eval_mse": 2.9881298542022705, "eval_r2": 0.09969109296798706, "eval_rmse": 1.7286207953748187, "eval_runtime": 9.0883, "eval_samples_per_second": 441.338, 
"eval_steps_per_second": 13.864, "step": 19900 }, { "epoch": 158.01595214356928, "grad_norm": 237.19174194335938, "learning_rate": 5.2892681087479574e-08, "loss": 22.773, "step": 19910 }, { "epoch": 158.09571286141576, "grad_norm": 274.02972412109375, "learning_rate": 5.2700740136149554e-08, "loss": 23.108, "step": 19920 }, { "epoch": 158.17547357926222, "grad_norm": 458.33648681640625, "learning_rate": 5.250910704191386e-08, "loss": 26.5027, "step": 19930 }, { "epoch": 158.25523429710867, "grad_norm": 227.19955444335938, "learning_rate": 5.2317782103788485e-08, "loss": 24.2449, "step": 19940 }, { "epoch": 158.33499501495513, "grad_norm": 455.753173828125, "learning_rate": 5.2126765620308514e-08, "loss": 23.0352, "step": 19950 }, { "epoch": 158.4147557328016, "grad_norm": 636.4472045898438, "learning_rate": 5.193605788952754e-08, "loss": 23.5401, "step": 19960 }, { "epoch": 158.49451645064806, "grad_norm": 778.9373779296875, "learning_rate": 5.174565920901766e-08, "loss": 24.3629, "step": 19970 }, { "epoch": 158.57427716849452, "grad_norm": 329.8791809082031, "learning_rate": 5.155556987586856e-08, "loss": 24.5927, "step": 19980 }, { "epoch": 158.65403788634097, "grad_norm": 900.1206665039062, "learning_rate": 5.1365790186687184e-08, "loss": 24.1182, "step": 19990 }, { "epoch": 158.73379860418743, "grad_norm": 1131.921875, "learning_rate": 5.1176320437597566e-08, "loss": 23.8033, "step": 20000 }, { "epoch": 158.73379860418743, "eval_loss": 2.988154172897339, "eval_mae": 1.2970765829086304, "eval_mse": 2.9881539344787598, "eval_r2": 0.09968382120132446, "eval_rmse": 1.7286277605310982, "eval_runtime": 9.147, "eval_samples_per_second": 438.505, "eval_steps_per_second": 13.775, "step": 20000 }, { "epoch": 158.8135593220339, "grad_norm": 357.07879638671875, "learning_rate": 5.098716092423991e-08, "loss": 24.5281, "step": 20010 }, { "epoch": 158.89332003988036, "grad_norm": 219.1493377685547, "learning_rate": 5.079831194177034e-08, "loss": 22.6291, "step": 20020 }, { 
"epoch": 158.97308075772682, "grad_norm": 915.48046875, "learning_rate": 5.060977378486064e-08, "loss": 22.1652, "step": 20030 }, { "epoch": 159.04785643070787, "grad_norm": 368.95794677734375, "learning_rate": 5.042154674769736e-08, "loss": 21.4069, "step": 20040 }, { "epoch": 159.12761714855435, "grad_norm": 542.30810546875, "learning_rate": 5.023363112398177e-08, "loss": 24.379, "step": 20050 }, { "epoch": 159.2073778664008, "grad_norm": 768.3283081054688, "learning_rate": 5.0046027206929046e-08, "loss": 25.799, "step": 20060 }, { "epoch": 159.28713858424726, "grad_norm": 226.7152862548828, "learning_rate": 4.985873528926826e-08, "loss": 24.935, "step": 20070 }, { "epoch": 159.3668993020937, "grad_norm": 565.6937866210938, "learning_rate": 4.9671755663241304e-08, "loss": 22.7557, "step": 20080 }, { "epoch": 159.44666001994017, "grad_norm": 238.96612548828125, "learning_rate": 4.948508862060316e-08, "loss": 23.0391, "step": 20090 }, { "epoch": 159.52642073778665, "grad_norm": 343.6816711425781, "learning_rate": 4.9298734452620755e-08, "loss": 24.5896, "step": 20100 }, { "epoch": 159.52642073778665, "eval_loss": 2.989109516143799, "eval_mae": 1.294765830039978, "eval_mse": 2.989109516143799, "eval_r2": 0.09939593076705933, "eval_rmse": 1.7289041373493785, "eval_runtime": 9.0579, "eval_samples_per_second": 442.817, "eval_steps_per_second": 13.91, "step": 20100 }, { "epoch": 159.6061814556331, "grad_norm": 803.0233764648438, "learning_rate": 4.9112693450072905e-08, "loss": 22.7036, "step": 20110 }, { "epoch": 159.68594217347956, "grad_norm": 220.58651733398438, "learning_rate": 4.892696590324996e-08, "loss": 25.0648, "step": 20120 }, { "epoch": 159.76570289132601, "grad_norm": 197.690673828125, "learning_rate": 4.874155210195291e-08, "loss": 24.4888, "step": 20130 }, { "epoch": 159.84546360917247, "grad_norm": 403.8138732910156, "learning_rate": 4.85564523354933e-08, "loss": 21.8955, "step": 20140 }, { "epoch": 159.92522432701895, "grad_norm": 137.9730224609375, 
"learning_rate": 4.837166689269276e-08, "loss": 22.8665, "step": 20150 }, { "epoch": 160.0, "grad_norm": 173.60696411132812, "learning_rate": 4.818719606188223e-08, "loss": 22.3084, "step": 20160 }, { "epoch": 160.07976071784645, "grad_norm": 669.76025390625, "learning_rate": 4.800304013090206e-08, "loss": 24.1283, "step": 20170 }, { "epoch": 160.1595214356929, "grad_norm": 423.6705017089844, "learning_rate": 4.7819199387100966e-08, "loss": 25.3707, "step": 20180 }, { "epoch": 160.2392821535394, "grad_norm": 1109.6336669921875, "learning_rate": 4.763567411733593e-08, "loss": 23.5949, "step": 20190 }, { "epoch": 160.31904287138585, "grad_norm": 775.6715087890625, "learning_rate": 4.7452464607971784e-08, "loss": 23.3331, "step": 20200 }, { "epoch": 160.31904287138585, "eval_loss": 2.9872071743011475, "eval_mae": 1.2983434200286865, "eval_mse": 2.9872071743011475, "eval_r2": 0.09996908903121948, "eval_rmse": 1.7283538915109797, "eval_runtime": 9.0827, "eval_samples_per_second": 441.609, "eval_steps_per_second": 13.873, "step": 20200 }, { "epoch": 160.3988035892323, "grad_norm": 564.7343139648438, "learning_rate": 4.726957114488073e-08, "loss": 22.8999, "step": 20210 }, { "epoch": 160.47856430707876, "grad_norm": 414.7461853027344, "learning_rate": 4.7086994013441375e-08, "loss": 23.5632, "step": 20220 }, { "epoch": 160.5583250249252, "grad_norm": 993.0753173828125, "learning_rate": 4.690473349853924e-08, "loss": 25.4307, "step": 20230 }, { "epoch": 160.6380857427717, "grad_norm": 496.5462646484375, "learning_rate": 4.672278988456568e-08, "loss": 23.8259, "step": 20240 }, { "epoch": 160.71784646061815, "grad_norm": 124.31321716308594, "learning_rate": 4.654116345541742e-08, "loss": 25.5149, "step": 20250 }, { "epoch": 160.7976071784646, "grad_norm": 247.69906616210938, "learning_rate": 4.635985449449631e-08, "loss": 21.4839, "step": 20260 }, { "epoch": 160.87736789631106, "grad_norm": 826.633056640625, "learning_rate": 4.617886328470902e-08, "loss": 23.509, "step": 
20270 }, { "epoch": 160.95712861415754, "grad_norm": 447.5196228027344, "learning_rate": 4.599819010846614e-08, "loss": 22.9887, "step": 20280 }, { "epoch": 161.0319042871386, "grad_norm": 264.0032958984375, "learning_rate": 4.581783524768232e-08, "loss": 23.0958, "step": 20290 }, { "epoch": 161.11166500498504, "grad_norm": 990.3718872070312, "learning_rate": 4.5637798983775236e-08, "loss": 23.5094, "step": 20300 }, { "epoch": 161.11166500498504, "eval_loss": 2.988409996032715, "eval_mae": 1.3004189729690552, "eval_mse": 2.9884097576141357, "eval_r2": 0.09960675239562988, "eval_rmse": 1.7287017549635726, "eval_runtime": 9.0987, "eval_samples_per_second": 440.83, "eval_steps_per_second": 13.848, "step": 20300 }, { "epoch": 161.1914257228315, "grad_norm": 395.7349853515625, "learning_rate": 4.545808159766554e-08, "loss": 22.8364, "step": 20310 }, { "epoch": 161.27118644067798, "grad_norm": 499.4708251953125, "learning_rate": 4.527868336977644e-08, "loss": 23.2073, "step": 20320 }, { "epoch": 161.35094715852443, "grad_norm": 238.495849609375, "learning_rate": 4.509960458003301e-08, "loss": 23.3997, "step": 20330 }, { "epoch": 161.4307078763709, "grad_norm": 423.6451110839844, "learning_rate": 4.4920845507861847e-08, "loss": 23.6306, "step": 20340 }, { "epoch": 161.51046859421734, "grad_norm": 320.92340087890625, "learning_rate": 4.4742406432190873e-08, "loss": 26.0693, "step": 20350 }, { "epoch": 161.5902293120638, "grad_norm": 530.1223754882812, "learning_rate": 4.456428763144848e-08, "loss": 22.0599, "step": 20360 }, { "epoch": 161.66999002991028, "grad_norm": 875.4921264648438, "learning_rate": 4.4386489383563516e-08, "loss": 23.7559, "step": 20370 }, { "epoch": 161.74975074775674, "grad_norm": 733.9068603515625, "learning_rate": 4.420901196596452e-08, "loss": 23.7832, "step": 20380 }, { "epoch": 161.8295114656032, "grad_norm": 239.6742401123047, "learning_rate": 4.4031855655579344e-08, "loss": 24.1239, "step": 20390 }, { "epoch": 161.90927218344964, "grad_norm": 
482.89874267578125, "learning_rate": 4.385502072883501e-08, "loss": 23.7691, "step": 20400 }, { "epoch": 161.90927218344964, "eval_loss": 2.986546754837036, "eval_mae": 1.2973977327346802, "eval_mse": 2.986546754837036, "eval_r2": 0.10016804933547974, "eval_rmse": 1.7281628264828046, "eval_runtime": 9.1006, "eval_samples_per_second": 440.741, "eval_steps_per_second": 13.845, "step": 20400 }, { "epoch": 161.9890329012961, "grad_norm": 102.24373626708984, "learning_rate": 4.367850746165708e-08, "loss": 24.6804, "step": 20410 }, { "epoch": 162.06380857427718, "grad_norm": 172.08877563476562, "learning_rate": 4.3502316129468837e-08, "loss": 20.5014, "step": 20420 }, { "epoch": 162.14356929212363, "grad_norm": 229.16693115234375, "learning_rate": 4.3326447007191654e-08, "loss": 22.8511, "step": 20430 }, { "epoch": 162.22333000997008, "grad_norm": 334.28741455078125, "learning_rate": 4.315090036924402e-08, "loss": 23.8031, "step": 20440 }, { "epoch": 162.30309072781654, "grad_norm": 113.47332000732422, "learning_rate": 4.2975676489541094e-08, "loss": 23.2761, "step": 20450 }, { "epoch": 162.38285144566302, "grad_norm": 1696.001708984375, "learning_rate": 4.280077564149454e-08, "loss": 23.0712, "step": 20460 }, { "epoch": 162.46261216350948, "grad_norm": 429.3016357421875, "learning_rate": 4.2626198098011986e-08, "loss": 26.6888, "step": 20470 }, { "epoch": 162.54237288135593, "grad_norm": 127.84202575683594, "learning_rate": 4.245194413149653e-08, "loss": 23.5264, "step": 20480 }, { "epoch": 162.62213359920239, "grad_norm": 718.2103881835938, "learning_rate": 4.227801401384642e-08, "loss": 24.2782, "step": 20490 }, { "epoch": 162.70189431704884, "grad_norm": 98.80939483642578, "learning_rate": 4.210440801645462e-08, "loss": 22.7801, "step": 20500 }, { "epoch": 162.70189431704884, "eval_loss": 2.986672878265381, "eval_mae": 1.296818733215332, "eval_mse": 2.98667311668396, "eval_r2": 0.10012996196746826, "eval_rmse": 1.7281993856855637, "eval_runtime": 9.0883, 
"eval_samples_per_second": 441.338, "eval_steps_per_second": 13.864, "step": 20500 }, { "epoch": 162.78165503489532, "grad_norm": 710.266845703125, "learning_rate": 4.193112641020816e-08, "loss": 26.5156, "step": 20510 }, { "epoch": 162.86141575274178, "grad_norm": 599.1068115234375, "learning_rate": 4.175816946548819e-08, "loss": 24.2544, "step": 20520 }, { "epoch": 162.94117647058823, "grad_norm": 640.97021484375, "learning_rate": 4.158553745216903e-08, "loss": 21.5817, "step": 20530 }, { "epoch": 163.01595214356928, "grad_norm": 779.9424438476562, "learning_rate": 4.14132306396181e-08, "loss": 24.3248, "step": 20540 }, { "epoch": 163.09571286141576, "grad_norm": 733.3753051757812, "learning_rate": 4.124124929669537e-08, "loss": 25.5774, "step": 20550 }, { "epoch": 163.17547357926222, "grad_norm": 497.6435852050781, "learning_rate": 4.1069593691753057e-08, "loss": 22.9471, "step": 20560 }, { "epoch": 163.25523429710867, "grad_norm": 440.9189453125, "learning_rate": 4.0898264092634916e-08, "loss": 24.5098, "step": 20570 }, { "epoch": 163.33499501495513, "grad_norm": 234.5493621826172, "learning_rate": 4.0727260766676237e-08, "loss": 24.8129, "step": 20580 }, { "epoch": 163.4147557328016, "grad_norm": 147.0826873779297, "learning_rate": 4.0556583980703025e-08, "loss": 23.6257, "step": 20590 }, { "epoch": 163.49451645064806, "grad_norm": 217.13864135742188, "learning_rate": 4.0386234001031776e-08, "loss": 23.653, "step": 20600 }, { "epoch": 163.49451645064806, "eval_loss": 2.986043691635132, "eval_mae": 1.299048662185669, "eval_mse": 2.986043930053711, "eval_r2": 0.10031956434249878, "eval_rmse": 1.728017340785014, "eval_runtime": 9.0988, "eval_samples_per_second": 440.828, "eval_steps_per_second": 13.848, "step": 20600 }, { "epoch": 163.57427716849452, "grad_norm": 666.421630859375, "learning_rate": 4.021621109346923e-08, "loss": 21.0761, "step": 20610 }, { "epoch": 163.65403788634097, "grad_norm": 378.9775390625, "learning_rate": 4.0046515523311595e-08, "loss": 
24.4574, "step": 20620 }, { "epoch": 163.73379860418743, "grad_norm": 370.6648864746094, "learning_rate": 3.987714755534435e-08, "loss": 22.2913, "step": 20630 }, { "epoch": 163.8135593220339, "grad_norm": 836.6725463867188, "learning_rate": 3.9708107453841926e-08, "loss": 26.003, "step": 20640 }, { "epoch": 163.89332003988036, "grad_norm": 203.53436279296875, "learning_rate": 3.953939548256696e-08, "loss": 24.5101, "step": 20650 }, { "epoch": 163.97308075772682, "grad_norm": 131.53392028808594, "learning_rate": 3.9371011904770295e-08, "loss": 22.3992, "step": 20660 }, { "epoch": 164.04785643070787, "grad_norm": 511.9137268066406, "learning_rate": 3.9202956983190234e-08, "loss": 21.8256, "step": 20670 }, { "epoch": 164.12761714855435, "grad_norm": 560.97900390625, "learning_rate": 3.903523098005221e-08, "loss": 24.6533, "step": 20680 }, { "epoch": 164.2073778664008, "grad_norm": 486.2814636230469, "learning_rate": 3.8867834157068596e-08, "loss": 24.5964, "step": 20690 }, { "epoch": 164.28713858424726, "grad_norm": 784.7884521484375, "learning_rate": 3.8700766775438124e-08, "loss": 23.9616, "step": 20700 }, { "epoch": 164.28713858424726, "eval_loss": 2.9880242347717285, "eval_mae": 1.2942732572555542, "eval_mse": 2.9880242347717285, "eval_r2": 0.09972292184829712, "eval_rmse": 1.7285902449023969, "eval_runtime": 9.0853, "eval_samples_per_second": 441.481, "eval_steps_per_second": 13.869, "step": 20700 }, { "epoch": 164.3668993020937, "grad_norm": 169.08421325683594, "learning_rate": 3.853402909584516e-08, "loss": 23.0227, "step": 20710 }, { "epoch": 164.44666001994017, "grad_norm": 1243.542236328125, "learning_rate": 3.8367621378460003e-08, "loss": 21.9406, "step": 20720 }, { "epoch": 164.52642073778665, "grad_norm": 575.0645751953125, "learning_rate": 3.820154388293795e-08, "loss": 23.3995, "step": 20730 }, { "epoch": 164.6061814556331, "grad_norm": 333.740966796875, "learning_rate": 3.803579686841901e-08, "loss": 23.2316, "step": 20740 }, { "epoch": 
164.68594217347956, "grad_norm": 812.60986328125, "learning_rate": 3.7870380593527484e-08, "loss": 24.5726, "step": 20750 }, { "epoch": 164.76570289132601, "grad_norm": 127.72270202636719, "learning_rate": 3.770529531637173e-08, "loss": 25.9085, "step": 20760 }, { "epoch": 164.84546360917247, "grad_norm": 478.8529052734375, "learning_rate": 3.754054129454348e-08, "loss": 24.0288, "step": 20770 }, { "epoch": 164.92522432701895, "grad_norm": 464.51904296875, "learning_rate": 3.737611878511779e-08, "loss": 23.2809, "step": 20780 }, { "epoch": 165.0, "grad_norm": 463.0782775878906, "learning_rate": 3.7212028044652233e-08, "loss": 21.8776, "step": 20790 }, { "epoch": 165.07976071784645, "grad_norm": 1210.877685546875, "learning_rate": 3.7048269329186754e-08, "loss": 25.0118, "step": 20800 }, { "epoch": 165.07976071784645, "eval_loss": 2.9878416061401367, "eval_mae": 1.2945494651794434, "eval_mse": 2.9878413677215576, "eval_r2": 0.09977799654006958, "eval_rmse": 1.7285373492411316, "eval_runtime": 9.1111, "eval_samples_per_second": 440.233, "eval_steps_per_second": 13.829, "step": 20800 }, { "epoch": 165.1595214356929, "grad_norm": 495.2569274902344, "learning_rate": 3.688484289424337e-08, "loss": 22.6857, "step": 20810 }, { "epoch": 165.2392821535394, "grad_norm": 192.6989288330078, "learning_rate": 3.672174899482544e-08, "loss": 23.7292, "step": 20820 }, { "epoch": 165.31904287138585, "grad_norm": 655.8668212890625, "learning_rate": 3.6558987885417445e-08, "loss": 22.3309, "step": 20830 }, { "epoch": 165.3988035892323, "grad_norm": 940.3027954101562, "learning_rate": 3.639655981998482e-08, "loss": 23.7985, "step": 20840 }, { "epoch": 165.47856430707876, "grad_norm": 500.3050231933594, "learning_rate": 3.623446505197306e-08, "loss": 23.809, "step": 20850 }, { "epoch": 165.5583250249252, "grad_norm": 570.357666015625, "learning_rate": 3.607270383430777e-08, "loss": 24.2061, "step": 20860 }, { "epoch": 165.6380857427717, "grad_norm": 756.9074096679688, "learning_rate": 
3.592740413257877e-08, "loss": 23.6694, "step": 20870 }, { "epoch": 165.71784646061815, "grad_norm": 497.1473388671875, "learning_rate": 3.5766277355517347e-08, "loss": 23.7223, "step": 20880 }, { "epoch": 165.7976071784646, "grad_norm": 224.22097778320312, "learning_rate": 3.5605484859341893e-08, "loss": 24.6421, "step": 20890 }, { "epoch": 165.87736789631106, "grad_norm": 277.3652038574219, "learning_rate": 3.544502689494588e-08, "loss": 22.7341, "step": 20900 }, { "epoch": 165.87736789631106, "eval_loss": 2.987583875656128, "eval_mae": 1.2943426370620728, "eval_mse": 2.987583875656128, "eval_r2": 0.09985554218292236, "eval_rmse": 1.728462864991935, "eval_runtime": 9.0908, "eval_samples_per_second": 441.213, "eval_steps_per_second": 13.86, "step": 20900 }, { "epoch": 165.95712861415754, "grad_norm": 834.4371337890625, "learning_rate": 3.528490371270101e-08, "loss": 25.1784, "step": 20910 }, { "epoch": 166.0319042871386, "grad_norm": 747.2515258789062, "learning_rate": 3.512511556245631e-08, "loss": 23.1242, "step": 20920 }, { "epoch": 166.11166500498504, "grad_norm": 206.32666015625, "learning_rate": 3.496566269353832e-08, "loss": 23.3442, "step": 20930 }, { "epoch": 166.1914257228315, "grad_norm": 1007.1183471679688, "learning_rate": 3.480654535475025e-08, "loss": 21.7909, "step": 20940 }, { "epoch": 166.27118644067798, "grad_norm": 145.24815368652344, "learning_rate": 3.4647763794371705e-08, "loss": 25.2308, "step": 20950 }, { "epoch": 166.35094715852443, "grad_norm": 725.6841430664062, "learning_rate": 3.448931826015858e-08, "loss": 26.5595, "step": 20960 }, { "epoch": 166.4307078763709, "grad_norm": 207.36798095703125, "learning_rate": 3.43312089993423e-08, "loss": 22.0913, "step": 20970 }, { "epoch": 166.51046859421734, "grad_norm": 713.8314819335938, "learning_rate": 3.417343625862954e-08, "loss": 23.4224, "step": 20980 }, { "epoch": 166.5902293120638, "grad_norm": 121.8656234741211, "learning_rate": 3.401600028420201e-08, "loss": 24.7084, "step": 20990 }, 
{ "epoch": 166.66999002991028, "grad_norm": 920.5518798828125, "learning_rate": 3.385890132171584e-08, "loss": 23.9545, "step": 21000 }, { "epoch": 166.66999002991028, "eval_loss": 2.9885149002075195, "eval_mae": 1.293936848640442, "eval_mse": 2.9885149002075195, "eval_r2": 0.09957504272460938, "eval_rmse": 1.7287321655500945, "eval_runtime": 9.0843, "eval_samples_per_second": 441.53, "eval_steps_per_second": 13.87, "step": 21000 }, { "epoch": 166.74975074775674, "grad_norm": 615.0982666015625, "learning_rate": 3.370213961630142e-08, "loss": 25.1141, "step": 21010 }, { "epoch": 166.8295114656032, "grad_norm": 381.2695617675781, "learning_rate": 3.354571541256279e-08, "loss": 21.3079, "step": 21020 }, { "epoch": 166.90927218344964, "grad_norm": 261.74169921875, "learning_rate": 3.3389628954577336e-08, "loss": 22.5619, "step": 21030 }, { "epoch": 166.9890329012961, "grad_norm": 1052.0252685546875, "learning_rate": 3.323388048589559e-08, "loss": 25.1966, "step": 21040 }, { "epoch": 167.06380857427718, "grad_norm": 293.083984375, "learning_rate": 3.307847024954069e-08, "loss": 23.9578, "step": 21050 }, { "epoch": 167.14356929212363, "grad_norm": 624.7679443359375, "learning_rate": 3.2923398488007733e-08, "loss": 24.2935, "step": 21060 }, { "epoch": 167.22333000997008, "grad_norm": 598.4464111328125, "learning_rate": 3.2768665443263984e-08, "loss": 23.0892, "step": 21070 }, { "epoch": 167.30309072781654, "grad_norm": 801.3358154296875, "learning_rate": 3.2614271356748156e-08, "loss": 23.4058, "step": 21080 }, { "epoch": 167.38285144566302, "grad_norm": 103.62716674804688, "learning_rate": 3.2460216469369944e-08, "loss": 23.3563, "step": 21090 }, { "epoch": 167.46261216350948, "grad_norm": 403.87445068359375, "learning_rate": 3.230650102150975e-08, "loss": 22.5184, "step": 21100 }, { "epoch": 167.46261216350948, "eval_loss": 2.9855480194091797, "eval_mae": 1.2961645126342773, "eval_mse": 2.9855480194091797, "eval_r2": 0.10046899318695068, "eval_rmse": 1.7278738436035137, 
"eval_runtime": 9.0721, "eval_samples_per_second": 442.125, "eval_steps_per_second": 13.889, "step": 21100 }, { "epoch": 167.54237288135593, "grad_norm": 494.8824157714844, "learning_rate": 3.215312525301853e-08, "loss": 22.9951, "step": 21110 }, { "epoch": 167.62213359920239, "grad_norm": 809.4344482421875, "learning_rate": 3.2000089403216966e-08, "loss": 23.5586, "step": 21120 }, { "epoch": 167.70189431704884, "grad_norm": 136.36012268066406, "learning_rate": 3.184739371089562e-08, "loss": 24.2447, "step": 21130 }, { "epoch": 167.78165503489532, "grad_norm": 115.35761260986328, "learning_rate": 3.1695038414314036e-08, "loss": 23.0265, "step": 21140 }, { "epoch": 167.86141575274178, "grad_norm": 160.2898712158203, "learning_rate": 3.154302375120069e-08, "loss": 25.8412, "step": 21150 }, { "epoch": 167.94117647058823, "grad_norm": 358.9959716796875, "learning_rate": 3.1391349958752644e-08, "loss": 23.6602, "step": 21160 }, { "epoch": 168.01595214356928, "grad_norm": 1159.0592041015625, "learning_rate": 3.124001727363512e-08, "loss": 21.6542, "step": 21170 }, { "epoch": 168.09571286141576, "grad_norm": 204.9313201904297, "learning_rate": 3.108902593198076e-08, "loss": 21.4206, "step": 21180 }, { "epoch": 168.17547357926222, "grad_norm": 379.1077880859375, "learning_rate": 3.093837616938996e-08, "loss": 22.0864, "step": 21190 }, { "epoch": 168.25523429710867, "grad_norm": 534.8294067382812, "learning_rate": 3.0788068220929995e-08, "loss": 24.5088, "step": 21200 }, { "epoch": 168.25523429710867, "eval_loss": 2.9861366748809814, "eval_mae": 1.297932505607605, "eval_mse": 2.9861366748809814, "eval_r2": 0.10029160976409912, "eval_rmse": 1.7280441761948626, "eval_runtime": 9.0903, "eval_samples_per_second": 441.239, "eval_steps_per_second": 13.861, "step": 21200 }, { "epoch": 168.33499501495513, "grad_norm": 349.81402587890625, "learning_rate": 3.0638102321134786e-08, "loss": 23.7894, "step": 21210 }, { "epoch": 168.4147557328016, "grad_norm": 262.42547607421875, 
"learning_rate": 3.048847870400442e-08, "loss": 26.1303, "step": 21220 }, { "epoch": 168.49451645064806, "grad_norm": 584.2164916992188, "learning_rate": 3.033919760300518e-08, "loss": 20.7866, "step": 21230 }, { "epoch": 168.57427716849452, "grad_norm": 638.7256469726562, "learning_rate": 3.019025925106858e-08, "loss": 23.7111, "step": 21240 }, { "epoch": 168.65403788634097, "grad_norm": 249.6031951904297, "learning_rate": 3.004166388059159e-08, "loss": 22.8574, "step": 21250 }, { "epoch": 168.73379860418743, "grad_norm": 939.9068603515625, "learning_rate": 2.9893411723435875e-08, "loss": 25.1982, "step": 21260 }, { "epoch": 168.8135593220339, "grad_norm": 1453.6534423828125, "learning_rate": 2.9745503010927487e-08, "loss": 25.1476, "step": 21270 }, { "epoch": 168.89332003988036, "grad_norm": 820.752197265625, "learning_rate": 2.9597937973856846e-08, "loss": 24.9524, "step": 21280 }, { "epoch": 168.97308075772682, "grad_norm": 318.9416809082031, "learning_rate": 2.945071684247785e-08, "loss": 25.4665, "step": 21290 }, { "epoch": 169.04785643070787, "grad_norm": 698.4987182617188, "learning_rate": 2.9303839846507834e-08, "loss": 22.4213, "step": 21300 }, { "epoch": 169.04785643070787, "eval_loss": 2.985820770263672, "eval_mae": 1.2957457304000854, "eval_mse": 2.985820770263672, "eval_r2": 0.10038679838180542, "eval_rmse": 1.7279527685280267, "eval_runtime": 9.1152, "eval_samples_per_second": 440.032, "eval_steps_per_second": 13.823, "step": 21300 }, { "epoch": 169.12761714855435, "grad_norm": 513.4396362304688, "learning_rate": 2.9157307215127353e-08, "loss": 22.4686, "step": 21310 }, { "epoch": 169.2073778664008, "grad_norm": 747.5562133789062, "learning_rate": 2.901111917697932e-08, "loss": 24.3735, "step": 21320 }, { "epoch": 169.28713858424726, "grad_norm": 326.5459289550781, "learning_rate": 2.886527596016927e-08, "loss": 24.3784, "step": 21330 }, { "epoch": 169.3668993020937, "grad_norm": 464.299072265625, "learning_rate": 2.8719777792264505e-08, "loss": 
23.1418, "step": 21340 }, { "epoch": 169.44666001994017, "grad_norm": 176.2613067626953, "learning_rate": 2.8574624900293888e-08, "loss": 23.1151, "step": 21350 }, { "epoch": 169.52642073778665, "grad_norm": 325.4080810546875, "learning_rate": 2.842981751074769e-08, "loss": 24.0602, "step": 21360 }, { "epoch": 169.6061814556331, "grad_norm": 252.9591522216797, "learning_rate": 2.8285355849577086e-08, "loss": 24.0935, "step": 21370 }, { "epoch": 169.68594217347956, "grad_norm": 140.0712127685547, "learning_rate": 2.814124014219349e-08, "loss": 24.217, "step": 21380 }, { "epoch": 169.76570289132601, "grad_norm": 171.5577850341797, "learning_rate": 2.7997470613468853e-08, "loss": 25.4641, "step": 21390 }, { "epoch": 169.84546360917247, "grad_norm": 470.9291076660156, "learning_rate": 2.7854047487734843e-08, "loss": 23.8713, "step": 21400 }, { "epoch": 169.84546360917247, "eval_loss": 2.9879159927368164, "eval_mae": 1.294660210609436, "eval_mse": 2.9879157543182373, "eval_r2": 0.09975558519363403, "eval_rmse": 1.72855886631559, "eval_runtime": 9.0644, "eval_samples_per_second": 442.5, "eval_steps_per_second": 13.901, "step": 21400 }, { "epoch": 169.92522432701895, "grad_norm": 498.794921875, "learning_rate": 2.7710970988782584e-08, "loss": 23.0455, "step": 21410 }, { "epoch": 170.0, "grad_norm": 135.36593627929688, "learning_rate": 2.75682413398623e-08, "loss": 22.2191, "step": 21420 }, { "epoch": 170.07976071784645, "grad_norm": 1148.9171142578125, "learning_rate": 2.742585876368314e-08, "loss": 24.7924, "step": 21430 }, { "epoch": 170.1595214356929, "grad_norm": 969.6644287109375, "learning_rate": 2.728382348241254e-08, "loss": 23.3944, "step": 21440 }, { "epoch": 170.2392821535394, "grad_norm": 677.8080444335938, "learning_rate": 2.7142135717676206e-08, "loss": 23.1857, "step": 21450 }, { "epoch": 170.31904287138585, "grad_norm": 340.6452331542969, "learning_rate": 2.7000795690557436e-08, "loss": 25.0963, "step": 21460 }, { "epoch": 170.3988035892323, "grad_norm": 
1116.0831298828125, "learning_rate": 2.685980362159698e-08, "loss": 21.595, "step": 21470 }, { "epoch": 170.47856430707876, "grad_norm": 704.5206298828125, "learning_rate": 2.6719159730792724e-08, "loss": 22.0435, "step": 21480 }, { "epoch": 170.5583250249252, "grad_norm": 437.9672546386719, "learning_rate": 2.657886423759914e-08, "loss": 25.7437, "step": 21490 }, { "epoch": 170.6380857427717, "grad_norm": 393.89739990234375, "learning_rate": 2.6438917360927266e-08, "loss": 23.6258, "step": 21500 }, { "epoch": 170.6380857427717, "eval_loss": 2.985738515853882, "eval_mae": 1.294340968132019, "eval_mse": 2.985738515853882, "eval_r2": 0.10041159391403198, "eval_rmse": 1.7279289672477518, "eval_runtime": 9.0977, "eval_samples_per_second": 440.882, "eval_steps_per_second": 13.85, "step": 21500 }, { "epoch": 170.71784646061815, "grad_norm": 555.5964965820312, "learning_rate": 2.6299319319143974e-08, "loss": 23.184, "step": 21510 }, { "epoch": 170.7976071784646, "grad_norm": 287.30377197265625, "learning_rate": 2.616007033007189e-08, "loss": 23.8837, "step": 21520 }, { "epoch": 170.87736789631106, "grad_norm": 648.5049438476562, "learning_rate": 2.6021170610989045e-08, "loss": 23.3464, "step": 21530 }, { "epoch": 170.95712861415754, "grad_norm": 1021.4182739257812, "learning_rate": 2.5882620378628567e-08, "loss": 25.7258, "step": 21540 }, { "epoch": 171.0319042871386, "grad_norm": 339.848388671875, "learning_rate": 2.574441984917794e-08, "loss": 21.4869, "step": 21550 }, { "epoch": 171.11166500498504, "grad_norm": 1136.0574951171875, "learning_rate": 2.560656923827931e-08, "loss": 23.1689, "step": 21560 }, { "epoch": 171.1914257228315, "grad_norm": 198.03421020507812, "learning_rate": 2.546906876102878e-08, "loss": 23.7328, "step": 21570 }, { "epoch": 171.27118644067798, "grad_norm": 463.3050231933594, "learning_rate": 2.5331918631975952e-08, "loss": 23.6093, "step": 21580 }, { "epoch": 171.35094715852443, "grad_norm": 298.0157165527344, "learning_rate": 
2.5195119065123837e-08, "loss": 22.656, "step": 21590 }, { "epoch": 171.4307078763709, "grad_norm": 527.3220825195312, "learning_rate": 2.505867027392858e-08, "loss": 24.8544, "step": 21600 }, { "epoch": 171.4307078763709, "eval_loss": 2.985347032546997, "eval_mae": 1.2959703207015991, "eval_mse": 2.985347032546997, "eval_r2": 0.10052955150604248, "eval_rmse": 1.7278156824577664, "eval_runtime": 9.1181, "eval_samples_per_second": 439.893, "eval_steps_per_second": 13.819, "step": 21600 }, { "epoch": 171.51046859421734, "grad_norm": 379.0373229980469, "learning_rate": 2.492257247129878e-08, "loss": 24.2283, "step": 21610 }, { "epoch": 171.5902293120638, "grad_norm": 256.77093505859375, "learning_rate": 2.4786825869595545e-08, "loss": 24.2607, "step": 21620 }, { "epoch": 171.66999002991028, "grad_norm": 853.0988159179688, "learning_rate": 2.4651430680631857e-08, "loss": 24.1805, "step": 21630 }, { "epoch": 171.74975074775674, "grad_norm": 120.61614990234375, "learning_rate": 2.451638711567236e-08, "loss": 24.5177, "step": 21640 }, { "epoch": 171.8295114656032, "grad_norm": 637.2351684570312, "learning_rate": 2.4381695385433192e-08, "loss": 23.1552, "step": 21650 }, { "epoch": 171.90927218344964, "grad_norm": 93.33638763427734, "learning_rate": 2.424735570008149e-08, "loss": 24.8734, "step": 21660 }, { "epoch": 171.9890329012961, "grad_norm": 102.57537078857422, "learning_rate": 2.411336826923477e-08, "loss": 23.239, "step": 21670 }, { "epoch": 172.06380857427718, "grad_norm": 117.32454681396484, "learning_rate": 2.3979733301961265e-08, "loss": 22.2166, "step": 21680 }, { "epoch": 172.14356929212363, "grad_norm": 666.1997680664062, "learning_rate": 2.384645100677912e-08, "loss": 24.1691, "step": 21690 }, { "epoch": 172.22333000997008, "grad_norm": 317.80279541015625, "learning_rate": 2.3713521591656156e-08, "loss": 23.974, "step": 21700 }, { "epoch": 172.22333000997008, "eval_loss": 2.9877893924713135, "eval_mae": 1.2950363159179688, "eval_mse": 2.9877893924713135, 
"eval_r2": 0.09979367256164551, "eval_rmse": 1.728522314716045, "eval_runtime": 9.1078, "eval_samples_per_second": 440.394, "eval_steps_per_second": 13.834, "step": 21700 }, { "epoch": 172.30309072781654, "grad_norm": 220.41844177246094, "learning_rate": 2.35809452640095e-08, "loss": 24.3645, "step": 21710 }, { "epoch": 172.38285144566302, "grad_norm": 176.1417999267578, "learning_rate": 2.344872223070557e-08, "loss": 24.1271, "step": 21720 }, { "epoch": 172.46261216350948, "grad_norm": 750.0230712890625, "learning_rate": 2.3316852698059276e-08, "loss": 22.9614, "step": 21730 }, { "epoch": 172.54237288135593, "grad_norm": 635.2285766601562, "learning_rate": 2.3185336871834105e-08, "loss": 24.1727, "step": 21740 }, { "epoch": 172.62213359920239, "grad_norm": 276.4936218261719, "learning_rate": 2.3054174957241547e-08, "loss": 22.6836, "step": 21750 }, { "epoch": 172.70189431704884, "grad_norm": 410.6717529296875, "learning_rate": 2.2923367158940833e-08, "loss": 22.6491, "step": 21760 }, { "epoch": 172.78165503489532, "grad_norm": 966.279052734375, "learning_rate": 2.279291368103878e-08, "loss": 24.87, "step": 21770 }, { "epoch": 172.86141575274178, "grad_norm": 311.2745056152344, "learning_rate": 2.2662814727089253e-08, "loss": 24.8305, "step": 21780 }, { "epoch": 172.94117647058823, "grad_norm": 182.11019897460938, "learning_rate": 2.2533070500092897e-08, "loss": 23.1164, "step": 21790 }, { "epoch": 173.01595214356928, "grad_norm": 469.53656005859375, "learning_rate": 2.2403681202496945e-08, "loss": 21.9007, "step": 21800 }, { "epoch": 173.01595214356928, "eval_loss": 2.9855799674987793, "eval_mae": 1.2968862056732178, "eval_mse": 2.9855797290802, "eval_r2": 0.10045945644378662, "eval_rmse": 1.7278830195010888, "eval_runtime": 9.0932, "eval_samples_per_second": 441.099, "eval_steps_per_second": 13.857, "step": 21800 }, { "epoch": 173.09571286141576, "grad_norm": 457.03216552734375, "learning_rate": 2.2274647036194723e-08, "loss": 24.6794, "step": 21810 }, { "epoch": 
173.17547357926222, "grad_norm": 439.5225830078125, "learning_rate": 2.2145968202525566e-08, "loss": 22.7831, "step": 21820 }, { "epoch": 173.25523429710867, "grad_norm": 752.224609375, "learning_rate": 2.20176449022742e-08, "loss": 23.3946, "step": 21830 }, { "epoch": 173.33499501495513, "grad_norm": 185.27256774902344, "learning_rate": 2.1889677335670626e-08, "loss": 25.1106, "step": 21840 }, { "epoch": 173.4147557328016, "grad_norm": 573.1307373046875, "learning_rate": 2.1762065702389863e-08, "loss": 21.3581, "step": 21850 }, { "epoch": 173.49451645064806, "grad_norm": 388.6234436035156, "learning_rate": 2.1634810201551583e-08, "loss": 24.5047, "step": 21860 }, { "epoch": 173.57427716849452, "grad_norm": 453.5081787109375, "learning_rate": 2.1507911031719506e-08, "loss": 23.7577, "step": 21870 }, { "epoch": 173.65403788634097, "grad_norm": 386.9514465332031, "learning_rate": 2.1381368390901572e-08, "loss": 22.9572, "step": 21880 }, { "epoch": 173.73379860418743, "grad_norm": 404.28570556640625, "learning_rate": 2.125518247654945e-08, "loss": 23.3758, "step": 21890 }, { "epoch": 173.8135593220339, "grad_norm": 399.77294921875, "learning_rate": 2.1129353485558054e-08, "loss": 24.8796, "step": 21900 }, { "epoch": 173.8135593220339, "eval_loss": 2.9860446453094482, "eval_mae": 1.298270344734192, "eval_mse": 2.9860446453094482, "eval_r2": 0.10031932592391968, "eval_rmse": 1.7280175477434967, "eval_runtime": 9.1532, "eval_samples_per_second": 438.207, "eval_steps_per_second": 13.766, "step": 21900 }, { "epoch": 173.89332003988036, "grad_norm": 164.7106170654297, "learning_rate": 2.1003881614265305e-08, "loss": 25.2506, "step": 21910 }, { "epoch": 173.97308075772682, "grad_norm": 167.10516357421875, "learning_rate": 2.0878767058452152e-08, "loss": 24.2321, "step": 21920 }, { "epoch": 174.04785643070787, "grad_norm": 169.80325317382812, "learning_rate": 2.0754010013341704e-08, "loss": 21.6802, "step": 21930 }, { "epoch": 174.12761714855435, "grad_norm": 
402.775634765625, "learning_rate": 2.0629610673599502e-08, "loss": 24.4619, "step": 21940 }, { "epoch": 174.2073778664008, "grad_norm": 328.2489013671875, "learning_rate": 2.050556923333277e-08, "loss": 23.9299, "step": 21950 }, { "epoch": 174.28713858424726, "grad_norm": 745.198974609375, "learning_rate": 2.0381885886090245e-08, "loss": 22.8688, "step": 21960 }, { "epoch": 174.3668993020937, "grad_norm": 718.9580078125, "learning_rate": 2.0258560824862077e-08, "loss": 24.202, "step": 21970 }, { "epoch": 174.44666001994017, "grad_norm": 568.1629638671875, "learning_rate": 2.0135594242079252e-08, "loss": 24.3933, "step": 21980 }, { "epoch": 174.52642073778665, "grad_norm": 854.043701171875, "learning_rate": 2.0012986329613346e-08, "loss": 23.1882, "step": 21990 }, { "epoch": 174.6061814556331, "grad_norm": 551.8663940429688, "learning_rate": 1.9890737278776494e-08, "loss": 24.3438, "step": 22000 }, { "epoch": 174.6061814556331, "eval_loss": 2.9860429763793945, "eval_mae": 1.2983522415161133, "eval_mse": 2.9860429763793945, "eval_r2": 0.10031986236572266, "eval_rmse": 1.7280170648403315, "eval_runtime": 9.1231, "eval_samples_per_second": 439.654, "eval_steps_per_second": 13.811, "step": 22000 }, { "epoch": 174.68594217347956, "grad_norm": 310.53900146484375, "learning_rate": 1.9768847280320605e-08, "loss": 24.7496, "step": 22010 }, { "epoch": 174.76570289132601, "grad_norm": 690.4244995117188, "learning_rate": 1.964731652443752e-08, "loss": 24.1169, "step": 22020 }, { "epoch": 174.84546360917247, "grad_norm": 717.24267578125, "learning_rate": 1.9526145200758626e-08, "loss": 22.478, "step": 22030 }, { "epoch": 174.92522432701895, "grad_norm": 82.09710693359375, "learning_rate": 1.9405333498354105e-08, "loss": 22.5815, "step": 22040 }, { "epoch": 175.0, "grad_norm": 272.75677490234375, "learning_rate": 1.9284881605733357e-08, "loss": 22.7834, "step": 22050 }, { "epoch": 175.07976071784645, "grad_norm": 125.09671783447266, "learning_rate": 1.9164789710844233e-08, 
"loss": 26.051, "step": 22060 }, { "epoch": 175.1595214356929, "grad_norm": 937.7044067382812, "learning_rate": 1.9045058001072838e-08, "loss": 22.7696, "step": 22070 }, { "epoch": 175.2392821535394, "grad_norm": 635.612060546875, "learning_rate": 1.892568666324318e-08, "loss": 23.5138, "step": 22080 }, { "epoch": 175.31904287138585, "grad_norm": 331.49896240234375, "learning_rate": 1.8806675883617174e-08, "loss": 24.1467, "step": 22090 }, { "epoch": 175.3988035892323, "grad_norm": 672.1553344726562, "learning_rate": 1.868802584789392e-08, "loss": 21.7088, "step": 22100 }, { "epoch": 175.3988035892323, "eval_loss": 2.9872848987579346, "eval_mae": 1.2949774265289307, "eval_mse": 2.9872846603393555, "eval_r2": 0.09994572401046753, "eval_rmse": 1.7283763075034775, "eval_runtime": 9.3776, "eval_samples_per_second": 427.72, "eval_steps_per_second": 13.436, "step": 22100 }, { "epoch": 175.47856430707876, "grad_norm": 190.68898010253906, "learning_rate": 1.856973674120979e-08, "loss": 23.3435, "step": 22110 }, { "epoch": 175.5583250249252, "grad_norm": 517.9446411132812, "learning_rate": 1.8451808748137844e-08, "loss": 24.4168, "step": 22120 }, { "epoch": 175.6380857427717, "grad_norm": 399.7643737792969, "learning_rate": 1.833424205268777e-08, "loss": 24.571, "step": 22130 }, { "epoch": 175.71784646061815, "grad_norm": 825.5567626953125, "learning_rate": 1.8217036838305443e-08, "loss": 23.1831, "step": 22140 }, { "epoch": 175.7976071784646, "grad_norm": 284.5862121582031, "learning_rate": 1.810019328787285e-08, "loss": 23.4052, "step": 22150 }, { "epoch": 175.87736789631106, "grad_norm": 1046.2374267578125, "learning_rate": 1.7983711583707346e-08, "loss": 23.4903, "step": 22160 }, { "epoch": 175.95712861415754, "grad_norm": 478.9454345703125, "learning_rate": 1.7867591907561958e-08, "loss": 25.1356, "step": 22170 }, { "epoch": 176.0319042871386, "grad_norm": 249.65626525878906, "learning_rate": 1.775183444062475e-08, "loss": 23.2627, "step": 22180 }, { "epoch": 
176.11166500498504, "grad_norm": 537.0014038085938, "learning_rate": 1.763643936351858e-08, "loss": 22.7277, "step": 22190 }, { "epoch": 176.1914257228315, "grad_norm": 539.47802734375, "learning_rate": 1.7521406856300797e-08, "loss": 24.1051, "step": 22200 }, { "epoch": 176.1914257228315, "eval_loss": 2.9861366748809814, "eval_mae": 1.297548770904541, "eval_mse": 2.9861366748809814, "eval_r2": 0.10029160976409912, "eval_rmse": 1.7280441761948626, "eval_runtime": 9.1459, "eval_samples_per_second": 438.559, "eval_steps_per_second": 13.777, "step": 22200 }, { "epoch": 176.27118644067798, "grad_norm": 312.5366516113281, "learning_rate": 1.740673709846313e-08, "loss": 23.4949, "step": 22210 }, { "epoch": 176.35094715852443, "grad_norm": 391.32476806640625, "learning_rate": 1.729243026893118e-08, "loss": 22.7607, "step": 22220 }, { "epoch": 176.4307078763709, "grad_norm": 257.8359375, "learning_rate": 1.7178486546064353e-08, "loss": 25.6291, "step": 22230 }, { "epoch": 176.51046859421734, "grad_norm": 715.9336547851562, "learning_rate": 1.706490610765543e-08, "loss": 24.058, "step": 22240 }, { "epoch": 176.5902293120638, "grad_norm": 565.8201904296875, "learning_rate": 1.695168913093023e-08, "loss": 24.0624, "step": 22250 }, { "epoch": 176.66999002991028, "grad_norm": 206.21766662597656, "learning_rate": 1.683883579254769e-08, "loss": 23.9974, "step": 22260 }, { "epoch": 176.74975074775674, "grad_norm": 788.8040161132812, "learning_rate": 1.6726346268599145e-08, "loss": 21.2712, "step": 22270 }, { "epoch": 176.8295114656032, "grad_norm": 308.278564453125, "learning_rate": 1.661422073460822e-08, "loss": 25.0758, "step": 22280 }, { "epoch": 176.90927218344964, "grad_norm": 585.3943481445312, "learning_rate": 1.6502459365530797e-08, "loss": 23.6577, "step": 22290 }, { "epoch": 176.9890329012961, "grad_norm": 492.2779846191406, "learning_rate": 1.6391062335754325e-08, "loss": 23.3792, "step": 22300 }, { "epoch": 176.9890329012961, "eval_loss": 2.9852004051208496, 
"eval_mae": 1.2953109741210938, "eval_mse": 2.9852004051208496, "eval_r2": 0.10057371854782104, "eval_rmse": 1.7277732504934928, "eval_runtime": 9.1232, "eval_samples_per_second": 439.65, "eval_steps_per_second": 13.811, "step": 22300 }, { "epoch": 177.06380857427718, "grad_norm": 381.8243713378906, "learning_rate": 1.62800298190979e-08, "loss": 21.2628, "step": 22310 }, { "epoch": 177.14356929212363, "grad_norm": 526.283935546875, "learning_rate": 1.6169361988811747e-08, "loss": 23.4491, "step": 22320 }, { "epoch": 177.22333000997008, "grad_norm": 304.78961181640625, "learning_rate": 1.605905901757701e-08, "loss": 22.4426, "step": 22330 }, { "epoch": 177.30309072781654, "grad_norm": 120.56478881835938, "learning_rate": 1.596009844021709e-08, "loss": 25.0317, "step": 22340 }, { "epoch": 177.38285144566302, "grad_norm": 744.4158935546875, "learning_rate": 1.5850489174877747e-08, "loss": 24.6723, "step": 22350 }, { "epoch": 177.46261216350948, "grad_norm": 1183.1009521484375, "learning_rate": 1.5741245266145065e-08, "loss": 22.3539, "step": 22360 }, { "epoch": 177.54237288135593, "grad_norm": 349.775634765625, "learning_rate": 1.563236688447836e-08, "loss": 23.4585, "step": 22370 }, { "epoch": 177.62213359920239, "grad_norm": 242.05673217773438, "learning_rate": 1.5523854199766732e-08, "loss": 23.8761, "step": 22380 }, { "epoch": 177.70189431704884, "grad_norm": 368.234130859375, "learning_rate": 1.541570738132858e-08, "loss": 23.9772, "step": 22390 }, { "epoch": 177.78165503489532, "grad_norm": 529.9364624023438, "learning_rate": 1.5307926597911386e-08, "loss": 22.4049, "step": 22400 }, { "epoch": 177.78165503489532, "eval_loss": 2.9866483211517334, "eval_mae": 1.2959117889404297, "eval_mse": 2.9866480827331543, "eval_r2": 0.10013753175735474, "eval_rmse": 1.7281921428860723, "eval_runtime": 9.081, "eval_samples_per_second": 441.691, "eval_steps_per_second": 13.875, "step": 22400 }, { "epoch": 177.86141575274178, "grad_norm": 845.2642211914062, "learning_rate": 
1.520051201769168e-08, "loss": 24.6607, "step": 22410 }, { "epoch": 177.94117647058823, "grad_norm": 914.9999389648438, "learning_rate": 1.5093463808274338e-08, "loss": 25.4712, "step": 22420 }, { "epoch": 178.01595214356928, "grad_norm": 224.6628875732422, "learning_rate": 1.4986782136692715e-08, "loss": 23.3044, "step": 22430 }, { "epoch": 178.09571286141576, "grad_norm": 1200.2086181640625, "learning_rate": 1.4880467169408245e-08, "loss": 23.4508, "step": 22440 }, { "epoch": 178.17547357926222, "grad_norm": 532.684814453125, "learning_rate": 1.4774519072310032e-08, "loss": 23.1079, "step": 22450 }, { "epoch": 178.25523429710867, "grad_norm": 854.6232299804688, "learning_rate": 1.466893801071495e-08, "loss": 23.4958, "step": 22460 }, { "epoch": 178.33499501495513, "grad_norm": 631.4081420898438, "learning_rate": 1.4563724149366991e-08, "loss": 22.0791, "step": 22470 }, { "epoch": 178.4147557328016, "grad_norm": 964.5308227539062, "learning_rate": 1.4458877652437224e-08, "loss": 25.2987, "step": 22480 }, { "epoch": 178.49451645064806, "grad_norm": 1529.9703369140625, "learning_rate": 1.4354398683523506e-08, "loss": 24.4458, "step": 22490 }, { "epoch": 178.57427716849452, "grad_norm": 1350.5501708984375, "learning_rate": 1.425028740565032e-08, "loss": 25.5982, "step": 22500 }, { "epoch": 178.57427716849452, "eval_loss": 2.986062526702881, "eval_mae": 1.2957606315612793, "eval_mse": 2.98606276512146, "eval_r2": 0.10031390190124512, "eval_rmse": 1.728022790683462, "eval_runtime": 9.0926, "eval_samples_per_second": 441.13, "eval_steps_per_second": 13.857, "step": 22500 }, { "epoch": 178.65403788634097, "grad_norm": 275.2545471191406, "learning_rate": 1.4146543981268217e-08, "loss": 24.822, "step": 22510 }, { "epoch": 178.73379860418743, "grad_norm": 538.2731323242188, "learning_rate": 1.4043168572253905e-08, "loss": 24.7729, "step": 22520 }, { "epoch": 178.8135593220339, "grad_norm": 765.3468627929688, "learning_rate": 1.3940161339909884e-08, "loss": 23.033, "step": 
22530 }, { "epoch": 178.89332003988036, "grad_norm": 199.8118133544922, "learning_rate": 1.3837522444964083e-08, "loss": 23.8803, "step": 22540 }, { "epoch": 178.97308075772682, "grad_norm": 362.73321533203125, "learning_rate": 1.3735252047569673e-08, "loss": 23.395, "step": 22550 }, { "epoch": 179.04785643070787, "grad_norm": 600.6903686523438, "learning_rate": 1.3633350307304975e-08, "loss": 20.4186, "step": 22560 }, { "epoch": 179.12761714855435, "grad_norm": 92.33502197265625, "learning_rate": 1.3531817383172911e-08, "loss": 22.7443, "step": 22570 }, { "epoch": 179.2073778664008, "grad_norm": 774.8335571289062, "learning_rate": 1.3430653433601058e-08, "loss": 26.5549, "step": 22580 }, { "epoch": 179.28713858424726, "grad_norm": 1109.836181640625, "learning_rate": 1.3329858616441175e-08, "loss": 24.2557, "step": 22590 }, { "epoch": 179.3668993020937, "grad_norm": 440.6700439453125, "learning_rate": 1.3229433088968984e-08, "loss": 23.0492, "step": 22600 }, { "epoch": 179.3668993020937, "eval_loss": 2.987112522125244, "eval_mae": 1.295152187347412, "eval_mse": 2.9871127605438232, "eval_r2": 0.09999752044677734, "eval_rmse": 1.7283265780933368, "eval_runtime": 9.1234, "eval_samples_per_second": 439.64, "eval_steps_per_second": 13.811, "step": 22600 }, { "epoch": 179.44666001994017, "grad_norm": 427.29559326171875, "learning_rate": 1.312937700788419e-08, "loss": 23.5974, "step": 22610 }, { "epoch": 179.52642073778665, "grad_norm": 238.57940673828125, "learning_rate": 1.3029690529309855e-08, "loss": 22.2489, "step": 22620 }, { "epoch": 179.6061814556331, "grad_norm": 310.37518310546875, "learning_rate": 1.2930373808792333e-08, "loss": 24.069, "step": 22630 }, { "epoch": 179.68594217347956, "grad_norm": 1182.7216796875, "learning_rate": 1.283142700130116e-08, "loss": 23.8514, "step": 22640 }, { "epoch": 179.76570289132601, "grad_norm": 594.5250854492188, "learning_rate": 1.27328502612285e-08, "loss": 23.4992, "step": 22650 }, { "epoch": 179.84546360917247, 
"grad_norm": 412.5122985839844, "learning_rate": 1.2634643742389234e-08, "loss": 24.1487, "step": 22660 }, { "epoch": 179.92522432701895, "grad_norm": 497.1600646972656, "learning_rate": 1.2536807598020421e-08, "loss": 23.2468, "step": 22670 }, { "epoch": 180.0, "grad_norm": 138.8757781982422, "learning_rate": 1.2439341980781365e-08, "loss": 22.8057, "step": 22680 }, { "epoch": 180.07976071784645, "grad_norm": 786.8070068359375, "learning_rate": 1.234224704275308e-08, "loss": 25.2827, "step": 22690 }, { "epoch": 180.1595214356929, "grad_norm": 284.813720703125, "learning_rate": 1.2245522935438263e-08, "loss": 23.404, "step": 22700 }, { "epoch": 180.1595214356929, "eval_loss": 2.9867138862609863, "eval_mae": 1.2956660985946655, "eval_mse": 2.9867141246795654, "eval_r2": 0.10011768341064453, "eval_rmse": 1.7282112500153346, "eval_runtime": 9.1925, "eval_samples_per_second": 436.332, "eval_steps_per_second": 13.707, "step": 22700 }, { "epoch": 180.2392821535394, "grad_norm": 392.55401611328125, "learning_rate": 1.2149169809760967e-08, "loss": 25.6318, "step": 22710 }, { "epoch": 180.31904287138585, "grad_norm": 463.6085205078125, "learning_rate": 1.2053187816066318e-08, "loss": 22.9739, "step": 22720 }, { "epoch": 180.3988035892323, "grad_norm": 143.3795166015625, "learning_rate": 1.1957577104120487e-08, "loss": 23.4349, "step": 22730 }, { "epoch": 180.47856430707876, "grad_norm": 236.1634063720703, "learning_rate": 1.1862337823110163e-08, "loss": 25.8816, "step": 22740 }, { "epoch": 180.5583250249252, "grad_norm": 581.2395629882812, "learning_rate": 1.176747012164253e-08, "loss": 23.6662, "step": 22750 }, { "epoch": 180.6380857427717, "grad_norm": 341.58148193359375, "learning_rate": 1.1672974147745068e-08, "loss": 22.0037, "step": 22760 }, { "epoch": 180.71784646061815, "grad_norm": 359.1589050292969, "learning_rate": 1.1578850048865024e-08, "loss": 22.7009, "step": 22770 }, { "epoch": 180.7976071784646, "grad_norm": 1259.181396484375, "learning_rate": 
1.1485097971869584e-08, "loss": 23.9171, "step": 22780 }, { "epoch": 180.87736789631106, "grad_norm": 284.3455505371094, "learning_rate": 1.1391718063045342e-08, "loss": 22.9658, "step": 22790 }, { "epoch": 180.95712861415754, "grad_norm": 898.4352416992188, "learning_rate": 1.1298710468098188e-08, "loss": 23.8032, "step": 22800 }, { "epoch": 180.95712861415754, "eval_loss": 2.9856619834899902, "eval_mae": 1.2953579425811768, "eval_mse": 2.9856619834899902, "eval_r2": 0.10043466091156006, "eval_rmse": 1.727906821414277, "eval_runtime": 9.0627, "eval_samples_per_second": 442.582, "eval_steps_per_second": 13.903, "step": 22800 }, { "epoch": 181.0319042871386, "grad_norm": 1284.8299560546875, "learning_rate": 1.1206075332153091e-08, "loss": 23.3462, "step": 22810 }, { "epoch": 181.11166500498504, "grad_norm": 179.67510986328125, "learning_rate": 1.1113812799753958e-08, "loss": 22.7255, "step": 22820 }, { "epoch": 181.1914257228315, "grad_norm": 978.6626586914062, "learning_rate": 1.1021923014862988e-08, "loss": 24.1967, "step": 22830 }, { "epoch": 181.27118644067798, "grad_norm": 1190.5296630859375, "learning_rate": 1.0930406120861103e-08, "loss": 22.3215, "step": 22840 }, { "epoch": 181.35094715852443, "grad_norm": 299.45947265625, "learning_rate": 1.083926226054721e-08, "loss": 22.0059, "step": 22850 }, { "epoch": 181.4307078763709, "grad_norm": 464.1260070800781, "learning_rate": 1.0748491576138191e-08, "loss": 23.297, "step": 22860 }, { "epoch": 181.51046859421734, "grad_norm": 654.6446533203125, "learning_rate": 1.065809420926858e-08, "loss": 23.9606, "step": 22870 }, { "epoch": 181.5902293120638, "grad_norm": 534.0228881835938, "learning_rate": 1.056807030099055e-08, "loss": 24.3782, "step": 22880 }, { "epoch": 181.66999002991028, "grad_norm": 243.1739959716797, "learning_rate": 1.0478419991773346e-08, "loss": 23.5468, "step": 22890 }, { "epoch": 181.74975074775674, "grad_norm": 296.27593994140625, "learning_rate": 1.038914342150346e-08, "loss": 23.2563, "step": 
22900 }, { "epoch": 181.74975074775674, "eval_loss": 2.9857964515686035, "eval_mae": 1.2959256172180176, "eval_mse": 2.9857964515686035, "eval_r2": 0.10039412975311279, "eval_rmse": 1.7279457316619071, "eval_runtime": 9.1154, "eval_samples_per_second": 440.022, "eval_steps_per_second": 13.823, "step": 22900 }, { "epoch": 181.8295114656032, "grad_norm": 350.1341247558594, "learning_rate": 1.03002407294841e-08, "loss": 24.5378, "step": 22910 }, { "epoch": 181.90927218344964, "grad_norm": 387.22998046875, "learning_rate": 1.0211712054435078e-08, "loss": 24.8081, "step": 22920 }, { "epoch": 181.9890329012961, "grad_norm": 320.7686462402344, "learning_rate": 1.0123557534492705e-08, "loss": 24.0877, "step": 22930 }, { "epoch": 182.06380857427718, "grad_norm": 329.71893310546875, "learning_rate": 1.0035777307209364e-08, "loss": 24.8788, "step": 22940 }, { "epoch": 182.14356929212363, "grad_norm": 362.3857421875, "learning_rate": 9.948371509553549e-09, "loss": 23.8843, "step": 22950 }, { "epoch": 182.22333000997008, "grad_norm": 880.0196533203125, "learning_rate": 9.861340277909358e-09, "loss": 24.9908, "step": 22960 }, { "epoch": 182.30309072781654, "grad_norm": 1081.274169921875, "learning_rate": 9.774683748076495e-09, "loss": 22.1578, "step": 22970 }, { "epoch": 182.38285144566302, "grad_norm": 762.17236328125, "learning_rate": 9.688402055269995e-09, "loss": 24.5478, "step": 22980 }, { "epoch": 182.46261216350948, "grad_norm": 165.5096893310547, "learning_rate": 9.602495334120136e-09, "loss": 22.1582, "step": 22990 }, { "epoch": 182.54237288135593, "grad_norm": 244.72225952148438, "learning_rate": 9.516963718671834e-09, "loss": 22.9301, "step": 23000 }, { "epoch": 182.54237288135593, "eval_loss": 2.9862356185913086, "eval_mae": 1.295803427696228, "eval_mse": 2.9862356185913086, "eval_r2": 0.10026180744171143, "eval_rmse": 1.728072804771636, "eval_runtime": 9.1456, "eval_samples_per_second": 438.573, "eval_steps_per_second": 13.777, "step": 23000 }, { "epoch": 
182.62213359920239, "grad_norm": 373.03662109375, "learning_rate": 9.431807342384913e-09, "loss": 21.0979, "step": 23010 }, { "epoch": 182.70189431704884, "grad_norm": 121.8209457397461, "learning_rate": 9.347026338133696e-09, "loss": 24.8742, "step": 23020 }, { "epoch": 182.78165503489532, "grad_norm": 339.05914306640625, "learning_rate": 9.262620838206696e-09, "loss": 24.4378, "step": 23030 }, { "epoch": 182.86141575274178, "grad_norm": 252.5173797607422, "learning_rate": 9.17859097430651e-09, "loss": 23.9127, "step": 23040 }, { "epoch": 182.94117647058823, "grad_norm": 383.7898864746094, "learning_rate": 9.094936877549725e-09, "loss": 24.8554, "step": 23050 }, { "epoch": 183.01595214356928, "grad_norm": 418.6182861328125, "learning_rate": 9.011658678466428e-09, "loss": 21.7836, "step": 23060 }, { "epoch": 183.09571286141576, "grad_norm": 506.1742248535156, "learning_rate": 8.92875650700034e-09, "loss": 23.7084, "step": 23070 }, { "epoch": 183.17547357926222, "grad_norm": 480.1518249511719, "learning_rate": 8.84623049250835e-09, "loss": 24.4412, "step": 23080 }, { "epoch": 183.25523429710867, "grad_norm": 506.3466491699219, "learning_rate": 8.764080763760368e-09, "loss": 22.5461, "step": 23090 }, { "epoch": 183.33499501495513, "grad_norm": 816.4349975585938, "learning_rate": 8.682307448939274e-09, "loss": 25.0832, "step": 23100 }, { "epoch": 183.33499501495513, "eval_loss": 2.985621929168701, "eval_mae": 1.295138955116272, "eval_mse": 2.9856221675872803, "eval_r2": 0.10044664144515991, "eval_rmse": 1.7278952999494155, "eval_runtime": 9.0822, "eval_samples_per_second": 441.633, "eval_steps_per_second": 13.873, "step": 23100 }, { "epoch": 183.4147557328016, "grad_norm": 851.5493774414062, "learning_rate": 8.60091067564056e-09, "loss": 22.8906, "step": 23110 }, { "epoch": 183.49451645064806, "grad_norm": 787.2097778320312, "learning_rate": 8.519890570872101e-09, "loss": 23.4751, "step": 23120 }, { "epoch": 183.57427716849452, "grad_norm": 237.1964569091797, 
"learning_rate": 8.439247261054217e-09, "loss": 22.4332, "step": 23130 }, { "epoch": 183.65403788634097, "grad_norm": 765.3052368164062, "learning_rate": 8.358980872019083e-09, "loss": 23.7841, "step": 23140 }, { "epoch": 183.73379860418743, "grad_norm": 254.69822692871094, "learning_rate": 8.279091529010907e-09, "loss": 24.593, "step": 23150 }, { "epoch": 183.8135593220339, "grad_norm": 487.26171875, "learning_rate": 8.199579356685498e-09, "loss": 23.5859, "step": 23160 }, { "epoch": 183.89332003988036, "grad_norm": 392.75299072265625, "learning_rate": 8.120444479110145e-09, "loss": 24.5251, "step": 23170 }, { "epoch": 183.97308075772682, "grad_norm": 156.13031005859375, "learning_rate": 8.041687019763432e-09, "loss": 24.6889, "step": 23180 }, { "epoch": 184.04785643070787, "grad_norm": 612.9346923828125, "learning_rate": 7.96330710153506e-09, "loss": 22.7071, "step": 23190 }, { "epoch": 184.12761714855435, "grad_norm": 702.3763427734375, "learning_rate": 7.885304846725589e-09, "loss": 23.8737, "step": 23200 }, { "epoch": 184.12761714855435, "eval_loss": 2.9857828617095947, "eval_mae": 1.2956427335739136, "eval_mse": 2.985783100128174, "eval_r2": 0.10039818286895752, "eval_rmse": 1.7279418682722443, "eval_runtime": 9.1096, "eval_samples_per_second": 440.307, "eval_steps_per_second": 13.832, "step": 23200 }, { "epoch": 184.2073778664008, "grad_norm": 779.8262329101562, "learning_rate": 7.807680377046294e-09, "loss": 23.3689, "step": 23210 }, { "epoch": 184.28713858424726, "grad_norm": 1790.89892578125, "learning_rate": 7.730433813619042e-09, "loss": 23.9311, "step": 23220 }, { "epoch": 184.3668993020937, "grad_norm": 437.471435546875, "learning_rate": 7.653565276975915e-09, "loss": 23.7945, "step": 23230 }, { "epoch": 184.44666001994017, "grad_norm": 653.4305419921875, "learning_rate": 7.57707488705922e-09, "loss": 25.2285, "step": 23240 }, { "epoch": 184.52642073778665, "grad_norm": 432.324462890625, "learning_rate": 7.500962763221208e-09, "loss": 23.8279, "step": 
23250 }, { "epoch": 184.6061814556331, "grad_norm": 1161.883544921875, "learning_rate": 7.425229024223878e-09, "loss": 21.3262, "step": 23260 }, { "epoch": 184.68594217347956, "grad_norm": 644.3816528320312, "learning_rate": 7.34987378823887e-09, "loss": 24.3121, "step": 23270 }, { "epoch": 184.76570289132601, "grad_norm": 789.889892578125, "learning_rate": 7.274897172847127e-09, "loss": 26.7195, "step": 23280 }, { "epoch": 184.84546360917247, "grad_norm": 519.7674560546875, "learning_rate": 7.200299295038875e-09, "loss": 23.2123, "step": 23290 }, { "epoch": 184.92522432701895, "grad_norm": 557.431640625, "learning_rate": 7.126080271213391e-09, "loss": 22.8665, "step": 23300 }, { "epoch": 184.92522432701895, "eval_loss": 2.9849884510040283, "eval_mae": 1.295828938484192, "eval_mse": 2.984988212585449, "eval_r2": 0.10063761472702026, "eval_rmse": 1.727711843041382, "eval_runtime": 9.1194, "eval_samples_per_second": 439.833, "eval_steps_per_second": 13.817, "step": 23300 }, { "epoch": 185.0, "grad_norm": 284.9809875488281, "learning_rate": 7.05224021717879e-09, "loss": 21.6302, "step": 23310 }, { "epoch": 185.07976071784645, "grad_norm": 756.0015258789062, "learning_rate": 6.978779248151795e-09, "loss": 23.6411, "step": 23320 }, { "epoch": 185.1595214356929, "grad_norm": 528.3745727539062, "learning_rate": 6.905697478757689e-09, "loss": 23.8638, "step": 23330 }, { "epoch": 185.2392821535394, "grad_norm": 1139.6221923828125, "learning_rate": 6.832995023030086e-09, "loss": 22.9624, "step": 23340 }, { "epoch": 185.31904287138585, "grad_norm": 281.22491455078125, "learning_rate": 6.76067199441066e-09, "loss": 22.7068, "step": 23350 }, { "epoch": 185.3988035892323, "grad_norm": 179.5164337158203, "learning_rate": 6.6887285057491136e-09, "loss": 25.3549, "step": 23360 }, { "epoch": 185.47856430707876, "grad_norm": 370.2801818847656, "learning_rate": 6.617164669302927e-09, "loss": 25.2967, "step": 23370 }, { "epoch": 185.5583250249252, "grad_norm": 167.0034942626953, 
"learning_rate": 6.545980596737083e-09, "loss": 23.1989, "step": 23380 }, { "epoch": 185.6380857427717, "grad_norm": 550.1342163085938, "learning_rate": 6.475176399124177e-09, "loss": 22.0248, "step": 23390 }, { "epoch": 185.71784646061815, "grad_norm": 861.5530395507812, "learning_rate": 6.4047521869439455e-09, "loss": 25.3384, "step": 23400 }, { "epoch": 185.71784646061815, "eval_loss": 2.984853744506836, "eval_mae": 1.2957940101623535, "eval_mse": 2.984853744506836, "eval_r2": 0.1006782054901123, "eval_rmse": 1.7276729275261669, "eval_runtime": 9.04, "eval_samples_per_second": 443.694, "eval_steps_per_second": 13.938, "step": 23400 }, { "epoch": 185.7976071784646, "grad_norm": 235.17025756835938, "learning_rate": 6.334708070083156e-09, "loss": 23.875, "step": 23410 }, { "epoch": 185.87736789631106, "grad_norm": 287.0189514160156, "learning_rate": 6.265044157835686e-09, "loss": 22.8222, "step": 23420 }, { "epoch": 185.95712861415754, "grad_norm": 823.918701171875, "learning_rate": 6.195760558901947e-09, "loss": 23.0371, "step": 23430 }, { "epoch": 186.0319042871386, "grad_norm": 427.4580993652344, "learning_rate": 6.126857381389072e-09, "loss": 24.1903, "step": 23440 }, { "epoch": 186.11166500498504, "grad_norm": 579.4691162109375, "learning_rate": 6.058334732810533e-09, "loss": 23.3387, "step": 23450 }, { "epoch": 186.1914257228315, "grad_norm": 315.81787109375, "learning_rate": 5.990192720086052e-09, "loss": 24.8437, "step": 23460 }, { "epoch": 186.27118644067798, "grad_norm": 462.8539733886719, "learning_rate": 5.922431449541465e-09, "loss": 25.4003, "step": 23470 }, { "epoch": 186.35094715852443, "grad_norm": 638.9884643554688, "learning_rate": 5.8550510269084755e-09, "loss": 23.8574, "step": 23480 }, { "epoch": 186.4307078763709, "grad_norm": 182.34124755859375, "learning_rate": 5.78805155732448e-09, "loss": 21.1267, "step": 23490 }, { "epoch": 186.51046859421734, "grad_norm": 607.1478881835938, "learning_rate": 5.721433145332549e-09, "loss": 24.8929, 
"step": 23500 }, { "epoch": 186.51046859421734, "eval_loss": 2.9855034351348877, "eval_mae": 1.2960774898529053, "eval_mse": 2.9855034351348877, "eval_r2": 0.10048240423202515, "eval_rmse": 1.7278609420711168, "eval_runtime": 9.0589, "eval_samples_per_second": 442.771, "eval_steps_per_second": 13.909, "step": 23500 }, { "epoch": 186.5902293120638, "grad_norm": 331.1116027832031, "learning_rate": 5.655195894881143e-09, "loss": 20.8989, "step": 23510 }, { "epoch": 186.66999002991028, "grad_norm": 475.9549865722656, "learning_rate": 5.589339909323948e-09, "loss": 21.8069, "step": 23520 }, { "epoch": 186.74975074775674, "grad_norm": 561.3901977539062, "learning_rate": 5.5238652914197105e-09, "loss": 25.9163, "step": 23530 }, { "epoch": 186.8295114656032, "grad_norm": 763.07373046875, "learning_rate": 5.45877214333218e-09, "loss": 24.2063, "step": 23540 }, { "epoch": 186.90927218344964, "grad_norm": 189.84625244140625, "learning_rate": 5.394060566629832e-09, "loss": 23.977, "step": 23550 }, { "epoch": 186.9890329012961, "grad_norm": 472.99041748046875, "learning_rate": 5.329730662285787e-09, "loss": 24.547, "step": 23560 }, { "epoch": 187.06380857427718, "grad_norm": 863.2728881835938, "learning_rate": 5.265782530677582e-09, "loss": 20.8558, "step": 23570 }, { "epoch": 187.14356929212363, "grad_norm": 216.08444213867188, "learning_rate": 5.20221627158704e-09, "loss": 24.0826, "step": 23580 }, { "epoch": 187.22333000997008, "grad_norm": 852.3370971679688, "learning_rate": 5.139031984200237e-09, "loss": 24.7418, "step": 23590 }, { "epoch": 187.30309072781654, "grad_norm": 320.7305908203125, "learning_rate": 5.076229767107115e-09, "loss": 24.4025, "step": 23600 }, { "epoch": 187.30309072781654, "eval_loss": 2.9850869178771973, "eval_mae": 1.2962417602539062, "eval_mse": 2.985086679458618, "eval_r2": 0.1006079912185669, "eval_rmse": 1.7277403391304547, "eval_runtime": 9.0865, "eval_samples_per_second": 441.425, "eval_steps_per_second": 13.867, "step": 23600 }, { "epoch": 
187.38285144566302, "grad_norm": 186.22344970703125, "learning_rate": 5.013809718301454e-09, "loss": 25.3328, "step": 23610 }, { "epoch": 187.46261216350948, "grad_norm": 1284.268798828125, "learning_rate": 4.951771935180815e-09, "loss": 23.2086, "step": 23620 }, { "epoch": 187.54237288135593, "grad_norm": 425.0491638183594, "learning_rate": 4.890116514546211e-09, "loss": 22.4389, "step": 23630 }, { "epoch": 187.62213359920239, "grad_norm": 851.8677978515625, "learning_rate": 4.828843552602047e-09, "loss": 24.5933, "step": 23640 }, { "epoch": 187.70189431704884, "grad_norm": 434.36322021484375, "learning_rate": 4.767953144955927e-09, "loss": 22.7603, "step": 23650 }, { "epoch": 187.78165503489532, "grad_norm": 668.190673828125, "learning_rate": 4.707445386618658e-09, "loss": 21.8588, "step": 23660 }, { "epoch": 187.86141575274178, "grad_norm": 220.99130249023438, "learning_rate": 4.647320372003772e-09, "loss": 24.3321, "step": 23670 }, { "epoch": 187.94117647058823, "grad_norm": 598.3583374023438, "learning_rate": 4.5875781949278044e-09, "loss": 25.4108, "step": 23680 }, { "epoch": 188.01595214356928, "grad_norm": 570.7030029296875, "learning_rate": 4.5282189486097745e-09, "loss": 22.1609, "step": 23690 }, { "epoch": 188.09571286141576, "grad_norm": 250.53883361816406, "learning_rate": 4.469242725671202e-09, "loss": 21.5695, "step": 23700 }, { "epoch": 188.09571286141576, "eval_loss": 2.9850971698760986, "eval_mae": 1.2961443662643433, "eval_mse": 2.9850971698760986, "eval_r2": 0.1006048321723938, "eval_rmse": 1.7277433750057034, "eval_runtime": 9.0591, "eval_samples_per_second": 442.758, "eval_steps_per_second": 13.909, "step": 23700 }, { "epoch": 188.17547357926222, "grad_norm": 533.2770385742188, "learning_rate": 4.410649618136059e-09, "loss": 24.5917, "step": 23710 }, { "epoch": 188.25523429710867, "grad_norm": 85.58604431152344, "learning_rate": 4.352439717430434e-09, "loss": 22.8943, "step": 23720 }, { "epoch": 188.33499501495513, "grad_norm": 
430.8806457519531, "learning_rate": 4.2946131143824785e-09, "loss": 24.1939, "step": 23730 }, { "epoch": 188.4147557328016, "grad_norm": 426.0600891113281, "learning_rate": 4.23716989922232e-09, "loss": 22.7485, "step": 23740 }, { "epoch": 188.49451645064806, "grad_norm": 138.37684631347656, "learning_rate": 4.1801101615817904e-09, "loss": 22.0539, "step": 23750 }, { "epoch": 188.57427716849452, "grad_norm": 168.79737854003906, "learning_rate": 4.1234339904944186e-09, "loss": 22.786, "step": 23760 }, { "epoch": 188.65403788634097, "grad_norm": 357.03338623046875, "learning_rate": 4.067141474395214e-09, "loss": 26.0838, "step": 23770 }, { "epoch": 188.73379860418743, "grad_norm": 729.7440795898438, "learning_rate": 4.011232701120498e-09, "loss": 24.8297, "step": 23780 }, { "epoch": 188.8135593220339, "grad_norm": 240.0101776123047, "learning_rate": 3.955707757907906e-09, "loss": 25.7474, "step": 23790 }, { "epoch": 188.89332003988036, "grad_norm": 332.90838623046875, "learning_rate": 3.90056673139616e-09, "loss": 24.8005, "step": 23800 }, { "epoch": 188.89332003988036, "eval_loss": 2.9858198165893555, "eval_mae": 1.296196460723877, "eval_mse": 2.9858198165893555, "eval_r2": 0.1003870964050293, "eval_rmse": 1.7279524925730323, "eval_runtime": 9.0618, "eval_samples_per_second": 442.629, "eval_steps_per_second": 13.905, "step": 23800 }, { "epoch": 188.97308075772682, "grad_norm": 1091.8680419921875, "learning_rate": 3.845809707624825e-09, "loss": 23.3533, "step": 23810 }, { "epoch": 189.04785643070787, "grad_norm": 332.0385437011719, "learning_rate": 3.791436772034362e-09, "loss": 23.0622, "step": 23820 }, { "epoch": 189.12761714855435, "grad_norm": 548.7697143554688, "learning_rate": 3.737448009465932e-09, "loss": 23.0311, "step": 23830 }, { "epoch": 189.2073778664008, "grad_norm": 172.7244415283203, "learning_rate": 3.6838435041612324e-09, "loss": 23.8273, "step": 23840 }, { "epoch": 189.28713858424726, "grad_norm": 104.53250885009766, "learning_rate": 
3.6306233397623564e-09, "loss": 24.7454, "step": 23850 }, { "epoch": 189.3668993020937, "grad_norm": 458.9788513183594, "learning_rate": 3.5777875993117377e-09, "loss": 21.2395, "step": 23860 }, { "epoch": 189.44666001994017, "grad_norm": 556.30859375, "learning_rate": 3.525336365251902e-09, "loss": 24.1263, "step": 23870 }, { "epoch": 189.52642073778665, "grad_norm": 238.5287628173828, "learning_rate": 3.4732697194254924e-09, "loss": 24.3238, "step": 23880 }, { "epoch": 189.6061814556331, "grad_norm": 165.72174072265625, "learning_rate": 3.421587743074994e-09, "loss": 23.7922, "step": 23890 }, { "epoch": 189.68594217347956, "grad_norm": 871.0825805664062, "learning_rate": 3.3702905168426774e-09, "loss": 25.235, "step": 23900 }, { "epoch": 189.68594217347956, "eval_loss": 2.9857451915740967, "eval_mae": 1.2961997985839844, "eval_mse": 2.9857451915740967, "eval_r2": 0.10040956735610962, "eval_rmse": 1.7279308989580853, "eval_runtime": 9.0432, "eval_samples_per_second": 443.539, "eval_steps_per_second": 13.933, "step": 23900 }, { "epoch": 189.76570289132601, "grad_norm": 147.49085998535156, "learning_rate": 3.3193781207705163e-09, "loss": 23.5739, "step": 23910 }, { "epoch": 189.84546360917247, "grad_norm": 585.6244506835938, "learning_rate": 3.2688506342999355e-09, "loss": 23.4213, "step": 23920 }, { "epoch": 189.92522432701895, "grad_norm": 326.77081298828125, "learning_rate": 3.2187081362718415e-09, "loss": 22.6919, "step": 23930 }, { "epoch": 190.0, "grad_norm": 370.1246337890625, "learning_rate": 3.168950704926371e-09, "loss": 23.1201, "step": 23940 }, { "epoch": 190.07976071784645, "grad_norm": 268.3349914550781, "learning_rate": 3.1195784179028073e-09, "loss": 24.7828, "step": 23950 }, { "epoch": 190.1595214356929, "grad_norm": 339.0169982910156, "learning_rate": 3.0705913522395264e-09, "loss": 25.0338, "step": 23960 }, { "epoch": 190.2392821535394, "grad_norm": 451.1507873535156, "learning_rate": 3.021989584373802e-09, "loss": 23.9076, "step": 23970 }, { 
"epoch": 190.31904287138585, "grad_norm": 342.91912841796875, "learning_rate": 2.973773190141693e-09, "loss": 23.5402, "step": 23980 }, { "epoch": 190.3988035892323, "grad_norm": 587.4170532226562, "learning_rate": 2.925942244777879e-09, "loss": 25.0114, "step": 23990 }, { "epoch": 190.47856430707876, "grad_norm": 331.1037902832031, "learning_rate": 2.878496822915799e-09, "loss": 23.9958, "step": 24000 }, { "epoch": 190.47856430707876, "eval_loss": 2.985790967941284, "eval_mae": 1.2959747314453125, "eval_mse": 2.985790967941284, "eval_r2": 0.10039573907852173, "eval_rmse": 1.7279441449136266, "eval_runtime": 9.0531, "eval_samples_per_second": 443.055, "eval_steps_per_second": 13.918, "step": 24000 }, { "epoch": 190.5583250249252, "grad_norm": 328.3941955566406, "learning_rate": 2.8314369985870388e-09, "loss": 22.8027, "step": 24010 }, { "epoch": 190.6380857427717, "grad_norm": 265.7846374511719, "learning_rate": 2.784762845221805e-09, "loss": 24.2178, "step": 24020 }, { "epoch": 190.71784646061815, "grad_norm": 330.3060607910156, "learning_rate": 2.7384744356483412e-09, "loss": 23.3047, "step": 24030 }, { "epoch": 190.7976071784646, "grad_norm": 195.12123107910156, "learning_rate": 2.6925718420930944e-09, "loss": 23.3755, "step": 24040 }, { "epoch": 190.87736789631106, "grad_norm": 461.7795715332031, "learning_rate": 2.647055136180382e-09, "loss": 23.0593, "step": 24050 }, { "epoch": 190.95712861415754, "grad_norm": 245.96263122558594, "learning_rate": 2.6019243889325304e-09, "loss": 22.2629, "step": 24060 }, { "epoch": 191.0319042871386, "grad_norm": 347.748046875, "learning_rate": 2.557179670769516e-09, "loss": 23.5785, "step": 24070 }, { "epoch": 191.11166500498504, "grad_norm": 236.30970764160156, "learning_rate": 2.512821051509101e-09, "loss": 23.5184, "step": 24080 }, { "epoch": 191.1914257228315, "grad_norm": 953.8068237304688, "learning_rate": 2.468848600366502e-09, "loss": 22.7987, "step": 24090 }, { "epoch": 191.27118644067798, "grad_norm": 
379.2378234863281, "learning_rate": 2.425262385954363e-09, "loss": 24.1852, "step": 24100 }, { "epoch": 191.27118644067798, "eval_loss": 2.985538959503174, "eval_mae": 1.2961422204971313, "eval_mse": 2.985538959503174, "eval_r2": 0.10047167539596558, "eval_rmse": 1.727871221909542, "eval_runtime": 9.0399, "eval_samples_per_second": 443.697, "eval_steps_per_second": 13.938, "step": 24100 }, { "epoch": 191.35094715852443, "grad_norm": 672.7747802734375, "learning_rate": 2.3820624762827535e-09, "loss": 23.1245, "step": 24110 }, { "epoch": 191.4307078763709, "grad_norm": 362.42791748046875, "learning_rate": 2.3392489387588366e-09, "loss": 25.7611, "step": 24120 }, { "epoch": 191.51046859421734, "grad_norm": 372.2131042480469, "learning_rate": 2.296821840187063e-09, "loss": 24.1331, "step": 24130 }, { "epoch": 191.5902293120638, "grad_norm": 323.49664306640625, "learning_rate": 2.2547812467687553e-09, "loss": 22.3181, "step": 24140 }, { "epoch": 191.66999002991028, "grad_norm": 186.12277221679688, "learning_rate": 2.2131272241022457e-09, "loss": 24.1584, "step": 24150 }, { "epoch": 191.74975074775674, "grad_norm": 604.3494873046875, "learning_rate": 2.171859837182599e-09, "loss": 23.3094, "step": 24160 }, { "epoch": 191.8295114656032, "grad_norm": 772.862060546875, "learning_rate": 2.1309791504016683e-09, "loss": 23.8219, "step": 24170 }, { "epoch": 191.90927218344964, "grad_norm": 657.0350341796875, "learning_rate": 2.0904852275478722e-09, "loss": 22.7549, "step": 24180 }, { "epoch": 191.9890329012961, "grad_norm": 277.4106750488281, "learning_rate": 2.050378131806113e-09, "loss": 24.5491, "step": 24190 }, { "epoch": 192.06380857427718, "grad_norm": 1266.8355712890625, "learning_rate": 2.010657925757803e-09, "loss": 22.8491, "step": 24200 }, { "epoch": 192.06380857427718, "eval_loss": 2.986049175262451, "eval_mae": 1.2961747646331787, "eval_mse": 2.986049175262451, "eval_r2": 0.10031801462173462, "eval_rmse": 1.7280188584799794, "eval_runtime": 9.0269, 
"eval_samples_per_second": 444.338, "eval_steps_per_second": 13.958, "step": 24200 }, { "epoch": 192.14356929212363, "grad_norm": 282.1344299316406, "learning_rate": 1.9713246713805587e-09, "loss": 23.7891, "step": 24210 }, { "epoch": 192.22333000997008, "grad_norm": 235.01341247558594, "learning_rate": 1.932378430048259e-09, "loss": 23.2705, "step": 24220 }, { "epoch": 192.30309072781654, "grad_norm": 218.00794982910156, "learning_rate": 1.8938192625309324e-09, "loss": 23.3808, "step": 24230 }, { "epoch": 192.38285144566302, "grad_norm": 204.65672302246094, "learning_rate": 1.8556472289946167e-09, "loss": 22.4473, "step": 24240 }, { "epoch": 192.46261216350948, "grad_norm": 563.5921630859375, "learning_rate": 1.8178623890012512e-09, "loss": 24.5627, "step": 24250 }, { "epoch": 192.54237288135593, "grad_norm": 179.89076232910156, "learning_rate": 1.7804648015086465e-09, "loss": 24.8393, "step": 24260 }, { "epoch": 192.62213359920239, "grad_norm": 831.5360107421875, "learning_rate": 1.7434545248703469e-09, "loss": 24.1278, "step": 24270 }, { "epoch": 192.70189431704884, "grad_norm": 478.6527099609375, "learning_rate": 1.7068316168355746e-09, "loss": 22.7936, "step": 24280 }, { "epoch": 192.78165503489532, "grad_norm": 280.7521667480469, "learning_rate": 1.6705961345491182e-09, "loss": 21.1134, "step": 24290 }, { "epoch": 192.86141575274178, "grad_norm": 206.8756103515625, "learning_rate": 1.6347481345511948e-09, "loss": 25.9234, "step": 24300 }, { "epoch": 192.86141575274178, "eval_loss": 2.9849839210510254, "eval_mae": 1.2959469556808472, "eval_mse": 2.9849836826324463, "eval_r2": 0.1006389856338501, "eval_rmse": 1.7277105320719806, "eval_runtime": 9.0473, "eval_samples_per_second": 443.336, "eval_steps_per_second": 13.927, "step": 24300 }, { "epoch": 192.94117647058823, "grad_norm": 380.0176086425781, "learning_rate": 1.5992876727774774e-09, "loss": 25.2448, "step": 24310 }, { "epoch": 193.01595214356928, "grad_norm": 120.8132095336914, "learning_rate": 
1.5642148045588999e-09, "loss": 21.4254, "step": 24320 }, { "epoch": 193.09571286141576, "grad_norm": 375.27459716796875, "learning_rate": 1.5295295846216582e-09, "loss": 25.4883, "step": 24330 }, { "epoch": 193.17547357926222, "grad_norm": 470.91632080078125, "learning_rate": 1.4952320670869879e-09, "loss": 23.6084, "step": 24340 }, { "epoch": 193.25523429710867, "grad_norm": 140.60874938964844, "learning_rate": 1.4613223054712466e-09, "loss": 22.8232, "step": 24350 }, { "epoch": 193.33499501495513, "grad_norm": 120.59532165527344, "learning_rate": 1.4278003526857484e-09, "loss": 24.3427, "step": 24360 }, { "epoch": 193.4147557328016, "grad_norm": 519.9879150390625, "learning_rate": 1.3946662610367088e-09, "loss": 22.8856, "step": 24370 }, { "epoch": 193.49451645064806, "grad_norm": 597.9413452148438, "learning_rate": 1.3619200822250488e-09, "loss": 21.1711, "step": 24380 }, { "epoch": 193.57427716849452, "grad_norm": 187.56488037109375, "learning_rate": 1.3295618673464792e-09, "loss": 24.6198, "step": 24390 }, { "epoch": 193.65403788634097, "grad_norm": 556.2128295898438, "learning_rate": 1.2975916668913345e-09, "loss": 24.2869, "step": 24400 }, { "epoch": 193.65403788634097, "eval_loss": 2.985672950744629, "eval_mae": 1.296151041984558, "eval_mse": 2.98567271232605, "eval_r2": 0.10043138265609741, "eval_rmse": 1.7279099259874775, "eval_runtime": 9.017, "eval_samples_per_second": 444.829, "eval_steps_per_second": 13.974, "step": 24400 }, { "epoch": 193.73379860418743, "grad_norm": 558.5076293945312, "learning_rate": 1.2660095307445718e-09, "loss": 23.3984, "step": 24410 }, { "epoch": 193.8135593220339, "grad_norm": 256.103271484375, "learning_rate": 1.2348155081854661e-09, "loss": 24.0415, "step": 24420 }, { "epoch": 193.89332003988036, "grad_norm": 276.30682373046875, "learning_rate": 1.2040096478878881e-09, "loss": 25.1056, "step": 24430 }, { "epoch": 193.97308075772682, "grad_norm": 461.1793212890625, "learning_rate": 1.1735919979198871e-09, "loss": 23.1343, 
"step": 24440 }, { "epoch": 194.04785643070787, "grad_norm": 491.55267333984375, "learning_rate": 1.1435626057438863e-09, "loss": 23.5858, "step": 24450 }, { "epoch": 194.12761714855435, "grad_norm": 799.1927490234375, "learning_rate": 1.1139215182163765e-09, "loss": 26.8489, "step": 24460 }, { "epoch": 194.2073778664008, "grad_norm": 270.9708251953125, "learning_rate": 1.0846687815880274e-09, "loss": 22.9998, "step": 24470 }, { "epoch": 194.28713858424726, "grad_norm": 754.822509765625, "learning_rate": 1.0558044415035217e-09, "loss": 24.6583, "step": 24480 }, { "epoch": 194.3668993020937, "grad_norm": 329.4441223144531, "learning_rate": 1.0273285430015265e-09, "loss": 24.6196, "step": 24490 }, { "epoch": 194.44666001994017, "grad_norm": 203.95399475097656, "learning_rate": 9.992411305145277e-10, "loss": 24.4206, "step": 24500 }, { "epoch": 194.44666001994017, "eval_loss": 2.9850587844848633, "eval_mae": 1.2959555387496948, "eval_mse": 2.9850587844848633, "eval_r2": 0.10061639547348022, "eval_rmse": 1.7277322664362276, "eval_runtime": 9.0375, "eval_samples_per_second": 443.819, "eval_steps_per_second": 13.942, "step": 24500 }, { "epoch": 194.52642073778665, "grad_norm": 506.1182556152344, "learning_rate": 9.715422478689673e-10, "loss": 22.4749, "step": 24510 }, { "epoch": 194.6061814556331, "grad_norm": 546.7146606445312, "learning_rate": 9.442319382849118e-10, "loss": 21.9358, "step": 24520 }, { "epoch": 194.68594217347956, "grad_norm": 1289.88427734375, "learning_rate": 9.173102443761627e-10, "loss": 22.4655, "step": 24530 }, { "epoch": 194.76570289132601, "grad_norm": 116.09934997558594, "learning_rate": 8.907772081502008e-10, "loss": 22.6633, "step": 24540 }, { "epoch": 194.84546360917247, "grad_norm": 1326.2725830078125, "learning_rate": 8.646328710079364e-10, "loss": 24.1759, "step": 24550 }, { "epoch": 194.92522432701895, "grad_norm": 685.0480346679688, "learning_rate": 8.388772737439043e-10, "loss": 24.5467, "step": 24560 }, { "epoch": 195.0, "grad_norm": 
294.5447998046875, "learning_rate": 8.135104565459849e-10, "loss": 21.4338, "step": 24570 }, { "epoch": 195.07976071784645, "grad_norm": 447.23480224609375, "learning_rate": 7.885324589954612e-10, "loss": 23.7994, "step": 24580 }, { "epoch": 195.1595214356929, "grad_norm": 302.0307312011719, "learning_rate": 7.63943320066851e-10, "loss": 23.8053, "step": 24590 }, { "epoch": 195.2392821535394, "grad_norm": 1048.637451171875, "learning_rate": 7.39743078128019e-10, "loss": 25.6922, "step": 24600 }, { "epoch": 195.2392821535394, "eval_loss": 2.9844701290130615, "eval_mae": 1.2956593036651611, "eval_mse": 2.9844703674316406, "eval_r2": 0.10079365968704224, "eval_rmse": 1.7275619720958322, "eval_runtime": 9.093, "eval_samples_per_second": 441.106, "eval_steps_per_second": 13.857, "step": 24600 }, { "epoch": 195.31904287138585, "grad_norm": 213.0170135498047, "learning_rate": 7.159317709399537e-10, "loss": 24.5134, "step": 24610 }, { "epoch": 195.3988035892323, "grad_norm": 996.3701782226562, "learning_rate": 6.925094356567685e-10, "loss": 24.2102, "step": 24620 }, { "epoch": 195.47856430707876, "grad_norm": 212.89291381835938, "learning_rate": 6.694761088256173e-10, "loss": 23.6198, "step": 24630 }, { "epoch": 195.5583250249252, "grad_norm": 202.31045532226562, "learning_rate": 6.468318263867234e-10, "loss": 27.476, "step": 24640 }, { "epoch": 195.6380857427717, "grad_norm": 427.94854736328125, "learning_rate": 6.24576623673212e-10, "loss": 22.7069, "step": 24650 }, { "epoch": 195.71784646061815, "grad_norm": 187.37359619140625, "learning_rate": 6.027105354111383e-10, "loss": 23.5491, "step": 24660 }, { "epoch": 195.7976071784646, "grad_norm": 397.1957702636719, "learning_rate": 5.812335957193493e-10, "loss": 22.5446, "step": 24670 }, { "epoch": 195.87736789631106, "grad_norm": 271.0002136230469, "learning_rate": 5.601458381095658e-10, "loss": 22.1646, "step": 24680 }, { "epoch": 195.95712861415754, "grad_norm": 262.593017578125, "learning_rate": 5.394472954861895e-10, 
"loss": 22.4295, "step": 24690 }, { "epoch": 196.0319042871386, "grad_norm": 190.5734100341797, "learning_rate": 5.191380001463019e-10, "loss": 20.8607, "step": 24700 }, { "epoch": 196.0319042871386, "eval_loss": 2.985541820526123, "eval_mae": 1.295915126800537, "eval_mse": 2.985541820526123, "eval_r2": 0.1004709005355835, "eval_rmse": 1.7278720498133313, "eval_runtime": 9.0672, "eval_samples_per_second": 442.361, "eval_steps_per_second": 13.896, "step": 24700 }, { "epoch": 196.11166500498504, "grad_norm": 480.9814758300781, "learning_rate": 4.992179837796373e-10, "loss": 22.7887, "step": 24710 }, { "epoch": 196.1914257228315, "grad_norm": 457.35003662109375, "learning_rate": 4.79687277468499e-10, "loss": 21.2072, "step": 24720 }, { "epoch": 196.27118644067798, "grad_norm": 168.44793701171875, "learning_rate": 4.6054591168778744e-10, "loss": 25.3187, "step": 24730 }, { "epoch": 196.35094715852443, "grad_norm": 497.0190124511719, "learning_rate": 4.4179391630480566e-10, "loss": 22.2018, "step": 24740 }, { "epoch": 196.4307078763709, "grad_norm": 282.4808349609375, "learning_rate": 4.234313205793427e-10, "loss": 24.2384, "step": 24750 }, { "epoch": 196.51046859421734, "grad_norm": 573.6471557617188, "learning_rate": 4.054581531636181e-10, "loss": 24.1769, "step": 24760 }, { "epoch": 196.5902293120638, "grad_norm": 178.6867218017578, "learning_rate": 3.8787444210214294e-10, "loss": 24.7551, "step": 24770 }, { "epoch": 196.66999002991028, "grad_norm": 234.75233459472656, "learning_rate": 3.7068021483180335e-10, "loss": 22.6184, "step": 24780 }, { "epoch": 196.74975074775674, "grad_norm": 255.20985412597656, "learning_rate": 3.538754981816661e-10, "loss": 25.1924, "step": 24790 }, { "epoch": 196.8295114656032, "grad_norm": 559.8621215820312, "learning_rate": 3.3746031837311727e-10, "loss": 24.9777, "step": 24800 }, { "epoch": 196.8295114656032, "eval_loss": 2.9856958389282227, "eval_mae": 1.2961050271987915, "eval_mse": 2.9856958389282227, "eval_r2": 
0.10042446851730347, "eval_rmse": 1.7279166180485164, "eval_runtime": 9.0989, "eval_samples_per_second": 440.823, "eval_steps_per_second": 13.848, "step": 24800 }, { "epoch": 196.90927218344964, "grad_norm": 148.0769500732422, "learning_rate": 3.2143470101964056e-10, "loss": 23.6349, "step": 24810 }, { "epoch": 196.9890329012961, "grad_norm": 245.9391326904297, "learning_rate": 3.0579867112695556e-10, "loss": 24.8288, "step": 24820 }, { "epoch": 197.06380857427718, "grad_norm": 774.696044921875, "learning_rate": 2.9055225309276843e-10, "loss": 21.4276, "step": 24830 }, { "epoch": 197.14356929212363, "grad_norm": 493.7986145019531, "learning_rate": 2.756954707069936e-10, "loss": 22.5439, "step": 24840 }, { "epoch": 197.22333000997008, "grad_norm": 185.34024047851562, "learning_rate": 2.612283471514487e-10, "loss": 24.3253, "step": 24850 }, { "epoch": 197.30309072781654, "grad_norm": 507.3541564941406, "learning_rate": 2.4715090500004867e-10, "loss": 24.4285, "step": 24860 }, { "epoch": 197.38285144566302, "grad_norm": 680.4122314453125, "learning_rate": 2.33463166218556e-10, "loss": 23.7532, "step": 24870 }, { "epoch": 197.46261216350948, "grad_norm": 346.16668701171875, "learning_rate": 2.201651521647474e-10, "loss": 23.7976, "step": 24880 }, { "epoch": 197.54237288135593, "grad_norm": 443.5713806152344, "learning_rate": 2.0725688358824712e-10, "loss": 23.2051, "step": 24890 }, { "epoch": 197.62213359920239, "grad_norm": 155.51341247558594, "learning_rate": 1.9473838063058246e-10, "loss": 25.3578, "step": 24900 }, { "epoch": 197.62213359920239, "eval_loss": 2.9856114387512207, "eval_mae": 1.2960511445999146, "eval_mse": 2.9856114387512207, "eval_r2": 0.10044991970062256, "eval_rmse": 1.7278921953499358, "eval_runtime": 9.1052, "eval_samples_per_second": 440.517, "eval_steps_per_second": 13.838, "step": 24900 }, { "epoch": 197.70189431704884, "grad_norm": 227.46690368652344, "learning_rate": 1.826096628250451e-10, "loss": 26.1419, "step": 24910 }, { "epoch": 
197.78165503489532, "grad_norm": 171.3894805908203, "learning_rate": 1.708707490967465e-10, "loss": 21.9025, "step": 24920 }, { "epoch": 197.86141575274178, "grad_norm": 258.0511779785156, "learning_rate": 1.595216577625902e-10, "loss": 24.4516, "step": 24930 }, { "epoch": 197.94117647058823, "grad_norm": 137.69100952148438, "learning_rate": 1.4856240653118857e-10, "loss": 24.0129, "step": 24940 }, { "epoch": 198.01595214356928, "grad_norm": 142.23512268066406, "learning_rate": 1.3799301250291828e-10, "loss": 20.7065, "step": 24950 }, { "epoch": 198.09571286141576, "grad_norm": 110.43464660644531, "learning_rate": 1.2781349216972603e-10, "loss": 22.97, "step": 24960 }, { "epoch": 198.17547357926222, "grad_norm": 322.2825622558594, "learning_rate": 1.1802386141532283e-10, "loss": 23.2948, "step": 24970 }, { "epoch": 198.25523429710867, "grad_norm": 394.9427490234375, "learning_rate": 1.0862413551501748e-10, "loss": 23.8814, "step": 24980 }, { "epoch": 198.33499501495513, "grad_norm": 480.8127136230469, "learning_rate": 9.96143291357443e-11, "loss": 25.1555, "step": 24990 }, { "epoch": 198.4147557328016, "grad_norm": 171.42910766601562, "learning_rate": 9.09944563359799e-11, "loss": 23.0883, "step": 25000 }, { "epoch": 198.4147557328016, "eval_loss": 2.985551357269287, "eval_mae": 1.296038031578064, "eval_mse": 2.985551118850708, "eval_r2": 0.10046803951263428, "eval_rmse": 1.7278747404979067, "eval_runtime": 9.1263, "eval_samples_per_second": 439.499, "eval_steps_per_second": 13.806, "step": 25000 }, { "epoch": 198.49451645064806, "grad_norm": 1028.7611083984375, "learning_rate": 8.276453056582645e-11, "loss": 22.364, "step": 25010 }, { "epoch": 198.57427716849452, "grad_norm": 570.9407348632812, "learning_rate": 7.49245646668728e-11, "loss": 21.5703, "step": 25020 }, { "epoch": 198.65403788634097, "grad_norm": 351.5626220703125, "learning_rate": 6.747457087227792e-11, "loss": 26.2493, "step": 25030 }, { "epoch": 198.73379860418743, "grad_norm": 409.9494934082031, 
"learning_rate": 6.041456080665974e-11, "loss": 23.5306, "step": 25040 }, { "epoch": 198.8135593220339, "grad_norm": 191.91433715820312, "learning_rate": 5.374454548617846e-11, "loss": 23.6993, "step": 25050 }, { "epoch": 198.89332003988036, "grad_norm": 383.2036437988281, "learning_rate": 4.746453531839778e-11, "loss": 24.7173, "step": 25060 }, { "epoch": 198.97308075772682, "grad_norm": 282.2154846191406, "learning_rate": 4.1574540102395914e-11, "loss": 24.869, "step": 25070 }, { "epoch": 199.04785643070787, "grad_norm": 209.2111358642578, "learning_rate": 3.6074569028654576e-11, "loss": 23.1518, "step": 25080 }, { "epoch": 199.12761714855435, "grad_norm": 125.33819580078125, "learning_rate": 3.096463067905897e-11, "loss": 27.0382, "step": 25090 }, { "epoch": 199.2073778664008, "grad_norm": 224.3722381591797, "learning_rate": 2.6244733027008804e-11, "loss": 23.0602, "step": 25100 }, { "epoch": 199.2073778664008, "eval_loss": 2.985177755355835, "eval_mae": 1.2959330081939697, "eval_mse": 2.985177755355835, "eval_r2": 0.10058057308197021, "eval_rmse": 1.7277666958695075, "eval_runtime": 9.1276, "eval_samples_per_second": 439.435, "eval_steps_per_second": 13.804, "step": 25100 }, { "epoch": 199.28713858424726, "grad_norm": 906.6569213867188, "learning_rate": 2.1914883437140762e-11, "loss": 25.4792, "step": 25110 }, { "epoch": 199.3668993020937, "grad_norm": 139.93795776367188, "learning_rate": 1.797508866560604e-11, "loss": 21.875, "step": 25120 }, { "epoch": 199.44666001994017, "grad_norm": 412.5982360839844, "learning_rate": 1.442535485987606e-11, "loss": 23.1991, "step": 25130 }, { "epoch": 199.52642073778665, "grad_norm": 304.7872314453125, "learning_rate": 1.1265687558797976e-11, "loss": 23.1054, "step": 25140 }, { "epoch": 199.6061814556331, "grad_norm": 691.2601928710938, "learning_rate": 8.496091692594687e-12, "loss": 23.7589, "step": 25150 }, { "epoch": 199.68594217347956, "grad_norm": 180.51930236816406, "learning_rate": 6.116571582809316e-12, "loss": 
23.4738, "step": 25160 }, { "epoch": 199.76570289132601, "grad_norm": 635.4140014648438, "learning_rate": 4.127130942332968e-12, "loss": 21.3643, "step": 25170 }, { "epoch": 199.84546360917247, "grad_norm": 272.2777099609375, "learning_rate": 2.5277728754047323e-12, "loss": 22.6734, "step": 25180 }, { "epoch": 199.92522432701895, "grad_norm": 191.4556121826172, "learning_rate": 1.3184998775839228e-12, "loss": 23.7552, "step": 25190 }, { "epoch": 200.0, "grad_norm": 638.1273193359375, "learning_rate": 4.993138358055926e-13, "loss": 24.4562, "step": 25200 }, { "epoch": 200.0, "eval_loss": 2.9852001667022705, "eval_mae": 1.2959332466125488, "eval_mse": 2.9852004051208496, "eval_r2": 0.10057371854782104, "eval_rmse": 1.7277732504934928, "eval_runtime": 9.1119, "eval_samples_per_second": 440.193, "eval_steps_per_second": 13.828, "step": 25200 }, { "epoch": 200.07976071784645, "grad_norm": 233.1142578125, "learning_rate": 3.276812958733593e-07, "loss": 23.9068, "step": 25210 }, { "epoch": 200.1595214356929, "grad_norm": 264.1903991699219, "learning_rate": 3.275626967218783e-07, "loss": 22.5422, "step": 25220 }, { "epoch": 200.2392821535394, "grad_norm": 269.2657165527344, "learning_rate": 3.274440782524416e-07, "loss": 25.6559, "step": 25230 }, { "epoch": 200.31904287138585, "grad_norm": 1040.9169921875, "learning_rate": 3.273254404945924e-07, "loss": 24.3639, "step": 25240 }, { "epoch": 200.3988035892323, "grad_norm": 264.42822265625, "learning_rate": 3.272067834778791e-07, "loss": 23.6492, "step": 25250 }, { "epoch": 200.47856430707876, "grad_norm": 1180.909912109375, "learning_rate": 3.2708810723185464e-07, "loss": 24.3143, "step": 25260 }, { "epoch": 200.5583250249252, "grad_norm": 173.80987548828125, "learning_rate": 3.2696941178607686e-07, "loss": 22.7227, "step": 25270 }, { "epoch": 200.6380857427717, "grad_norm": 121.28042602539062, "learning_rate": 3.2685069717010823e-07, "loss": 25.8901, "step": 25280 }, { "epoch": 200.71784646061815, "grad_norm": 
490.2680969238281, "learning_rate": 3.267319634135162e-07, "loss": 23.3299, "step": 25290 }, { "epoch": 200.7976071784646, "grad_norm": 197.51051330566406, "learning_rate": 3.2661321054587275e-07, "loss": 24.8621, "step": 25300 }, { "epoch": 200.7976071784646, "eval_loss": 2.987062454223633, "eval_mae": 1.301984429359436, "eval_mse": 2.987062454223633, "eval_r2": 0.10001271963119507, "eval_rmse": 1.7283120245556451, "eval_runtime": 9.1207, "eval_samples_per_second": 439.769, "eval_steps_per_second": 13.815, "step": 25300 }, { "epoch": 200.87736789631106, "grad_norm": 230.46575927734375, "learning_rate": 3.26494438596755e-07, "loss": 22.8219, "step": 25310 }, { "epoch": 200.95712861415754, "grad_norm": 307.9410400390625, "learning_rate": 3.263756475957444e-07, "loss": 22.7135, "step": 25320 }, { "epoch": 201.0319042871386, "grad_norm": 593.2626953125, "learning_rate": 3.262568375724274e-07, "loss": 20.4246, "step": 25330 }, { "epoch": 201.11166500498504, "grad_norm": 1156.7459716796875, "learning_rate": 3.2613800855639504e-07, "loss": 25.0621, "step": 25340 }, { "epoch": 201.1914257228315, "grad_norm": 719.022216796875, "learning_rate": 3.2601916057724335e-07, "loss": 26.0972, "step": 25350 }, { "epoch": 201.27118644067798, "grad_norm": 505.98260498046875, "learning_rate": 3.2590029366457266e-07, "loss": 23.9704, "step": 25360 }, { "epoch": 201.35094715852443, "grad_norm": 354.1245422363281, "learning_rate": 3.257814078479884e-07, "loss": 23.2421, "step": 25370 }, { "epoch": 201.4307078763709, "grad_norm": 282.1812744140625, "learning_rate": 3.256625031571005e-07, "loss": 21.8984, "step": 25380 }, { "epoch": 201.51046859421734, "grad_norm": 676.1168212890625, "learning_rate": 3.2554357962152377e-07, "loss": 23.0126, "step": 25390 }, { "epoch": 201.5902293120638, "grad_norm": 1098.18212890625, "learning_rate": 3.254246372708775e-07, "loss": 25.1928, "step": 25400 }, { "epoch": 201.5902293120638, "eval_loss": 3.001655101776123, "eval_mae": 1.3126819133758545, 
"eval_mse": 3.001655101776123, "eval_r2": 0.09561598300933838, "eval_rmse": 1.7325285284162344, "eval_runtime": 9.1184, "eval_samples_per_second": 439.878, "eval_steps_per_second": 13.818, "step": 25400 }, { "epoch": 201.66999002991028, "grad_norm": 400.4809265136719, "learning_rate": 3.2530567613478574e-07, "loss": 23.2146, "step": 25410 }, { "epoch": 201.74975074775674, "grad_norm": 1427.6348876953125, "learning_rate": 3.2518669624287727e-07, "loss": 24.413, "step": 25420 }, { "epoch": 201.8295114656032, "grad_norm": 258.5172119140625, "learning_rate": 3.250676976247856e-07, "loss": 24.6172, "step": 25430 }, { "epoch": 201.90927218344964, "grad_norm": 230.9508056640625, "learning_rate": 3.2494868031014867e-07, "loss": 23.6625, "step": 25440 }, { "epoch": 201.9890329012961, "grad_norm": 410.1961364746094, "learning_rate": 3.2482964432860925e-07, "loss": 23.1106, "step": 25450 }, { "epoch": 202.06380857427718, "grad_norm": 651.6411743164062, "learning_rate": 3.2471058970981485e-07, "loss": 21.1852, "step": 25460 }, { "epoch": 202.14356929212363, "grad_norm": 297.3800354003906, "learning_rate": 3.2459151648341755e-07, "loss": 25.5979, "step": 25470 }, { "epoch": 202.22333000997008, "grad_norm": 132.6808624267578, "learning_rate": 3.244724246790738e-07, "loss": 24.2881, "step": 25480 }, { "epoch": 202.30309072781654, "grad_norm": 289.0632629394531, "learning_rate": 3.24353314326445e-07, "loss": 23.7794, "step": 25490 }, { "epoch": 202.38285144566302, "grad_norm": 856.2789306640625, "learning_rate": 3.24234185455197e-07, "loss": 22.0349, "step": 25500 }, { "epoch": 202.38285144566302, "eval_loss": 2.987736940383911, "eval_mae": 1.2919999361038208, "eval_mse": 2.987736940383911, "eval_r2": 0.09980946779251099, "eval_rmse": 1.7285071421269602, "eval_runtime": 9.1273, "eval_samples_per_second": 439.453, "eval_steps_per_second": 13.805, "step": 25500 }, { "epoch": 202.46261216350948, "grad_norm": 352.2939758300781, "learning_rate": 3.241150380950006e-07, "loss": 23.3271, 
"step": 25510 }, { "epoch": 202.54237288135593, "grad_norm": 520.0399169921875, "learning_rate": 3.239958722755306e-07, "loss": 24.8606, "step": 25520 }, { "epoch": 202.62213359920239, "grad_norm": 606.8839721679688, "learning_rate": 3.2387668802646683e-07, "loss": 23.3053, "step": 25530 }, { "epoch": 202.70189431704884, "grad_norm": 356.62353515625, "learning_rate": 3.2375748537749375e-07, "loss": 22.8932, "step": 25540 }, { "epoch": 202.78165503489532, "grad_norm": 222.2799530029297, "learning_rate": 3.2363826435830013e-07, "loss": 24.2675, "step": 25550 }, { "epoch": 202.86141575274178, "grad_norm": 1185.181396484375, "learning_rate": 3.235190249985795e-07, "loss": 22.7562, "step": 25560 }, { "epoch": 202.94117647058823, "grad_norm": 575.6671142578125, "learning_rate": 3.2339976732802987e-07, "loss": 24.7696, "step": 25570 }, { "epoch": 203.01595214356928, "grad_norm": 172.9219207763672, "learning_rate": 3.232804913763539e-07, "loss": 24.11, "step": 25580 }, { "epoch": 203.09571286141576, "grad_norm": 1075.917724609375, "learning_rate": 3.2316119717325873e-07, "loss": 21.5316, "step": 25590 }, { "epoch": 203.17547357926222, "grad_norm": 695.1358642578125, "learning_rate": 3.2304188474845606e-07, "loss": 25.1565, "step": 25600 }, { "epoch": 203.17547357926222, "eval_loss": 2.9972896575927734, "eval_mae": 1.2894809246063232, "eval_mse": 2.9972894191741943, "eval_r2": 0.0969313383102417, "eval_rmse": 1.7312681534569376, "eval_runtime": 9.1254, "eval_samples_per_second": 439.541, "eval_steps_per_second": 13.808, "step": 25600 }, { "epoch": 203.25523429710867, "grad_norm": 467.5833435058594, "learning_rate": 3.2292255413166204e-07, "loss": 24.2298, "step": 25610 }, { "epoch": 203.33499501495513, "grad_norm": 829.6505737304688, "learning_rate": 3.2280320535259757e-07, "loss": 22.8333, "step": 25620 }, { "epoch": 203.4147557328016, "grad_norm": 712.6729736328125, "learning_rate": 3.226838384409881e-07, "loss": 23.4115, "step": 25630 }, { "epoch": 203.49451645064806, 
"grad_norm": 341.8227844238281, "learning_rate": 3.2256445342656304e-07, "loss": 22.5, "step": 25640 }, { "epoch": 203.57427716849452, "grad_norm": 390.042724609375, "learning_rate": 3.2244505033905704e-07, "loss": 24.5384, "step": 25650 }, { "epoch": 203.65403788634097, "grad_norm": 874.4033813476562, "learning_rate": 3.223256292082088e-07, "loss": 25.1737, "step": 25660 }, { "epoch": 203.73379860418743, "grad_norm": 768.7593383789062, "learning_rate": 3.222061900637618e-07, "loss": 24.8873, "step": 25670 }, { "epoch": 203.8135593220339, "grad_norm": 324.7832946777344, "learning_rate": 3.2208673293546356e-07, "loss": 24.5665, "step": 25680 }, { "epoch": 203.89332003988036, "grad_norm": 377.18609619140625, "learning_rate": 3.2196725785306654e-07, "loss": 23.7054, "step": 25690 }, { "epoch": 203.97308075772682, "grad_norm": 696.3908081054688, "learning_rate": 3.218477648463276e-07, "loss": 21.9615, "step": 25700 }, { "epoch": 203.97308075772682, "eval_loss": 2.9862987995147705, "eval_mae": 1.3001036643981934, "eval_mse": 2.9862983226776123, "eval_r2": 0.10024291276931763, "eval_rmse": 1.7280909474554897, "eval_runtime": 9.0986, "eval_samples_per_second": 440.836, "eval_steps_per_second": 13.848, "step": 25700 }, { "epoch": 204.04785643070787, "grad_norm": 554.8890991210938, "learning_rate": 3.2172825394500774e-07, "loss": 22.9764, "step": 25710 }, { "epoch": 204.12761714855435, "grad_norm": 612.753662109375, "learning_rate": 3.216087251788728e-07, "loss": 24.8179, "step": 25720 }, { "epoch": 204.2073778664008, "grad_norm": 660.7884521484375, "learning_rate": 3.2148917857769276e-07, "loss": 23.1987, "step": 25730 }, { "epoch": 204.28713858424726, "grad_norm": 749.5087280273438, "learning_rate": 3.213696141712424e-07, "loss": 22.4109, "step": 25740 }, { "epoch": 204.3668993020937, "grad_norm": 783.62744140625, "learning_rate": 3.212500319893005e-07, "loss": 23.2447, "step": 25750 }, { "epoch": 204.44666001994017, "grad_norm": 850.573974609375, "learning_rate": 
3.2113043206165065e-07, "loss": 24.0904, "step": 25760 }, { "epoch": 204.52642073778665, "grad_norm": 613.3543701171875, "learning_rate": 3.2101081441808067e-07, "loss": 22.9554, "step": 25770 }, { "epoch": 204.6061814556331, "grad_norm": 321.5801696777344, "learning_rate": 3.2089117908838275e-07, "loss": 24.8453, "step": 25780 }, { "epoch": 204.68594217347956, "grad_norm": 598.156005859375, "learning_rate": 3.207715261023536e-07, "loss": 22.7975, "step": 25790 }, { "epoch": 204.76570289132601, "grad_norm": 295.4639892578125, "learning_rate": 3.206518554897943e-07, "loss": 22.9623, "step": 25800 }, { "epoch": 204.76570289132601, "eval_loss": 2.9853804111480713, "eval_mae": 1.2947884798049927, "eval_mse": 2.9853804111480713, "eval_r2": 0.10051953792572021, "eval_rmse": 1.7278253416211, "eval_runtime": 9.1201, "eval_samples_per_second": 439.799, "eval_steps_per_second": 13.816, "step": 25800 }, { "epoch": 204.84546360917247, "grad_norm": 192.13729858398438, "learning_rate": 3.2053216728051036e-07, "loss": 24.6751, "step": 25810 }, { "epoch": 204.92522432701895, "grad_norm": 1190.568603515625, "learning_rate": 3.204124615043115e-07, "loss": 26.1587, "step": 25820 }, { "epoch": 205.0, "grad_norm": 304.8810729980469, "learning_rate": 3.2029273819101207e-07, "loss": 21.0388, "step": 25830 }, { "epoch": 205.07976071784645, "grad_norm": 208.4384307861328, "learning_rate": 3.201729973704306e-07, "loss": 23.0309, "step": 25840 }, { "epoch": 205.1595214356929, "grad_norm": 164.4918212890625, "learning_rate": 3.200532390723899e-07, "loss": 25.1194, "step": 25850 }, { "epoch": 205.2392821535394, "grad_norm": 400.9577941894531, "learning_rate": 3.1993346332671744e-07, "loss": 22.6518, "step": 25860 }, { "epoch": 205.31904287138585, "grad_norm": 298.8614501953125, "learning_rate": 3.1981367016324477e-07, "loss": 24.188, "step": 25870 }, { "epoch": 205.3988035892323, "grad_norm": 367.3051452636719, "learning_rate": 3.1969385961180803e-07, "loss": 24.6883, "step": 25880 }, { 
"epoch": 205.47856430707876, "grad_norm": 600.275146484375, "learning_rate": 3.1957403170224723e-07, "loss": 22.7788, "step": 25890 }, { "epoch": 205.5583250249252, "grad_norm": 344.5098571777344, "learning_rate": 3.1945418646440726e-07, "loss": 23.5699, "step": 25900 }, { "epoch": 205.5583250249252, "eval_loss": 2.983717203140259, "eval_mae": 1.297709345817566, "eval_mse": 2.9837169647216797, "eval_r2": 0.1010206937789917, "eval_rmse": 1.7273439045892627, "eval_runtime": 9.1253, "eval_samples_per_second": 439.55, "eval_steps_per_second": 13.808, "step": 25900 }, { "epoch": 205.6380857427717, "grad_norm": 954.1924438476562, "learning_rate": 3.1933432392813704e-07, "loss": 24.4087, "step": 25910 }, { "epoch": 205.71784646061815, "grad_norm": 400.8988037109375, "learning_rate": 3.1921444412328965e-07, "loss": 26.4869, "step": 25920 }, { "epoch": 205.7976071784646, "grad_norm": 497.05010986328125, "learning_rate": 3.1909454707972286e-07, "loss": 22.0453, "step": 25930 }, { "epoch": 205.87736789631106, "grad_norm": 645.5272216796875, "learning_rate": 3.1897463282729843e-07, "loss": 23.4677, "step": 25940 }, { "epoch": 205.95712861415754, "grad_norm": 649.5585327148438, "learning_rate": 3.1885470139588243e-07, "loss": 23.6492, "step": 25950 }, { "epoch": 206.0319042871386, "grad_norm": 312.9655456542969, "learning_rate": 3.187347528153454e-07, "loss": 19.5719, "step": 25960 }, { "epoch": 206.11166500498504, "grad_norm": 301.42578125, "learning_rate": 3.1861478711556194e-07, "loss": 25.0317, "step": 25970 }, { "epoch": 206.1914257228315, "grad_norm": 354.2798767089844, "learning_rate": 3.184948043264111e-07, "loss": 23.4643, "step": 25980 }, { "epoch": 206.27118644067798, "grad_norm": 1141.8057861328125, "learning_rate": 3.18374804477776e-07, "loss": 22.9418, "step": 25990 }, { "epoch": 206.35094715852443, "grad_norm": 691.326904296875, "learning_rate": 3.182547875995441e-07, "loss": 21.9891, "step": 26000 }, { "epoch": 206.35094715852443, "eval_loss": 
2.9850969314575195, "eval_mae": 1.3001097440719604, "eval_mse": 2.9850971698760986, "eval_r2": 0.1006048321723938, "eval_rmse": 1.7277433750057034, "eval_runtime": 9.1101, "eval_samples_per_second": 440.283, "eval_steps_per_second": 13.831, "step": 26000 }, { "epoch": 206.4307078763709, "grad_norm": 354.40484619140625, "learning_rate": 3.181347537216071e-07, "loss": 23.952, "step": 26010 }, { "epoch": 206.51046859421734, "grad_norm": 323.9743347167969, "learning_rate": 3.18014702873861e-07, "loss": 24.5573, "step": 26020 }, { "epoch": 206.5902293120638, "grad_norm": 783.04736328125, "learning_rate": 3.1789463508620596e-07, "loss": 25.0914, "step": 26030 }, { "epoch": 206.66999002991028, "grad_norm": 842.0777587890625, "learning_rate": 3.177745503885463e-07, "loss": 22.0903, "step": 26040 }, { "epoch": 206.74975074775674, "grad_norm": 453.91766357421875, "learning_rate": 3.176544488107905e-07, "loss": 24.8008, "step": 26050 }, { "epoch": 206.8295114656032, "grad_norm": 358.0523681640625, "learning_rate": 3.175343303828516e-07, "loss": 24.3761, "step": 26060 }, { "epoch": 206.90927218344964, "grad_norm": 288.92523193359375, "learning_rate": 3.1741419513464643e-07, "loss": 23.9646, "step": 26070 }, { "epoch": 206.9890329012961, "grad_norm": 384.78790283203125, "learning_rate": 3.1729404309609616e-07, "loss": 24.4646, "step": 26080 }, { "epoch": 207.06380857427718, "grad_norm": 440.7989501953125, "learning_rate": 3.171738742971263e-07, "loss": 20.5563, "step": 26090 }, { "epoch": 207.14356929212363, "grad_norm": 186.81570434570312, "learning_rate": 3.170536887676662e-07, "loss": 23.336, "step": 26100 }, { "epoch": 207.14356929212363, "eval_loss": 2.9858384132385254, "eval_mae": 1.3001880645751953, "eval_mse": 2.9858384132385254, "eval_r2": 0.10038143396377563, "eval_rmse": 1.7279578736874708, "eval_runtime": 9.1198, "eval_samples_per_second": 439.813, "eval_steps_per_second": 13.816, "step": 26100 }, { "epoch": 207.22333000997008, "grad_norm": 370.08123779296875, 
"learning_rate": 3.169334865376497e-07, "loss": 23.6388, "step": 26110 }, { "epoch": 207.30309072781654, "grad_norm": 1464.3602294921875, "learning_rate": 3.1681326763701455e-07, "loss": 22.6312, "step": 26120 }, { "epoch": 207.38285144566302, "grad_norm": 869.84716796875, "learning_rate": 3.166930320957028e-07, "loss": 21.8956, "step": 26130 }, { "epoch": 207.46261216350948, "grad_norm": 499.814697265625, "learning_rate": 3.165727799436607e-07, "loss": 24.4855, "step": 26140 }, { "epoch": 207.54237288135593, "grad_norm": 283.2919616699219, "learning_rate": 3.1645251121083847e-07, "loss": 26.9945, "step": 26150 }, { "epoch": 207.62213359920239, "grad_norm": 403.3465576171875, "learning_rate": 3.163322259271906e-07, "loss": 22.2168, "step": 26160 }, { "epoch": 207.70189431704884, "grad_norm": 417.0478515625, "learning_rate": 3.162119241226755e-07, "loss": 23.5436, "step": 26170 }, { "epoch": 207.78165503489532, "grad_norm": 1154.0826416015625, "learning_rate": 3.1609160582725596e-07, "loss": 23.8994, "step": 26180 }, { "epoch": 207.86141575274178, "grad_norm": 448.4119567871094, "learning_rate": 3.159712710708987e-07, "loss": 24.1008, "step": 26190 }, { "epoch": 207.94117647058823, "grad_norm": 294.5215148925781, "learning_rate": 3.1585091988357465e-07, "loss": 25.3929, "step": 26200 }, { "epoch": 207.94117647058823, "eval_loss": 2.9880077838897705, "eval_mae": 1.2916208505630493, "eval_mse": 2.9880077838897705, "eval_r2": 0.09972786903381348, "eval_rmse": 1.728585486428071, "eval_runtime": 9.2105, "eval_samples_per_second": 435.482, "eval_steps_per_second": 13.68, "step": 26200 }, { "epoch": 208.01595214356928, "grad_norm": 662.5714721679688, "learning_rate": 3.157305522952586e-07, "loss": 24.0349, "step": 26210 }, { "epoch": 208.09571286141576, "grad_norm": 311.1929016113281, "learning_rate": 3.1561016833592986e-07, "loss": 23.6938, "step": 26220 }, { "epoch": 208.17547357926222, "grad_norm": 391.3521423339844, "learning_rate": 3.154897680355714e-07, "loss": 
21.3007, "step": 26230 }, { "epoch": 208.25523429710867, "grad_norm": 474.2955017089844, "learning_rate": 3.153693514241703e-07, "loss": 25.5512, "step": 26240 }, { "epoch": 208.33499501495513, "grad_norm": 379.4305725097656, "learning_rate": 3.15248918531718e-07, "loss": 22.6775, "step": 26250 }, { "epoch": 208.4147557328016, "grad_norm": 223.09719848632812, "learning_rate": 3.151284693882098e-07, "loss": 23.6358, "step": 26260 }, { "epoch": 208.49451645064806, "grad_norm": 170.5644989013672, "learning_rate": 3.15008004023645e-07, "loss": 23.4765, "step": 26270 }, { "epoch": 208.57427716849452, "grad_norm": 220.80567932128906, "learning_rate": 3.14887522468027e-07, "loss": 24.6062, "step": 26280 }, { "epoch": 208.65403788634097, "grad_norm": 107.43252563476562, "learning_rate": 3.147670247513632e-07, "loss": 22.696, "step": 26290 }, { "epoch": 208.73379860418743, "grad_norm": 384.41485595703125, "learning_rate": 3.1464651090366517e-07, "loss": 23.3779, "step": 26300 }, { "epoch": 208.73379860418743, "eval_loss": 2.990997314453125, "eval_mae": 1.2915152311325073, "eval_mse": 2.990997314453125, "eval_r2": 0.09882712364196777, "eval_rmse": 1.729450003455759, "eval_runtime": 9.1109, "eval_samples_per_second": 440.24, "eval_steps_per_second": 13.83, "step": 26300 }, { "epoch": 208.8135593220339, "grad_norm": 706.5662231445312, "learning_rate": 3.1452598095494836e-07, "loss": 24.5741, "step": 26310 }, { "epoch": 208.89332003988036, "grad_norm": 936.375732421875, "learning_rate": 3.144054349352322e-07, "loss": 24.9527, "step": 26320 }, { "epoch": 208.97308075772682, "grad_norm": 1180.5230712890625, "learning_rate": 3.142848728745401e-07, "loss": 24.4996, "step": 26330 }, { "epoch": 209.04785643070787, "grad_norm": 676.7444458007812, "learning_rate": 3.1416429480289974e-07, "loss": 22.0019, "step": 26340 }, { "epoch": 209.12761714855435, "grad_norm": 1116.85986328125, "learning_rate": 3.1405576087388343e-07, "loss": 24.691, "step": 26350 }, { "epoch": 209.2073778664008, 
"grad_norm": 266.63189697265625, "learning_rate": 3.139351524641811e-07, "loss": 22.1795, "step": 26360 }, { "epoch": 209.28713858424726, "grad_norm": 616.2620849609375, "learning_rate": 3.138145281306327e-07, "loss": 23.662, "step": 26370 }, { "epoch": 209.3668993020937, "grad_norm": 704.6864013671875, "learning_rate": 3.136938879032811e-07, "loss": 22.9147, "step": 26380 }, { "epoch": 209.44666001994017, "grad_norm": 351.6438293457031, "learning_rate": 3.1357323181217324e-07, "loss": 23.9645, "step": 26390 }, { "epoch": 209.52642073778665, "grad_norm": 122.1126480102539, "learning_rate": 3.1345255988736025e-07, "loss": 23.5815, "step": 26400 }, { "epoch": 209.52642073778665, "eval_loss": 2.9857232570648193, "eval_mae": 1.2909077405929565, "eval_mse": 2.9857232570648193, "eval_r2": 0.10041612386703491, "eval_rmse": 1.7279245519017372, "eval_runtime": 9.1149, "eval_samples_per_second": 440.05, "eval_steps_per_second": 13.824, "step": 26400 }, { "epoch": 209.6061814556331, "grad_norm": 895.959228515625, "learning_rate": 3.1333187215889674e-07, "loss": 24.7816, "step": 26410 }, { "epoch": 209.68594217347956, "grad_norm": 397.9737854003906, "learning_rate": 3.1321116865684153e-07, "loss": 24.0738, "step": 26420 }, { "epoch": 209.76570289132601, "grad_norm": 914.4462890625, "learning_rate": 3.130904494112574e-07, "loss": 22.1512, "step": 26430 }, { "epoch": 209.84546360917247, "grad_norm": 432.4562072753906, "learning_rate": 3.1296971445221093e-07, "loss": 21.9592, "step": 26440 }, { "epoch": 209.92522432701895, "grad_norm": 652.2195434570312, "learning_rate": 3.128489638097727e-07, "loss": 24.3588, "step": 26450 }, { "epoch": 210.0, "grad_norm": 416.7950134277344, "learning_rate": 3.1272819751401717e-07, "loss": 24.7862, "step": 26460 }, { "epoch": 210.07976071784645, "grad_norm": 926.331787109375, "learning_rate": 3.126074155950226e-07, "loss": 24.4526, "step": 26470 }, { "epoch": 210.1595214356929, "grad_norm": 847.6224975585938, "learning_rate": 
3.1248661808287135e-07, "loss": 22.3391, "step": 26480 }, { "epoch": 210.2392821535394, "grad_norm": 1064.720458984375, "learning_rate": 3.1236580500764946e-07, "loss": 24.1305, "step": 26490 }, { "epoch": 210.31904287138585, "grad_norm": 592.7523193359375, "learning_rate": 3.1224497639944697e-07, "loss": 22.8022, "step": 26500 }, { "epoch": 210.31904287138585, "eval_loss": 2.9844868183135986, "eval_mae": 1.2950390577316284, "eval_mse": 2.9844868183135986, "eval_r2": 0.10078871250152588, "eval_rmse": 1.7275667333893643, "eval_runtime": 9.1183, "eval_samples_per_second": 439.883, "eval_steps_per_second": 13.818, "step": 26500 }, { "epoch": 210.3988035892323, "grad_norm": 326.2663879394531, "learning_rate": 3.121241322883578e-07, "loss": 22.7675, "step": 26510 }, { "epoch": 210.47856430707876, "grad_norm": 256.7446594238281, "learning_rate": 3.120032727044795e-07, "loss": 24.0059, "step": 26520 }, { "epoch": 210.5583250249252, "grad_norm": 1118.33642578125, "learning_rate": 3.1188239767791395e-07, "loss": 26.5326, "step": 26530 }, { "epoch": 210.6380857427717, "grad_norm": 410.37005615234375, "learning_rate": 3.1176150723876625e-07, "loss": 22.0273, "step": 26540 }, { "epoch": 210.71784646061815, "grad_norm": 141.73388671875, "learning_rate": 3.116406014171459e-07, "loss": 22.7835, "step": 26550 }, { "epoch": 210.7976071784646, "grad_norm": 614.6095581054688, "learning_rate": 3.1151968024316593e-07, "loss": 22.3267, "step": 26560 }, { "epoch": 210.87736789631106, "grad_norm": 1028.78955078125, "learning_rate": 3.113987437469433e-07, "loss": 25.7587, "step": 26570 }, { "epoch": 210.95712861415754, "grad_norm": 416.4407653808594, "learning_rate": 3.1127779195859873e-07, "loss": 23.3185, "step": 26580 }, { "epoch": 211.0319042871386, "grad_norm": 712.4876098632812, "learning_rate": 3.111568249082566e-07, "loss": 22.3591, "step": 26590 }, { "epoch": 211.11166500498504, "grad_norm": 751.4306640625, "learning_rate": 3.110358426260456e-07, "loss": 22.1697, "step": 26600 }, 
{ "epoch": 211.11166500498504, "eval_loss": 2.9798662662506104, "eval_mae": 1.2977105379104614, "eval_mse": 2.9798665046691895, "eval_r2": 0.1021808385848999, "eval_rmse": 1.726228983845767, "eval_runtime": 9.0913, "eval_samples_per_second": 441.192, "eval_steps_per_second": 13.859, "step": 26600 }, { "epoch": 211.1914257228315, "grad_norm": 240.1824188232422, "learning_rate": 3.1091484514209763e-07, "loss": 22.6163, "step": 26610 }, { "epoch": 211.27118644067798, "grad_norm": 309.4950256347656, "learning_rate": 3.107938324865486e-07, "loss": 23.2412, "step": 26620 }, { "epoch": 211.35094715852443, "grad_norm": 148.27700805664062, "learning_rate": 3.106728046895384e-07, "loss": 25.1011, "step": 26630 }, { "epoch": 211.4307078763709, "grad_norm": 307.0238952636719, "learning_rate": 3.105517617812104e-07, "loss": 25.1078, "step": 26640 }, { "epoch": 211.51046859421734, "grad_norm": 364.7814025878906, "learning_rate": 3.1043070379171187e-07, "loss": 25.404, "step": 26650 }, { "epoch": 211.5902293120638, "grad_norm": 590.2925415039062, "learning_rate": 3.103096307511938e-07, "loss": 24.4948, "step": 26660 }, { "epoch": 211.66999002991028, "grad_norm": 840.1451416015625, "learning_rate": 3.101885426898109e-07, "loss": 23.8928, "step": 26670 }, { "epoch": 211.74975074775674, "grad_norm": 673.6281127929688, "learning_rate": 3.1006743963772163e-07, "loss": 23.0499, "step": 26680 }, { "epoch": 211.8295114656032, "grad_norm": 723.83837890625, "learning_rate": 3.099463216250884e-07, "loss": 23.7825, "step": 26690 }, { "epoch": 211.90927218344964, "grad_norm": 412.1143493652344, "learning_rate": 3.0982518868207696e-07, "loss": 23.5121, "step": 26700 }, { "epoch": 211.90927218344964, "eval_loss": 2.9848618507385254, "eval_mae": 1.2893383502960205, "eval_mse": 2.9848618507385254, "eval_r2": 0.10067570209503174, "eval_rmse": 1.7276752735217709, "eval_runtime": 9.0701, "eval_samples_per_second": 442.222, "eval_steps_per_second": 13.892, "step": 26700 }, { "epoch": 
211.9890329012961, "grad_norm": 192.29212951660156, "learning_rate": 3.0970404083885703e-07, "loss": 22.379, "step": 26710 }, { "epoch": 212.06380857427718, "grad_norm": 435.377685546875, "learning_rate": 3.0958287812560213e-07, "loss": 22.9865, "step": 26720 }, { "epoch": 212.14356929212363, "grad_norm": 551.6611328125, "learning_rate": 3.094617005724892e-07, "loss": 23.0014, "step": 26730 }, { "epoch": 212.22333000997008, "grad_norm": 1402.768798828125, "learning_rate": 3.09340508209699e-07, "loss": 22.7538, "step": 26740 }, { "epoch": 212.30309072781654, "grad_norm": 192.71163940429688, "learning_rate": 3.0921930106741606e-07, "loss": 23.8688, "step": 26750 }, { "epoch": 212.38285144566302, "grad_norm": 152.849609375, "learning_rate": 3.0909807917582853e-07, "loss": 24.4226, "step": 26760 }, { "epoch": 212.46261216350948, "grad_norm": 523.3204345703125, "learning_rate": 3.0897684256512824e-07, "loss": 24.8181, "step": 26770 }, { "epoch": 212.54237288135593, "grad_norm": 883.080078125, "learning_rate": 3.088555912655106e-07, "loss": 23.5999, "step": 26780 }, { "epoch": 212.62213359920239, "grad_norm": 563.0305786132812, "learning_rate": 3.087343253071748e-07, "loss": 24.0943, "step": 26790 }, { "epoch": 212.70189431704884, "grad_norm": 514.7747192382812, "learning_rate": 3.0861304472032374e-07, "loss": 22.3982, "step": 26800 }, { "epoch": 212.70189431704884, "eval_loss": 2.981959104537964, "eval_mae": 1.289758324623108, "eval_mse": 2.981959104537964, "eval_r2": 0.10155034065246582, "eval_rmse": 1.726834996326506, "eval_runtime": 9.0866, "eval_samples_per_second": 441.417, "eval_steps_per_second": 13.867, "step": 26800 }, { "epoch": 212.78165503489532, "grad_norm": 395.4801025390625, "learning_rate": 3.084917495351637e-07, "loss": 22.2588, "step": 26810 }, { "epoch": 212.86141575274178, "grad_norm": 589.0004272460938, "learning_rate": 3.083704397819048e-07, "loss": 22.845, "step": 26820 }, { "epoch": 212.94117647058823, "grad_norm": 1045.0267333984375, 
"learning_rate": 3.082491154907609e-07, "loss": 25.0153, "step": 26830 }, { "epoch": 213.01595214356928, "grad_norm": 992.125244140625, "learning_rate": 3.08127776691949e-07, "loss": 24.5204, "step": 26840 }, { "epoch": 213.09571286141576, "grad_norm": 525.619873046875, "learning_rate": 3.080064234156904e-07, "loss": 24.5004, "step": 26850 }, { "epoch": 213.17547357926222, "grad_norm": 349.7850341796875, "learning_rate": 3.0788505569220945e-07, "loss": 25.3501, "step": 26860 }, { "epoch": 213.25523429710867, "grad_norm": 229.71084594726562, "learning_rate": 3.0776367355173427e-07, "loss": 22.593, "step": 26870 }, { "epoch": 213.33499501495513, "grad_norm": 118.41441345214844, "learning_rate": 3.076422770244967e-07, "loss": 22.9523, "step": 26880 }, { "epoch": 213.4147557328016, "grad_norm": 936.93994140625, "learning_rate": 3.0752086614073204e-07, "loss": 24.3618, "step": 26890 }, { "epoch": 213.49451645064806, "grad_norm": 938.4599609375, "learning_rate": 3.073994409306791e-07, "loss": 23.4791, "step": 26900 }, { "epoch": 213.49451645064806, "eval_loss": 2.980309247970581, "eval_mae": 1.300426721572876, "eval_mse": 2.980309009552002, "eval_r2": 0.10204744338989258, "eval_rmse": 1.726357150056732, "eval_runtime": 9.0902, "eval_samples_per_second": 441.247, "eval_steps_per_second": 13.861, "step": 26900 }, { "epoch": 213.57427716849452, "grad_norm": 547.9718017578125, "learning_rate": 3.072780014245804e-07, "loss": 24.3147, "step": 26910 }, { "epoch": 213.65403788634097, "grad_norm": 753.8389892578125, "learning_rate": 3.0715654765268195e-07, "loss": 24.3151, "step": 26920 }, { "epoch": 213.73379860418743, "grad_norm": 190.3426513671875, "learning_rate": 3.0703507964523335e-07, "loss": 23.3416, "step": 26930 }, { "epoch": 213.8135593220339, "grad_norm": 464.51312255859375, "learning_rate": 3.069135974324876e-07, "loss": 23.1847, "step": 26940 }, { "epoch": 213.89332003988036, "grad_norm": 402.635009765625, "learning_rate": 3.067921010447016e-07, "loss": 22.662, 
"step": 26950 }, { "epoch": 213.97308075772682, "grad_norm": 920.003173828125, "learning_rate": 3.0667059051213534e-07, "loss": 22.8849, "step": 26960 }, { "epoch": 214.04785643070787, "grad_norm": 486.64398193359375, "learning_rate": 3.0654906586505257e-07, "loss": 21.9501, "step": 26970 }, { "epoch": 214.12761714855435, "grad_norm": 1110.277099609375, "learning_rate": 3.064275271337205e-07, "loss": 24.034, "step": 26980 }, { "epoch": 214.2073778664008, "grad_norm": 1393.9989013671875, "learning_rate": 3.0630597434840996e-07, "loss": 23.2111, "step": 26990 }, { "epoch": 214.28713858424726, "grad_norm": 522.2705688476562, "learning_rate": 3.061844075393951e-07, "loss": 24.3892, "step": 27000 }, { "epoch": 214.28713858424726, "eval_loss": 2.981308698654175, "eval_mae": 1.2962442636489868, "eval_mse": 2.981308937072754, "eval_r2": 0.10174620151519775, "eval_rmse": 1.7266467319845, "eval_runtime": 9.0887, "eval_samples_per_second": 441.319, "eval_steps_per_second": 13.863, "step": 27000 }, { "epoch": 214.3668993020937, "grad_norm": 609.6259765625, "learning_rate": 3.0606282673695367e-07, "loss": 23.9073, "step": 27010 }, { "epoch": 214.44666001994017, "grad_norm": 791.1295166015625, "learning_rate": 3.059412319713669e-07, "loss": 25.4684, "step": 27020 }, { "epoch": 214.52642073778665, "grad_norm": 429.5850524902344, "learning_rate": 3.0581962327291945e-07, "loss": 24.0151, "step": 27030 }, { "epoch": 214.6061814556331, "grad_norm": 629.2057495117188, "learning_rate": 3.0569800067189956e-07, "loss": 24.1557, "step": 27040 }, { "epoch": 214.68594217347956, "grad_norm": 265.7466125488281, "learning_rate": 3.0557636419859874e-07, "loss": 23.352, "step": 27050 }, { "epoch": 214.76570289132601, "grad_norm": 484.65228271484375, "learning_rate": 3.0545471388331213e-07, "loss": 23.608, "step": 27060 }, { "epoch": 214.84546360917247, "grad_norm": 283.9285583496094, "learning_rate": 3.053330497563382e-07, "loss": 22.0524, "step": 27070 }, { "epoch": 214.92522432701895, 
"grad_norm": 143.06239318847656, "learning_rate": 3.05211371847979e-07, "loss": 22.4378, "step": 27080 }, { "epoch": 215.0, "grad_norm": 146.85423278808594, "learning_rate": 3.0508968018853994e-07, "loss": 21.4124, "step": 27090 }, { "epoch": 215.07976071784645, "grad_norm": 1108.4443359375, "learning_rate": 3.049679748083297e-07, "loss": 24.869, "step": 27100 }, { "epoch": 215.07976071784645, "eval_loss": 2.987786054611206, "eval_mae": 1.2889059782028198, "eval_mse": 2.987786054611206, "eval_r2": 0.09979468584060669, "eval_rmse": 1.7285213491916165, "eval_runtime": 9.0675, "eval_samples_per_second": 442.348, "eval_steps_per_second": 13.896, "step": 27100 }, { "epoch": 215.1595214356929, "grad_norm": 340.2200927734375, "learning_rate": 3.048462557376607e-07, "loss": 22.5566, "step": 27110 }, { "epoch": 215.2392821535394, "grad_norm": 342.66363525390625, "learning_rate": 3.0472452300684846e-07, "loss": 22.7565, "step": 27120 }, { "epoch": 215.31904287138585, "grad_norm": 818.2691040039062, "learning_rate": 3.0460277664621216e-07, "loss": 23.5732, "step": 27130 }, { "epoch": 215.3988035892323, "grad_norm": 315.8173828125, "learning_rate": 3.0448101668607405e-07, "loss": 25.9838, "step": 27140 }, { "epoch": 215.47856430707876, "grad_norm": 444.2835998535156, "learning_rate": 3.043592431567601e-07, "loss": 21.8117, "step": 27150 }, { "epoch": 215.5583250249252, "grad_norm": 523.1034545898438, "learning_rate": 3.042374560885996e-07, "loss": 22.2515, "step": 27160 }, { "epoch": 215.6380857427717, "grad_norm": 622.7999267578125, "learning_rate": 3.0411565551192497e-07, "loss": 23.3136, "step": 27170 }, { "epoch": 215.71784646061815, "grad_norm": 701.6234741210938, "learning_rate": 3.039938414570723e-07, "loss": 25.0017, "step": 27180 }, { "epoch": 215.7976071784646, "grad_norm": 909.46337890625, "learning_rate": 3.0387201395438075e-07, "loss": 23.118, "step": 27190 }, { "epoch": 215.87736789631106, "grad_norm": 378.4754333496094, "learning_rate": 3.037501730341931e-07, 
"loss": 23.0487, "step": 27200 }, { "epoch": 215.87736789631106, "eval_loss": 2.978470802307129, "eval_mae": 1.2960094213485718, "eval_mse": 2.978471040725708, "eval_r2": 0.10260123014450073, "eval_rmse": 1.7258247421814614, "eval_runtime": 9.0961, "eval_samples_per_second": 440.958, "eval_steps_per_second": 13.852, "step": 27200 }, { "epoch": 215.95712861415754, "grad_norm": 356.2110900878906, "learning_rate": 3.0362831872685534e-07, "loss": 23.9503, "step": 27210 }, { "epoch": 216.0319042871386, "grad_norm": 158.9725799560547, "learning_rate": 3.0350645106271674e-07, "loss": 23.5513, "step": 27220 }, { "epoch": 216.11166500498504, "grad_norm": 451.63873291015625, "learning_rate": 3.0338457007212995e-07, "loss": 24.9653, "step": 27230 }, { "epoch": 216.1914257228315, "grad_norm": 254.16026306152344, "learning_rate": 3.0326267578545105e-07, "loss": 25.1305, "step": 27240 }, { "epoch": 216.27118644067798, "grad_norm": 154.31466674804688, "learning_rate": 3.031407682330393e-07, "loss": 24.1676, "step": 27250 }, { "epoch": 216.35094715852443, "grad_norm": 523.8579711914062, "learning_rate": 3.030188474452572e-07, "loss": 23.739, "step": 27260 }, { "epoch": 216.4307078763709, "grad_norm": 420.2916564941406, "learning_rate": 3.028969134524707e-07, "loss": 22.0993, "step": 27270 }, { "epoch": 216.51046859421734, "grad_norm": 319.1602478027344, "learning_rate": 3.0277496628504905e-07, "loss": 24.9593, "step": 27280 }, { "epoch": 216.5902293120638, "grad_norm": 464.3196105957031, "learning_rate": 3.026530059733646e-07, "loss": 22.566, "step": 27290 }, { "epoch": 216.66999002991028, "grad_norm": 503.96002197265625, "learning_rate": 3.0253103254779313e-07, "loss": 22.2243, "step": 27300 }, { "epoch": 216.66999002991028, "eval_loss": 2.983785390853882, "eval_mae": 1.289696216583252, "eval_mse": 2.983785390853882, "eval_r2": 0.10100007057189941, "eval_rmse": 1.7273637112240958, "eval_runtime": 9.1023, "eval_samples_per_second": 440.66, "eval_steps_per_second": 13.843, "step": 
27300 }, { "epoch": 216.74975074775674, "grad_norm": 250.5771484375, "learning_rate": 3.0240904603871364e-07, "loss": 22.3307, "step": 27310 }, { "epoch": 216.8295114656032, "grad_norm": 230.27337646484375, "learning_rate": 3.0228704647650846e-07, "loss": 26.9951, "step": 27320 }, { "epoch": 216.90927218344964, "grad_norm": 529.7537231445312, "learning_rate": 3.0216503389156295e-07, "loss": 21.5616, "step": 27330 }, { "epoch": 216.9890329012961, "grad_norm": 253.22357177734375, "learning_rate": 3.02043008314266e-07, "loss": 22.4429, "step": 27340 }, { "epoch": 217.06380857427718, "grad_norm": 1507.0018310546875, "learning_rate": 3.019209697750095e-07, "loss": 24.3405, "step": 27350 }, { "epoch": 217.14356929212363, "grad_norm": 398.4322814941406, "learning_rate": 3.0179891830418876e-07, "loss": 23.6838, "step": 27360 }, { "epoch": 217.22333000997008, "grad_norm": 602.019775390625, "learning_rate": 3.0167685393220217e-07, "loss": 23.0587, "step": 27370 }, { "epoch": 217.30309072781654, "grad_norm": 297.0119934082031, "learning_rate": 3.015547766894513e-07, "loss": 23.8301, "step": 27380 }, { "epoch": 217.38285144566302, "grad_norm": 910.6251831054688, "learning_rate": 3.0143268660634125e-07, "loss": 23.7992, "step": 27390 }, { "epoch": 217.46261216350948, "grad_norm": 256.3167419433594, "learning_rate": 3.013105837132798e-07, "loss": 24.3788, "step": 27400 }, { "epoch": 217.46261216350948, "eval_loss": 2.9812726974487305, "eval_mae": 1.3002257347106934, "eval_mse": 2.9812726974487305, "eval_r2": 0.10175710916519165, "eval_rmse": 1.7266362377318305, "eval_runtime": 9.1172, "eval_samples_per_second": 439.936, "eval_steps_per_second": 13.82, "step": 27400 }, { "epoch": 217.54237288135593, "grad_norm": 384.02593994140625, "learning_rate": 3.0118846804067845e-07, "loss": 23.1673, "step": 27410 }, { "epoch": 217.62213359920239, "grad_norm": 627.162109375, "learning_rate": 3.010663396189513e-07, "loss": 22.5284, "step": 27420 }, { "epoch": 217.70189431704884, "grad_norm": 
474.76556396484375, "learning_rate": 3.009441984785163e-07, "loss": 23.4623, "step": 27430 }, { "epoch": 217.78165503489532, "grad_norm": 1055.54296875, "learning_rate": 3.0082204464979404e-07, "loss": 24.2294, "step": 27440 }, { "epoch": 217.86141575274178, "grad_norm": 845.8869018554688, "learning_rate": 3.0069987816320845e-07, "loss": 23.0862, "step": 27450 }, { "epoch": 217.94117647058823, "grad_norm": 661.3668823242188, "learning_rate": 3.0057769904918675e-07, "loss": 21.4018, "step": 27460 }, { "epoch": 218.01595214356928, "grad_norm": 251.98768615722656, "learning_rate": 3.0045550733815894e-07, "loss": 23.6382, "step": 27470 }, { "epoch": 218.09571286141576, "grad_norm": 373.1899719238281, "learning_rate": 3.003333030605587e-07, "loss": 23.496, "step": 27480 }, { "epoch": 218.17547357926222, "grad_norm": 744.9896240234375, "learning_rate": 3.0021108624682215e-07, "loss": 24.0153, "step": 27490 }, { "epoch": 218.25523429710867, "grad_norm": 481.24951171875, "learning_rate": 3.000888569273893e-07, "loss": 24.6332, "step": 27500 }, { "epoch": 218.25523429710867, "eval_loss": 2.977515459060669, "eval_mae": 1.291520118713379, "eval_mse": 2.977515459060669, "eval_r2": 0.10288918018341064, "eval_rmse": 1.725547872143995, "eval_runtime": 9.1128, "eval_samples_per_second": 440.149, "eval_steps_per_second": 13.827, "step": 27500 }, { "epoch": 218.33499501495513, "grad_norm": 484.2327575683594, "learning_rate": 2.999666151327025e-07, "loss": 21.8921, "step": 27510 }, { "epoch": 218.4147557328016, "grad_norm": 795.8241577148438, "learning_rate": 2.998443608932079e-07, "loss": 24.2613, "step": 27520 }, { "epoch": 218.49451645064806, "grad_norm": 686.586181640625, "learning_rate": 2.997220942393543e-07, "loss": 22.1268, "step": 27530 }, { "epoch": 218.57427716849452, "grad_norm": 200.74305725097656, "learning_rate": 2.995998152015937e-07, "loss": 24.4024, "step": 27540 }, { "epoch": 218.65403788634097, "grad_norm": 304.2043762207031, "learning_rate": 
2.994775238103814e-07, "loss": 26.3741, "step": 27550 }, { "epoch": 218.73379860418743, "grad_norm": 668.7415771484375, "learning_rate": 2.9935522009617543e-07, "loss": 24.6852, "step": 27560 }, { "epoch": 218.8135593220339, "grad_norm": 1236.5673828125, "learning_rate": 2.9923290408943706e-07, "loss": 23.0106, "step": 27570 }, { "epoch": 218.89332003988036, "grad_norm": 484.6416320800781, "learning_rate": 2.9911057582063067e-07, "loss": 23.2765, "step": 27580 }, { "epoch": 218.97308075772682, "grad_norm": 399.0691223144531, "learning_rate": 2.9898823532022357e-07, "loss": 20.9575, "step": 27590 }, { "epoch": 219.04785643070787, "grad_norm": 333.0251770019531, "learning_rate": 2.9886588261868633e-07, "loss": 23.3247, "step": 27600 }, { "epoch": 219.04785643070787, "eval_loss": 2.975332021713257, "eval_mae": 1.2925277948379517, "eval_mse": 2.975332260131836, "eval_r2": 0.10354691743850708, "eval_rmse": 1.7249151457772745, "eval_runtime": 9.0693, "eval_samples_per_second": 442.261, "eval_steps_per_second": 13.893, "step": 27600 }, { "epoch": 219.12761714855435, "grad_norm": 216.87686157226562, "learning_rate": 2.987435177464922e-07, "loss": 22.6154, "step": 27610 }, { "epoch": 219.2073778664008, "grad_norm": 183.9648895263672, "learning_rate": 2.9862114073411786e-07, "loss": 26.0827, "step": 27620 }, { "epoch": 219.28713858424726, "grad_norm": 300.2879943847656, "learning_rate": 2.9849875161204277e-07, "loss": 21.6577, "step": 27630 }, { "epoch": 219.3668993020937, "grad_norm": 945.4083862304688, "learning_rate": 2.983763504107495e-07, "loss": 23.166, "step": 27640 }, { "epoch": 219.44666001994017, "grad_norm": 1042.5501708984375, "learning_rate": 2.9825393716072344e-07, "loss": 25.3464, "step": 27650 }, { "epoch": 219.52642073778665, "grad_norm": 315.9792785644531, "learning_rate": 2.981315118924533e-07, "loss": 21.6401, "step": 27660 }, { "epoch": 219.6061814556331, "grad_norm": 849.44921875, "learning_rate": 2.9800907463643063e-07, "loss": 25.0194, "step": 27670 
}, { "epoch": 219.68594217347956, "grad_norm": 761.8780517578125, "learning_rate": 2.978866254231498e-07, "loss": 22.5823, "step": 27680 }, { "epoch": 219.76570289132601, "grad_norm": 815.2864379882812, "learning_rate": 2.977641642831085e-07, "loss": 22.0128, "step": 27690 }, { "epoch": 219.84546360917247, "grad_norm": 210.50242614746094, "learning_rate": 2.97641691246807e-07, "loss": 24.7933, "step": 27700 }, { "epoch": 219.84546360917247, "eval_loss": 2.9780495166778564, "eval_mae": 1.2930556535720825, "eval_mse": 2.9780492782592773, "eval_r2": 0.10272830724716187, "eval_rmse": 1.7257025462863747, "eval_runtime": 9.0794, "eval_samples_per_second": 441.771, "eval_steps_per_second": 13.878, "step": 27700 }, { "epoch": 219.92522432701895, "grad_norm": 755.2847290039062, "learning_rate": 2.975192063447489e-07, "loss": 23.8973, "step": 27710 }, { "epoch": 220.0, "grad_norm": 189.5387725830078, "learning_rate": 2.973967096074405e-07, "loss": 22.0406, "step": 27720 }, { "epoch": 220.07976071784645, "grad_norm": 973.175537109375, "learning_rate": 2.972742010653912e-07, "loss": 24.3044, "step": 27730 }, { "epoch": 220.1595214356929, "grad_norm": 270.5625305175781, "learning_rate": 2.971516807491132e-07, "loss": 24.0269, "step": 27740 }, { "epoch": 220.2392821535394, "grad_norm": 458.6932067871094, "learning_rate": 2.970291486891217e-07, "loss": 21.8895, "step": 27750 }, { "epoch": 220.31904287138585, "grad_norm": 547.2145385742188, "learning_rate": 2.9690660491593505e-07, "loss": 23.4881, "step": 27760 }, { "epoch": 220.3988035892323, "grad_norm": 706.6233520507812, "learning_rate": 2.9678404946007406e-07, "loss": 23.3825, "step": 27770 }, { "epoch": 220.47856430707876, "grad_norm": 335.0290832519531, "learning_rate": 2.9666148235206277e-07, "loss": 23.2111, "step": 27780 }, { "epoch": 220.5583250249252, "grad_norm": 266.097900390625, "learning_rate": 2.9653890362242805e-07, "loss": 23.4541, "step": 27790 }, { "epoch": 220.6380857427717, "grad_norm": 1648.7161865234375, 
"learning_rate": 2.964163133016998e-07, "loss": 24.1183, "step": 27800 }, { "epoch": 220.6380857427717, "eval_loss": 2.9777796268463135, "eval_mae": 1.3002476692199707, "eval_mse": 2.9777796268463135, "eval_r2": 0.10280954837799072, "eval_rmse": 1.7256244165073447, "eval_runtime": 9.1493, "eval_samples_per_second": 438.395, "eval_steps_per_second": 13.772, "step": 27800 }, { "epoch": 220.71784646061815, "grad_norm": 935.0693359375, "learning_rate": 2.962937114204104e-07, "loss": 23.7185, "step": 27810 }, { "epoch": 220.7976071784646, "grad_norm": 214.67987060546875, "learning_rate": 2.9617109800909544e-07, "loss": 25.207, "step": 27820 }, { "epoch": 220.87736789631106, "grad_norm": 528.9149780273438, "learning_rate": 2.960484730982935e-07, "loss": 23.291, "step": 27830 }, { "epoch": 220.95712861415754, "grad_norm": 151.2432861328125, "learning_rate": 2.959258367185457e-07, "loss": 23.163, "step": 27840 }, { "epoch": 221.0319042871386, "grad_norm": 839.2595825195312, "learning_rate": 2.9580318890039625e-07, "loss": 22.3116, "step": 27850 }, { "epoch": 221.11166500498504, "grad_norm": 226.69766235351562, "learning_rate": 2.956805296743919e-07, "loss": 24.5424, "step": 27860 }, { "epoch": 221.1914257228315, "grad_norm": 639.3292846679688, "learning_rate": 2.9555785907108274e-07, "loss": 23.6998, "step": 27870 }, { "epoch": 221.27118644067798, "grad_norm": 384.93487548828125, "learning_rate": 2.9543517712102124e-07, "loss": 24.3403, "step": 27880 }, { "epoch": 221.35094715852443, "grad_norm": 389.61181640625, "learning_rate": 2.953247536897466e-07, "loss": 24.1667, "step": 27890 }, { "epoch": 221.4307078763709, "grad_norm": 174.5286102294922, "learning_rate": 2.9520205026503843e-07, "loss": 22.76, "step": 27900 }, { "epoch": 221.4307078763709, "eval_loss": 2.9929704666137695, "eval_mae": 1.289552092552185, "eval_mse": 2.9929704666137695, "eval_r2": 0.09823262691497803, "eval_rmse": 1.7300203659534674, "eval_runtime": 9.1064, "eval_samples_per_second": 440.457, 
"eval_steps_per_second": 13.836, "step": 27900 }, { "epoch": 221.51046859421734, "grad_norm": 390.555908203125, "learning_rate": 2.950793355821965e-07, "loss": 21.9596, "step": 27910 }, { "epoch": 221.5902293120638, "grad_norm": 646.3958740234375, "learning_rate": 2.9495660967178455e-07, "loss": 24.6812, "step": 27920 }, { "epoch": 221.66999002991028, "grad_norm": 215.1212158203125, "learning_rate": 2.9483387256436886e-07, "loss": 24.6319, "step": 27930 }, { "epoch": 221.74975074775674, "grad_norm": 329.55859375, "learning_rate": 2.947111242905188e-07, "loss": 23.5231, "step": 27940 }, { "epoch": 221.8295114656032, "grad_norm": 870.319580078125, "learning_rate": 2.945883648808063e-07, "loss": 22.3164, "step": 27950 }, { "epoch": 221.90927218344964, "grad_norm": 375.7515869140625, "learning_rate": 2.9446559436580597e-07, "loss": 22.9546, "step": 27960 }, { "epoch": 221.9890329012961, "grad_norm": 353.8161926269531, "learning_rate": 2.9434281277609564e-07, "loss": 23.2956, "step": 27970 }, { "epoch": 222.06380857427718, "grad_norm": 753.7135009765625, "learning_rate": 2.9422002014225534e-07, "loss": 21.7364, "step": 27980 }, { "epoch": 222.14356929212363, "grad_norm": 159.11260986328125, "learning_rate": 2.9409721649486823e-07, "loss": 23.5395, "step": 27990 }, { "epoch": 222.22333000997008, "grad_norm": 284.4923095703125, "learning_rate": 2.9397440186452e-07, "loss": 22.6555, "step": 28000 }, { "epoch": 222.22333000997008, "eval_loss": 2.978847026824951, "eval_mae": 1.289054274559021, "eval_mse": 2.978847026824951, "eval_r2": 0.10248798131942749, "eval_rmse": 1.7259336681416673, "eval_runtime": 9.1089, "eval_samples_per_second": 440.34, "eval_steps_per_second": 13.833, "step": 28000 }, { "epoch": 222.30309072781654, "grad_norm": 795.4385375976562, "learning_rate": 2.938515762817992e-07, "loss": 24.3238, "step": 28010 }, { "epoch": 222.38285144566302, "grad_norm": 1256.491455078125, "learning_rate": 2.9372873977729715e-07, "loss": 23.0592, "step": 28020 }, { "epoch": 
222.46261216350948, "grad_norm": 382.3968811035156, "learning_rate": 2.936058923816077e-07, "loss": 20.7059, "step": 28030 }, { "epoch": 222.54237288135593, "grad_norm": 209.00186157226562, "learning_rate": 2.934830341253276e-07, "loss": 24.3814, "step": 28040 }, { "epoch": 222.62213359920239, "grad_norm": 247.73770141601562, "learning_rate": 2.9336016503905615e-07, "loss": 23.7226, "step": 28050 }, { "epoch": 222.70189431704884, "grad_norm": 259.9332580566406, "learning_rate": 2.9323728515339554e-07, "loss": 24.3797, "step": 28060 }, { "epoch": 222.78165503489532, "grad_norm": 490.0107116699219, "learning_rate": 2.931143944989505e-07, "loss": 23.7283, "step": 28070 }, { "epoch": 222.86141575274178, "grad_norm": 457.6901550292969, "learning_rate": 2.929914931063283e-07, "loss": 25.6897, "step": 28080 }, { "epoch": 222.94117647058823, "grad_norm": 647.62939453125, "learning_rate": 2.9286858100613934e-07, "loss": 22.9767, "step": 28090 }, { "epoch": 223.01595214356928, "grad_norm": 668.23193359375, "learning_rate": 2.927456582289962e-07, "loss": 22.2384, "step": 28100 }, { "epoch": 223.01595214356928, "eval_loss": 2.9920876026153564, "eval_mae": 1.3085302114486694, "eval_mse": 2.9920876026153564, "eval_r2": 0.09849858283996582, "eval_rmse": 1.729765187132449, "eval_runtime": 9.1242, "eval_samples_per_second": 439.601, "eval_steps_per_second": 13.809, "step": 28100 }, { "epoch": 223.09571286141576, "grad_norm": 302.5481262207031, "learning_rate": 2.9262272480551453e-07, "loss": 22.2556, "step": 28110 }, { "epoch": 223.17547357926222, "grad_norm": 153.3507843017578, "learning_rate": 2.9249978076631214e-07, "loss": 23.7961, "step": 28120 }, { "epoch": 223.25523429710867, "grad_norm": 226.6240997314453, "learning_rate": 2.923768261420101e-07, "loss": 22.6181, "step": 28130 }, { "epoch": 223.33499501495513, "grad_norm": 372.73846435546875, "learning_rate": 2.922538609632316e-07, "loss": 23.4664, "step": 28140 }, { "epoch": 223.4147557328016, "grad_norm": 
544.7764282226562, "learning_rate": 2.921308852606026e-07, "loss": 23.3307, "step": 28150 }, { "epoch": 223.49451645064806, "grad_norm": 172.4590606689453, "learning_rate": 2.920078990647519e-07, "loss": 23.3574, "step": 28160 }, { "epoch": 223.57427716849452, "grad_norm": 1462.9517822265625, "learning_rate": 2.9188490240631057e-07, "loss": 24.8231, "step": 28170 }, { "epoch": 223.65403788634097, "grad_norm": 255.56784057617188, "learning_rate": 2.9176189531591264e-07, "loss": 24.927, "step": 28180 }, { "epoch": 223.73379860418743, "grad_norm": 543.3631591796875, "learning_rate": 2.9163887782419444e-07, "loss": 24.3094, "step": 28190 }, { "epoch": 223.8135593220339, "grad_norm": 197.2646484375, "learning_rate": 2.915158499617951e-07, "loss": 22.7073, "step": 28200 }, { "epoch": 223.8135593220339, "eval_loss": 2.9783763885498047, "eval_mae": 1.2938679456710815, "eval_mse": 2.9783763885498047, "eval_r2": 0.10262978076934814, "eval_rmse": 1.7257973196612066, "eval_runtime": 9.1137, "eval_samples_per_second": 440.105, "eval_steps_per_second": 13.825, "step": 28200 }, { "epoch": 223.89332003988036, "grad_norm": 530.704345703125, "learning_rate": 2.9139281175935603e-07, "loss": 22.9435, "step": 28210 }, { "epoch": 223.97308075772682, "grad_norm": 1255.1724853515625, "learning_rate": 2.9126976324752174e-07, "loss": 23.3073, "step": 28220 }, { "epoch": 224.04785643070787, "grad_norm": 917.60205078125, "learning_rate": 2.911467044569387e-07, "loss": 23.6917, "step": 28230 }, { "epoch": 224.12761714855435, "grad_norm": 565.042724609375, "learning_rate": 2.910236354182564e-07, "loss": 22.5874, "step": 28240 }, { "epoch": 224.2073778664008, "grad_norm": 642.9319458007812, "learning_rate": 2.909005561621267e-07, "loss": 24.358, "step": 28250 }, { "epoch": 224.28713858424726, "grad_norm": 1091.9749755859375, "learning_rate": 2.9077746671920406e-07, "loss": 23.8511, "step": 28260 }, { "epoch": 224.3668993020937, "grad_norm": 1002.00537109375, "learning_rate": 
2.9065436712014536e-07, "loss": 23.917, "step": 28270 }, { "epoch": 224.44666001994017, "grad_norm": 291.5292053222656, "learning_rate": 2.9053125739561e-07, "loss": 25.6457, "step": 28280 }, { "epoch": 224.52642073778665, "grad_norm": 488.26898193359375, "learning_rate": 2.9040813757626027e-07, "loss": 22.2785, "step": 28290 }, { "epoch": 224.6061814556331, "grad_norm": 1863.35546875, "learning_rate": 2.9028500769276044e-07, "loss": 22.4234, "step": 28300 }, { "epoch": 224.6061814556331, "eval_loss": 2.9912896156311035, "eval_mae": 1.3089357614517212, "eval_mse": 2.9912896156311035, "eval_r2": 0.09873908758163452, "eval_rmse": 1.7295345083666598, "eval_runtime": 9.1717, "eval_samples_per_second": 437.324, "eval_steps_per_second": 13.738, "step": 28300 }, { "epoch": 224.68594217347956, "grad_norm": 455.0877380371094, "learning_rate": 2.9016186777577757e-07, "loss": 24.4669, "step": 28310 }, { "epoch": 224.76570289132601, "grad_norm": 307.5437316894531, "learning_rate": 2.9003871785598135e-07, "loss": 23.6653, "step": 28320 }, { "epoch": 224.84546360917247, "grad_norm": 477.96917724609375, "learning_rate": 2.899155579640436e-07, "loss": 22.3114, "step": 28330 }, { "epoch": 224.92522432701895, "grad_norm": 408.08612060546875, "learning_rate": 2.8979238813063904e-07, "loss": 22.7173, "step": 28340 }, { "epoch": 225.0, "grad_norm": 688.6211547851562, "learning_rate": 2.8966920838644445e-07, "loss": 22.0629, "step": 28350 }, { "epoch": 225.07976071784645, "grad_norm": 215.12020874023438, "learning_rate": 2.895460187621394e-07, "loss": 23.4137, "step": 28360 }, { "epoch": 225.1595214356929, "grad_norm": 230.41818237304688, "learning_rate": 2.8942281928840573e-07, "loss": 23.5063, "step": 28370 }, { "epoch": 225.2392821535394, "grad_norm": 909.58740234375, "learning_rate": 2.8929960999592783e-07, "loss": 23.423, "step": 28380 }, { "epoch": 225.31904287138585, "grad_norm": 976.6878051757812, "learning_rate": 2.8917639091539256e-07, "loss": 23.2553, "step": 28390 }, { 
"epoch": 225.3988035892323, "grad_norm": 406.7839660644531, "learning_rate": 2.8905316207748916e-07, "loss": 23.7409, "step": 28400 }, { "epoch": 225.3988035892323, "eval_loss": 2.9810709953308105, "eval_mae": 1.2892674207687378, "eval_mse": 2.9810709953308105, "eval_r2": 0.10181784629821777, "eval_rmse": 1.7265778277653199, "eval_runtime": 9.0967, "eval_samples_per_second": 440.931, "eval_steps_per_second": 13.851, "step": 28400 }, { "epoch": 225.47856430707876, "grad_norm": 291.175048828125, "learning_rate": 2.889299235129092e-07, "loss": 25.8447, "step": 28410 }, { "epoch": 225.5583250249252, "grad_norm": 547.2565307617188, "learning_rate": 2.888066752523469e-07, "loss": 25.7814, "step": 28420 }, { "epoch": 225.6380857427717, "grad_norm": 370.3689880371094, "learning_rate": 2.8868341732649877e-07, "loss": 23.7016, "step": 28430 }, { "epoch": 225.71784646061815, "grad_norm": 180.25758361816406, "learning_rate": 2.885601497660636e-07, "loss": 21.226, "step": 28440 }, { "epoch": 225.7976071784646, "grad_norm": 1098.9267578125, "learning_rate": 2.884368726017428e-07, "loss": 23.4176, "step": 28450 }, { "epoch": 225.87736789631106, "grad_norm": 535.2625122070312, "learning_rate": 2.8831358586424016e-07, "loss": 22.0611, "step": 28460 }, { "epoch": 225.95712861415754, "grad_norm": 966.4929809570312, "learning_rate": 2.881902895842616e-07, "loss": 23.7503, "step": 28470 }, { "epoch": 226.0319042871386, "grad_norm": 1121.6949462890625, "learning_rate": 2.880669837925158e-07, "loss": 21.1102, "step": 28480 }, { "epoch": 226.11166500498504, "grad_norm": 576.8964233398438, "learning_rate": 2.879436685197134e-07, "loss": 23.5315, "step": 28490 }, { "epoch": 226.1914257228315, "grad_norm": 332.62518310546875, "learning_rate": 2.8782034379656786e-07, "loss": 22.4616, "step": 28500 }, { "epoch": 226.1914257228315, "eval_loss": 2.976295232772827, "eval_mae": 1.2895312309265137, "eval_mse": 2.976295232772827, "eval_r2": 0.10325682163238525, "eval_rmse": 1.7251942594307539, 
"eval_runtime": 9.0972, "eval_samples_per_second": 440.907, "eval_steps_per_second": 13.85, "step": 28500 }, { "epoch": 226.27118644067798, "grad_norm": 968.8788452148438, "learning_rate": 2.8769700965379447e-07, "loss": 23.2409, "step": 28510 }, { "epoch": 226.35094715852443, "grad_norm": 423.68475341796875, "learning_rate": 2.875736661221112e-07, "loss": 24.2685, "step": 28520 }, { "epoch": 226.4307078763709, "grad_norm": 540.13037109375, "learning_rate": 2.874503132322384e-07, "loss": 25.0156, "step": 28530 }, { "epoch": 226.51046859421734, "grad_norm": 504.5898132324219, "learning_rate": 2.8732695101489864e-07, "loss": 26.6463, "step": 28540 }, { "epoch": 226.5902293120638, "grad_norm": 1567.54638671875, "learning_rate": 2.872035795008167e-07, "loss": 22.8101, "step": 28550 }, { "epoch": 226.66999002991028, "grad_norm": 460.20635986328125, "learning_rate": 2.870801987207198e-07, "loss": 23.7526, "step": 28560 }, { "epoch": 226.74975074775674, "grad_norm": 661.027587890625, "learning_rate": 2.869568087053376e-07, "loss": 22.6854, "step": 28570 }, { "epoch": 226.8295114656032, "grad_norm": 478.668212890625, "learning_rate": 2.868334094854018e-07, "loss": 23.7253, "step": 28580 }, { "epoch": 226.90927218344964, "grad_norm": 252.7202606201172, "learning_rate": 2.867100010916466e-07, "loss": 22.2806, "step": 28590 }, { "epoch": 226.9890329012961, "grad_norm": 1410.196533203125, "learning_rate": 2.8658658355480824e-07, "loss": 21.6722, "step": 28600 }, { "epoch": 226.9890329012961, "eval_loss": 2.989102840423584, "eval_mae": 1.3085246086120605, "eval_mse": 2.989103078842163, "eval_r2": 0.09939789772033691, "eval_rmse": 1.7289022756773047, "eval_runtime": 9.0938, "eval_samples_per_second": 441.067, "eval_steps_per_second": 13.856, "step": 28600 }, { "epoch": 227.06380857427718, "grad_norm": 461.0039367675781, "learning_rate": 2.864631569056255e-07, "loss": 23.4921, "step": 28610 }, { "epoch": 227.14356929212363, "grad_norm": 921.9505004882812, "learning_rate": 
2.863397211748395e-07, "loss": 24.8094, "step": 28620 }, { "epoch": 227.22333000997008, "grad_norm": 426.26715087890625, "learning_rate": 2.8621627639319314e-07, "loss": 21.713, "step": 28630 }, { "epoch": 227.30309072781654, "grad_norm": 1372.0335693359375, "learning_rate": 2.86092822591432e-07, "loss": 24.5109, "step": 28640 }, { "epoch": 227.38285144566302, "grad_norm": 1721.040283203125, "learning_rate": 2.859693598003038e-07, "loss": 25.2417, "step": 28650 }, { "epoch": 227.46261216350948, "grad_norm": 663.2008056640625, "learning_rate": 2.8584588805055854e-07, "loss": 22.8608, "step": 28660 }, { "epoch": 227.54237288135593, "grad_norm": 262.5372009277344, "learning_rate": 2.857224073729483e-07, "loss": 23.815, "step": 28670 }, { "epoch": 227.62213359920239, "grad_norm": 593.6583862304688, "learning_rate": 2.855989177982275e-07, "loss": 23.1764, "step": 28680 }, { "epoch": 227.70189431704884, "grad_norm": 270.80657958984375, "learning_rate": 2.854754193571528e-07, "loss": 23.716, "step": 28690 }, { "epoch": 227.78165503489532, "grad_norm": 495.4303283691406, "learning_rate": 2.8535191208048306e-07, "loss": 20.9373, "step": 28700 }, { "epoch": 227.78165503489532, "eval_loss": 2.97832989692688, "eval_mae": 1.2897449731826782, "eval_mse": 2.978330135345459, "eval_r2": 0.10264372825622559, "eval_rmse": 1.7257839190771997, "eval_runtime": 9.105, "eval_samples_per_second": 440.528, "eval_steps_per_second": 13.839, "step": 28700 }, { "epoch": 227.86141575274178, "grad_norm": 1146.97900390625, "learning_rate": 2.852283959989792e-07, "loss": 23.5481, "step": 28710 }, { "epoch": 227.94117647058823, "grad_norm": 376.2318115234375, "learning_rate": 2.8510487114340444e-07, "loss": 23.3373, "step": 28720 }, { "epoch": 228.01595214356928, "grad_norm": 278.4344177246094, "learning_rate": 2.849813375445243e-07, "loss": 22.1801, "step": 28730 }, { "epoch": 228.09571286141576, "grad_norm": 360.8177490234375, "learning_rate": 2.848577952331063e-07, "loss": 23.7975, "step": 28740 
}, { "epoch": 228.17547357926222, "grad_norm": 780.6703491210938, "learning_rate": 2.847342442399201e-07, "loss": 22.6753, "step": 28750 }, { "epoch": 228.25523429710867, "grad_norm": 1090.07470703125, "learning_rate": 2.846106845957378e-07, "loss": 22.1895, "step": 28760 }, { "epoch": 228.33499501495513, "grad_norm": 433.43121337890625, "learning_rate": 2.844871163313333e-07, "loss": 23.5747, "step": 28770 }, { "epoch": 228.4147557328016, "grad_norm": 538.4068603515625, "learning_rate": 2.843635394774829e-07, "loss": 24.5557, "step": 28780 }, { "epoch": 228.49451645064806, "grad_norm": 1891.75439453125, "learning_rate": 2.84239954064965e-07, "loss": 24.9661, "step": 28790 }, { "epoch": 228.57427716849452, "grad_norm": 181.74830627441406, "learning_rate": 2.8411636012455996e-07, "loss": 24.4292, "step": 28800 }, { "epoch": 228.57427716849452, "eval_loss": 2.9761176109313965, "eval_mae": 1.2910175323486328, "eval_mse": 2.9761176109313965, "eval_r2": 0.10331028699874878, "eval_rmse": 1.7251427798682046, "eval_runtime": 9.0926, "eval_samples_per_second": 441.129, "eval_steps_per_second": 13.857, "step": 28800 }, { "epoch": 228.65403788634097, "grad_norm": 230.27720642089844, "learning_rate": 2.8399275768705043e-07, "loss": 23.979, "step": 28810 }, { "epoch": 228.73379860418743, "grad_norm": 303.1219482421875, "learning_rate": 2.8386914678322123e-07, "loss": 20.456, "step": 28820 }, { "epoch": 228.8135593220339, "grad_norm": 983.5076293945312, "learning_rate": 2.837455274438591e-07, "loss": 22.9024, "step": 28830 }, { "epoch": 228.89332003988036, "grad_norm": 265.1124267578125, "learning_rate": 2.83621899699753e-07, "loss": 23.7586, "step": 28840 }, { "epoch": 228.97308075772682, "grad_norm": 951.542236328125, "learning_rate": 2.834982635816939e-07, "loss": 24.138, "step": 28850 }, { "epoch": 229.04785643070787, "grad_norm": 1288.7901611328125, "learning_rate": 2.833746191204751e-07, "loss": 22.0687, "step": 28860 }, { "epoch": 229.12761714855435, "grad_norm": 
963.5640869140625, "learning_rate": 2.8325096634689166e-07, "loss": 22.8958, "step": 28870 }, { "epoch": 229.2073778664008, "grad_norm": 606.7884521484375, "learning_rate": 2.831273052917408e-07, "loss": 24.818, "step": 28880 }, { "epoch": 229.28713858424726, "grad_norm": 287.0824279785156, "learning_rate": 2.8300363598582187e-07, "loss": 24.0999, "step": 28890 }, { "epoch": 229.3668993020937, "grad_norm": 1716.82421875, "learning_rate": 2.828799584599364e-07, "loss": 25.2726, "step": 28900 }, { "epoch": 229.3668993020937, "eval_loss": 2.980441093444824, "eval_mae": 1.302371621131897, "eval_mse": 2.980441093444824, "eval_r2": 0.10200768709182739, "eval_rmse": 1.7263954047218801, "eval_runtime": 9.1039, "eval_samples_per_second": 440.579, "eval_steps_per_second": 13.84, "step": 28900 }, { "epoch": 229.44666001994017, "grad_norm": 333.4840393066406, "learning_rate": 2.8275627274488755e-07, "loss": 23.8886, "step": 28910 }, { "epoch": 229.52642073778665, "grad_norm": 1196.2061767578125, "learning_rate": 2.826325788714811e-07, "loss": 22.0717, "step": 28920 }, { "epoch": 229.6061814556331, "grad_norm": 377.2966003417969, "learning_rate": 2.8250887687052425e-07, "loss": 24.145, "step": 28930 }, { "epoch": 229.68594217347956, "grad_norm": 529.4716186523438, "learning_rate": 2.823851667728268e-07, "loss": 22.3141, "step": 28940 }, { "epoch": 229.76570289132601, "grad_norm": 199.5446319580078, "learning_rate": 2.822614486092e-07, "loss": 22.8616, "step": 28950 }, { "epoch": 229.84546360917247, "grad_norm": 259.5824279785156, "learning_rate": 2.8213772241045767e-07, "loss": 23.8629, "step": 28960 }, { "epoch": 229.92522432701895, "grad_norm": 295.1745910644531, "learning_rate": 2.8201398820741516e-07, "loss": 23.6198, "step": 28970 }, { "epoch": 230.0, "grad_norm": 241.26560974121094, "learning_rate": 2.8189024603089016e-07, "loss": 20.7605, "step": 28980 }, { "epoch": 230.07976071784645, "grad_norm": 640.4485473632812, "learning_rate": 2.8176649591170217e-07, "loss": 
20.1089, "step": 28990 }, { "epoch": 230.1595214356929, "grad_norm": 298.4513854980469, "learning_rate": 2.816427378806726e-07, "loss": 24.6293, "step": 29000 }, { "epoch": 230.1595214356929, "eval_loss": 2.9736437797546387, "eval_mae": 1.2933244705200195, "eval_mse": 2.9736437797546387, "eval_r2": 0.10405564308166504, "eval_rmse": 1.72442563764131, "eval_runtime": 9.0946, "eval_samples_per_second": 441.03, "eval_steps_per_second": 13.854, "step": 29000 }, { "epoch": 230.2392821535394, "grad_norm": 215.43040466308594, "learning_rate": 2.815189719686251e-07, "loss": 21.8872, "step": 29010 }, { "epoch": 230.31904287138585, "grad_norm": 339.0171203613281, "learning_rate": 2.813951982063849e-07, "loss": 22.4703, "step": 29020 }, { "epoch": 230.3988035892323, "grad_norm": 963.80029296875, "learning_rate": 2.8127141662477953e-07, "loss": 24.7596, "step": 29030 }, { "epoch": 230.47856430707876, "grad_norm": 246.45277404785156, "learning_rate": 2.811476272546383e-07, "loss": 22.9733, "step": 29040 }, { "epoch": 230.5583250249252, "grad_norm": 978.0501098632812, "learning_rate": 2.8102383012679255e-07, "loss": 23.1466, "step": 29050 }, { "epoch": 230.6380857427717, "grad_norm": 342.236328125, "learning_rate": 2.8090002527207547e-07, "loss": 23.5417, "step": 29060 }, { "epoch": 230.71784646061815, "grad_norm": 542.9784545898438, "learning_rate": 2.807762127213221e-07, "loss": 24.5957, "step": 29070 }, { "epoch": 230.7976071784646, "grad_norm": 673.6538696289062, "learning_rate": 2.8065239250536967e-07, "loss": 23.7822, "step": 29080 }, { "epoch": 230.87736789631106, "grad_norm": 489.96136474609375, "learning_rate": 2.80528564655057e-07, "loss": 25.4736, "step": 29090 }, { "epoch": 230.95712861415754, "grad_norm": 297.83221435546875, "learning_rate": 2.804047292012251e-07, "loss": 23.9836, "step": 29100 }, { "epoch": 230.95712861415754, "eval_loss": 2.975355863571167, "eval_mae": 1.2902168035507202, "eval_mse": 2.975355863571167, "eval_r2": 0.10353982448577881, "eval_rmse": 
1.7249219876768824, "eval_runtime": 9.1099, "eval_samples_per_second": 440.291, "eval_steps_per_second": 13.831, "step": 29100 }, { "epoch": 231.0319042871386, "grad_norm": 830.2015380859375, "learning_rate": 2.802808861747166e-07, "loss": 23.1082, "step": 29110 }, { "epoch": 231.11166500498504, "grad_norm": 594.7340698242188, "learning_rate": 2.801570356063761e-07, "loss": 23.6181, "step": 29120 }, { "epoch": 231.1914257228315, "grad_norm": 357.4686279296875, "learning_rate": 2.8003317752705035e-07, "loss": 24.4905, "step": 29130 }, { "epoch": 231.27118644067798, "grad_norm": 425.6934509277344, "learning_rate": 2.799093119675876e-07, "loss": 22.8521, "step": 29140 }, { "epoch": 231.35094715852443, "grad_norm": 204.07247924804688, "learning_rate": 2.7978543895883813e-07, "loss": 23.1028, "step": 29150 }, { "epoch": 231.4307078763709, "grad_norm": 956.6591186523438, "learning_rate": 2.796615585316541e-07, "loss": 22.0427, "step": 29160 }, { "epoch": 231.51046859421734, "grad_norm": 947.2444458007812, "learning_rate": 2.795376707168894e-07, "loss": 23.0949, "step": 29170 }, { "epoch": 231.5902293120638, "grad_norm": 689.728271484375, "learning_rate": 2.794137755453999e-07, "loss": 25.3765, "step": 29180 }, { "epoch": 231.66999002991028, "grad_norm": 980.9293212890625, "learning_rate": 2.7928987304804315e-07, "loss": 24.1371, "step": 29190 }, { "epoch": 231.74975074775674, "grad_norm": 501.6930236816406, "learning_rate": 2.7916596325567864e-07, "loss": 23.0343, "step": 29200 }, { "epoch": 231.74975074775674, "eval_loss": 2.989731788635254, "eval_mae": 1.2870382070541382, "eval_mse": 2.989731788635254, "eval_r2": 0.09920841455459595, "eval_rmse": 1.72908408952117, "eval_runtime": 9.0984, "eval_samples_per_second": 440.845, "eval_steps_per_second": 13.849, "step": 29200 }, { "epoch": 231.8295114656032, "grad_norm": 277.04345703125, "learning_rate": 2.7904204619916773e-07, "loss": 23.9762, "step": 29210 }, { "epoch": 231.90927218344964, "grad_norm": 975.6008911132812, 
"learning_rate": 2.7891812190937343e-07, "loss": 23.6602, "step": 29220 }, { "epoch": 231.9890329012961, "grad_norm": 1536.518798828125, "learning_rate": 2.7879419041716063e-07, "loss": 21.931, "step": 29230 }, { "epoch": 232.06380857427718, "grad_norm": 378.703125, "learning_rate": 2.7867025175339605e-07, "loss": 21.9565, "step": 29240 }, { "epoch": 232.14356929212363, "grad_norm": 755.17138671875, "learning_rate": 2.785463059489481e-07, "loss": 23.7374, "step": 29250 }, { "epoch": 232.22333000997008, "grad_norm": 478.4455261230469, "learning_rate": 2.7842235303468715e-07, "loss": 23.8411, "step": 29260 }, { "epoch": 232.30309072781654, "grad_norm": 457.9610595703125, "learning_rate": 2.78298393041485e-07, "loss": 24.6312, "step": 29270 }, { "epoch": 232.38285144566302, "grad_norm": 350.7718505859375, "learning_rate": 2.7817442600021565e-07, "loss": 21.8555, "step": 29280 }, { "epoch": 232.46261216350948, "grad_norm": 282.9169006347656, "learning_rate": 2.7805045194175455e-07, "loss": 22.2663, "step": 29290 }, { "epoch": 232.54237288135593, "grad_norm": 893.3816528320312, "learning_rate": 2.779264708969789e-07, "loss": 22.8199, "step": 29300 }, { "epoch": 232.54237288135593, "eval_loss": 2.9826467037200928, "eval_mae": 1.2893154621124268, "eval_mse": 2.982646942138672, "eval_r2": 0.10134309530258179, "eval_rmse": 1.7270341461993945, "eval_runtime": 9.1576, "eval_samples_per_second": 437.995, "eval_steps_per_second": 13.759, "step": 29300 }, { "epoch": 232.62213359920239, "grad_norm": 1312.9954833984375, "learning_rate": 2.778024828967679e-07, "loss": 24.4557, "step": 29310 }, { "epoch": 232.70189431704884, "grad_norm": 240.9700927734375, "learning_rate": 2.776784879720021e-07, "loss": 24.1606, "step": 29320 }, { "epoch": 232.78165503489532, "grad_norm": 252.70970153808594, "learning_rate": 2.775544861535642e-07, "loss": 23.4388, "step": 29330 }, { "epoch": 232.86141575274178, "grad_norm": 442.65777587890625, "learning_rate": 2.774304774723383e-07, "loss": 22.6744, 
"step": 29340 }, { "epoch": 232.94117647058823, "grad_norm": 507.1012268066406, "learning_rate": 2.7730646195921014e-07, "loss": 22.4341, "step": 29350 }, { "epoch": 233.01595214356928, "grad_norm": 320.151123046875, "learning_rate": 2.771824396450676e-07, "loss": 24.6485, "step": 29360 }, { "epoch": 233.09571286141576, "grad_norm": 268.9784851074219, "learning_rate": 2.770584105607999e-07, "loss": 23.6512, "step": 29370 }, { "epoch": 233.17547357926222, "grad_norm": 196.93817138671875, "learning_rate": 2.7693437473729794e-07, "loss": 23.2546, "step": 29380 }, { "epoch": 233.25523429710867, "grad_norm": 672.9905395507812, "learning_rate": 2.7681033220545434e-07, "loss": 21.759, "step": 29390 }, { "epoch": 233.33499501495513, "grad_norm": 838.8317260742188, "learning_rate": 2.766862829961637e-07, "loss": 24.6047, "step": 29400 }, { "epoch": 233.33499501495513, "eval_loss": 2.9748923778533936, "eval_mae": 1.2966049909591675, "eval_mse": 2.9748923778533936, "eval_r2": 0.10367947816848755, "eval_rmse": 1.7247876326821785, "eval_runtime": 9.1814, "eval_samples_per_second": 436.86, "eval_steps_per_second": 13.723, "step": 29400 }, { "epoch": 233.4147557328016, "grad_norm": 738.0784301757812, "learning_rate": 2.7656222714032177e-07, "loss": 24.5345, "step": 29410 }, { "epoch": 233.49451645064806, "grad_norm": 947.1227416992188, "learning_rate": 2.764381646688263e-07, "loss": 22.5708, "step": 29420 }, { "epoch": 233.57427716849452, "grad_norm": 174.71717834472656, "learning_rate": 2.763140956125766e-07, "loss": 21.0811, "step": 29430 }, { "epoch": 233.65403788634097, "grad_norm": 1085.6224365234375, "learning_rate": 2.761900200024736e-07, "loss": 25.2356, "step": 29440 }, { "epoch": 233.73379860418743, "grad_norm": 450.60736083984375, "learning_rate": 2.760659378694199e-07, "loss": 23.2346, "step": 29450 }, { "epoch": 233.8135593220339, "grad_norm": 697.2225952148438, "learning_rate": 2.7594184924431967e-07, "loss": 23.9659, "step": 29460 }, { "epoch": 233.89332003988036, 
"grad_norm": 706.6405639648438, "learning_rate": 2.7581775415807874e-07, "loss": 24.0092, "step": 29470 }, { "epoch": 233.97308075772682, "grad_norm": 613.9629516601562, "learning_rate": 2.756936526416045e-07, "loss": 24.363, "step": 29480 }, { "epoch": 234.04785643070787, "grad_norm": 964.1812744140625, "learning_rate": 2.7556954472580603e-07, "loss": 20.0324, "step": 29490 }, { "epoch": 234.12761714855435, "grad_norm": 167.4209747314453, "learning_rate": 2.7544543044159394e-07, "loss": 23.8551, "step": 29500 }, { "epoch": 234.12761714855435, "eval_loss": 2.9705073833465576, "eval_mae": 1.2916990518569946, "eval_mse": 2.9705073833465576, "eval_r2": 0.10500067472457886, "eval_rmse": 1.7235159945142828, "eval_runtime": 9.1593, "eval_samples_per_second": 437.915, "eval_steps_per_second": 13.756, "step": 29500 }, { "epoch": 234.2073778664008, "grad_norm": 148.0955352783203, "learning_rate": 2.7532130981988045e-07, "loss": 21.6222, "step": 29510 }, { "epoch": 234.28713858424726, "grad_norm": 335.8790588378906, "learning_rate": 2.751971828915793e-07, "loss": 25.0059, "step": 29520 }, { "epoch": 234.3668993020937, "grad_norm": 564.429931640625, "learning_rate": 2.750730496876059e-07, "loss": 23.5836, "step": 29530 }, { "epoch": 234.44666001994017, "grad_norm": 883.6195678710938, "learning_rate": 2.749489102388772e-07, "loss": 23.2661, "step": 29540 }, { "epoch": 234.52642073778665, "grad_norm": 898.0303344726562, "learning_rate": 2.7482476457631147e-07, "loss": 23.4459, "step": 29550 }, { "epoch": 234.6061814556331, "grad_norm": 241.63389587402344, "learning_rate": 2.7470061273082907e-07, "loss": 21.2976, "step": 29560 }, { "epoch": 234.68594217347956, "grad_norm": 517.2592163085938, "learning_rate": 2.745764547333513e-07, "loss": 26.3265, "step": 29570 }, { "epoch": 234.76570289132601, "grad_norm": 501.83367919921875, "learning_rate": 2.744522906148013e-07, "loss": 23.5089, "step": 29580 }, { "epoch": 234.84546360917247, "grad_norm": 1145.0814208984375, "learning_rate": 
2.7432812040610376e-07, "loss": 24.3907, "step": 29590 }, { "epoch": 234.92522432701895, "grad_norm": 498.02813720703125, "learning_rate": 2.7420394413818476e-07, "loss": 22.5578, "step": 29600 }, { "epoch": 234.92522432701895, "eval_loss": 2.9738264083862305, "eval_mae": 1.2939438819885254, "eval_mse": 2.9738266468048096, "eval_r2": 0.10400056838989258, "eval_rmse": 1.724478659422844, "eval_runtime": 9.1822, "eval_samples_per_second": 436.822, "eval_steps_per_second": 13.722, "step": 29600 }, { "epoch": 235.0, "grad_norm": 319.5545349121094, "learning_rate": 2.74079761841972e-07, "loss": 21.1581, "step": 29610 }, { "epoch": 235.07976071784645, "grad_norm": 495.12176513671875, "learning_rate": 2.739555735483947e-07, "loss": 24.0536, "step": 29620 }, { "epoch": 235.1595214356929, "grad_norm": 437.9986572265625, "learning_rate": 2.738313792883833e-07, "loss": 24.0352, "step": 29630 }, { "epoch": 235.2392821535394, "grad_norm": 949.5150146484375, "learning_rate": 2.7370717909286993e-07, "loss": 23.0663, "step": 29640 }, { "epoch": 235.31904287138585, "grad_norm": 535.3571166992188, "learning_rate": 2.7358297299278844e-07, "loss": 24.5055, "step": 29650 }, { "epoch": 235.3988035892323, "grad_norm": 832.263427734375, "learning_rate": 2.7345876101907377e-07, "loss": 23.8656, "step": 29660 }, { "epoch": 235.47856430707876, "grad_norm": 398.2953186035156, "learning_rate": 2.7333454320266236e-07, "loss": 22.129, "step": 29670 }, { "epoch": 235.5583250249252, "grad_norm": 399.34210205078125, "learning_rate": 2.7321031957449236e-07, "loss": 23.5654, "step": 29680 }, { "epoch": 235.6380857427717, "grad_norm": 531.9966430664062, "learning_rate": 2.730860901655031e-07, "loss": 23.9399, "step": 29690 }, { "epoch": 235.71784646061815, "grad_norm": 234.51853942871094, "learning_rate": 2.729618550066356e-07, "loss": 23.2349, "step": 29700 }, { "epoch": 235.71784646061815, "eval_loss": 2.9727234840393066, "eval_mae": 1.2878901958465576, "eval_mse": 2.9727234840393066, "eval_r2": 
0.10433298349380493, "eval_rmse": 1.7241587757626344, "eval_runtime": 9.1214, "eval_samples_per_second": 439.736, "eval_steps_per_second": 13.814, "step": 29700 }, { "epoch": 235.7976071784646, "grad_norm": 224.88511657714844, "learning_rate": 2.7283761412883194e-07, "loss": 21.5041, "step": 29710 }, { "epoch": 235.87736789631106, "grad_norm": 459.13385009765625, "learning_rate": 2.7271336756303603e-07, "loss": 23.8674, "step": 29720 }, { "epoch": 235.95712861415754, "grad_norm": 306.1412048339844, "learning_rate": 2.7258911534019315e-07, "loss": 23.5009, "step": 29730 }, { "epoch": 236.0319042871386, "grad_norm": 208.79376220703125, "learning_rate": 2.7246485749124955e-07, "loss": 21.118, "step": 29740 }, { "epoch": 236.11166500498504, "grad_norm": 340.0598449707031, "learning_rate": 2.723405940471534e-07, "loss": 23.2676, "step": 29750 }, { "epoch": 236.1914257228315, "grad_norm": 1169.4322509765625, "learning_rate": 2.72216325038854e-07, "loss": 23.9456, "step": 29760 }, { "epoch": 236.27118644067798, "grad_norm": 575.0592651367188, "learning_rate": 2.720920504973022e-07, "loss": 21.9234, "step": 29770 }, { "epoch": 236.35094715852443, "grad_norm": 1148.34765625, "learning_rate": 2.719677704534499e-07, "loss": 24.3241, "step": 29780 }, { "epoch": 236.4307078763709, "grad_norm": 1039.1551513671875, "learning_rate": 2.718434849382508e-07, "loss": 23.3575, "step": 29790 }, { "epoch": 236.51046859421734, "grad_norm": 545.8091430664062, "learning_rate": 2.717191939826597e-07, "loss": 22.3778, "step": 29800 }, { "epoch": 236.51046859421734, "eval_loss": 2.9748647212982178, "eval_mae": 1.288765549659729, "eval_mse": 2.9748647212982178, "eval_r2": 0.10368776321411133, "eval_rmse": 1.7247796152837085, "eval_runtime": 9.1834, "eval_samples_per_second": 436.766, "eval_steps_per_second": 13.72, "step": 29800 }, { "epoch": 236.5902293120638, "grad_norm": 186.42568969726562, "learning_rate": 2.715948976176327e-07, "loss": 24.9648, "step": 29810 }, { "epoch": 
236.66999002991028, "grad_norm": 611.363037109375, "learning_rate": 2.714705958741276e-07, "loss": 21.861, "step": 29820 }, { "epoch": 236.74975074775674, "grad_norm": 815.4072875976562, "learning_rate": 2.7134628878310296e-07, "loss": 24.9254, "step": 29830 }, { "epoch": 236.8295114656032, "grad_norm": 520.3768920898438, "learning_rate": 2.712219763755193e-07, "loss": 22.666, "step": 29840 }, { "epoch": 236.90927218344964, "grad_norm": 488.80523681640625, "learning_rate": 2.710976586823381e-07, "loss": 25.7122, "step": 29850 }, { "epoch": 236.9890329012961, "grad_norm": 253.51536560058594, "learning_rate": 2.709733357345221e-07, "loss": 22.0631, "step": 29860 }, { "epoch": 237.06380857427718, "grad_norm": 867.0193481445312, "learning_rate": 2.7084900756303563e-07, "loss": 23.6757, "step": 29870 }, { "epoch": 237.14356929212363, "grad_norm": 638.9913940429688, "learning_rate": 2.707246741988441e-07, "loss": 24.1338, "step": 29880 }, { "epoch": 237.22333000997008, "grad_norm": 470.91595458984375, "learning_rate": 2.7060033567291437e-07, "loss": 23.7709, "step": 29890 }, { "epoch": 237.30309072781654, "grad_norm": 347.67950439453125, "learning_rate": 2.7047599201621437e-07, "loss": 23.0163, "step": 29900 }, { "epoch": 237.30309072781654, "eval_loss": 2.9714319705963135, "eval_mae": 1.2921468019485474, "eval_mse": 2.9714322090148926, "eval_r2": 0.10472196340560913, "eval_rmse": 1.7237842698594545, "eval_runtime": 9.1011, "eval_samples_per_second": 440.715, "eval_steps_per_second": 13.844, "step": 29900 }, { "epoch": 237.38285144566302, "grad_norm": 203.6299591064453, "learning_rate": 2.703516432597135e-07, "loss": 23.4956, "step": 29910 }, { "epoch": 237.46261216350948, "grad_norm": 249.11911010742188, "learning_rate": 2.7022728943438234e-07, "loss": 24.6135, "step": 29920 }, { "epoch": 237.54237288135593, "grad_norm": 563.9261474609375, "learning_rate": 2.7010293057119286e-07, "loss": 23.2776, "step": 29930 }, { "epoch": 237.62213359920239, "grad_norm": 
328.15679931640625, "learning_rate": 2.69978566701118e-07, "loss": 24.4866, "step": 29940 }, { "epoch": 237.70189431704884, "grad_norm": 276.6571350097656, "learning_rate": 2.6985419785513225e-07, "loss": 20.3041, "step": 29950 }, { "epoch": 237.78165503489532, "grad_norm": 545.2789306640625, "learning_rate": 2.697298240642113e-07, "loss": 22.3424, "step": 29960 }, { "epoch": 237.86141575274178, "grad_norm": 233.5922393798828, "learning_rate": 2.6960544535933185e-07, "loss": 24.4458, "step": 29970 }, { "epoch": 237.94117647058823, "grad_norm": 205.71714782714844, "learning_rate": 2.69481061771472e-07, "loss": 23.6896, "step": 29980 }, { "epoch": 238.01595214356928, "grad_norm": 831.3458251953125, "learning_rate": 2.6935667333161104e-07, "loss": 20.1855, "step": 29990 }, { "epoch": 238.09571286141576, "grad_norm": 711.1663208007812, "learning_rate": 2.692322800707295e-07, "loss": 22.7354, "step": 30000 }, { "epoch": 238.09571286141576, "eval_loss": 2.971733331680298, "eval_mae": 1.2866849899291992, "eval_mse": 2.971733331680298, "eval_r2": 0.10463130474090576, "eval_rmse": 1.723871611135904, "eval_runtime": 9.399, "eval_samples_per_second": 426.745, "eval_steps_per_second": 13.406, "step": 30000 }, { "epoch": 238.17547357926222, "grad_norm": 679.4271850585938, "learning_rate": 2.6910788201980903e-07, "loss": 25.9507, "step": 30010 }, { "epoch": 238.25523429710867, "grad_norm": 191.01588439941406, "learning_rate": 2.689834792098325e-07, "loss": 23.5691, "step": 30020 }, { "epoch": 238.33499501495513, "grad_norm": 490.3308410644531, "learning_rate": 2.6885907167178406e-07, "loss": 21.6508, "step": 30030 }, { "epoch": 238.4147557328016, "grad_norm": 448.085205078125, "learning_rate": 2.687346594366488e-07, "loss": 24.4007, "step": 30040 }, { "epoch": 238.49451645064806, "grad_norm": 326.50433349609375, "learning_rate": 2.686102425354134e-07, "loss": 23.1411, "step": 30050 }, { "epoch": 238.57427716849452, "grad_norm": 195.3041229248047, "learning_rate": 
2.684858209990651e-07, "loss": 24.5588, "step": 30060 }, { "epoch": 238.65403788634097, "grad_norm": 298.8865966796875, "learning_rate": 2.6836139485859286e-07, "loss": 21.3608, "step": 30070 }, { "epoch": 238.73379860418743, "grad_norm": 387.4641418457031, "learning_rate": 2.6823696414498655e-07, "loss": 22.479, "step": 30080 }, { "epoch": 238.8135593220339, "grad_norm": 341.1193542480469, "learning_rate": 2.6811252888923715e-07, "loss": 24.1528, "step": 30090 }, { "epoch": 238.89332003988036, "grad_norm": 352.9144592285156, "learning_rate": 2.6798808912233677e-07, "loss": 22.1946, "step": 30100 }, { "epoch": 238.89332003988036, "eval_loss": 2.97428035736084, "eval_mae": 1.2877657413482666, "eval_mse": 2.97428035736084, "eval_r2": 0.10386383533477783, "eval_rmse": 1.7246102044696476, "eval_runtime": 9.1168, "eval_samples_per_second": 439.956, "eval_steps_per_second": 13.821, "step": 30100 }, { "epoch": 238.97308075772682, "grad_norm": 489.48028564453125, "learning_rate": 2.6786364487527874e-07, "loss": 24.4485, "step": 30110 }, { "epoch": 239.04785643070787, "grad_norm": 708.220947265625, "learning_rate": 2.6773919617905745e-07, "loss": 22.5021, "step": 30120 }, { "epoch": 239.12761714855435, "grad_norm": 494.99481201171875, "learning_rate": 2.6761474306466837e-07, "loss": 22.909, "step": 30130 }, { "epoch": 239.2073778664008, "grad_norm": 355.8766174316406, "learning_rate": 2.674902855631081e-07, "loss": 23.0503, "step": 30140 }, { "epoch": 239.28713858424726, "grad_norm": 308.775146484375, "learning_rate": 2.673658237053743e-07, "loss": 25.3727, "step": 30150 }, { "epoch": 239.3668993020937, "grad_norm": 559.693115234375, "learning_rate": 2.6724135752246586e-07, "loss": 21.4661, "step": 30160 }, { "epoch": 239.44666001994017, "grad_norm": 606.2026977539062, "learning_rate": 2.6711688704538255e-07, "loss": 22.9412, "step": 30170 }, { "epoch": 239.52642073778665, "grad_norm": 560.1284790039062, "learning_rate": 2.669924123051252e-07, "loss": 22.2418, "step": 30180 
}, { "epoch": 239.6061814556331, "grad_norm": 427.8968505859375, "learning_rate": 2.6686793333269604e-07, "loss": 24.7176, "step": 30190 }, { "epoch": 239.68594217347956, "grad_norm": 1554.723388671875, "learning_rate": 2.667434501590978e-07, "loss": 24.0749, "step": 30200 }, { "epoch": 239.68594217347956, "eval_loss": 2.9791054725646973, "eval_mae": 1.3021334409713745, "eval_mse": 2.9791057109832764, "eval_r2": 0.1024099588394165, "eval_rmse": 1.7260086068682499, "eval_runtime": 9.1144, "eval_samples_per_second": 440.072, "eval_steps_per_second": 13.824, "step": 30200 }, { "epoch": 239.76570289132601, "grad_norm": 1463.669921875, "learning_rate": 2.6661896281533494e-07, "loss": 24.3954, "step": 30210 }, { "epoch": 239.84546360917247, "grad_norm": 147.11212158203125, "learning_rate": 2.664944713324123e-07, "loss": 23.247, "step": 30220 }, { "epoch": 239.92522432701895, "grad_norm": 724.29248046875, "learning_rate": 2.6636997574133603e-07, "loss": 23.0793, "step": 30230 }, { "epoch": 240.0, "grad_norm": 305.6433410644531, "learning_rate": 2.662454760731134e-07, "loss": 22.3269, "step": 30240 }, { "epoch": 240.07976071784645, "grad_norm": 596.2266235351562, "learning_rate": 2.6612097235875263e-07, "loss": 23.0829, "step": 30250 }, { "epoch": 240.1595214356929, "grad_norm": 612.7228393554688, "learning_rate": 2.659964646292629e-07, "loss": 23.3136, "step": 30260 }, { "epoch": 240.2392821535394, "grad_norm": 508.3787536621094, "learning_rate": 2.6587195291565434e-07, "loss": 25.2146, "step": 30270 }, { "epoch": 240.31904287138585, "grad_norm": 258.1715393066406, "learning_rate": 2.6574743724893825e-07, "loss": 22.4899, "step": 30280 }, { "epoch": 240.3988035892323, "grad_norm": 287.9705810546875, "learning_rate": 2.656229176601267e-07, "loss": 20.1821, "step": 30290 }, { "epoch": 240.47856430707876, "grad_norm": 324.1119384765625, "learning_rate": 2.65498394180233e-07, "loss": 22.4198, "step": 30300 }, { "epoch": 240.47856430707876, "eval_loss": 2.9733307361602783, 
"eval_mae": 1.2874475717544556, "eval_mse": 2.9733307361602783, "eval_r2": 0.10414999723434448, "eval_rmse": 1.724334867756341, "eval_runtime": 9.1209, "eval_samples_per_second": 439.758, "eval_steps_per_second": 13.814, "step": 30300 }, { "epoch": 240.5583250249252, "grad_norm": 266.3356018066406, "learning_rate": 2.65373866840271e-07, "loss": 23.3075, "step": 30310 }, { "epoch": 240.6380857427717, "grad_norm": 1106.54248046875, "learning_rate": 2.652493356712561e-07, "loss": 23.8427, "step": 30320 }, { "epoch": 240.71784646061815, "grad_norm": 694.6317749023438, "learning_rate": 2.651248007042041e-07, "loss": 24.2339, "step": 30330 }, { "epoch": 240.7976071784646, "grad_norm": 273.56085205078125, "learning_rate": 2.650002619701321e-07, "loss": 24.2687, "step": 30340 }, { "epoch": 240.87736789631106, "grad_norm": 841.0004272460938, "learning_rate": 2.6487571950005803e-07, "loss": 24.3471, "step": 30350 }, { "epoch": 240.95712861415754, "grad_norm": 784.8743286132812, "learning_rate": 2.6475117332500067e-07, "loss": 24.1983, "step": 30360 }, { "epoch": 241.0319042871386, "grad_norm": 509.60528564453125, "learning_rate": 2.6462662347598e-07, "loss": 23.3144, "step": 30370 }, { "epoch": 241.11166500498504, "grad_norm": 341.0653381347656, "learning_rate": 2.6450206998401634e-07, "loss": 23.0637, "step": 30380 }, { "epoch": 241.1914257228315, "grad_norm": 537.9449462890625, "learning_rate": 2.643775128801316e-07, "loss": 23.4943, "step": 30390 }, { "epoch": 241.27118644067798, "grad_norm": 310.14495849609375, "learning_rate": 2.642529521953482e-07, "loss": 21.9223, "step": 30400 }, { "epoch": 241.27118644067798, "eval_loss": 2.9676897525787354, "eval_mae": 1.2947598695755005, "eval_mse": 2.9676897525787354, "eval_r2": 0.10584962368011475, "eval_rmse": 1.722698392806685, "eval_runtime": 9.1257, "eval_samples_per_second": 439.529, "eval_steps_per_second": 13.807, "step": 30400 }, { "epoch": 241.35094715852443, "grad_norm": 383.8916320800781, "learning_rate": 
2.641283879606895e-07, "loss": 22.6062, "step": 30410 }, { "epoch": 241.4307078763709, "grad_norm": 167.80691528320312, "learning_rate": 2.6400382020717983e-07, "loss": 23.409, "step": 30420 }, { "epoch": 241.51046859421734, "grad_norm": 308.71026611328125, "learning_rate": 2.638792489658442e-07, "loss": 26.3346, "step": 30430 }, { "epoch": 241.5902293120638, "grad_norm": 165.4586944580078, "learning_rate": 2.637546742677088e-07, "loss": 23.653, "step": 30440 }, { "epoch": 241.66999002991028, "grad_norm": 523.208251953125, "learning_rate": 2.6363009614380043e-07, "loss": 21.2596, "step": 30450 }, { "epoch": 241.74975074775674, "grad_norm": 666.1103515625, "learning_rate": 2.635055146251469e-07, "loss": 23.8289, "step": 30460 }, { "epoch": 241.8295114656032, "grad_norm": 384.0069580078125, "learning_rate": 2.6338092974277653e-07, "loss": 22.3794, "step": 30470 }, { "epoch": 241.90927218344964, "grad_norm": 261.10748291015625, "learning_rate": 2.6325634152771905e-07, "loss": 24.0764, "step": 30480 }, { "epoch": 241.9890329012961, "grad_norm": 452.0245361328125, "learning_rate": 2.631317500110046e-07, "loss": 22.1141, "step": 30490 }, { "epoch": 242.06380857427718, "grad_norm": 265.33489990234375, "learning_rate": 2.630071552236641e-07, "loss": 21.199, "step": 30500 }, { "epoch": 242.06380857427718, "eval_loss": 2.969336986541748, "eval_mae": 1.2936196327209473, "eval_mse": 2.969337224960327, "eval_r2": 0.10535317659378052, "eval_rmse": 1.7231764926902662, "eval_runtime": 9.1267, "eval_samples_per_second": 439.479, "eval_steps_per_second": 13.806, "step": 30500 }, { "epoch": 242.14356929212363, "grad_norm": 301.48162841796875, "learning_rate": 2.628825571967297e-07, "loss": 22.0537, "step": 30510 }, { "epoch": 242.22333000997008, "grad_norm": 328.41217041015625, "learning_rate": 2.627579559612339e-07, "loss": 22.4829, "step": 30520 }, { "epoch": 242.30309072781654, "grad_norm": 632.689453125, "learning_rate": 2.626333515482103e-07, "loss": 24.6365, "step": 30530 }, { 
"epoch": 242.38285144566302, "grad_norm": 210.31422424316406, "learning_rate": 2.6250874398869305e-07, "loss": 24.618, "step": 30540 }, { "epoch": 242.46261216350948, "grad_norm": 539.3073120117188, "learning_rate": 2.6238413331371735e-07, "loss": 23.2583, "step": 30550 }, { "epoch": 242.54237288135593, "grad_norm": 324.3432312011719, "learning_rate": 2.62259519554319e-07, "loss": 22.2958, "step": 30560 }, { "epoch": 242.62213359920239, "grad_norm": 381.7465515136719, "learning_rate": 2.6213490274153454e-07, "loss": 26.3655, "step": 30570 }, { "epoch": 242.70189431704884, "grad_norm": 668.5147705078125, "learning_rate": 2.620102829064014e-07, "loss": 21.8175, "step": 30580 }, { "epoch": 242.78165503489532, "grad_norm": 686.0501098632812, "learning_rate": 2.6188566007995764e-07, "loss": 23.2414, "step": 30590 }, { "epoch": 242.86141575274178, "grad_norm": 536.3604125976562, "learning_rate": 2.617610342932423e-07, "loss": 23.2917, "step": 30600 }, { "epoch": 242.86141575274178, "eval_loss": 2.97670841217041, "eval_mae": 1.2905606031417847, "eval_mse": 2.976708173751831, "eval_r2": 0.10313242673873901, "eval_rmse": 1.725313934839637, "eval_runtime": 9.1134, "eval_samples_per_second": 440.123, "eval_steps_per_second": 13.826, "step": 30600 }, { "epoch": 242.94117647058823, "grad_norm": 141.11659240722656, "learning_rate": 2.616364055772948e-07, "loss": 23.4465, "step": 30610 }, { "epoch": 243.01595214356928, "grad_norm": 891.1488647460938, "learning_rate": 2.6151177396315554e-07, "loss": 22.8134, "step": 30620 }, { "epoch": 243.09571286141576, "grad_norm": 703.3218383789062, "learning_rate": 2.6138713948186565e-07, "loss": 22.6871, "step": 30630 }, { "epoch": 243.17547357926222, "grad_norm": 830.7728271484375, "learning_rate": 2.6126250216446677e-07, "loss": 22.9452, "step": 30640 }, { "epoch": 243.25523429710867, "grad_norm": 1052.3709716796875, "learning_rate": 2.6113786204200145e-07, "loss": 21.548, "step": 30650 }, { "epoch": 243.33499501495513, "grad_norm": 
476.34375, "learning_rate": 2.6101321914551275e-07, "loss": 23.4007, "step": 30660 }, { "epoch": 243.4147557328016, "grad_norm": 251.74864196777344, "learning_rate": 2.6088857350604476e-07, "loss": 22.6418, "step": 30670 }, { "epoch": 243.49451645064806, "grad_norm": 1233.851806640625, "learning_rate": 2.607639251546418e-07, "loss": 22.7851, "step": 30680 }, { "epoch": 243.57427716849452, "grad_norm": 267.61334228515625, "learning_rate": 2.6063927412234925e-07, "loss": 24.4046, "step": 30690 }, { "epoch": 243.65403788634097, "grad_norm": 589.5751953125, "learning_rate": 2.60514620440213e-07, "loss": 23.3741, "step": 30700 }, { "epoch": 243.65403788634097, "eval_loss": 2.978055953979492, "eval_mae": 1.2858299016952515, "eval_mse": 2.9780564308166504, "eval_r2": 0.10272616147994995, "eval_rmse": 1.7257046186461489, "eval_runtime": 9.1004, "eval_samples_per_second": 440.75, "eval_steps_per_second": 13.846, "step": 30700 }, { "epoch": 243.73379860418743, "grad_norm": 154.97348022460938, "learning_rate": 2.603899641392794e-07, "loss": 23.5732, "step": 30710 }, { "epoch": 243.8135593220339, "grad_norm": 659.343994140625, "learning_rate": 2.6026530525059597e-07, "loss": 24.0906, "step": 30720 }, { "epoch": 243.89332003988036, "grad_norm": 758.31982421875, "learning_rate": 2.6014064380521033e-07, "loss": 22.8632, "step": 30730 }, { "epoch": 243.97308075772682, "grad_norm": 788.9865112304688, "learning_rate": 2.6002844634404446e-07, "loss": 25.1245, "step": 30740 }, { "epoch": 244.04785643070787, "grad_norm": 1136.2060546875, "learning_rate": 2.599037801264639e-07, "loss": 24.0369, "step": 30750 }, { "epoch": 244.12761714855435, "grad_norm": 432.9892578125, "learning_rate": 2.597791114422235e-07, "loss": 21.5665, "step": 30760 }, { "epoch": 244.2073778664008, "grad_norm": 403.8238220214844, "learning_rate": 2.596544403223735e-07, "loss": 24.2965, "step": 30770 }, { "epoch": 244.28713858424726, "grad_norm": 342.4436340332031, "learning_rate": 2.5952976679796496e-07, "loss": 
24.103, "step": 30780 }, { "epoch": 244.3668993020937, "grad_norm": 1157.9874267578125, "learning_rate": 2.5940509090004937e-07, "loss": 22.7689, "step": 30790 }, { "epoch": 244.44666001994017, "grad_norm": 424.2969970703125, "learning_rate": 2.592804126596786e-07, "loss": 22.9439, "step": 30800 }, { "epoch": 244.44666001994017, "eval_loss": 2.968757390975952, "eval_mae": 1.2900669574737549, "eval_mse": 2.968757390975952, "eval_r2": 0.10552793741226196, "eval_rmse": 1.7230082388009502, "eval_runtime": 9.1274, "eval_samples_per_second": 439.444, "eval_steps_per_second": 13.805, "step": 30800 }, { "epoch": 244.52642073778665, "grad_norm": 566.1431274414062, "learning_rate": 2.591557321079054e-07, "loss": 24.0186, "step": 30810 }, { "epoch": 244.6061814556331, "grad_norm": 745.6699829101562, "learning_rate": 2.590310492757832e-07, "loss": 22.743, "step": 30820 }, { "epoch": 244.68594217347956, "grad_norm": 855.7810668945312, "learning_rate": 2.589063641943656e-07, "loss": 22.8092, "step": 30830 }, { "epoch": 244.76570289132601, "grad_norm": 261.3106689453125, "learning_rate": 2.587816768947071e-07, "loss": 23.1349, "step": 30840 }, { "epoch": 244.84546360917247, "grad_norm": 297.5519104003906, "learning_rate": 2.5865698740786255e-07, "loss": 24.3311, "step": 30850 }, { "epoch": 244.92522432701895, "grad_norm": 1044.4854736328125, "learning_rate": 2.5853229576488757e-07, "loss": 24.3775, "step": 30860 }, { "epoch": 245.0, "grad_norm": 479.5684814453125, "learning_rate": 2.584076019968379e-07, "loss": 21.1021, "step": 30870 }, { "epoch": 245.07976071784645, "grad_norm": 128.2196502685547, "learning_rate": 2.5828290613477026e-07, "loss": 23.223, "step": 30880 }, { "epoch": 245.1595214356929, "grad_norm": 261.00604248046875, "learning_rate": 2.581582082097417e-07, "loss": 24.8937, "step": 30890 }, { "epoch": 245.2392821535394, "grad_norm": 781.410888671875, "learning_rate": 2.580335082528097e-07, "loss": 23.9844, "step": 30900 }, { "epoch": 245.2392821535394, "eval_loss": 
2.9844777584075928, "eval_mae": 1.305625557899475, "eval_mse": 2.9844777584075928, "eval_r2": 0.10079145431518555, "eval_rmse": 1.7275641112293323, "eval_runtime": 9.1463, "eval_samples_per_second": 438.536, "eval_steps_per_second": 13.776, "step": 30900 }, { "epoch": 245.31904287138585, "grad_norm": 877.4627685546875, "learning_rate": 2.5790880629503256e-07, "loss": 22.4681, "step": 30910 }, { "epoch": 245.3988035892323, "grad_norm": 424.7146911621094, "learning_rate": 2.577841023674685e-07, "loss": 22.4541, "step": 30920 }, { "epoch": 245.47856430707876, "grad_norm": 270.17474365234375, "learning_rate": 2.5765939650117687e-07, "loss": 22.6297, "step": 30930 }, { "epoch": 245.5583250249252, "grad_norm": 532.4530029296875, "learning_rate": 2.575346887272171e-07, "loss": 21.7078, "step": 30940 }, { "epoch": 245.6380857427717, "grad_norm": 668.9741821289062, "learning_rate": 2.5740997907664923e-07, "loss": 22.8294, "step": 30950 }, { "epoch": 245.71784646061815, "grad_norm": 687.4274291992188, "learning_rate": 2.572852675805338e-07, "loss": 24.2809, "step": 30960 }, { "epoch": 245.7976071784646, "grad_norm": 290.156982421875, "learning_rate": 2.571605542699317e-07, "loss": 22.1749, "step": 30970 }, { "epoch": 245.87736789631106, "grad_norm": 493.32818603515625, "learning_rate": 2.570358391759044e-07, "loss": 24.0103, "step": 30980 }, { "epoch": 245.95712861415754, "grad_norm": 1171.052978515625, "learning_rate": 2.5691112232951375e-07, "loss": 24.4049, "step": 30990 }, { "epoch": 246.0319042871386, "grad_norm": 510.27520751953125, "learning_rate": 2.56786403761822e-07, "loss": 22.9206, "step": 31000 }, { "epoch": 246.0319042871386, "eval_loss": 2.969852924346924, "eval_mae": 1.2927972078323364, "eval_mse": 2.969852924346924, "eval_r2": 0.10519784688949585, "eval_rmse": 1.7233261224582317, "eval_runtime": 9.1208, "eval_samples_per_second": 439.762, "eval_steps_per_second": 13.815, "step": 31000 }, { "epoch": 246.11166500498504, "grad_norm": 368.5177307128906, 
"learning_rate": 2.5666168350389173e-07, "loss": 22.376, "step": 31010 }, { "epoch": 246.1914257228315, "grad_norm": 645.730712890625, "learning_rate": 2.565369615867863e-07, "loss": 23.4462, "step": 31020 }, { "epoch": 246.27118644067798, "grad_norm": 1023.9620361328125, "learning_rate": 2.564122380415691e-07, "loss": 25.5178, "step": 31030 }, { "epoch": 246.35094715852443, "grad_norm": 1058.9161376953125, "learning_rate": 2.5628751289930415e-07, "loss": 26.1951, "step": 31040 }, { "epoch": 246.4307078763709, "grad_norm": 1442.2529296875, "learning_rate": 2.5616278619105583e-07, "loss": 24.4609, "step": 31050 }, { "epoch": 246.51046859421734, "grad_norm": 632.6666870117188, "learning_rate": 2.560380579478888e-07, "loss": 21.0663, "step": 31060 }, { "epoch": 246.5902293120638, "grad_norm": 250.21461486816406, "learning_rate": 2.559133282008684e-07, "loss": 23.5933, "step": 31070 }, { "epoch": 246.66999002991028, "grad_norm": 334.2854919433594, "learning_rate": 2.557885969810597e-07, "loss": 22.1378, "step": 31080 }, { "epoch": 246.74975074775674, "grad_norm": 257.9935607910156, "learning_rate": 2.5566386431952893e-07, "loss": 23.9687, "step": 31090 }, { "epoch": 246.8295114656032, "grad_norm": 1875.7540283203125, "learning_rate": 2.5553913024734214e-07, "loss": 22.8232, "step": 31100 }, { "epoch": 246.8295114656032, "eval_loss": 2.9731228351593018, "eval_mae": 1.2972358465194702, "eval_mse": 2.9731228351593018, "eval_r2": 0.10421264171600342, "eval_rmse": 1.7242745822981043, "eval_runtime": 9.094, "eval_samples_per_second": 441.062, "eval_steps_per_second": 13.855, "step": 31100 }, { "epoch": 246.90927218344964, "grad_norm": 588.41845703125, "learning_rate": 2.5541439479556605e-07, "loss": 21.2464, "step": 31110 }, { "epoch": 246.9890329012961, "grad_norm": 1064.3218994140625, "learning_rate": 2.5528965799526745e-07, "loss": 22.7664, "step": 31120 }, { "epoch": 247.06380857427718, "grad_norm": 610.049560546875, "learning_rate": 2.551649198775136e-07, "loss": 
21.6621, "step": 31130 }, { "epoch": 247.14356929212363, "grad_norm": 938.4146118164062, "learning_rate": 2.5504018047337215e-07, "loss": 22.9931, "step": 31140 }, { "epoch": 247.22333000997008, "grad_norm": 1247.22314453125, "learning_rate": 2.5491543981391103e-07, "loss": 23.7451, "step": 31150 }, { "epoch": 247.30309072781654, "grad_norm": 869.5410766601562, "learning_rate": 2.547906979301983e-07, "loss": 20.6923, "step": 31160 }, { "epoch": 247.38285144566302, "grad_norm": 924.4537353515625, "learning_rate": 2.546659548533026e-07, "loss": 23.5296, "step": 31170 }, { "epoch": 247.46261216350948, "grad_norm": 435.33319091796875, "learning_rate": 2.5454121061429275e-07, "loss": 24.9323, "step": 31180 }, { "epoch": 247.54237288135593, "grad_norm": 399.9759521484375, "learning_rate": 2.5441646524423783e-07, "loss": 24.7295, "step": 31190 }, { "epoch": 247.62213359920239, "grad_norm": 1023.3956909179688, "learning_rate": 2.5429171877420714e-07, "loss": 24.0972, "step": 31200 }, { "epoch": 247.62213359920239, "eval_loss": 2.971273422241211, "eval_mae": 1.2890037298202515, "eval_mse": 2.971273422241211, "eval_r2": 0.10476988554000854, "eval_rmse": 1.7237382116322684, "eval_runtime": 9.0778, "eval_samples_per_second": 441.849, "eval_steps_per_second": 13.88, "step": 31200 }, { "epoch": 247.70189431704884, "grad_norm": 410.3566589355469, "learning_rate": 2.541669712352705e-07, "loss": 23.6653, "step": 31210 }, { "epoch": 247.78165503489532, "grad_norm": 448.5334167480469, "learning_rate": 2.5404222265849774e-07, "loss": 23.3972, "step": 31220 }, { "epoch": 247.86141575274178, "grad_norm": 693.1231689453125, "learning_rate": 2.539174730749592e-07, "loss": 21.8258, "step": 31230 }, { "epoch": 247.94117647058823, "grad_norm": 941.747314453125, "learning_rate": 2.5379272251572504e-07, "loss": 24.0123, "step": 31240 }, { "epoch": 248.01595214356928, "grad_norm": 394.1688232421875, "learning_rate": 2.536679710118661e-07, "loss": 20.3078, "step": 31250 }, { "epoch": 
248.09571286141576, "grad_norm": 871.2073974609375, "learning_rate": 2.5354321859445334e-07, "loss": 22.2407, "step": 31260 }, { "epoch": 248.17547357926222, "grad_norm": 1198.1002197265625, "learning_rate": 2.5341846529455787e-07, "loss": 22.9835, "step": 31270 }, { "epoch": 248.25523429710867, "grad_norm": 676.827392578125, "learning_rate": 2.53293711143251e-07, "loss": 24.2606, "step": 31280 }, { "epoch": 248.33499501495513, "grad_norm": 238.11154174804688, "learning_rate": 2.5316895617160433e-07, "loss": 22.299, "step": 31290 }, { "epoch": 248.4147557328016, "grad_norm": 201.0675048828125, "learning_rate": 2.530442004106897e-07, "loss": 21.9802, "step": 31300 }, { "epoch": 248.4147557328016, "eval_loss": 2.9748613834381104, "eval_mae": 1.2876076698303223, "eval_mse": 2.9748613834381104, "eval_r2": 0.10368883609771729, "eval_rmse": 1.7247786476641316, "eval_runtime": 9.1422, "eval_samples_per_second": 438.735, "eval_steps_per_second": 13.782, "step": 31300 }, { "epoch": 248.49451645064806, "grad_norm": 424.4210510253906, "learning_rate": 2.52919443891579e-07, "loss": 24.8464, "step": 31310 }, { "epoch": 248.57427716849452, "grad_norm": 366.867431640625, "learning_rate": 2.5279468664534443e-07, "loss": 24.1916, "step": 31320 }, { "epoch": 248.65403788634097, "grad_norm": 620.6328125, "learning_rate": 2.5266992870305837e-07, "loss": 23.2312, "step": 31330 }, { "epoch": 248.73379860418743, "grad_norm": 266.5705261230469, "learning_rate": 2.525451700957933e-07, "loss": 24.0617, "step": 31340 }, { "epoch": 248.8135593220339, "grad_norm": 601.544189453125, "learning_rate": 2.524204108546219e-07, "loss": 23.0012, "step": 31350 }, { "epoch": 248.89332003988036, "grad_norm": 638.3615112304688, "learning_rate": 2.5229565101061703e-07, "loss": 23.0096, "step": 31360 }, { "epoch": 248.97308075772682, "grad_norm": 682.67236328125, "learning_rate": 2.521708905948516e-07, "loss": 24.3243, "step": 31370 }, { "epoch": 249.04785643070787, "grad_norm": 547.697265625, 
"learning_rate": 2.520461296383989e-07, "loss": 22.8326, "step": 31380 }, { "epoch": 249.12761714855435, "grad_norm": 498.31414794921875, "learning_rate": 2.519213681723322e-07, "loss": 23.5225, "step": 31390 }, { "epoch": 249.2073778664008, "grad_norm": 980.2252807617188, "learning_rate": 2.517966062277247e-07, "loss": 21.6833, "step": 31400 }, { "epoch": 249.2073778664008, "eval_loss": 2.968057632446289, "eval_mae": 1.2914273738861084, "eval_mse": 2.968057870864868, "eval_r2": 0.10573869943618774, "eval_rmse": 1.7228052330036812, "eval_runtime": 9.1047, "eval_samples_per_second": 440.544, "eval_steps_per_second": 13.839, "step": 31400 }, { "epoch": 249.28713858424726, "grad_norm": 715.7168579101562, "learning_rate": 2.5167184383565003e-07, "loss": 21.8021, "step": 31410 }, { "epoch": 249.3668993020937, "grad_norm": 540.0652465820312, "learning_rate": 2.515470810271819e-07, "loss": 24.0498, "step": 31420 }, { "epoch": 249.44666001994017, "grad_norm": 903.7225952148438, "learning_rate": 2.51422317833394e-07, "loss": 24.0246, "step": 31430 }, { "epoch": 249.52642073778665, "grad_norm": 641.188232421875, "learning_rate": 2.512975542853601e-07, "loss": 24.5698, "step": 31440 }, { "epoch": 249.6061814556331, "grad_norm": 814.0216064453125, "learning_rate": 2.5117279041415406e-07, "loss": 23.8711, "step": 31450 }, { "epoch": 249.68594217347956, "grad_norm": 268.5207824707031, "learning_rate": 2.510480262508501e-07, "loss": 22.7423, "step": 31460 }, { "epoch": 249.76570289132601, "grad_norm": 471.1623229980469, "learning_rate": 2.509232618265221e-07, "loss": 24.1789, "step": 31470 }, { "epoch": 249.84546360917247, "grad_norm": 562.5411376953125, "learning_rate": 2.507984971722441e-07, "loss": 24.0431, "step": 31480 }, { "epoch": 249.92522432701895, "grad_norm": 161.48260498046875, "learning_rate": 2.506737323190906e-07, "loss": 20.4402, "step": 31490 }, { "epoch": 250.0, "grad_norm": 441.6737976074219, "learning_rate": 2.5054896729813563e-07, "loss": 21.9377, "step": 
31500 }, { "epoch": 250.0, "eval_loss": 2.9698264598846436, "eval_mae": 1.2904868125915527, "eval_mse": 2.9698264598846436, "eval_r2": 0.10520583391189575, "eval_rmse": 1.7233184441317406, "eval_runtime": 9.1197, "eval_samples_per_second": 439.819, "eval_steps_per_second": 13.816, "step": 31500 }, { "epoch": 250.07976071784645, "grad_norm": 1081.425048828125, "learning_rate": 2.504242021404535e-07, "loss": 22.5199, "step": 31510 }, { "epoch": 250.1595214356929, "grad_norm": 672.3206787109375, "learning_rate": 2.5029943687711853e-07, "loss": 22.2325, "step": 31520 }, { "epoch": 250.2392821535394, "grad_norm": 266.5413818359375, "learning_rate": 2.5017467153920505e-07, "loss": 23.8743, "step": 31530 }, { "epoch": 250.31904287138585, "grad_norm": 394.8414611816406, "learning_rate": 2.5004990615778754e-07, "loss": 26.0877, "step": 31540 }, { "epoch": 250.3988035892323, "grad_norm": 203.4445037841797, "learning_rate": 2.4992514076394025e-07, "loss": 23.0688, "step": 31550 }, { "epoch": 250.47856430707876, "grad_norm": 208.2809600830078, "learning_rate": 2.498003753887375e-07, "loss": 23.1918, "step": 31560 }, { "epoch": 250.5583250249252, "grad_norm": 502.4659118652344, "learning_rate": 2.496756100632538e-07, "loss": 24.0362, "step": 31570 }, { "epoch": 250.6380857427717, "grad_norm": 310.458740234375, "learning_rate": 2.495508448185635e-07, "loss": 23.5664, "step": 31580 }, { "epoch": 250.71784646061815, "grad_norm": 1150.802001953125, "learning_rate": 2.4942607968574093e-07, "loss": 21.5783, "step": 31590 }, { "epoch": 250.7976071784646, "grad_norm": 693.6259155273438, "learning_rate": 2.4930131469586027e-07, "loss": 21.2259, "step": 31600 }, { "epoch": 250.7976071784646, "eval_loss": 2.965651035308838, "eval_mae": 1.2888035774230957, "eval_mse": 2.965651035308838, "eval_r2": 0.10646384954452515, "eval_rmse": 1.722106569091715, "eval_runtime": 9.101, "eval_samples_per_second": 440.721, "eval_steps_per_second": 13.845, "step": 31600 }, { "epoch": 250.87736789631106, 
"grad_norm": 524.42138671875, "learning_rate": 2.4917654987999586e-07, "loss": 24.0307, "step": 31610 }, { "epoch": 250.95712861415754, "grad_norm": 742.5140380859375, "learning_rate": 2.49051785269222e-07, "loss": 23.6902, "step": 31620 }, { "epoch": 251.0319042871386, "grad_norm": 303.7222595214844, "learning_rate": 2.489270208946128e-07, "loss": 20.7224, "step": 31630 }, { "epoch": 251.11166500498504, "grad_norm": 1036.04345703125, "learning_rate": 2.488022567872425e-07, "loss": 23.9105, "step": 31640 }, { "epoch": 251.1914257228315, "grad_norm": 408.6697082519531, "learning_rate": 2.4867749297818497e-07, "loss": 24.5924, "step": 31650 }, { "epoch": 251.27118644067798, "grad_norm": 271.9720764160156, "learning_rate": 2.485527294985144e-07, "loss": 25.3175, "step": 31660 }, { "epoch": 251.35094715852443, "grad_norm": 310.2798767089844, "learning_rate": 2.4842796637930447e-07, "loss": 23.1924, "step": 31670 }, { "epoch": 251.4307078763709, "grad_norm": 293.2641906738281, "learning_rate": 2.4830320365162916e-07, "loss": 23.8814, "step": 31680 }, { "epoch": 251.51046859421734, "grad_norm": 315.9644470214844, "learning_rate": 2.481784413465621e-07, "loss": 23.1202, "step": 31690 }, { "epoch": 251.5902293120638, "grad_norm": 836.3831787109375, "learning_rate": 2.480536794951769e-07, "loss": 22.7945, "step": 31700 }, { "epoch": 251.5902293120638, "eval_loss": 2.972355365753174, "eval_mae": 1.3000593185424805, "eval_mse": 2.972355365753174, "eval_r2": 0.10444384813308716, "eval_rmse": 1.7240520194452293, "eval_runtime": 9.1011, "eval_samples_per_second": 440.717, "eval_steps_per_second": 13.845, "step": 31700 }, { "epoch": 251.66999002991028, "grad_norm": 268.80438232421875, "learning_rate": 2.4792891812854706e-07, "loss": 21.6484, "step": 31710 }, { "epoch": 251.74975074775674, "grad_norm": 378.1847229003906, "learning_rate": 2.4780415727774594e-07, "loss": 23.6207, "step": 31720 }, { "epoch": 251.8295114656032, "grad_norm": 573.9688720703125, "learning_rate": 
2.476793969738469e-07, "loss": 22.2657, "step": 31730 }, { "epoch": 251.90927218344964, "grad_norm": 347.4887390136719, "learning_rate": 2.4755463724792284e-07, "loss": 24.1973, "step": 31740 }, { "epoch": 251.9890329012961, "grad_norm": 410.47625732421875, "learning_rate": 2.474298781310469e-07, "loss": 21.5562, "step": 31750 }, { "epoch": 252.06380857427718, "grad_norm": 423.4188537597656, "learning_rate": 2.4730511965429175e-07, "loss": 25.7484, "step": 31760 }, { "epoch": 252.14356929212363, "grad_norm": 644.38818359375, "learning_rate": 2.4718036184873014e-07, "loss": 25.0031, "step": 31770 }, { "epoch": 252.22333000997008, "grad_norm": 170.18014526367188, "learning_rate": 2.4705560474543466e-07, "loss": 21.3753, "step": 31780 }, { "epoch": 252.30309072781654, "grad_norm": 645.925537109375, "learning_rate": 2.4693084837547743e-07, "loss": 23.6118, "step": 31790 }, { "epoch": 252.38285144566302, "grad_norm": 530.1683349609375, "learning_rate": 2.468060927699306e-07, "loss": 24.8149, "step": 31800 }, { "epoch": 252.38285144566302, "eval_loss": 2.9768521785736084, "eval_mae": 1.2854654788970947, "eval_mse": 2.9768521785736084, "eval_r2": 0.10308897495269775, "eval_rmse": 1.7253556672679429, "eval_runtime": 9.102, "eval_samples_per_second": 440.674, "eval_steps_per_second": 13.843, "step": 31800 }, { "epoch": 252.46261216350948, "grad_norm": 1012.8935546875, "learning_rate": 2.466813379598661e-07, "loss": 22.2498, "step": 31810 }, { "epoch": 252.54237288135593, "grad_norm": 1013.740478515625, "learning_rate": 2.4655658397635587e-07, "loss": 23.5394, "step": 31820 }, { "epoch": 252.62213359920239, "grad_norm": 495.7828063964844, "learning_rate": 2.4643183085047125e-07, "loss": 23.3707, "step": 31830 }, { "epoch": 252.70189431704884, "grad_norm": 676.0218505859375, "learning_rate": 2.463070786132835e-07, "loss": 21.1693, "step": 31840 }, { "epoch": 252.78165503489532, "grad_norm": 664.9262084960938, "learning_rate": 2.4618232729586387e-07, "loss": 23.5246, "step": 
31850 }, { "epoch": 252.86141575274178, "grad_norm": 548.2990112304688, "learning_rate": 2.460575769292832e-07, "loss": 21.6169, "step": 31860 }, { "epoch": 252.94117647058823, "grad_norm": 435.6769104003906, "learning_rate": 2.459328275446122e-07, "loss": 21.8176, "step": 31870 }, { "epoch": 253.01595214356928, "grad_norm": 430.7669372558594, "learning_rate": 2.4580807917292106e-07, "loss": 23.307, "step": 31880 }, { "epoch": 253.09571286141576, "grad_norm": 966.0969848632812, "learning_rate": 2.4568333184528e-07, "loss": 22.2923, "step": 31890 }, { "epoch": 253.17547357926222, "grad_norm": 467.6542663574219, "learning_rate": 2.4555858559275894e-07, "loss": 22.0797, "step": 31900 }, { "epoch": 253.17547357926222, "eval_loss": 2.9655842781066895, "eval_mae": 1.2908598184585571, "eval_mse": 2.9655845165252686, "eval_r2": 0.10648387670516968, "eval_rmse": 1.7220872557815614, "eval_runtime": 9.1069, "eval_samples_per_second": 440.434, "eval_steps_per_second": 13.836, "step": 31900 }, { "epoch": 253.25523429710867, "grad_norm": 1069.579345703125, "learning_rate": 2.4543384044642755e-07, "loss": 23.4295, "step": 31910 }, { "epoch": 253.33499501495513, "grad_norm": 543.779541015625, "learning_rate": 2.4530909643735495e-07, "loss": 24.7901, "step": 31920 }, { "epoch": 253.4147557328016, "grad_norm": 509.000732421875, "learning_rate": 2.4518435359661035e-07, "loss": 22.0587, "step": 31930 }, { "epoch": 253.49451645064806, "grad_norm": 662.7017211914062, "learning_rate": 2.450596119552625e-07, "loss": 21.6148, "step": 31940 }, { "epoch": 253.57427716849452, "grad_norm": 606.0281372070312, "learning_rate": 2.4493487154437976e-07, "loss": 22.612, "step": 31950 }, { "epoch": 253.65403788634097, "grad_norm": 1402.4447021484375, "learning_rate": 2.4481013239503047e-07, "loss": 22.1576, "step": 31960 }, { "epoch": 253.73379860418743, "grad_norm": 388.6296081542969, "learning_rate": 2.4468539453828226e-07, "loss": 25.0013, "step": 31970 }, { "epoch": 253.8135593220339, 
"grad_norm": 227.42666625976562, "learning_rate": 2.445606580052028e-07, "loss": 24.0291, "step": 31980 }, { "epoch": 253.89332003988036, "grad_norm": 543.4359741210938, "learning_rate": 2.444359228268592e-07, "loss": 24.6239, "step": 31990 }, { "epoch": 253.97308075772682, "grad_norm": 441.9510803222656, "learning_rate": 2.4431118903431825e-07, "loss": 22.7114, "step": 32000 }, { "epoch": 253.97308075772682, "eval_loss": 2.9745237827301025, "eval_mae": 1.2858246564865112, "eval_mse": 2.9745237827301025, "eval_r2": 0.10379058122634888, "eval_rmse": 1.7246807770512498, "eval_runtime": 9.1158, "eval_samples_per_second": 440.006, "eval_steps_per_second": 13.822, "step": 32000 }, { "epoch": 254.04785643070787, "grad_norm": 603.4620971679688, "learning_rate": 2.441864566586467e-07, "loss": 22.6715, "step": 32010 }, { "epoch": 254.12761714855435, "grad_norm": 500.43914794921875, "learning_rate": 2.440617257309105e-07, "loss": 23.7327, "step": 32020 }, { "epoch": 254.2073778664008, "grad_norm": 1103.8165283203125, "learning_rate": 2.439369962821754e-07, "loss": 21.4156, "step": 32030 }, { "epoch": 254.28713858424726, "grad_norm": 522.1469116210938, "learning_rate": 2.438122683435069e-07, "loss": 24.635, "step": 32040 }, { "epoch": 254.3668993020937, "grad_norm": 968.4270629882812, "learning_rate": 2.436875419459701e-07, "loss": 23.3848, "step": 32050 }, { "epoch": 254.44666001994017, "grad_norm": 597.3555908203125, "learning_rate": 2.4356281712062956e-07, "loss": 22.0033, "step": 32060 }, { "epoch": 254.52642073778665, "grad_norm": 883.1405639648438, "learning_rate": 2.434380938985497e-07, "loss": 24.8752, "step": 32070 }, { "epoch": 254.6061814556331, "grad_norm": 798.9927978515625, "learning_rate": 2.4331337231079415e-07, "loss": 22.7152, "step": 32080 }, { "epoch": 254.68594217347956, "grad_norm": 477.7481384277344, "learning_rate": 2.4318865238842663e-07, "loss": 23.3734, "step": 32090 }, { "epoch": 254.76570289132601, "grad_norm": 299.9521179199219, "learning_rate": 
2.430639341625101e-07, "loss": 21.8486, "step": 32100 }, { "epoch": 254.76570289132601, "eval_loss": 2.9682564735412598, "eval_mae": 1.286859393119812, "eval_mse": 2.9682564735412598, "eval_r2": 0.10567885637283325, "eval_rmse": 1.722862871368833, "eval_runtime": 9.1177, "eval_samples_per_second": 439.914, "eval_steps_per_second": 13.819, "step": 32100 }, { "epoch": 254.84546360917247, "grad_norm": 597.6689453125, "learning_rate": 2.4293921766410707e-07, "loss": 23.7895, "step": 32110 }, { "epoch": 254.92522432701895, "grad_norm": 776.0618286132812, "learning_rate": 2.428145029242798e-07, "loss": 23.4706, "step": 32120 }, { "epoch": 255.0, "grad_norm": 317.8028259277344, "learning_rate": 2.4268978997409015e-07, "loss": 23.0663, "step": 32130 }, { "epoch": 255.07976071784645, "grad_norm": 1058.2989501953125, "learning_rate": 2.4256507884459944e-07, "loss": 21.5352, "step": 32140 }, { "epoch": 255.1595214356929, "grad_norm": 476.92132568359375, "learning_rate": 2.4244036956686844e-07, "loss": 23.7225, "step": 32150 }, { "epoch": 255.2392821535394, "grad_norm": 864.2227172851562, "learning_rate": 2.423156621719575e-07, "loss": 22.7905, "step": 32160 }, { "epoch": 255.31904287138585, "grad_norm": 186.06930541992188, "learning_rate": 2.421909566909266e-07, "loss": 24.4443, "step": 32170 }, { "epoch": 255.3988035892323, "grad_norm": 454.7611083984375, "learning_rate": 2.4206625315483525e-07, "loss": 23.8067, "step": 32180 }, { "epoch": 255.47856430707876, "grad_norm": 287.2181701660156, "learning_rate": 2.4194155159474246e-07, "loss": 22.7769, "step": 32190 }, { "epoch": 255.5583250249252, "grad_norm": 530.7894897460938, "learning_rate": 2.4181685204170647e-07, "loss": 23.8572, "step": 32200 }, { "epoch": 255.5583250249252, "eval_loss": 2.971975803375244, "eval_mae": 1.2996256351470947, "eval_mse": 2.971975564956665, "eval_r2": 0.10455828905105591, "eval_rmse": 1.7239418682068908, "eval_runtime": 9.1027, "eval_samples_per_second": 440.641, "eval_steps_per_second": 
13.842, "step": 32200 }, { "epoch": 255.6380857427717, "grad_norm": 1025.2225341796875, "learning_rate": 2.416921545267855e-07, "loss": 24.3539, "step": 32210 }, { "epoch": 255.71784646061815, "grad_norm": 1187.16357421875, "learning_rate": 2.415674590810369e-07, "loss": 24.031, "step": 32220 }, { "epoch": 255.7976071784646, "grad_norm": 687.1815795898438, "learning_rate": 2.4144276573551767e-07, "loss": 22.4337, "step": 32230 }, { "epoch": 255.87736789631106, "grad_norm": 203.11148071289062, "learning_rate": 2.413180745212843e-07, "loss": 23.0997, "step": 32240 }, { "epoch": 255.95712861415754, "grad_norm": 590.783935546875, "learning_rate": 2.411933854693925e-07, "loss": 22.0966, "step": 32250 }, { "epoch": 256.03190428713856, "grad_norm": 681.7847900390625, "learning_rate": 2.4106869861089786e-07, "loss": 22.7958, "step": 32260 }, { "epoch": 256.11166500498507, "grad_norm": 388.5810241699219, "learning_rate": 2.4094401397685497e-07, "loss": 24.661, "step": 32270 }, { "epoch": 256.1914257228315, "grad_norm": 329.6586608886719, "learning_rate": 2.4081933159831836e-07, "loss": 23.5575, "step": 32280 }, { "epoch": 256.271186440678, "grad_norm": 1055.8572998046875, "learning_rate": 2.406946515063414e-07, "loss": 23.048, "step": 32290 }, { "epoch": 256.35094715852443, "grad_norm": 590.2950439453125, "learning_rate": 2.405699737319775e-07, "loss": 23.0763, "step": 32300 }, { "epoch": 256.35094715852443, "eval_loss": 2.9687814712524414, "eval_mae": 1.2883718013763428, "eval_mse": 2.9687812328338623, "eval_r2": 0.10552072525024414, "eval_rmse": 1.723015157459116, "eval_runtime": 9.088, "eval_samples_per_second": 441.349, "eval_steps_per_second": 13.864, "step": 32300 }, { "epoch": 256.4307078763709, "grad_norm": 905.7960815429688, "learning_rate": 2.4044529830627907e-07, "loss": 22.4415, "step": 32310 }, { "epoch": 256.51046859421734, "grad_norm": 131.0567169189453, "learning_rate": 2.403206252602981e-07, "loss": 25.3957, "step": 32320 }, { "epoch": 256.5902293120638, 
"grad_norm": 357.97283935546875, "learning_rate": 2.4019595462508605e-07, "loss": 24.214, "step": 32330 }, { "epoch": 256.66999002991025, "grad_norm": 562.968994140625, "learning_rate": 2.4007128643169364e-07, "loss": 21.4636, "step": 32340 }, { "epoch": 256.7497507477567, "grad_norm": 686.8287963867188, "learning_rate": 2.399466207111709e-07, "loss": 22.2345, "step": 32350 }, { "epoch": 256.8295114656032, "grad_norm": 1011.892333984375, "learning_rate": 2.3982195749456755e-07, "loss": 21.5997, "step": 32360 }, { "epoch": 256.9092721834497, "grad_norm": 774.2239990234375, "learning_rate": 2.3969729681293256e-07, "loss": 23.8638, "step": 32370 }, { "epoch": 256.9890329012961, "grad_norm": 479.4481506347656, "learning_rate": 2.395726386973141e-07, "loss": 22.1618, "step": 32380 }, { "epoch": 257.0638085742772, "grad_norm": 1316.7379150390625, "learning_rate": 2.394479831787598e-07, "loss": 22.9087, "step": 32390 }, { "epoch": 257.14356929212363, "grad_norm": 496.0229187011719, "learning_rate": 2.3932333028831674e-07, "loss": 23.5971, "step": 32400 }, { "epoch": 257.14356929212363, "eval_loss": 2.9773855209350586, "eval_mae": 1.3040708303451538, "eval_mse": 2.9773855209350586, "eval_r2": 0.10292834043502808, "eval_rmse": 1.7255102204667054, "eval_runtime": 9.0936, "eval_samples_per_second": 441.079, "eval_steps_per_second": 13.856, "step": 32400 }, { "epoch": 257.2233300099701, "grad_norm": 2317.558349609375, "learning_rate": 2.3919868005703136e-07, "loss": 22.1975, "step": 32410 }, { "epoch": 257.30309072781654, "grad_norm": 139.14430236816406, "learning_rate": 2.390740325159492e-07, "loss": 22.7621, "step": 32420 }, { "epoch": 257.382851445663, "grad_norm": 503.3020324707031, "learning_rate": 2.3894938769611535e-07, "loss": 24.6859, "step": 32430 }, { "epoch": 257.46261216350945, "grad_norm": 765.3264770507812, "learning_rate": 2.3882474562857407e-07, "loss": 23.1125, "step": 32440 }, { "epoch": 257.54237288135596, "grad_norm": 495.54620361328125, "learning_rate": 
2.3870010634436913e-07, "loss": 23.5224, "step": 32450 }, { "epoch": 257.6221335992024, "grad_norm": 534.0633544921875, "learning_rate": 2.385754698745435e-07, "loss": 21.9895, "step": 32460 }, { "epoch": 257.70189431704887, "grad_norm": 245.5608367919922, "learning_rate": 2.384508362501395e-07, "loss": 24.474, "step": 32470 }, { "epoch": 257.7816550348953, "grad_norm": 184.3255615234375, "learning_rate": 2.383262055021985e-07, "loss": 22.3899, "step": 32480 }, { "epoch": 257.8614157527418, "grad_norm": 368.9948425292969, "learning_rate": 2.3820157766176138e-07, "loss": 23.7286, "step": 32490 }, { "epoch": 257.94117647058823, "grad_norm": 268.95953369140625, "learning_rate": 2.380769527598684e-07, "loss": 23.5707, "step": 32500 }, { "epoch": 257.94117647058823, "eval_loss": 2.9620044231414795, "eval_mae": 1.2903201580047607, "eval_mse": 2.9620041847229004, "eval_r2": 0.1075626015663147, "eval_rmse": 1.7210474092025765, "eval_runtime": 9.0723, "eval_samples_per_second": 442.117, "eval_steps_per_second": 13.888, "step": 32500 }, { "epoch": 258.0159521435693, "grad_norm": 369.2539367675781, "learning_rate": 2.3795233082755888e-07, "loss": 21.1234, "step": 32510 }, { "epoch": 258.09571286141573, "grad_norm": 300.13201904296875, "learning_rate": 2.3782771189587135e-07, "loss": 23.8872, "step": 32520 }, { "epoch": 258.1754735792622, "grad_norm": 201.40646362304688, "learning_rate": 2.377030959958438e-07, "loss": 21.8254, "step": 32530 }, { "epoch": 258.2552342971087, "grad_norm": 402.68914794921875, "learning_rate": 2.3757848315851342e-07, "loss": 22.2791, "step": 32540 }, { "epoch": 258.33499501495515, "grad_norm": 332.69671630859375, "learning_rate": 2.374538734149165e-07, "loss": 24.6208, "step": 32550 }, { "epoch": 258.4147557328016, "grad_norm": 356.6276550292969, "learning_rate": 2.3732926679608877e-07, "loss": 23.3802, "step": 32560 }, { "epoch": 258.49451645064806, "grad_norm": 396.5636291503906, "learning_rate": 2.3720466333306488e-07, "loss": 22.3744, "step": 
32570 }, { "epoch": 258.5742771684945, "grad_norm": 252.76632690429688, "learning_rate": 2.3708006305687905e-07, "loss": 22.4334, "step": 32580 }, { "epoch": 258.654037886341, "grad_norm": 1583.0286865234375, "learning_rate": 2.369554659985644e-07, "loss": 22.3503, "step": 32590 }, { "epoch": 258.7337986041874, "grad_norm": 1470.625244140625, "learning_rate": 2.368308721891534e-07, "loss": 24.4704, "step": 32600 }, { "epoch": 258.7337986041874, "eval_loss": 2.960970640182495, "eval_mae": 1.2870057821273804, "eval_mse": 2.960970640182495, "eval_r2": 0.10787403583526611, "eval_rmse": 1.7207471168601431, "eval_runtime": 9.0696, "eval_samples_per_second": 442.246, "eval_steps_per_second": 13.893, "step": 32600 }, { "epoch": 258.8135593220339, "grad_norm": 312.400146484375, "learning_rate": 2.3670628165967785e-07, "loss": 24.4746, "step": 32610 }, { "epoch": 258.89332003988034, "grad_norm": 391.41143798828125, "learning_rate": 2.365816944411684e-07, "loss": 22.2742, "step": 32620 }, { "epoch": 258.97308075772685, "grad_norm": 1520.7578125, "learning_rate": 2.3645711056465505e-07, "loss": 23.984, "step": 32630 }, { "epoch": 259.0478564307079, "grad_norm": 184.56324768066406, "learning_rate": 2.36332530061167e-07, "loss": 21.9073, "step": 32640 }, { "epoch": 259.12761714855435, "grad_norm": 448.33758544921875, "learning_rate": 2.3620795296173267e-07, "loss": 23.8176, "step": 32650 }, { "epoch": 259.2073778664008, "grad_norm": 1436.3748779296875, "learning_rate": 2.3608337929737946e-07, "loss": 20.6676, "step": 32660 }, { "epoch": 259.28713858424726, "grad_norm": 457.44677734375, "learning_rate": 2.359588090991339e-07, "loss": 21.6573, "step": 32670 }, { "epoch": 259.3668993020937, "grad_norm": 1274.454345703125, "learning_rate": 2.3583424239802186e-07, "loss": 22.477, "step": 32680 }, { "epoch": 259.44666001994017, "grad_norm": 594.2935180664062, "learning_rate": 2.3570967922506823e-07, "loss": 23.9723, "step": 32690 }, { "epoch": 259.5264207377866, "grad_norm": 
292.4962158203125, "learning_rate": 2.355851196112971e-07, "loss": 23.6009, "step": 32700 }, { "epoch": 259.5264207377866, "eval_loss": 2.97013521194458, "eval_mae": 1.2906213998794556, "eval_mse": 2.970135450363159, "eval_r2": 0.10511273145675659, "eval_rmse": 1.7234080916495544, "eval_runtime": 9.0839, "eval_samples_per_second": 441.551, "eval_steps_per_second": 13.871, "step": 32700 }, { "epoch": 259.6061814556331, "grad_norm": 685.1624755859375, "learning_rate": 2.3546056358773136e-07, "loss": 23.991, "step": 32710 }, { "epoch": 259.6859421734796, "grad_norm": 976.3839721679688, "learning_rate": 2.3533601118539338e-07, "loss": 22.3304, "step": 32720 }, { "epoch": 259.76570289132604, "grad_norm": 1046.9388427734375, "learning_rate": 2.352114624353045e-07, "loss": 25.016, "step": 32730 }, { "epoch": 259.8454636091725, "grad_norm": 201.44747924804688, "learning_rate": 2.350869173684851e-07, "loss": 23.8758, "step": 32740 }, { "epoch": 259.92522432701895, "grad_norm": 196.49974060058594, "learning_rate": 2.349623760159548e-07, "loss": 23.2117, "step": 32750 }, { "epoch": 260.0, "grad_norm": 646.876220703125, "learning_rate": 2.348378384087319e-07, "loss": 22.9075, "step": 32760 }, { "epoch": 260.07976071784645, "grad_norm": 1059.302978515625, "learning_rate": 2.3471330457783435e-07, "loss": 23.9309, "step": 32770 }, { "epoch": 260.1595214356929, "grad_norm": 585.2809448242188, "learning_rate": 2.3458877455427865e-07, "loss": 25.4878, "step": 32780 }, { "epoch": 260.23928215353936, "grad_norm": 819.2096557617188, "learning_rate": 2.3446424836908068e-07, "loss": 21.9851, "step": 32790 }, { "epoch": 260.3190428713858, "grad_norm": 1196.5474853515625, "learning_rate": 2.343397260532551e-07, "loss": 21.1927, "step": 32800 }, { "epoch": 260.3190428713858, "eval_loss": 2.9644973278045654, "eval_mae": 1.293110728263855, "eval_mse": 2.9644970893859863, "eval_r2": 0.1068115234375, "eval_rmse": 1.7217714974368656, "eval_runtime": 9.0683, "eval_samples_per_second": 442.309, 
"eval_steps_per_second": 13.895, "step": 32800 }, { "epoch": 260.39880358923233, "grad_norm": 377.51409912109375, "learning_rate": 2.3421520763781583e-07, "loss": 21.3956, "step": 32810 }, { "epoch": 260.4785643070788, "grad_norm": 215.3258819580078, "learning_rate": 2.340906931537758e-07, "loss": 24.547, "step": 32820 }, { "epoch": 260.55832502492524, "grad_norm": 267.4694519042969, "learning_rate": 2.3396618263214671e-07, "loss": 22.4404, "step": 32830 }, { "epoch": 260.6380857427717, "grad_norm": 803.0358276367188, "learning_rate": 2.3384167610393968e-07, "loss": 22.7296, "step": 32840 }, { "epoch": 260.71784646061815, "grad_norm": 265.7320556640625, "learning_rate": 2.3371717360016442e-07, "loss": 22.8819, "step": 32850 }, { "epoch": 260.7976071784646, "grad_norm": 446.6425476074219, "learning_rate": 2.3359267515182992e-07, "loss": 23.9664, "step": 32860 }, { "epoch": 260.87736789631106, "grad_norm": 1641.0150146484375, "learning_rate": 2.33468180789944e-07, "loss": 22.9442, "step": 32870 }, { "epoch": 260.9571286141575, "grad_norm": 136.3773193359375, "learning_rate": 2.3334369054551372e-07, "loss": 23.8371, "step": 32880 }, { "epoch": 261.03190428713856, "grad_norm": 961.9443359375, "learning_rate": 2.3321920444954468e-07, "loss": 22.9601, "step": 32890 }, { "epoch": 261.11166500498507, "grad_norm": 441.1259765625, "learning_rate": 2.3309472253304186e-07, "loss": 23.7666, "step": 32900 }, { "epoch": 261.11166500498507, "eval_loss": 2.961627721786499, "eval_mae": 1.2911022901535034, "eval_mse": 2.96162748336792, "eval_r2": 0.10767608880996704, "eval_rmse": 1.720937966159129, "eval_runtime": 9.0616, "eval_samples_per_second": 442.638, "eval_steps_per_second": 13.905, "step": 32900 }, { "epoch": 261.1914257228315, "grad_norm": 1438.0609130859375, "learning_rate": 2.329702448270089e-07, "loss": 23.3852, "step": 32910 }, { "epoch": 261.271186440678, "grad_norm": 283.7239990234375, "learning_rate": 2.3284577136244858e-07, "loss": 22.6681, "step": 32920 }, { 
"epoch": 261.35094715852443, "grad_norm": 1353.781005859375, "learning_rate": 2.3272130217036275e-07, "loss": 21.8145, "step": 32930 }, { "epoch": 261.4307078763709, "grad_norm": 678.4915771484375, "learning_rate": 2.325968372817518e-07, "loss": 22.9129, "step": 32940 }, { "epoch": 261.51046859421734, "grad_norm": 603.8546142578125, "learning_rate": 2.324723767276152e-07, "loss": 22.0271, "step": 32950 }, { "epoch": 261.5902293120638, "grad_norm": 411.7084045410156, "learning_rate": 2.3234792053895156e-07, "loss": 23.6058, "step": 32960 }, { "epoch": 261.66999002991025, "grad_norm": 695.4976806640625, "learning_rate": 2.3222346874675823e-07, "loss": 23.9588, "step": 32970 }, { "epoch": 261.7497507477567, "grad_norm": 508.5700988769531, "learning_rate": 2.320990213820315e-07, "loss": 23.453, "step": 32980 }, { "epoch": 261.8295114656032, "grad_norm": 157.2057647705078, "learning_rate": 2.3197457847576638e-07, "loss": 23.8839, "step": 32990 }, { "epoch": 261.9092721834497, "grad_norm": 1685.7197265625, "learning_rate": 2.31850140058957e-07, "loss": 23.4671, "step": 33000 }, { "epoch": 261.9092721834497, "eval_loss": 2.977787971496582, "eval_mae": 1.2833104133605957, "eval_mse": 2.977787971496582, "eval_r2": 0.10280704498291016, "eval_rmse": 1.725626834369639, "eval_runtime": 9.097, "eval_samples_per_second": 440.916, "eval_steps_per_second": 13.851, "step": 33000 }, { "epoch": 261.9890329012961, "grad_norm": 975.1857299804688, "learning_rate": 2.317257061625964e-07, "loss": 23.8065, "step": 33010 }, { "epoch": 262.0638085742772, "grad_norm": 369.8857421875, "learning_rate": 2.3160127681767638e-07, "loss": 19.9302, "step": 33020 }, { "epoch": 262.14356929212363, "grad_norm": 1166.5467529296875, "learning_rate": 2.3147685205518742e-07, "loss": 22.5537, "step": 33030 }, { "epoch": 262.2233300099701, "grad_norm": 767.1246337890625, "learning_rate": 2.313524319061192e-07, "loss": 22.2758, "step": 33040 }, { "epoch": 262.30309072781654, "grad_norm": 984.523681640625, 
"learning_rate": 2.3122801640146018e-07, "loss": 23.4407, "step": 33050 }, { "epoch": 262.382851445663, "grad_norm": 477.3348693847656, "learning_rate": 2.3110360557219744e-07, "loss": 23.6952, "step": 33060 }, { "epoch": 262.46261216350945, "grad_norm": 435.17425537109375, "learning_rate": 2.309791994493172e-07, "loss": 24.8507, "step": 33070 }, { "epoch": 262.54237288135596, "grad_norm": 1697.8922119140625, "learning_rate": 2.308547980638042e-07, "loss": 23.2312, "step": 33080 }, { "epoch": 262.6221335992024, "grad_norm": 538.4093627929688, "learning_rate": 2.3073040144664227e-07, "loss": 22.7747, "step": 33090 }, { "epoch": 262.70189431704887, "grad_norm": 502.0633850097656, "learning_rate": 2.3060600962881386e-07, "loss": 24.6969, "step": 33100 }, { "epoch": 262.70189431704887, "eval_loss": 2.9655330181121826, "eval_mae": 1.2875216007232666, "eval_mse": 2.9655327796936035, "eval_r2": 0.10649949312210083, "eval_rmse": 1.722072234168359, "eval_runtime": 9.105, "eval_samples_per_second": 440.529, "eval_steps_per_second": 13.839, "step": 33100 }, { "epoch": 262.7816550348953, "grad_norm": 758.8497314453125, "learning_rate": 2.3048162264130041e-07, "loss": 23.9142, "step": 33110 }, { "epoch": 262.8614157527418, "grad_norm": 669.5830688476562, "learning_rate": 2.3035724051508187e-07, "loss": 23.165, "step": 33120 }, { "epoch": 262.94117647058823, "grad_norm": 499.54779052734375, "learning_rate": 2.3023286328113728e-07, "loss": 23.156, "step": 33130 }, { "epoch": 263.0159521435693, "grad_norm": 369.984619140625, "learning_rate": 2.301084909704444e-07, "loss": 20.9903, "step": 33140 }, { "epoch": 263.09571286141573, "grad_norm": 862.9248657226562, "learning_rate": 2.299841236139795e-07, "loss": 21.8262, "step": 33150 }, { "epoch": 263.1754735792622, "grad_norm": 345.6047058105469, "learning_rate": 2.298597612427181e-07, "loss": 23.0221, "step": 33160 }, { "epoch": 263.2552342971087, "grad_norm": 238.32444763183594, "learning_rate": 2.2973540388763393e-07, "loss": 
22.3937, "step": 33170 }, { "epoch": 263.33499501495515, "grad_norm": 738.8783569335938, "learning_rate": 2.296110515796999e-07, "loss": 25.3277, "step": 33180 }, { "epoch": 263.4147557328016, "grad_norm": 1106.19921875, "learning_rate": 2.2948670434988743e-07, "loss": 22.6522, "step": 33190 }, { "epoch": 263.49451645064806, "grad_norm": 758.0292358398438, "learning_rate": 2.2936236222916672e-07, "loss": 23.0677, "step": 33200 }, { "epoch": 263.49451645064806, "eval_loss": 2.9774444103240967, "eval_mae": 1.2838081121444702, "eval_mse": 2.9774444103240967, "eval_r2": 0.10291057825088501, "eval_rmse": 1.7255272847231644, "eval_runtime": 9.0703, "eval_samples_per_second": 442.215, "eval_steps_per_second": 13.892, "step": 33200 }, { "epoch": 263.5742771684945, "grad_norm": 295.3321228027344, "learning_rate": 2.292380252485069e-07, "loss": 21.6681, "step": 33210 }, { "epoch": 263.654037886341, "grad_norm": 638.5050048828125, "learning_rate": 2.2911369343887545e-07, "loss": 23.3144, "step": 33220 }, { "epoch": 263.7337986041874, "grad_norm": 403.549072265625, "learning_rate": 2.2898936683123882e-07, "loss": 23.0726, "step": 33230 }, { "epoch": 263.8135593220339, "grad_norm": 254.73532104492188, "learning_rate": 2.2886504545656204e-07, "loss": 24.1246, "step": 33240 }, { "epoch": 263.89332003988034, "grad_norm": 839.294189453125, "learning_rate": 2.287407293458091e-07, "loss": 25.1882, "step": 33250 }, { "epoch": 263.97308075772685, "grad_norm": 319.6940612792969, "learning_rate": 2.286164185299423e-07, "loss": 22.9318, "step": 33260 }, { "epoch": 264.0478564307079, "grad_norm": 268.8839111328125, "learning_rate": 2.2849211303992272e-07, "loss": 19.9687, "step": 33270 }, { "epoch": 264.12761714855435, "grad_norm": 591.1959228515625, "learning_rate": 2.2836781290671034e-07, "loss": 23.6758, "step": 33280 }, { "epoch": 264.2073778664008, "grad_norm": 873.9620361328125, "learning_rate": 2.2824351816126365e-07, "loss": 22.6141, "step": 33290 }, { "epoch": 264.28713858424726, 
"grad_norm": 595.4581298828125, "learning_rate": 2.2811922883453989e-07, "loss": 24.4165, "step": 33300 }, { "epoch": 264.28713858424726, "eval_loss": 2.9677834510803223, "eval_mae": 1.286590337753296, "eval_mse": 2.967783212661743, "eval_r2": 0.10582143068313599, "eval_rmse": 1.722725518665624, "eval_runtime": 9.0744, "eval_samples_per_second": 442.01, "eval_steps_per_second": 13.885, "step": 33300 }, { "epoch": 264.3668993020937, "grad_norm": 220.30337524414062, "learning_rate": 2.2799494495749457e-07, "loss": 24.6584, "step": 33310 }, { "epoch": 264.44666001994017, "grad_norm": 304.1072082519531, "learning_rate": 2.2787066656108238e-07, "loss": 23.2196, "step": 33320 }, { "epoch": 264.5264207377866, "grad_norm": 1391.931884765625, "learning_rate": 2.2774639367625636e-07, "loss": 23.9871, "step": 33330 }, { "epoch": 264.6061814556331, "grad_norm": 250.1853790283203, "learning_rate": 2.2762212633396813e-07, "loss": 23.3213, "step": 33340 }, { "epoch": 264.6859421734796, "grad_norm": 300.67144775390625, "learning_rate": 2.2749786456516823e-07, "loss": 21.7446, "step": 33350 }, { "epoch": 264.76570289132604, "grad_norm": 405.91485595703125, "learning_rate": 2.2737360840080534e-07, "loss": 21.5377, "step": 33360 }, { "epoch": 264.8454636091725, "grad_norm": 350.0721740722656, "learning_rate": 2.2724935787182719e-07, "loss": 24.2495, "step": 33370 }, { "epoch": 264.92522432701895, "grad_norm": 703.5527954101562, "learning_rate": 2.271251130091798e-07, "loss": 23.5282, "step": 33380 }, { "epoch": 265.0, "grad_norm": 352.90264892578125, "learning_rate": 2.2700087384380805e-07, "loss": 21.1718, "step": 33390 }, { "epoch": 265.07976071784645, "grad_norm": 167.44500732421875, "learning_rate": 2.2687664040665508e-07, "loss": 25.0428, "step": 33400 }, { "epoch": 265.07976071784645, "eval_loss": 2.9678587913513184, "eval_mae": 1.283751368522644, "eval_mse": 2.9678587913513184, "eval_r2": 0.10579860210418701, "eval_rmse": 1.722747454315727, "eval_runtime": 9.096, 
"eval_samples_per_second": 440.962, "eval_steps_per_second": 13.852, "step": 33400 }, { "epoch": 265.1595214356929, "grad_norm": 1086.7607421875, "learning_rate": 2.2675241272866288e-07, "loss": 22.6771, "step": 33410 }, { "epoch": 265.23928215353936, "grad_norm": 171.2627716064453, "learning_rate": 2.2662819084077185e-07, "loss": 22.4117, "step": 33420 }, { "epoch": 265.3190428713858, "grad_norm": 365.1992492675781, "learning_rate": 2.2650397477392102e-07, "loss": 23.4348, "step": 33430 }, { "epoch": 265.39880358923233, "grad_norm": 253.0224609375, "learning_rate": 2.2637976455904806e-07, "loss": 21.7368, "step": 33440 }, { "epoch": 265.4785643070788, "grad_norm": 929.4453735351562, "learning_rate": 2.2625556022708893e-07, "loss": 22.2043, "step": 33450 }, { "epoch": 265.55832502492524, "grad_norm": 283.2198181152344, "learning_rate": 2.261313618089782e-07, "loss": 23.4093, "step": 33460 }, { "epoch": 265.6380857427717, "grad_norm": 1322.727294921875, "learning_rate": 2.2600716933564916e-07, "loss": 23.9993, "step": 33470 }, { "epoch": 265.71784646061815, "grad_norm": 414.2745666503906, "learning_rate": 2.258829828380336e-07, "loss": 22.8635, "step": 33480 }, { "epoch": 265.7976071784646, "grad_norm": 1287.536376953125, "learning_rate": 2.2575880234706143e-07, "loss": 22.029, "step": 33490 }, { "epoch": 265.87736789631106, "grad_norm": 283.93707275390625, "learning_rate": 2.2563462789366157e-07, "loss": 25.2381, "step": 33500 }, { "epoch": 265.87736789631106, "eval_loss": 2.9643938541412354, "eval_mae": 1.294697880744934, "eval_mse": 2.9643938541412354, "eval_r2": 0.10684263706207275, "eval_rmse": 1.7217415178072566, "eval_runtime": 9.0899, "eval_samples_per_second": 441.26, "eval_steps_per_second": 13.862, "step": 33500 }, { "epoch": 265.9571286141575, "grad_norm": 976.2879638671875, "learning_rate": 2.2552287607328728e-07, "loss": 24.0215, "step": 33510 }, { "epoch": 266.03190428713856, "grad_norm": 420.861083984375, "learning_rate": 2.2539871317647784e-07, 
"loss": 20.7929, "step": 33520 }, { "epoch": 266.11166500498507, "grad_norm": 324.5087585449219, "learning_rate": 2.252745564069253e-07, "loss": 22.9747, "step": 33530 }, { "epoch": 266.1914257228315, "grad_norm": 211.6680908203125, "learning_rate": 2.2515040579555258e-07, "loss": 22.1335, "step": 33540 }, { "epoch": 266.271186440678, "grad_norm": 562.0562133789062, "learning_rate": 2.2502626137328074e-07, "loss": 22.2194, "step": 33550 }, { "epoch": 266.35094715852443, "grad_norm": 475.1398620605469, "learning_rate": 2.2490212317102964e-07, "loss": 22.0623, "step": 33560 }, { "epoch": 266.4307078763709, "grad_norm": 356.03564453125, "learning_rate": 2.2477799121971735e-07, "loss": 22.0961, "step": 33570 }, { "epoch": 266.51046859421734, "grad_norm": 485.0445251464844, "learning_rate": 2.2465386555026062e-07, "loss": 24.8321, "step": 33580 }, { "epoch": 266.5902293120638, "grad_norm": 270.8175964355469, "learning_rate": 2.245297461935743e-07, "loss": 22.296, "step": 33590 }, { "epoch": 266.66999002991025, "grad_norm": 637.0756225585938, "learning_rate": 2.2440563318057204e-07, "loss": 23.2108, "step": 33600 }, { "epoch": 266.66999002991025, "eval_loss": 2.9591786861419678, "eval_mae": 1.293260931968689, "eval_mse": 2.9591786861419678, "eval_r2": 0.1084139347076416, "eval_rmse": 1.7202263473572215, "eval_runtime": 9.0779, "eval_samples_per_second": 441.842, "eval_steps_per_second": 13.88, "step": 33600 }, { "epoch": 266.7497507477567, "grad_norm": 958.974365234375, "learning_rate": 2.242815265421656e-07, "loss": 25.1626, "step": 33610 }, { "epoch": 266.8295114656032, "grad_norm": 366.7208251953125, "learning_rate": 2.2415742630926533e-07, "loss": 22.6531, "step": 33620 }, { "epoch": 266.9092721834497, "grad_norm": 643.1389770507812, "learning_rate": 2.2403333251278005e-07, "loss": 22.5973, "step": 33630 }, { "epoch": 266.9890329012961, "grad_norm": 793.8681640625, "learning_rate": 2.239092451836167e-07, "loss": 25.9147, "step": 33640 }, { "epoch": 267.0638085742772, 
"grad_norm": 199.65530395507812, "learning_rate": 2.2378516435268083e-07, "loss": 21.7216, "step": 33650 }, { "epoch": 267.14356929212363, "grad_norm": 470.8975524902344, "learning_rate": 2.236610900508763e-07, "loss": 23.0512, "step": 33660 }, { "epoch": 267.2233300099701, "grad_norm": 669.757080078125, "learning_rate": 2.2353702230910548e-07, "loss": 21.9978, "step": 33670 }, { "epoch": 267.30309072781654, "grad_norm": 1159.8905029296875, "learning_rate": 2.2341296115826888e-07, "loss": 22.9267, "step": 33680 }, { "epoch": 267.382851445663, "grad_norm": 713.3804321289062, "learning_rate": 2.2328890662926542e-07, "loss": 23.9274, "step": 33690 }, { "epoch": 267.46261216350945, "grad_norm": 351.3713073730469, "learning_rate": 2.2316485875299245e-07, "loss": 24.1228, "step": 33700 }, { "epoch": 267.46261216350945, "eval_loss": 2.970219135284424, "eval_mae": 1.299340009689331, "eval_mse": 2.970219135284424, "eval_r2": 0.1050875186920166, "eval_rmse": 1.723432370383133, "eval_runtime": 9.0756, "eval_samples_per_second": 441.955, "eval_steps_per_second": 13.883, "step": 33700 }, { "epoch": 267.54237288135596, "grad_norm": 586.8211669921875, "learning_rate": 2.230408175603458e-07, "loss": 22.3554, "step": 33710 }, { "epoch": 267.6221335992024, "grad_norm": 1108.204345703125, "learning_rate": 2.229167830822194e-07, "loss": 24.1477, "step": 33720 }, { "epoch": 267.70189431704887, "grad_norm": 360.2843322753906, "learning_rate": 2.227927553495054e-07, "loss": 22.8538, "step": 33730 }, { "epoch": 267.7816550348953, "grad_norm": 742.3580932617188, "learning_rate": 2.226687343930946e-07, "loss": 24.0058, "step": 33740 }, { "epoch": 267.8614157527418, "grad_norm": 1537.28369140625, "learning_rate": 2.2254472024387603e-07, "loss": 23.1938, "step": 33750 }, { "epoch": 267.94117647058823, "grad_norm": 407.59381103515625, "learning_rate": 2.2242071293273682e-07, "loss": 22.7664, "step": 33760 }, { "epoch": 268.0159521435693, "grad_norm": 220.43446350097656, "learning_rate": 
2.222967124905627e-07, "loss": 20.4323, "step": 33770 }, { "epoch": 268.09571286141573, "grad_norm": 549.47021484375, "learning_rate": 2.2217271894823734e-07, "loss": 21.525, "step": 33780 }, { "epoch": 268.1754735792622, "grad_norm": 337.71630859375, "learning_rate": 2.22048732336643e-07, "loss": 24.2887, "step": 33790 }, { "epoch": 268.2552342971087, "grad_norm": 461.509521484375, "learning_rate": 2.2192475268665996e-07, "loss": 23.4435, "step": 33800 }, { "epoch": 268.2552342971087, "eval_loss": 2.960710048675537, "eval_mae": 1.2898505926132202, "eval_mse": 2.960710048675537, "eval_r2": 0.1079525351524353, "eval_rmse": 1.7206713947397212, "eval_runtime": 9.0974, "eval_samples_per_second": 440.897, "eval_steps_per_second": 13.85, "step": 33800 }, { "epoch": 268.33499501495515, "grad_norm": 1379.5404052734375, "learning_rate": 2.2180078002916714e-07, "loss": 21.4999, "step": 33810 }, { "epoch": 268.4147557328016, "grad_norm": 468.10601806640625, "learning_rate": 2.216768143950412e-07, "loss": 23.6818, "step": 33820 }, { "epoch": 268.49451645064806, "grad_norm": 302.5543518066406, "learning_rate": 2.2155285581515746e-07, "loss": 22.8665, "step": 33830 }, { "epoch": 268.5742771684945, "grad_norm": 536.4502563476562, "learning_rate": 2.2142890432038938e-07, "loss": 23.9307, "step": 33840 }, { "epoch": 268.654037886341, "grad_norm": 948.4057006835938, "learning_rate": 2.2130495994160854e-07, "loss": 22.1223, "step": 33850 }, { "epoch": 268.7337986041874, "grad_norm": 306.21533203125, "learning_rate": 2.2118102270968494e-07, "loss": 24.5972, "step": 33860 }, { "epoch": 268.8135593220339, "grad_norm": 840.4362182617188, "learning_rate": 2.2105709265548657e-07, "loss": 24.0767, "step": 33870 }, { "epoch": 268.89332003988034, "grad_norm": 420.22808837890625, "learning_rate": 2.209331698098798e-07, "loss": 25.0304, "step": 33880 }, { "epoch": 268.97308075772685, "grad_norm": 587.11474609375, "learning_rate": 2.2080925420372916e-07, "loss": 21.3054, "step": 33890 }, { 
"epoch": 269.0478564307079, "grad_norm": 547.7639770507812, "learning_rate": 2.2068534586789733e-07, "loss": 19.684, "step": 33900 }, { "epoch": 269.0478564307079, "eval_loss": 2.9617063999176025, "eval_mae": 1.2873121500015259, "eval_mse": 2.9617063999176025, "eval_r2": 0.10765236616134644, "eval_rmse": 1.7209608943603578, "eval_runtime": 9.0903, "eval_samples_per_second": 441.239, "eval_steps_per_second": 13.861, "step": 33900 }, { "epoch": 269.12761714855435, "grad_norm": 651.6534423828125, "learning_rate": 2.205614448332454e-07, "loss": 23.9525, "step": 33910 }, { "epoch": 269.2073778664008, "grad_norm": 1018.7666015625, "learning_rate": 2.204375511306323e-07, "loss": 25.1037, "step": 33920 }, { "epoch": 269.28713858424726, "grad_norm": 970.5116577148438, "learning_rate": 2.203136647909153e-07, "loss": 22.9558, "step": 33930 }, { "epoch": 269.3668993020937, "grad_norm": 222.009765625, "learning_rate": 2.2018978584494986e-07, "loss": 24.3678, "step": 33940 }, { "epoch": 269.44666001994017, "grad_norm": 195.72454833984375, "learning_rate": 2.2006591432358968e-07, "loss": 22.8183, "step": 33950 }, { "epoch": 269.5264207377866, "grad_norm": 372.6313171386719, "learning_rate": 2.199420502576864e-07, "loss": 21.7113, "step": 33960 }, { "epoch": 269.6061814556331, "grad_norm": 259.13079833984375, "learning_rate": 2.1981819367808982e-07, "loss": 24.587, "step": 33970 }, { "epoch": 269.6859421734796, "grad_norm": 737.356201171875, "learning_rate": 2.1969434461564812e-07, "loss": 22.8807, "step": 33980 }, { "epoch": 269.76570289132604, "grad_norm": 760.752685546875, "learning_rate": 2.195705031012074e-07, "loss": 22.7561, "step": 33990 }, { "epoch": 269.8454636091725, "grad_norm": 552.60205078125, "learning_rate": 2.1944666916561202e-07, "loss": 24.186, "step": 34000 }, { "epoch": 269.8454636091725, "eval_loss": 2.962271213531494, "eval_mae": 1.285582423210144, "eval_mse": 2.962270975112915, "eval_r2": 0.10748225450515747, "eval_rmse": 1.7211249156040114, "eval_runtime": 
9.072, "eval_samples_per_second": 442.129, "eval_steps_per_second": 13.889, "step": 34000 }, { "epoch": 269.92522432701895, "grad_norm": 737.112548828125, "learning_rate": 2.1932284283970417e-07, "loss": 23.0482, "step": 34010 }, { "epoch": 270.0, "grad_norm": 179.2029266357422, "learning_rate": 2.1919902415432446e-07, "loss": 19.8481, "step": 34020 }, { "epoch": 270.07976071784645, "grad_norm": 638.3309326171875, "learning_rate": 2.1907521314031148e-07, "loss": 23.9653, "step": 34030 }, { "epoch": 270.1595214356929, "grad_norm": 1045.047119140625, "learning_rate": 2.1895140982850197e-07, "loss": 23.5545, "step": 34040 }, { "epoch": 270.23928215353936, "grad_norm": 982.6498413085938, "learning_rate": 2.1882761424973047e-07, "loss": 22.3798, "step": 34050 }, { "epoch": 270.3190428713858, "grad_norm": 318.80615234375, "learning_rate": 2.1870382643482995e-07, "loss": 21.9895, "step": 34060 }, { "epoch": 270.39880358923233, "grad_norm": 856.8302001953125, "learning_rate": 2.1858004641463139e-07, "loss": 20.5062, "step": 34070 }, { "epoch": 270.4785643070788, "grad_norm": 601.9522094726562, "learning_rate": 2.1845627421996357e-07, "loss": 23.5981, "step": 34080 }, { "epoch": 270.55832502492524, "grad_norm": 570.8819580078125, "learning_rate": 2.183325098816537e-07, "loss": 23.5832, "step": 34090 }, { "epoch": 270.6380857427717, "grad_norm": 380.88885498046875, "learning_rate": 2.1820875343052662e-07, "loss": 23.4134, "step": 34100 }, { "epoch": 270.6380857427717, "eval_loss": 2.9615285396575928, "eval_mae": 1.2914930582046509, "eval_mse": 2.9615285396575928, "eval_r2": 0.10770589113235474, "eval_rmse": 1.7209092188891293, "eval_runtime": 9.0815, "eval_samples_per_second": 441.666, "eval_steps_per_second": 13.874, "step": 34100 }, { "epoch": 270.71784646061815, "grad_norm": 704.3423461914062, "learning_rate": 2.1808500489740552e-07, "loss": 22.3576, "step": 34110 }, { "epoch": 270.7976071784646, "grad_norm": 511.343017578125, "learning_rate": 2.179612643131115e-07, 
"loss": 24.4609, "step": 34120 }, { "epoch": 270.87736789631106, "grad_norm": 784.0336303710938, "learning_rate": 2.178375317084637e-07, "loss": 24.3966, "step": 34130 }, { "epoch": 270.9571286141575, "grad_norm": 842.5420532226562, "learning_rate": 2.1771380711427933e-07, "loss": 23.4618, "step": 34140 }, { "epoch": 271.03190428713856, "grad_norm": 345.5811462402344, "learning_rate": 2.1759009056137346e-07, "loss": 22.0152, "step": 34150 }, { "epoch": 271.11166500498507, "grad_norm": 315.23004150390625, "learning_rate": 2.1746638208055917e-07, "loss": 23.3748, "step": 34160 }, { "epoch": 271.1914257228315, "grad_norm": 208.2379608154297, "learning_rate": 2.1734268170264768e-07, "loss": 22.5506, "step": 34170 }, { "epoch": 271.271186440678, "grad_norm": 330.3124084472656, "learning_rate": 2.1721898945844824e-07, "loss": 22.3357, "step": 34180 }, { "epoch": 271.35094715852443, "grad_norm": 461.0664978027344, "learning_rate": 2.1709530537876772e-07, "loss": 22.663, "step": 34190 }, { "epoch": 271.4307078763709, "grad_norm": 825.6907958984375, "learning_rate": 2.1697162949441133e-07, "loss": 24.8071, "step": 34200 }, { "epoch": 271.4307078763709, "eval_loss": 2.970717191696167, "eval_mae": 1.2850940227508545, "eval_mse": 2.970716953277588, "eval_r2": 0.10493749380111694, "eval_rmse": 1.7235767906529689, "eval_runtime": 9.0672, "eval_samples_per_second": 442.366, "eval_steps_per_second": 13.896, "step": 34200 }, { "epoch": 271.51046859421734, "grad_norm": 412.7564697265625, "learning_rate": 2.1684796183618198e-07, "loss": 22.3887, "step": 34210 }, { "epoch": 271.5902293120638, "grad_norm": 289.83331298828125, "learning_rate": 2.1672430243488068e-07, "loss": 23.1303, "step": 34220 }, { "epoch": 271.66999002991025, "grad_norm": 297.7489929199219, "learning_rate": 2.166006513213065e-07, "loss": 23.5059, "step": 34230 }, { "epoch": 271.7497507477567, "grad_norm": 964.3274536132812, "learning_rate": 2.1647700852625605e-07, "loss": 23.5733, "step": 34240 }, { "epoch": 
271.8295114656032, "grad_norm": 617.3482055664062, "learning_rate": 2.163533740805242e-07, "loss": 21.2103, "step": 34250 }, { "epoch": 271.9092721834497, "grad_norm": 525.2120361328125, "learning_rate": 2.1622974801490363e-07, "loss": 22.3432, "step": 34260 }, { "epoch": 271.9890329012961, "grad_norm": 508.3149719238281, "learning_rate": 2.161061303601851e-07, "loss": 25.6466, "step": 34270 }, { "epoch": 272.0638085742772, "grad_norm": 1264.3121337890625, "learning_rate": 2.1598252114715698e-07, "loss": 21.0367, "step": 34280 }, { "epoch": 272.14356929212363, "grad_norm": 820.3016357421875, "learning_rate": 2.1585892040660565e-07, "loss": 21.2672, "step": 34290 }, { "epoch": 272.2233300099701, "grad_norm": 486.15887451171875, "learning_rate": 2.1573532816931545e-07, "loss": 23.8251, "step": 34300 }, { "epoch": 272.2233300099701, "eval_loss": 2.9611992835998535, "eval_mae": 1.287341594696045, "eval_mse": 2.9611990451812744, "eval_r2": 0.10780519247055054, "eval_rmse": 1.7208134835540063, "eval_runtime": 9.1254, "eval_samples_per_second": 439.544, "eval_steps_per_second": 13.808, "step": 34300 }, { "epoch": 272.30309072781654, "grad_norm": 546.13037109375, "learning_rate": 2.1561174446606867e-07, "loss": 23.5214, "step": 34310 }, { "epoch": 272.382851445663, "grad_norm": 800.3176879882812, "learning_rate": 2.1548816932764532e-07, "loss": 23.1447, "step": 34320 }, { "epoch": 272.46261216350945, "grad_norm": 363.2491455078125, "learning_rate": 2.153646027848232e-07, "loss": 22.5961, "step": 34330 }, { "epoch": 272.54237288135596, "grad_norm": 461.95184326171875, "learning_rate": 2.1524104486837822e-07, "loss": 22.1595, "step": 34340 }, { "epoch": 272.6221335992024, "grad_norm": 179.94744873046875, "learning_rate": 2.1511749560908403e-07, "loss": 24.6815, "step": 34350 }, { "epoch": 272.70189431704887, "grad_norm": 446.2342834472656, "learning_rate": 2.1499395503771205e-07, "loss": 24.2278, "step": 34360 }, { "epoch": 272.7816550348953, "grad_norm": 617.9832153320312, 
"learning_rate": 2.1487042318503172e-07, "loss": 22.8166, "step": 34370 }, { "epoch": 272.8614157527418, "grad_norm": 328.90850830078125, "learning_rate": 2.1474690008180995e-07, "loss": 23.1608, "step": 34380 }, { "epoch": 272.94117647058823, "grad_norm": 463.5259704589844, "learning_rate": 2.1462338575881195e-07, "loss": 23.3504, "step": 34390 }, { "epoch": 273.0159521435693, "grad_norm": 176.9029083251953, "learning_rate": 2.1449988024680032e-07, "loss": 21.4252, "step": 34400 }, { "epoch": 273.0159521435693, "eval_loss": 2.968507766723633, "eval_mae": 1.29869544506073, "eval_mse": 2.9685075283050537, "eval_r2": 0.10560321807861328, "eval_rmse": 1.7229357295920975, "eval_runtime": 9.0848, "eval_samples_per_second": 441.505, "eval_steps_per_second": 13.869, "step": 34400 }, { "epoch": 273.09571286141573, "grad_norm": 886.4635009765625, "learning_rate": 2.1437638357653583e-07, "loss": 22.1249, "step": 34410 }, { "epoch": 273.1754735792622, "grad_norm": 790.850830078125, "learning_rate": 2.142528957787767e-07, "loss": 23.062, "step": 34420 }, { "epoch": 273.2552342971087, "grad_norm": 862.314697265625, "learning_rate": 2.1412941688427915e-07, "loss": 21.4663, "step": 34430 }, { "epoch": 273.33499501495515, "grad_norm": 408.88726806640625, "learning_rate": 2.1400594692379714e-07, "loss": 25.0917, "step": 34440 }, { "epoch": 273.4147557328016, "grad_norm": 1136.371826171875, "learning_rate": 2.1388248592808243e-07, "loss": 23.7612, "step": 34450 }, { "epoch": 273.49451645064806, "grad_norm": 459.9880065917969, "learning_rate": 2.137590339278846e-07, "loss": 25.5118, "step": 34460 }, { "epoch": 273.5742771684945, "grad_norm": 501.0910949707031, "learning_rate": 2.1363559095395073e-07, "loss": 22.8884, "step": 34470 }, { "epoch": 273.654037886341, "grad_norm": 395.23516845703125, "learning_rate": 2.1351215703702587e-07, "loss": 24.1507, "step": 34480 }, { "epoch": 273.7337986041874, "grad_norm": 1578.13623046875, "learning_rate": 2.1338873220785285e-07, "loss": 
23.6566, "step": 34490 }, { "epoch": 273.8135593220339, "grad_norm": 566.108154296875, "learning_rate": 2.1326531649717214e-07, "loss": 20.345, "step": 34500 }, { "epoch": 273.8135593220339, "eval_loss": 2.959744453430176, "eval_mae": 1.2896983623504639, "eval_mse": 2.959744453430176, "eval_r2": 0.10824346542358398, "eval_rmse": 1.7203907850922058, "eval_runtime": 9.0942, "eval_samples_per_second": 441.052, "eval_steps_per_second": 13.855, "step": 34500 }, { "epoch": 273.89332003988034, "grad_norm": 167.27906799316406, "learning_rate": 2.1314190993572196e-07, "loss": 24.3434, "step": 34510 }, { "epoch": 273.97308075772685, "grad_norm": 1610.5982666015625, "learning_rate": 2.1301851255423826e-07, "loss": 21.422, "step": 34520 }, { "epoch": 274.0478564307079, "grad_norm": 1023.9450073242188, "learning_rate": 2.128951243834546e-07, "loss": 21.6396, "step": 34530 }, { "epoch": 274.12761714855435, "grad_norm": 578.3451538085938, "learning_rate": 2.1277174545410246e-07, "loss": 23.1165, "step": 34540 }, { "epoch": 274.2073778664008, "grad_norm": 815.1294555664062, "learning_rate": 2.1264837579691085e-07, "loss": 25.2555, "step": 34550 }, { "epoch": 274.28713858424726, "grad_norm": 279.6356506347656, "learning_rate": 2.1252501544260654e-07, "loss": 21.8185, "step": 34560 }, { "epoch": 274.3668993020937, "grad_norm": 782.1747436523438, "learning_rate": 2.1240166442191388e-07, "loss": 23.5705, "step": 34570 }, { "epoch": 274.44666001994017, "grad_norm": 440.4573059082031, "learning_rate": 2.1227832276555507e-07, "loss": 23.1609, "step": 34580 }, { "epoch": 274.5264207377866, "grad_norm": 549.3892211914062, "learning_rate": 2.121549905042499e-07, "loss": 21.4242, "step": 34590 }, { "epoch": 274.6061814556331, "grad_norm": 1361.94189453125, "learning_rate": 2.120316676687158e-07, "loss": 23.3436, "step": 34600 }, { "epoch": 274.6061814556331, "eval_loss": 2.9784059524536133, "eval_mae": 1.2834292650222778, "eval_mse": 2.9784059524536133, "eval_r2": 0.10262084007263184, 
"eval_rmse": 1.7258058849284335, "eval_runtime": 9.097, "eval_samples_per_second": 440.916, "eval_steps_per_second": 13.851, "step": 34600 }, { "epoch": 274.6859421734796, "grad_norm": 338.5691223144531, "learning_rate": 2.1190835428966775e-07, "loss": 23.4626, "step": 34610 }, { "epoch": 274.76570289132604, "grad_norm": 707.038330078125, "learning_rate": 2.1178505039781854e-07, "loss": 22.324, "step": 34620 }, { "epoch": 274.8454636091725, "grad_norm": 794.2965698242188, "learning_rate": 2.1166175602387862e-07, "loss": 21.627, "step": 34630 }, { "epoch": 274.92522432701895, "grad_norm": 701.9091796875, "learning_rate": 2.1153847119855597e-07, "loss": 24.6566, "step": 34640 }, { "epoch": 275.0, "grad_norm": 420.62335205078125, "learning_rate": 2.1141519595255608e-07, "loss": 21.9168, "step": 34650 }, { "epoch": 275.07976071784645, "grad_norm": 435.24560546875, "learning_rate": 2.1129193031658226e-07, "loss": 22.1155, "step": 34660 }, { "epoch": 275.1595214356929, "grad_norm": 1613.70654296875, "learning_rate": 2.1116867432133546e-07, "loss": 23.537, "step": 34670 }, { "epoch": 275.23928215353936, "grad_norm": 318.3715515136719, "learning_rate": 2.1104542799751392e-07, "loss": 23.4494, "step": 34680 }, { "epoch": 275.3190428713858, "grad_norm": 196.35650634765625, "learning_rate": 2.109221913758139e-07, "loss": 24.0127, "step": 34690 }, { "epoch": 275.39880358923233, "grad_norm": 288.3596496582031, "learning_rate": 2.1079896448692884e-07, "loss": 21.391, "step": 34700 }, { "epoch": 275.39880358923233, "eval_loss": 2.961453676223755, "eval_mae": 1.286414384841919, "eval_mse": 2.961453676223755, "eval_r2": 0.10772848129272461, "eval_rmse": 1.7208874676235384, "eval_runtime": 9.0785, "eval_samples_per_second": 441.815, "eval_steps_per_second": 13.879, "step": 34700 }, { "epoch": 275.4785643070788, "grad_norm": 565.7955932617188, "learning_rate": 2.1067574736154996e-07, "loss": 22.4207, "step": 34710 }, { "epoch": 275.55832502492524, "grad_norm": 351.7076721191406, 
"learning_rate": 2.1055254003036604e-07, "loss": 22.5282, "step": 34720 }, { "epoch": 275.6380857427717, "grad_norm": 591.8775024414062, "learning_rate": 2.1042934252406342e-07, "loss": 23.4614, "step": 34730 }, { "epoch": 275.71784646061815, "grad_norm": 387.2130126953125, "learning_rate": 2.1030615487332607e-07, "loss": 21.8597, "step": 34740 }, { "epoch": 275.7976071784646, "grad_norm": 702.9859008789062, "learning_rate": 2.1018297710883523e-07, "loss": 23.8811, "step": 34750 }, { "epoch": 275.87736789631106, "grad_norm": 718.348388671875, "learning_rate": 2.1005980926126987e-07, "loss": 22.5782, "step": 34760 }, { "epoch": 275.9571286141575, "grad_norm": 815.12158203125, "learning_rate": 2.0993665136130652e-07, "loss": 24.5857, "step": 34770 }, { "epoch": 276.03190428713856, "grad_norm": 349.52239990234375, "learning_rate": 2.0981350343961934e-07, "loss": 21.4769, "step": 34780 }, { "epoch": 276.11166500498507, "grad_norm": 413.54833984375, "learning_rate": 2.0969036552687969e-07, "loss": 23.0135, "step": 34790 }, { "epoch": 276.1914257228315, "grad_norm": 314.8276672363281, "learning_rate": 2.0956723765375652e-07, "loss": 23.4292, "step": 34800 }, { "epoch": 276.1914257228315, "eval_loss": 2.9737393856048584, "eval_mae": 1.2879493236541748, "eval_mse": 2.9737396240234375, "eval_r2": 0.10402679443359375, "eval_rmse": 1.724453427617991, "eval_runtime": 9.0827, "eval_samples_per_second": 441.609, "eval_steps_per_second": 13.873, "step": 34800 }, { "epoch": 276.271186440678, "grad_norm": 204.84500122070312, "learning_rate": 2.0944411985091648e-07, "loss": 23.4302, "step": 34810 }, { "epoch": 276.35094715852443, "grad_norm": 445.2061462402344, "learning_rate": 2.0932101214902366e-07, "loss": 21.2021, "step": 34820 }, { "epoch": 276.4307078763709, "grad_norm": 338.8351745605469, "learning_rate": 2.0919791457873947e-07, "loss": 23.7226, "step": 34830 }, { "epoch": 276.51046859421734, "grad_norm": 590.1646118164062, "learning_rate": 2.0907482717072292e-07, "loss": 
23.4003, "step": 34840 }, { "epoch": 276.5902293120638, "grad_norm": 253.9158172607422, "learning_rate": 2.089517499556304e-07, "loss": 22.5189, "step": 34850 }, { "epoch": 276.66999002991025, "grad_norm": 235.29299926757812, "learning_rate": 2.0882868296411594e-07, "loss": 22.7965, "step": 34860 }, { "epoch": 276.7497507477567, "grad_norm": 1468.8023681640625, "learning_rate": 2.0870562622683075e-07, "loss": 25.4464, "step": 34870 }, { "epoch": 276.8295114656032, "grad_norm": 281.73284912109375, "learning_rate": 2.085825797744239e-07, "loss": 23.6252, "step": 34880 }, { "epoch": 276.9092721834497, "grad_norm": 2000.7344970703125, "learning_rate": 2.084595436375413e-07, "loss": 24.2562, "step": 34890 }, { "epoch": 276.9890329012961, "grad_norm": 1607.6239013671875, "learning_rate": 2.0833651784682687e-07, "loss": 22.7612, "step": 34900 }, { "epoch": 276.9890329012961, "eval_loss": 2.9629199504852295, "eval_mae": 1.2866618633270264, "eval_mse": 2.9629199504852295, "eval_r2": 0.10728675127029419, "eval_rmse": 1.7213134376066521, "eval_runtime": 9.0793, "eval_samples_per_second": 441.772, "eval_steps_per_second": 13.878, "step": 34900 }, { "epoch": 277.0638085742772, "grad_norm": 2094.517578125, "learning_rate": 2.0821350243292172e-07, "loss": 20.5139, "step": 34910 }, { "epoch": 277.14356929212363, "grad_norm": 459.9913024902344, "learning_rate": 2.0809049742646434e-07, "loss": 23.6329, "step": 34920 }, { "epoch": 277.2233300099701, "grad_norm": 976.9309692382812, "learning_rate": 2.079675028580905e-07, "loss": 22.3355, "step": 34930 }, { "epoch": 277.30309072781654, "grad_norm": 324.9153747558594, "learning_rate": 2.078445187584337e-07, "loss": 23.7818, "step": 34940 }, { "epoch": 277.382851445663, "grad_norm": 794.9937744140625, "learning_rate": 2.0772154515812464e-07, "loss": 23.1308, "step": 34950 }, { "epoch": 277.46261216350945, "grad_norm": 552.079345703125, "learning_rate": 2.0759858208779136e-07, "loss": 22.4865, "step": 34960 }, { "epoch": 
277.54237288135596, "grad_norm": 181.21153259277344, "learning_rate": 2.074756295780595e-07, "loss": 21.8313, "step": 34970 }, { "epoch": 277.6221335992024, "grad_norm": 497.83392333984375, "learning_rate": 2.0735268765955173e-07, "loss": 24.0454, "step": 34980 }, { "epoch": 277.70189431704887, "grad_norm": 647.4383544921875, "learning_rate": 2.0722975636288834e-07, "loss": 23.2122, "step": 34990 }, { "epoch": 277.7816550348953, "grad_norm": 1826.175048828125, "learning_rate": 2.0710683571868688e-07, "loss": 22.9969, "step": 35000 }, { "epoch": 277.7816550348953, "eval_loss": 2.960876226425171, "eval_mae": 1.2907391786575317, "eval_mse": 2.960876226425171, "eval_r2": 0.10790246725082397, "eval_rmse": 1.7207196826982514, "eval_runtime": 9.0922, "eval_samples_per_second": 441.149, "eval_steps_per_second": 13.858, "step": 35000 }, { "epoch": 277.8614157527418, "grad_norm": 541.8032836914062, "learning_rate": 2.0698392575756238e-07, "loss": 22.5628, "step": 35010 }, { "epoch": 277.94117647058823, "grad_norm": 661.0296020507812, "learning_rate": 2.0686102651012692e-07, "loss": 24.3329, "step": 35020 }, { "epoch": 278.0159521435693, "grad_norm": 436.8052062988281, "learning_rate": 2.0673813800699022e-07, "loss": 22.5826, "step": 35030 }, { "epoch": 278.09571286141573, "grad_norm": 789.2468872070312, "learning_rate": 2.066152602787591e-07, "loss": 25.2499, "step": 35040 }, { "epoch": 278.1754735792622, "grad_norm": 1029.5194091796875, "learning_rate": 2.064923933560378e-07, "loss": 23.2788, "step": 35050 }, { "epoch": 278.2552342971087, "grad_norm": 276.64434814453125, "learning_rate": 2.0636953726942803e-07, "loss": 23.5676, "step": 35060 }, { "epoch": 278.33499501495515, "grad_norm": 428.64154052734375, "learning_rate": 2.0624669204952843e-07, "loss": 22.2599, "step": 35070 }, { "epoch": 278.4147557328016, "grad_norm": 646.1993408203125, "learning_rate": 2.0612385772693517e-07, "loss": 22.4207, "step": 35080 }, { "epoch": 278.49451645064806, "grad_norm": 
725.5416259765625, "learning_rate": 2.0600103433224164e-07, "loss": 21.0056, "step": 35090 }, { "epoch": 278.5742771684945, "grad_norm": 1010.9341430664062, "learning_rate": 2.058782218960387e-07, "loss": 20.9893, "step": 35100 }, { "epoch": 278.5742771684945, "eval_loss": 2.958890438079834, "eval_mae": 1.2890186309814453, "eval_mse": 2.958890438079834, "eval_r2": 0.10850077867507935, "eval_rmse": 1.7201425633010288, "eval_runtime": 9.0844, "eval_samples_per_second": 441.527, "eval_steps_per_second": 13.87, "step": 35100 }, { "epoch": 278.654037886341, "grad_norm": 703.8577880859375, "learning_rate": 2.0575542044891422e-07, "loss": 22.8007, "step": 35110 }, { "epoch": 278.7337986041874, "grad_norm": 216.5920867919922, "learning_rate": 2.0563263002145331e-07, "loss": 22.4238, "step": 35120 }, { "epoch": 278.8135593220339, "grad_norm": 331.3422546386719, "learning_rate": 2.055098506442386e-07, "loss": 25.6107, "step": 35130 }, { "epoch": 278.89332003988034, "grad_norm": 1468.609130859375, "learning_rate": 2.053870823478498e-07, "loss": 22.3461, "step": 35140 }, { "epoch": 278.97308075772685, "grad_norm": 895.9401245117188, "learning_rate": 2.0526432516286394e-07, "loss": 24.9516, "step": 35150 }, { "epoch": 279.0478564307079, "grad_norm": 588.5226440429688, "learning_rate": 2.0514157911985504e-07, "loss": 22.7737, "step": 35160 }, { "epoch": 279.12761714855435, "grad_norm": 413.3553161621094, "learning_rate": 2.0501884424939464e-07, "loss": 21.5904, "step": 35170 }, { "epoch": 279.2073778664008, "grad_norm": 746.3114013671875, "learning_rate": 2.0489612058205147e-07, "loss": 23.2071, "step": 35180 }, { "epoch": 279.28713858424726, "grad_norm": 1527.2125244140625, "learning_rate": 2.0477340814839123e-07, "loss": 20.4271, "step": 35190 }, { "epoch": 279.3668993020937, "grad_norm": 308.45721435546875, "learning_rate": 2.046507069789772e-07, "loss": 23.4165, "step": 35200 }, { "epoch": 279.3668993020937, "eval_loss": 2.9652814865112305, "eval_mae": 1.2923365831375122, 
"eval_mse": 2.9652814865112305, "eval_r2": 0.10657519102096558, "eval_rmse": 1.7219992701831295, "eval_runtime": 9.0553, "eval_samples_per_second": 442.945, "eval_steps_per_second": 13.915, "step": 35200 }, { "epoch": 279.44666001994017, "grad_norm": 956.6458129882812, "learning_rate": 2.0452801710436936e-07, "loss": 23.4536, "step": 35210 }, { "epoch": 279.5264207377866, "grad_norm": 168.210205078125, "learning_rate": 2.0440533855512537e-07, "loss": 23.5192, "step": 35220 }, { "epoch": 279.6061814556331, "grad_norm": 475.0094909667969, "learning_rate": 2.0428267136179972e-07, "loss": 24.1107, "step": 35230 }, { "epoch": 279.6859421734796, "grad_norm": 760.2296752929688, "learning_rate": 2.0416001555494433e-07, "loss": 23.5051, "step": 35240 }, { "epoch": 279.76570289132604, "grad_norm": 164.65597534179688, "learning_rate": 2.0403737116510796e-07, "loss": 23.0331, "step": 35250 }, { "epoch": 279.8454636091725, "grad_norm": 833.2396240234375, "learning_rate": 2.0391473822283688e-07, "loss": 23.6303, "step": 35260 }, { "epoch": 279.92522432701895, "grad_norm": 849.0071411132812, "learning_rate": 2.0379211675867436e-07, "loss": 22.9304, "step": 35270 }, { "epoch": 280.0, "grad_norm": 560.7474365234375, "learning_rate": 2.036695068031607e-07, "loss": 21.6578, "step": 35280 }, { "epoch": 280.07976071784645, "grad_norm": 424.35943603515625, "learning_rate": 2.035469083868336e-07, "loss": 22.7506, "step": 35290 }, { "epoch": 280.1595214356929, "grad_norm": 641.072509765625, "learning_rate": 2.0342432154022748e-07, "loss": 23.4757, "step": 35300 }, { "epoch": 280.1595214356929, "eval_loss": 2.962219715118408, "eval_mae": 1.29058039188385, "eval_mse": 2.962219476699829, "eval_r2": 0.10749781131744385, "eval_rmse": 1.7211099548546656, "eval_runtime": 9.0929, "eval_samples_per_second": 441.115, "eval_steps_per_second": 13.857, "step": 35300 }, { "epoch": 280.23928215353936, "grad_norm": 586.6846923828125, "learning_rate": 2.0330174629387436e-07, "loss": 22.5532, "step": 35310 
}, { "epoch": 280.3190428713858, "grad_norm": 974.9382934570312, "learning_rate": 2.0317918267830295e-07, "loss": 22.6349, "step": 35320 }, { "epoch": 280.39880358923233, "grad_norm": 439.3430480957031, "learning_rate": 2.0305663072403934e-07, "loss": 22.8622, "step": 35330 }, { "epoch": 280.4785643070788, "grad_norm": 869.9444580078125, "learning_rate": 2.029340904616067e-07, "loss": 24.2681, "step": 35340 }, { "epoch": 280.55832502492524, "grad_norm": 1397.7738037109375, "learning_rate": 2.0281156192152505e-07, "loss": 21.9909, "step": 35350 }, { "epoch": 280.6380857427717, "grad_norm": 302.1526184082031, "learning_rate": 2.0268904513431169e-07, "loss": 22.708, "step": 35360 }, { "epoch": 280.71784646061815, "grad_norm": 340.33953857421875, "learning_rate": 2.0256654013048091e-07, "loss": 22.6771, "step": 35370 }, { "epoch": 280.7976071784646, "grad_norm": 488.6512756347656, "learning_rate": 2.0244404694054434e-07, "loss": 25.2694, "step": 35380 }, { "epoch": 280.87736789631106, "grad_norm": 776.7713012695312, "learning_rate": 2.0232156559501022e-07, "loss": 22.5324, "step": 35390 }, { "epoch": 280.9571286141575, "grad_norm": 978.2791137695312, "learning_rate": 2.02199096124384e-07, "loss": 22.7049, "step": 35400 }, { "epoch": 280.9571286141575, "eval_loss": 2.969667434692383, "eval_mae": 1.2845220565795898, "eval_mse": 2.969667434692383, "eval_r2": 0.10525375604629517, "eval_rmse": 1.7232723042782248, "eval_runtime": 9.0724, "eval_samples_per_second": 442.11, "eval_steps_per_second": 13.888, "step": 35400 }, { "epoch": 281.03190428713856, "grad_norm": 317.99188232421875, "learning_rate": 2.0207663855916838e-07, "loss": 22.1657, "step": 35410 }, { "epoch": 281.11166500498507, "grad_norm": 1002.4553833007812, "learning_rate": 2.0195419292986291e-07, "loss": 23.7062, "step": 35420 }, { "epoch": 281.1914257228315, "grad_norm": 1255.2816162109375, "learning_rate": 2.0183175926696425e-07, "loss": 22.2218, "step": 35430 }, { "epoch": 281.271186440678, "grad_norm": 
406.6369323730469, "learning_rate": 2.0170933760096583e-07, "loss": 22.3458, "step": 35440 }, { "epoch": 281.35094715852443, "grad_norm": 1097.8414306640625, "learning_rate": 2.0158692796235843e-07, "loss": 23.9187, "step": 35450 }, { "epoch": 281.4307078763709, "grad_norm": 496.8609313964844, "learning_rate": 2.0146453038162974e-07, "loss": 24.5831, "step": 35460 }, { "epoch": 281.51046859421734, "grad_norm": 425.373291015625, "learning_rate": 2.013421448892643e-07, "loss": 24.6211, "step": 35470 }, { "epoch": 281.5902293120638, "grad_norm": 1004.8867797851562, "learning_rate": 2.0121977151574392e-07, "loss": 22.9665, "step": 35480 }, { "epoch": 281.66999002991025, "grad_norm": 465.9045715332031, "learning_rate": 2.0109741029154694e-07, "loss": 22.4116, "step": 35490 }, { "epoch": 281.7497507477567, "grad_norm": 1068.4315185546875, "learning_rate": 2.009750612471492e-07, "loss": 23.6084, "step": 35500 }, { "epoch": 281.7497507477567, "eval_loss": 2.960203170776367, "eval_mae": 1.2882578372955322, "eval_mse": 2.960203170776367, "eval_r2": 0.10810524225234985, "eval_rmse": 1.7205240977028968, "eval_runtime": 9.0652, "eval_samples_per_second": 442.461, "eval_steps_per_second": 13.899, "step": 35500 }, { "epoch": 281.8295114656032, "grad_norm": 391.5953674316406, "learning_rate": 2.0085272441302304e-07, "loss": 23.0571, "step": 35510 }, { "epoch": 281.9092721834497, "grad_norm": 420.6859436035156, "learning_rate": 2.007303998196382e-07, "loss": 21.3119, "step": 35520 }, { "epoch": 281.9890329012961, "grad_norm": 542.68115234375, "learning_rate": 2.0060808749746085e-07, "loss": 22.2611, "step": 35530 }, { "epoch": 282.0638085742772, "grad_norm": 252.11354064941406, "learning_rate": 2.0048578747695468e-07, "loss": 20.2335, "step": 35540 }, { "epoch": 282.14356929212363, "grad_norm": 281.56365966796875, "learning_rate": 2.0036349978857985e-07, "loss": 22.2323, "step": 35550 }, { "epoch": 282.2233300099701, "grad_norm": 200.11773681640625, "learning_rate": 
2.0024122446279373e-07, "loss": 24.5286, "step": 35560 }, { "epoch": 282.30309072781654, "grad_norm": 246.14796447753906, "learning_rate": 2.0011896153005057e-07, "loss": 26.2831, "step": 35570 }, { "epoch": 282.382851445663, "grad_norm": 1766.3114013671875, "learning_rate": 1.999967110208013e-07, "loss": 23.6589, "step": 35580 }, { "epoch": 282.46261216350945, "grad_norm": 283.08966064453125, "learning_rate": 1.9987447296549402e-07, "loss": 24.1291, "step": 35590 }, { "epoch": 282.54237288135596, "grad_norm": 310.65386962890625, "learning_rate": 1.9975224739457367e-07, "loss": 20.7151, "step": 35600 }, { "epoch": 282.54237288135596, "eval_loss": 2.9648003578186035, "eval_mae": 1.284411072731018, "eval_mse": 2.9648003578186035, "eval_r2": 0.10672014951705933, "eval_rmse": 1.721859563907174, "eval_runtime": 9.0663, "eval_samples_per_second": 442.409, "eval_steps_per_second": 13.898, "step": 35600 }, { "epoch": 282.6221335992024, "grad_norm": 177.55731201171875, "learning_rate": 1.9963003433848208e-07, "loss": 22.3219, "step": 35610 }, { "epoch": 282.70189431704887, "grad_norm": 1194.229736328125, "learning_rate": 1.995078338276578e-07, "loss": 21.6743, "step": 35620 }, { "epoch": 282.7816550348953, "grad_norm": 941.6058959960938, "learning_rate": 1.993856458925365e-07, "loss": 23.208, "step": 35630 }, { "epoch": 282.8614157527418, "grad_norm": 747.1911010742188, "learning_rate": 1.9926347056355054e-07, "loss": 24.0022, "step": 35640 }, { "epoch": 282.94117647058823, "grad_norm": 341.9564208984375, "learning_rate": 1.9914130787112923e-07, "loss": 21.1185, "step": 35650 }, { "epoch": 283.0159521435693, "grad_norm": 963.5127563476562, "learning_rate": 1.9901915784569885e-07, "loss": 22.3423, "step": 35660 }, { "epoch": 283.09571286141573, "grad_norm": 266.2913513183594, "learning_rate": 1.9889702051768216e-07, "loss": 23.8738, "step": 35670 }, { "epoch": 283.1754735792622, "grad_norm": 421.998291015625, "learning_rate": 1.9877489591749907e-07, "loss": 21.798, "step": 
35680 }, { "epoch": 283.2552342971087, "grad_norm": 1275.6529541015625, "learning_rate": 1.9865278407556623e-07, "loss": 24.3806, "step": 35690 }, { "epoch": 283.33499501495515, "grad_norm": 457.0713806152344, "learning_rate": 1.985306850222972e-07, "loss": 23.3556, "step": 35700 }, { "epoch": 283.33499501495515, "eval_loss": 2.9685611724853516, "eval_mae": 1.2989680767059326, "eval_mse": 2.9685611724853516, "eval_r2": 0.10558706521987915, "eval_rmse": 1.7229512971890273, "eval_runtime": 9.0899, "eval_samples_per_second": 441.26, "eval_steps_per_second": 13.862, "step": 35700 }, { "epoch": 283.4147557328016, "grad_norm": 799.69921875, "learning_rate": 1.9840859878810223e-07, "loss": 22.4187, "step": 35710 }, { "epoch": 283.49451645064806, "grad_norm": 1309.066650390625, "learning_rate": 1.982865254033883e-07, "loss": 22.0474, "step": 35720 }, { "epoch": 283.5742771684945, "grad_norm": 384.88531494140625, "learning_rate": 1.981644648985594e-07, "loss": 23.8883, "step": 35730 }, { "epoch": 283.654037886341, "grad_norm": 886.3473510742188, "learning_rate": 1.9804241730401624e-07, "loss": 23.0122, "step": 35740 }, { "epoch": 283.7337986041874, "grad_norm": 372.2348937988281, "learning_rate": 1.9792038265015634e-07, "loss": 22.1438, "step": 35750 }, { "epoch": 283.8135593220339, "grad_norm": 1198.8360595703125, "learning_rate": 1.977983609673738e-07, "loss": 24.4856, "step": 35760 }, { "epoch": 283.89332003988034, "grad_norm": 403.27880859375, "learning_rate": 1.9767635228605966e-07, "loss": 22.1644, "step": 35770 }, { "epoch": 283.97308075772685, "grad_norm": 347.24029541015625, "learning_rate": 1.9755435663660187e-07, "loss": 23.0664, "step": 35780 }, { "epoch": 284.0478564307079, "grad_norm": 771.271240234375, "learning_rate": 1.9743237404938477e-07, "loss": 20.9471, "step": 35790 }, { "epoch": 284.12761714855435, "grad_norm": 387.5115661621094, "learning_rate": 1.9731040455478982e-07, "loss": 22.5388, "step": 35800 }, { "epoch": 284.12761714855435, "eval_loss": 
2.9756717681884766, "eval_mae": 1.2824524641036987, "eval_mse": 2.9756722450256348, "eval_r2": 0.10344451665878296, "eval_rmse": 1.7250136941559724, "eval_runtime": 9.0561, "eval_samples_per_second": 442.906, "eval_steps_per_second": 13.913, "step": 35800 }, { "epoch": 284.2073778664008, "grad_norm": 687.7453002929688, "learning_rate": 1.9718844818319485e-07, "loss": 24.1204, "step": 35810 }, { "epoch": 284.28713858424726, "grad_norm": 961.2636108398438, "learning_rate": 1.970665049649748e-07, "loss": 22.2689, "step": 35820 }, { "epoch": 284.3668993020937, "grad_norm": 357.94635009765625, "learning_rate": 1.96944574930501e-07, "loss": 22.8073, "step": 35830 }, { "epoch": 284.44666001994017, "grad_norm": 1568.5445556640625, "learning_rate": 1.968226581101417e-07, "loss": 22.0942, "step": 35840 }, { "epoch": 284.5264207377866, "grad_norm": 887.5462036132812, "learning_rate": 1.9670075453426192e-07, "loss": 25.5706, "step": 35850 }, { "epoch": 284.6061814556331, "grad_norm": 520.3413696289062, "learning_rate": 1.965788642332231e-07, "loss": 23.1674, "step": 35860 }, { "epoch": 284.6859421734796, "grad_norm": 478.873779296875, "learning_rate": 1.9645698723738353e-07, "loss": 23.0428, "step": 35870 }, { "epoch": 284.76570289132604, "grad_norm": 605.9752807617188, "learning_rate": 1.9633512357709825e-07, "loss": 21.5509, "step": 35880 }, { "epoch": 284.8454636091725, "grad_norm": 423.40191650390625, "learning_rate": 1.9621327328271905e-07, "loss": 23.2131, "step": 35890 }, { "epoch": 284.92522432701895, "grad_norm": 1224.188232421875, "learning_rate": 1.96091436384594e-07, "loss": 22.097, "step": 35900 }, { "epoch": 284.92522432701895, "eval_loss": 2.964362859725952, "eval_mae": 1.2939927577972412, "eval_mse": 2.964362621307373, "eval_r2": 0.10685199499130249, "eval_rmse": 1.7217324476547955, "eval_runtime": 9.061, "eval_samples_per_second": 442.669, "eval_steps_per_second": 13.906, "step": 35900 }, { "epoch": 285.0, "grad_norm": 733.3826293945312, "learning_rate": 
1.9596961291306835e-07, "loss": 23.3368, "step": 35910 }, { "epoch": 285.07976071784645, "grad_norm": 1493.994140625, "learning_rate": 1.9584780289848355e-07, "loss": 26.8808, "step": 35920 }, { "epoch": 285.1595214356929, "grad_norm": 127.55492401123047, "learning_rate": 1.9572600637117805e-07, "loss": 23.3882, "step": 35930 }, { "epoch": 285.23928215353936, "grad_norm": 853.9454956054688, "learning_rate": 1.9560422336148674e-07, "loss": 22.2778, "step": 35940 }, { "epoch": 285.3190428713858, "grad_norm": 1000.6528930664062, "learning_rate": 1.954824538997412e-07, "loss": 22.7299, "step": 35950 }, { "epoch": 285.39880358923233, "grad_norm": 966.7951049804688, "learning_rate": 1.9536069801626956e-07, "loss": 22.1884, "step": 35960 }, { "epoch": 285.4785643070788, "grad_norm": 700.5227661132812, "learning_rate": 1.952389557413967e-07, "loss": 25.1173, "step": 35970 }, { "epoch": 285.55832502492524, "grad_norm": 1235.1341552734375, "learning_rate": 1.9511722710544416e-07, "loss": 22.9758, "step": 35980 }, { "epoch": 285.6380857427717, "grad_norm": 396.1883544921875, "learning_rate": 1.949955121387298e-07, "loss": 22.8223, "step": 35990 }, { "epoch": 285.71784646061815, "grad_norm": 495.91973876953125, "learning_rate": 1.948738108715683e-07, "loss": 22.37, "step": 36000 }, { "epoch": 285.71784646061815, "eval_loss": 2.9797921180725098, "eval_mae": 1.286327838897705, "eval_mse": 2.979792356491089, "eval_r2": 0.1022031307220459, "eval_rmse": 1.7262075067879554, "eval_runtime": 9.0814, "eval_samples_per_second": 441.673, "eval_steps_per_second": 13.875, "step": 36000 }, { "epoch": 285.7976071784646, "grad_norm": 313.2388916015625, "learning_rate": 1.9475212333427085e-07, "loss": 21.9463, "step": 36010 }, { "epoch": 285.87736789631106, "grad_norm": 1386.6151123046875, "learning_rate": 1.9463044955714535e-07, "loss": 21.5289, "step": 36020 }, { "epoch": 285.9571286141575, "grad_norm": 923.9249877929688, "learning_rate": 1.9450878957049616e-07, "loss": 22.8795, "step": 
36030 }, { "epoch": 286.03190428713856, "grad_norm": 683.2625122070312, "learning_rate": 1.9438714340462406e-07, "loss": 18.8855, "step": 36040 }, { "epoch": 286.11166500498507, "grad_norm": 288.84393310546875, "learning_rate": 1.9426551108982663e-07, "loss": 25.6166, "step": 36050 }, { "epoch": 286.1914257228315, "grad_norm": 405.8291015625, "learning_rate": 1.9414389265639802e-07, "loss": 23.2314, "step": 36060 }, { "epoch": 286.271186440678, "grad_norm": 850.6596069335938, "learning_rate": 1.940222881346286e-07, "loss": 21.7071, "step": 36070 }, { "epoch": 286.35094715852443, "grad_norm": 548.14111328125, "learning_rate": 1.939006975548058e-07, "loss": 23.1086, "step": 36080 }, { "epoch": 286.4307078763709, "grad_norm": 673.26025390625, "learning_rate": 1.9377912094721293e-07, "loss": 23.034, "step": 36090 }, { "epoch": 286.51046859421734, "grad_norm": 414.7325439453125, "learning_rate": 1.9365755834213038e-07, "loss": 21.6649, "step": 36100 }, { "epoch": 286.51046859421734, "eval_loss": 2.957052230834961, "eval_mae": 1.289268970489502, "eval_mse": 2.957052230834961, "eval_r2": 0.10905462503433228, "eval_rmse": 1.719608162005217, "eval_runtime": 9.0841, "eval_samples_per_second": 441.54, "eval_steps_per_second": 13.87, "step": 36100 }, { "epoch": 286.5902293120638, "grad_norm": 702.4627685546875, "learning_rate": 1.9353600976983473e-07, "loss": 25.6429, "step": 36110 }, { "epoch": 286.66999002991025, "grad_norm": 408.4979248046875, "learning_rate": 1.9341447526059928e-07, "loss": 24.0296, "step": 36120 }, { "epoch": 286.7497507477567, "grad_norm": 440.3926086425781, "learning_rate": 1.9329295484469352e-07, "loss": 22.6387, "step": 36130 }, { "epoch": 286.8295114656032, "grad_norm": 452.042724609375, "learning_rate": 1.931714485523838e-07, "loss": 22.3395, "step": 36140 }, { "epoch": 286.9092721834497, "grad_norm": 333.8067932128906, "learning_rate": 1.9304995641393266e-07, "loss": 23.0052, "step": 36150 }, { "epoch": 286.9890329012961, "grad_norm": 
623.7636108398438, "learning_rate": 1.929284784595993e-07, "loss": 21.9916, "step": 36160 }, { "epoch": 287.0638085742772, "grad_norm": 452.311767578125, "learning_rate": 1.9280701471963936e-07, "loss": 18.6515, "step": 36170 }, { "epoch": 287.14356929212363, "grad_norm": 335.5778503417969, "learning_rate": 1.9268556522430483e-07, "loss": 21.7196, "step": 36180 }, { "epoch": 287.2233300099701, "grad_norm": 242.38865661621094, "learning_rate": 1.9256413000384415e-07, "loss": 22.0252, "step": 36190 }, { "epoch": 287.30309072781654, "grad_norm": 641.955322265625, "learning_rate": 1.9244270908850234e-07, "loss": 22.9962, "step": 36200 }, { "epoch": 287.30309072781654, "eval_loss": 2.964320182800293, "eval_mae": 1.2858256101608276, "eval_mse": 2.964320182800293, "eval_r2": 0.1068648099899292, "eval_rmse": 1.7217201232489248, "eval_runtime": 9.1449, "eval_samples_per_second": 438.605, "eval_steps_per_second": 13.778, "step": 36200 }, { "epoch": 287.382851445663, "grad_norm": 313.9455261230469, "learning_rate": 1.9232130250852092e-07, "loss": 25.1304, "step": 36210 }, { "epoch": 287.46261216350945, "grad_norm": 691.0582275390625, "learning_rate": 1.9219991029413754e-07, "loss": 22.6834, "step": 36220 }, { "epoch": 287.54237288135596, "grad_norm": 430.69793701171875, "learning_rate": 1.9207853247558645e-07, "loss": 23.7089, "step": 36230 }, { "epoch": 287.6221335992024, "grad_norm": 366.560546875, "learning_rate": 1.9195716908309833e-07, "loss": 23.8384, "step": 36240 }, { "epoch": 287.70189431704887, "grad_norm": 492.2204895019531, "learning_rate": 1.9183582014690038e-07, "loss": 25.214, "step": 36250 }, { "epoch": 287.7816550348953, "grad_norm": 1235.56640625, "learning_rate": 1.917144856972159e-07, "loss": 22.6221, "step": 36260 }, { "epoch": 287.8614157527418, "grad_norm": 344.3128662109375, "learning_rate": 1.915931657642648e-07, "loss": 23.6879, "step": 36270 }, { "epoch": 287.94117647058823, "grad_norm": 1383.4229736328125, "learning_rate": 1.914718603782633e-07, 
"loss": 23.2404, "step": 36280 }, { "epoch": 288.0159521435693, "grad_norm": 192.54356384277344, "learning_rate": 1.9135056956942407e-07, "loss": 21.8429, "step": 36290 }, { "epoch": 288.09571286141573, "grad_norm": 945.7467041015625, "learning_rate": 1.91229293367956e-07, "loss": 21.5877, "step": 36300 }, { "epoch": 288.09571286141573, "eval_loss": 2.9648287296295166, "eval_mae": 1.2923362255096436, "eval_mse": 2.9648282527923584, "eval_r2": 0.106711745262146, "eval_rmse": 1.7218676641346042, "eval_runtime": 9.063, "eval_samples_per_second": 442.567, "eval_steps_per_second": 13.903, "step": 36300 }, { "epoch": 288.1754735792622, "grad_norm": 1069.1270751953125, "learning_rate": 1.9110803180406466e-07, "loss": 21.7894, "step": 36310 }, { "epoch": 288.2552342971087, "grad_norm": 659.6483764648438, "learning_rate": 1.9098678490795142e-07, "loss": 23.323, "step": 36320 }, { "epoch": 288.33499501495515, "grad_norm": 509.91754150390625, "learning_rate": 1.9086555270981456e-07, "loss": 20.9569, "step": 36330 }, { "epoch": 288.4147557328016, "grad_norm": 292.78265380859375, "learning_rate": 1.9074433523984842e-07, "loss": 23.8257, "step": 36340 }, { "epoch": 288.49451645064806, "grad_norm": 514.0445556640625, "learning_rate": 1.9062313252824381e-07, "loss": 24.2512, "step": 36350 }, { "epoch": 288.5742771684945, "grad_norm": 709.8264770507812, "learning_rate": 1.9050194460518757e-07, "loss": 23.5609, "step": 36360 }, { "epoch": 288.654037886341, "grad_norm": 1047.640625, "learning_rate": 1.9038077150086316e-07, "loss": 23.5893, "step": 36370 }, { "epoch": 288.7337986041874, "grad_norm": 397.6839294433594, "learning_rate": 1.902596132454503e-07, "loss": 24.5942, "step": 36380 }, { "epoch": 288.8135593220339, "grad_norm": 362.4794921875, "learning_rate": 1.901384698691249e-07, "loss": 22.3205, "step": 36390 }, { "epoch": 288.89332003988034, "grad_norm": 250.99215698242188, "learning_rate": 1.9001734140205935e-07, "loss": 23.9761, "step": 36400 }, { "epoch": 
288.89332003988034, "eval_loss": 2.959632396697998, "eval_mae": 1.2892621755599976, "eval_mse": 2.959632396697998, "eval_r2": 0.10827720165252686, "eval_rmse": 1.7203582175517975, "eval_runtime": 9.0867, "eval_samples_per_second": 441.415, "eval_steps_per_second": 13.866, "step": 36400 }, { "epoch": 288.97308075772685, "grad_norm": 458.8247375488281, "learning_rate": 1.89896227874422e-07, "loss": 22.2856, "step": 36410 }, { "epoch": 289.0478564307079, "grad_norm": 227.16421508789062, "learning_rate": 1.8977512931637785e-07, "loss": 21.3357, "step": 36420 }, { "epoch": 289.12761714855435, "grad_norm": 136.99842834472656, "learning_rate": 1.8965404575808789e-07, "loss": 24.7046, "step": 36430 }, { "epoch": 289.2073778664008, "grad_norm": 292.46783447265625, "learning_rate": 1.8953297722970954e-07, "loss": 24.0002, "step": 36440 }, { "epoch": 289.28713858424726, "grad_norm": 437.4063415527344, "learning_rate": 1.8941192376139653e-07, "loss": 23.0407, "step": 36450 }, { "epoch": 289.3668993020937, "grad_norm": 219.44361877441406, "learning_rate": 1.8929088538329856e-07, "loss": 22.9118, "step": 36460 }, { "epoch": 289.44666001994017, "grad_norm": 897.8577880859375, "learning_rate": 1.891698621255618e-07, "loss": 22.9397, "step": 36470 }, { "epoch": 289.5264207377866, "grad_norm": 469.3039245605469, "learning_rate": 1.890488540183286e-07, "loss": 22.5491, "step": 36480 }, { "epoch": 289.6061814556331, "grad_norm": 524.674560546875, "learning_rate": 1.8892786109173767e-07, "loss": 21.8458, "step": 36490 }, { "epoch": 289.6859421734796, "grad_norm": 186.74449157714844, "learning_rate": 1.8880688337592363e-07, "loss": 22.8131, "step": 36500 }, { "epoch": 289.6859421734796, "eval_loss": 2.96305775642395, "eval_mae": 1.2956712245941162, "eval_mse": 2.96305775642395, "eval_r2": 0.10724520683288574, "eval_rmse": 1.721353466439694, "eval_runtime": 9.1063, "eval_samples_per_second": 440.463, "eval_steps_per_second": 13.837, "step": 36500 }, { "epoch": 289.76570289132604, 
"grad_norm": 594.638671875, "learning_rate": 1.8868592090101748e-07, "loss": 21.3693, "step": 36510 }, { "epoch": 289.8454636091725, "grad_norm": 1204.589599609375, "learning_rate": 1.8856497369714653e-07, "loss": 21.8328, "step": 36520 }, { "epoch": 289.92522432701895, "grad_norm": 1412.10498046875, "learning_rate": 1.884440417944342e-07, "loss": 23.0577, "step": 36530 }, { "epoch": 290.0, "grad_norm": 484.0316162109375, "learning_rate": 1.8832312522300007e-07, "loss": 24.1015, "step": 36540 }, { "epoch": 290.07976071784645, "grad_norm": 261.65509033203125, "learning_rate": 1.8820222401295976e-07, "loss": 25.2079, "step": 36550 }, { "epoch": 290.1595214356929, "grad_norm": 623.7129516601562, "learning_rate": 1.8808133819442539e-07, "loss": 21.8983, "step": 36560 }, { "epoch": 290.23928215353936, "grad_norm": 189.4542999267578, "learning_rate": 1.8796046779750513e-07, "loss": 24.0651, "step": 36570 }, { "epoch": 290.3190428713858, "grad_norm": 403.1977844238281, "learning_rate": 1.8783961285230312e-07, "loss": 20.8615, "step": 36580 }, { "epoch": 290.39880358923233, "grad_norm": 887.7024536132812, "learning_rate": 1.8771877338891987e-07, "loss": 23.1835, "step": 36590 }, { "epoch": 290.4785643070788, "grad_norm": 153.1597137451172, "learning_rate": 1.8759794943745182e-07, "loss": 23.9526, "step": 36600 }, { "epoch": 290.4785643070788, "eval_loss": 2.969057559967041, "eval_mae": 1.282334804534912, "eval_mse": 2.96905779838562, "eval_r2": 0.10543739795684814, "eval_rmse": 1.723095411863667, "eval_runtime": 9.099, "eval_samples_per_second": 440.816, "eval_steps_per_second": 13.848, "step": 36600 }, { "epoch": 290.55832502492524, "grad_norm": 206.9164276123047, "learning_rate": 1.8747714102799189e-07, "loss": 23.3965, "step": 36610 }, { "epoch": 290.6380857427717, "grad_norm": 787.9398193359375, "learning_rate": 1.8735634819062873e-07, "loss": 23.1207, "step": 36620 }, { "epoch": 290.71784646061815, "grad_norm": 193.9875030517578, "learning_rate": 
1.872355709554475e-07, "loss": 22.1797, "step": 36630 }, { "epoch": 290.7976071784646, "grad_norm": 546.6417236328125, "learning_rate": 1.8711480935252904e-07, "loss": 22.8325, "step": 36640 }, { "epoch": 290.87736789631106, "grad_norm": 1094.3037109375, "learning_rate": 1.8699406341195067e-07, "loss": 21.845, "step": 36650 }, { "epoch": 290.9571286141575, "grad_norm": 223.6046905517578, "learning_rate": 1.868733331637857e-07, "loss": 24.3477, "step": 36660 }, { "epoch": 291.03190428713856, "grad_norm": 439.85516357421875, "learning_rate": 1.8675261863810337e-07, "loss": 20.7617, "step": 36670 }, { "epoch": 291.11166500498507, "grad_norm": 186.5644989013672, "learning_rate": 1.8663191986496937e-07, "loss": 23.0272, "step": 36680 }, { "epoch": 291.1914257228315, "grad_norm": 2083.707275390625, "learning_rate": 1.8651123687444498e-07, "loss": 24.9641, "step": 36690 }, { "epoch": 291.271186440678, "grad_norm": 237.58786010742188, "learning_rate": 1.863905696965879e-07, "loss": 21.5397, "step": 36700 }, { "epoch": 291.271186440678, "eval_loss": 2.959972858428955, "eval_mae": 1.2938432693481445, "eval_mse": 2.959972620010376, "eval_r2": 0.10817474126815796, "eval_rmse": 1.7204570962422678, "eval_runtime": 9.0858, "eval_samples_per_second": 441.459, "eval_steps_per_second": 13.868, "step": 36700 }, { "epoch": 291.35094715852443, "grad_norm": 370.0028076171875, "learning_rate": 1.8626991836145178e-07, "loss": 22.8297, "step": 36710 }, { "epoch": 291.4307078763709, "grad_norm": 291.2168884277344, "learning_rate": 1.8614928289908646e-07, "loss": 22.1641, "step": 36720 }, { "epoch": 291.51046859421734, "grad_norm": 1112.2530517578125, "learning_rate": 1.8602866333953747e-07, "loss": 24.578, "step": 36730 }, { "epoch": 291.5902293120638, "grad_norm": 402.6103820800781, "learning_rate": 1.8590805971284684e-07, "loss": 23.6851, "step": 36740 }, { "epoch": 291.66999002991025, "grad_norm": 680.296630859375, "learning_rate": 1.8578747204905222e-07, "loss": 23.5266, "step": 36750 
}, { "epoch": 291.7497507477567, "grad_norm": 628.4378662109375, "learning_rate": 1.8566690037818758e-07, "loss": 23.0505, "step": 36760 }, { "epoch": 291.8295114656032, "grad_norm": 839.2183837890625, "learning_rate": 1.8554634473028287e-07, "loss": 22.2709, "step": 36770 }, { "epoch": 291.9092721834497, "grad_norm": 1796.494873046875, "learning_rate": 1.8542580513536382e-07, "loss": 22.0434, "step": 36780 }, { "epoch": 291.9890329012961, "grad_norm": 1286.6932373046875, "learning_rate": 1.8530528162345234e-07, "loss": 23.1104, "step": 36790 }, { "epoch": 292.0638085742772, "grad_norm": 1134.0379638671875, "learning_rate": 1.8518477422456636e-07, "loss": 22.2488, "step": 36800 }, { "epoch": 292.0638085742772, "eval_loss": 2.9605071544647217, "eval_mae": 1.287654995918274, "eval_mse": 2.9605071544647217, "eval_r2": 0.10801368951797485, "eval_rmse": 1.7206124358683224, "eval_runtime": 9.1038, "eval_samples_per_second": 440.586, "eval_steps_per_second": 13.84, "step": 36800 }, { "epoch": 292.14356929212363, "grad_norm": 1212.5570068359375, "learning_rate": 1.8506428296871978e-07, "loss": 23.1092, "step": 36810 }, { "epoch": 292.2233300099701, "grad_norm": 795.9725341796875, "learning_rate": 1.8494380788592249e-07, "loss": 23.9108, "step": 36820 }, { "epoch": 292.30309072781654, "grad_norm": 618.9758911132812, "learning_rate": 1.8482334900618007e-07, "loss": 22.1557, "step": 36830 }, { "epoch": 292.382851445663, "grad_norm": 199.36758422851562, "learning_rate": 1.8470290635949448e-07, "loss": 21.3498, "step": 36840 }, { "epoch": 292.46261216350945, "grad_norm": 897.5830688476562, "learning_rate": 1.845824799758635e-07, "loss": 23.5089, "step": 36850 }, { "epoch": 292.54237288135596, "grad_norm": 375.9431457519531, "learning_rate": 1.8446206988528084e-07, "loss": 21.8539, "step": 36860 }, { "epoch": 292.6221335992024, "grad_norm": 280.3815002441406, "learning_rate": 1.8434167611773593e-07, "loss": 23.5507, "step": 36870 }, { "epoch": 292.70189431704887, "grad_norm": 
555.9196166992188, "learning_rate": 1.842212987032145e-07, "loss": 22.8814, "step": 36880 }, { "epoch": 292.7816550348953, "grad_norm": 351.2213439941406, "learning_rate": 1.8410093767169805e-07, "loss": 23.9657, "step": 36890 }, { "epoch": 292.8614157527418, "grad_norm": 439.0758361816406, "learning_rate": 1.8398059305316388e-07, "loss": 21.1606, "step": 36900 }, { "epoch": 292.8614157527418, "eval_loss": 2.956258535385132, "eval_mae": 1.2853070497512817, "eval_mse": 2.956258535385132, "eval_r2": 0.10929375886917114, "eval_rmse": 1.719377368521853, "eval_runtime": 9.0696, "eval_samples_per_second": 442.249, "eval_steps_per_second": 13.893, "step": 36900 }, { "epoch": 292.94117647058823, "grad_norm": 355.84674072265625, "learning_rate": 1.838602648775855e-07, "loss": 24.1349, "step": 36910 }, { "epoch": 293.0159521435693, "grad_norm": 650.1371459960938, "learning_rate": 1.837399531749319e-07, "loss": 22.0405, "step": 36920 }, { "epoch": 293.09571286141573, "grad_norm": 197.20037841796875, "learning_rate": 1.8361965797516844e-07, "loss": 23.2535, "step": 36930 }, { "epoch": 293.1754735792622, "grad_norm": 447.2896423339844, "learning_rate": 1.8349937930825598e-07, "loss": 23.4124, "step": 36940 }, { "epoch": 293.2552342971087, "grad_norm": 458.4688415527344, "learning_rate": 1.8337911720415155e-07, "loss": 21.675, "step": 36950 }, { "epoch": 293.33499501495515, "grad_norm": 777.9290771484375, "learning_rate": 1.832588716928078e-07, "loss": 21.99, "step": 36960 }, { "epoch": 293.4147557328016, "grad_norm": 377.29107666015625, "learning_rate": 1.8313864280417338e-07, "loss": 23.5075, "step": 36970 }, { "epoch": 293.49451645064806, "grad_norm": 608.5870971679688, "learning_rate": 1.8301843056819288e-07, "loss": 23.8374, "step": 36980 }, { "epoch": 293.5742771684945, "grad_norm": 262.888427734375, "learning_rate": 1.8289823501480662e-07, "loss": 24.1258, "step": 36990 }, { "epoch": 293.654037886341, "grad_norm": 1055.6136474609375, "learning_rate": 
1.8277805617395086e-07, "loss": 24.0822, "step": 37000 }, { "epoch": 293.654037886341, "eval_loss": 2.9568257331848145, "eval_mae": 1.2914390563964844, "eval_mse": 2.9568257331848145, "eval_r2": 0.1091228723526001, "eval_rmse": 1.7195423034007666, "eval_runtime": 9.1967, "eval_samples_per_second": 436.135, "eval_steps_per_second": 13.701, "step": 37000 }, { "epoch": 293.7337986041874, "grad_norm": 379.95611572265625, "learning_rate": 1.8265789407555746e-07, "loss": 22.5124, "step": 37010 }, { "epoch": 293.8135593220339, "grad_norm": 483.12371826171875, "learning_rate": 1.8253774874955447e-07, "loss": 22.6095, "step": 37020 }, { "epoch": 293.89332003988034, "grad_norm": 291.4644775390625, "learning_rate": 1.8241762022586542e-07, "loss": 22.6208, "step": 37030 }, { "epoch": 293.97308075772685, "grad_norm": 459.98358154296875, "learning_rate": 1.8229750853440995e-07, "loss": 21.8293, "step": 37040 }, { "epoch": 294.0478564307079, "grad_norm": 734.4721069335938, "learning_rate": 1.821774137051034e-07, "loss": 22.4051, "step": 37050 }, { "epoch": 294.12761714855435, "grad_norm": 1470.17333984375, "learning_rate": 1.8205733576785675e-07, "loss": 22.1222, "step": 37060 }, { "epoch": 294.2073778664008, "grad_norm": 166.06588745117188, "learning_rate": 1.8193727475257695e-07, "loss": 23.2323, "step": 37070 }, { "epoch": 294.28713858424726, "grad_norm": 1280.9061279296875, "learning_rate": 1.8181723068916669e-07, "loss": 22.5115, "step": 37080 }, { "epoch": 294.3668993020937, "grad_norm": 253.1221466064453, "learning_rate": 1.8169720360752455e-07, "loss": 22.1583, "step": 37090 }, { "epoch": 294.44666001994017, "grad_norm": 348.7823791503906, "learning_rate": 1.8157719353754463e-07, "loss": 22.086, "step": 37100 }, { "epoch": 294.44666001994017, "eval_loss": 2.9736855030059814, "eval_mae": 1.2832196950912476, "eval_mse": 2.9736857414245605, "eval_r2": 0.10404306650161743, "eval_rmse": 1.7244378044523845, "eval_runtime": 9.0936, "eval_samples_per_second": 441.078, 
"eval_steps_per_second": 13.856, "step": 37100 }, { "epoch": 294.5264207377866, "grad_norm": 561.4784545898438, "learning_rate": 1.8145720050911693e-07, "loss": 23.6026, "step": 37110 }, { "epoch": 294.6061814556331, "grad_norm": 459.48199462890625, "learning_rate": 1.8133722455212725e-07, "loss": 24.0519, "step": 37120 }, { "epoch": 294.6859421734796, "grad_norm": 614.2304077148438, "learning_rate": 1.8121726569645713e-07, "loss": 23.5927, "step": 37130 }, { "epoch": 294.76570289132604, "grad_norm": 1104.4097900390625, "learning_rate": 1.810973239719839e-07, "loss": 20.4572, "step": 37140 }, { "epoch": 294.8454636091725, "grad_norm": 468.27069091796875, "learning_rate": 1.8097739940858028e-07, "loss": 23.6405, "step": 37150 }, { "epoch": 294.92522432701895, "grad_norm": 1032.1700439453125, "learning_rate": 1.8085749203611513e-07, "loss": 23.5576, "step": 37160 }, { "epoch": 295.0, "grad_norm": 501.94989013671875, "learning_rate": 1.8073760188445295e-07, "loss": 23.5079, "step": 37170 }, { "epoch": 295.07976071784645, "grad_norm": 586.5398559570312, "learning_rate": 1.8061772898345385e-07, "loss": 22.7941, "step": 37180 }, { "epoch": 295.1595214356929, "grad_norm": 555.3281860351562, "learning_rate": 1.8049787336297352e-07, "loss": 22.9314, "step": 37190 }, { "epoch": 295.23928215353936, "grad_norm": 343.9189147949219, "learning_rate": 1.8037803505286353e-07, "loss": 22.1603, "step": 37200 }, { "epoch": 295.23928215353936, "eval_loss": 2.9602396488189697, "eval_mae": 1.2886160612106323, "eval_mse": 2.9602396488189697, "eval_r2": 0.10809427499771118, "eval_rmse": 1.7205346985222267, "eval_runtime": 9.1032, "eval_samples_per_second": 440.612, "eval_steps_per_second": 13.841, "step": 37200 }, { "epoch": 295.3190428713858, "grad_norm": 389.4377746582031, "learning_rate": 1.8025821408297125e-07, "loss": 24.6754, "step": 37210 }, { "epoch": 295.39880358923233, "grad_norm": 999.9193725585938, "learning_rate": 1.801384104831395e-07, "loss": 23.0935, "step": 37220 }, { 
"epoch": 295.4785643070788, "grad_norm": 230.59426879882812, "learning_rate": 1.800186242832069e-07, "loss": 22.4545, "step": 37230 }, { "epoch": 295.55832502492524, "grad_norm": 1362.988037109375, "learning_rate": 1.798988555130076e-07, "loss": 23.5041, "step": 37240 }, { "epoch": 295.6380857427717, "grad_norm": 479.1122131347656, "learning_rate": 1.7977910420237157e-07, "loss": 22.2714, "step": 37250 }, { "epoch": 295.71784646061815, "grad_norm": 301.0849304199219, "learning_rate": 1.7965937038112433e-07, "loss": 23.3649, "step": 37260 }, { "epoch": 295.7976071784646, "grad_norm": 782.4691162109375, "learning_rate": 1.7953965407908712e-07, "loss": 23.3914, "step": 37270 }, { "epoch": 295.87736789631106, "grad_norm": 455.13458251953125, "learning_rate": 1.7941995532607684e-07, "loss": 24.1414, "step": 37280 }, { "epoch": 295.9571286141575, "grad_norm": 1449.0589599609375, "learning_rate": 1.7930027415190584e-07, "loss": 21.9694, "step": 37290 }, { "epoch": 296.03190428713856, "grad_norm": 803.688232421875, "learning_rate": 1.7918061058638218e-07, "loss": 19.707, "step": 37300 }, { "epoch": 296.03190428713856, "eval_loss": 2.9595704078674316, "eval_mae": 1.285779356956482, "eval_mse": 2.9595704078674316, "eval_r2": 0.10829591751098633, "eval_rmse": 1.7203402012007485, "eval_runtime": 9.0877, "eval_samples_per_second": 441.365, "eval_steps_per_second": 13.865, "step": 37300 }, { "epoch": 296.11166500498507, "grad_norm": 189.18777465820312, "learning_rate": 1.790609646593096e-07, "loss": 24.1162, "step": 37310 }, { "epoch": 296.1914257228315, "grad_norm": 689.5313110351562, "learning_rate": 1.789413364004876e-07, "loss": 23.4291, "step": 37320 }, { "epoch": 296.271186440678, "grad_norm": 630.0205688476562, "learning_rate": 1.788217258397108e-07, "loss": 22.7952, "step": 37330 }, { "epoch": 296.35094715852443, "grad_norm": 521.653564453125, "learning_rate": 1.7870213300676985e-07, "loss": 22.0444, "step": 37340 }, { "epoch": 296.4307078763709, "grad_norm": 
375.9777526855469, "learning_rate": 1.7858255793145075e-07, "loss": 22.5383, "step": 37350 }, { "epoch": 296.51046859421734, "grad_norm": 742.4426879882812, "learning_rate": 1.7846300064353522e-07, "loss": 23.0361, "step": 37360 }, { "epoch": 296.5902293120638, "grad_norm": 624.1666870117188, "learning_rate": 1.7835541431725244e-07, "loss": 23.834, "step": 37370 }, { "epoch": 296.66999002991025, "grad_norm": 500.9219055175781, "learning_rate": 1.782358909074363e-07, "loss": 25.7155, "step": 37380 }, { "epoch": 296.7497507477567, "grad_norm": 322.7745361328125, "learning_rate": 1.7811638537136547e-07, "loss": 23.0646, "step": 37390 }, { "epoch": 296.8295114656032, "grad_norm": 221.01759338378906, "learning_rate": 1.779968977388044e-07, "loss": 22.1865, "step": 37400 }, { "epoch": 296.8295114656032, "eval_loss": 2.9581120014190674, "eval_mae": 1.294386863708496, "eval_mse": 2.9581120014190674, "eval_r2": 0.10873532295227051, "eval_rmse": 1.7199162774446515, "eval_runtime": 9.1113, "eval_samples_per_second": 440.224, "eval_steps_per_second": 13.829, "step": 37400 }, { "epoch": 296.9092721834497, "grad_norm": 1115.1885986328125, "learning_rate": 1.7787742803951275e-07, "loss": 23.3881, "step": 37410 }, { "epoch": 296.9890329012961, "grad_norm": 332.14593505859375, "learning_rate": 1.7775797630324617e-07, "loss": 21.0627, "step": 37420 }, { "epoch": 297.0638085742772, "grad_norm": 678.20703125, "learning_rate": 1.7763854255975542e-07, "loss": 20.6727, "step": 37430 }, { "epoch": 297.14356929212363, "grad_norm": 931.6592407226562, "learning_rate": 1.77519126838787e-07, "loss": 25.005, "step": 37440 }, { "epoch": 297.2233300099701, "grad_norm": 253.5115966796875, "learning_rate": 1.7739972917008296e-07, "loss": 22.7759, "step": 37450 }, { "epoch": 297.30309072781654, "grad_norm": 328.89422607421875, "learning_rate": 1.772803495833806e-07, "loss": 23.3569, "step": 37460 }, { "epoch": 297.382851445663, "grad_norm": 252.57533264160156, "learning_rate": 
1.7716098810841325e-07, "loss": 22.2182, "step": 37470 }, { "epoch": 297.46261216350945, "grad_norm": 1059.1834716796875, "learning_rate": 1.7704164477490903e-07, "loss": 22.699, "step": 37480 }, { "epoch": 297.54237288135596, "grad_norm": 206.7607879638672, "learning_rate": 1.7692231961259193e-07, "loss": 23.1777, "step": 37490 }, { "epoch": 297.6221335992024, "grad_norm": 553.6284790039062, "learning_rate": 1.7680301265118148e-07, "loss": 23.6693, "step": 37500 }, { "epoch": 297.6221335992024, "eval_loss": 2.956035614013672, "eval_mae": 1.2902369499206543, "eval_mse": 2.9560353755950928, "eval_r2": 0.10936099290847778, "eval_rmse": 1.7193124717732646, "eval_runtime": 9.0791, "eval_samples_per_second": 441.785, "eval_steps_per_second": 13.878, "step": 37500 } ], "logging_steps": 10, "max_steps": 63000, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 20, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6224599394638256e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }