{
"best_global_step": 35000,
"best_metric": 0.0021512035746127367,
"best_model_checkpoint": "D:\\Task_design\\Scene\\outputs\\qwen7b-lora-will_half_fp16_v2\\checkpoint-35000",
"epoch": 1.0,
"eval_steps": 1250,
"global_step": 35821,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013958633589357939,
"grad_norm": 2320.0,
"learning_rate": 9.116279069767441e-06,
"loss": 120.7821,
"step": 50
},
{
"epoch": 0.0027917267178715877,
"grad_norm": 752.0,
"learning_rate": 1.841860465116279e-05,
"loss": 1.6562,
"step": 100
},
{
"epoch": 0.004187590076807381,
"grad_norm": 264.0,
"learning_rate": 2.772093023255814e-05,
"loss": 0.5144,
"step": 150
},
{
"epoch": 0.005583453435743175,
"grad_norm": 3120.0,
"learning_rate": 3.702325581395349e-05,
"loss": 0.9009,
"step": 200
},
{
"epoch": 0.006979316794678969,
"grad_norm": 1296.0,
"learning_rate": 4.632558139534884e-05,
"loss": 1.4696,
"step": 250
},
{
"epoch": 0.008375180153614763,
"grad_norm": 3632.0,
"learning_rate": 5.562790697674419e-05,
"loss": 1.8122,
"step": 300
},
{
"epoch": 0.009771043512550556,
"grad_norm": 600.0,
"learning_rate": 6.493023255813954e-05,
"loss": 1.605,
"step": 350
},
{
"epoch": 0.01116690687148635,
"grad_norm": 1888.0,
"learning_rate": 7.423255813953489e-05,
"loss": 1.046,
"step": 400
},
{
"epoch": 0.012562770230422144,
"grad_norm": 137.0,
"learning_rate": 8.353488372093023e-05,
"loss": 1.1465,
"step": 450
},
{
"epoch": 0.013958633589357937,
"grad_norm": 1984.0,
"learning_rate": 9.283720930232559e-05,
"loss": 5.8899,
"step": 500
},
{
"epoch": 0.015354496948293732,
"grad_norm": 2528.0,
"learning_rate": 0.00010213953488372094,
"loss": 1.0712,
"step": 550
},
{
"epoch": 0.016750360307229525,
"grad_norm": 4.34375,
"learning_rate": 0.00011144186046511629,
"loss": 0.4232,
"step": 600
},
{
"epoch": 0.01814622366616532,
"grad_norm": 249.0,
"learning_rate": 0.00012074418604651163,
"loss": 0.837,
"step": 650
},
{
"epoch": 0.019542087025101112,
"grad_norm": 1224.0,
"learning_rate": 0.000130046511627907,
"loss": 0.4405,
"step": 700
},
{
"epoch": 0.020937950384036907,
"grad_norm": 2800.0,
"learning_rate": 0.00013934883720930234,
"loss": 1.175,
"step": 750
},
{
"epoch": 0.0223338137429727,
"grad_norm": 124.0,
"learning_rate": 0.0001486511627906977,
"loss": 1.2591,
"step": 800
},
{
"epoch": 0.023729677101908493,
"grad_norm": 1056.0,
"learning_rate": 0.00015795348837209302,
"loss": 0.4401,
"step": 850
},
{
"epoch": 0.025125540460844288,
"grad_norm": 1208.0,
"learning_rate": 0.00016725581395348837,
"loss": 0.4283,
"step": 900
},
{
"epoch": 0.026521403819780083,
"grad_norm": 1048.0,
"learning_rate": 0.00017655813953488373,
"loss": 0.4223,
"step": 950
},
{
"epoch": 0.027917267178715875,
"grad_norm": 22.25,
"learning_rate": 0.00018586046511627908,
"loss": 0.1113,
"step": 1000
},
{
"epoch": 0.02931313053765167,
"grad_norm": 127.5,
"learning_rate": 0.00019516279069767444,
"loss": 0.0873,
"step": 1050
},
{
"epoch": 0.030708993896587464,
"grad_norm": 712.0,
"learning_rate": 0.00019986185460196858,
"loss": 0.5109,
"step": 1100
},
{
"epoch": 0.032104857255523256,
"grad_norm": 180.0,
"learning_rate": 0.0001995740516894031,
"loss": 0.4199,
"step": 1150
},
{
"epoch": 0.03350072061445905,
"grad_norm": 984.0,
"learning_rate": 0.00019928624877683763,
"loss": 0.6015,
"step": 1200
},
{
"epoch": 0.034896583973394846,
"grad_norm": 616.0,
"learning_rate": 0.00019899844586427216,
"loss": 0.5867,
"step": 1250
},
{
"epoch": 0.034896583973394846,
"eval_loss": 0.0031845432240515947,
"eval_mae": 0.045285664498806,
"eval_rmse": 0.05643175542354584,
"eval_runtime": 320.3726,
"eval_samples_per_second": 6.243,
"eval_steps_per_second": 6.243,
"step": 1250
},
{
"epoch": 0.03629244733233064,
"grad_norm": 132.0,
"learning_rate": 0.00019871064295170666,
"loss": 0.3113,
"step": 1300
},
{
"epoch": 0.03768831069126643,
"grad_norm": 26.375,
"learning_rate": 0.0001984228400391412,
"loss": 0.0702,
"step": 1350
},
{
"epoch": 0.039084174050202224,
"grad_norm": 212.0,
"learning_rate": 0.00019813503712657574,
"loss": 0.2048,
"step": 1400
},
{
"epoch": 0.04048003740913802,
"grad_norm": 260.0,
"learning_rate": 0.00019784723421401027,
"loss": 0.0669,
"step": 1450
},
{
"epoch": 0.041875900768073814,
"grad_norm": 113.5,
"learning_rate": 0.0001975594313014448,
"loss": 0.0604,
"step": 1500
},
{
"epoch": 0.04327176412700961,
"grad_norm": 36.0,
"learning_rate": 0.00019727162838887932,
"loss": 0.0539,
"step": 1550
},
{
"epoch": 0.0446676274859454,
"grad_norm": 290.0,
"learning_rate": 0.00019698382547631382,
"loss": 0.0539,
"step": 1600
},
{
"epoch": 0.04606349084488119,
"grad_norm": 202.0,
"learning_rate": 0.00019669602256374834,
"loss": 0.0621,
"step": 1650
},
{
"epoch": 0.047459354203816986,
"grad_norm": 464.0,
"learning_rate": 0.00019640821965118287,
"loss": 0.178,
"step": 1700
},
{
"epoch": 0.04885521756275278,
"grad_norm": 108.5,
"learning_rate": 0.0001961204167386174,
"loss": 0.1804,
"step": 1750
},
{
"epoch": 0.050251080921688576,
"grad_norm": 252.0,
"learning_rate": 0.00019583261382605195,
"loss": 0.1206,
"step": 1800
},
{
"epoch": 0.05164694428062437,
"grad_norm": 1376.0,
"learning_rate": 0.00019554481091348648,
"loss": 0.8041,
"step": 1850
},
{
"epoch": 0.053042807639560166,
"grad_norm": 214.0,
"learning_rate": 0.00019525700800092098,
"loss": 0.2037,
"step": 1900
},
{
"epoch": 0.054438670998495954,
"grad_norm": 324.0,
"learning_rate": 0.0001949692050883555,
"loss": 0.2246,
"step": 1950
},
{
"epoch": 0.05583453435743175,
"grad_norm": 148.0,
"learning_rate": 0.00019468140217579003,
"loss": 0.1815,
"step": 2000
},
{
"epoch": 0.057230397716367544,
"grad_norm": 270.0,
"learning_rate": 0.00019439359926322455,
"loss": 0.1532,
"step": 2050
},
{
"epoch": 0.05862626107530334,
"grad_norm": 145.0,
"learning_rate": 0.00019410579635065908,
"loss": 0.1351,
"step": 2100
},
{
"epoch": 0.060022124434239134,
"grad_norm": 130.0,
"learning_rate": 0.00019381799343809358,
"loss": 0.1393,
"step": 2150
},
{
"epoch": 0.06141798779317493,
"grad_norm": 231.0,
"learning_rate": 0.00019353019052552813,
"loss": 0.1446,
"step": 2200
},
{
"epoch": 0.06281385115211072,
"grad_norm": 3.25,
"learning_rate": 0.00019324238761296266,
"loss": 0.1403,
"step": 2250
},
{
"epoch": 0.06420971451104651,
"grad_norm": 123.5,
"learning_rate": 0.00019295458470039719,
"loss": 0.3684,
"step": 2300
},
{
"epoch": 0.0656055778699823,
"grad_norm": 20.125,
"learning_rate": 0.0001926667817878317,
"loss": 0.0694,
"step": 2350
},
{
"epoch": 0.0670014412289181,
"grad_norm": 1.734375,
"learning_rate": 0.00019237897887526624,
"loss": 0.0897,
"step": 2400
},
{
"epoch": 0.0683973045878539,
"grad_norm": 25.0,
"learning_rate": 0.00019209117596270074,
"loss": 0.128,
"step": 2450
},
{
"epoch": 0.06979316794678969,
"grad_norm": 24.125,
"learning_rate": 0.00019180337305013526,
"loss": 0.1388,
"step": 2500
},
{
"epoch": 0.06979316794678969,
"eval_loss": 0.011298904195427895,
"eval_mae": 0.09956549108028412,
"eval_rmse": 0.10629630088806152,
"eval_runtime": 316.8013,
"eval_samples_per_second": 6.313,
"eval_steps_per_second": 6.313,
"step": 2500
},
{
"epoch": 0.07118903130572549,
"grad_norm": 19.375,
"learning_rate": 0.0001915155701375698,
"loss": 0.1087,
"step": 2550
},
{
"epoch": 0.07258489466466128,
"grad_norm": 7.8125,
"learning_rate": 0.00019122776722500432,
"loss": 0.0907,
"step": 2600
},
{
"epoch": 0.07398075802359708,
"grad_norm": 13.8125,
"learning_rate": 0.00019093996431243887,
"loss": 0.0812,
"step": 2650
},
{
"epoch": 0.07537662138253286,
"grad_norm": 24.25,
"learning_rate": 0.0001906521613998734,
"loss": 0.0707,
"step": 2700
},
{
"epoch": 0.07677248474146865,
"grad_norm": 18.125,
"learning_rate": 0.0001903643584873079,
"loss": 0.0657,
"step": 2750
},
{
"epoch": 0.07816834810040445,
"grad_norm": 9.5625,
"learning_rate": 0.00019007655557474242,
"loss": 0.0598,
"step": 2800
},
{
"epoch": 0.07956421145934024,
"grad_norm": 15.3125,
"learning_rate": 0.00018978875266217695,
"loss": 0.0549,
"step": 2850
},
{
"epoch": 0.08096007481827604,
"grad_norm": 9.9375,
"learning_rate": 0.00018950094974961147,
"loss": 0.052,
"step": 2900
},
{
"epoch": 0.08235593817721183,
"grad_norm": 22.625,
"learning_rate": 0.000189213146837046,
"loss": 0.0501,
"step": 2950
},
{
"epoch": 0.08375180153614763,
"grad_norm": 16.125,
"learning_rate": 0.00018892534392448053,
"loss": 0.0433,
"step": 3000
},
{
"epoch": 0.08514766489508342,
"grad_norm": 19.875,
"learning_rate": 0.00018863754101191505,
"loss": 0.0489,
"step": 3050
},
{
"epoch": 0.08654352825401922,
"grad_norm": 15.625,
"learning_rate": 0.00018834973809934958,
"loss": 0.0422,
"step": 3100
},
{
"epoch": 0.08793939161295501,
"grad_norm": 15.0625,
"learning_rate": 0.0001880619351867841,
"loss": 0.0415,
"step": 3150
},
{
"epoch": 0.0893352549718908,
"grad_norm": 12.5625,
"learning_rate": 0.00018777413227421863,
"loss": 0.0372,
"step": 3200
},
{
"epoch": 0.0907311183308266,
"grad_norm": 13.875,
"learning_rate": 0.00018748632936165316,
"loss": 0.0393,
"step": 3250
},
{
"epoch": 0.09212698168976238,
"grad_norm": 19.5,
"learning_rate": 0.00018719852644908766,
"loss": 0.0396,
"step": 3300
},
{
"epoch": 0.09352284504869818,
"grad_norm": 14.875,
"learning_rate": 0.00018691072353652218,
"loss": 0.0293,
"step": 3350
},
{
"epoch": 0.09491870840763397,
"grad_norm": 76.5,
"learning_rate": 0.0001866229206239567,
"loss": 0.0334,
"step": 3400
},
{
"epoch": 0.09631457176656977,
"grad_norm": 9.625,
"learning_rate": 0.00018633511771139124,
"loss": 0.0627,
"step": 3450
},
{
"epoch": 0.09771043512550556,
"grad_norm": 15.3125,
"learning_rate": 0.0001860473147988258,
"loss": 0.0262,
"step": 3500
},
{
"epoch": 0.09910629848444136,
"grad_norm": 15.625,
"learning_rate": 0.00018575951188626032,
"loss": 0.0318,
"step": 3550
},
{
"epoch": 0.10050216184337715,
"grad_norm": 17.0,
"learning_rate": 0.00018547170897369482,
"loss": 0.0431,
"step": 3600
},
{
"epoch": 0.10189802520231295,
"grad_norm": 3.3125,
"learning_rate": 0.00018518390606112934,
"loss": 0.0086,
"step": 3650
},
{
"epoch": 0.10329388856124874,
"grad_norm": 1.0546875,
"learning_rate": 0.00018489610314856387,
"loss": 0.0084,
"step": 3700
},
{
"epoch": 0.10468975192018454,
"grad_norm": 3.21875,
"learning_rate": 0.0001846083002359984,
"loss": 0.0117,
"step": 3750
},
{
"epoch": 0.10468975192018454,
"eval_loss": 0.0015581471379846334,
"eval_mae": 0.036334387958049774,
"eval_rmse": 0.03947337344288826,
"eval_runtime": 319.7128,
"eval_samples_per_second": 6.256,
"eval_steps_per_second": 6.256,
"step": 3750
},
{
"epoch": 0.10608561527912033,
"grad_norm": 42.75,
"learning_rate": 0.00018432049732343292,
"loss": 0.0319,
"step": 3800
},
{
"epoch": 0.10748147863805613,
"grad_norm": 39.25,
"learning_rate": 0.00018403269441086745,
"loss": 0.0341,
"step": 3850
},
{
"epoch": 0.10887734199699191,
"grad_norm": 44.25,
"learning_rate": 0.00018374489149830197,
"loss": 0.0326,
"step": 3900
},
{
"epoch": 0.1102732053559277,
"grad_norm": 35.0,
"learning_rate": 0.0001834570885857365,
"loss": 0.032,
"step": 3950
},
{
"epoch": 0.1116690687148635,
"grad_norm": 36.75,
"learning_rate": 0.00018316928567317103,
"loss": 0.0306,
"step": 4000
},
{
"epoch": 0.11306493207379929,
"grad_norm": 25.875,
"learning_rate": 0.00018288148276060555,
"loss": 0.0298,
"step": 4050
},
{
"epoch": 0.11446079543273509,
"grad_norm": 38.0,
"learning_rate": 0.00018259367984804008,
"loss": 0.031,
"step": 4100
},
{
"epoch": 0.11585665879167088,
"grad_norm": 33.0,
"learning_rate": 0.0001823058769354746,
"loss": 0.0284,
"step": 4150
},
{
"epoch": 0.11725252215060668,
"grad_norm": 30.125,
"learning_rate": 0.0001820180740229091,
"loss": 0.0229,
"step": 4200
},
{
"epoch": 0.11864838550954247,
"grad_norm": 14.375,
"learning_rate": 0.00018173027111034363,
"loss": 0.0119,
"step": 4250
},
{
"epoch": 0.12004424886847827,
"grad_norm": 40.5,
"learning_rate": 0.00018144246819777818,
"loss": 0.0351,
"step": 4300
},
{
"epoch": 0.12144011222741406,
"grad_norm": 50.0,
"learning_rate": 0.0001811546652852127,
"loss": 0.0157,
"step": 4350
},
{
"epoch": 0.12283597558634986,
"grad_norm": 12.25,
"learning_rate": 0.00018086686237264724,
"loss": 0.0222,
"step": 4400
},
{
"epoch": 0.12423183894528565,
"grad_norm": 27.0,
"learning_rate": 0.00018057905946008174,
"loss": 0.0129,
"step": 4450
},
{
"epoch": 0.12562770230422143,
"grad_norm": 4.6875,
"learning_rate": 0.00018029125654751626,
"loss": 0.0162,
"step": 4500
},
{
"epoch": 0.12702356566315723,
"grad_norm": 6.9375,
"learning_rate": 0.0001800034536349508,
"loss": 0.0038,
"step": 4550
},
{
"epoch": 0.12841942902209302,
"grad_norm": 2.34375,
"learning_rate": 0.00017971565072238531,
"loss": 0.0032,
"step": 4600
},
{
"epoch": 0.12981529238102882,
"grad_norm": 9.0,
"learning_rate": 0.00017942784780981984,
"loss": 0.0035,
"step": 4650
},
{
"epoch": 0.1312111557399646,
"grad_norm": 5.0625,
"learning_rate": 0.00017914004489725437,
"loss": 0.0022,
"step": 4700
},
{
"epoch": 0.1326070190989004,
"grad_norm": 9.4375,
"learning_rate": 0.0001788522419846889,
"loss": 0.0017,
"step": 4750
},
{
"epoch": 0.1340028824578362,
"grad_norm": 3.53125,
"learning_rate": 0.00017856443907212342,
"loss": 0.0024,
"step": 4800
},
{
"epoch": 0.135398745816772,
"grad_norm": 11.4375,
"learning_rate": 0.00017827663615955795,
"loss": 0.0022,
"step": 4850
},
{
"epoch": 0.1367946091757078,
"grad_norm": 11.875,
"learning_rate": 0.00017798883324699247,
"loss": 0.0038,
"step": 4900
},
{
"epoch": 0.1381904725346436,
"grad_norm": 1.8125,
"learning_rate": 0.000177701030334427,
"loss": 0.0036,
"step": 4950
},
{
"epoch": 0.13958633589357938,
"grad_norm": 10.75,
"learning_rate": 0.00017741322742186153,
"loss": 0.0028,
"step": 5000
},
{
"epoch": 0.13958633589357938,
"eval_loss": 0.00022272480418905616,
"eval_mae": 0.012231973931193352,
"eval_rmse": 0.01492396742105484,
"eval_runtime": 319.6535,
"eval_samples_per_second": 6.257,
"eval_steps_per_second": 6.257,
"step": 5000
},
{
"epoch": 0.14098219925251518,
"grad_norm": 7.15625,
"learning_rate": 0.00017712542450929602,
"loss": 0.0055,
"step": 5050
},
{
"epoch": 0.14237806261145097,
"grad_norm": 6.84375,
"learning_rate": 0.00017683762159673055,
"loss": 0.0032,
"step": 5100
},
{
"epoch": 0.14377392597038677,
"grad_norm": 11.75,
"learning_rate": 0.0001765498186841651,
"loss": 0.0031,
"step": 5150
},
{
"epoch": 0.14516978932932256,
"grad_norm": 8.1875,
"learning_rate": 0.00017626201577159963,
"loss": 0.0027,
"step": 5200
},
{
"epoch": 0.14656565268825836,
"grad_norm": 10.0625,
"learning_rate": 0.00017597421285903416,
"loss": 0.0025,
"step": 5250
},
{
"epoch": 0.14796151604719415,
"grad_norm": 10.9375,
"learning_rate": 0.00017568640994646868,
"loss": 0.0027,
"step": 5300
},
{
"epoch": 0.14935737940612995,
"grad_norm": 9.875,
"learning_rate": 0.00017539860703390318,
"loss": 0.0028,
"step": 5350
},
{
"epoch": 0.15075324276506571,
"grad_norm": 7.5625,
"learning_rate": 0.0001751108041213377,
"loss": 0.0026,
"step": 5400
},
{
"epoch": 0.1521491061240015,
"grad_norm": 22.875,
"learning_rate": 0.00017482300120877223,
"loss": 0.0032,
"step": 5450
},
{
"epoch": 0.1535449694829373,
"grad_norm": 50.75,
"learning_rate": 0.00017453519829620676,
"loss": 0.0356,
"step": 5500
},
{
"epoch": 0.1549408328418731,
"grad_norm": 48.25,
"learning_rate": 0.0001742473953836413,
"loss": 0.0495,
"step": 5550
},
{
"epoch": 0.1563366962008089,
"grad_norm": 8.8125,
"learning_rate": 0.00017395959247107581,
"loss": 0.048,
"step": 5600
},
{
"epoch": 0.1577325595597447,
"grad_norm": 54.25,
"learning_rate": 0.00017367178955851034,
"loss": 0.0461,
"step": 5650
},
{
"epoch": 0.15912842291868048,
"grad_norm": 63.75,
"learning_rate": 0.00017338398664594487,
"loss": 0.0495,
"step": 5700
},
{
"epoch": 0.16052428627761628,
"grad_norm": 55.0,
"learning_rate": 0.0001730961837333794,
"loss": 0.033,
"step": 5750
},
{
"epoch": 0.16192014963655207,
"grad_norm": 39.75,
"learning_rate": 0.00017280838082081392,
"loss": 0.0453,
"step": 5800
},
{
"epoch": 0.16331601299548787,
"grad_norm": 32.75,
"learning_rate": 0.00017252057790824845,
"loss": 0.0417,
"step": 5850
},
{
"epoch": 0.16471187635442366,
"grad_norm": 44.5,
"learning_rate": 0.00017223277499568294,
"loss": 0.044,
"step": 5900
},
{
"epoch": 0.16610773971335946,
"grad_norm": 30.875,
"learning_rate": 0.00017194497208311747,
"loss": 0.0414,
"step": 5950
},
{
"epoch": 0.16750360307229525,
"grad_norm": 31.0,
"learning_rate": 0.00017165716917055202,
"loss": 0.0392,
"step": 6000
},
{
"epoch": 0.16889946643123105,
"grad_norm": 52.75,
"learning_rate": 0.00017136936625798655,
"loss": 0.0404,
"step": 6050
},
{
"epoch": 0.17029532979016684,
"grad_norm": 39.5,
"learning_rate": 0.00017108156334542108,
"loss": 0.042,
"step": 6100
},
{
"epoch": 0.17169119314910264,
"grad_norm": 19.375,
"learning_rate": 0.0001707937604328556,
"loss": 0.006,
"step": 6150
},
{
"epoch": 0.17308705650803843,
"grad_norm": 42.5,
"learning_rate": 0.0001705059575202901,
"loss": 0.014,
"step": 6200
},
{
"epoch": 0.17448291986697423,
"grad_norm": 18.5,
"learning_rate": 0.00017021815460772463,
"loss": 0.0145,
"step": 6250
},
{
"epoch": 0.17448291986697423,
"eval_loss": 0.0018714327597990632,
"eval_mae": 0.04104918614029884,
"eval_rmse": 0.043260060250759125,
"eval_runtime": 318.582,
"eval_samples_per_second": 6.278,
"eval_steps_per_second": 6.278,
"step": 6250
},
{
"epoch": 0.17587878322591002,
"grad_norm": 7.4375,
"learning_rate": 0.00016993035169515915,
"loss": 0.0104,
"step": 6300
},
{
"epoch": 0.17727464658484582,
"grad_norm": 34.25,
"learning_rate": 0.00016964254878259368,
"loss": 0.0038,
"step": 6350
},
{
"epoch": 0.1786705099437816,
"grad_norm": 18.125,
"learning_rate": 0.0001693547458700282,
"loss": 0.015,
"step": 6400
},
{
"epoch": 0.1800663733027174,
"grad_norm": 0.494140625,
"learning_rate": 0.00016906694295746276,
"loss": 0.0137,
"step": 6450
},
{
"epoch": 0.1814622366616532,
"grad_norm": 12.5,
"learning_rate": 0.00016877914004489726,
"loss": 0.0044,
"step": 6500
},
{
"epoch": 0.182858100020589,
"grad_norm": 9.625,
"learning_rate": 0.0001684913371323318,
"loss": 0.0156,
"step": 6550
},
{
"epoch": 0.18425396337952477,
"grad_norm": 6.34375,
"learning_rate": 0.0001682035342197663,
"loss": 0.0021,
"step": 6600
},
{
"epoch": 0.18564982673846056,
"grad_norm": 11.9375,
"learning_rate": 0.00016791573130720084,
"loss": 0.0019,
"step": 6650
},
{
"epoch": 0.18704569009739636,
"grad_norm": 5.65625,
"learning_rate": 0.00016762792839463537,
"loss": 0.0019,
"step": 6700
},
{
"epoch": 0.18844155345633215,
"grad_norm": 7.625,
"learning_rate": 0.00016734012548206986,
"loss": 0.0019,
"step": 6750
},
{
"epoch": 0.18983741681526795,
"grad_norm": 27.375,
"learning_rate": 0.00016705232256950442,
"loss": 0.0029,
"step": 6800
},
{
"epoch": 0.19123328017420374,
"grad_norm": 3.171875,
"learning_rate": 0.00016676451965693894,
"loss": 0.0114,
"step": 6850
},
{
"epoch": 0.19262914353313954,
"grad_norm": 6.4375,
"learning_rate": 0.00016647671674437347,
"loss": 0.0166,
"step": 6900
},
{
"epoch": 0.19402500689207533,
"grad_norm": 16.125,
"learning_rate": 0.000166188913831808,
"loss": 0.0129,
"step": 6950
},
{
"epoch": 0.19542087025101113,
"grad_norm": 7.21875,
"learning_rate": 0.00016590111091924252,
"loss": 0.0052,
"step": 7000
},
{
"epoch": 0.19681673360994692,
"grad_norm": 15.1875,
"learning_rate": 0.00016561330800667702,
"loss": 0.0051,
"step": 7050
},
{
"epoch": 0.19821259696888271,
"grad_norm": 6.46875,
"learning_rate": 0.00016532550509411155,
"loss": 0.0049,
"step": 7100
},
{
"epoch": 0.1996084603278185,
"grad_norm": 12.0625,
"learning_rate": 0.00016503770218154608,
"loss": 0.0051,
"step": 7150
},
{
"epoch": 0.2010043236867543,
"grad_norm": 3.53125,
"learning_rate": 0.0001647498992689806,
"loss": 0.0044,
"step": 7200
},
{
"epoch": 0.2024001870456901,
"grad_norm": 4.84375,
"learning_rate": 0.00016446209635641513,
"loss": 0.0066,
"step": 7250
},
{
"epoch": 0.2037960504046259,
"grad_norm": 4.03125,
"learning_rate": 0.00016417429344384968,
"loss": 0.0072,
"step": 7300
},
{
"epoch": 0.2051919137635617,
"grad_norm": 3.203125,
"learning_rate": 0.00016388649053128418,
"loss": 0.0007,
"step": 7350
},
{
"epoch": 0.20658777712249748,
"grad_norm": 4.59375,
"learning_rate": 0.0001635986876187187,
"loss": 0.001,
"step": 7400
},
{
"epoch": 0.20798364048143328,
"grad_norm": 8.375,
"learning_rate": 0.00016331088470615323,
"loss": 0.0017,
"step": 7450
},
{
"epoch": 0.20937950384036907,
"grad_norm": 7.09375,
"learning_rate": 0.00016302308179358776,
"loss": 0.0018,
"step": 7500
},
{
"epoch": 0.20937950384036907,
"eval_loss": 0.00019140982476528734,
"eval_mae": 0.012697141617536545,
"eval_rmse": 0.013835093937814236,
"eval_runtime": 321.0771,
"eval_samples_per_second": 6.229,
"eval_steps_per_second": 6.229,
"step": 7500
},
{
"epoch": 0.21077536719930487,
"grad_norm": 5.3125,
"learning_rate": 0.00016273527888102229,
"loss": 0.0018,
"step": 7550
},
{
"epoch": 0.21217123055824066,
"grad_norm": 12.4375,
"learning_rate": 0.0001624474759684568,
"loss": 0.0018,
"step": 7600
},
{
"epoch": 0.21356709391717646,
"grad_norm": 7.75,
"learning_rate": 0.00016215967305589134,
"loss": 0.0012,
"step": 7650
},
{
"epoch": 0.21496295727611225,
"grad_norm": 1.765625,
"learning_rate": 0.00016187187014332586,
"loss": 0.0029,
"step": 7700
},
{
"epoch": 0.21635882063504802,
"grad_norm": 13.6875,
"learning_rate": 0.0001615840672307604,
"loss": 0.0057,
"step": 7750
},
{
"epoch": 0.21775468399398382,
"grad_norm": 4.1875,
"learning_rate": 0.00016129626431819492,
"loss": 0.0058,
"step": 7800
},
{
"epoch": 0.2191505473529196,
"grad_norm": 29.375,
"learning_rate": 0.00016100846140562944,
"loss": 0.0038,
"step": 7850
},
{
"epoch": 0.2205464107118554,
"grad_norm": 3.15625,
"learning_rate": 0.00016072065849306394,
"loss": 0.0031,
"step": 7900
},
{
"epoch": 0.2219422740707912,
"grad_norm": 1.140625,
"learning_rate": 0.00016043285558049847,
"loss": 0.0017,
"step": 7950
},
{
"epoch": 0.223338137429727,
"grad_norm": 2.703125,
"learning_rate": 0.000160145052667933,
"loss": 0.0019,
"step": 8000
},
{
"epoch": 0.2247340007886628,
"grad_norm": 13.0625,
"learning_rate": 0.00015985724975536752,
"loss": 0.0015,
"step": 8050
},
{
"epoch": 0.22612986414759859,
"grad_norm": 1.1328125,
"learning_rate": 0.00015956944684280208,
"loss": 0.0025,
"step": 8100
},
{
"epoch": 0.22752572750653438,
"grad_norm": 20.0,
"learning_rate": 0.0001592816439302366,
"loss": 0.0018,
"step": 8150
},
{
"epoch": 0.22892159086547018,
"grad_norm": 3.625,
"learning_rate": 0.0001589938410176711,
"loss": 0.0024,
"step": 8200
},
{
"epoch": 0.23031745422440597,
"grad_norm": 11.625,
"learning_rate": 0.00015870603810510563,
"loss": 0.0071,
"step": 8250
},
{
"epoch": 0.23171331758334177,
"grad_norm": 13.125,
"learning_rate": 0.00015841823519254015,
"loss": 0.0128,
"step": 8300
},
{
"epoch": 0.23310918094227756,
"grad_norm": 17.625,
"learning_rate": 0.00015813043227997468,
"loss": 0.0122,
"step": 8350
},
{
"epoch": 0.23450504430121336,
"grad_norm": 14.0625,
"learning_rate": 0.0001578426293674092,
"loss": 0.0125,
"step": 8400
},
{
"epoch": 0.23590090766014915,
"grad_norm": 11.125,
"learning_rate": 0.00015755482645484373,
"loss": 0.0129,
"step": 8450
},
{
"epoch": 0.23729677101908495,
"grad_norm": 22.875,
"learning_rate": 0.00015726702354227826,
"loss": 0.0136,
"step": 8500
},
{
"epoch": 0.23869263437802074,
"grad_norm": 11.4375,
"learning_rate": 0.00015697922062971278,
"loss": 0.0118,
"step": 8550
},
{
"epoch": 0.24008849773695654,
"grad_norm": 14.5625,
"learning_rate": 0.0001566914177171473,
"loss": 0.0032,
"step": 8600
},
{
"epoch": 0.24148436109589233,
"grad_norm": 9.5625,
"learning_rate": 0.00015640361480458184,
"loss": 0.0014,
"step": 8650
},
{
"epoch": 0.24288022445482813,
"grad_norm": 7.28125,
"learning_rate": 0.00015611581189201636,
"loss": 0.0016,
"step": 8700
},
{
"epoch": 0.24427608781376392,
"grad_norm": 4.53125,
"learning_rate": 0.0001558280089794509,
"loss": 0.0013,
"step": 8750
},
{
"epoch": 0.24427608781376392,
"eval_loss": 0.00014836130139883608,
"eval_mae": 0.01122231688350439,
"eval_rmse": 0.01218036562204361,
"eval_runtime": 319.6629,
"eval_samples_per_second": 6.257,
"eval_steps_per_second": 6.257,
"step": 8750
},
{
"epoch": 0.24567195117269972,
"grad_norm": 3.875,
"learning_rate": 0.0001555402060668854,
"loss": 0.0013,
"step": 8800
},
{
"epoch": 0.2470678145316355,
"grad_norm": 10.4375,
"learning_rate": 0.00015525240315431992,
"loss": 0.0017,
"step": 8850
},
{
"epoch": 0.2484636778905713,
"grad_norm": 2.96875,
"learning_rate": 0.00015496460024175444,
"loss": 0.0024,
"step": 8900
},
{
"epoch": 0.24985954124950707,
"grad_norm": 10.1875,
"learning_rate": 0.000154676797329189,
"loss": 0.0051,
"step": 8950
},
{
"epoch": 0.25125540460844287,
"grad_norm": 3.015625,
"learning_rate": 0.00015438899441662352,
"loss": 0.0012,
"step": 9000
},
{
"epoch": 0.2526512679673787,
"grad_norm": 7.90625,
"learning_rate": 0.00015410119150405802,
"loss": 0.0103,
"step": 9050
},
{
"epoch": 0.25404713132631446,
"grad_norm": 2.796875,
"learning_rate": 0.00015381338859149255,
"loss": 0.0014,
"step": 9100
},
{
"epoch": 0.2554429946852503,
"grad_norm": 27.75,
"learning_rate": 0.00015352558567892707,
"loss": 0.0141,
"step": 9150
},
{
"epoch": 0.25683885804418605,
"grad_norm": 2.65625,
"learning_rate": 0.0001532377827663616,
"loss": 0.0078,
"step": 9200
},
{
"epoch": 0.25823472140312187,
"grad_norm": 16.125,
"learning_rate": 0.00015294997985379613,
"loss": 0.0141,
"step": 9250
},
{
"epoch": 0.25963058476205764,
"grad_norm": 0.609375,
"learning_rate": 0.00015266217694123065,
"loss": 0.0038,
"step": 9300
},
{
"epoch": 0.26102644812099346,
"grad_norm": 25.0,
"learning_rate": 0.00015237437402866518,
"loss": 0.0035,
"step": 9350
},
{
"epoch": 0.2624223114799292,
"grad_norm": 16.5,
"learning_rate": 0.0001520865711160997,
"loss": 0.0106,
"step": 9400
},
{
"epoch": 0.26381817483886505,
"grad_norm": 18.375,
"learning_rate": 0.00015179876820353423,
"loss": 0.0106,
"step": 9450
},
{
"epoch": 0.2652140381978008,
"grad_norm": 16.5,
"learning_rate": 0.00015151096529096876,
"loss": 0.0104,
"step": 9500
},
{
"epoch": 0.2666099015567366,
"grad_norm": 18.25,
"learning_rate": 0.00015122316237840328,
"loss": 0.0103,
"step": 9550
},
{
"epoch": 0.2680057649156724,
"grad_norm": 18.875,
"learning_rate": 0.0001509353594658378,
"loss": 0.0098,
"step": 9600
},
{
"epoch": 0.2694016282746082,
"grad_norm": 18.625,
"learning_rate": 0.0001506475565532723,
"loss": 0.0098,
"step": 9650
},
{
"epoch": 0.270797491633544,
"grad_norm": 21.625,
"learning_rate": 0.00015035975364070684,
"loss": 0.0097,
"step": 9700
},
{
"epoch": 0.27219335499247976,
"grad_norm": 21.0,
"learning_rate": 0.00015007195072814136,
"loss": 0.0094,
"step": 9750
},
{
"epoch": 0.2735892183514156,
"grad_norm": 18.25,
"learning_rate": 0.00014978414781557592,
"loss": 0.0094,
"step": 9800
},
{
"epoch": 0.27498508171035135,
"grad_norm": 14.25,
"learning_rate": 0.00014949634490301044,
"loss": 0.0095,
"step": 9850
},
{
"epoch": 0.2763809450692872,
"grad_norm": 16.25,
"learning_rate": 0.00014920854199044497,
"loss": 0.0098,
"step": 9900
},
{
"epoch": 0.27777680842822294,
"grad_norm": 18.5,
"learning_rate": 0.00014892073907787947,
"loss": 0.0092,
"step": 9950
},
{
"epoch": 0.27917267178715877,
"grad_norm": 15.4375,
"learning_rate": 0.000148632936165314,
"loss": 0.0093,
"step": 10000
},
{
"epoch": 0.27917267178715877,
"eval_loss": 8.570039790356532e-05,
"eval_mae": 0.00781923346221447,
"eval_rmse": 0.009257450699806213,
"eval_runtime": 319.3051,
"eval_samples_per_second": 6.264,
"eval_steps_per_second": 6.264,
"step": 10000
},
{
"epoch": 0.28056853514609453,
"grad_norm": 17.0,
"learning_rate": 0.00014834513325274852,
"loss": 0.0093,
"step": 10050
},
{
"epoch": 0.28196439850503036,
"grad_norm": 12.3125,
"learning_rate": 0.00014805733034018305,
"loss": 0.0093,
"step": 10100
},
{
"epoch": 0.2833602618639661,
"grad_norm": 14.875,
"learning_rate": 0.00014776952742761757,
"loss": 0.0094,
"step": 10150
},
{
"epoch": 0.28475612522290195,
"grad_norm": 15.6875,
"learning_rate": 0.0001474817245150521,
"loss": 0.0095,
"step": 10200
},
{
"epoch": 0.2861519885818377,
"grad_norm": 5.65625,
"learning_rate": 0.00014719392160248663,
"loss": 0.003,
"step": 10250
},
{
"epoch": 0.28754785194077354,
"grad_norm": 2.40625,
"learning_rate": 0.00014690611868992115,
"loss": 0.0037,
"step": 10300
},
{
"epoch": 0.2889437152997093,
"grad_norm": 15.5625,
"learning_rate": 0.00014661831577735568,
"loss": 0.0031,
"step": 10350
},
{
"epoch": 0.2903395786586451,
"grad_norm": 7.21875,
"learning_rate": 0.0001463305128647902,
"loss": 0.0011,
"step": 10400
},
{
"epoch": 0.2917354420175809,
"grad_norm": 14.375,
"learning_rate": 0.00014604270995222473,
"loss": 0.001,
"step": 10450
},
{
"epoch": 0.2931313053765167,
"grad_norm": 24.5,
"learning_rate": 0.00014575490703965923,
"loss": 0.0058,
"step": 10500
},
{
"epoch": 0.2945271687354525,
"grad_norm": 21.75,
"learning_rate": 0.00014546710412709376,
"loss": 0.0087,
"step": 10550
},
{
"epoch": 0.2959230320943883,
"grad_norm": 23.375,
"learning_rate": 0.0001451793012145283,
"loss": 0.0082,
"step": 10600
},
{
"epoch": 0.2973188954533241,
"grad_norm": 20.875,
"learning_rate": 0.00014489149830196284,
"loss": 0.0084,
"step": 10650
},
{
"epoch": 0.2987147588122599,
"grad_norm": 23.375,
"learning_rate": 0.00014460369538939736,
"loss": 0.0079,
"step": 10700
},
{
"epoch": 0.30011062217119566,
"grad_norm": 25.75,
"learning_rate": 0.0001443158924768319,
"loss": 0.0086,
"step": 10750
},
{
"epoch": 0.30150648553013143,
"grad_norm": 8.6875,
"learning_rate": 0.0001440280895642664,
"loss": 0.0018,
"step": 10800
},
{
"epoch": 0.30290234888906725,
"grad_norm": 20.375,
"learning_rate": 0.00014374028665170091,
"loss": 0.0011,
"step": 10850
},
{
"epoch": 0.304298212248003,
"grad_norm": 11.75,
"learning_rate": 0.00014345248373913544,
"loss": 0.0047,
"step": 10900
},
{
"epoch": 0.30569407560693884,
"grad_norm": 10.375,
"learning_rate": 0.00014316468082656997,
"loss": 0.0027,
"step": 10950
},
{
"epoch": 0.3070899389658746,
"grad_norm": 7.21875,
"learning_rate": 0.0001428768779140045,
"loss": 0.0026,
"step": 11000
},
{
"epoch": 0.30848580232481043,
"grad_norm": 9.0625,
"learning_rate": 0.00014258907500143902,
"loss": 0.0026,
"step": 11050
},
{
"epoch": 0.3098816656837462,
"grad_norm": 0.1865234375,
"learning_rate": 0.00014230127208887355,
"loss": 0.0026,
"step": 11100
},
{
"epoch": 0.311277529042682,
"grad_norm": 5.78125,
"learning_rate": 0.00014201346917630807,
"loss": 0.0016,
"step": 11150
},
{
"epoch": 0.3126733924016178,
"grad_norm": 16.125,
"learning_rate": 0.0001417256662637426,
"loss": 0.0027,
"step": 11200
},
{
"epoch": 0.3140692557605536,
"grad_norm": 5.6875,
"learning_rate": 0.00014143786335117712,
"loss": 0.0033,
"step": 11250
},
{
"epoch": 0.3140692557605536,
"eval_loss": 0.00012493817484937608,
"eval_mae": 0.009413574822247028,
"eval_rmse": 0.011177574284374714,
"eval_runtime": 314.115,
"eval_samples_per_second": 6.367,
"eval_steps_per_second": 6.367,
"step": 11250
},
{
"epoch": 0.3154651191194894,
"grad_norm": 10.0625,
"learning_rate": 0.00014115006043861165,
"loss": 0.013,
"step": 11300
},
{
"epoch": 0.3168609824784252,
"grad_norm": 5.5625,
"learning_rate": 0.00014086225752604615,
"loss": 0.0025,
"step": 11350
},
{
"epoch": 0.31825684583736097,
"grad_norm": 8.375,
"learning_rate": 0.00014057445461348068,
"loss": 0.0025,
"step": 11400
},
{
"epoch": 0.3196527091962968,
"grad_norm": 9.0,
"learning_rate": 0.00014028665170091523,
"loss": 0.0024,
"step": 11450
},
{
"epoch": 0.32104857255523256,
"grad_norm": 13.75,
"learning_rate": 0.00013999884878834976,
"loss": 0.0025,
"step": 11500
},
{
"epoch": 0.3224444359141684,
"grad_norm": 3.453125,
"learning_rate": 0.00013971104587578428,
"loss": 0.0025,
"step": 11550
},
{
"epoch": 0.32384029927310415,
"grad_norm": 8.375,
"learning_rate": 0.0001394232429632188,
"loss": 0.0024,
"step": 11600
},
{
"epoch": 0.32523616263203997,
"grad_norm": 8.875,
"learning_rate": 0.0001391354400506533,
"loss": 0.0025,
"step": 11650
},
{
"epoch": 0.32663202599097574,
"grad_norm": 11.8125,
"learning_rate": 0.00013884763713808783,
"loss": 0.0025,
"step": 11700
},
{
"epoch": 0.32802788934991156,
"grad_norm": 3.1875,
"learning_rate": 0.00013855983422552236,
"loss": 0.0028,
"step": 11750
},
{
"epoch": 0.32942375270884733,
"grad_norm": 1.0546875,
"learning_rate": 0.0001382720313129569,
"loss": 0.0016,
"step": 11800
},
{
"epoch": 0.33081961606778315,
"grad_norm": 7.15625,
"learning_rate": 0.0001379842284003914,
"loss": 0.0044,
"step": 11850
},
{
"epoch": 0.3322154794267189,
"grad_norm": 4.71875,
"learning_rate": 0.00013769642548782597,
"loss": 0.0022,
"step": 11900
},
{
"epoch": 0.3336113427856547,
"grad_norm": 13.9375,
"learning_rate": 0.00013740862257526047,
"loss": 0.0024,
"step": 11950
},
{
"epoch": 0.3350072061445905,
"grad_norm": 6.75,
"learning_rate": 0.000137120819662695,
"loss": 0.0025,
"step": 12000
},
{
"epoch": 0.3364030695035263,
"grad_norm": 7.15625,
"learning_rate": 0.00013683301675012952,
"loss": 0.0023,
"step": 12050
},
{
"epoch": 0.3377989328624621,
"grad_norm": 3.515625,
"learning_rate": 0.00013654521383756404,
"loss": 0.0024,
"step": 12100
},
{
"epoch": 0.33919479622139787,
"grad_norm": 12.875,
"learning_rate": 0.00013625741092499857,
"loss": 0.0024,
"step": 12150
},
{
"epoch": 0.3405906595803337,
"grad_norm": 11.25,
"learning_rate": 0.0001359696080124331,
"loss": 0.0036,
"step": 12200
},
{
"epoch": 0.34198652293926946,
"grad_norm": 8.8125,
"learning_rate": 0.0001356818050998676,
"loss": 0.0025,
"step": 12250
},
{
"epoch": 0.3433823862982053,
"grad_norm": 0.07080078125,
"learning_rate": 0.00013539400218730215,
"loss": 0.0023,
"step": 12300
},
{
"epoch": 0.34477824965714104,
"grad_norm": 7.40625,
"learning_rate": 0.00013510619927473668,
"loss": 0.005,
"step": 12350
},
{
"epoch": 0.34617411301607687,
"grad_norm": 8.0,
"learning_rate": 0.0001348183963621712,
"loss": 0.0022,
"step": 12400
},
{
"epoch": 0.34756997637501263,
"grad_norm": 4.1875,
"learning_rate": 0.00013453059344960573,
"loss": 0.0023,
"step": 12450
},
{
"epoch": 0.34896583973394846,
"grad_norm": 15.5625,
"learning_rate": 0.00013424279053704023,
"loss": 0.0048,
"step": 12500
},
{
"epoch": 0.34896583973394846,
"eval_loss": 0.0007875896408222616,
"eval_mae": 0.027495475485920906,
"eval_rmse": 0.028064027428627014,
"eval_runtime": 313.9427,
"eval_samples_per_second": 6.371,
"eval_steps_per_second": 6.371,
"step": 12500
},
{
"epoch": 0.3503617030928842,
"grad_norm": 10.125,
"learning_rate": 0.00013395498762447475,
"loss": 0.0012,
"step": 12550
},
{
"epoch": 0.35175756645182005,
"grad_norm": 1.46875,
"learning_rate": 0.00013366718471190928,
"loss": 0.0025,
"step": 12600
},
{
"epoch": 0.3531534298107558,
"grad_norm": 6.59375,
"learning_rate": 0.0001333793817993438,
"loss": 0.0009,
"step": 12650
},
{
"epoch": 0.35454929316969164,
"grad_norm": 14.9375,
"learning_rate": 0.00013309157888677833,
"loss": 0.0024,
"step": 12700
},
{
"epoch": 0.3559451565286274,
"grad_norm": 5.46875,
"learning_rate": 0.0001328037759742129,
"loss": 0.001,
"step": 12750
},
{
"epoch": 0.3573410198875632,
"grad_norm": 2.25,
"learning_rate": 0.00013251597306164739,
"loss": 0.0007,
"step": 12800
},
{
"epoch": 0.358736883246499,
"grad_norm": 3.265625,
"learning_rate": 0.0001322281701490819,
"loss": 0.0007,
"step": 12850
},
{
"epoch": 0.3601327466054348,
"grad_norm": 4.53125,
"learning_rate": 0.00013194036723651644,
"loss": 0.0007,
"step": 12900
},
{
"epoch": 0.3615286099643706,
"grad_norm": 3.671875,
"learning_rate": 0.00013165256432395096,
"loss": 0.0007,
"step": 12950
},
{
"epoch": 0.3629244733233064,
"grad_norm": 2.75,
"learning_rate": 0.0001313647614113855,
"loss": 0.0007,
"step": 13000
},
{
"epoch": 0.3643203366822422,
"grad_norm": 2.09375,
"learning_rate": 0.00013107695849882002,
"loss": 0.001,
"step": 13050
},
{
"epoch": 0.365716200041178,
"grad_norm": 4.8125,
"learning_rate": 0.00013078915558625454,
"loss": 0.0026,
"step": 13100
},
{
"epoch": 0.36711206340011376,
"grad_norm": 14.625,
"learning_rate": 0.00013050135267368907,
"loss": 0.0014,
"step": 13150
},
{
"epoch": 0.36850792675904953,
"grad_norm": 11.875,
"learning_rate": 0.0001302135497611236,
"loss": 0.0011,
"step": 13200
},
{
"epoch": 0.36990379011798535,
"grad_norm": 10.5625,
"learning_rate": 0.00012992574684855812,
"loss": 0.002,
"step": 13250
},
{
"epoch": 0.3712996534769211,
"grad_norm": 12.75,
"learning_rate": 0.00012963794393599265,
"loss": 0.0025,
"step": 13300
},
{
"epoch": 0.37269551683585694,
"grad_norm": 2.484375,
"learning_rate": 0.00012935014102342718,
"loss": 0.0019,
"step": 13350
},
{
"epoch": 0.3740913801947927,
"grad_norm": 2.953125,
"learning_rate": 0.00012906233811086167,
"loss": 0.0004,
"step": 13400
},
{
"epoch": 0.37548724355372853,
"grad_norm": 0.474609375,
"learning_rate": 0.0001287745351982962,
"loss": 0.0009,
"step": 13450
},
{
"epoch": 0.3768831069126643,
"grad_norm": 23.25,
"learning_rate": 0.00012848673228573073,
"loss": 0.0023,
"step": 13500
},
{
"epoch": 0.3782789702716001,
"grad_norm": 5.875,
"learning_rate": 0.00012819892937316525,
"loss": 0.0061,
"step": 13550
},
{
"epoch": 0.3796748336305359,
"grad_norm": 11.4375,
"learning_rate": 0.0001279111264605998,
"loss": 0.0043,
"step": 13600
},
{
"epoch": 0.3810706969894717,
"grad_norm": 1.34375,
"learning_rate": 0.0001276233235480343,
"loss": 0.0021,
"step": 13650
},
{
"epoch": 0.3824665603484075,
"grad_norm": 1.671875,
"learning_rate": 0.00012733552063546883,
"loss": 0.0022,
"step": 13700
},
{
"epoch": 0.3838624237073433,
"grad_norm": 12.0625,
"learning_rate": 0.00012704771772290336,
"loss": 0.0009,
"step": 13750
},
{
"epoch": 0.3838624237073433,
"eval_loss": 0.00010729853966040537,
"eval_mae": 0.009244485758244991,
"eval_rmse": 0.010358501225709915,
"eval_runtime": 314.0246,
"eval_samples_per_second": 6.369,
"eval_steps_per_second": 6.369,
"step": 13750
},
{
"epoch": 0.38525828706627907,
"grad_norm": 0.466796875,
"learning_rate": 0.00012675991481033788,
"loss": 0.0029,
"step": 13800
},
{
"epoch": 0.3866541504252149,
"grad_norm": 0.314453125,
"learning_rate": 0.0001264721118977724,
"loss": 0.0017,
"step": 13850
},
{
"epoch": 0.38805001378415066,
"grad_norm": 5.09375,
"learning_rate": 0.00012618430898520694,
"loss": 0.002,
"step": 13900
},
{
"epoch": 0.3894458771430865,
"grad_norm": 6.21875,
"learning_rate": 0.00012589650607264146,
"loss": 0.0015,
"step": 13950
},
{
"epoch": 0.39084174050202225,
"grad_norm": 7.78125,
"learning_rate": 0.000125608703160076,
"loss": 0.0019,
"step": 14000
},
{
"epoch": 0.3922376038609581,
"grad_norm": 8.6875,
"learning_rate": 0.00012532090024751052,
"loss": 0.0019,
"step": 14050
},
{
"epoch": 0.39363346721989384,
"grad_norm": 8.125,
"learning_rate": 0.00012503309733494504,
"loss": 0.0018,
"step": 14100
},
{
"epoch": 0.39502933057882966,
"grad_norm": 10.375,
"learning_rate": 0.00012474529442237957,
"loss": 0.002,
"step": 14150
},
{
"epoch": 0.39642519393776543,
"grad_norm": 3.5,
"learning_rate": 0.0001244574915098141,
"loss": 0.0018,
"step": 14200
},
{
"epoch": 0.39782105729670125,
"grad_norm": 5.0625,
"learning_rate": 0.0001241696885972486,
"loss": 0.0023,
"step": 14250
},
{
"epoch": 0.399216920655637,
"grad_norm": 7.15625,
"learning_rate": 0.00012388188568468312,
"loss": 0.0019,
"step": 14300
},
{
"epoch": 0.4006127840145728,
"grad_norm": 2.65625,
"learning_rate": 0.00012359408277211765,
"loss": 0.003,
"step": 14350
},
{
"epoch": 0.4020086473735086,
"grad_norm": 4.3125,
"learning_rate": 0.0001233062798595522,
"loss": 0.0024,
"step": 14400
},
{
"epoch": 0.4034045107324444,
"grad_norm": 4.96875,
"learning_rate": 0.00012301847694698673,
"loss": 0.0032,
"step": 14450
},
{
"epoch": 0.4048003740913802,
"grad_norm": 7.5625,
"learning_rate": 0.00012273067403442125,
"loss": 0.001,
"step": 14500
},
{
"epoch": 0.40619623745031597,
"grad_norm": 7.1875,
"learning_rate": 0.00012244287112185575,
"loss": 0.0017,
"step": 14550
},
{
"epoch": 0.4075921008092518,
"grad_norm": 7.875,
"learning_rate": 0.00012215506820929028,
"loss": 0.0017,
"step": 14600
},
{
"epoch": 0.40898796416818756,
"grad_norm": 9.75,
"learning_rate": 0.0001218672652967248,
"loss": 0.0018,
"step": 14650
},
{
"epoch": 0.4103838275271234,
"grad_norm": 6.125,
"learning_rate": 0.00012157946238415933,
"loss": 0.0016,
"step": 14700
},
{
"epoch": 0.41177969088605915,
"grad_norm": 11.5625,
"learning_rate": 0.00012129165947159387,
"loss": 0.0018,
"step": 14750
},
{
"epoch": 0.41317555424499497,
"grad_norm": 3.375,
"learning_rate": 0.00012100385655902837,
"loss": 0.0017,
"step": 14800
},
{
"epoch": 0.41457141760393074,
"grad_norm": 4.3125,
"learning_rate": 0.0001207160536464629,
"loss": 0.0019,
"step": 14850
},
{
"epoch": 0.41596728096286656,
"grad_norm": 6.96875,
"learning_rate": 0.00012042825073389742,
"loss": 0.0021,
"step": 14900
},
{
"epoch": 0.4173631443218023,
"grad_norm": 4.34375,
"learning_rate": 0.00012014044782133196,
"loss": 0.0005,
"step": 14950
},
{
"epoch": 0.41875900768073815,
"grad_norm": 3.890625,
"learning_rate": 0.00011985264490876649,
"loss": 0.0004,
"step": 15000
},
{
"epoch": 0.41875900768073815,
"eval_loss": 2.5809065846260637e-05,
"eval_mae": 0.004102489911019802,
"eval_rmse": 0.005080262199044228,
"eval_runtime": 313.9897,
"eval_samples_per_second": 6.37,
"eval_steps_per_second": 6.37,
"step": 15000
},
{
"epoch": 0.4201548710396739,
"grad_norm": 6.03125,
"learning_rate": 0.00011956484199620102,
"loss": 0.0015,
"step": 15050
},
{
"epoch": 0.42155073439860974,
"grad_norm": 10.0625,
"learning_rate": 0.00011927703908363553,
"loss": 0.0017,
"step": 15100
},
{
"epoch": 0.4229465977575455,
"grad_norm": 4.03125,
"learning_rate": 0.00011898923617107005,
"loss": 0.0017,
"step": 15150
},
{
"epoch": 0.42434246111648133,
"grad_norm": 6.21875,
"learning_rate": 0.00011870143325850458,
"loss": 0.0017,
"step": 15200
},
{
"epoch": 0.4257383244754171,
"grad_norm": 0.8984375,
"learning_rate": 0.00011841363034593911,
"loss": 0.0016,
"step": 15250
},
{
"epoch": 0.4271341878343529,
"grad_norm": 8.8125,
"learning_rate": 0.00011812582743337363,
"loss": 0.0015,
"step": 15300
},
{
"epoch": 0.4285300511932887,
"grad_norm": 4.4375,
"learning_rate": 0.00011783802452080816,
"loss": 0.0016,
"step": 15350
},
{
"epoch": 0.4299259145522245,
"grad_norm": 10.125,
"learning_rate": 0.00011755022160824267,
"loss": 0.0016,
"step": 15400
},
{
"epoch": 0.4313217779111603,
"grad_norm": 7.5625,
"learning_rate": 0.0001172624186956772,
"loss": 0.0017,
"step": 15450
},
{
"epoch": 0.43271764127009604,
"grad_norm": 6.46875,
"learning_rate": 0.00011697461578311173,
"loss": 0.0015,
"step": 15500
},
{
"epoch": 0.43411350462903187,
"grad_norm": 2.421875,
"learning_rate": 0.00011668681287054625,
"loss": 0.0016,
"step": 15550
},
{
"epoch": 0.43550936798796763,
"grad_norm": 5.75,
"learning_rate": 0.00011639900995798079,
"loss": 0.0016,
"step": 15600
},
{
"epoch": 0.43690523134690346,
"grad_norm": 5.28125,
"learning_rate": 0.00011611120704541532,
"loss": 0.0021,
"step": 15650
},
{
"epoch": 0.4383010947058392,
"grad_norm": 1.7734375,
"learning_rate": 0.00011582340413284982,
"loss": 0.0009,
"step": 15700
},
{
"epoch": 0.43969695806477505,
"grad_norm": 0.2734375,
"learning_rate": 0.00011553560122028436,
"loss": 0.0003,
"step": 15750
},
{
"epoch": 0.4410928214237108,
"grad_norm": 12.5625,
"learning_rate": 0.00011524779830771888,
"loss": 0.0006,
"step": 15800
},
{
"epoch": 0.44248868478264664,
"grad_norm": 4.625,
"learning_rate": 0.00011495999539515341,
"loss": 0.001,
"step": 15850
},
{
"epoch": 0.4438845481415824,
"grad_norm": 15.5,
"learning_rate": 0.00011467219248258794,
"loss": 0.0027,
"step": 15900
},
{
"epoch": 0.4452804115005182,
"grad_norm": 13.125,
"learning_rate": 0.00011438438957002245,
"loss": 0.0032,
"step": 15950
},
{
"epoch": 0.446676274859454,
"grad_norm": 14.6875,
"learning_rate": 0.00011409658665745697,
"loss": 0.0033,
"step": 16000
},
{
"epoch": 0.4480721382183898,
"grad_norm": 15.1875,
"learning_rate": 0.0001138087837448915,
"loss": 0.0032,
"step": 16050
},
{
"epoch": 0.4494680015773256,
"grad_norm": 10.4375,
"learning_rate": 0.00011352098083232603,
"loss": 0.0033,
"step": 16100
},
{
"epoch": 0.4508638649362614,
"grad_norm": 14.5,
"learning_rate": 0.00011323317791976055,
"loss": 0.0032,
"step": 16150
},
{
"epoch": 0.45225972829519717,
"grad_norm": 11.625,
"learning_rate": 0.00011294537500719508,
"loss": 0.0032,
"step": 16200
},
{
"epoch": 0.453655591654133,
"grad_norm": 12.5625,
"learning_rate": 0.00011265757209462959,
"loss": 0.0032,
"step": 16250
},
{
"epoch": 0.453655591654133,
"eval_loss": 0.0005641469615511596,
"eval_mae": 0.023370979353785515,
"eval_rmse": 0.02375177852809429,
"eval_runtime": 313.9972,
"eval_samples_per_second": 6.369,
"eval_steps_per_second": 6.369,
"step": 16250
},
{
"epoch": 0.45505145501306876,
"grad_norm": 13.0625,
"learning_rate": 0.00011236976918206412,
"loss": 0.0032,
"step": 16300
},
{
"epoch": 0.4564473183720046,
"grad_norm": 9.875,
"learning_rate": 0.00011208196626949865,
"loss": 0.0031,
"step": 16350
},
{
"epoch": 0.45784318173094035,
"grad_norm": 10.0625,
"learning_rate": 0.00011179416335693319,
"loss": 0.0031,
"step": 16400
},
{
"epoch": 0.4592390450898762,
"grad_norm": 11.25,
"learning_rate": 0.00011150636044436771,
"loss": 0.0031,
"step": 16450
},
{
"epoch": 0.46063490844881194,
"grad_norm": 11.6875,
"learning_rate": 0.00011121855753180224,
"loss": 0.003,
"step": 16500
},
{
"epoch": 0.46203077180774776,
"grad_norm": 12.25,
"learning_rate": 0.00011093075461923674,
"loss": 0.003,
"step": 16550
},
{
"epoch": 0.46342663516668353,
"grad_norm": 11.5,
"learning_rate": 0.00011064295170667128,
"loss": 0.003,
"step": 16600
},
{
"epoch": 0.46482249852561935,
"grad_norm": 14.75,
"learning_rate": 0.0001103551487941058,
"loss": 0.003,
"step": 16650
},
{
"epoch": 0.4662183618845551,
"grad_norm": 13.875,
"learning_rate": 0.00011006734588154033,
"loss": 0.0029,
"step": 16700
},
{
"epoch": 0.4676142252434909,
"grad_norm": 13.6875,
"learning_rate": 0.00010977954296897486,
"loss": 0.0031,
"step": 16750
},
{
"epoch": 0.4690100886024267,
"grad_norm": 3.796875,
"learning_rate": 0.00010949174005640938,
"loss": 0.0019,
"step": 16800
},
{
"epoch": 0.4704059519613625,
"grad_norm": 5.59375,
"learning_rate": 0.0001092039371438439,
"loss": 0.0012,
"step": 16850
},
{
"epoch": 0.4718018153202983,
"grad_norm": 2.921875,
"learning_rate": 0.00010891613423127842,
"loss": 0.0012,
"step": 16900
},
{
"epoch": 0.47319767867923407,
"grad_norm": 8.6875,
"learning_rate": 0.00010862833131871295,
"loss": 0.0012,
"step": 16950
},
{
"epoch": 0.4745935420381699,
"grad_norm": 3.96875,
"learning_rate": 0.00010834052840614747,
"loss": 0.0012,
"step": 17000
},
{
"epoch": 0.47598940539710566,
"grad_norm": 10.1875,
"learning_rate": 0.00010805272549358201,
"loss": 0.0012,
"step": 17050
},
{
"epoch": 0.4773852687560415,
"grad_norm": 0.12255859375,
"learning_rate": 0.00010776492258101651,
"loss": 0.0015,
"step": 17100
},
{
"epoch": 0.47878113211497725,
"grad_norm": 6.1875,
"learning_rate": 0.00010747711966845104,
"loss": 0.0015,
"step": 17150
},
{
"epoch": 0.48017699547391307,
"grad_norm": 3.1875,
"learning_rate": 0.00010718931675588557,
"loss": 0.0012,
"step": 17200
},
{
"epoch": 0.48157285883284884,
"grad_norm": 6.09375,
"learning_rate": 0.0001069015138433201,
"loss": 0.001,
"step": 17250
},
{
"epoch": 0.48296872219178466,
"grad_norm": 11.875,
"learning_rate": 0.00010661371093075463,
"loss": 0.0018,
"step": 17300
},
{
"epoch": 0.48436458555072043,
"grad_norm": 14.9375,
"learning_rate": 0.00010632590801818916,
"loss": 0.0029,
"step": 17350
},
{
"epoch": 0.48576044890965625,
"grad_norm": 15.75,
"learning_rate": 0.00010603810510562366,
"loss": 0.0027,
"step": 17400
},
{
"epoch": 0.487156312268592,
"grad_norm": 2.328125,
"learning_rate": 0.0001057503021930582,
"loss": 0.002,
"step": 17450
},
{
"epoch": 0.48855217562752784,
"grad_norm": 4.875,
"learning_rate": 0.00010546249928049272,
"loss": 0.0018,
"step": 17500
},
{
"epoch": 0.48855217562752784,
"eval_loss": 8.424516272498295e-05,
"eval_mae": 0.00828312523663044,
"eval_rmse": 0.009178516454994678,
"eval_runtime": 314.273,
"eval_samples_per_second": 6.364,
"eval_steps_per_second": 6.364,
"step": 17500
},
{
"epoch": 0.4899480389864636,
"grad_norm": 5.65625,
"learning_rate": 0.00010517469636792725,
"loss": 0.0016,
"step": 17550
},
{
"epoch": 0.49134390234539943,
"grad_norm": 3.375,
"learning_rate": 0.00010488689345536178,
"loss": 0.0016,
"step": 17600
},
{
"epoch": 0.4927397657043352,
"grad_norm": 9.25,
"learning_rate": 0.0001045990905427963,
"loss": 0.0006,
"step": 17650
},
{
"epoch": 0.494135629063271,
"grad_norm": 2.6875,
"learning_rate": 0.00010431128763023082,
"loss": 0.0005,
"step": 17700
},
{
"epoch": 0.4955314924222068,
"grad_norm": 1.9609375,
"learning_rate": 0.00010402348471766534,
"loss": 0.0015,
"step": 17750
},
{
"epoch": 0.4969273557811426,
"grad_norm": 2.390625,
"learning_rate": 0.00010373568180509987,
"loss": 0.0013,
"step": 17800
},
{
"epoch": 0.4983232191400784,
"grad_norm": 0.5078125,
"learning_rate": 0.0001034478788925344,
"loss": 0.0012,
"step": 17850
},
{
"epoch": 0.49971908249901414,
"grad_norm": 13.0625,
"learning_rate": 0.00010316007597996893,
"loss": 0.0011,
"step": 17900
},
{
"epoch": 0.5011149458579499,
"grad_norm": 1.1953125,
"learning_rate": 0.00010287227306740346,
"loss": 0.0006,
"step": 17950
},
{
"epoch": 0.5025108092168857,
"grad_norm": 3.578125,
"learning_rate": 0.00010258447015483796,
"loss": 0.0003,
"step": 18000
},
{
"epoch": 0.5039066725758216,
"grad_norm": 0.1259765625,
"learning_rate": 0.00010229666724227249,
"loss": 0.0003,
"step": 18050
},
{
"epoch": 0.5053025359347574,
"grad_norm": 5.34375,
"learning_rate": 0.00010200886432970703,
"loss": 0.0009,
"step": 18100
},
{
"epoch": 0.5066983992936931,
"grad_norm": 0.134765625,
"learning_rate": 0.00010172106141714155,
"loss": 0.0023,
"step": 18150
},
{
"epoch": 0.5080942626526289,
"grad_norm": 0.703125,
"learning_rate": 0.00010143325850457608,
"loss": 0.001,
"step": 18200
},
{
"epoch": 0.5094901260115647,
"grad_norm": 1.6015625,
"learning_rate": 0.00010114545559201059,
"loss": 0.0023,
"step": 18250
},
{
"epoch": 0.5108859893705006,
"grad_norm": 7.0,
"learning_rate": 0.00010085765267944512,
"loss": 0.001,
"step": 18300
},
{
"epoch": 0.5122818527294363,
"grad_norm": 0.6328125,
"learning_rate": 0.00010056984976687964,
"loss": 0.0006,
"step": 18350
},
{
"epoch": 0.5136777160883721,
"grad_norm": 3.53125,
"learning_rate": 0.00010028204685431417,
"loss": 0.0002,
"step": 18400
},
{
"epoch": 0.5150735794473079,
"grad_norm": 0.1396484375,
"learning_rate": 9.99942439417487e-05,
"loss": 0.0003,
"step": 18450
},
{
"epoch": 0.5164694428062437,
"grad_norm": 1.3203125,
"learning_rate": 9.970644102918322e-05,
"loss": 0.0003,
"step": 18500
},
{
"epoch": 0.5178653061651795,
"grad_norm": 1.7890625,
"learning_rate": 9.941863811661775e-05,
"loss": 0.0005,
"step": 18550
},
{
"epoch": 0.5192611695241153,
"grad_norm": 0.66015625,
"learning_rate": 9.913083520405228e-05,
"loss": 0.0009,
"step": 18600
},
{
"epoch": 0.5206570328830511,
"grad_norm": 0.08349609375,
"learning_rate": 9.884303229148679e-05,
"loss": 0.0002,
"step": 18650
},
{
"epoch": 0.5220528962419869,
"grad_norm": 4.125,
"learning_rate": 9.855522937892131e-05,
"loss": 0.0007,
"step": 18700
},
{
"epoch": 0.5234487596009226,
"grad_norm": 5.53125,
"learning_rate": 9.826742646635585e-05,
"loss": 0.0008,
"step": 18750
},
{
"epoch": 0.5234487596009226,
"eval_loss": 0.00011440851085353643,
"eval_mae": 0.010100271552801132,
"eval_rmse": 0.01069619134068489,
"eval_runtime": 318.3188,
"eval_samples_per_second": 6.283,
"eval_steps_per_second": 6.283,
"step": 18750
},
{
"epoch": 0.5248446229598585,
"grad_norm": 1.0625,
"learning_rate": 9.797962355379037e-05,
"loss": 0.0008,
"step": 18800
},
{
"epoch": 0.5262404863187943,
"grad_norm": 0.8984375,
"learning_rate": 9.76918206412249e-05,
"loss": 0.0007,
"step": 18850
},
{
"epoch": 0.5276363496777301,
"grad_norm": 5.6875,
"learning_rate": 9.740401772865942e-05,
"loss": 0.0009,
"step": 18900
},
{
"epoch": 0.5290322130366658,
"grad_norm": 8.25,
"learning_rate": 9.711621481609395e-05,
"loss": 0.0005,
"step": 18950
},
{
"epoch": 0.5304280763956016,
"grad_norm": 2.21875,
"learning_rate": 9.682841190352847e-05,
"loss": 0.0007,
"step": 19000
},
{
"epoch": 0.5318239397545375,
"grad_norm": 0.73046875,
"learning_rate": 9.654060899096299e-05,
"loss": 0.0003,
"step": 19050
},
{
"epoch": 0.5332198031134732,
"grad_norm": 5.90625,
"learning_rate": 9.625280607839751e-05,
"loss": 0.0007,
"step": 19100
},
{
"epoch": 0.534615666472409,
"grad_norm": 1.5859375,
"learning_rate": 9.596500316583205e-05,
"loss": 0.0006,
"step": 19150
},
{
"epoch": 0.5360115298313448,
"grad_norm": 0.546875,
"learning_rate": 9.567720025326656e-05,
"loss": 0.0004,
"step": 19200
},
{
"epoch": 0.5374073931902806,
"grad_norm": 2.9375,
"learning_rate": 9.538939734070109e-05,
"loss": 0.001,
"step": 19250
},
{
"epoch": 0.5388032565492163,
"grad_norm": 9.6875,
"learning_rate": 9.510159442813562e-05,
"loss": 0.0012,
"step": 19300
},
{
"epoch": 0.5401991199081522,
"grad_norm": 2.328125,
"learning_rate": 9.481379151557014e-05,
"loss": 0.0005,
"step": 19350
},
{
"epoch": 0.541594983267088,
"grad_norm": 0.038330078125,
"learning_rate": 9.452598860300467e-05,
"loss": 0.0002,
"step": 19400
},
{
"epoch": 0.5429908466260238,
"grad_norm": 4.71875,
"learning_rate": 9.42381856904392e-05,
"loss": 0.0011,
"step": 19450
},
{
"epoch": 0.5443867099849595,
"grad_norm": 0.34765625,
"learning_rate": 9.395038277787371e-05,
"loss": 0.0003,
"step": 19500
},
{
"epoch": 0.5457825733438953,
"grad_norm": 6.46875,
"learning_rate": 9.366257986530825e-05,
"loss": 0.0011,
"step": 19550
},
{
"epoch": 0.5471784367028312,
"grad_norm": 0.2451171875,
"learning_rate": 9.337477695274277e-05,
"loss": 0.0018,
"step": 19600
},
{
"epoch": 0.548574300061767,
"grad_norm": 3.90625,
"learning_rate": 9.308697404017729e-05,
"loss": 0.0008,
"step": 19650
},
{
"epoch": 0.5499701634207027,
"grad_norm": 8.75,
"learning_rate": 9.279917112761181e-05,
"loss": 0.0008,
"step": 19700
},
{
"epoch": 0.5513660267796385,
"grad_norm": 9.0,
"learning_rate": 9.251136821504634e-05,
"loss": 0.0019,
"step": 19750
},
{
"epoch": 0.5527618901385744,
"grad_norm": 8.75,
"learning_rate": 9.222356530248087e-05,
"loss": 0.0019,
"step": 19800
},
{
"epoch": 0.5541577534975102,
"grad_norm": 9.1875,
"learning_rate": 9.193576238991539e-05,
"loss": 0.0014,
"step": 19850
},
{
"epoch": 0.5555536168564459,
"grad_norm": 0.345703125,
"learning_rate": 9.164795947734992e-05,
"loss": 0.0018,
"step": 19900
},
{
"epoch": 0.5569494802153817,
"grad_norm": 3.34375,
"learning_rate": 9.136015656478443e-05,
"loss": 0.0005,
"step": 19950
},
{
"epoch": 0.5583453435743175,
"grad_norm": 1.15625,
"learning_rate": 9.107235365221897e-05,
"loss": 0.0007,
"step": 20000
},
{
"epoch": 0.5583453435743175,
"eval_loss": 3.620574716478586e-05,
"eval_mae": 0.0052015818655490875,
"eval_rmse": 0.006017121020704508,
"eval_runtime": 318.2368,
"eval_samples_per_second": 6.285,
"eval_steps_per_second": 6.285,
"step": 20000
},
{
"epoch": 0.5597412069332534,
"grad_norm": 3.375,
"learning_rate": 9.078455073965348e-05,
"loss": 0.0002,
"step": 20050
},
{
"epoch": 0.5611370702921891,
"grad_norm": 3.6875,
"learning_rate": 9.049674782708801e-05,
"loss": 0.0003,
"step": 20100
},
{
"epoch": 0.5625329336511249,
"grad_norm": 3.640625,
"learning_rate": 9.020894491452254e-05,
"loss": 0.0006,
"step": 20150
},
{
"epoch": 0.5639287970100607,
"grad_norm": 2.34375,
"learning_rate": 8.992114200195706e-05,
"loss": 0.0003,
"step": 20200
},
{
"epoch": 0.5653246603689965,
"grad_norm": 2.171875,
"learning_rate": 8.963333908939159e-05,
"loss": 0.0004,
"step": 20250
},
{
"epoch": 0.5667205237279322,
"grad_norm": 2.078125,
"learning_rate": 8.934553617682612e-05,
"loss": 0.0003,
"step": 20300
},
{
"epoch": 0.5681163870868681,
"grad_norm": 2.203125,
"learning_rate": 8.905773326426063e-05,
"loss": 0.0007,
"step": 20350
},
{
"epoch": 0.5695122504458039,
"grad_norm": 11.9375,
"learning_rate": 8.876993035169517e-05,
"loss": 0.0008,
"step": 20400
},
{
"epoch": 0.5709081138047396,
"grad_norm": 0.9296875,
"learning_rate": 8.84821274391297e-05,
"loss": 0.001,
"step": 20450
},
{
"epoch": 0.5723039771636754,
"grad_norm": 7.78125,
"learning_rate": 8.819432452656421e-05,
"loss": 0.0007,
"step": 20500
},
{
"epoch": 0.5736998405226112,
"grad_norm": 2.40625,
"learning_rate": 8.790652161399873e-05,
"loss": 0.0003,
"step": 20550
},
{
"epoch": 0.5750957038815471,
"grad_norm": 0.8359375,
"learning_rate": 8.761871870143326e-05,
"loss": 0.0004,
"step": 20600
},
{
"epoch": 0.5764915672404828,
"grad_norm": 3.40625,
"learning_rate": 8.733091578886779e-05,
"loss": 0.0005,
"step": 20650
},
{
"epoch": 0.5778874305994186,
"grad_norm": 7.53125,
"learning_rate": 8.704311287630231e-05,
"loss": 0.0012,
"step": 20700
},
{
"epoch": 0.5792832939583544,
"grad_norm": 2.953125,
"learning_rate": 8.675530996373684e-05,
"loss": 0.0014,
"step": 20750
},
{
"epoch": 0.5806791573172903,
"grad_norm": 0.2060546875,
"learning_rate": 8.646750705117137e-05,
"loss": 0.0003,
"step": 20800
},
{
"epoch": 0.582075020676226,
"grad_norm": 5.875,
"learning_rate": 8.617970413860589e-05,
"loss": 0.0004,
"step": 20850
},
{
"epoch": 0.5834708840351618,
"grad_norm": 2.703125,
"learning_rate": 8.589190122604042e-05,
"loss": 0.0004,
"step": 20900
},
{
"epoch": 0.5848667473940976,
"grad_norm": 5.34375,
"learning_rate": 8.560409831347493e-05,
"loss": 0.0004,
"step": 20950
},
{
"epoch": 0.5862626107530334,
"grad_norm": 0.6171875,
"learning_rate": 8.531629540090946e-05,
"loss": 0.0002,
"step": 21000
},
{
"epoch": 0.5876584741119691,
"grad_norm": 3.703125,
"learning_rate": 8.5028492488344e-05,
"loss": 0.0003,
"step": 21050
},
{
"epoch": 0.589054337470905,
"grad_norm": 1.890625,
"learning_rate": 8.474068957577851e-05,
"loss": 0.0002,
"step": 21100
},
{
"epoch": 0.5904502008298408,
"grad_norm": 0.6640625,
"learning_rate": 8.445288666321304e-05,
"loss": 0.0003,
"step": 21150
},
{
"epoch": 0.5918460641887766,
"grad_norm": 0.031005859375,
"learning_rate": 8.416508375064755e-05,
"loss": 0.0004,
"step": 21200
},
{
"epoch": 0.5932419275477123,
"grad_norm": 7.21875,
"learning_rate": 8.387728083808209e-05,
"loss": 0.0005,
"step": 21250
},
{
"epoch": 0.5932419275477123,
"eval_loss": 0.00024837159435264766,
"eval_mae": 0.015309196896851063,
"eval_rmse": 0.01575980894267559,
"eval_runtime": 314.8323,
"eval_samples_per_second": 6.353,
"eval_steps_per_second": 6.353,
"step": 21250
},
{
"epoch": 0.5946377909066481,
"grad_norm": 0.150390625,
"learning_rate": 8.358947792551661e-05,
"loss": 0.0006,
"step": 21300
},
{
"epoch": 0.596033654265584,
"grad_norm": 0.287109375,
"learning_rate": 8.330167501295113e-05,
"loss": 0.0002,
"step": 21350
},
{
"epoch": 0.5974295176245198,
"grad_norm": 4.40625,
"learning_rate": 8.301387210038565e-05,
"loss": 0.0013,
"step": 21400
},
{
"epoch": 0.5988253809834555,
"grad_norm": 1.8515625,
"learning_rate": 8.27260691878202e-05,
"loss": 0.0003,
"step": 21450
},
{
"epoch": 0.6002212443423913,
"grad_norm": 7.625,
"learning_rate": 8.24382662752547e-05,
"loss": 0.0003,
"step": 21500
},
{
"epoch": 0.6016171077013271,
"grad_norm": 0.90625,
"learning_rate": 8.215046336268923e-05,
"loss": 0.0005,
"step": 21550
},
{
"epoch": 0.6030129710602629,
"grad_norm": 2.0625,
"learning_rate": 8.186266045012376e-05,
"loss": 0.0006,
"step": 21600
},
{
"epoch": 0.6044088344191987,
"grad_norm": 0.0751953125,
"learning_rate": 8.157485753755829e-05,
"loss": 0.0002,
"step": 21650
},
{
"epoch": 0.6058046977781345,
"grad_norm": 4.6875,
"learning_rate": 8.128705462499281e-05,
"loss": 0.0002,
"step": 21700
},
{
"epoch": 0.6072005611370703,
"grad_norm": 0.392578125,
"learning_rate": 8.099925171242734e-05,
"loss": 0.0005,
"step": 21750
},
{
"epoch": 0.608596424496006,
"grad_norm": 0.609375,
"learning_rate": 8.071144879986185e-05,
"loss": 0.0003,
"step": 21800
},
{
"epoch": 0.6099922878549419,
"grad_norm": 0.71484375,
"learning_rate": 8.042364588729638e-05,
"loss": 0.0002,
"step": 21850
},
{
"epoch": 0.6113881512138777,
"grad_norm": 2.296875,
"learning_rate": 8.013584297473092e-05,
"loss": 0.0002,
"step": 21900
},
{
"epoch": 0.6127840145728135,
"grad_norm": 1.0234375,
"learning_rate": 7.984804006216543e-05,
"loss": 0.0002,
"step": 21950
},
{
"epoch": 0.6141798779317492,
"grad_norm": 3.59375,
"learning_rate": 7.956023714959996e-05,
"loss": 0.0006,
"step": 22000
},
{
"epoch": 0.615575741290685,
"grad_norm": 0.345703125,
"learning_rate": 7.927243423703448e-05,
"loss": 0.0005,
"step": 22050
},
{
"epoch": 0.6169716046496209,
"grad_norm": 0.921875,
"learning_rate": 7.898463132446901e-05,
"loss": 0.0002,
"step": 22100
},
{
"epoch": 0.6183674680085567,
"grad_norm": 3.40625,
"learning_rate": 7.869682841190354e-05,
"loss": 0.0003,
"step": 22150
},
{
"epoch": 0.6197633313674924,
"grad_norm": 2.953125,
"learning_rate": 7.840902549933806e-05,
"loss": 0.0003,
"step": 22200
},
{
"epoch": 0.6211591947264282,
"grad_norm": 2.484375,
"learning_rate": 7.812122258677257e-05,
"loss": 0.0005,
"step": 22250
},
{
"epoch": 0.622555058085364,
"grad_norm": 2.15625,
"learning_rate": 7.783341967420711e-05,
"loss": 0.0002,
"step": 22300
},
{
"epoch": 0.6239509214442999,
"grad_norm": 0.734375,
"learning_rate": 7.754561676164163e-05,
"loss": 0.0002,
"step": 22350
},
{
"epoch": 0.6253467848032356,
"grad_norm": 0.0380859375,
"learning_rate": 7.725781384907615e-05,
"loss": 0.0002,
"step": 22400
},
{
"epoch": 0.6267426481621714,
"grad_norm": 14.0625,
"learning_rate": 7.697001093651068e-05,
"loss": 0.0017,
"step": 22450
},
{
"epoch": 0.6281385115211072,
"grad_norm": 8.625,
"learning_rate": 7.66822080239452e-05,
"loss": 0.0023,
"step": 22500
},
{
"epoch": 0.6281385115211072,
"eval_loss": 6.291436875471845e-05,
"eval_mae": 0.00715098949149251,
"eval_rmse": 0.00793185830116272,
"eval_runtime": 315.2915,
"eval_samples_per_second": 6.343,
"eval_steps_per_second": 6.343,
"step": 22500
},
{
"epoch": 0.629534374880043,
"grad_norm": 9.0,
"learning_rate": 7.639440511137973e-05,
"loss": 0.0021,
"step": 22550
},
{
"epoch": 0.6309302382389788,
"grad_norm": 10.875,
"learning_rate": 7.610660219881426e-05,
"loss": 0.0022,
"step": 22600
},
{
"epoch": 0.6323261015979146,
"grad_norm": 1.9453125,
"learning_rate": 7.581879928624877e-05,
"loss": 0.002,
"step": 22650
},
{
"epoch": 0.6337219649568504,
"grad_norm": 1.15625,
"learning_rate": 7.553099637368331e-05,
"loss": 0.0006,
"step": 22700
},
{
"epoch": 0.6351178283157861,
"grad_norm": 2.40625,
"learning_rate": 7.524319346111784e-05,
"loss": 0.0002,
"step": 22750
},
{
"epoch": 0.6365136916747219,
"grad_norm": 1.203125,
"learning_rate": 7.495539054855235e-05,
"loss": 0.0003,
"step": 22800
},
{
"epoch": 0.6379095550336578,
"grad_norm": 3.46875,
"learning_rate": 7.466758763598688e-05,
"loss": 0.0003,
"step": 22850
},
{
"epoch": 0.6393054183925936,
"grad_norm": 0.484375,
"learning_rate": 7.43797847234214e-05,
"loss": 0.0003,
"step": 22900
},
{
"epoch": 0.6407012817515293,
"grad_norm": 8.0625,
"learning_rate": 7.409198181085593e-05,
"loss": 0.0005,
"step": 22950
},
{
"epoch": 0.6420971451104651,
"grad_norm": 0.158203125,
"learning_rate": 7.380417889829046e-05,
"loss": 0.0003,
"step": 23000
},
{
"epoch": 0.6434930084694009,
"grad_norm": 0.1728515625,
"learning_rate": 7.351637598572498e-05,
"loss": 0.0002,
"step": 23050
},
{
"epoch": 0.6448888718283368,
"grad_norm": 0.4765625,
"learning_rate": 7.32285730731595e-05,
"loss": 0.0002,
"step": 23100
},
{
"epoch": 0.6462847351872725,
"grad_norm": 4.28125,
"learning_rate": 7.294077016059403e-05,
"loss": 0.0002,
"step": 23150
},
{
"epoch": 0.6476805985462083,
"grad_norm": 0.0625,
"learning_rate": 7.265296724802856e-05,
"loss": 0.0009,
"step": 23200
},
{
"epoch": 0.6490764619051441,
"grad_norm": 0.361328125,
"learning_rate": 7.236516433546307e-05,
"loss": 0.0002,
"step": 23250
},
{
"epoch": 0.6504723252640799,
"grad_norm": 0.21875,
"learning_rate": 7.20773614228976e-05,
"loss": 0.0001,
"step": 23300
},
{
"epoch": 0.6518681886230157,
"grad_norm": 4.5,
"learning_rate": 7.178955851033214e-05,
"loss": 0.0003,
"step": 23350
},
{
"epoch": 0.6532640519819515,
"grad_norm": 2.90625,
"learning_rate": 7.150175559776665e-05,
"loss": 0.0003,
"step": 23400
},
{
"epoch": 0.6546599153408873,
"grad_norm": 0.1669921875,
"learning_rate": 7.121395268520118e-05,
"loss": 0.0002,
"step": 23450
},
{
"epoch": 0.6560557786998231,
"grad_norm": 3.78125,
"learning_rate": 7.092614977263569e-05,
"loss": 0.0002,
"step": 23500
},
{
"epoch": 0.6574516420587588,
"grad_norm": 3.234375,
"learning_rate": 7.063834686007023e-05,
"loss": 0.0003,
"step": 23550
},
{
"epoch": 0.6588475054176947,
"grad_norm": 2.6875,
"learning_rate": 7.035054394750476e-05,
"loss": 0.0002,
"step": 23600
},
{
"epoch": 0.6602433687766305,
"grad_norm": 0.75,
"learning_rate": 7.006274103493927e-05,
"loss": 0.0003,
"step": 23650
},
{
"epoch": 0.6616392321355663,
"grad_norm": 0.11865234375,
"learning_rate": 6.97749381223738e-05,
"loss": 0.0002,
"step": 23700
},
{
"epoch": 0.663035095494502,
"grad_norm": 0.53515625,
"learning_rate": 6.948713520980832e-05,
"loss": 0.0002,
"step": 23750
},
{
"epoch": 0.663035095494502,
"eval_loss": 1.540686389489565e-05,
"eval_mae": 0.0031748104374855757,
"eval_rmse": 0.0039251577109098434,
"eval_runtime": 315.0516,
"eval_samples_per_second": 6.348,
"eval_steps_per_second": 6.348,
"step": 23750
},
{
"epoch": 0.6644309588534378,
"grad_norm": 2.734375,
"learning_rate": 6.919933229724285e-05,
"loss": 0.0002,
"step": 23800
},
{
"epoch": 0.6658268222123737,
"grad_norm": 2.3125,
"learning_rate": 6.891152938467738e-05,
"loss": 0.0001,
"step": 23850
},
{
"epoch": 0.6672226855713094,
"grad_norm": 0.34765625,
"learning_rate": 6.86237264721119e-05,
"loss": 0.0001,
"step": 23900
},
{
"epoch": 0.6686185489302452,
"grad_norm": 0.41796875,
"learning_rate": 6.833592355954643e-05,
"loss": 0.0002,
"step": 23950
},
{
"epoch": 0.670014412289181,
"grad_norm": 4.78125,
"learning_rate": 6.804812064698095e-05,
"loss": 0.0002,
"step": 24000
},
{
"epoch": 0.6714102756481168,
"grad_norm": 2.96875,
"learning_rate": 6.776031773441548e-05,
"loss": 0.0003,
"step": 24050
},
{
"epoch": 0.6728061390070526,
"grad_norm": 0.7265625,
"learning_rate": 6.747251482185e-05,
"loss": 0.0002,
"step": 24100
},
{
"epoch": 0.6742020023659884,
"grad_norm": 1.1328125,
"learning_rate": 6.718471190928452e-05,
"loss": 0.0002,
"step": 24150
},
{
"epoch": 0.6755978657249242,
"grad_norm": 0.84765625,
"learning_rate": 6.689690899671906e-05,
"loss": 0.0003,
"step": 24200
},
{
"epoch": 0.67699372908386,
"grad_norm": 0.2421875,
"learning_rate": 6.660910608415357e-05,
"loss": 0.0002,
"step": 24250
},
{
"epoch": 0.6783895924427957,
"grad_norm": 4.40625,
"learning_rate": 6.63213031715881e-05,
"loss": 0.0003,
"step": 24300
},
{
"epoch": 0.6797854558017316,
"grad_norm": 0.30078125,
"learning_rate": 6.603350025902263e-05,
"loss": 0.0002,
"step": 24350
},
{
"epoch": 0.6811813191606674,
"grad_norm": 2.78125,
"learning_rate": 6.574569734645715e-05,
"loss": 0.0002,
"step": 24400
},
{
"epoch": 0.6825771825196032,
"grad_norm": 0.984375,
"learning_rate": 6.545789443389168e-05,
"loss": 0.0002,
"step": 24450
},
{
"epoch": 0.6839730458785389,
"grad_norm": 3.3125,
"learning_rate": 6.51700915213262e-05,
"loss": 0.0006,
"step": 24500
},
{
"epoch": 0.6853689092374747,
"grad_norm": 1.5625,
"learning_rate": 6.488228860876072e-05,
"loss": 0.0001,
"step": 24550
},
{
"epoch": 0.6867647725964106,
"grad_norm": 2.53125,
"learning_rate": 6.459448569619526e-05,
"loss": 0.0002,
"step": 24600
},
{
"epoch": 0.6881606359553464,
"grad_norm": 2.65625,
"learning_rate": 6.430668278362977e-05,
"loss": 0.0002,
"step": 24650
},
{
"epoch": 0.6895564993142821,
"grad_norm": 0.6328125,
"learning_rate": 6.40188798710643e-05,
"loss": 0.0002,
"step": 24700
},
{
"epoch": 0.6909523626732179,
"grad_norm": 3.015625,
"learning_rate": 6.373107695849882e-05,
"loss": 0.0004,
"step": 24750
},
{
"epoch": 0.6923482260321537,
"grad_norm": 3.390625,
"learning_rate": 6.344327404593335e-05,
"loss": 0.0003,
"step": 24800
},
{
"epoch": 0.6937440893910896,
"grad_norm": 0.55078125,
"learning_rate": 6.315547113336787e-05,
"loss": 0.0003,
"step": 24850
},
{
"epoch": 0.6951399527500253,
"grad_norm": 4.65625,
"learning_rate": 6.28676682208024e-05,
"loss": 0.0001,
"step": 24900
},
{
"epoch": 0.6965358161089611,
"grad_norm": 3.4375,
"learning_rate": 6.257986530823691e-05,
"loss": 0.0003,
"step": 24950
},
{
"epoch": 0.6979316794678969,
"grad_norm": 1.1953125,
"learning_rate": 6.229206239567144e-05,
"loss": 0.0003,
"step": 25000
},
{
"epoch": 0.6979316794678969,
"eval_loss": 9.060095180757344e-06,
"eval_mae": 0.002397725125774741,
"eval_rmse": 0.003009999170899391,
"eval_runtime": 314.8136,
"eval_samples_per_second": 6.353,
"eval_steps_per_second": 6.353,
"step": 25000
},
{
"epoch": 0.6993275428268326,
"grad_norm": 1.2421875,
"learning_rate": 6.200425948310598e-05,
"loss": 0.0001,
"step": 25050
},
{
"epoch": 0.7007234061857684,
"grad_norm": 3.671875,
"learning_rate": 6.171645657054049e-05,
"loss": 0.0002,
"step": 25100
},
{
"epoch": 0.7021192695447043,
"grad_norm": 0.8046875,
"learning_rate": 6.142865365797502e-05,
"loss": 0.0002,
"step": 25150
},
{
"epoch": 0.7035151329036401,
"grad_norm": 0.2890625,
"learning_rate": 6.114085074540955e-05,
"loss": 0.0001,
"step": 25200
},
{
"epoch": 0.7049109962625758,
"grad_norm": 0.439453125,
"learning_rate": 6.0853047832844065e-05,
"loss": 0.0001,
"step": 25250
},
{
"epoch": 0.7063068596215116,
"grad_norm": 1.2265625,
"learning_rate": 6.05652449202786e-05,
"loss": 0.0001,
"step": 25300
},
{
"epoch": 0.7077027229804475,
"grad_norm": 3.21875,
"learning_rate": 6.0277442007713124e-05,
"loss": 0.0002,
"step": 25350
},
{
"epoch": 0.7090985863393833,
"grad_norm": 0.1982421875,
"learning_rate": 5.9989639095147644e-05,
"loss": 0.0003,
"step": 25400
},
{
"epoch": 0.710494449698319,
"grad_norm": 2.625,
"learning_rate": 5.970183618258217e-05,
"loss": 0.0003,
"step": 25450
},
{
"epoch": 0.7118903130572548,
"grad_norm": 0.193359375,
"learning_rate": 5.94140332700167e-05,
"loss": 0.0002,
"step": 25500
},
{
"epoch": 0.7132861764161906,
"grad_norm": 3.8125,
"learning_rate": 5.9126230357451216e-05,
"loss": 0.0002,
"step": 25550
},
{
"epoch": 0.7146820397751265,
"grad_norm": 1.0390625,
"learning_rate": 5.883842744488575e-05,
"loss": 0.0003,
"step": 25600
},
{
"epoch": 0.7160779031340622,
"grad_norm": 4.28125,
"learning_rate": 5.8550624532320275e-05,
"loss": 0.0004,
"step": 25650
},
{
"epoch": 0.717473766492998,
"grad_norm": 1.1015625,
"learning_rate": 5.8262821619754795e-05,
"loss": 0.0002,
"step": 25700
},
{
"epoch": 0.7188696298519338,
"grad_norm": 1.5390625,
"learning_rate": 5.797501870718932e-05,
"loss": 0.0003,
"step": 25750
},
{
"epoch": 0.7202654932108696,
"grad_norm": 0.9765625,
"learning_rate": 5.768721579462384e-05,
"loss": 0.0002,
"step": 25800
},
{
"epoch": 0.7216613565698053,
"grad_norm": 1.9140625,
"learning_rate": 5.739941288205837e-05,
"loss": 0.0001,
"step": 25850
},
{
"epoch": 0.7230572199287412,
"grad_norm": 1.5234375,
"learning_rate": 5.711160996949289e-05,
"loss": 0.0002,
"step": 25900
},
{
"epoch": 0.724453083287677,
"grad_norm": 2.234375,
"learning_rate": 5.682380705692741e-05,
"loss": 0.0002,
"step": 25950
},
{
"epoch": 0.7258489466466128,
"grad_norm": 0.392578125,
"learning_rate": 5.6536004144361946e-05,
"loss": 0.0002,
"step": 26000
},
{
"epoch": 0.7272448100055485,
"grad_norm": 1.546875,
"learning_rate": 5.624820123179647e-05,
"loss": 0.0002,
"step": 26050
},
{
"epoch": 0.7286406733644843,
"grad_norm": 1.2265625,
"learning_rate": 5.596039831923099e-05,
"loss": 0.0001,
"step": 26100
},
{
"epoch": 0.7300365367234202,
"grad_norm": 0.8125,
"learning_rate": 5.567259540666552e-05,
"loss": 0.0002,
"step": 26150
},
{
"epoch": 0.731432400082356,
"grad_norm": 0.265625,
"learning_rate": 5.5384792494100044e-05,
"loss": 0.0001,
"step": 26200
},
{
"epoch": 0.7328282634412917,
"grad_norm": 0.050537109375,
"learning_rate": 5.5096989581534564e-05,
"loss": 0.0001,
"step": 26250
},
{
"epoch": 0.7328282634412917,
"eval_loss": 9.440889698453248e-06,
"eval_mae": 0.0024416493251919746,
"eval_rmse": 0.003072603140026331,
"eval_runtime": 310.9978,
"eval_samples_per_second": 6.431,
"eval_steps_per_second": 6.431,
"step": 26250
},
{
"epoch": 0.7342241268002275,
"grad_norm": 2.734375,
"learning_rate": 5.480918666896909e-05,
"loss": 0.0002,
"step": 26300
},
{
"epoch": 0.7356199901591634,
"grad_norm": 1.5625,
"learning_rate": 5.452138375640362e-05,
"loss": 0.0002,
"step": 26350
},
{
"epoch": 0.7370158535180991,
"grad_norm": 3.15625,
"learning_rate": 5.4233580843838136e-05,
"loss": 0.0001,
"step": 26400
},
{
"epoch": 0.7384117168770349,
"grad_norm": 0.07080078125,
"learning_rate": 5.394577793127267e-05,
"loss": 0.0002,
"step": 26450
},
{
"epoch": 0.7398075802359707,
"grad_norm": 0.035400390625,
"learning_rate": 5.3657975018707195e-05,
"loss": 0.0001,
"step": 26500
},
{
"epoch": 0.7412034435949065,
"grad_norm": 0.0390625,
"learning_rate": 5.3370172106141715e-05,
"loss": 0.0001,
"step": 26550
},
{
"epoch": 0.7425993069538422,
"grad_norm": 2.171875,
"learning_rate": 5.308236919357624e-05,
"loss": 0.0001,
"step": 26600
},
{
"epoch": 0.7439951703127781,
"grad_norm": 0.796875,
"learning_rate": 5.2794566281010774e-05,
"loss": 0.0002,
"step": 26650
},
{
"epoch": 0.7453910336717139,
"grad_norm": 2.65625,
"learning_rate": 5.250676336844529e-05,
"loss": 0.0002,
"step": 26700
},
{
"epoch": 0.7467868970306497,
"grad_norm": 0.6171875,
"learning_rate": 5.221896045587982e-05,
"loss": 0.0002,
"step": 26750
},
{
"epoch": 0.7481827603895854,
"grad_norm": 1.078125,
"learning_rate": 5.1931157543314347e-05,
"loss": 0.0001,
"step": 26800
},
{
"epoch": 0.7495786237485212,
"grad_norm": 1.6875,
"learning_rate": 5.1643354630748866e-05,
"loss": 0.0001,
"step": 26850
},
{
"epoch": 0.7509744871074571,
"grad_norm": 2.640625,
"learning_rate": 5.135555171818339e-05,
"loss": 0.0001,
"step": 26900
},
{
"epoch": 0.7523703504663929,
"grad_norm": 0.028564453125,
"learning_rate": 5.106774880561791e-05,
"loss": 0.0002,
"step": 26950
},
{
"epoch": 0.7537662138253286,
"grad_norm": 2.640625,
"learning_rate": 5.077994589305244e-05,
"loss": 0.0001,
"step": 27000
},
{
"epoch": 0.7551620771842644,
"grad_norm": 0.2412109375,
"learning_rate": 5.0492142980486965e-05,
"loss": 0.0001,
"step": 27050
},
{
"epoch": 0.7565579405432002,
"grad_norm": 0.0478515625,
"learning_rate": 5.0204340067921484e-05,
"loss": 0.0002,
"step": 27100
},
{
"epoch": 0.7579538039021361,
"grad_norm": 0.25390625,
"learning_rate": 4.991653715535601e-05,
"loss": 0.0001,
"step": 27150
},
{
"epoch": 0.7593496672610718,
"grad_norm": 1.703125,
"learning_rate": 4.9628734242790544e-05,
"loss": 0.0002,
"step": 27200
},
{
"epoch": 0.7607455306200076,
"grad_norm": 1.7578125,
"learning_rate": 4.934093133022506e-05,
"loss": 0.0002,
"step": 27250
},
{
"epoch": 0.7621413939789434,
"grad_norm": 0.79296875,
"learning_rate": 4.905312841765959e-05,
"loss": 0.0002,
"step": 27300
},
{
"epoch": 0.7635372573378792,
"grad_norm": 0.369140625,
"learning_rate": 4.876532550509411e-05,
"loss": 0.0001,
"step": 27350
},
{
"epoch": 0.764933120696815,
"grad_norm": 0.6875,
"learning_rate": 4.847752259252864e-05,
"loss": 0.0001,
"step": 27400
},
{
"epoch": 0.7663289840557508,
"grad_norm": 0.53125,
"learning_rate": 4.818971967996316e-05,
"loss": 0.0001,
"step": 27450
},
{
"epoch": 0.7677248474146866,
"grad_norm": 0.240234375,
"learning_rate": 4.790191676739769e-05,
"loss": 0.0001,
"step": 27500
},
{
"epoch": 0.7677248474146866,
"eval_loss": 1.0368624316470232e-05,
"eval_mae": 0.0025949301198124886,
"eval_rmse": 0.0032200347632169724,
"eval_runtime": 317.1945,
"eval_samples_per_second": 6.305,
"eval_steps_per_second": 6.305,
"step": 27500
},
{
"epoch": 0.7691207107736223,
"grad_norm": 0.5234375,
"learning_rate": 4.7614113854832214e-05,
"loss": 0.0001,
"step": 27550
},
{
"epoch": 0.7705165741325581,
"grad_norm": 1.4765625,
"learning_rate": 4.732631094226674e-05,
"loss": 0.0002,
"step": 27600
},
{
"epoch": 0.771912437491494,
"grad_norm": 0.455078125,
"learning_rate": 4.703850802970126e-05,
"loss": 0.0002,
"step": 27650
},
{
"epoch": 0.7733083008504298,
"grad_norm": 0.5390625,
"learning_rate": 4.675070511713579e-05,
"loss": 0.0001,
"step": 27700
},
{
"epoch": 0.7747041642093655,
"grad_norm": 0.96484375,
"learning_rate": 4.646290220457031e-05,
"loss": 0.0002,
"step": 27750
},
{
"epoch": 0.7761000275683013,
"grad_norm": 0.73046875,
"learning_rate": 4.617509929200484e-05,
"loss": 0.0001,
"step": 27800
},
{
"epoch": 0.7774958909272371,
"grad_norm": 0.1923828125,
"learning_rate": 4.588729637943936e-05,
"loss": 0.0001,
"step": 27850
},
{
"epoch": 0.778891754286173,
"grad_norm": 1.25,
"learning_rate": 4.559949346687389e-05,
"loss": 0.0001,
"step": 27900
},
{
"epoch": 0.7802876176451087,
"grad_norm": 3.453125,
"learning_rate": 4.531169055430841e-05,
"loss": 0.0001,
"step": 27950
},
{
"epoch": 0.7816834810040445,
"grad_norm": 2.296875,
"learning_rate": 4.502388764174294e-05,
"loss": 0.0002,
"step": 28000
},
{
"epoch": 0.7830793443629803,
"grad_norm": 3.109375,
"learning_rate": 4.4736084729177464e-05,
"loss": 0.0001,
"step": 28050
},
{
"epoch": 0.7844752077219161,
"grad_norm": 1.640625,
"learning_rate": 4.444828181661198e-05,
"loss": 0.0001,
"step": 28100
},
{
"epoch": 0.7858710710808519,
"grad_norm": 0.2001953125,
"learning_rate": 4.416047890404651e-05,
"loss": 0.0002,
"step": 28150
},
{
"epoch": 0.7872669344397877,
"grad_norm": 2.453125,
"learning_rate": 4.3872675991481036e-05,
"loss": 0.0002,
"step": 28200
},
{
"epoch": 0.7886627977987235,
"grad_norm": 0.69921875,
"learning_rate": 4.358487307891556e-05,
"loss": 0.0001,
"step": 28250
},
{
"epoch": 0.7900586611576593,
"grad_norm": 0.734375,
"learning_rate": 4.329707016635008e-05,
"loss": 0.0001,
"step": 28300
},
{
"epoch": 0.791454524516595,
"grad_norm": 0.02294921875,
"learning_rate": 4.3009267253784615e-05,
"loss": 0.0001,
"step": 28350
},
{
"epoch": 0.7928503878755309,
"grad_norm": 0.84765625,
"learning_rate": 4.2721464341219134e-05,
"loss": 0.0001,
"step": 28400
},
{
"epoch": 0.7942462512344667,
"grad_norm": 1.3046875,
"learning_rate": 4.243366142865366e-05,
"loss": 0.0001,
"step": 28450
},
{
"epoch": 0.7956421145934025,
"grad_norm": 0.625,
"learning_rate": 4.214585851608818e-05,
"loss": 0.0001,
"step": 28500
},
{
"epoch": 0.7970379779523382,
"grad_norm": 0.19140625,
"learning_rate": 4.185805560352271e-05,
"loss": 0.0001,
"step": 28550
},
{
"epoch": 0.798433841311274,
"grad_norm": 0.66796875,
"learning_rate": 4.157025269095723e-05,
"loss": 0.0001,
"step": 28600
},
{
"epoch": 0.7998297046702099,
"grad_norm": 0.390625,
"learning_rate": 4.128244977839176e-05,
"loss": 0.0002,
"step": 28650
},
{
"epoch": 0.8012255680291456,
"grad_norm": 2.53125,
"learning_rate": 4.0994646865826285e-05,
"loss": 0.0001,
"step": 28700
},
{
"epoch": 0.8026214313880814,
"grad_norm": 0.54296875,
"learning_rate": 4.070684395326081e-05,
"loss": 0.0001,
"step": 28750
},
{
"epoch": 0.8026214313880814,
"eval_loss": 8.319076187035535e-06,
"eval_mae": 0.002299492945894599,
"eval_rmse": 0.0028842808678746223,
"eval_runtime": 319.4261,
"eval_samples_per_second": 6.261,
"eval_steps_per_second": 6.261,
"step": 28750
},
{
"epoch": 0.8040172947470172,
"grad_norm": 1.5703125,
"learning_rate": 4.041904104069533e-05,
"loss": 0.0001,
"step": 28800
},
{
"epoch": 0.805413158105953,
"grad_norm": 2.203125,
"learning_rate": 4.0131238128129864e-05,
"loss": 0.0001,
"step": 28850
},
{
"epoch": 0.8068090214648888,
"grad_norm": 3.296875,
"learning_rate": 3.9843435215564384e-05,
"loss": 0.0001,
"step": 28900
},
{
"epoch": 0.8082048848238246,
"grad_norm": 0.671875,
"learning_rate": 3.955563230299891e-05,
"loss": 0.0001,
"step": 28950
},
{
"epoch": 0.8096007481827604,
"grad_norm": 1.453125,
"learning_rate": 3.926782939043343e-05,
"loss": 0.0001,
"step": 29000
},
{
"epoch": 0.8109966115416962,
"grad_norm": 1.0859375,
"learning_rate": 3.8980026477867956e-05,
"loss": 0.0001,
"step": 29050
},
{
"epoch": 0.8123924749006319,
"grad_norm": 0.89453125,
"learning_rate": 3.869222356530248e-05,
"loss": 0.0001,
"step": 29100
},
{
"epoch": 0.8137883382595678,
"grad_norm": 1.453125,
"learning_rate": 3.840442065273701e-05,
"loss": 0.0001,
"step": 29150
},
{
"epoch": 0.8151842016185036,
"grad_norm": 0.51953125,
"learning_rate": 3.8116617740171535e-05,
"loss": 0.0002,
"step": 29200
},
{
"epoch": 0.8165800649774394,
"grad_norm": 0.85546875,
"learning_rate": 3.7828814827606055e-05,
"loss": 0.0001,
"step": 29250
},
{
"epoch": 0.8179759283363751,
"grad_norm": 0.33203125,
"learning_rate": 3.754101191504058e-05,
"loss": 0.0001,
"step": 29300
},
{
"epoch": 0.8193717916953109,
"grad_norm": 0.37109375,
"learning_rate": 3.725320900247511e-05,
"loss": 0.0001,
"step": 29350
},
{
"epoch": 0.8207676550542468,
"grad_norm": 0.98828125,
"learning_rate": 3.6965406089909633e-05,
"loss": 0.0001,
"step": 29400
},
{
"epoch": 0.8221635184131826,
"grad_norm": 0.232421875,
"learning_rate": 3.667760317734415e-05,
"loss": 0.0001,
"step": 29450
},
{
"epoch": 0.8235593817721183,
"grad_norm": 0.89453125,
"learning_rate": 3.6389800264778686e-05,
"loss": 0.0001,
"step": 29500
},
{
"epoch": 0.8249552451310541,
"grad_norm": 1.0703125,
"learning_rate": 3.6101997352213206e-05,
"loss": 0.0001,
"step": 29550
},
{
"epoch": 0.8263511084899899,
"grad_norm": 0.47265625,
"learning_rate": 3.581419443964773e-05,
"loss": 0.0001,
"step": 29600
},
{
"epoch": 0.8277469718489258,
"grad_norm": 0.70703125,
"learning_rate": 3.552639152708225e-05,
"loss": 0.0001,
"step": 29650
},
{
"epoch": 0.8291428352078615,
"grad_norm": 0.36328125,
"learning_rate": 3.5238588614516785e-05,
"loss": 0.0001,
"step": 29700
},
{
"epoch": 0.8305386985667973,
"grad_norm": 0.84375,
"learning_rate": 3.4950785701951304e-05,
"loss": 0.0001,
"step": 29750
},
{
"epoch": 0.8319345619257331,
"grad_norm": 2.5625,
"learning_rate": 3.466298278938583e-05,
"loss": 0.0001,
"step": 29800
},
{
"epoch": 0.8333304252846688,
"grad_norm": 0.029052734375,
"learning_rate": 3.437517987682036e-05,
"loss": 0.0001,
"step": 29850
},
{
"epoch": 0.8347262886436047,
"grad_norm": 0.84765625,
"learning_rate": 3.408737696425488e-05,
"loss": 0.0001,
"step": 29900
},
{
"epoch": 0.8361221520025405,
"grad_norm": 0.5546875,
"learning_rate": 3.37995740516894e-05,
"loss": 0.0001,
"step": 29950
},
{
"epoch": 0.8375180153614763,
"grad_norm": 0.302734375,
"learning_rate": 3.351177113912393e-05,
"loss": 0.0001,
"step": 30000
},
{
"epoch": 0.8375180153614763,
"eval_loss": 7.90274134487845e-06,
"eval_mae": 0.00223693554289639,
"eval_rmse": 0.002811181591823697,
"eval_runtime": 359.2158,
"eval_samples_per_second": 5.568,
"eval_steps_per_second": 5.568,
"step": 30000
},
{
"epoch": 0.838913878720412,
"grad_norm": 0.451171875,
"learning_rate": 3.3223968226558455e-05,
"loss": 0.0001,
"step": 30050
},
{
"epoch": 0.8403097420793478,
"grad_norm": 0.9921875,
"learning_rate": 3.293616531399298e-05,
"loss": 0.0001,
"step": 30100
},
{
"epoch": 0.8417056054382837,
"grad_norm": 0.0576171875,
"learning_rate": 3.26483624014275e-05,
"loss": 0.0001,
"step": 30150
},
{
"epoch": 0.8431014687972195,
"grad_norm": 1.515625,
"learning_rate": 3.236055948886203e-05,
"loss": 0.0001,
"step": 30200
},
{
"epoch": 0.8444973321561552,
"grad_norm": 0.6953125,
"learning_rate": 3.2072756576296554e-05,
"loss": 0.0001,
"step": 30250
},
{
"epoch": 0.845893195515091,
"grad_norm": 0.23046875,
"learning_rate": 3.178495366373107e-05,
"loss": 0.0001,
"step": 30300
},
{
"epoch": 0.8472890588740268,
"grad_norm": 0.55859375,
"learning_rate": 3.1497150751165606e-05,
"loss": 0.0001,
"step": 30350
},
{
"epoch": 0.8486849222329627,
"grad_norm": 0.11328125,
"learning_rate": 3.1209347838600126e-05,
"loss": 0.0001,
"step": 30400
},
{
"epoch": 0.8500807855918984,
"grad_norm": 1.0234375,
"learning_rate": 3.092154492603465e-05,
"loss": 0.0001,
"step": 30450
},
{
"epoch": 0.8514766489508342,
"grad_norm": 0.2099609375,
"learning_rate": 3.063374201346918e-05,
"loss": 0.0001,
"step": 30500
},
{
"epoch": 0.85287251230977,
"grad_norm": 0.078125,
"learning_rate": 3.03459391009037e-05,
"loss": 0.0001,
"step": 30550
},
{
"epoch": 0.8542683756687058,
"grad_norm": 1.125,
"learning_rate": 3.0058136188338228e-05,
"loss": 0.0001,
"step": 30600
},
{
"epoch": 0.8556642390276415,
"grad_norm": 0.70703125,
"learning_rate": 2.9770333275772754e-05,
"loss": 0.0001,
"step": 30650
},
{
"epoch": 0.8570601023865774,
"grad_norm": 1.5390625,
"learning_rate": 2.9482530363207277e-05,
"loss": 0.0001,
"step": 30700
},
{
"epoch": 0.8584559657455132,
"grad_norm": 0.1689453125,
"learning_rate": 2.91947274506418e-05,
"loss": 0.0001,
"step": 30750
},
{
"epoch": 0.859851829104449,
"grad_norm": 0.51171875,
"learning_rate": 2.8906924538076323e-05,
"loss": 0.0001,
"step": 30800
},
{
"epoch": 0.8612476924633847,
"grad_norm": 0.28515625,
"learning_rate": 2.8619121625510852e-05,
"loss": 0.0001,
"step": 30850
},
{
"epoch": 0.8626435558223206,
"grad_norm": 0.8671875,
"learning_rate": 2.8331318712945375e-05,
"loss": 0.0001,
"step": 30900
},
{
"epoch": 0.8640394191812564,
"grad_norm": 1.125,
"learning_rate": 2.80435158003799e-05,
"loss": 0.0001,
"step": 30950
},
{
"epoch": 0.8654352825401921,
"grad_norm": 2.421875,
"learning_rate": 2.7755712887814428e-05,
"loss": 0.0001,
"step": 31000
},
{
"epoch": 0.8668311458991279,
"grad_norm": 0.78515625,
"learning_rate": 2.746790997524895e-05,
"loss": 0.0001,
"step": 31050
},
{
"epoch": 0.8682270092580637,
"grad_norm": 0.083984375,
"learning_rate": 2.7180107062683474e-05,
"loss": 0.0001,
"step": 31100
},
{
"epoch": 0.8696228726169996,
"grad_norm": 1.890625,
"learning_rate": 2.6892304150118004e-05,
"loss": 0.0001,
"step": 31150
},
{
"epoch": 0.8710187359759353,
"grad_norm": 0.466796875,
"learning_rate": 2.6604501237552526e-05,
"loss": 0.0001,
"step": 31200
},
{
"epoch": 0.8724145993348711,
"grad_norm": 1.1015625,
"learning_rate": 2.631669832498705e-05,
"loss": 0.0001,
"step": 31250
},
{
"epoch": 0.8724145993348711,
"eval_loss": 7.70491715229582e-06,
"eval_mae": 0.002213448518887162,
"eval_rmse": 0.002775773173198104,
"eval_runtime": 314.7817,
"eval_samples_per_second": 6.354,
"eval_steps_per_second": 6.354,
"step": 31250
},
{
"epoch": 0.8738104626938069,
"grad_norm": 1.734375,
"learning_rate": 2.6028895412421572e-05,
"loss": 0.0001,
"step": 31300
},
{
"epoch": 0.8752063260527427,
"grad_norm": 0.5625,
"learning_rate": 2.5741092499856102e-05,
"loss": 0.0001,
"step": 31350
},
{
"epoch": 0.8766021894116784,
"grad_norm": 2.203125,
"learning_rate": 2.5453289587290625e-05,
"loss": 0.0001,
"step": 31400
},
{
"epoch": 0.8779980527706143,
"grad_norm": 2.921875,
"learning_rate": 2.5165486674725148e-05,
"loss": 0.0001,
"step": 31450
},
{
"epoch": 0.8793939161295501,
"grad_norm": 0.1865234375,
"learning_rate": 2.4877683762159674e-05,
"loss": 0.0001,
"step": 31500
},
{
"epoch": 0.8807897794884859,
"grad_norm": 1.171875,
"learning_rate": 2.45898808495942e-05,
"loss": 0.0001,
"step": 31550
},
{
"epoch": 0.8821856428474216,
"grad_norm": 0.8515625,
"learning_rate": 2.4302077937028723e-05,
"loss": 0.0001,
"step": 31600
},
{
"epoch": 0.8835815062063574,
"grad_norm": 0.625,
"learning_rate": 2.401427502446325e-05,
"loss": 0.0001,
"step": 31650
},
{
"epoch": 0.8849773695652933,
"grad_norm": 0.2060546875,
"learning_rate": 2.3726472111897773e-05,
"loss": 0.0001,
"step": 31700
},
{
"epoch": 0.8863732329242291,
"grad_norm": 1.1484375,
"learning_rate": 2.3438669199332296e-05,
"loss": 0.0001,
"step": 31750
},
{
"epoch": 0.8877690962831648,
"grad_norm": 0.3359375,
"learning_rate": 2.3150866286766822e-05,
"loss": 0.0001,
"step": 31800
},
{
"epoch": 0.8891649596421006,
"grad_norm": 1.2890625,
"learning_rate": 2.2863063374201348e-05,
"loss": 0.0001,
"step": 31850
},
{
"epoch": 0.8905608230010365,
"grad_norm": 0.51953125,
"learning_rate": 2.257526046163587e-05,
"loss": 0.0001,
"step": 31900
},
{
"epoch": 0.8919566863599723,
"grad_norm": 0.455078125,
"learning_rate": 2.2287457549070397e-05,
"loss": 0.0001,
"step": 31950
},
{
"epoch": 0.893352549718908,
"grad_norm": 0.3046875,
"learning_rate": 2.1999654636504924e-05,
"loss": 0.0001,
"step": 32000
},
{
"epoch": 0.8947484130778438,
"grad_norm": 0.146484375,
"learning_rate": 2.1711851723939447e-05,
"loss": 0.0001,
"step": 32050
},
{
"epoch": 0.8961442764367796,
"grad_norm": 0.166015625,
"learning_rate": 2.1424048811373973e-05,
"loss": 0.0001,
"step": 32100
},
{
"epoch": 0.8975401397957155,
"grad_norm": 0.58984375,
"learning_rate": 2.1136245898808496e-05,
"loss": 0.0001,
"step": 32150
},
{
"epoch": 0.8989360031546512,
"grad_norm": 0.326171875,
"learning_rate": 2.0848442986243022e-05,
"loss": 0.0001,
"step": 32200
},
{
"epoch": 0.900331866513587,
"grad_norm": 0.50390625,
"learning_rate": 2.056064007367755e-05,
"loss": 0.0001,
"step": 32250
},
{
"epoch": 0.9017277298725228,
"grad_norm": 0.33203125,
"learning_rate": 2.027283716111207e-05,
"loss": 0.0001,
"step": 32300
},
{
"epoch": 0.9031235932314585,
"grad_norm": 0.9453125,
"learning_rate": 1.9985034248546598e-05,
"loss": 0.0001,
"step": 32350
},
{
"epoch": 0.9045194565903943,
"grad_norm": 1.0546875,
"learning_rate": 1.969723133598112e-05,
"loss": 0.0001,
"step": 32400
},
{
"epoch": 0.9059153199493302,
"grad_norm": 2.03125,
"learning_rate": 1.9409428423415647e-05,
"loss": 0.0001,
"step": 32450
},
{
"epoch": 0.907311183308266,
"grad_norm": 0.1025390625,
"learning_rate": 1.9121625510850173e-05,
"loss": 0.0001,
"step": 32500
},
{
"epoch": 0.907311183308266,
"eval_loss": 8.019745109777432e-06,
"eval_mae": 0.0022684482391923666,
"eval_rmse": 0.002831915393471718,
"eval_runtime": 314.5488,
"eval_samples_per_second": 6.358,
"eval_steps_per_second": 6.358,
"step": 32500
},
{
"epoch": 0.9087070466672017,
"grad_norm": 0.248046875,
"learning_rate": 1.8833822598284696e-05,
"loss": 0.0001,
"step": 32550
},
{
"epoch": 0.9101029100261375,
"grad_norm": 0.28125,
"learning_rate": 1.8546019685719223e-05,
"loss": 0.0001,
"step": 32600
},
{
"epoch": 0.9114987733850733,
"grad_norm": 0.4296875,
"learning_rate": 1.8258216773153745e-05,
"loss": 0.0001,
"step": 32650
},
{
"epoch": 0.9128946367440092,
"grad_norm": 1.5234375,
"learning_rate": 1.797041386058827e-05,
"loss": 0.0001,
"step": 32700
},
{
"epoch": 0.9142905001029449,
"grad_norm": 0.2353515625,
"learning_rate": 1.7682610948022795e-05,
"loss": 0.0001,
"step": 32750
},
{
"epoch": 0.9156863634618807,
"grad_norm": 0.6328125,
"learning_rate": 1.7394808035457318e-05,
"loss": 0.0001,
"step": 32800
},
{
"epoch": 0.9170822268208165,
"grad_norm": 0.06591796875,
"learning_rate": 1.7107005122891844e-05,
"loss": 0.0001,
"step": 32850
},
{
"epoch": 0.9184780901797523,
"grad_norm": 0.177734375,
"learning_rate": 1.6819202210326367e-05,
"loss": 0.0001,
"step": 32900
},
{
"epoch": 0.9198739535386881,
"grad_norm": 0.234375,
"learning_rate": 1.6531399297760893e-05,
"loss": 0.0001,
"step": 32950
},
{
"epoch": 0.9212698168976239,
"grad_norm": 0.208984375,
"learning_rate": 1.624359638519542e-05,
"loss": 0.0001,
"step": 33000
},
{
"epoch": 0.9226656802565597,
"grad_norm": 0.74609375,
"learning_rate": 1.5955793472629942e-05,
"loss": 0.0001,
"step": 33050
},
{
"epoch": 0.9240615436154955,
"grad_norm": 0.58984375,
"learning_rate": 1.566799056006447e-05,
"loss": 0.0001,
"step": 33100
},
{
"epoch": 0.9254574069744312,
"grad_norm": 1.203125,
"learning_rate": 1.5380187647498995e-05,
"loss": 0.0001,
"step": 33150
},
{
"epoch": 0.9268532703333671,
"grad_norm": 0.953125,
"learning_rate": 1.5092384734933518e-05,
"loss": 0.0001,
"step": 33200
},
{
"epoch": 0.9282491336923029,
"grad_norm": 0.19140625,
"learning_rate": 1.4804581822368044e-05,
"loss": 0.0001,
"step": 33250
},
{
"epoch": 0.9296449970512387,
"grad_norm": 0.99609375,
"learning_rate": 1.4516778909802567e-05,
"loss": 0.0001,
"step": 33300
},
{
"epoch": 0.9310408604101744,
"grad_norm": 0.138671875,
"learning_rate": 1.4228975997237094e-05,
"loss": 0.0001,
"step": 33350
},
{
"epoch": 0.9324367237691102,
"grad_norm": 0.5546875,
"learning_rate": 1.3941173084671618e-05,
"loss": 0.0001,
"step": 33400
},
{
"epoch": 0.9338325871280461,
"grad_norm": 0.2255859375,
"learning_rate": 1.3653370172106141e-05,
"loss": 0.0001,
"step": 33450
},
{
"epoch": 0.9352284504869818,
"grad_norm": 0.431640625,
"learning_rate": 1.3365567259540667e-05,
"loss": 0.0001,
"step": 33500
},
{
"epoch": 0.9366243138459176,
"grad_norm": 1.359375,
"learning_rate": 1.307776434697519e-05,
"loss": 0.0001,
"step": 33550
},
{
"epoch": 0.9380201772048534,
"grad_norm": 0.66015625,
"learning_rate": 1.2789961434409717e-05,
"loss": 0.0001,
"step": 33600
},
{
"epoch": 0.9394160405637892,
"grad_norm": 1.609375,
"learning_rate": 1.2502158521844243e-05,
"loss": 0.0001,
"step": 33650
},
{
"epoch": 0.940811903922725,
"grad_norm": 0.265625,
"learning_rate": 1.2214355609278766e-05,
"loss": 0.0001,
"step": 33700
},
{
"epoch": 0.9422077672816608,
"grad_norm": 0.1513671875,
"learning_rate": 1.1926552696713292e-05,
"loss": 0.0001,
"step": 33750
},
{
"epoch": 0.9422077672816608,
"eval_loss": 7.356254627666203e-06,
"eval_mae": 0.0021641200874000788,
"eval_rmse": 0.002712241606786847,
"eval_runtime": 314.5626,
"eval_samples_per_second": 6.358,
"eval_steps_per_second": 6.358,
"step": 33750
},
{
"epoch": 0.9436036306405966,
"grad_norm": 0.01300048828125,
"learning_rate": 1.1638749784147817e-05,
"loss": 0.0001,
"step": 33800
},
{
"epoch": 0.9449994939995324,
"grad_norm": 0.056640625,
"learning_rate": 1.1350946871582341e-05,
"loss": 0.0001,
"step": 33850
},
{
"epoch": 0.9463953573584681,
"grad_norm": 0.70703125,
"learning_rate": 1.1063143959016866e-05,
"loss": 0.0001,
"step": 33900
},
{
"epoch": 0.947791220717404,
"grad_norm": 0.3515625,
"learning_rate": 1.077534104645139e-05,
"loss": 0.0001,
"step": 33950
},
{
"epoch": 0.9491870840763398,
"grad_norm": 0.365234375,
"learning_rate": 1.0487538133885915e-05,
"loss": 0.0001,
"step": 34000
},
{
"epoch": 0.9505829474352756,
"grad_norm": 0.283203125,
"learning_rate": 1.019973522132044e-05,
"loss": 0.0001,
"step": 34050
},
{
"epoch": 0.9519788107942113,
"grad_norm": 0.61328125,
"learning_rate": 9.911932308754965e-06,
"loss": 0.0001,
"step": 34100
},
{
"epoch": 0.9533746741531471,
"grad_norm": 0.5546875,
"learning_rate": 9.624129396189489e-06,
"loss": 0.0001,
"step": 34150
},
{
"epoch": 0.954770537512083,
"grad_norm": 0.400390625,
"learning_rate": 9.336326483624015e-06,
"loss": 0.0001,
"step": 34200
},
{
"epoch": 0.9561664008710188,
"grad_norm": 0.2119140625,
"learning_rate": 9.04852357105854e-06,
"loss": 0.0001,
"step": 34250
},
{
"epoch": 0.9575622642299545,
"grad_norm": 0.294921875,
"learning_rate": 8.760720658493065e-06,
"loss": 0.0001,
"step": 34300
},
{
"epoch": 0.9589581275888903,
"grad_norm": 0.404296875,
"learning_rate": 8.47291774592759e-06,
"loss": 0.0001,
"step": 34350
},
{
"epoch": 0.9603539909478261,
"grad_norm": 1.03125,
"learning_rate": 8.185114833362114e-06,
"loss": 0.0001,
"step": 34400
},
{
"epoch": 0.961749854306762,
"grad_norm": 0.1357421875,
"learning_rate": 7.897311920796639e-06,
"loss": 0.0001,
"step": 34450
},
{
"epoch": 0.9631457176656977,
"grad_norm": 0.341796875,
"learning_rate": 7.609509008231164e-06,
"loss": 0.0001,
"step": 34500
},
{
"epoch": 0.9645415810246335,
"grad_norm": 0.71875,
"learning_rate": 7.321706095665689e-06,
"loss": 0.0001,
"step": 34550
},
{
"epoch": 0.9659374443835693,
"grad_norm": 0.1787109375,
"learning_rate": 7.033903183100212e-06,
"loss": 0.0001,
"step": 34600
},
{
"epoch": 0.967333307742505,
"grad_norm": 0.052734375,
"learning_rate": 6.746100270534739e-06,
"loss": 0.0001,
"step": 34650
},
{
"epoch": 0.9687291711014409,
"grad_norm": 0.875,
"learning_rate": 6.458297357969263e-06,
"loss": 0.0001,
"step": 34700
},
{
"epoch": 0.9701250344603767,
"grad_norm": 0.310546875,
"learning_rate": 6.170494445403788e-06,
"loss": 0.0001,
"step": 34750
},
{
"epoch": 0.9715208978193125,
"grad_norm": 0.453125,
"learning_rate": 5.8826915328383125e-06,
"loss": 0.0001,
"step": 34800
},
{
"epoch": 0.9729167611782482,
"grad_norm": 0.88671875,
"learning_rate": 5.594888620272837e-06,
"loss": 0.0001,
"step": 34850
},
{
"epoch": 0.974312624537184,
"grad_norm": 0.032470703125,
"learning_rate": 5.307085707707362e-06,
"loss": 0.0001,
"step": 34900
},
{
"epoch": 0.9757084878961199,
"grad_norm": 1.5546875,
"learning_rate": 5.019282795141887e-06,
"loss": 0.0001,
"step": 34950
},
{
"epoch": 0.9771043512550557,
"grad_norm": 1.2578125,
"learning_rate": 4.731479882576412e-06,
"loss": 0.0001,
"step": 35000
},
{
"epoch": 0.9771043512550557,
"eval_loss": 7.189828011178179e-06,
"eval_mae": 0.0021512035746127367,
"eval_rmse": 0.0026813854929059744,
"eval_runtime": 314.4052,
"eval_samples_per_second": 6.361,
"eval_steps_per_second": 6.361,
"step": 35000
},
{
"epoch": 0.9785002146139914,
"grad_norm": 0.99609375,
"learning_rate": 4.443676970010937e-06,
"loss": 0.0001,
"step": 35050
},
{
"epoch": 0.9798960779729272,
"grad_norm": 0.5390625,
"learning_rate": 4.155874057445461e-06,
"loss": 0.0001,
"step": 35100
},
{
"epoch": 0.981291941331863,
"grad_norm": 0.83203125,
"learning_rate": 3.8680711448799866e-06,
"loss": 0.0001,
"step": 35150
},
{
"epoch": 0.9826878046907989,
"grad_norm": 0.48046875,
"learning_rate": 3.580268232314511e-06,
"loss": 0.0001,
"step": 35200
},
{
"epoch": 0.9840836680497346,
"grad_norm": 1.078125,
"learning_rate": 3.292465319749036e-06,
"loss": 0.0001,
"step": 35250
},
{
"epoch": 0.9854795314086704,
"grad_norm": 0.3828125,
"learning_rate": 3.004662407183561e-06,
"loss": 0.0001,
"step": 35300
},
{
"epoch": 0.9868753947676062,
"grad_norm": 0.466796875,
"learning_rate": 2.716859494618086e-06,
"loss": 0.0001,
"step": 35350
},
{
"epoch": 0.988271258126542,
"grad_norm": 0.7734375,
"learning_rate": 2.4290565820526105e-06,
"loss": 0.0001,
"step": 35400
},
{
"epoch": 0.9896671214854778,
"grad_norm": 1.3359375,
"learning_rate": 2.141253669487135e-06,
"loss": 0.0001,
"step": 35450
},
{
"epoch": 0.9910629848444136,
"grad_norm": 0.2119140625,
"learning_rate": 1.8534507569216602e-06,
"loss": 0.0001,
"step": 35500
},
{
"epoch": 0.9924588482033494,
"grad_norm": 0.1728515625,
"learning_rate": 1.565647844356185e-06,
"loss": 0.0001,
"step": 35550
},
{
"epoch": 0.9938547115622852,
"grad_norm": 0.294921875,
"learning_rate": 1.2778449317907098e-06,
"loss": 0.0001,
"step": 35600
},
{
"epoch": 0.9952505749212209,
"grad_norm": 0.2734375,
"learning_rate": 9.900420192252346e-07,
"loss": 0.0001,
"step": 35650
},
{
"epoch": 0.9966464382801568,
"grad_norm": 0.287109375,
"learning_rate": 7.022391066597595e-07,
"loss": 0.0001,
"step": 35700
},
{
"epoch": 0.9980423016390926,
"grad_norm": 0.98046875,
"learning_rate": 4.144361940942842e-07,
"loss": 0.0001,
"step": 35750
},
{
"epoch": 0.9994381649980283,
"grad_norm": 0.341796875,
"learning_rate": 1.2663328152880908e-07,
"loss": 0.0001,
"step": 35800
}
],
"logging_steps": 50,
"max_steps": 35821,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.780150075109409e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}