{ "best_global_step": 35000, "best_metric": 0.0021512035746127367, "best_model_checkpoint": "D:\\Task_design\\Scene\\outputs\\qwen7b-lora-will_half_fp16_v2\\checkpoint-35000", "epoch": 1.0, "eval_steps": 1250, "global_step": 35821, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013958633589357939, "grad_norm": 2320.0, "learning_rate": 9.116279069767441e-06, "loss": 120.7821, "step": 50 }, { "epoch": 0.0027917267178715877, "grad_norm": 752.0, "learning_rate": 1.841860465116279e-05, "loss": 1.6562, "step": 100 }, { "epoch": 0.004187590076807381, "grad_norm": 264.0, "learning_rate": 2.772093023255814e-05, "loss": 0.5144, "step": 150 }, { "epoch": 0.005583453435743175, "grad_norm": 3120.0, "learning_rate": 3.702325581395349e-05, "loss": 0.9009, "step": 200 }, { "epoch": 0.006979316794678969, "grad_norm": 1296.0, "learning_rate": 4.632558139534884e-05, "loss": 1.4696, "step": 250 }, { "epoch": 0.008375180153614763, "grad_norm": 3632.0, "learning_rate": 5.562790697674419e-05, "loss": 1.8122, "step": 300 }, { "epoch": 0.009771043512550556, "grad_norm": 600.0, "learning_rate": 6.493023255813954e-05, "loss": 1.605, "step": 350 }, { "epoch": 0.01116690687148635, "grad_norm": 1888.0, "learning_rate": 7.423255813953489e-05, "loss": 1.046, "step": 400 }, { "epoch": 0.012562770230422144, "grad_norm": 137.0, "learning_rate": 8.353488372093023e-05, "loss": 1.1465, "step": 450 }, { "epoch": 0.013958633589357937, "grad_norm": 1984.0, "learning_rate": 9.283720930232559e-05, "loss": 5.8899, "step": 500 }, { "epoch": 0.015354496948293732, "grad_norm": 2528.0, "learning_rate": 0.00010213953488372094, "loss": 1.0712, "step": 550 }, { "epoch": 0.016750360307229525, "grad_norm": 4.34375, "learning_rate": 0.00011144186046511629, "loss": 0.4232, "step": 600 }, { "epoch": 0.01814622366616532, "grad_norm": 249.0, "learning_rate": 0.00012074418604651163, "loss": 0.837, "step": 650 }, { "epoch": 0.019542087025101112, "grad_norm": 1224.0, "learning_rate": 0.000130046511627907, "loss": 0.4405, "step": 700 }, { "epoch": 0.020937950384036907, "grad_norm": 2800.0, "learning_rate": 0.00013934883720930234, "loss": 1.175, "step": 750 }, { "epoch": 0.0223338137429727, "grad_norm": 124.0, "learning_rate": 0.0001486511627906977, "loss": 1.2591, "step": 800 }, { "epoch": 0.023729677101908493, "grad_norm": 1056.0, "learning_rate": 0.00015795348837209302, "loss": 0.4401, "step": 850 }, { "epoch": 0.025125540460844288, "grad_norm": 1208.0, "learning_rate": 0.00016725581395348837, "loss": 0.4283, "step": 900 }, { "epoch": 0.026521403819780083, "grad_norm": 1048.0, "learning_rate": 0.00017655813953488373, "loss": 0.4223, "step": 950 }, { "epoch": 0.027917267178715875, "grad_norm": 22.25, "learning_rate": 0.00018586046511627908, "loss": 0.1113, "step": 1000 }, { "epoch": 0.02931313053765167, "grad_norm": 127.5, "learning_rate": 0.00019516279069767444, "loss": 0.0873, "step": 1050 }, { "epoch": 0.030708993896587464, "grad_norm": 712.0, "learning_rate": 0.00019986185460196858, "loss": 0.5109, "step": 1100 }, { "epoch": 0.032104857255523256, "grad_norm": 180.0, "learning_rate": 0.0001995740516894031, "loss": 0.4199, "step": 1150 }, { "epoch": 0.03350072061445905, "grad_norm": 984.0, "learning_rate": 0.00019928624877683763, "loss": 0.6015, "step": 1200 }, { "epoch": 0.034896583973394846, "grad_norm": 616.0, "learning_rate": 0.00019899844586427216, "loss": 0.5867, "step": 1250 }, { "epoch": 0.034896583973394846, "eval_loss": 0.0031845432240515947, "eval_mae": 0.045285664498806, "eval_rmse": 0.05643175542354584, "eval_runtime": 320.3726, "eval_samples_per_second": 6.243, "eval_steps_per_second": 6.243, "step": 1250 }, { "epoch": 0.03629244733233064, "grad_norm": 132.0, "learning_rate": 0.00019871064295170666, "loss": 0.3113, "step": 1300 }, { "epoch": 0.03768831069126643, "grad_norm": 26.375, "learning_rate": 0.0001984228400391412, "loss": 0.0702, "step": 1350 }, { "epoch": 0.039084174050202224, "grad_norm": 212.0, "learning_rate": 0.00019813503712657574, "loss": 0.2048, "step": 1400 }, { "epoch": 0.04048003740913802, "grad_norm": 260.0, "learning_rate": 0.00019784723421401027, "loss": 0.0669, "step": 1450 }, { "epoch": 0.041875900768073814, "grad_norm": 113.5, "learning_rate": 0.0001975594313014448, "loss": 0.0604, "step": 1500 }, { "epoch": 0.04327176412700961, "grad_norm": 36.0, "learning_rate": 0.00019727162838887932, "loss": 0.0539, "step": 1550 }, { "epoch": 0.0446676274859454, "grad_norm": 290.0, "learning_rate": 0.00019698382547631382, "loss": 0.0539, "step": 1600 }, { "epoch": 0.04606349084488119, "grad_norm": 202.0, "learning_rate": 0.00019669602256374834, "loss": 0.0621, "step": 1650 }, { "epoch": 0.047459354203816986, "grad_norm": 464.0, "learning_rate": 0.00019640821965118287, "loss": 0.178, "step": 1700 }, { "epoch": 0.04885521756275278, "grad_norm": 108.5, "learning_rate": 0.0001961204167386174, "loss": 0.1804, "step": 1750 }, { "epoch": 0.050251080921688576, "grad_norm": 252.0, "learning_rate": 0.00019583261382605195, "loss": 0.1206, "step": 1800 }, { "epoch": 0.05164694428062437, "grad_norm": 1376.0, "learning_rate": 0.00019554481091348648, "loss": 0.8041, "step": 1850 }, { "epoch": 0.053042807639560166, "grad_norm": 214.0, "learning_rate": 0.00019525700800092098, "loss": 0.2037, "step": 1900 }, { "epoch": 0.054438670998495954, "grad_norm": 324.0, "learning_rate": 0.0001949692050883555, "loss": 0.2246, "step": 1950 }, { "epoch": 0.05583453435743175, "grad_norm": 148.0, "learning_rate": 0.00019468140217579003, "loss": 0.1815, "step": 2000 }, { "epoch": 0.057230397716367544, "grad_norm": 270.0, "learning_rate": 0.00019439359926322455, "loss": 0.1532, "step": 2050 }, { "epoch": 0.05862626107530334, "grad_norm": 145.0, "learning_rate": 0.00019410579635065908, "loss": 0.1351, "step": 2100 }, { "epoch": 0.060022124434239134, "grad_norm": 130.0, "learning_rate": 0.00019381799343809358, "loss": 0.1393, "step": 2150 }, { "epoch": 0.06141798779317493, "grad_norm": 231.0, "learning_rate": 0.00019353019052552813, "loss": 0.1446, "step": 2200 }, { "epoch": 0.06281385115211072, "grad_norm": 3.25, "learning_rate": 0.00019324238761296266, "loss": 0.1403, "step": 2250 }, { "epoch": 0.06420971451104651, "grad_norm": 123.5, "learning_rate": 0.00019295458470039719, "loss": 0.3684, "step": 2300 }, { "epoch": 0.0656055778699823, "grad_norm": 20.125, "learning_rate": 0.0001926667817878317, "loss": 0.0694, "step": 2350 }, { "epoch": 0.0670014412289181, "grad_norm": 1.734375, "learning_rate": 0.00019237897887526624, "loss": 0.0897, "step": 2400 }, { "epoch": 0.0683973045878539, "grad_norm": 25.0, "learning_rate": 0.00019209117596270074, "loss": 0.128, "step": 2450 }, { "epoch": 0.06979316794678969, "grad_norm": 24.125, "learning_rate": 0.00019180337305013526, "loss": 0.1388, "step": 2500 }, { "epoch": 0.06979316794678969, "eval_loss": 0.011298904195427895, "eval_mae": 0.09956549108028412, "eval_rmse": 0.10629630088806152, "eval_runtime": 316.8013, "eval_samples_per_second": 6.313, "eval_steps_per_second": 6.313, "step": 2500 }, { "epoch": 0.07118903130572549, "grad_norm": 19.375, "learning_rate": 0.0001915155701375698, "loss": 0.1087, "step": 2550 }, { "epoch": 0.07258489466466128, "grad_norm": 7.8125, "learning_rate": 0.00019122776722500432, "loss": 0.0907, "step": 2600 }, { "epoch": 0.07398075802359708, "grad_norm": 13.8125, "learning_rate": 0.00019093996431243887, "loss": 0.0812, "step": 2650 }, { "epoch": 0.07537662138253286, "grad_norm": 24.25, "learning_rate": 0.0001906521613998734, "loss": 0.0707, "step": 2700 }, { "epoch": 0.07677248474146865, "grad_norm": 18.125, "learning_rate": 0.0001903643584873079, "loss": 0.0657, "step": 2750 }, { "epoch": 0.07816834810040445, "grad_norm": 9.5625, "learning_rate": 0.00019007655557474242, "loss": 0.0598, "step": 2800 }, { "epoch": 0.07956421145934024, "grad_norm": 15.3125, "learning_rate": 0.00018978875266217695, "loss": 0.0549, "step": 2850 }, { "epoch": 0.08096007481827604, "grad_norm": 9.9375, "learning_rate": 0.00018950094974961147, "loss": 0.052, "step": 2900 }, { "epoch": 0.08235593817721183, "grad_norm": 22.625, "learning_rate": 0.000189213146837046, "loss": 0.0501, "step": 2950 }, { "epoch": 0.08375180153614763, "grad_norm": 16.125, "learning_rate": 0.00018892534392448053, "loss": 0.0433, "step": 3000 }, { "epoch": 0.08514766489508342, "grad_norm": 19.875, "learning_rate": 0.00018863754101191505, "loss": 0.0489, "step": 3050 }, { "epoch": 0.08654352825401922, "grad_norm": 15.625, "learning_rate": 0.00018834973809934958, "loss": 0.0422, "step": 3100 }, { "epoch": 0.08793939161295501, "grad_norm": 15.0625, "learning_rate": 0.0001880619351867841, "loss": 0.0415, "step": 3150 }, { "epoch": 0.0893352549718908, "grad_norm": 12.5625, "learning_rate": 0.00018777413227421863, "loss": 0.0372, "step": 3200 }, { "epoch": 0.0907311183308266, "grad_norm": 13.875, "learning_rate": 0.00018748632936165316, "loss": 0.0393, "step": 3250 }, { "epoch": 0.09212698168976238, "grad_norm": 19.5, "learning_rate": 0.00018719852644908766, "loss": 0.0396, "step": 3300 }, { "epoch": 0.09352284504869818, "grad_norm": 14.875, "learning_rate": 0.00018691072353652218, "loss": 0.0293, "step": 3350 }, { "epoch": 0.09491870840763397, "grad_norm": 76.5, "learning_rate": 0.0001866229206239567, "loss": 0.0334, "step": 3400 }, { "epoch": 0.09631457176656977, "grad_norm": 9.625, "learning_rate": 0.00018633511771139124, "loss": 0.0627, "step": 3450 }, { "epoch": 0.09771043512550556, "grad_norm": 15.3125, "learning_rate": 0.0001860473147988258, "loss": 0.0262, "step": 3500 }, { "epoch": 0.09910629848444136, "grad_norm": 15.625, "learning_rate": 0.00018575951188626032, "loss": 0.0318, "step": 3550 }, { "epoch": 0.10050216184337715, "grad_norm": 17.0, "learning_rate": 0.00018547170897369482, "loss": 0.0431, "step": 3600 }, { "epoch": 0.10189802520231295, "grad_norm": 3.3125, "learning_rate": 0.00018518390606112934, "loss": 0.0086, "step": 3650 }, { "epoch": 0.10329388856124874, "grad_norm": 1.0546875, "learning_rate": 0.00018489610314856387, "loss": 0.0084, "step": 3700 }, { "epoch": 0.10468975192018454, "grad_norm": 3.21875, "learning_rate": 0.0001846083002359984, "loss": 0.0117, "step": 3750 }, { "epoch": 0.10468975192018454, "eval_loss": 0.0015581471379846334, "eval_mae": 0.036334387958049774, "eval_rmse": 0.03947337344288826, "eval_runtime": 319.7128, "eval_samples_per_second": 6.256, "eval_steps_per_second": 6.256, "step": 3750 }, { "epoch": 0.10608561527912033, "grad_norm": 42.75, "learning_rate": 0.00018432049732343292, "loss": 0.0319, "step": 3800 }, { "epoch": 0.10748147863805613, "grad_norm": 39.25, "learning_rate": 0.00018403269441086745, "loss": 0.0341, "step": 3850 }, { "epoch": 0.10887734199699191, "grad_norm": 44.25, "learning_rate": 0.00018374489149830197, "loss": 0.0326, "step": 3900 }, { "epoch": 0.1102732053559277, "grad_norm": 35.0, "learning_rate": 0.0001834570885857365, "loss": 0.032, "step": 3950 }, { "epoch": 0.1116690687148635, "grad_norm": 36.75, "learning_rate": 0.00018316928567317103, "loss": 0.0306, "step": 4000 }, { "epoch": 0.11306493207379929, "grad_norm": 25.875, "learning_rate": 0.00018288148276060555, "loss": 0.0298, "step": 4050 }, { "epoch": 0.11446079543273509, "grad_norm": 38.0, "learning_rate": 0.00018259367984804008, "loss": 0.031, "step": 4100 }, { "epoch": 0.11585665879167088, "grad_norm": 33.0, "learning_rate": 0.0001823058769354746, "loss": 0.0284, "step": 4150 }, { "epoch": 0.11725252215060668, "grad_norm": 30.125, "learning_rate": 0.0001820180740229091, "loss": 0.0229, "step": 4200 }, { "epoch": 0.11864838550954247, "grad_norm": 14.375, "learning_rate": 0.00018173027111034363, "loss": 0.0119, "step": 4250 }, { "epoch": 0.12004424886847827, "grad_norm": 40.5, "learning_rate": 0.00018144246819777818, "loss": 0.0351, "step": 4300 }, { "epoch": 0.12144011222741406, "grad_norm": 50.0, "learning_rate": 0.0001811546652852127, "loss": 0.0157, "step": 4350 }, { "epoch": 0.12283597558634986, "grad_norm": 12.25, "learning_rate": 0.00018086686237264724, "loss": 0.0222, "step": 4400 }, { "epoch": 0.12423183894528565, "grad_norm": 27.0, "learning_rate": 0.00018057905946008174, "loss": 0.0129, "step": 4450 }, { "epoch": 0.12562770230422143, "grad_norm": 4.6875, "learning_rate": 0.00018029125654751626, "loss": 0.0162, "step": 4500 }, { "epoch": 0.12702356566315723, "grad_norm": 6.9375, "learning_rate": 0.0001800034536349508, "loss": 0.0038, "step": 4550 }, { "epoch": 0.12841942902209302, "grad_norm": 2.34375, "learning_rate": 0.00017971565072238531, "loss": 0.0032, "step": 4600 }, { "epoch": 0.12981529238102882, "grad_norm": 9.0, "learning_rate": 0.00017942784780981984, "loss": 0.0035, "step": 4650 }, { "epoch": 0.1312111557399646, "grad_norm": 5.0625, "learning_rate": 0.00017914004489725437, "loss": 0.0022, "step": 4700 }, { "epoch": 0.1326070190989004, "grad_norm": 9.4375, "learning_rate": 0.0001788522419846889, "loss": 0.0017, "step": 4750 }, { "epoch": 0.1340028824578362, "grad_norm": 3.53125, "learning_rate": 0.00017856443907212342, "loss": 0.0024, "step": 4800 }, { "epoch": 0.135398745816772, "grad_norm": 11.4375, "learning_rate": 0.00017827663615955795, "loss": 0.0022, "step": 4850 }, { "epoch": 0.1367946091757078, "grad_norm": 11.875, "learning_rate": 0.00017798883324699247, "loss": 0.0038, "step": 4900 }, { "epoch": 0.1381904725346436, "grad_norm": 1.8125, "learning_rate": 0.000177701030334427, "loss": 0.0036, "step": 4950 }, { "epoch": 0.13958633589357938, "grad_norm": 10.75, "learning_rate": 0.00017741322742186153, "loss": 0.0028, "step": 5000 }, { "epoch": 0.13958633589357938, "eval_loss": 0.00022272480418905616, "eval_mae": 0.012231973931193352, "eval_rmse": 0.01492396742105484, "eval_runtime": 319.6535, "eval_samples_per_second": 6.257, "eval_steps_per_second": 6.257, "step": 5000 }, { "epoch": 0.14098219925251518, "grad_norm": 7.15625, "learning_rate": 0.00017712542450929602, "loss": 0.0055, "step": 5050 }, { "epoch": 0.14237806261145097, "grad_norm": 6.84375, "learning_rate": 0.00017683762159673055, "loss": 0.0032, "step": 5100 }, { "epoch": 0.14377392597038677, "grad_norm": 11.75, "learning_rate": 0.0001765498186841651, "loss": 0.0031, "step": 5150 }, { "epoch": 0.14516978932932256, "grad_norm": 8.1875, "learning_rate": 0.00017626201577159963, "loss": 0.0027, "step": 5200 }, { "epoch": 0.14656565268825836, "grad_norm": 10.0625, "learning_rate": 0.00017597421285903416, "loss": 0.0025, "step": 5250 }, { "epoch": 0.14796151604719415, "grad_norm": 10.9375, "learning_rate": 0.00017568640994646868, "loss": 0.0027, "step": 5300 }, { "epoch": 0.14935737940612995, "grad_norm": 9.875, "learning_rate": 0.00017539860703390318, "loss": 0.0028, "step": 5350 }, { "epoch": 0.15075324276506571, "grad_norm": 7.5625, "learning_rate": 0.0001751108041213377, "loss": 0.0026, "step": 5400 }, { "epoch": 0.1521491061240015, "grad_norm": 22.875, "learning_rate": 0.00017482300120877223, "loss": 0.0032, "step": 5450 }, { "epoch": 0.1535449694829373, "grad_norm": 50.75, "learning_rate": 0.00017453519829620676, "loss": 0.0356, "step": 5500 }, { "epoch": 0.1549408328418731, "grad_norm": 48.25, "learning_rate": 0.0001742473953836413, "loss": 0.0495, "step": 5550 }, { "epoch": 0.1563366962008089, "grad_norm": 8.8125, "learning_rate": 0.00017395959247107581, "loss": 0.048, "step": 5600 }, { "epoch": 0.1577325595597447, "grad_norm": 54.25, "learning_rate": 0.00017367178955851034, "loss": 0.0461, "step": 5650 }, { "epoch": 0.15912842291868048, "grad_norm": 63.75, "learning_rate": 0.00017338398664594487, "loss": 0.0495, "step": 5700 }, { "epoch": 0.16052428627761628, "grad_norm": 55.0, "learning_rate": 0.0001730961837333794, "loss": 0.033, "step": 5750 }, { "epoch": 0.16192014963655207, "grad_norm": 39.75, "learning_rate": 0.00017280838082081392, "loss": 0.0453, "step": 5800 }, { "epoch": 0.16331601299548787, "grad_norm": 32.75, "learning_rate": 0.00017252057790824845, "loss": 0.0417, "step": 5850 }, { "epoch": 0.16471187635442366, "grad_norm": 44.5, "learning_rate": 0.00017223277499568294, "loss": 0.044, "step": 5900 }, { "epoch": 0.16610773971335946, "grad_norm": 30.875, "learning_rate": 0.00017194497208311747, "loss": 0.0414, "step": 5950 }, { "epoch": 0.16750360307229525, "grad_norm": 31.0, "learning_rate": 0.00017165716917055202, "loss": 0.0392, "step": 6000 }, { "epoch": 0.16889946643123105, "grad_norm": 52.75, "learning_rate": 0.00017136936625798655, "loss": 0.0404, "step": 6050 }, { "epoch": 0.17029532979016684, "grad_norm": 39.5, "learning_rate": 0.00017108156334542108, "loss": 0.042, "step": 6100 }, { "epoch": 0.17169119314910264, "grad_norm": 19.375, "learning_rate": 0.0001707937604328556, "loss": 0.006, "step": 6150 }, { "epoch": 0.17308705650803843, "grad_norm": 42.5, "learning_rate": 0.0001705059575202901, "loss": 0.014, "step": 6200 }, { "epoch": 0.17448291986697423, "grad_norm": 18.5, "learning_rate": 0.00017021815460772463, "loss": 0.0145, "step": 6250 }, { "epoch": 0.17448291986697423, "eval_loss": 0.0018714327597990632, "eval_mae": 0.04104918614029884, "eval_rmse": 0.043260060250759125, "eval_runtime": 318.582, "eval_samples_per_second": 6.278, "eval_steps_per_second": 6.278, "step": 6250 }, { "epoch": 0.17587878322591002, "grad_norm": 7.4375, "learning_rate": 0.00016993035169515915, "loss": 0.0104, "step": 6300 }, { "epoch": 0.17727464658484582, "grad_norm": 34.25, "learning_rate": 0.00016964254878259368, "loss": 0.0038, "step": 6350 }, { "epoch": 0.1786705099437816, "grad_norm": 18.125, "learning_rate": 0.0001693547458700282, "loss": 0.015, "step": 6400 }, { "epoch": 0.1800663733027174, "grad_norm": 0.494140625, "learning_rate": 0.00016906694295746276, "loss": 0.0137, "step": 6450 }, { "epoch": 0.1814622366616532, "grad_norm": 12.5, "learning_rate": 0.00016877914004489726, "loss": 0.0044, "step": 6500 }, { "epoch": 0.182858100020589, "grad_norm": 9.625, "learning_rate": 0.0001684913371323318, "loss": 0.0156, "step": 6550 }, { "epoch": 0.18425396337952477, "grad_norm": 6.34375, "learning_rate": 0.0001682035342197663, "loss": 0.0021, "step": 6600 }, { "epoch": 0.18564982673846056, "grad_norm": 11.9375, "learning_rate": 0.00016791573130720084, "loss": 0.0019, "step": 6650 }, { "epoch": 0.18704569009739636, "grad_norm": 5.65625, "learning_rate": 0.00016762792839463537, "loss": 0.0019, "step": 6700 }, { "epoch": 0.18844155345633215, "grad_norm": 7.625, "learning_rate": 0.00016734012548206986, "loss": 0.0019, "step": 6750 }, { "epoch": 0.18983741681526795, "grad_norm": 27.375, "learning_rate": 0.00016705232256950442, "loss": 0.0029, "step": 6800 }, { "epoch": 0.19123328017420374, "grad_norm": 3.171875, "learning_rate": 0.00016676451965693894, "loss": 0.0114, "step": 6850 }, { "epoch": 0.19262914353313954, "grad_norm": 6.4375, "learning_rate": 0.00016647671674437347, "loss": 0.0166, "step": 6900 }, { "epoch": 0.19402500689207533, "grad_norm": 16.125, "learning_rate": 0.000166188913831808, "loss": 0.0129, "step": 6950 }, { "epoch": 0.19542087025101113, "grad_norm": 7.21875, "learning_rate": 0.00016590111091924252, "loss": 0.0052, "step": 7000 }, { "epoch": 0.19681673360994692, "grad_norm": 15.1875, "learning_rate": 0.00016561330800667702, "loss": 0.0051, "step": 7050 }, { "epoch": 0.19821259696888271, "grad_norm": 6.46875, "learning_rate": 0.00016532550509411155, "loss": 0.0049, "step": 7100 }, { "epoch": 0.1996084603278185, "grad_norm": 12.0625, "learning_rate": 0.00016503770218154608, "loss": 0.0051, "step": 7150 }, { "epoch": 0.2010043236867543, "grad_norm": 3.53125, "learning_rate": 0.0001647498992689806, "loss": 0.0044, "step": 7200 }, { "epoch": 0.2024001870456901, "grad_norm": 4.84375, "learning_rate": 0.00016446209635641513, "loss": 0.0066, "step": 7250 }, { "epoch": 0.2037960504046259, "grad_norm": 4.03125, "learning_rate": 0.00016417429344384968, "loss": 0.0072, "step": 7300 }, { "epoch": 0.2051919137635617, "grad_norm": 3.203125, "learning_rate": 0.00016388649053128418, "loss": 0.0007, "step": 7350 }, { "epoch": 0.20658777712249748, "grad_norm": 4.59375, "learning_rate": 0.0001635986876187187, "loss": 0.001, "step": 7400 }, { "epoch": 0.20798364048143328, "grad_norm": 8.375, "learning_rate": 0.00016331088470615323, "loss": 0.0017, "step": 7450 }, { "epoch": 0.20937950384036907, "grad_norm": 7.09375, "learning_rate": 0.00016302308179358776, "loss": 0.0018, "step": 7500 }, { "epoch": 0.20937950384036907, "eval_loss": 0.00019140982476528734, "eval_mae": 0.012697141617536545, "eval_rmse": 0.013835093937814236, "eval_runtime": 321.0771, "eval_samples_per_second": 6.229, "eval_steps_per_second": 6.229, "step": 7500 }, { "epoch": 0.21077536719930487, "grad_norm": 5.3125, "learning_rate": 0.00016273527888102229, "loss": 0.0018, "step": 7550 }, { "epoch": 0.21217123055824066, "grad_norm": 12.4375, "learning_rate": 0.0001624474759684568, "loss": 0.0018, "step": 7600 }, { "epoch": 0.21356709391717646, "grad_norm": 7.75, "learning_rate": 0.00016215967305589134, "loss": 0.0012, "step": 7650 }, { "epoch": 0.21496295727611225, "grad_norm": 1.765625, "learning_rate": 0.00016187187014332586, "loss": 0.0029, "step": 7700 }, { "epoch": 0.21635882063504802, "grad_norm": 13.6875, "learning_rate": 0.0001615840672307604, "loss": 0.0057, "step": 7750 }, { "epoch": 0.21775468399398382, "grad_norm": 4.1875, "learning_rate": 0.00016129626431819492, "loss": 0.0058, "step": 7800 }, { "epoch": 0.2191505473529196, "grad_norm": 29.375, "learning_rate": 0.00016100846140562944, "loss": 0.0038, "step": 7850 }, { "epoch": 0.2205464107118554, "grad_norm": 3.15625, "learning_rate": 0.00016072065849306394, "loss": 0.0031, "step": 7900 }, { "epoch": 0.2219422740707912, "grad_norm": 1.140625, "learning_rate": 0.00016043285558049847, "loss": 0.0017, "step": 7950 }, { "epoch": 0.223338137429727, "grad_norm": 2.703125, "learning_rate": 0.000160145052667933, "loss": 0.0019, "step": 8000 }, { "epoch": 0.2247340007886628, "grad_norm": 13.0625, "learning_rate": 0.00015985724975536752, "loss": 0.0015, "step": 8050 }, { "epoch": 0.22612986414759859, "grad_norm": 1.1328125, "learning_rate": 0.00015956944684280208, "loss": 0.0025, "step": 8100 }, { "epoch": 0.22752572750653438, "grad_norm": 20.0, "learning_rate": 0.0001592816439302366, "loss": 0.0018, "step": 8150 }, { "epoch": 0.22892159086547018, "grad_norm": 3.625, "learning_rate": 0.0001589938410176711, "loss": 0.0024, "step": 8200 }, { "epoch": 0.23031745422440597, "grad_norm": 11.625, "learning_rate": 0.00015870603810510563, "loss": 0.0071, "step": 8250 }, { "epoch": 0.23171331758334177, "grad_norm": 13.125, "learning_rate": 0.00015841823519254015, "loss": 0.0128, "step": 8300 }, { "epoch": 0.23310918094227756, "grad_norm": 17.625, "learning_rate": 0.00015813043227997468, "loss": 0.0122, "step": 8350 }, { "epoch": 0.23450504430121336, "grad_norm": 14.0625, "learning_rate": 0.0001578426293674092, "loss": 0.0125, "step": 8400 }, { "epoch": 0.23590090766014915, "grad_norm": 11.125, "learning_rate": 0.00015755482645484373, "loss": 0.0129, "step": 8450 }, { "epoch": 0.23729677101908495, "grad_norm": 22.875, "learning_rate": 0.00015726702354227826, "loss": 0.0136, "step": 8500 }, { "epoch": 0.23869263437802074, "grad_norm": 11.4375, "learning_rate": 0.00015697922062971278, "loss": 0.0118, "step": 8550 }, { "epoch": 0.24008849773695654, "grad_norm": 14.5625, "learning_rate": 0.0001566914177171473, "loss": 0.0032, "step": 8600 }, { "epoch": 0.24148436109589233, "grad_norm": 9.5625, "learning_rate": 0.00015640361480458184, "loss": 0.0014, "step": 8650 }, { "epoch": 0.24288022445482813, "grad_norm": 7.28125, "learning_rate": 0.00015611581189201636, "loss": 0.0016, "step": 8700 }, { "epoch": 0.24427608781376392, "grad_norm": 4.53125, "learning_rate": 0.0001558280089794509, "loss": 0.0013, "step": 8750 }, { "epoch": 0.24427608781376392, "eval_loss": 0.00014836130139883608, "eval_mae": 0.01122231688350439, "eval_rmse": 0.01218036562204361, "eval_runtime": 319.6629, "eval_samples_per_second": 6.257, "eval_steps_per_second": 6.257, "step": 8750 }, { "epoch": 0.24567195117269972, "grad_norm": 3.875, "learning_rate": 0.0001555402060668854, "loss": 0.0013, "step": 8800 }, { "epoch": 0.2470678145316355, "grad_norm": 10.4375, "learning_rate": 0.00015525240315431992, "loss": 0.0017, "step": 8850 }, { "epoch": 0.2484636778905713, "grad_norm": 2.96875, "learning_rate": 0.00015496460024175444, "loss": 0.0024, "step": 8900 }, { "epoch": 0.24985954124950707, "grad_norm": 10.1875, "learning_rate": 0.000154676797329189, "loss": 0.0051, "step": 8950 }, { "epoch": 0.25125540460844287, "grad_norm": 3.015625, "learning_rate": 0.00015438899441662352, "loss": 0.0012, "step": 9000 }, { "epoch": 0.2526512679673787, "grad_norm": 7.90625, "learning_rate": 0.00015410119150405802, "loss": 0.0103, "step": 9050 }, { "epoch": 0.25404713132631446, "grad_norm": 2.796875, "learning_rate": 0.00015381338859149255, "loss": 0.0014, "step": 9100 }, { "epoch": 0.2554429946852503, "grad_norm": 27.75, "learning_rate": 0.00015352558567892707, "loss": 0.0141, "step": 9150 }, { "epoch": 0.25683885804418605, "grad_norm": 2.65625, "learning_rate": 0.0001532377827663616, "loss": 0.0078, "step": 9200 }, { "epoch": 0.25823472140312187, "grad_norm": 16.125, "learning_rate": 0.00015294997985379613, "loss": 0.0141, "step": 9250 }, { "epoch": 0.25963058476205764, "grad_norm": 0.609375, "learning_rate": 0.00015266217694123065, "loss": 0.0038, "step": 9300 }, { "epoch": 0.26102644812099346, "grad_norm": 25.0, "learning_rate": 0.00015237437402866518, "loss": 0.0035, "step": 9350 }, { "epoch": 0.2624223114799292, "grad_norm": 16.5, "learning_rate": 0.0001520865711160997, "loss": 0.0106, "step": 9400 }, { "epoch": 0.26381817483886505, "grad_norm": 18.375, "learning_rate": 0.00015179876820353423, "loss": 0.0106, "step": 9450 }, { "epoch": 0.2652140381978008, "grad_norm": 16.5, "learning_rate": 0.00015151096529096876, "loss": 0.0104, "step": 9500 }, { "epoch": 0.2666099015567366, "grad_norm": 18.25, "learning_rate": 0.00015122316237840328, "loss": 0.0103, "step": 9550 }, { "epoch": 0.2680057649156724, "grad_norm": 18.875, "learning_rate": 0.0001509353594658378, "loss": 0.0098, "step": 9600 }, { "epoch": 0.2694016282746082, "grad_norm": 18.625, "learning_rate": 0.0001506475565532723, "loss": 0.0098, "step": 9650 }, { "epoch": 0.270797491633544, "grad_norm": 21.625, "learning_rate": 0.00015035975364070684, "loss": 0.0097, "step": 9700 }, { "epoch": 0.27219335499247976, "grad_norm": 21.0, "learning_rate": 0.00015007195072814136, "loss": 0.0094, "step": 9750 }, { "epoch": 0.2735892183514156, "grad_norm": 18.25, "learning_rate": 0.00014978414781557592, "loss": 0.0094, "step": 9800 }, { "epoch": 0.27498508171035135, "grad_norm": 14.25, "learning_rate": 0.00014949634490301044, "loss": 0.0095, "step": 9850 }, { "epoch": 0.2763809450692872, "grad_norm": 16.25, "learning_rate": 0.00014920854199044497, "loss": 0.0098, "step": 9900 }, { "epoch": 0.27777680842822294, "grad_norm": 18.5, "learning_rate": 0.00014892073907787947, "loss": 0.0092, "step": 9950 }, { "epoch": 0.27917267178715877, "grad_norm": 15.4375, "learning_rate": 0.000148632936165314, "loss": 0.0093, "step": 10000 }, { "epoch": 0.27917267178715877, "eval_loss": 8.570039790356532e-05, "eval_mae": 0.00781923346221447, "eval_rmse": 0.009257450699806213, "eval_runtime": 319.3051, "eval_samples_per_second": 6.264, "eval_steps_per_second": 6.264, "step": 10000 }, { "epoch": 0.28056853514609453, "grad_norm": 17.0, "learning_rate": 0.00014834513325274852, "loss": 0.0093, "step": 10050 }, { "epoch": 0.28196439850503036, "grad_norm": 12.3125, "learning_rate": 0.00014805733034018305, "loss": 0.0093, "step": 10100 }, { "epoch": 0.2833602618639661, "grad_norm": 14.875, "learning_rate": 0.00014776952742761757, "loss": 0.0094, "step": 10150 }, { "epoch": 0.28475612522290195, "grad_norm": 15.6875, "learning_rate": 0.0001474817245150521, "loss": 0.0095, "step": 10200 }, { "epoch": 0.2861519885818377, "grad_norm": 5.65625, "learning_rate": 0.00014719392160248663, "loss": 0.003, "step": 10250 }, { "epoch": 0.28754785194077354, "grad_norm": 2.40625, "learning_rate": 0.00014690611868992115, "loss": 0.0037, "step": 10300 }, { "epoch": 0.2889437152997093, "grad_norm": 15.5625, "learning_rate": 0.00014661831577735568, "loss": 0.0031, "step": 10350 }, { "epoch": 0.2903395786586451, "grad_norm": 7.21875, "learning_rate": 0.0001463305128647902, "loss": 0.0011, "step": 10400 }, { "epoch": 0.2917354420175809, "grad_norm": 14.375, "learning_rate": 0.00014604270995222473, "loss": 0.001, "step": 10450 }, { "epoch": 0.2931313053765167, "grad_norm": 24.5, "learning_rate": 0.00014575490703965923, "loss": 0.0058, "step": 10500 }, { "epoch": 0.2945271687354525, "grad_norm": 21.75, "learning_rate": 0.00014546710412709376, "loss": 0.0087, "step": 10550 }, { "epoch": 0.2959230320943883, "grad_norm": 23.375, "learning_rate": 0.0001451793012145283, "loss": 0.0082, "step": 10600 }, { "epoch": 0.2973188954533241, "grad_norm": 20.875, "learning_rate": 0.00014489149830196284, "loss": 0.0084, "step": 10650 }, { "epoch": 0.2987147588122599, "grad_norm": 23.375, "learning_rate": 0.00014460369538939736, "loss": 0.0079, "step": 10700 }, { "epoch": 0.30011062217119566, "grad_norm": 25.75, "learning_rate": 0.0001443158924768319, "loss": 0.0086, "step": 10750 }, { "epoch": 0.30150648553013143, "grad_norm": 8.6875, "learning_rate": 0.0001440280895642664, "loss": 0.0018, "step": 10800 }, { "epoch": 0.30290234888906725, "grad_norm": 20.375, "learning_rate": 0.00014374028665170091, "loss": 0.0011, "step": 10850 }, { "epoch": 0.304298212248003, "grad_norm": 11.75, "learning_rate": 0.00014345248373913544, "loss": 0.0047, "step": 10900 }, { "epoch": 0.30569407560693884, "grad_norm": 10.375, "learning_rate": 0.00014316468082656997, "loss": 0.0027, "step": 10950 }, { "epoch": 0.3070899389658746, "grad_norm": 7.21875, "learning_rate": 0.0001428768779140045, "loss": 0.0026, "step": 11000 }, { "epoch": 0.30848580232481043, "grad_norm": 9.0625, "learning_rate": 0.00014258907500143902, "loss": 0.0026, "step": 11050 }, { "epoch": 0.3098816656837462, "grad_norm": 0.1865234375, "learning_rate": 0.00014230127208887355, "loss": 0.0026, "step": 11100 }, { "epoch": 0.311277529042682, "grad_norm": 5.78125, "learning_rate": 0.00014201346917630807, "loss": 0.0016, "step": 11150 }, { "epoch": 0.3126733924016178, "grad_norm": 16.125, "learning_rate": 0.0001417256662637426, "loss": 0.0027, "step": 11200 }, { "epoch": 0.3140692557605536, "grad_norm": 5.6875, "learning_rate": 0.00014143786335117712, "loss": 0.0033, "step": 11250 }, { "epoch": 0.3140692557605536, "eval_loss": 0.00012493817484937608, "eval_mae": 0.009413574822247028, "eval_rmse": 0.011177574284374714, "eval_runtime": 314.115, "eval_samples_per_second": 6.367, "eval_steps_per_second": 6.367, "step": 11250 }, { "epoch": 0.3154651191194894, "grad_norm": 10.0625, "learning_rate": 0.00014115006043861165, "loss": 0.013, "step": 11300 }, { "epoch": 0.3168609824784252, "grad_norm": 5.5625, "learning_rate": 0.00014086225752604615, "loss": 0.0025, "step": 11350 }, { "epoch": 0.31825684583736097, "grad_norm": 8.375, "learning_rate": 0.00014057445461348068, "loss": 0.0025, "step": 11400 }, { "epoch": 0.3196527091962968, "grad_norm": 9.0, "learning_rate": 0.00014028665170091523, "loss": 0.0024, "step": 11450 }, { "epoch": 0.32104857255523256, "grad_norm": 13.75, "learning_rate": 0.00013999884878834976, "loss": 0.0025, "step": 11500 }, { "epoch": 0.3224444359141684, "grad_norm": 3.453125, "learning_rate": 0.00013971104587578428, "loss": 0.0025, "step": 11550 }, { "epoch": 0.32384029927310415, "grad_norm": 8.375, "learning_rate": 0.0001394232429632188, "loss": 0.0024, "step": 11600 }, { "epoch": 0.32523616263203997, "grad_norm": 8.875, "learning_rate": 0.0001391354400506533, "loss": 0.0025, "step": 11650 }, { "epoch": 0.32663202599097574, "grad_norm": 11.8125, "learning_rate": 0.00013884763713808783, "loss": 0.0025, "step": 11700 }, { "epoch": 0.32802788934991156, "grad_norm": 3.1875, "learning_rate": 0.00013855983422552236, "loss": 0.0028, "step": 11750 }, { "epoch": 0.32942375270884733, "grad_norm": 1.0546875, "learning_rate": 0.0001382720313129569, "loss": 0.0016, "step": 11800 }, { "epoch": 0.33081961606778315, "grad_norm": 7.15625, "learning_rate": 0.0001379842284003914, "loss": 0.0044, "step": 11850 }, { "epoch": 0.3322154794267189, "grad_norm": 4.71875, "learning_rate": 0.00013769642548782597, "loss": 0.0022, "step": 11900 }, { "epoch": 0.3336113427856547, "grad_norm": 13.9375, "learning_rate": 0.00013740862257526047, "loss": 0.0024, "step": 11950 }, { "epoch": 0.3350072061445905, "grad_norm": 6.75, "learning_rate": 0.000137120819662695, "loss": 0.0025, "step": 12000 }, { "epoch": 0.3364030695035263, "grad_norm": 7.15625, "learning_rate": 0.00013683301675012952, "loss": 0.0023, "step": 12050 }, { "epoch": 0.3377989328624621, "grad_norm": 3.515625, "learning_rate": 0.00013654521383756404, "loss": 0.0024, "step": 12100 }, { "epoch": 0.33919479622139787, "grad_norm": 12.875, "learning_rate": 0.00013625741092499857, "loss": 0.0024, "step": 12150 }, { "epoch": 0.3405906595803337, "grad_norm": 11.25, "learning_rate": 0.0001359696080124331, "loss": 0.0036, "step": 12200 }, { "epoch": 0.34198652293926946, "grad_norm": 8.8125, "learning_rate": 0.0001356818050998676, "loss": 0.0025, "step": 12250 }, { "epoch": 0.3433823862982053, "grad_norm": 0.07080078125, "learning_rate": 0.00013539400218730215, "loss": 0.0023, "step": 12300 }, { "epoch": 0.34477824965714104, "grad_norm": 7.40625, "learning_rate": 0.00013510619927473668, "loss": 0.005, "step": 12350 }, { "epoch": 0.34617411301607687, "grad_norm": 8.0, "learning_rate": 0.0001348183963621712, "loss": 0.0022, "step": 12400 }, { "epoch": 0.34756997637501263, "grad_norm": 4.1875, "learning_rate": 0.00013453059344960573, "loss": 0.0023, "step": 12450 }, { "epoch": 0.34896583973394846, "grad_norm": 15.5625, "learning_rate": 0.00013424279053704023, "loss": 0.0048, "step": 12500 }, { "epoch": 0.34896583973394846, "eval_loss": 0.0007875896408222616, "eval_mae": 0.027495475485920906, "eval_rmse": 0.028064027428627014, "eval_runtime": 313.9427, "eval_samples_per_second": 6.371, "eval_steps_per_second": 6.371, "step": 12500 }, { "epoch": 0.3503617030928842, "grad_norm": 10.125, "learning_rate": 0.00013395498762447475, "loss": 0.0012, "step": 12550 }, { "epoch": 0.35175756645182005, "grad_norm": 1.46875, "learning_rate": 0.00013366718471190928, "loss": 0.0025, "step": 12600 }, { "epoch": 0.3531534298107558, "grad_norm": 6.59375, "learning_rate": 0.0001333793817993438, "loss": 0.0009, "step": 12650 }, { "epoch": 0.35454929316969164, "grad_norm": 14.9375, "learning_rate": 0.00013309157888677833, "loss": 0.0024, "step": 12700 }, { "epoch": 0.3559451565286274, "grad_norm": 5.46875, "learning_rate": 0.0001328037759742129, "loss": 0.001, "step": 12750 }, { "epoch": 0.3573410198875632, "grad_norm": 2.25, "learning_rate": 0.00013251597306164739, "loss": 0.0007, "step": 12800 }, { "epoch": 0.358736883246499, "grad_norm": 3.265625, "learning_rate": 0.0001322281701490819, "loss": 0.0007, "step": 12850 }, { "epoch": 0.3601327466054348, "grad_norm": 4.53125, "learning_rate": 0.00013194036723651644, "loss": 0.0007, "step": 12900 }, { "epoch": 0.3615286099643706, "grad_norm": 3.671875, "learning_rate": 0.00013165256432395096, "loss": 0.0007, "step": 12950 }, { "epoch": 0.3629244733233064, "grad_norm": 2.75, "learning_rate": 0.0001313647614113855, "loss": 0.0007, "step": 13000 }, { "epoch": 0.3643203366822422, "grad_norm": 2.09375, "learning_rate": 0.00013107695849882002, "loss": 0.001, "step": 13050 }, { "epoch": 0.365716200041178, "grad_norm": 4.8125, "learning_rate": 0.00013078915558625454, "loss": 0.0026, "step": 13100 }, { "epoch": 0.36711206340011376, "grad_norm": 14.625, "learning_rate": 0.00013050135267368907, "loss": 0.0014, "step": 13150 }, { "epoch": 0.36850792675904953, "grad_norm": 11.875, "learning_rate": 0.0001302135497611236, "loss": 0.0011, "step": 13200 }, { "epoch": 0.36990379011798535, "grad_norm": 10.5625, "learning_rate": 0.00012992574684855812, "loss": 0.002, "step": 13250 }, { "epoch": 0.3712996534769211, "grad_norm": 12.75, "learning_rate": 0.00012963794393599265, "loss": 0.0025, "step": 13300 }, { "epoch": 0.37269551683585694, "grad_norm": 2.484375, "learning_rate": 0.00012935014102342718, "loss": 0.0019, "step": 13350 }, { "epoch": 0.3740913801947927, "grad_norm": 2.953125, "learning_rate": 0.00012906233811086167, "loss": 0.0004, "step": 13400 }, { "epoch": 0.37548724355372853, "grad_norm": 0.474609375, "learning_rate": 0.0001287745351982962, "loss": 0.0009, "step": 13450 }, { "epoch": 0.3768831069126643, "grad_norm": 23.25, "learning_rate": 0.00012848673228573073, "loss": 0.0023, "step": 13500 }, { "epoch": 0.3782789702716001, "grad_norm": 5.875, "learning_rate": 0.00012819892937316525, "loss": 0.0061, "step": 13550 }, { "epoch": 0.3796748336305359, "grad_norm": 11.4375, "learning_rate": 0.0001279111264605998, "loss": 0.0043, "step": 13600 }, { "epoch": 0.3810706969894717, "grad_norm": 1.34375, "learning_rate": 0.0001276233235480343, "loss": 0.0021, "step": 13650 }, { "epoch": 0.3824665603484075, "grad_norm": 1.671875, "learning_rate": 0.00012733552063546883, "loss": 0.0022, "step": 13700 }, { "epoch": 0.3838624237073433, "grad_norm": 12.0625, "learning_rate": 0.00012704771772290336, "loss": 0.0009, "step": 13750 }, { "epoch": 0.3838624237073433, "eval_loss": 0.00010729853966040537, "eval_mae": 0.009244485758244991, "eval_rmse": 0.010358501225709915, "eval_runtime": 314.0246, "eval_samples_per_second": 6.369, "eval_steps_per_second": 6.369, "step": 13750 }, { "epoch": 0.38525828706627907, "grad_norm": 0.466796875, "learning_rate": 0.00012675991481033788, "loss": 0.0029, "step": 13800 }, { "epoch": 0.3866541504252149, "grad_norm": 0.314453125, "learning_rate": 0.0001264721118977724, "loss": 0.0017, "step": 13850 }, { "epoch": 0.38805001378415066, "grad_norm": 5.09375, "learning_rate": 0.00012618430898520694, "loss": 0.002, "step": 13900 }, { "epoch": 0.3894458771430865, "grad_norm": 6.21875, "learning_rate": 0.00012589650607264146, "loss": 0.0015, "step": 13950 }, { "epoch": 0.39084174050202225, "grad_norm": 7.78125, "learning_rate": 0.000125608703160076, "loss": 0.0019, "step": 14000 }, { "epoch": 0.3922376038609581, "grad_norm": 8.6875, "learning_rate": 0.00012532090024751052, "loss": 0.0019, "step": 14050 }, { "epoch": 0.39363346721989384, "grad_norm": 8.125, "learning_rate": 0.00012503309733494504, "loss": 0.0018, "step": 14100 }, { "epoch": 0.39502933057882966, "grad_norm": 10.375, "learning_rate": 0.00012474529442237957, "loss": 0.002, "step": 14150 }, { "epoch": 0.39642519393776543, "grad_norm": 3.5, "learning_rate": 0.0001244574915098141, "loss": 0.0018, "step": 14200 }, { "epoch": 0.39782105729670125, "grad_norm": 5.0625, "learning_rate": 0.0001241696885972486, "loss": 0.0023, "step": 14250 }, { "epoch": 0.399216920655637, "grad_norm": 7.15625, "learning_rate": 0.00012388188568468312, "loss": 0.0019, "step": 14300 }, { "epoch": 0.4006127840145728, "grad_norm": 2.65625, "learning_rate": 0.00012359408277211765, "loss": 0.003, "step": 14350 }, { "epoch": 0.4020086473735086, "grad_norm": 4.3125, "learning_rate": 0.0001233062798595522, "loss": 0.0024, "step": 14400 }, { "epoch": 0.4034045107324444, "grad_norm": 4.96875, "learning_rate": 0.00012301847694698673, "loss": 0.0032, "step": 14450 }, { "epoch": 0.4048003740913802, "grad_norm": 7.5625, "learning_rate": 0.00012273067403442125, "loss": 0.001, "step": 14500 }, { "epoch": 0.40619623745031597, "grad_norm": 7.1875, "learning_rate": 0.00012244287112185575, "loss": 0.0017, "step": 14550 }, { "epoch": 0.4075921008092518, "grad_norm": 7.875, "learning_rate": 0.00012215506820929028, "loss": 0.0017, "step": 14600 }, { "epoch": 0.40898796416818756, "grad_norm": 9.75, "learning_rate": 0.0001218672652967248, "loss": 0.0018, "step": 14650 }, { "epoch": 0.4103838275271234, "grad_norm": 6.125, "learning_rate": 0.00012157946238415933, "loss": 0.0016, "step": 14700 }, { "epoch": 0.41177969088605915, "grad_norm": 11.5625, "learning_rate": 0.00012129165947159387, "loss": 0.0018, "step": 14750 }, { "epoch": 0.41317555424499497, "grad_norm": 3.375, "learning_rate": 0.00012100385655902837, "loss": 0.0017, "step": 14800 }, { "epoch": 0.41457141760393074, "grad_norm": 4.3125, "learning_rate": 0.0001207160536464629, "loss": 0.0019, "step": 14850 }, { "epoch": 0.41596728096286656, "grad_norm": 6.96875, "learning_rate": 0.00012042825073389742, "loss": 0.0021, "step": 14900 }, { "epoch": 0.4173631443218023, "grad_norm": 4.34375, "learning_rate": 0.00012014044782133196, "loss": 0.0005, "step": 14950 }, { "epoch": 0.41875900768073815, "grad_norm": 3.890625, "learning_rate": 0.00011985264490876649, "loss": 0.0004, "step": 15000 }, { "epoch": 0.41875900768073815, "eval_loss": 2.5809065846260637e-05, "eval_mae": 0.004102489911019802, "eval_rmse": 0.005080262199044228, "eval_runtime": 313.9897, "eval_samples_per_second": 6.37, "eval_steps_per_second": 6.37, "step": 15000 }, { "epoch": 0.4201548710396739, "grad_norm": 6.03125, "learning_rate": 0.00011956484199620102, "loss": 0.0015, "step": 15050 }, { "epoch": 0.42155073439860974, "grad_norm": 10.0625, "learning_rate": 0.00011927703908363553, "loss": 0.0017, "step": 15100 }, { "epoch": 0.4229465977575455, "grad_norm": 4.03125, "learning_rate": 0.00011898923617107005, "loss": 0.0017, "step": 15150 }, { "epoch": 0.42434246111648133, "grad_norm": 6.21875, "learning_rate": 0.00011870143325850458, "loss": 0.0017, "step": 15200 }, { "epoch": 0.4257383244754171, "grad_norm": 0.8984375, "learning_rate": 0.00011841363034593911, "loss": 0.0016, "step": 15250 }, { "epoch": 0.4271341878343529, "grad_norm": 8.8125, "learning_rate": 0.00011812582743337363, "loss": 0.0015, "step": 15300 }, { "epoch": 0.4285300511932887, "grad_norm": 4.4375, "learning_rate": 0.00011783802452080816, "loss": 0.0016, "step": 15350 }, { "epoch": 0.4299259145522245, "grad_norm": 10.125, "learning_rate": 0.00011755022160824267, "loss": 0.0016, "step": 15400 }, { "epoch": 0.4313217779111603, "grad_norm": 7.5625, "learning_rate": 0.0001172624186956772, "loss": 0.0017, "step": 15450 }, { "epoch": 0.43271764127009604, "grad_norm": 6.46875, "learning_rate": 0.00011697461578311173, "loss": 0.0015, "step": 15500 }, { "epoch": 0.43411350462903187, "grad_norm": 2.421875, "learning_rate": 0.00011668681287054625, "loss": 0.0016, "step": 15550 }, { "epoch": 0.43550936798796763, "grad_norm": 5.75, "learning_rate": 0.00011639900995798079, "loss": 0.0016, "step": 15600 }, { "epoch": 0.43690523134690346, "grad_norm": 5.28125, "learning_rate": 0.00011611120704541532, "loss": 0.0021, "step": 15650 }, { "epoch": 0.4383010947058392, "grad_norm": 1.7734375, "learning_rate": 0.00011582340413284982, "loss": 0.0009, "step": 15700 }, { "epoch": 0.43969695806477505, "grad_norm": 0.2734375, "learning_rate": 0.00011553560122028436, "loss": 0.0003, "step": 15750 }, { "epoch": 0.4410928214237108, "grad_norm": 12.5625, "learning_rate": 0.00011524779830771888, "loss": 0.0006, "step": 15800 }, { "epoch": 0.44248868478264664, "grad_norm": 4.625, "learning_rate": 0.00011495999539515341, "loss": 0.001, "step": 15850 }, { "epoch": 0.4438845481415824, "grad_norm": 15.5, "learning_rate": 0.00011467219248258794, "loss": 0.0027, "step": 15900 }, { "epoch": 0.4452804115005182, "grad_norm": 13.125, "learning_rate": 0.00011438438957002245, "loss": 0.0032, "step": 15950 }, { "epoch": 0.446676274859454, "grad_norm": 14.6875, "learning_rate": 0.00011409658665745697, "loss": 0.0033, "step": 16000 }, { "epoch": 0.4480721382183898, "grad_norm": 15.1875, "learning_rate": 0.0001138087837448915, "loss": 0.0032, "step": 16050 }, { "epoch": 0.4494680015773256, "grad_norm": 10.4375, "learning_rate": 0.00011352098083232603, "loss": 0.0033, "step": 16100 }, { "epoch": 0.4508638649362614, "grad_norm": 14.5, "learning_rate": 0.00011323317791976055, "loss": 0.0032, "step": 16150 }, { "epoch": 0.45225972829519717, "grad_norm": 11.625, "learning_rate": 0.00011294537500719508, "loss": 0.0032, "step": 16200 }, { "epoch": 0.453655591654133, "grad_norm": 12.5625, "learning_rate": 0.00011265757209462959, "loss": 0.0032, "step": 16250 }, { "epoch": 0.453655591654133, "eval_loss": 0.0005641469615511596, "eval_mae": 0.023370979353785515, "eval_rmse": 0.02375177852809429, "eval_runtime": 313.9972, "eval_samples_per_second": 6.369, "eval_steps_per_second": 6.369, "step": 16250 }, { "epoch": 0.45505145501306876, "grad_norm": 13.0625, "learning_rate": 0.00011236976918206412, "loss": 0.0032, "step": 16300 }, { "epoch": 0.4564473183720046, "grad_norm": 9.875, "learning_rate": 0.00011208196626949865, "loss": 0.0031, "step": 16350 }, { "epoch": 0.45784318173094035, "grad_norm": 10.0625, "learning_rate": 0.00011179416335693319, "loss": 0.0031, "step": 16400 }, { "epoch": 0.4592390450898762, "grad_norm": 11.25, "learning_rate": 0.00011150636044436771, "loss": 0.0031, "step": 16450 }, { "epoch": 0.46063490844881194, "grad_norm": 11.6875, "learning_rate": 0.00011121855753180224, "loss": 0.003, "step": 16500 }, { "epoch": 0.46203077180774776, "grad_norm": 12.25, "learning_rate": 0.00011093075461923674, "loss": 0.003, "step": 16550 }, { "epoch": 0.46342663516668353, "grad_norm": 11.5, "learning_rate": 0.00011064295170667128, "loss": 0.003, "step": 16600 }, { "epoch": 0.46482249852561935, "grad_norm": 14.75, "learning_rate": 0.0001103551487941058, "loss": 0.003, "step": 16650 }, { "epoch": 0.4662183618845551, "grad_norm": 13.875, "learning_rate": 0.00011006734588154033, "loss": 0.0029, "step": 16700 }, { "epoch": 0.4676142252434909, "grad_norm": 13.6875, "learning_rate": 0.00010977954296897486, "loss": 0.0031, "step": 16750 }, { "epoch": 0.4690100886024267, "grad_norm": 3.796875, "learning_rate": 0.00010949174005640938, "loss": 0.0019, "step": 16800 }, { "epoch": 0.4704059519613625, "grad_norm": 5.59375, "learning_rate": 0.0001092039371438439, "loss": 0.0012, "step": 16850 }, { "epoch": 0.4718018153202983, "grad_norm": 2.921875, "learning_rate": 0.00010891613423127842, "loss": 0.0012, "step": 16900 }, { "epoch": 0.47319767867923407, "grad_norm": 8.6875, "learning_rate": 0.00010862833131871295, "loss": 0.0012, "step": 16950 }, { "epoch": 0.4745935420381699, "grad_norm": 3.96875, "learning_rate": 0.00010834052840614747, "loss": 0.0012, "step": 17000 }, { "epoch": 0.47598940539710566, "grad_norm": 10.1875, "learning_rate": 0.00010805272549358201, "loss": 0.0012, "step": 17050 }, { "epoch": 0.4773852687560415, "grad_norm": 0.12255859375, "learning_rate": 0.00010776492258101651, "loss": 0.0015, "step": 17100 }, { "epoch": 0.47878113211497725, "grad_norm": 6.1875, "learning_rate": 0.00010747711966845104, "loss": 0.0015, "step": 17150 }, { "epoch": 0.48017699547391307, "grad_norm": 3.1875, "learning_rate": 0.00010718931675588557, "loss": 0.0012, "step": 17200 }, { "epoch": 0.48157285883284884, "grad_norm": 6.09375, "learning_rate": 0.0001069015138433201, "loss": 0.001, "step": 17250 }, { "epoch": 0.48296872219178466, "grad_norm": 11.875, "learning_rate": 0.00010661371093075463, "loss": 0.0018, "step": 17300 }, { "epoch": 0.48436458555072043, "grad_norm": 14.9375, "learning_rate": 0.00010632590801818916, "loss": 0.0029, "step": 17350 }, { "epoch": 0.48576044890965625, "grad_norm": 15.75, "learning_rate": 0.00010603810510562366, "loss": 0.0027, "step": 17400 }, { "epoch": 0.487156312268592, "grad_norm": 2.328125, "learning_rate": 0.0001057503021930582, "loss": 0.002, "step": 17450 }, { "epoch": 0.48855217562752784, "grad_norm": 4.875, "learning_rate": 0.00010546249928049272, "loss": 0.0018, "step": 17500 }, { "epoch": 0.48855217562752784, "eval_loss": 8.424516272498295e-05, "eval_mae": 0.00828312523663044, "eval_rmse": 0.009178516454994678, "eval_runtime": 314.273, "eval_samples_per_second": 6.364, "eval_steps_per_second": 6.364, "step": 17500 }, { "epoch": 0.4899480389864636, "grad_norm": 5.65625, "learning_rate": 0.00010517469636792725, "loss": 0.0016, "step": 17550 }, { "epoch": 0.49134390234539943, "grad_norm": 3.375, "learning_rate": 0.00010488689345536178, "loss": 0.0016, "step": 17600 }, { "epoch": 0.4927397657043352, "grad_norm": 9.25, "learning_rate": 0.0001045990905427963, "loss": 0.0006, "step": 17650 }, { "epoch": 0.494135629063271, "grad_norm": 2.6875, "learning_rate": 0.00010431128763023082, "loss": 0.0005, "step": 17700 }, { "epoch": 0.4955314924222068, "grad_norm": 1.9609375, "learning_rate": 0.00010402348471766534, "loss": 0.0015, "step": 17750 }, { "epoch": 0.4969273557811426, "grad_norm": 2.390625, "learning_rate": 0.00010373568180509987, "loss": 0.0013, "step": 17800 }, { "epoch": 0.4983232191400784, "grad_norm": 0.5078125, "learning_rate": 0.0001034478788925344, "loss": 0.0012, "step": 17850 }, { "epoch": 0.49971908249901414, "grad_norm": 13.0625, "learning_rate": 0.00010316007597996893, "loss": 0.0011, "step": 17900 }, { "epoch": 0.5011149458579499, "grad_norm": 1.1953125, "learning_rate": 0.00010287227306740346, "loss": 0.0006, "step": 17950 }, { "epoch": 0.5025108092168857, "grad_norm": 3.578125, "learning_rate": 0.00010258447015483796, "loss": 0.0003, "step": 18000 }, { "epoch": 0.5039066725758216, "grad_norm": 0.1259765625, "learning_rate": 0.00010229666724227249, "loss": 0.0003, "step": 18050 }, { "epoch": 0.5053025359347574, "grad_norm": 5.34375, "learning_rate": 0.00010200886432970703, "loss": 0.0009, "step": 18100 }, { "epoch": 0.5066983992936931, "grad_norm": 0.134765625, "learning_rate": 0.00010172106141714155, "loss": 0.0023, "step": 18150 }, { "epoch": 0.5080942626526289, "grad_norm": 0.703125, "learning_rate": 0.00010143325850457608, "loss": 0.001, "step": 18200 }, { "epoch": 0.5094901260115647, "grad_norm": 1.6015625, "learning_rate": 0.00010114545559201059, "loss": 0.0023, "step": 18250 }, { "epoch": 0.5108859893705006, "grad_norm": 7.0, "learning_rate": 0.00010085765267944512, "loss": 0.001, "step": 18300 }, { "epoch": 0.5122818527294363, "grad_norm": 0.6328125, "learning_rate": 0.00010056984976687964, "loss": 0.0006, "step": 18350 }, { "epoch": 0.5136777160883721, "grad_norm": 3.53125, "learning_rate": 0.00010028204685431417, "loss": 0.0002, "step": 18400 }, { "epoch": 0.5150735794473079, "grad_norm": 0.1396484375, "learning_rate": 9.99942439417487e-05, "loss": 0.0003, "step": 18450 }, { "epoch": 0.5164694428062437, "grad_norm": 1.3203125, "learning_rate": 9.970644102918322e-05, "loss": 0.0003, "step": 18500 }, { "epoch": 0.5178653061651795, "grad_norm": 1.7890625, "learning_rate": 9.941863811661775e-05, "loss": 0.0005, "step": 18550 }, { "epoch": 0.5192611695241153, "grad_norm": 0.66015625, "learning_rate": 9.913083520405228e-05, "loss": 0.0009, "step": 18600 }, { "epoch": 0.5206570328830511, "grad_norm": 0.08349609375, "learning_rate": 9.884303229148679e-05, "loss": 0.0002, "step": 18650 }, { "epoch": 0.5220528962419869, "grad_norm": 4.125, "learning_rate": 9.855522937892131e-05, "loss": 0.0007, "step": 18700 }, { "epoch": 0.5234487596009226, "grad_norm": 5.53125, "learning_rate": 9.826742646635585e-05, "loss": 0.0008, "step": 18750 }, { "epoch": 0.5234487596009226, "eval_loss": 0.00011440851085353643, "eval_mae": 0.010100271552801132, "eval_rmse": 0.01069619134068489, "eval_runtime": 318.3188, "eval_samples_per_second": 6.283, "eval_steps_per_second": 6.283, "step": 18750 }, { "epoch": 0.5248446229598585, "grad_norm": 1.0625, "learning_rate": 9.797962355379037e-05, "loss": 0.0008, "step": 18800 }, { "epoch": 0.5262404863187943, "grad_norm": 0.8984375, "learning_rate": 9.76918206412249e-05, "loss": 0.0007, "step": 18850 }, { "epoch": 0.5276363496777301, "grad_norm": 5.6875, "learning_rate": 9.740401772865942e-05, "loss": 0.0009, "step": 18900 }, { "epoch": 0.5290322130366658, "grad_norm": 8.25, "learning_rate": 9.711621481609395e-05, "loss": 0.0005, "step": 18950 }, { "epoch": 0.5304280763956016, "grad_norm": 2.21875, "learning_rate": 9.682841190352847e-05, "loss": 0.0007, "step": 19000 }, { "epoch": 0.5318239397545375, "grad_norm": 0.73046875, "learning_rate": 9.654060899096299e-05, "loss": 0.0003, "step": 19050 }, { "epoch": 0.5332198031134732, "grad_norm": 5.90625, "learning_rate": 9.625280607839751e-05, "loss": 0.0007, "step": 19100 }, { "epoch": 0.534615666472409, "grad_norm": 1.5859375, "learning_rate": 9.596500316583205e-05, "loss": 0.0006, "step": 19150 }, { "epoch": 0.5360115298313448, "grad_norm": 0.546875, "learning_rate": 9.567720025326656e-05, "loss": 0.0004, "step": 19200 }, { "epoch": 0.5374073931902806, "grad_norm": 2.9375, "learning_rate": 9.538939734070109e-05, "loss": 0.001, "step": 19250 }, { "epoch": 0.5388032565492163, "grad_norm": 9.6875, "learning_rate": 9.510159442813562e-05, "loss": 0.0012, "step": 19300 }, { "epoch": 0.5401991199081522, "grad_norm": 2.328125, "learning_rate": 9.481379151557014e-05, "loss": 0.0005, "step": 19350 }, { "epoch": 0.541594983267088, "grad_norm": 0.038330078125, "learning_rate": 9.452598860300467e-05, "loss": 0.0002, "step": 19400 }, { "epoch": 0.5429908466260238, "grad_norm": 4.71875, "learning_rate": 9.42381856904392e-05, "loss": 0.0011, "step": 19450 }, { "epoch": 0.5443867099849595, "grad_norm": 0.34765625, "learning_rate": 9.395038277787371e-05, "loss": 0.0003, "step": 19500 }, { "epoch": 0.5457825733438953, "grad_norm": 6.46875, "learning_rate": 9.366257986530825e-05, "loss": 0.0011, "step": 19550 }, { "epoch": 0.5471784367028312, "grad_norm": 0.2451171875, "learning_rate": 9.337477695274277e-05, "loss": 0.0018, "step": 19600 }, { "epoch": 0.548574300061767, "grad_norm": 3.90625, "learning_rate": 9.308697404017729e-05, "loss": 0.0008, "step": 19650 }, { "epoch": 0.5499701634207027, "grad_norm": 8.75, "learning_rate": 9.279917112761181e-05, "loss": 0.0008, "step": 19700 }, { "epoch": 0.5513660267796385, "grad_norm": 9.0, "learning_rate": 9.251136821504634e-05, "loss": 0.0019, "step": 19750 }, { "epoch": 0.5527618901385744, "grad_norm": 8.75, "learning_rate": 9.222356530248087e-05, "loss": 0.0019, "step": 19800 }, { "epoch": 0.5541577534975102, "grad_norm": 9.1875, "learning_rate": 9.193576238991539e-05, "loss": 0.0014, "step": 19850 }, { "epoch": 0.5555536168564459, "grad_norm": 0.345703125, "learning_rate": 9.164795947734992e-05, "loss": 0.0018, "step": 19900 }, { "epoch": 0.5569494802153817, "grad_norm": 3.34375, "learning_rate": 9.136015656478443e-05, "loss": 0.0005, "step": 19950 }, { "epoch": 0.5583453435743175, "grad_norm": 1.15625, "learning_rate": 9.107235365221897e-05, "loss": 0.0007, "step": 20000 }, { "epoch": 0.5583453435743175, "eval_loss": 3.620574716478586e-05, "eval_mae": 0.0052015818655490875, "eval_rmse": 0.006017121020704508, "eval_runtime": 318.2368, "eval_samples_per_second": 6.285, "eval_steps_per_second": 6.285, "step": 20000 }, { "epoch": 0.5597412069332534, "grad_norm": 3.375, "learning_rate": 9.078455073965348e-05, "loss": 0.0002, "step": 20050 }, { "epoch": 0.5611370702921891, "grad_norm": 3.6875, "learning_rate": 9.049674782708801e-05, "loss": 0.0003, "step": 20100 }, { "epoch": 0.5625329336511249, "grad_norm": 3.640625, "learning_rate": 9.020894491452254e-05, "loss": 0.0006, "step": 20150 }, { "epoch": 0.5639287970100607, "grad_norm": 2.34375, "learning_rate": 8.992114200195706e-05, "loss": 0.0003, "step": 20200 }, { "epoch": 0.5653246603689965, "grad_norm": 2.171875, "learning_rate": 8.963333908939159e-05, "loss": 0.0004, "step": 20250 }, { "epoch": 0.5667205237279322, "grad_norm": 2.078125, "learning_rate": 8.934553617682612e-05, "loss": 0.0003, "step": 20300 }, { "epoch": 0.5681163870868681, "grad_norm": 2.203125, "learning_rate": 8.905773326426063e-05, "loss": 0.0007, "step": 20350 }, { "epoch": 0.5695122504458039, "grad_norm": 11.9375, "learning_rate": 8.876993035169517e-05, "loss": 0.0008, "step": 20400 }, { "epoch": 0.5709081138047396, "grad_norm": 0.9296875, "learning_rate": 8.84821274391297e-05, "loss": 0.001, "step": 20450 }, { "epoch": 0.5723039771636754, "grad_norm": 7.78125, "learning_rate": 8.819432452656421e-05, "loss": 0.0007, "step": 20500 }, { "epoch": 0.5736998405226112, "grad_norm": 2.40625, "learning_rate": 8.790652161399873e-05, "loss": 0.0003, "step": 20550 }, { "epoch": 0.5750957038815471, "grad_norm": 0.8359375, "learning_rate": 8.761871870143326e-05, "loss": 0.0004, "step": 20600 }, { "epoch": 0.5764915672404828, "grad_norm": 3.40625, "learning_rate": 8.733091578886779e-05, "loss": 0.0005, "step": 20650 }, { "epoch": 0.5778874305994186, "grad_norm": 7.53125, "learning_rate": 8.704311287630231e-05, "loss": 0.0012, "step": 20700 }, { "epoch": 0.5792832939583544, "grad_norm": 2.953125, "learning_rate": 8.675530996373684e-05, "loss": 0.0014, "step": 20750 }, { "epoch": 0.5806791573172903, "grad_norm": 0.2060546875, "learning_rate": 8.646750705117137e-05, "loss": 0.0003, "step": 20800 }, { "epoch": 0.582075020676226, "grad_norm": 5.875, "learning_rate": 8.617970413860589e-05, "loss": 0.0004, "step": 20850 }, { "epoch": 0.5834708840351618, "grad_norm": 2.703125, "learning_rate": 8.589190122604042e-05, "loss": 0.0004, "step": 20900 }, { "epoch": 0.5848667473940976, "grad_norm": 5.34375, "learning_rate": 8.560409831347493e-05, "loss": 0.0004, "step": 20950 }, { "epoch": 0.5862626107530334, "grad_norm": 0.6171875, "learning_rate": 8.531629540090946e-05, "loss": 0.0002, "step": 21000 }, { "epoch": 0.5876584741119691, "grad_norm": 3.703125, "learning_rate": 8.5028492488344e-05, "loss": 0.0003, "step": 21050 }, { "epoch": 0.589054337470905, "grad_norm": 1.890625, "learning_rate": 8.474068957577851e-05, "loss": 0.0002, "step": 21100 }, { "epoch": 0.5904502008298408, "grad_norm": 0.6640625, "learning_rate": 8.445288666321304e-05, "loss": 0.0003, "step": 21150 }, { "epoch": 0.5918460641887766, "grad_norm": 0.031005859375, "learning_rate": 8.416508375064755e-05, "loss": 0.0004, "step": 21200 }, { "epoch": 0.5932419275477123, "grad_norm": 7.21875, "learning_rate": 8.387728083808209e-05, "loss": 0.0005, "step": 21250 }, { "epoch": 0.5932419275477123, "eval_loss": 0.00024837159435264766, "eval_mae": 0.015309196896851063, "eval_rmse": 0.01575980894267559, "eval_runtime": 314.8323, "eval_samples_per_second": 6.353, "eval_steps_per_second": 6.353, "step": 21250 }, { "epoch": 0.5946377909066481, "grad_norm": 0.150390625, "learning_rate": 8.358947792551661e-05, "loss": 0.0006, "step": 21300 }, { "epoch": 0.596033654265584, "grad_norm": 0.287109375, "learning_rate": 8.330167501295113e-05, "loss": 0.0002, "step": 21350 }, { "epoch": 0.5974295176245198, "grad_norm": 4.40625, "learning_rate": 8.301387210038565e-05, "loss": 0.0013, "step": 21400 }, { "epoch": 0.5988253809834555, "grad_norm": 1.8515625, "learning_rate": 8.27260691878202e-05, "loss": 0.0003, "step": 21450 }, { "epoch": 0.6002212443423913, "grad_norm": 7.625, "learning_rate": 8.24382662752547e-05, "loss": 0.0003, "step": 21500 }, { "epoch": 0.6016171077013271, "grad_norm": 0.90625, "learning_rate": 8.215046336268923e-05, "loss": 0.0005, "step": 21550 }, { "epoch": 0.6030129710602629, "grad_norm": 2.0625, "learning_rate": 8.186266045012376e-05, "loss": 0.0006, "step": 21600 }, { "epoch": 0.6044088344191987, "grad_norm": 0.0751953125, "learning_rate": 8.157485753755829e-05, "loss": 0.0002, "step": 21650 }, { "epoch": 0.6058046977781345, "grad_norm": 4.6875, "learning_rate": 8.128705462499281e-05, "loss": 0.0002, "step": 21700 }, { "epoch": 0.6072005611370703, "grad_norm": 0.392578125, "learning_rate": 8.099925171242734e-05, "loss": 0.0005, "step": 21750 }, { "epoch": 0.608596424496006, "grad_norm": 0.609375, "learning_rate": 8.071144879986185e-05, "loss": 0.0003, "step": 21800 }, { "epoch": 0.6099922878549419, "grad_norm": 0.71484375, "learning_rate": 8.042364588729638e-05, "loss": 0.0002, "step": 21850 }, { "epoch": 0.6113881512138777, "grad_norm": 2.296875, "learning_rate": 8.013584297473092e-05, "loss": 0.0002, "step": 21900 }, { "epoch": 0.6127840145728135, "grad_norm": 1.0234375, "learning_rate": 7.984804006216543e-05, "loss": 0.0002, "step": 21950 }, { "epoch": 0.6141798779317492, "grad_norm": 3.59375, "learning_rate": 7.956023714959996e-05, "loss": 0.0006, "step": 22000 }, { "epoch": 0.615575741290685, "grad_norm": 0.345703125, "learning_rate": 7.927243423703448e-05, "loss": 0.0005, "step": 22050 }, { "epoch": 0.6169716046496209, "grad_norm": 0.921875, "learning_rate": 7.898463132446901e-05, "loss": 0.0002, "step": 22100 }, { "epoch": 0.6183674680085567, "grad_norm": 3.40625, "learning_rate": 7.869682841190354e-05, "loss": 0.0003, "step": 22150 }, { "epoch": 0.6197633313674924, "grad_norm": 2.953125, "learning_rate": 7.840902549933806e-05, "loss": 0.0003, "step": 22200 }, { "epoch": 0.6211591947264282, "grad_norm": 2.484375, "learning_rate": 7.812122258677257e-05, "loss": 0.0005, "step": 22250 }, { "epoch": 0.622555058085364, "grad_norm": 2.15625, "learning_rate": 7.783341967420711e-05, "loss": 0.0002, "step": 22300 }, { "epoch": 0.6239509214442999, "grad_norm": 0.734375, "learning_rate": 7.754561676164163e-05, "loss": 0.0002, "step": 22350 }, { "epoch": 0.6253467848032356, "grad_norm": 0.0380859375, "learning_rate": 7.725781384907615e-05, "loss": 0.0002, "step": 22400 }, { "epoch": 0.6267426481621714, "grad_norm": 14.0625, "learning_rate": 7.697001093651068e-05, "loss": 0.0017, "step": 22450 }, { "epoch": 0.6281385115211072, "grad_norm": 8.625, "learning_rate": 7.66822080239452e-05, "loss": 0.0023, "step": 22500 }, { "epoch": 0.6281385115211072, "eval_loss": 6.291436875471845e-05, "eval_mae": 0.00715098949149251, "eval_rmse": 0.00793185830116272, "eval_runtime": 315.2915, "eval_samples_per_second": 6.343, "eval_steps_per_second": 6.343, "step": 22500 }, { "epoch": 0.629534374880043, "grad_norm": 9.0, "learning_rate": 7.639440511137973e-05, "loss": 0.0021, "step": 22550 }, { "epoch": 0.6309302382389788, "grad_norm": 10.875, "learning_rate": 7.610660219881426e-05, "loss": 0.0022, "step": 22600 }, { "epoch": 0.6323261015979146, "grad_norm": 1.9453125, "learning_rate": 7.581879928624877e-05, "loss": 0.002, "step": 22650 }, { "epoch": 0.6337219649568504, "grad_norm": 1.15625, "learning_rate": 7.553099637368331e-05, "loss": 0.0006, "step": 22700 }, { "epoch": 0.6351178283157861, "grad_norm": 2.40625, "learning_rate": 7.524319346111784e-05, "loss": 0.0002, "step": 22750 }, { "epoch": 0.6365136916747219, "grad_norm": 1.203125, "learning_rate": 7.495539054855235e-05, "loss": 0.0003, "step": 22800 }, { "epoch": 0.6379095550336578, "grad_norm": 3.46875, "learning_rate": 7.466758763598688e-05, "loss": 0.0003, "step": 22850 }, { "epoch": 0.6393054183925936, "grad_norm": 0.484375, "learning_rate": 7.43797847234214e-05, "loss": 0.0003, "step": 22900 }, { "epoch": 0.6407012817515293, "grad_norm": 8.0625, "learning_rate": 7.409198181085593e-05, "loss": 0.0005, "step": 22950 }, { "epoch": 0.6420971451104651, "grad_norm": 0.158203125, "learning_rate": 7.380417889829046e-05, "loss": 0.0003, "step": 23000 }, { "epoch": 0.6434930084694009, "grad_norm": 0.1728515625, "learning_rate": 7.351637598572498e-05, "loss": 0.0002, "step": 23050 }, { "epoch": 0.6448888718283368, "grad_norm": 0.4765625, "learning_rate": 7.32285730731595e-05, "loss": 0.0002, "step": 23100 }, { "epoch": 0.6462847351872725, "grad_norm": 4.28125, "learning_rate": 7.294077016059403e-05, "loss": 0.0002, "step": 23150 }, { "epoch": 0.6476805985462083, "grad_norm": 0.0625, "learning_rate": 7.265296724802856e-05, "loss": 0.0009, "step": 23200 }, { "epoch": 0.6490764619051441, "grad_norm": 0.361328125, "learning_rate": 7.236516433546307e-05, "loss": 0.0002, "step": 23250 }, { "epoch": 0.6504723252640799, "grad_norm": 0.21875, "learning_rate": 7.20773614228976e-05, "loss": 0.0001, "step": 23300 }, { "epoch": 0.6518681886230157, "grad_norm": 4.5, "learning_rate": 7.178955851033214e-05, "loss": 0.0003, "step": 23350 }, { "epoch": 0.6532640519819515, "grad_norm": 2.90625, "learning_rate": 7.150175559776665e-05, "loss": 0.0003, "step": 23400 }, { "epoch": 0.6546599153408873, "grad_norm": 0.1669921875, "learning_rate": 7.121395268520118e-05, "loss": 0.0002, "step": 23450 }, { "epoch": 0.6560557786998231, "grad_norm": 3.78125, "learning_rate": 7.092614977263569e-05, "loss": 0.0002, "step": 23500 }, { "epoch": 0.6574516420587588, "grad_norm": 3.234375, "learning_rate": 7.063834686007023e-05, "loss": 0.0003, "step": 23550 }, { "epoch": 0.6588475054176947, "grad_norm": 2.6875, "learning_rate": 7.035054394750476e-05, "loss": 0.0002, "step": 23600 }, { "epoch": 0.6602433687766305, "grad_norm": 0.75, "learning_rate": 7.006274103493927e-05, "loss": 0.0003, "step": 23650 }, { "epoch": 0.6616392321355663, "grad_norm": 0.11865234375, "learning_rate": 6.97749381223738e-05, "loss": 0.0002, "step": 23700 }, { "epoch": 0.663035095494502, "grad_norm": 0.53515625, "learning_rate": 6.948713520980832e-05, "loss": 0.0002, "step": 23750 }, { "epoch": 0.663035095494502, "eval_loss": 1.540686389489565e-05, "eval_mae": 0.0031748104374855757, "eval_rmse": 0.0039251577109098434, "eval_runtime": 315.0516, "eval_samples_per_second": 6.348, "eval_steps_per_second": 6.348, "step": 23750 }, { "epoch": 0.6644309588534378, "grad_norm": 2.734375, "learning_rate": 6.919933229724285e-05, "loss": 0.0002, "step": 23800 }, { "epoch": 0.6658268222123737, "grad_norm": 2.3125, "learning_rate": 6.891152938467738e-05, "loss": 0.0001, "step": 23850 }, { "epoch": 0.6672226855713094, "grad_norm": 0.34765625, "learning_rate": 6.86237264721119e-05, "loss": 0.0001, "step": 23900 }, { "epoch": 0.6686185489302452, "grad_norm": 0.41796875, "learning_rate": 6.833592355954643e-05, "loss": 0.0002, "step": 23950 }, { "epoch": 0.670014412289181, "grad_norm": 4.78125, "learning_rate": 6.804812064698095e-05, "loss": 0.0002, "step": 24000 }, { "epoch": 0.6714102756481168, "grad_norm": 2.96875, "learning_rate": 6.776031773441548e-05, "loss": 0.0003, "step": 24050 }, { "epoch": 0.6728061390070526, "grad_norm": 0.7265625, "learning_rate": 6.747251482185e-05, "loss": 0.0002, "step": 24100 }, { "epoch": 0.6742020023659884, "grad_norm": 1.1328125, "learning_rate": 6.718471190928452e-05, "loss": 0.0002, "step": 24150 }, { "epoch": 0.6755978657249242, "grad_norm": 0.84765625, "learning_rate": 6.689690899671906e-05, "loss": 0.0003, "step": 24200 }, { "epoch": 0.67699372908386, "grad_norm": 0.2421875, "learning_rate": 6.660910608415357e-05, "loss": 0.0002, "step": 24250 }, { "epoch": 0.6783895924427957, "grad_norm": 4.40625, "learning_rate": 6.63213031715881e-05, "loss": 0.0003, "step": 24300 }, { "epoch": 0.6797854558017316, "grad_norm": 0.30078125, "learning_rate": 6.603350025902263e-05, "loss": 0.0002, "step": 24350 }, { "epoch": 0.6811813191606674, "grad_norm": 2.78125, "learning_rate": 6.574569734645715e-05, "loss": 0.0002, "step": 24400 }, { "epoch": 0.6825771825196032, "grad_norm": 0.984375, "learning_rate": 6.545789443389168e-05, "loss": 0.0002, "step": 24450 }, { "epoch": 0.6839730458785389, "grad_norm": 3.3125, "learning_rate": 6.51700915213262e-05, "loss": 0.0006, "step": 24500 }, { "epoch": 0.6853689092374747, "grad_norm": 1.5625, "learning_rate": 6.488228860876072e-05, "loss": 0.0001, "step": 24550 }, { "epoch": 0.6867647725964106, "grad_norm": 2.53125, "learning_rate": 6.459448569619526e-05, "loss": 0.0002, "step": 24600 }, { "epoch": 0.6881606359553464, "grad_norm": 2.65625, "learning_rate": 6.430668278362977e-05, "loss": 0.0002, "step": 24650 }, { "epoch": 0.6895564993142821, "grad_norm": 0.6328125, "learning_rate": 6.40188798710643e-05, "loss": 0.0002, "step": 24700 }, { "epoch": 0.6909523626732179, "grad_norm": 3.015625, "learning_rate": 6.373107695849882e-05, "loss": 0.0004, "step": 24750 }, { "epoch": 0.6923482260321537, "grad_norm": 3.390625, "learning_rate": 6.344327404593335e-05, "loss": 0.0003, "step": 24800 }, { "epoch": 0.6937440893910896, "grad_norm": 0.55078125, "learning_rate": 6.315547113336787e-05, "loss": 0.0003, "step": 24850 }, { "epoch": 0.6951399527500253, "grad_norm": 4.65625, "learning_rate": 6.28676682208024e-05, "loss": 0.0001, "step": 24900 }, { "epoch": 0.6965358161089611, "grad_norm": 3.4375, "learning_rate": 6.257986530823691e-05, "loss": 0.0003, "step": 24950 }, { "epoch": 0.6979316794678969, "grad_norm": 1.1953125, "learning_rate": 6.229206239567144e-05, "loss": 0.0003, "step": 25000 }, { "epoch": 0.6979316794678969, "eval_loss": 9.060095180757344e-06, "eval_mae": 0.002397725125774741, "eval_rmse": 0.003009999170899391, "eval_runtime": 314.8136, "eval_samples_per_second": 6.353, "eval_steps_per_second": 6.353, "step": 25000 }, { "epoch": 0.6993275428268326, "grad_norm": 1.2421875, "learning_rate": 6.200425948310598e-05, "loss": 0.0001, "step": 25050 }, { "epoch": 0.7007234061857684, "grad_norm": 3.671875, "learning_rate": 6.171645657054049e-05, "loss": 0.0002, "step": 25100 }, { "epoch": 0.7021192695447043, "grad_norm": 0.8046875, "learning_rate": 6.142865365797502e-05, "loss": 0.0002, "step": 25150 }, { "epoch": 0.7035151329036401, "grad_norm": 0.2890625, "learning_rate": 6.114085074540955e-05, "loss": 0.0001, "step": 25200 }, { "epoch": 0.7049109962625758, "grad_norm": 0.439453125, "learning_rate": 6.0853047832844065e-05, "loss": 0.0001, "step": 25250 }, { "epoch": 0.7063068596215116, "grad_norm": 1.2265625, "learning_rate": 6.05652449202786e-05, "loss": 0.0001, "step": 25300 }, { "epoch": 0.7077027229804475, "grad_norm": 3.21875, "learning_rate": 6.0277442007713124e-05, "loss": 0.0002, "step": 25350 }, { "epoch": 0.7090985863393833, "grad_norm": 0.1982421875, "learning_rate": 5.9989639095147644e-05, "loss": 0.0003, "step": 25400 }, { "epoch": 0.710494449698319, "grad_norm": 2.625, "learning_rate": 5.970183618258217e-05, "loss": 0.0003, "step": 25450 }, { "epoch": 0.7118903130572548, "grad_norm": 0.193359375, "learning_rate": 5.94140332700167e-05, "loss": 0.0002, "step": 25500 }, { "epoch": 0.7132861764161906, "grad_norm": 3.8125, "learning_rate": 5.9126230357451216e-05, "loss": 0.0002, "step": 25550 }, { "epoch": 0.7146820397751265, "grad_norm": 1.0390625, "learning_rate": 5.883842744488575e-05, "loss": 0.0003, "step": 25600 }, { "epoch": 0.7160779031340622, "grad_norm": 4.28125, "learning_rate": 5.8550624532320275e-05, "loss": 0.0004, "step": 25650 }, { "epoch": 0.717473766492998, "grad_norm": 1.1015625, "learning_rate": 5.8262821619754795e-05, "loss": 0.0002, "step": 25700 }, { "epoch": 0.7188696298519338, "grad_norm": 1.5390625, "learning_rate": 5.797501870718932e-05, "loss": 0.0003, "step": 25750 }, { "epoch": 0.7202654932108696, "grad_norm": 0.9765625, "learning_rate": 5.768721579462384e-05, "loss": 0.0002, "step": 25800 }, { "epoch": 0.7216613565698053, "grad_norm": 1.9140625, "learning_rate": 5.739941288205837e-05, "loss": 0.0001, "step": 25850 }, { "epoch": 0.7230572199287412, "grad_norm": 1.5234375, "learning_rate": 5.711160996949289e-05, "loss": 0.0002, "step": 25900 }, { "epoch": 0.724453083287677, "grad_norm": 2.234375, "learning_rate": 5.682380705692741e-05, "loss": 0.0002, "step": 25950 }, { "epoch": 0.7258489466466128, "grad_norm": 0.392578125, "learning_rate": 5.6536004144361946e-05, "loss": 0.0002, "step": 26000 }, { "epoch": 0.7272448100055485, "grad_norm": 1.546875, "learning_rate": 5.624820123179647e-05, "loss": 0.0002, "step": 26050 }, { "epoch": 0.7286406733644843, "grad_norm": 1.2265625, "learning_rate": 5.596039831923099e-05, "loss": 0.0001, "step": 26100 }, { "epoch": 0.7300365367234202, "grad_norm": 0.8125, "learning_rate": 5.567259540666552e-05, "loss": 0.0002, "step": 26150 }, { "epoch": 0.731432400082356, "grad_norm": 0.265625, "learning_rate": 5.5384792494100044e-05, "loss": 0.0001, "step": 26200 }, { "epoch": 0.7328282634412917, "grad_norm": 0.050537109375, "learning_rate": 5.5096989581534564e-05, "loss": 0.0001, "step": 26250 }, { "epoch": 0.7328282634412917, "eval_loss": 9.440889698453248e-06, "eval_mae": 0.0024416493251919746, "eval_rmse": 0.003072603140026331, "eval_runtime": 310.9978, "eval_samples_per_second": 6.431, "eval_steps_per_second": 6.431, "step": 26250 }, { "epoch": 0.7342241268002275, "grad_norm": 2.734375, "learning_rate": 5.480918666896909e-05, "loss": 0.0002, "step": 26300 }, { "epoch": 0.7356199901591634, "grad_norm": 1.5625, "learning_rate": 5.452138375640362e-05, "loss": 0.0002, "step": 26350 }, { "epoch": 0.7370158535180991, "grad_norm": 3.15625, "learning_rate": 5.4233580843838136e-05, "loss": 0.0001, "step": 26400 }, { "epoch": 0.7384117168770349, "grad_norm": 0.07080078125, "learning_rate": 5.394577793127267e-05, "loss": 0.0002, "step": 26450 }, { "epoch": 0.7398075802359707, "grad_norm": 0.035400390625, "learning_rate": 5.3657975018707195e-05, "loss": 0.0001, "step": 26500 }, { "epoch": 0.7412034435949065, "grad_norm": 0.0390625, "learning_rate": 5.3370172106141715e-05, "loss": 0.0001, "step": 26550 }, { "epoch": 0.7425993069538422, "grad_norm": 2.171875, "learning_rate": 5.308236919357624e-05, "loss": 0.0001, "step": 26600 }, { "epoch": 0.7439951703127781, "grad_norm": 0.796875, "learning_rate": 5.2794566281010774e-05, "loss": 0.0002, "step": 26650 }, { "epoch": 0.7453910336717139, "grad_norm": 2.65625, "learning_rate": 5.250676336844529e-05, "loss": 0.0002, "step": 26700 }, { "epoch": 0.7467868970306497, "grad_norm": 0.6171875, "learning_rate": 5.221896045587982e-05, "loss": 0.0002, "step": 26750 }, { "epoch": 0.7481827603895854, "grad_norm": 1.078125, "learning_rate": 5.1931157543314347e-05, "loss": 0.0001, "step": 26800 }, { "epoch": 0.7495786237485212, "grad_norm": 1.6875, "learning_rate": 5.1643354630748866e-05, "loss": 0.0001, "step": 26850 }, { "epoch": 0.7509744871074571, "grad_norm": 2.640625, "learning_rate": 5.135555171818339e-05, "loss": 0.0001, "step": 26900 }, { "epoch": 0.7523703504663929, "grad_norm": 0.028564453125, "learning_rate": 5.106774880561791e-05, "loss": 0.0002, "step": 26950 }, { "epoch": 0.7537662138253286, "grad_norm": 2.640625, "learning_rate": 5.077994589305244e-05, "loss": 0.0001, "step": 27000 }, { "epoch": 0.7551620771842644, "grad_norm": 0.2412109375, "learning_rate": 5.0492142980486965e-05, "loss": 0.0001, "step": 27050 }, { "epoch": 0.7565579405432002, "grad_norm": 0.0478515625, "learning_rate": 5.0204340067921484e-05, "loss": 0.0002, "step": 27100 }, { "epoch": 0.7579538039021361, "grad_norm": 0.25390625, "learning_rate": 4.991653715535601e-05, "loss": 0.0001, "step": 27150 }, { "epoch": 0.7593496672610718, "grad_norm": 1.703125, "learning_rate": 4.9628734242790544e-05, "loss": 0.0002, "step": 27200 }, { "epoch": 0.7607455306200076, "grad_norm": 1.7578125, "learning_rate": 4.934093133022506e-05, "loss": 0.0002, "step": 27250 }, { "epoch": 0.7621413939789434, "grad_norm": 0.79296875, "learning_rate": 4.905312841765959e-05, "loss": 0.0002, "step": 27300 }, { "epoch": 0.7635372573378792, "grad_norm": 0.369140625, "learning_rate": 4.876532550509411e-05, "loss": 0.0001, "step": 27350 }, { "epoch": 0.764933120696815, "grad_norm": 0.6875, "learning_rate": 4.847752259252864e-05, "loss": 0.0001, "step": 27400 }, { "epoch": 0.7663289840557508, "grad_norm": 0.53125, "learning_rate": 4.818971967996316e-05, "loss": 0.0001, "step": 27450 }, { "epoch": 0.7677248474146866, "grad_norm": 0.240234375, "learning_rate": 4.790191676739769e-05, "loss": 0.0001, "step": 27500 }, { "epoch": 0.7677248474146866, "eval_loss": 1.0368624316470232e-05, "eval_mae": 0.0025949301198124886, "eval_rmse": 0.0032200347632169724, "eval_runtime": 317.1945, "eval_samples_per_second": 6.305, "eval_steps_per_second": 6.305, "step": 27500 }, { "epoch": 0.7691207107736223, "grad_norm": 0.5234375, "learning_rate": 4.7614113854832214e-05, "loss": 0.0001, "step": 27550 }, { "epoch": 0.7705165741325581, "grad_norm": 1.4765625, "learning_rate": 4.732631094226674e-05, "loss": 0.0002, "step": 27600 }, { "epoch": 0.771912437491494, "grad_norm": 0.455078125, "learning_rate": 4.703850802970126e-05, "loss": 0.0002, "step": 27650 }, { "epoch": 0.7733083008504298, "grad_norm": 0.5390625, "learning_rate": 4.675070511713579e-05, "loss": 0.0001, "step": 27700 }, { "epoch": 0.7747041642093655, "grad_norm": 0.96484375, "learning_rate": 4.646290220457031e-05, "loss": 0.0002, "step": 27750 }, { "epoch": 0.7761000275683013, "grad_norm": 0.73046875, "learning_rate": 4.617509929200484e-05, "loss": 0.0001, "step": 27800 }, { "epoch": 0.7774958909272371, "grad_norm": 0.1923828125, "learning_rate": 4.588729637943936e-05, "loss": 0.0001, "step": 27850 }, { "epoch": 0.778891754286173, "grad_norm": 1.25, "learning_rate": 4.559949346687389e-05, "loss": 0.0001, "step": 27900 }, { "epoch": 0.7802876176451087, "grad_norm": 3.453125, "learning_rate": 4.531169055430841e-05, "loss": 0.0001, "step": 27950 }, { "epoch": 0.7816834810040445, "grad_norm": 2.296875, "learning_rate": 4.502388764174294e-05, "loss": 0.0002, "step": 28000 }, { "epoch": 0.7830793443629803, "grad_norm": 3.109375, "learning_rate": 4.4736084729177464e-05, "loss": 0.0001, "step": 28050 }, { "epoch": 0.7844752077219161, "grad_norm": 1.640625, "learning_rate": 4.444828181661198e-05, "loss": 0.0001, "step": 28100 }, { "epoch": 0.7858710710808519, "grad_norm": 0.2001953125, "learning_rate": 4.416047890404651e-05, "loss": 0.0002, "step": 28150 }, { "epoch": 0.7872669344397877, "grad_norm": 2.453125, "learning_rate": 4.3872675991481036e-05, "loss": 0.0002, "step": 28200 }, { "epoch": 0.7886627977987235, "grad_norm": 0.69921875, "learning_rate": 4.358487307891556e-05, "loss": 0.0001, "step": 28250 }, { "epoch": 0.7900586611576593, "grad_norm": 0.734375, "learning_rate": 4.329707016635008e-05, "loss": 0.0001, "step": 28300 }, { "epoch": 0.791454524516595, "grad_norm": 0.02294921875, "learning_rate": 4.3009267253784615e-05, "loss": 0.0001, "step": 28350 }, { "epoch": 0.7928503878755309, "grad_norm": 0.84765625, "learning_rate": 4.2721464341219134e-05, "loss": 0.0001, "step": 28400 }, { "epoch": 0.7942462512344667, "grad_norm": 1.3046875, "learning_rate": 4.243366142865366e-05, "loss": 0.0001, "step": 28450 }, { "epoch": 0.7956421145934025, "grad_norm": 0.625, "learning_rate": 4.214585851608818e-05, "loss": 0.0001, "step": 28500 }, { "epoch": 0.7970379779523382, "grad_norm": 0.19140625, "learning_rate": 4.185805560352271e-05, "loss": 0.0001, "step": 28550 }, { "epoch": 0.798433841311274, "grad_norm": 0.66796875, "learning_rate": 4.157025269095723e-05, "loss": 0.0001, "step": 28600 }, { "epoch": 0.7998297046702099, "grad_norm": 0.390625, "learning_rate": 4.128244977839176e-05, "loss": 0.0002, "step": 28650 }, { "epoch": 0.8012255680291456, "grad_norm": 2.53125, "learning_rate": 4.0994646865826285e-05, "loss": 0.0001, "step": 28700 }, { "epoch": 0.8026214313880814, "grad_norm": 0.54296875, "learning_rate": 4.070684395326081e-05, "loss": 0.0001, "step": 28750 }, { "epoch": 0.8026214313880814, "eval_loss": 8.319076187035535e-06, "eval_mae": 0.002299492945894599, "eval_rmse": 0.0028842808678746223, "eval_runtime": 319.4261, "eval_samples_per_second": 6.261, "eval_steps_per_second": 6.261, "step": 28750 }, { "epoch": 0.8040172947470172, "grad_norm": 1.5703125, "learning_rate": 4.041904104069533e-05, "loss": 0.0001, "step": 28800 }, { "epoch": 0.805413158105953, "grad_norm": 2.203125, "learning_rate": 4.0131238128129864e-05, "loss": 0.0001, "step": 28850 }, { "epoch": 0.8068090214648888, "grad_norm": 3.296875, "learning_rate": 3.9843435215564384e-05, "loss": 0.0001, "step": 28900 }, { "epoch": 0.8082048848238246, "grad_norm": 0.671875, "learning_rate": 3.955563230299891e-05, "loss": 0.0001, "step": 28950 }, { "epoch": 0.8096007481827604, "grad_norm": 1.453125, "learning_rate": 3.926782939043343e-05, "loss": 0.0001, "step": 29000 }, { "epoch": 0.8109966115416962, "grad_norm": 1.0859375, "learning_rate": 3.8980026477867956e-05, "loss": 0.0001, "step": 29050 }, { "epoch": 0.8123924749006319, "grad_norm": 0.89453125, "learning_rate": 3.869222356530248e-05, "loss": 0.0001, "step": 29100 }, { "epoch": 0.8137883382595678, "grad_norm": 1.453125, "learning_rate": 3.840442065273701e-05, "loss": 0.0001, "step": 29150 }, { "epoch": 0.8151842016185036, "grad_norm": 0.51953125, "learning_rate": 3.8116617740171535e-05, "loss": 0.0002, "step": 29200 }, { "epoch": 0.8165800649774394, "grad_norm": 0.85546875, "learning_rate": 3.7828814827606055e-05, "loss": 0.0001, "step": 29250 }, { "epoch": 0.8179759283363751, "grad_norm": 0.33203125, "learning_rate": 3.754101191504058e-05, "loss": 0.0001, "step": 29300 }, { "epoch": 0.8193717916953109, "grad_norm": 0.37109375, "learning_rate": 3.725320900247511e-05, "loss": 0.0001, "step": 29350 }, { "epoch": 0.8207676550542468, "grad_norm": 0.98828125, "learning_rate": 3.6965406089909633e-05, "loss": 0.0001, "step": 29400 }, { "epoch": 0.8221635184131826, "grad_norm": 0.232421875, "learning_rate": 3.667760317734415e-05, "loss": 0.0001, "step": 29450 }, { "epoch": 0.8235593817721183, "grad_norm": 0.89453125, "learning_rate": 3.6389800264778686e-05, "loss": 0.0001, "step": 29500 }, { "epoch": 0.8249552451310541, "grad_norm": 1.0703125, "learning_rate": 3.6101997352213206e-05, "loss": 0.0001, "step": 29550 }, { "epoch": 0.8263511084899899, "grad_norm": 0.47265625, "learning_rate": 3.581419443964773e-05, "loss": 0.0001, "step": 29600 }, { "epoch": 0.8277469718489258, "grad_norm": 0.70703125, "learning_rate": 3.552639152708225e-05, "loss": 0.0001, "step": 29650 }, { "epoch": 0.8291428352078615, "grad_norm": 0.36328125, "learning_rate": 3.5238588614516785e-05, "loss": 0.0001, "step": 29700 }, { "epoch": 0.8305386985667973, "grad_norm": 0.84375, "learning_rate": 3.4950785701951304e-05, "loss": 0.0001, "step": 29750 }, { "epoch": 0.8319345619257331, "grad_norm": 2.5625, "learning_rate": 3.466298278938583e-05, "loss": 0.0001, "step": 29800 }, { "epoch": 0.8333304252846688, "grad_norm": 0.029052734375, "learning_rate": 3.437517987682036e-05, "loss": 0.0001, "step": 29850 }, { "epoch": 0.8347262886436047, "grad_norm": 0.84765625, "learning_rate": 3.408737696425488e-05, "loss": 0.0001, "step": 29900 }, { "epoch": 0.8361221520025405, "grad_norm": 0.5546875, "learning_rate": 3.37995740516894e-05, "loss": 0.0001, "step": 29950 }, { "epoch": 0.8375180153614763, "grad_norm": 0.302734375, "learning_rate": 3.351177113912393e-05, "loss": 0.0001, "step": 30000 }, { "epoch": 0.8375180153614763, "eval_loss": 7.90274134487845e-06, "eval_mae": 0.00223693554289639, "eval_rmse": 0.002811181591823697, "eval_runtime": 359.2158, "eval_samples_per_second": 5.568, "eval_steps_per_second": 5.568, "step": 30000 }, { "epoch": 0.838913878720412, "grad_norm": 0.451171875, "learning_rate": 3.3223968226558455e-05, "loss": 0.0001, "step": 30050 }, { "epoch": 0.8403097420793478, "grad_norm": 0.9921875, "learning_rate": 3.293616531399298e-05, "loss": 0.0001, "step": 30100 }, { "epoch": 0.8417056054382837, "grad_norm": 0.0576171875, "learning_rate": 3.26483624014275e-05, "loss": 0.0001, "step": 30150 }, { "epoch": 0.8431014687972195, "grad_norm": 1.515625, "learning_rate": 3.236055948886203e-05, "loss": 0.0001, "step": 30200 }, { "epoch": 0.8444973321561552, "grad_norm": 0.6953125, "learning_rate": 3.2072756576296554e-05, "loss": 0.0001, "step": 30250 }, { "epoch": 0.845893195515091, "grad_norm": 0.23046875, "learning_rate": 3.178495366373107e-05, "loss": 0.0001, "step": 30300 }, { "epoch": 0.8472890588740268, "grad_norm": 0.55859375, "learning_rate": 3.1497150751165606e-05, "loss": 0.0001, "step": 30350 }, { "epoch": 0.8486849222329627, "grad_norm": 0.11328125, "learning_rate": 3.1209347838600126e-05, "loss": 0.0001, "step": 30400 }, { "epoch": 0.8500807855918984, "grad_norm": 1.0234375, "learning_rate": 3.092154492603465e-05, "loss": 0.0001, "step": 30450 }, { "epoch": 0.8514766489508342, "grad_norm": 0.2099609375, "learning_rate": 3.063374201346918e-05, "loss": 0.0001, "step": 30500 }, { "epoch": 0.85287251230977, "grad_norm": 0.078125, "learning_rate": 3.03459391009037e-05, "loss": 0.0001, "step": 30550 }, { "epoch": 0.8542683756687058, "grad_norm": 1.125, "learning_rate": 3.0058136188338228e-05, "loss": 0.0001, "step": 30600 }, { "epoch": 0.8556642390276415, "grad_norm": 0.70703125, "learning_rate": 2.9770333275772754e-05, "loss": 0.0001, "step": 30650 }, { "epoch": 0.8570601023865774, "grad_norm": 1.5390625, "learning_rate": 2.9482530363207277e-05, "loss": 0.0001, "step": 30700 }, { "epoch": 0.8584559657455132, "grad_norm": 0.1689453125, "learning_rate": 2.91947274506418e-05, "loss": 0.0001, "step": 30750 }, { "epoch": 0.859851829104449, "grad_norm": 0.51171875, "learning_rate": 2.8906924538076323e-05, "loss": 0.0001, "step": 30800 }, { "epoch": 0.8612476924633847, "grad_norm": 0.28515625, "learning_rate": 2.8619121625510852e-05, "loss": 0.0001, "step": 30850 }, { "epoch": 0.8626435558223206, "grad_norm": 0.8671875, "learning_rate": 2.8331318712945375e-05, "loss": 0.0001, "step": 30900 }, { "epoch": 0.8640394191812564, "grad_norm": 1.125, "learning_rate": 2.80435158003799e-05, "loss": 0.0001, "step": 30950 }, { "epoch": 0.8654352825401921, "grad_norm": 2.421875, "learning_rate": 2.7755712887814428e-05, "loss": 0.0001, "step": 31000 }, { "epoch": 0.8668311458991279, "grad_norm": 0.78515625, "learning_rate": 2.746790997524895e-05, "loss": 0.0001, "step": 31050 }, { "epoch": 0.8682270092580637, "grad_norm": 0.083984375, "learning_rate": 2.7180107062683474e-05, "loss": 0.0001, "step": 31100 }, { "epoch": 0.8696228726169996, "grad_norm": 1.890625, "learning_rate": 2.6892304150118004e-05, "loss": 0.0001, "step": 31150 }, { "epoch": 0.8710187359759353, "grad_norm": 0.466796875, "learning_rate": 2.6604501237552526e-05, "loss": 0.0001, "step": 31200 }, { "epoch": 0.8724145993348711, "grad_norm": 1.1015625, "learning_rate": 2.631669832498705e-05, "loss": 0.0001, "step": 31250 }, { "epoch": 0.8724145993348711, "eval_loss": 7.70491715229582e-06, "eval_mae": 0.002213448518887162, "eval_rmse": 0.002775773173198104, "eval_runtime": 314.7817, "eval_samples_per_second": 6.354, "eval_steps_per_second": 6.354, "step": 31250 }, { "epoch": 0.8738104626938069, "grad_norm": 1.734375, "learning_rate": 2.6028895412421572e-05, "loss": 0.0001, "step": 31300 }, { "epoch": 0.8752063260527427, "grad_norm": 0.5625, "learning_rate": 2.5741092499856102e-05, "loss": 0.0001, "step": 31350 }, { "epoch": 0.8766021894116784, "grad_norm": 2.203125, "learning_rate": 2.5453289587290625e-05, "loss": 0.0001, "step": 31400 }, { "epoch": 0.8779980527706143, "grad_norm": 2.921875, "learning_rate": 2.5165486674725148e-05, "loss": 0.0001, "step": 31450 }, { "epoch": 0.8793939161295501, "grad_norm": 0.1865234375, "learning_rate": 2.4877683762159674e-05, "loss": 0.0001, "step": 31500 }, { "epoch": 0.8807897794884859, "grad_norm": 1.171875, "learning_rate": 2.45898808495942e-05, "loss": 0.0001, "step": 31550 }, { "epoch": 0.8821856428474216, "grad_norm": 0.8515625, "learning_rate": 2.4302077937028723e-05, "loss": 0.0001, "step": 31600 }, { "epoch": 0.8835815062063574, "grad_norm": 0.625, "learning_rate": 2.401427502446325e-05, "loss": 0.0001, "step": 31650 }, { "epoch": 0.8849773695652933, "grad_norm": 0.2060546875, "learning_rate": 2.3726472111897773e-05, "loss": 0.0001, "step": 31700 }, { "epoch": 0.8863732329242291, "grad_norm": 1.1484375, "learning_rate": 2.3438669199332296e-05, "loss": 0.0001, "step": 31750 }, { "epoch": 0.8877690962831648, "grad_norm": 0.3359375, "learning_rate": 2.3150866286766822e-05, "loss": 0.0001, "step": 31800 }, { "epoch": 0.8891649596421006, "grad_norm": 1.2890625, "learning_rate": 2.2863063374201348e-05, "loss": 0.0001, "step": 31850 }, { "epoch": 0.8905608230010365, "grad_norm": 0.51953125, "learning_rate": 2.257526046163587e-05, "loss": 0.0001, "step": 31900 }, { "epoch": 0.8919566863599723, "grad_norm": 0.455078125, "learning_rate": 2.2287457549070397e-05, "loss": 0.0001, "step": 31950 }, { "epoch": 0.893352549718908, "grad_norm": 0.3046875, "learning_rate": 2.1999654636504924e-05, "loss": 0.0001, "step": 32000 }, { "epoch": 0.8947484130778438, "grad_norm": 0.146484375, "learning_rate": 2.1711851723939447e-05, "loss": 0.0001, "step": 32050 }, { "epoch": 0.8961442764367796, "grad_norm": 0.166015625, "learning_rate": 2.1424048811373973e-05, "loss": 0.0001, "step": 32100 }, { "epoch": 0.8975401397957155, "grad_norm": 0.58984375, "learning_rate": 2.1136245898808496e-05, "loss": 0.0001, "step": 32150 }, { "epoch": 0.8989360031546512, "grad_norm": 0.326171875, "learning_rate": 2.0848442986243022e-05, "loss": 0.0001, "step": 32200 }, { "epoch": 0.900331866513587, "grad_norm": 0.50390625, "learning_rate": 2.056064007367755e-05, "loss": 0.0001, "step": 32250 }, { "epoch": 0.9017277298725228, "grad_norm": 0.33203125, "learning_rate": 2.027283716111207e-05, "loss": 0.0001, "step": 32300 }, { "epoch": 0.9031235932314585, "grad_norm": 0.9453125, "learning_rate": 1.9985034248546598e-05, "loss": 0.0001, "step": 32350 }, { "epoch": 0.9045194565903943, "grad_norm": 1.0546875, "learning_rate": 1.969723133598112e-05, "loss": 0.0001, "step": 32400 }, { "epoch": 0.9059153199493302, "grad_norm": 2.03125, "learning_rate": 1.9409428423415647e-05, "loss": 0.0001, "step": 32450 }, { "epoch": 0.907311183308266, "grad_norm": 0.1025390625, "learning_rate": 1.9121625510850173e-05, "loss": 0.0001, "step": 32500 }, { "epoch": 0.907311183308266, "eval_loss": 8.019745109777432e-06, "eval_mae": 0.0022684482391923666, "eval_rmse": 0.002831915393471718, "eval_runtime": 314.5488, "eval_samples_per_second": 6.358, "eval_steps_per_second": 6.358, "step": 32500 }, { "epoch": 0.9087070466672017, "grad_norm": 0.248046875, "learning_rate": 1.8833822598284696e-05, "loss": 0.0001, "step": 32550 }, { "epoch": 0.9101029100261375, "grad_norm": 0.28125, "learning_rate": 1.8546019685719223e-05, "loss": 0.0001, "step": 32600 }, { "epoch": 0.9114987733850733, "grad_norm": 0.4296875, "learning_rate": 1.8258216773153745e-05, "loss": 0.0001, "step": 32650 }, { "epoch": 0.9128946367440092, "grad_norm": 1.5234375, "learning_rate": 1.797041386058827e-05, "loss": 0.0001, "step": 32700 }, { "epoch": 0.9142905001029449, "grad_norm": 0.2353515625, "learning_rate": 1.7682610948022795e-05, "loss": 0.0001, "step": 32750 }, { "epoch": 0.9156863634618807, "grad_norm": 0.6328125, "learning_rate": 1.7394808035457318e-05, "loss": 0.0001, "step": 32800 }, { "epoch": 0.9170822268208165, "grad_norm": 0.06591796875, "learning_rate": 1.7107005122891844e-05, "loss": 0.0001, "step": 32850 }, { "epoch": 0.9184780901797523, "grad_norm": 0.177734375, "learning_rate": 1.6819202210326367e-05, "loss": 0.0001, "step": 32900 }, { "epoch": 0.9198739535386881, "grad_norm": 0.234375, "learning_rate": 1.6531399297760893e-05, "loss": 0.0001, "step": 32950 }, { "epoch": 0.9212698168976239, "grad_norm": 0.208984375, "learning_rate": 1.624359638519542e-05, "loss": 0.0001, "step": 33000 }, { "epoch": 0.9226656802565597, "grad_norm": 0.74609375, "learning_rate": 1.5955793472629942e-05, "loss": 0.0001, "step": 33050 }, { "epoch": 0.9240615436154955, "grad_norm": 0.58984375, "learning_rate": 1.566799056006447e-05, "loss": 0.0001, "step": 33100 }, { "epoch": 0.9254574069744312, "grad_norm": 1.203125, "learning_rate": 1.5380187647498995e-05, "loss": 0.0001, "step": 33150 }, { "epoch": 0.9268532703333671, "grad_norm": 0.953125, "learning_rate": 1.5092384734933518e-05, "loss": 0.0001, "step": 33200 }, { "epoch": 0.9282491336923029, "grad_norm": 0.19140625, "learning_rate": 1.4804581822368044e-05, "loss": 0.0001, "step": 33250 }, { "epoch": 0.9296449970512387, "grad_norm": 0.99609375, "learning_rate": 1.4516778909802567e-05, "loss": 0.0001, "step": 33300 }, { "epoch": 0.9310408604101744, "grad_norm": 0.138671875, "learning_rate": 1.4228975997237094e-05, "loss": 0.0001, "step": 33350 }, { "epoch": 0.9324367237691102, "grad_norm": 0.5546875, "learning_rate": 1.3941173084671618e-05, "loss": 0.0001, "step": 33400 }, { "epoch": 0.9338325871280461, "grad_norm": 0.2255859375, "learning_rate": 1.3653370172106141e-05, "loss": 0.0001, "step": 33450 }, { "epoch": 0.9352284504869818, "grad_norm": 0.431640625, "learning_rate": 1.3365567259540667e-05, "loss": 0.0001, "step": 33500 }, { "epoch": 0.9366243138459176, "grad_norm": 1.359375, "learning_rate": 1.307776434697519e-05, "loss": 0.0001, "step": 33550 }, { "epoch": 0.9380201772048534, "grad_norm": 0.66015625, "learning_rate": 1.2789961434409717e-05, "loss": 0.0001, "step": 33600 }, { "epoch": 0.9394160405637892, "grad_norm": 1.609375, "learning_rate": 1.2502158521844243e-05, "loss": 0.0001, "step": 33650 }, { "epoch": 0.940811903922725, "grad_norm": 0.265625, "learning_rate": 1.2214355609278766e-05, "loss": 0.0001, "step": 33700 }, { "epoch": 0.9422077672816608, "grad_norm": 0.1513671875, "learning_rate": 1.1926552696713292e-05, "loss": 0.0001, "step": 33750 }, { "epoch": 0.9422077672816608, "eval_loss": 7.356254627666203e-06, "eval_mae": 0.0021641200874000788, "eval_rmse": 0.002712241606786847, "eval_runtime": 314.5626, "eval_samples_per_second": 6.358, "eval_steps_per_second": 6.358, "step": 33750 }, { "epoch": 0.9436036306405966, "grad_norm": 0.01300048828125, "learning_rate": 1.1638749784147817e-05, "loss": 0.0001, "step": 33800 }, { "epoch": 0.9449994939995324, "grad_norm": 0.056640625, "learning_rate": 1.1350946871582341e-05, "loss": 0.0001, "step": 33850 }, { "epoch": 0.9463953573584681, "grad_norm": 0.70703125, "learning_rate": 1.1063143959016866e-05, "loss": 0.0001, "step": 33900 }, { "epoch": 0.947791220717404, "grad_norm": 0.3515625, "learning_rate": 1.077534104645139e-05, "loss": 0.0001, "step": 33950 }, { "epoch": 0.9491870840763398, "grad_norm": 0.365234375, "learning_rate": 1.0487538133885915e-05, "loss": 0.0001, "step": 34000 }, { "epoch": 0.9505829474352756, "grad_norm": 0.283203125, "learning_rate": 1.019973522132044e-05, "loss": 0.0001, "step": 34050 }, { "epoch": 0.9519788107942113, "grad_norm": 0.61328125, "learning_rate": 9.911932308754965e-06, "loss": 0.0001, "step": 34100 }, { "epoch": 0.9533746741531471, "grad_norm": 0.5546875, "learning_rate": 9.624129396189489e-06, "loss": 0.0001, "step": 34150 }, { "epoch": 0.954770537512083, "grad_norm": 0.400390625, "learning_rate": 9.336326483624015e-06, "loss": 0.0001, "step": 34200 }, { "epoch": 0.9561664008710188, "grad_norm": 0.2119140625, "learning_rate": 9.04852357105854e-06, "loss": 0.0001, "step": 34250 }, { "epoch": 0.9575622642299545, "grad_norm": 0.294921875, "learning_rate": 8.760720658493065e-06, "loss": 0.0001, "step": 34300 }, { "epoch": 0.9589581275888903, "grad_norm": 0.404296875, "learning_rate": 8.47291774592759e-06, "loss": 0.0001, "step": 34350 }, { "epoch": 0.9603539909478261, "grad_norm": 1.03125, "learning_rate": 8.185114833362114e-06, "loss": 0.0001, "step": 34400 }, { "epoch": 0.961749854306762, "grad_norm": 0.1357421875, "learning_rate": 7.897311920796639e-06, "loss": 0.0001, "step": 34450 }, { "epoch": 0.9631457176656977, "grad_norm": 0.341796875, "learning_rate": 7.609509008231164e-06, "loss": 0.0001, "step": 34500 }, { "epoch": 0.9645415810246335, "grad_norm": 0.71875, "learning_rate": 7.321706095665689e-06, "loss": 0.0001, "step": 34550 }, { "epoch": 0.9659374443835693, "grad_norm": 0.1787109375, "learning_rate": 7.033903183100212e-06, "loss": 0.0001, "step": 34600 }, { "epoch": 0.967333307742505, "grad_norm": 0.052734375, "learning_rate": 6.746100270534739e-06, "loss": 0.0001, "step": 34650 }, { "epoch": 0.9687291711014409, "grad_norm": 0.875, "learning_rate": 6.458297357969263e-06, "loss": 0.0001, "step": 34700 }, { "epoch": 0.9701250344603767, "grad_norm": 0.310546875, "learning_rate": 6.170494445403788e-06, "loss": 0.0001, "step": 34750 }, { "epoch": 0.9715208978193125, "grad_norm": 0.453125, "learning_rate": 5.8826915328383125e-06, "loss": 0.0001, "step": 34800 }, { "epoch": 0.9729167611782482, "grad_norm": 0.88671875, "learning_rate": 5.594888620272837e-06, "loss": 0.0001, "step": 34850 }, { "epoch": 0.974312624537184, "grad_norm": 0.032470703125, "learning_rate": 5.307085707707362e-06, "loss": 0.0001, "step": 34900 }, { "epoch": 0.9757084878961199, "grad_norm": 1.5546875, "learning_rate": 5.019282795141887e-06, "loss": 0.0001, "step": 34950 }, { "epoch": 0.9771043512550557, "grad_norm": 1.2578125, "learning_rate": 4.731479882576412e-06, "loss": 0.0001, "step": 35000 }, { "epoch": 0.9771043512550557, "eval_loss": 7.189828011178179e-06, "eval_mae": 0.0021512035746127367, "eval_rmse": 0.0026813854929059744, "eval_runtime": 314.4052, "eval_samples_per_second": 6.361, "eval_steps_per_second": 6.361, "step": 35000 }, { "epoch": 0.9785002146139914, "grad_norm": 0.99609375, "learning_rate": 4.443676970010937e-06, "loss": 0.0001, "step": 35050 }, { "epoch": 0.9798960779729272, "grad_norm": 0.5390625, "learning_rate": 4.155874057445461e-06, "loss": 0.0001, "step": 35100 }, { "epoch": 0.981291941331863, "grad_norm": 0.83203125, "learning_rate": 3.8680711448799866e-06, "loss": 0.0001, "step": 35150 }, { "epoch": 0.9826878046907989, "grad_norm": 0.48046875, "learning_rate": 3.580268232314511e-06, "loss": 0.0001, "step": 35200 }, { "epoch": 0.9840836680497346, "grad_norm": 1.078125, "learning_rate": 3.292465319749036e-06, "loss": 0.0001, "step": 35250 }, { "epoch": 0.9854795314086704, "grad_norm": 0.3828125, "learning_rate": 3.004662407183561e-06, "loss": 0.0001, "step": 35300 }, { "epoch": 0.9868753947676062, "grad_norm": 0.466796875, "learning_rate": 2.716859494618086e-06, "loss": 0.0001, "step": 35350 }, { "epoch": 0.988271258126542, "grad_norm": 0.7734375, "learning_rate": 2.4290565820526105e-06, "loss": 0.0001, "step": 35400 }, { "epoch": 0.9896671214854778, "grad_norm": 1.3359375, "learning_rate": 2.141253669487135e-06, "loss": 0.0001, "step": 35450 }, { "epoch": 0.9910629848444136, "grad_norm": 0.2119140625, "learning_rate": 1.8534507569216602e-06, "loss": 0.0001, "step": 35500 }, { "epoch": 0.9924588482033494, "grad_norm": 0.1728515625, "learning_rate": 1.565647844356185e-06, "loss": 0.0001, "step": 35550 }, { "epoch": 0.9938547115622852, "grad_norm": 0.294921875, "learning_rate": 1.2778449317907098e-06, "loss": 0.0001, "step": 35600 }, { "epoch": 0.9952505749212209, "grad_norm": 0.2734375, "learning_rate": 9.900420192252346e-07, "loss": 0.0001, "step": 35650 }, { "epoch": 0.9966464382801568, "grad_norm": 0.287109375, "learning_rate": 7.022391066597595e-07, "loss": 0.0001, "step": 35700 }, { "epoch": 0.9980423016390926, "grad_norm": 0.98046875, "learning_rate": 4.144361940942842e-07, "loss": 0.0001, "step": 35750 }, { "epoch": 0.9994381649980283, "grad_norm": 0.341796875, "learning_rate": 1.2663328152880908e-07, "loss": 0.0001, "step": 35800 } ], "logging_steps": 50, "max_steps": 35821, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.780150075109409e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }