| { |
| "best_global_step": 35000, |
| "best_metric": 0.0021512035746127367, |
| "best_model_checkpoint": "D:\\Task_design\\Scene\\outputs\\qwen7b-lora-will_half_fp16_v2\\checkpoint-35000", |
| "epoch": 1.0, |
| "eval_steps": 1250, |
| "global_step": 35821, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0013958633589357939, |
| "grad_norm": 2320.0, |
| "learning_rate": 9.116279069767441e-06, |
| "loss": 120.7821, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0027917267178715877, |
| "grad_norm": 752.0, |
| "learning_rate": 1.841860465116279e-05, |
| "loss": 1.6562, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.004187590076807381, |
| "grad_norm": 264.0, |
| "learning_rate": 2.772093023255814e-05, |
| "loss": 0.5144, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.005583453435743175, |
| "grad_norm": 3120.0, |
| "learning_rate": 3.702325581395349e-05, |
| "loss": 0.9009, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.006979316794678969, |
| "grad_norm": 1296.0, |
| "learning_rate": 4.632558139534884e-05, |
| "loss": 1.4696, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.008375180153614763, |
| "grad_norm": 3632.0, |
| "learning_rate": 5.562790697674419e-05, |
| "loss": 1.8122, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.009771043512550556, |
| "grad_norm": 600.0, |
| "learning_rate": 6.493023255813954e-05, |
| "loss": 1.605, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.01116690687148635, |
| "grad_norm": 1888.0, |
| "learning_rate": 7.423255813953489e-05, |
| "loss": 1.046, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.012562770230422144, |
| "grad_norm": 137.0, |
| "learning_rate": 8.353488372093023e-05, |
| "loss": 1.1465, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.013958633589357937, |
| "grad_norm": 1984.0, |
| "learning_rate": 9.283720930232559e-05, |
| "loss": 5.8899, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.015354496948293732, |
| "grad_norm": 2528.0, |
| "learning_rate": 0.00010213953488372094, |
| "loss": 1.0712, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.016750360307229525, |
| "grad_norm": 4.34375, |
| "learning_rate": 0.00011144186046511629, |
| "loss": 0.4232, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01814622366616532, |
| "grad_norm": 249.0, |
| "learning_rate": 0.00012074418604651163, |
| "loss": 0.837, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.019542087025101112, |
| "grad_norm": 1224.0, |
| "learning_rate": 0.000130046511627907, |
| "loss": 0.4405, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.020937950384036907, |
| "grad_norm": 2800.0, |
| "learning_rate": 0.00013934883720930234, |
| "loss": 1.175, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0223338137429727, |
| "grad_norm": 124.0, |
| "learning_rate": 0.0001486511627906977, |
| "loss": 1.2591, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.023729677101908493, |
| "grad_norm": 1056.0, |
| "learning_rate": 0.00015795348837209302, |
| "loss": 0.4401, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.025125540460844288, |
| "grad_norm": 1208.0, |
| "learning_rate": 0.00016725581395348837, |
| "loss": 0.4283, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.026521403819780083, |
| "grad_norm": 1048.0, |
| "learning_rate": 0.00017655813953488373, |
| "loss": 0.4223, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.027917267178715875, |
| "grad_norm": 22.25, |
| "learning_rate": 0.00018586046511627908, |
| "loss": 0.1113, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02931313053765167, |
| "grad_norm": 127.5, |
| "learning_rate": 0.00019516279069767444, |
| "loss": 0.0873, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.030708993896587464, |
| "grad_norm": 712.0, |
| "learning_rate": 0.00019986185460196858, |
| "loss": 0.5109, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.032104857255523256, |
| "grad_norm": 180.0, |
| "learning_rate": 0.0001995740516894031, |
| "loss": 0.4199, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.03350072061445905, |
| "grad_norm": 984.0, |
| "learning_rate": 0.00019928624877683763, |
| "loss": 0.6015, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.034896583973394846, |
| "grad_norm": 616.0, |
| "learning_rate": 0.00019899844586427216, |
| "loss": 0.5867, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.034896583973394846, |
| "eval_loss": 0.0031845432240515947, |
| "eval_mae": 0.045285664498806, |
| "eval_rmse": 0.05643175542354584, |
| "eval_runtime": 320.3726, |
| "eval_samples_per_second": 6.243, |
| "eval_steps_per_second": 6.243, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.03629244733233064, |
| "grad_norm": 132.0, |
| "learning_rate": 0.00019871064295170666, |
| "loss": 0.3113, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.03768831069126643, |
| "grad_norm": 26.375, |
| "learning_rate": 0.0001984228400391412, |
| "loss": 0.0702, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.039084174050202224, |
| "grad_norm": 212.0, |
| "learning_rate": 0.00019813503712657574, |
| "loss": 0.2048, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.04048003740913802, |
| "grad_norm": 260.0, |
| "learning_rate": 0.00019784723421401027, |
| "loss": 0.0669, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.041875900768073814, |
| "grad_norm": 113.5, |
| "learning_rate": 0.0001975594313014448, |
| "loss": 0.0604, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.04327176412700961, |
| "grad_norm": 36.0, |
| "learning_rate": 0.00019727162838887932, |
| "loss": 0.0539, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.0446676274859454, |
| "grad_norm": 290.0, |
| "learning_rate": 0.00019698382547631382, |
| "loss": 0.0539, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.04606349084488119, |
| "grad_norm": 202.0, |
| "learning_rate": 0.00019669602256374834, |
| "loss": 0.0621, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.047459354203816986, |
| "grad_norm": 464.0, |
| "learning_rate": 0.00019640821965118287, |
| "loss": 0.178, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.04885521756275278, |
| "grad_norm": 108.5, |
| "learning_rate": 0.0001961204167386174, |
| "loss": 0.1804, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.050251080921688576, |
| "grad_norm": 252.0, |
| "learning_rate": 0.00019583261382605195, |
| "loss": 0.1206, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.05164694428062437, |
| "grad_norm": 1376.0, |
| "learning_rate": 0.00019554481091348648, |
| "loss": 0.8041, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.053042807639560166, |
| "grad_norm": 214.0, |
| "learning_rate": 0.00019525700800092098, |
| "loss": 0.2037, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.054438670998495954, |
| "grad_norm": 324.0, |
| "learning_rate": 0.0001949692050883555, |
| "loss": 0.2246, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.05583453435743175, |
| "grad_norm": 148.0, |
| "learning_rate": 0.00019468140217579003, |
| "loss": 0.1815, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.057230397716367544, |
| "grad_norm": 270.0, |
| "learning_rate": 0.00019439359926322455, |
| "loss": 0.1532, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.05862626107530334, |
| "grad_norm": 145.0, |
| "learning_rate": 0.00019410579635065908, |
| "loss": 0.1351, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.060022124434239134, |
| "grad_norm": 130.0, |
| "learning_rate": 0.00019381799343809358, |
| "loss": 0.1393, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.06141798779317493, |
| "grad_norm": 231.0, |
| "learning_rate": 0.00019353019052552813, |
| "loss": 0.1446, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.06281385115211072, |
| "grad_norm": 3.25, |
| "learning_rate": 0.00019324238761296266, |
| "loss": 0.1403, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.06420971451104651, |
| "grad_norm": 123.5, |
| "learning_rate": 0.00019295458470039719, |
| "loss": 0.3684, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.0656055778699823, |
| "grad_norm": 20.125, |
| "learning_rate": 0.0001926667817878317, |
| "loss": 0.0694, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.0670014412289181, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00019237897887526624, |
| "loss": 0.0897, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.0683973045878539, |
| "grad_norm": 25.0, |
| "learning_rate": 0.00019209117596270074, |
| "loss": 0.128, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.06979316794678969, |
| "grad_norm": 24.125, |
| "learning_rate": 0.00019180337305013526, |
| "loss": 0.1388, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.06979316794678969, |
| "eval_loss": 0.011298904195427895, |
| "eval_mae": 0.09956549108028412, |
| "eval_rmse": 0.10629630088806152, |
| "eval_runtime": 316.8013, |
| "eval_samples_per_second": 6.313, |
| "eval_steps_per_second": 6.313, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.07118903130572549, |
| "grad_norm": 19.375, |
| "learning_rate": 0.0001915155701375698, |
| "loss": 0.1087, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.07258489466466128, |
| "grad_norm": 7.8125, |
| "learning_rate": 0.00019122776722500432, |
| "loss": 0.0907, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.07398075802359708, |
| "grad_norm": 13.8125, |
| "learning_rate": 0.00019093996431243887, |
| "loss": 0.0812, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.07537662138253286, |
| "grad_norm": 24.25, |
| "learning_rate": 0.0001906521613998734, |
| "loss": 0.0707, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.07677248474146865, |
| "grad_norm": 18.125, |
| "learning_rate": 0.0001903643584873079, |
| "loss": 0.0657, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.07816834810040445, |
| "grad_norm": 9.5625, |
| "learning_rate": 0.00019007655557474242, |
| "loss": 0.0598, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.07956421145934024, |
| "grad_norm": 15.3125, |
| "learning_rate": 0.00018978875266217695, |
| "loss": 0.0549, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.08096007481827604, |
| "grad_norm": 9.9375, |
| "learning_rate": 0.00018950094974961147, |
| "loss": 0.052, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.08235593817721183, |
| "grad_norm": 22.625, |
| "learning_rate": 0.000189213146837046, |
| "loss": 0.0501, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.08375180153614763, |
| "grad_norm": 16.125, |
| "learning_rate": 0.00018892534392448053, |
| "loss": 0.0433, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08514766489508342, |
| "grad_norm": 19.875, |
| "learning_rate": 0.00018863754101191505, |
| "loss": 0.0489, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.08654352825401922, |
| "grad_norm": 15.625, |
| "learning_rate": 0.00018834973809934958, |
| "loss": 0.0422, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.08793939161295501, |
| "grad_norm": 15.0625, |
| "learning_rate": 0.0001880619351867841, |
| "loss": 0.0415, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.0893352549718908, |
| "grad_norm": 12.5625, |
| "learning_rate": 0.00018777413227421863, |
| "loss": 0.0372, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.0907311183308266, |
| "grad_norm": 13.875, |
| "learning_rate": 0.00018748632936165316, |
| "loss": 0.0393, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.09212698168976238, |
| "grad_norm": 19.5, |
| "learning_rate": 0.00018719852644908766, |
| "loss": 0.0396, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.09352284504869818, |
| "grad_norm": 14.875, |
| "learning_rate": 0.00018691072353652218, |
| "loss": 0.0293, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.09491870840763397, |
| "grad_norm": 76.5, |
| "learning_rate": 0.0001866229206239567, |
| "loss": 0.0334, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.09631457176656977, |
| "grad_norm": 9.625, |
| "learning_rate": 0.00018633511771139124, |
| "loss": 0.0627, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.09771043512550556, |
| "grad_norm": 15.3125, |
| "learning_rate": 0.0001860473147988258, |
| "loss": 0.0262, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.09910629848444136, |
| "grad_norm": 15.625, |
| "learning_rate": 0.00018575951188626032, |
| "loss": 0.0318, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.10050216184337715, |
| "grad_norm": 17.0, |
| "learning_rate": 0.00018547170897369482, |
| "loss": 0.0431, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.10189802520231295, |
| "grad_norm": 3.3125, |
| "learning_rate": 0.00018518390606112934, |
| "loss": 0.0086, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.10329388856124874, |
| "grad_norm": 1.0546875, |
| "learning_rate": 0.00018489610314856387, |
| "loss": 0.0084, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.10468975192018454, |
| "grad_norm": 3.21875, |
| "learning_rate": 0.0001846083002359984, |
| "loss": 0.0117, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.10468975192018454, |
| "eval_loss": 0.0015581471379846334, |
| "eval_mae": 0.036334387958049774, |
| "eval_rmse": 0.03947337344288826, |
| "eval_runtime": 319.7128, |
| "eval_samples_per_second": 6.256, |
| "eval_steps_per_second": 6.256, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.10608561527912033, |
| "grad_norm": 42.75, |
| "learning_rate": 0.00018432049732343292, |
| "loss": 0.0319, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.10748147863805613, |
| "grad_norm": 39.25, |
| "learning_rate": 0.00018403269441086745, |
| "loss": 0.0341, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.10887734199699191, |
| "grad_norm": 44.25, |
| "learning_rate": 0.00018374489149830197, |
| "loss": 0.0326, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.1102732053559277, |
| "grad_norm": 35.0, |
| "learning_rate": 0.0001834570885857365, |
| "loss": 0.032, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.1116690687148635, |
| "grad_norm": 36.75, |
| "learning_rate": 0.00018316928567317103, |
| "loss": 0.0306, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.11306493207379929, |
| "grad_norm": 25.875, |
| "learning_rate": 0.00018288148276060555, |
| "loss": 0.0298, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.11446079543273509, |
| "grad_norm": 38.0, |
| "learning_rate": 0.00018259367984804008, |
| "loss": 0.031, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.11585665879167088, |
| "grad_norm": 33.0, |
| "learning_rate": 0.0001823058769354746, |
| "loss": 0.0284, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.11725252215060668, |
| "grad_norm": 30.125, |
| "learning_rate": 0.0001820180740229091, |
| "loss": 0.0229, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.11864838550954247, |
| "grad_norm": 14.375, |
| "learning_rate": 0.00018173027111034363, |
| "loss": 0.0119, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.12004424886847827, |
| "grad_norm": 40.5, |
| "learning_rate": 0.00018144246819777818, |
| "loss": 0.0351, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.12144011222741406, |
| "grad_norm": 50.0, |
| "learning_rate": 0.0001811546652852127, |
| "loss": 0.0157, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.12283597558634986, |
| "grad_norm": 12.25, |
| "learning_rate": 0.00018086686237264724, |
| "loss": 0.0222, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.12423183894528565, |
| "grad_norm": 27.0, |
| "learning_rate": 0.00018057905946008174, |
| "loss": 0.0129, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.12562770230422143, |
| "grad_norm": 4.6875, |
| "learning_rate": 0.00018029125654751626, |
| "loss": 0.0162, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.12702356566315723, |
| "grad_norm": 6.9375, |
| "learning_rate": 0.0001800034536349508, |
| "loss": 0.0038, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.12841942902209302, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.00017971565072238531, |
| "loss": 0.0032, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.12981529238102882, |
| "grad_norm": 9.0, |
| "learning_rate": 0.00017942784780981984, |
| "loss": 0.0035, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.1312111557399646, |
| "grad_norm": 5.0625, |
| "learning_rate": 0.00017914004489725437, |
| "loss": 0.0022, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.1326070190989004, |
| "grad_norm": 9.4375, |
| "learning_rate": 0.0001788522419846889, |
| "loss": 0.0017, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.1340028824578362, |
| "grad_norm": 3.53125, |
| "learning_rate": 0.00017856443907212342, |
| "loss": 0.0024, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.135398745816772, |
| "grad_norm": 11.4375, |
| "learning_rate": 0.00017827663615955795, |
| "loss": 0.0022, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.1367946091757078, |
| "grad_norm": 11.875, |
| "learning_rate": 0.00017798883324699247, |
| "loss": 0.0038, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.1381904725346436, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.000177701030334427, |
| "loss": 0.0036, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.13958633589357938, |
| "grad_norm": 10.75, |
| "learning_rate": 0.00017741322742186153, |
| "loss": 0.0028, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.13958633589357938, |
| "eval_loss": 0.00022272480418905616, |
| "eval_mae": 0.012231973931193352, |
| "eval_rmse": 0.01492396742105484, |
| "eval_runtime": 319.6535, |
| "eval_samples_per_second": 6.257, |
| "eval_steps_per_second": 6.257, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.14098219925251518, |
| "grad_norm": 7.15625, |
| "learning_rate": 0.00017712542450929602, |
| "loss": 0.0055, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.14237806261145097, |
| "grad_norm": 6.84375, |
| "learning_rate": 0.00017683762159673055, |
| "loss": 0.0032, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.14377392597038677, |
| "grad_norm": 11.75, |
| "learning_rate": 0.0001765498186841651, |
| "loss": 0.0031, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.14516978932932256, |
| "grad_norm": 8.1875, |
| "learning_rate": 0.00017626201577159963, |
| "loss": 0.0027, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.14656565268825836, |
| "grad_norm": 10.0625, |
| "learning_rate": 0.00017597421285903416, |
| "loss": 0.0025, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.14796151604719415, |
| "grad_norm": 10.9375, |
| "learning_rate": 0.00017568640994646868, |
| "loss": 0.0027, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.14935737940612995, |
| "grad_norm": 9.875, |
| "learning_rate": 0.00017539860703390318, |
| "loss": 0.0028, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.15075324276506571, |
| "grad_norm": 7.5625, |
| "learning_rate": 0.0001751108041213377, |
| "loss": 0.0026, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.1521491061240015, |
| "grad_norm": 22.875, |
| "learning_rate": 0.00017482300120877223, |
| "loss": 0.0032, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.1535449694829373, |
| "grad_norm": 50.75, |
| "learning_rate": 0.00017453519829620676, |
| "loss": 0.0356, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1549408328418731, |
| "grad_norm": 48.25, |
| "learning_rate": 0.0001742473953836413, |
| "loss": 0.0495, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.1563366962008089, |
| "grad_norm": 8.8125, |
| "learning_rate": 0.00017395959247107581, |
| "loss": 0.048, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.1577325595597447, |
| "grad_norm": 54.25, |
| "learning_rate": 0.00017367178955851034, |
| "loss": 0.0461, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.15912842291868048, |
| "grad_norm": 63.75, |
| "learning_rate": 0.00017338398664594487, |
| "loss": 0.0495, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.16052428627761628, |
| "grad_norm": 55.0, |
| "learning_rate": 0.0001730961837333794, |
| "loss": 0.033, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.16192014963655207, |
| "grad_norm": 39.75, |
| "learning_rate": 0.00017280838082081392, |
| "loss": 0.0453, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.16331601299548787, |
| "grad_norm": 32.75, |
| "learning_rate": 0.00017252057790824845, |
| "loss": 0.0417, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.16471187635442366, |
| "grad_norm": 44.5, |
| "learning_rate": 0.00017223277499568294, |
| "loss": 0.044, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.16610773971335946, |
| "grad_norm": 30.875, |
| "learning_rate": 0.00017194497208311747, |
| "loss": 0.0414, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.16750360307229525, |
| "grad_norm": 31.0, |
| "learning_rate": 0.00017165716917055202, |
| "loss": 0.0392, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.16889946643123105, |
| "grad_norm": 52.75, |
| "learning_rate": 0.00017136936625798655, |
| "loss": 0.0404, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.17029532979016684, |
| "grad_norm": 39.5, |
| "learning_rate": 0.00017108156334542108, |
| "loss": 0.042, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.17169119314910264, |
| "grad_norm": 19.375, |
| "learning_rate": 0.0001707937604328556, |
| "loss": 0.006, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.17308705650803843, |
| "grad_norm": 42.5, |
| "learning_rate": 0.0001705059575202901, |
| "loss": 0.014, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.17448291986697423, |
| "grad_norm": 18.5, |
| "learning_rate": 0.00017021815460772463, |
| "loss": 0.0145, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.17448291986697423, |
| "eval_loss": 0.0018714327597990632, |
| "eval_mae": 0.04104918614029884, |
| "eval_rmse": 0.043260060250759125, |
| "eval_runtime": 318.582, |
| "eval_samples_per_second": 6.278, |
| "eval_steps_per_second": 6.278, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.17587878322591002, |
| "grad_norm": 7.4375, |
| "learning_rate": 0.00016993035169515915, |
| "loss": 0.0104, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.17727464658484582, |
| "grad_norm": 34.25, |
| "learning_rate": 0.00016964254878259368, |
| "loss": 0.0038, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.1786705099437816, |
| "grad_norm": 18.125, |
| "learning_rate": 0.0001693547458700282, |
| "loss": 0.015, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.1800663733027174, |
| "grad_norm": 0.494140625, |
| "learning_rate": 0.00016906694295746276, |
| "loss": 0.0137, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.1814622366616532, |
| "grad_norm": 12.5, |
| "learning_rate": 0.00016877914004489726, |
| "loss": 0.0044, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.182858100020589, |
| "grad_norm": 9.625, |
| "learning_rate": 0.0001684913371323318, |
| "loss": 0.0156, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.18425396337952477, |
| "grad_norm": 6.34375, |
| "learning_rate": 0.0001682035342197663, |
| "loss": 0.0021, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.18564982673846056, |
| "grad_norm": 11.9375, |
| "learning_rate": 0.00016791573130720084, |
| "loss": 0.0019, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.18704569009739636, |
| "grad_norm": 5.65625, |
| "learning_rate": 0.00016762792839463537, |
| "loss": 0.0019, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.18844155345633215, |
| "grad_norm": 7.625, |
| "learning_rate": 0.00016734012548206986, |
| "loss": 0.0019, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.18983741681526795, |
| "grad_norm": 27.375, |
| "learning_rate": 0.00016705232256950442, |
| "loss": 0.0029, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.19123328017420374, |
| "grad_norm": 3.171875, |
| "learning_rate": 0.00016676451965693894, |
| "loss": 0.0114, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.19262914353313954, |
| "grad_norm": 6.4375, |
| "learning_rate": 0.00016647671674437347, |
| "loss": 0.0166, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.19402500689207533, |
| "grad_norm": 16.125, |
| "learning_rate": 0.000166188913831808, |
| "loss": 0.0129, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.19542087025101113, |
| "grad_norm": 7.21875, |
| "learning_rate": 0.00016590111091924252, |
| "loss": 0.0052, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.19681673360994692, |
| "grad_norm": 15.1875, |
| "learning_rate": 0.00016561330800667702, |
| "loss": 0.0051, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.19821259696888271, |
| "grad_norm": 6.46875, |
| "learning_rate": 0.00016532550509411155, |
| "loss": 0.0049, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.1996084603278185, |
| "grad_norm": 12.0625, |
| "learning_rate": 0.00016503770218154608, |
| "loss": 0.0051, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.2010043236867543, |
| "grad_norm": 3.53125, |
| "learning_rate": 0.0001647498992689806, |
| "loss": 0.0044, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.2024001870456901, |
| "grad_norm": 4.84375, |
| "learning_rate": 0.00016446209635641513, |
| "loss": 0.0066, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.2037960504046259, |
| "grad_norm": 4.03125, |
| "learning_rate": 0.00016417429344384968, |
| "loss": 0.0072, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.2051919137635617, |
| "grad_norm": 3.203125, |
| "learning_rate": 0.00016388649053128418, |
| "loss": 0.0007, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.20658777712249748, |
| "grad_norm": 4.59375, |
| "learning_rate": 0.0001635986876187187, |
| "loss": 0.001, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.20798364048143328, |
| "grad_norm": 8.375, |
| "learning_rate": 0.00016331088470615323, |
| "loss": 0.0017, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.20937950384036907, |
| "grad_norm": 7.09375, |
| "learning_rate": 0.00016302308179358776, |
| "loss": 0.0018, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.20937950384036907, |
| "eval_loss": 0.00019140982476528734, |
| "eval_mae": 0.012697141617536545, |
| "eval_rmse": 0.013835093937814236, |
| "eval_runtime": 321.0771, |
| "eval_samples_per_second": 6.229, |
| "eval_steps_per_second": 6.229, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.21077536719930487, |
| "grad_norm": 5.3125, |
| "learning_rate": 0.00016273527888102229, |
| "loss": 0.0018, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.21217123055824066, |
| "grad_norm": 12.4375, |
| "learning_rate": 0.0001624474759684568, |
| "loss": 0.0018, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.21356709391717646, |
| "grad_norm": 7.75, |
| "learning_rate": 0.00016215967305589134, |
| "loss": 0.0012, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.21496295727611225, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.00016187187014332586, |
| "loss": 0.0029, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.21635882063504802, |
| "grad_norm": 13.6875, |
| "learning_rate": 0.0001615840672307604, |
| "loss": 0.0057, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.21775468399398382, |
| "grad_norm": 4.1875, |
| "learning_rate": 0.00016129626431819492, |
| "loss": 0.0058, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.2191505473529196, |
| "grad_norm": 29.375, |
| "learning_rate": 0.00016100846140562944, |
| "loss": 0.0038, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.2205464107118554, |
| "grad_norm": 3.15625, |
| "learning_rate": 0.00016072065849306394, |
| "loss": 0.0031, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.2219422740707912, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.00016043285558049847, |
| "loss": 0.0017, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.223338137429727, |
| "grad_norm": 2.703125, |
| "learning_rate": 0.000160145052667933, |
| "loss": 0.0019, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2247340007886628, |
| "grad_norm": 13.0625, |
| "learning_rate": 0.00015985724975536752, |
| "loss": 0.0015, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.22612986414759859, |
| "grad_norm": 1.1328125, |
| "learning_rate": 0.00015956944684280208, |
| "loss": 0.0025, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.22752572750653438, |
| "grad_norm": 20.0, |
| "learning_rate": 0.0001592816439302366, |
| "loss": 0.0018, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.22892159086547018, |
| "grad_norm": 3.625, |
| "learning_rate": 0.0001589938410176711, |
| "loss": 0.0024, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.23031745422440597, |
| "grad_norm": 11.625, |
| "learning_rate": 0.00015870603810510563, |
| "loss": 0.0071, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.23171331758334177, |
| "grad_norm": 13.125, |
| "learning_rate": 0.00015841823519254015, |
| "loss": 0.0128, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.23310918094227756, |
| "grad_norm": 17.625, |
| "learning_rate": 0.00015813043227997468, |
| "loss": 0.0122, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.23450504430121336, |
| "grad_norm": 14.0625, |
| "learning_rate": 0.0001578426293674092, |
| "loss": 0.0125, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.23590090766014915, |
| "grad_norm": 11.125, |
| "learning_rate": 0.00015755482645484373, |
| "loss": 0.0129, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.23729677101908495, |
| "grad_norm": 22.875, |
| "learning_rate": 0.00015726702354227826, |
| "loss": 0.0136, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.23869263437802074, |
| "grad_norm": 11.4375, |
| "learning_rate": 0.00015697922062971278, |
| "loss": 0.0118, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.24008849773695654, |
| "grad_norm": 14.5625, |
| "learning_rate": 0.0001566914177171473, |
| "loss": 0.0032, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.24148436109589233, |
| "grad_norm": 9.5625, |
| "learning_rate": 0.00015640361480458184, |
| "loss": 0.0014, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.24288022445482813, |
| "grad_norm": 7.28125, |
| "learning_rate": 0.00015611581189201636, |
| "loss": 0.0016, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.24427608781376392, |
| "grad_norm": 4.53125, |
| "learning_rate": 0.0001558280089794509, |
| "loss": 0.0013, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.24427608781376392, |
| "eval_loss": 0.00014836130139883608, |
| "eval_mae": 0.01122231688350439, |
| "eval_rmse": 0.01218036562204361, |
| "eval_runtime": 319.6629, |
| "eval_samples_per_second": 6.257, |
| "eval_steps_per_second": 6.257, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.24567195117269972, |
| "grad_norm": 3.875, |
| "learning_rate": 0.0001555402060668854, |
| "loss": 0.0013, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.2470678145316355, |
| "grad_norm": 10.4375, |
| "learning_rate": 0.00015525240315431992, |
| "loss": 0.0017, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.2484636778905713, |
| "grad_norm": 2.96875, |
| "learning_rate": 0.00015496460024175444, |
| "loss": 0.0024, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.24985954124950707, |
| "grad_norm": 10.1875, |
| "learning_rate": 0.000154676797329189, |
| "loss": 0.0051, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.25125540460844287, |
| "grad_norm": 3.015625, |
| "learning_rate": 0.00015438899441662352, |
| "loss": 0.0012, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.2526512679673787, |
| "grad_norm": 7.90625, |
| "learning_rate": 0.00015410119150405802, |
| "loss": 0.0103, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.25404713132631446, |
| "grad_norm": 2.796875, |
| "learning_rate": 0.00015381338859149255, |
| "loss": 0.0014, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.2554429946852503, |
| "grad_norm": 27.75, |
| "learning_rate": 0.00015352558567892707, |
| "loss": 0.0141, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.25683885804418605, |
| "grad_norm": 2.65625, |
| "learning_rate": 0.0001532377827663616, |
| "loss": 0.0078, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.25823472140312187, |
| "grad_norm": 16.125, |
| "learning_rate": 0.00015294997985379613, |
| "loss": 0.0141, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.25963058476205764, |
| "grad_norm": 0.609375, |
| "learning_rate": 0.00015266217694123065, |
| "loss": 0.0038, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.26102644812099346, |
| "grad_norm": 25.0, |
| "learning_rate": 0.00015237437402866518, |
| "loss": 0.0035, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.2624223114799292, |
| "grad_norm": 16.5, |
| "learning_rate": 0.0001520865711160997, |
| "loss": 0.0106, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.26381817483886505, |
| "grad_norm": 18.375, |
| "learning_rate": 0.00015179876820353423, |
| "loss": 0.0106, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.2652140381978008, |
| "grad_norm": 16.5, |
| "learning_rate": 0.00015151096529096876, |
| "loss": 0.0104, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.2666099015567366, |
| "grad_norm": 18.25, |
| "learning_rate": 0.00015122316237840328, |
| "loss": 0.0103, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.2680057649156724, |
| "grad_norm": 18.875, |
| "learning_rate": 0.0001509353594658378, |
| "loss": 0.0098, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.2694016282746082, |
| "grad_norm": 18.625, |
| "learning_rate": 0.0001506475565532723, |
| "loss": 0.0098, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.270797491633544, |
| "grad_norm": 21.625, |
| "learning_rate": 0.00015035975364070684, |
| "loss": 0.0097, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.27219335499247976, |
| "grad_norm": 21.0, |
| "learning_rate": 0.00015007195072814136, |
| "loss": 0.0094, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.2735892183514156, |
| "grad_norm": 18.25, |
| "learning_rate": 0.00014978414781557592, |
| "loss": 0.0094, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.27498508171035135, |
| "grad_norm": 14.25, |
| "learning_rate": 0.00014949634490301044, |
| "loss": 0.0095, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.2763809450692872, |
| "grad_norm": 16.25, |
| "learning_rate": 0.00014920854199044497, |
| "loss": 0.0098, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.27777680842822294, |
| "grad_norm": 18.5, |
| "learning_rate": 0.00014892073907787947, |
| "loss": 0.0092, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.27917267178715877, |
| "grad_norm": 15.4375, |
| "learning_rate": 0.000148632936165314, |
| "loss": 0.0093, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.27917267178715877, |
| "eval_loss": 8.570039790356532e-05, |
| "eval_mae": 0.00781923346221447, |
| "eval_rmse": 0.009257450699806213, |
| "eval_runtime": 319.3051, |
| "eval_samples_per_second": 6.264, |
| "eval_steps_per_second": 6.264, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.28056853514609453, |
| "grad_norm": 17.0, |
| "learning_rate": 0.00014834513325274852, |
| "loss": 0.0093, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.28196439850503036, |
| "grad_norm": 12.3125, |
| "learning_rate": 0.00014805733034018305, |
| "loss": 0.0093, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.2833602618639661, |
| "grad_norm": 14.875, |
| "learning_rate": 0.00014776952742761757, |
| "loss": 0.0094, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.28475612522290195, |
| "grad_norm": 15.6875, |
| "learning_rate": 0.0001474817245150521, |
| "loss": 0.0095, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.2861519885818377, |
| "grad_norm": 5.65625, |
| "learning_rate": 0.00014719392160248663, |
| "loss": 0.003, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.28754785194077354, |
| "grad_norm": 2.40625, |
| "learning_rate": 0.00014690611868992115, |
| "loss": 0.0037, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.2889437152997093, |
| "grad_norm": 15.5625, |
| "learning_rate": 0.00014661831577735568, |
| "loss": 0.0031, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.2903395786586451, |
| "grad_norm": 7.21875, |
| "learning_rate": 0.0001463305128647902, |
| "loss": 0.0011, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.2917354420175809, |
| "grad_norm": 14.375, |
| "learning_rate": 0.00014604270995222473, |
| "loss": 0.001, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.2931313053765167, |
| "grad_norm": 24.5, |
| "learning_rate": 0.00014575490703965923, |
| "loss": 0.0058, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.2945271687354525, |
| "grad_norm": 21.75, |
| "learning_rate": 0.00014546710412709376, |
| "loss": 0.0087, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.2959230320943883, |
| "grad_norm": 23.375, |
| "learning_rate": 0.0001451793012145283, |
| "loss": 0.0082, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.2973188954533241, |
| "grad_norm": 20.875, |
| "learning_rate": 0.00014489149830196284, |
| "loss": 0.0084, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.2987147588122599, |
| "grad_norm": 23.375, |
| "learning_rate": 0.00014460369538939736, |
| "loss": 0.0079, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.30011062217119566, |
| "grad_norm": 25.75, |
| "learning_rate": 0.0001443158924768319, |
| "loss": 0.0086, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.30150648553013143, |
| "grad_norm": 8.6875, |
| "learning_rate": 0.0001440280895642664, |
| "loss": 0.0018, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.30290234888906725, |
| "grad_norm": 20.375, |
| "learning_rate": 0.00014374028665170091, |
| "loss": 0.0011, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.304298212248003, |
| "grad_norm": 11.75, |
| "learning_rate": 0.00014345248373913544, |
| "loss": 0.0047, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.30569407560693884, |
| "grad_norm": 10.375, |
| "learning_rate": 0.00014316468082656997, |
| "loss": 0.0027, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.3070899389658746, |
| "grad_norm": 7.21875, |
| "learning_rate": 0.0001428768779140045, |
| "loss": 0.0026, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.30848580232481043, |
| "grad_norm": 9.0625, |
| "learning_rate": 0.00014258907500143902, |
| "loss": 0.0026, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.3098816656837462, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 0.00014230127208887355, |
| "loss": 0.0026, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.311277529042682, |
| "grad_norm": 5.78125, |
| "learning_rate": 0.00014201346917630807, |
| "loss": 0.0016, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.3126733924016178, |
| "grad_norm": 16.125, |
| "learning_rate": 0.0001417256662637426, |
| "loss": 0.0027, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.3140692557605536, |
| "grad_norm": 5.6875, |
| "learning_rate": 0.00014143786335117712, |
| "loss": 0.0033, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.3140692557605536, |
| "eval_loss": 0.00012493817484937608, |
| "eval_mae": 0.009413574822247028, |
| "eval_rmse": 0.011177574284374714, |
| "eval_runtime": 314.115, |
| "eval_samples_per_second": 6.367, |
| "eval_steps_per_second": 6.367, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.3154651191194894, |
| "grad_norm": 10.0625, |
| "learning_rate": 0.00014115006043861165, |
| "loss": 0.013, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.3168609824784252, |
| "grad_norm": 5.5625, |
| "learning_rate": 0.00014086225752604615, |
| "loss": 0.0025, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.31825684583736097, |
| "grad_norm": 8.375, |
| "learning_rate": 0.00014057445461348068, |
| "loss": 0.0025, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.3196527091962968, |
| "grad_norm": 9.0, |
| "learning_rate": 0.00014028665170091523, |
| "loss": 0.0024, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.32104857255523256, |
| "grad_norm": 13.75, |
| "learning_rate": 0.00013999884878834976, |
| "loss": 0.0025, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.3224444359141684, |
| "grad_norm": 3.453125, |
| "learning_rate": 0.00013971104587578428, |
| "loss": 0.0025, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.32384029927310415, |
| "grad_norm": 8.375, |
| "learning_rate": 0.0001394232429632188, |
| "loss": 0.0024, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.32523616263203997, |
| "grad_norm": 8.875, |
| "learning_rate": 0.0001391354400506533, |
| "loss": 0.0025, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.32663202599097574, |
| "grad_norm": 11.8125, |
| "learning_rate": 0.00013884763713808783, |
| "loss": 0.0025, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.32802788934991156, |
| "grad_norm": 3.1875, |
| "learning_rate": 0.00013855983422552236, |
| "loss": 0.0028, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.32942375270884733, |
| "grad_norm": 1.0546875, |
| "learning_rate": 0.0001382720313129569, |
| "loss": 0.0016, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.33081961606778315, |
| "grad_norm": 7.15625, |
| "learning_rate": 0.0001379842284003914, |
| "loss": 0.0044, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.3322154794267189, |
| "grad_norm": 4.71875, |
| "learning_rate": 0.00013769642548782597, |
| "loss": 0.0022, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.3336113427856547, |
| "grad_norm": 13.9375, |
| "learning_rate": 0.00013740862257526047, |
| "loss": 0.0024, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.3350072061445905, |
| "grad_norm": 6.75, |
| "learning_rate": 0.000137120819662695, |
| "loss": 0.0025, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.3364030695035263, |
| "grad_norm": 7.15625, |
| "learning_rate": 0.00013683301675012952, |
| "loss": 0.0023, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.3377989328624621, |
| "grad_norm": 3.515625, |
| "learning_rate": 0.00013654521383756404, |
| "loss": 0.0024, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.33919479622139787, |
| "grad_norm": 12.875, |
| "learning_rate": 0.00013625741092499857, |
| "loss": 0.0024, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.3405906595803337, |
| "grad_norm": 11.25, |
| "learning_rate": 0.0001359696080124331, |
| "loss": 0.0036, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.34198652293926946, |
| "grad_norm": 8.8125, |
| "learning_rate": 0.0001356818050998676, |
| "loss": 0.0025, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.3433823862982053, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 0.00013539400218730215, |
| "loss": 0.0023, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.34477824965714104, |
| "grad_norm": 7.40625, |
| "learning_rate": 0.00013510619927473668, |
| "loss": 0.005, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.34617411301607687, |
| "grad_norm": 8.0, |
| "learning_rate": 0.0001348183963621712, |
| "loss": 0.0022, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.34756997637501263, |
| "grad_norm": 4.1875, |
| "learning_rate": 0.00013453059344960573, |
| "loss": 0.0023, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.34896583973394846, |
| "grad_norm": 15.5625, |
| "learning_rate": 0.00013424279053704023, |
| "loss": 0.0048, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.34896583973394846, |
| "eval_loss": 0.0007875896408222616, |
| "eval_mae": 0.027495475485920906, |
| "eval_rmse": 0.028064027428627014, |
| "eval_runtime": 313.9427, |
| "eval_samples_per_second": 6.371, |
| "eval_steps_per_second": 6.371, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.3503617030928842, |
| "grad_norm": 10.125, |
| "learning_rate": 0.00013395498762447475, |
| "loss": 0.0012, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.35175756645182005, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00013366718471190928, |
| "loss": 0.0025, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.3531534298107558, |
| "grad_norm": 6.59375, |
| "learning_rate": 0.0001333793817993438, |
| "loss": 0.0009, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.35454929316969164, |
| "grad_norm": 14.9375, |
| "learning_rate": 0.00013309157888677833, |
| "loss": 0.0024, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.3559451565286274, |
| "grad_norm": 5.46875, |
| "learning_rate": 0.0001328037759742129, |
| "loss": 0.001, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.3573410198875632, |
| "grad_norm": 2.25, |
| "learning_rate": 0.00013251597306164739, |
| "loss": 0.0007, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.358736883246499, |
| "grad_norm": 3.265625, |
| "learning_rate": 0.0001322281701490819, |
| "loss": 0.0007, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.3601327466054348, |
| "grad_norm": 4.53125, |
| "learning_rate": 0.00013194036723651644, |
| "loss": 0.0007, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.3615286099643706, |
| "grad_norm": 3.671875, |
| "learning_rate": 0.00013165256432395096, |
| "loss": 0.0007, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.3629244733233064, |
| "grad_norm": 2.75, |
| "learning_rate": 0.0001313647614113855, |
| "loss": 0.0007, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.3643203366822422, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.00013107695849882002, |
| "loss": 0.001, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.365716200041178, |
| "grad_norm": 4.8125, |
| "learning_rate": 0.00013078915558625454, |
| "loss": 0.0026, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.36711206340011376, |
| "grad_norm": 14.625, |
| "learning_rate": 0.00013050135267368907, |
| "loss": 0.0014, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.36850792675904953, |
| "grad_norm": 11.875, |
| "learning_rate": 0.0001302135497611236, |
| "loss": 0.0011, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.36990379011798535, |
| "grad_norm": 10.5625, |
| "learning_rate": 0.00012992574684855812, |
| "loss": 0.002, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.3712996534769211, |
| "grad_norm": 12.75, |
| "learning_rate": 0.00012963794393599265, |
| "loss": 0.0025, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.37269551683585694, |
| "grad_norm": 2.484375, |
| "learning_rate": 0.00012935014102342718, |
| "loss": 0.0019, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.3740913801947927, |
| "grad_norm": 2.953125, |
| "learning_rate": 0.00012906233811086167, |
| "loss": 0.0004, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.37548724355372853, |
| "grad_norm": 0.474609375, |
| "learning_rate": 0.0001287745351982962, |
| "loss": 0.0009, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.3768831069126643, |
| "grad_norm": 23.25, |
| "learning_rate": 0.00012848673228573073, |
| "loss": 0.0023, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.3782789702716001, |
| "grad_norm": 5.875, |
| "learning_rate": 0.00012819892937316525, |
| "loss": 0.0061, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.3796748336305359, |
| "grad_norm": 11.4375, |
| "learning_rate": 0.0001279111264605998, |
| "loss": 0.0043, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.3810706969894717, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.0001276233235480343, |
| "loss": 0.0021, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.3824665603484075, |
| "grad_norm": 1.671875, |
| "learning_rate": 0.00012733552063546883, |
| "loss": 0.0022, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.3838624237073433, |
| "grad_norm": 12.0625, |
| "learning_rate": 0.00012704771772290336, |
| "loss": 0.0009, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.3838624237073433, |
| "eval_loss": 0.00010729853966040537, |
| "eval_mae": 0.009244485758244991, |
| "eval_rmse": 0.010358501225709915, |
| "eval_runtime": 314.0246, |
| "eval_samples_per_second": 6.369, |
| "eval_steps_per_second": 6.369, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.38525828706627907, |
| "grad_norm": 0.466796875, |
| "learning_rate": 0.00012675991481033788, |
| "loss": 0.0029, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.3866541504252149, |
| "grad_norm": 0.314453125, |
| "learning_rate": 0.0001264721118977724, |
| "loss": 0.0017, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.38805001378415066, |
| "grad_norm": 5.09375, |
| "learning_rate": 0.00012618430898520694, |
| "loss": 0.002, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.3894458771430865, |
| "grad_norm": 6.21875, |
| "learning_rate": 0.00012589650607264146, |
| "loss": 0.0015, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.39084174050202225, |
| "grad_norm": 7.78125, |
| "learning_rate": 0.000125608703160076, |
| "loss": 0.0019, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.3922376038609581, |
| "grad_norm": 8.6875, |
| "learning_rate": 0.00012532090024751052, |
| "loss": 0.0019, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.39363346721989384, |
| "grad_norm": 8.125, |
| "learning_rate": 0.00012503309733494504, |
| "loss": 0.0018, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.39502933057882966, |
| "grad_norm": 10.375, |
| "learning_rate": 0.00012474529442237957, |
| "loss": 0.002, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.39642519393776543, |
| "grad_norm": 3.5, |
| "learning_rate": 0.0001244574915098141, |
| "loss": 0.0018, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.39782105729670125, |
| "grad_norm": 5.0625, |
| "learning_rate": 0.0001241696885972486, |
| "loss": 0.0023, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.399216920655637, |
| "grad_norm": 7.15625, |
| "learning_rate": 0.00012388188568468312, |
| "loss": 0.0019, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.4006127840145728, |
| "grad_norm": 2.65625, |
| "learning_rate": 0.00012359408277211765, |
| "loss": 0.003, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.4020086473735086, |
| "grad_norm": 4.3125, |
| "learning_rate": 0.0001233062798595522, |
| "loss": 0.0024, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.4034045107324444, |
| "grad_norm": 4.96875, |
| "learning_rate": 0.00012301847694698673, |
| "loss": 0.0032, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.4048003740913802, |
| "grad_norm": 7.5625, |
| "learning_rate": 0.00012273067403442125, |
| "loss": 0.001, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.40619623745031597, |
| "grad_norm": 7.1875, |
| "learning_rate": 0.00012244287112185575, |
| "loss": 0.0017, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.4075921008092518, |
| "grad_norm": 7.875, |
| "learning_rate": 0.00012215506820929028, |
| "loss": 0.0017, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.40898796416818756, |
| "grad_norm": 9.75, |
| "learning_rate": 0.0001218672652967248, |
| "loss": 0.0018, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.4103838275271234, |
| "grad_norm": 6.125, |
| "learning_rate": 0.00012157946238415933, |
| "loss": 0.0016, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.41177969088605915, |
| "grad_norm": 11.5625, |
| "learning_rate": 0.00012129165947159387, |
| "loss": 0.0018, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.41317555424499497, |
| "grad_norm": 3.375, |
| "learning_rate": 0.00012100385655902837, |
| "loss": 0.0017, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.41457141760393074, |
| "grad_norm": 4.3125, |
| "learning_rate": 0.0001207160536464629, |
| "loss": 0.0019, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.41596728096286656, |
| "grad_norm": 6.96875, |
| "learning_rate": 0.00012042825073389742, |
| "loss": 0.0021, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.4173631443218023, |
| "grad_norm": 4.34375, |
| "learning_rate": 0.00012014044782133196, |
| "loss": 0.0005, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.41875900768073815, |
| "grad_norm": 3.890625, |
| "learning_rate": 0.00011985264490876649, |
| "loss": 0.0004, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.41875900768073815, |
| "eval_loss": 2.5809065846260637e-05, |
| "eval_mae": 0.004102489911019802, |
| "eval_rmse": 0.005080262199044228, |
| "eval_runtime": 313.9897, |
| "eval_samples_per_second": 6.37, |
| "eval_steps_per_second": 6.37, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.4201548710396739, |
| "grad_norm": 6.03125, |
| "learning_rate": 0.00011956484199620102, |
| "loss": 0.0015, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.42155073439860974, |
| "grad_norm": 10.0625, |
| "learning_rate": 0.00011927703908363553, |
| "loss": 0.0017, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.4229465977575455, |
| "grad_norm": 4.03125, |
| "learning_rate": 0.00011898923617107005, |
| "loss": 0.0017, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.42434246111648133, |
| "grad_norm": 6.21875, |
| "learning_rate": 0.00011870143325850458, |
| "loss": 0.0017, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.4257383244754171, |
| "grad_norm": 0.8984375, |
| "learning_rate": 0.00011841363034593911, |
| "loss": 0.0016, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.4271341878343529, |
| "grad_norm": 8.8125, |
| "learning_rate": 0.00011812582743337363, |
| "loss": 0.0015, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.4285300511932887, |
| "grad_norm": 4.4375, |
| "learning_rate": 0.00011783802452080816, |
| "loss": 0.0016, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.4299259145522245, |
| "grad_norm": 10.125, |
| "learning_rate": 0.00011755022160824267, |
| "loss": 0.0016, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.4313217779111603, |
| "grad_norm": 7.5625, |
| "learning_rate": 0.0001172624186956772, |
| "loss": 0.0017, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.43271764127009604, |
| "grad_norm": 6.46875, |
| "learning_rate": 0.00011697461578311173, |
| "loss": 0.0015, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.43411350462903187, |
| "grad_norm": 2.421875, |
| "learning_rate": 0.00011668681287054625, |
| "loss": 0.0016, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.43550936798796763, |
| "grad_norm": 5.75, |
| "learning_rate": 0.00011639900995798079, |
| "loss": 0.0016, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.43690523134690346, |
| "grad_norm": 5.28125, |
| "learning_rate": 0.00011611120704541532, |
| "loss": 0.0021, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.4383010947058392, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00011582340413284982, |
| "loss": 0.0009, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.43969695806477505, |
| "grad_norm": 0.2734375, |
| "learning_rate": 0.00011553560122028436, |
| "loss": 0.0003, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.4410928214237108, |
| "grad_norm": 12.5625, |
| "learning_rate": 0.00011524779830771888, |
| "loss": 0.0006, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.44248868478264664, |
| "grad_norm": 4.625, |
| "learning_rate": 0.00011495999539515341, |
| "loss": 0.001, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.4438845481415824, |
| "grad_norm": 15.5, |
| "learning_rate": 0.00011467219248258794, |
| "loss": 0.0027, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.4452804115005182, |
| "grad_norm": 13.125, |
| "learning_rate": 0.00011438438957002245, |
| "loss": 0.0032, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.446676274859454, |
| "grad_norm": 14.6875, |
| "learning_rate": 0.00011409658665745697, |
| "loss": 0.0033, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.4480721382183898, |
| "grad_norm": 15.1875, |
| "learning_rate": 0.0001138087837448915, |
| "loss": 0.0032, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.4494680015773256, |
| "grad_norm": 10.4375, |
| "learning_rate": 0.00011352098083232603, |
| "loss": 0.0033, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.4508638649362614, |
| "grad_norm": 14.5, |
| "learning_rate": 0.00011323317791976055, |
| "loss": 0.0032, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.45225972829519717, |
| "grad_norm": 11.625, |
| "learning_rate": 0.00011294537500719508, |
| "loss": 0.0032, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.453655591654133, |
| "grad_norm": 12.5625, |
| "learning_rate": 0.00011265757209462959, |
| "loss": 0.0032, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.453655591654133, |
| "eval_loss": 0.0005641469615511596, |
| "eval_mae": 0.023370979353785515, |
| "eval_rmse": 0.02375177852809429, |
| "eval_runtime": 313.9972, |
| "eval_samples_per_second": 6.369, |
| "eval_steps_per_second": 6.369, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.45505145501306876, |
| "grad_norm": 13.0625, |
| "learning_rate": 0.00011236976918206412, |
| "loss": 0.0032, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.4564473183720046, |
| "grad_norm": 9.875, |
| "learning_rate": 0.00011208196626949865, |
| "loss": 0.0031, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.45784318173094035, |
| "grad_norm": 10.0625, |
| "learning_rate": 0.00011179416335693319, |
| "loss": 0.0031, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.4592390450898762, |
| "grad_norm": 11.25, |
| "learning_rate": 0.00011150636044436771, |
| "loss": 0.0031, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.46063490844881194, |
| "grad_norm": 11.6875, |
| "learning_rate": 0.00011121855753180224, |
| "loss": 0.003, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.46203077180774776, |
| "grad_norm": 12.25, |
| "learning_rate": 0.00011093075461923674, |
| "loss": 0.003, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.46342663516668353, |
| "grad_norm": 11.5, |
| "learning_rate": 0.00011064295170667128, |
| "loss": 0.003, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.46482249852561935, |
| "grad_norm": 14.75, |
| "learning_rate": 0.0001103551487941058, |
| "loss": 0.003, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.4662183618845551, |
| "grad_norm": 13.875, |
| "learning_rate": 0.00011006734588154033, |
| "loss": 0.0029, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.4676142252434909, |
| "grad_norm": 13.6875, |
| "learning_rate": 0.00010977954296897486, |
| "loss": 0.0031, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.4690100886024267, |
| "grad_norm": 3.796875, |
| "learning_rate": 0.00010949174005640938, |
| "loss": 0.0019, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.4704059519613625, |
| "grad_norm": 5.59375, |
| "learning_rate": 0.0001092039371438439, |
| "loss": 0.0012, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.4718018153202983, |
| "grad_norm": 2.921875, |
| "learning_rate": 0.00010891613423127842, |
| "loss": 0.0012, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.47319767867923407, |
| "grad_norm": 8.6875, |
| "learning_rate": 0.00010862833131871295, |
| "loss": 0.0012, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.4745935420381699, |
| "grad_norm": 3.96875, |
| "learning_rate": 0.00010834052840614747, |
| "loss": 0.0012, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.47598940539710566, |
| "grad_norm": 10.1875, |
| "learning_rate": 0.00010805272549358201, |
| "loss": 0.0012, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.4773852687560415, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 0.00010776492258101651, |
| "loss": 0.0015, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.47878113211497725, |
| "grad_norm": 6.1875, |
| "learning_rate": 0.00010747711966845104, |
| "loss": 0.0015, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.48017699547391307, |
| "grad_norm": 3.1875, |
| "learning_rate": 0.00010718931675588557, |
| "loss": 0.0012, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.48157285883284884, |
| "grad_norm": 6.09375, |
| "learning_rate": 0.0001069015138433201, |
| "loss": 0.001, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.48296872219178466, |
| "grad_norm": 11.875, |
| "learning_rate": 0.00010661371093075463, |
| "loss": 0.0018, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.48436458555072043, |
| "grad_norm": 14.9375, |
| "learning_rate": 0.00010632590801818916, |
| "loss": 0.0029, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.48576044890965625, |
| "grad_norm": 15.75, |
| "learning_rate": 0.00010603810510562366, |
| "loss": 0.0027, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.487156312268592, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.0001057503021930582, |
| "loss": 0.002, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.48855217562752784, |
| "grad_norm": 4.875, |
| "learning_rate": 0.00010546249928049272, |
| "loss": 0.0018, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.48855217562752784, |
| "eval_loss": 8.424516272498295e-05, |
| "eval_mae": 0.00828312523663044, |
| "eval_rmse": 0.009178516454994678, |
| "eval_runtime": 314.273, |
| "eval_samples_per_second": 6.364, |
| "eval_steps_per_second": 6.364, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.4899480389864636, |
| "grad_norm": 5.65625, |
| "learning_rate": 0.00010517469636792725, |
| "loss": 0.0016, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.49134390234539943, |
| "grad_norm": 3.375, |
| "learning_rate": 0.00010488689345536178, |
| "loss": 0.0016, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.4927397657043352, |
| "grad_norm": 9.25, |
| "learning_rate": 0.0001045990905427963, |
| "loss": 0.0006, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.494135629063271, |
| "grad_norm": 2.6875, |
| "learning_rate": 0.00010431128763023082, |
| "loss": 0.0005, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.4955314924222068, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00010402348471766534, |
| "loss": 0.0015, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.4969273557811426, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.00010373568180509987, |
| "loss": 0.0013, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.4983232191400784, |
| "grad_norm": 0.5078125, |
| "learning_rate": 0.0001034478788925344, |
| "loss": 0.0012, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.49971908249901414, |
| "grad_norm": 13.0625, |
| "learning_rate": 0.00010316007597996893, |
| "loss": 0.0011, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.5011149458579499, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.00010287227306740346, |
| "loss": 0.0006, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.5025108092168857, |
| "grad_norm": 3.578125, |
| "learning_rate": 0.00010258447015483796, |
| "loss": 0.0003, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5039066725758216, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 0.00010229666724227249, |
| "loss": 0.0003, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.5053025359347574, |
| "grad_norm": 5.34375, |
| "learning_rate": 0.00010200886432970703, |
| "loss": 0.0009, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.5066983992936931, |
| "grad_norm": 0.134765625, |
| "learning_rate": 0.00010172106141714155, |
| "loss": 0.0023, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.5080942626526289, |
| "grad_norm": 0.703125, |
| "learning_rate": 0.00010143325850457608, |
| "loss": 0.001, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.5094901260115647, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.00010114545559201059, |
| "loss": 0.0023, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.5108859893705006, |
| "grad_norm": 7.0, |
| "learning_rate": 0.00010085765267944512, |
| "loss": 0.001, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.5122818527294363, |
| "grad_norm": 0.6328125, |
| "learning_rate": 0.00010056984976687964, |
| "loss": 0.0006, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.5136777160883721, |
| "grad_norm": 3.53125, |
| "learning_rate": 0.00010028204685431417, |
| "loss": 0.0002, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.5150735794473079, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 9.99942439417487e-05, |
| "loss": 0.0003, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.5164694428062437, |
| "grad_norm": 1.3203125, |
| "learning_rate": 9.970644102918322e-05, |
| "loss": 0.0003, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.5178653061651795, |
| "grad_norm": 1.7890625, |
| "learning_rate": 9.941863811661775e-05, |
| "loss": 0.0005, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.5192611695241153, |
| "grad_norm": 0.66015625, |
| "learning_rate": 9.913083520405228e-05, |
| "loss": 0.0009, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.5206570328830511, |
| "grad_norm": 0.08349609375, |
| "learning_rate": 9.884303229148679e-05, |
| "loss": 0.0002, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.5220528962419869, |
| "grad_norm": 4.125, |
| "learning_rate": 9.855522937892131e-05, |
| "loss": 0.0007, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.5234487596009226, |
| "grad_norm": 5.53125, |
| "learning_rate": 9.826742646635585e-05, |
| "loss": 0.0008, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.5234487596009226, |
| "eval_loss": 0.00011440851085353643, |
| "eval_mae": 0.010100271552801132, |
| "eval_rmse": 0.01069619134068489, |
| "eval_runtime": 318.3188, |
| "eval_samples_per_second": 6.283, |
| "eval_steps_per_second": 6.283, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.5248446229598585, |
| "grad_norm": 1.0625, |
| "learning_rate": 9.797962355379037e-05, |
| "loss": 0.0008, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.5262404863187943, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.76918206412249e-05, |
| "loss": 0.0007, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.5276363496777301, |
| "grad_norm": 5.6875, |
| "learning_rate": 9.740401772865942e-05, |
| "loss": 0.0009, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.5290322130366658, |
| "grad_norm": 8.25, |
| "learning_rate": 9.711621481609395e-05, |
| "loss": 0.0005, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.5304280763956016, |
| "grad_norm": 2.21875, |
| "learning_rate": 9.682841190352847e-05, |
| "loss": 0.0007, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.5318239397545375, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.654060899096299e-05, |
| "loss": 0.0003, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.5332198031134732, |
| "grad_norm": 5.90625, |
| "learning_rate": 9.625280607839751e-05, |
| "loss": 0.0007, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.534615666472409, |
| "grad_norm": 1.5859375, |
| "learning_rate": 9.596500316583205e-05, |
| "loss": 0.0006, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.5360115298313448, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.567720025326656e-05, |
| "loss": 0.0004, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.5374073931902806, |
| "grad_norm": 2.9375, |
| "learning_rate": 9.538939734070109e-05, |
| "loss": 0.001, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.5388032565492163, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.510159442813562e-05, |
| "loss": 0.0012, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.5401991199081522, |
| "grad_norm": 2.328125, |
| "learning_rate": 9.481379151557014e-05, |
| "loss": 0.0005, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.541594983267088, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 9.452598860300467e-05, |
| "loss": 0.0002, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.5429908466260238, |
| "grad_norm": 4.71875, |
| "learning_rate": 9.42381856904392e-05, |
| "loss": 0.0011, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.5443867099849595, |
| "grad_norm": 0.34765625, |
| "learning_rate": 9.395038277787371e-05, |
| "loss": 0.0003, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.5457825733438953, |
| "grad_norm": 6.46875, |
| "learning_rate": 9.366257986530825e-05, |
| "loss": 0.0011, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.5471784367028312, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 9.337477695274277e-05, |
| "loss": 0.0018, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.548574300061767, |
| "grad_norm": 3.90625, |
| "learning_rate": 9.308697404017729e-05, |
| "loss": 0.0008, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.5499701634207027, |
| "grad_norm": 8.75, |
| "learning_rate": 9.279917112761181e-05, |
| "loss": 0.0008, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.5513660267796385, |
| "grad_norm": 9.0, |
| "learning_rate": 9.251136821504634e-05, |
| "loss": 0.0019, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.5527618901385744, |
| "grad_norm": 8.75, |
| "learning_rate": 9.222356530248087e-05, |
| "loss": 0.0019, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.5541577534975102, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.193576238991539e-05, |
| "loss": 0.0014, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.5555536168564459, |
| "grad_norm": 0.345703125, |
| "learning_rate": 9.164795947734992e-05, |
| "loss": 0.0018, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.5569494802153817, |
| "grad_norm": 3.34375, |
| "learning_rate": 9.136015656478443e-05, |
| "loss": 0.0005, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.5583453435743175, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.107235365221897e-05, |
| "loss": 0.0007, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.5583453435743175, |
| "eval_loss": 3.620574716478586e-05, |
| "eval_mae": 0.0052015818655490875, |
| "eval_rmse": 0.006017121020704508, |
| "eval_runtime": 318.2368, |
| "eval_samples_per_second": 6.285, |
| "eval_steps_per_second": 6.285, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.5597412069332534, |
| "grad_norm": 3.375, |
| "learning_rate": 9.078455073965348e-05, |
| "loss": 0.0002, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.5611370702921891, |
| "grad_norm": 3.6875, |
| "learning_rate": 9.049674782708801e-05, |
| "loss": 0.0003, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.5625329336511249, |
| "grad_norm": 3.640625, |
| "learning_rate": 9.020894491452254e-05, |
| "loss": 0.0006, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.5639287970100607, |
| "grad_norm": 2.34375, |
| "learning_rate": 8.992114200195706e-05, |
| "loss": 0.0003, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.5653246603689965, |
| "grad_norm": 2.171875, |
| "learning_rate": 8.963333908939159e-05, |
| "loss": 0.0004, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.5667205237279322, |
| "grad_norm": 2.078125, |
| "learning_rate": 8.934553617682612e-05, |
| "loss": 0.0003, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.5681163870868681, |
| "grad_norm": 2.203125, |
| "learning_rate": 8.905773326426063e-05, |
| "loss": 0.0007, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.5695122504458039, |
| "grad_norm": 11.9375, |
| "learning_rate": 8.876993035169517e-05, |
| "loss": 0.0008, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.5709081138047396, |
| "grad_norm": 0.9296875, |
| "learning_rate": 8.84821274391297e-05, |
| "loss": 0.001, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.5723039771636754, |
| "grad_norm": 7.78125, |
| "learning_rate": 8.819432452656421e-05, |
| "loss": 0.0007, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.5736998405226112, |
| "grad_norm": 2.40625, |
| "learning_rate": 8.790652161399873e-05, |
| "loss": 0.0003, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.5750957038815471, |
| "grad_norm": 0.8359375, |
| "learning_rate": 8.761871870143326e-05, |
| "loss": 0.0004, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.5764915672404828, |
| "grad_norm": 3.40625, |
| "learning_rate": 8.733091578886779e-05, |
| "loss": 0.0005, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.5778874305994186, |
| "grad_norm": 7.53125, |
| "learning_rate": 8.704311287630231e-05, |
| "loss": 0.0012, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.5792832939583544, |
| "grad_norm": 2.953125, |
| "learning_rate": 8.675530996373684e-05, |
| "loss": 0.0014, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.5806791573172903, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 8.646750705117137e-05, |
| "loss": 0.0003, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.582075020676226, |
| "grad_norm": 5.875, |
| "learning_rate": 8.617970413860589e-05, |
| "loss": 0.0004, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.5834708840351618, |
| "grad_norm": 2.703125, |
| "learning_rate": 8.589190122604042e-05, |
| "loss": 0.0004, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.5848667473940976, |
| "grad_norm": 5.34375, |
| "learning_rate": 8.560409831347493e-05, |
| "loss": 0.0004, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.5862626107530334, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.531629540090946e-05, |
| "loss": 0.0002, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.5876584741119691, |
| "grad_norm": 3.703125, |
| "learning_rate": 8.5028492488344e-05, |
| "loss": 0.0003, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.589054337470905, |
| "grad_norm": 1.890625, |
| "learning_rate": 8.474068957577851e-05, |
| "loss": 0.0002, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.5904502008298408, |
| "grad_norm": 0.6640625, |
| "learning_rate": 8.445288666321304e-05, |
| "loss": 0.0003, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.5918460641887766, |
| "grad_norm": 0.031005859375, |
| "learning_rate": 8.416508375064755e-05, |
| "loss": 0.0004, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.5932419275477123, |
| "grad_norm": 7.21875, |
| "learning_rate": 8.387728083808209e-05, |
| "loss": 0.0005, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.5932419275477123, |
| "eval_loss": 0.00024837159435264766, |
| "eval_mae": 0.015309196896851063, |
| "eval_rmse": 0.01575980894267559, |
| "eval_runtime": 314.8323, |
| "eval_samples_per_second": 6.353, |
| "eval_steps_per_second": 6.353, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.5946377909066481, |
| "grad_norm": 0.150390625, |
| "learning_rate": 8.358947792551661e-05, |
| "loss": 0.0006, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.596033654265584, |
| "grad_norm": 0.287109375, |
| "learning_rate": 8.330167501295113e-05, |
| "loss": 0.0002, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.5974295176245198, |
| "grad_norm": 4.40625, |
| "learning_rate": 8.301387210038565e-05, |
| "loss": 0.0013, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.5988253809834555, |
| "grad_norm": 1.8515625, |
| "learning_rate": 8.27260691878202e-05, |
| "loss": 0.0003, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.6002212443423913, |
| "grad_norm": 7.625, |
| "learning_rate": 8.24382662752547e-05, |
| "loss": 0.0003, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.6016171077013271, |
| "grad_norm": 0.90625, |
| "learning_rate": 8.215046336268923e-05, |
| "loss": 0.0005, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.6030129710602629, |
| "grad_norm": 2.0625, |
| "learning_rate": 8.186266045012376e-05, |
| "loss": 0.0006, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.6044088344191987, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 8.157485753755829e-05, |
| "loss": 0.0002, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.6058046977781345, |
| "grad_norm": 4.6875, |
| "learning_rate": 8.128705462499281e-05, |
| "loss": 0.0002, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.6072005611370703, |
| "grad_norm": 0.392578125, |
| "learning_rate": 8.099925171242734e-05, |
| "loss": 0.0005, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.608596424496006, |
| "grad_norm": 0.609375, |
| "learning_rate": 8.071144879986185e-05, |
| "loss": 0.0003, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.6099922878549419, |
| "grad_norm": 0.71484375, |
| "learning_rate": 8.042364588729638e-05, |
| "loss": 0.0002, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.6113881512138777, |
| "grad_norm": 2.296875, |
| "learning_rate": 8.013584297473092e-05, |
| "loss": 0.0002, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.6127840145728135, |
| "grad_norm": 1.0234375, |
| "learning_rate": 7.984804006216543e-05, |
| "loss": 0.0002, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.6141798779317492, |
| "grad_norm": 3.59375, |
| "learning_rate": 7.956023714959996e-05, |
| "loss": 0.0006, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.615575741290685, |
| "grad_norm": 0.345703125, |
| "learning_rate": 7.927243423703448e-05, |
| "loss": 0.0005, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.6169716046496209, |
| "grad_norm": 0.921875, |
| "learning_rate": 7.898463132446901e-05, |
| "loss": 0.0002, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.6183674680085567, |
| "grad_norm": 3.40625, |
| "learning_rate": 7.869682841190354e-05, |
| "loss": 0.0003, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.6197633313674924, |
| "grad_norm": 2.953125, |
| "learning_rate": 7.840902549933806e-05, |
| "loss": 0.0003, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.6211591947264282, |
| "grad_norm": 2.484375, |
| "learning_rate": 7.812122258677257e-05, |
| "loss": 0.0005, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.622555058085364, |
| "grad_norm": 2.15625, |
| "learning_rate": 7.783341967420711e-05, |
| "loss": 0.0002, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.6239509214442999, |
| "grad_norm": 0.734375, |
| "learning_rate": 7.754561676164163e-05, |
| "loss": 0.0002, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.6253467848032356, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 7.725781384907615e-05, |
| "loss": 0.0002, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.6267426481621714, |
| "grad_norm": 14.0625, |
| "learning_rate": 7.697001093651068e-05, |
| "loss": 0.0017, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.6281385115211072, |
| "grad_norm": 8.625, |
| "learning_rate": 7.66822080239452e-05, |
| "loss": 0.0023, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.6281385115211072, |
| "eval_loss": 6.291436875471845e-05, |
| "eval_mae": 0.00715098949149251, |
| "eval_rmse": 0.00793185830116272, |
| "eval_runtime": 315.2915, |
| "eval_samples_per_second": 6.343, |
| "eval_steps_per_second": 6.343, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.629534374880043, |
| "grad_norm": 9.0, |
| "learning_rate": 7.639440511137973e-05, |
| "loss": 0.0021, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.6309302382389788, |
| "grad_norm": 10.875, |
| "learning_rate": 7.610660219881426e-05, |
| "loss": 0.0022, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.6323261015979146, |
| "grad_norm": 1.9453125, |
| "learning_rate": 7.581879928624877e-05, |
| "loss": 0.002, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.6337219649568504, |
| "grad_norm": 1.15625, |
| "learning_rate": 7.553099637368331e-05, |
| "loss": 0.0006, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.6351178283157861, |
| "grad_norm": 2.40625, |
| "learning_rate": 7.524319346111784e-05, |
| "loss": 0.0002, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.6365136916747219, |
| "grad_norm": 1.203125, |
| "learning_rate": 7.495539054855235e-05, |
| "loss": 0.0003, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.6379095550336578, |
| "grad_norm": 3.46875, |
| "learning_rate": 7.466758763598688e-05, |
| "loss": 0.0003, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.6393054183925936, |
| "grad_norm": 0.484375, |
| "learning_rate": 7.43797847234214e-05, |
| "loss": 0.0003, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.6407012817515293, |
| "grad_norm": 8.0625, |
| "learning_rate": 7.409198181085593e-05, |
| "loss": 0.0005, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.6420971451104651, |
| "grad_norm": 0.158203125, |
| "learning_rate": 7.380417889829046e-05, |
| "loss": 0.0003, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.6434930084694009, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.351637598572498e-05, |
| "loss": 0.0002, |
| "step": 23050 |
| }, |
| { |
| "epoch": 0.6448888718283368, |
| "grad_norm": 0.4765625, |
| "learning_rate": 7.32285730731595e-05, |
| "loss": 0.0002, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.6462847351872725, |
| "grad_norm": 4.28125, |
| "learning_rate": 7.294077016059403e-05, |
| "loss": 0.0002, |
| "step": 23150 |
| }, |
| { |
| "epoch": 0.6476805985462083, |
| "grad_norm": 0.0625, |
| "learning_rate": 7.265296724802856e-05, |
| "loss": 0.0009, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.6490764619051441, |
| "grad_norm": 0.361328125, |
| "learning_rate": 7.236516433546307e-05, |
| "loss": 0.0002, |
| "step": 23250 |
| }, |
| { |
| "epoch": 0.6504723252640799, |
| "grad_norm": 0.21875, |
| "learning_rate": 7.20773614228976e-05, |
| "loss": 0.0001, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.6518681886230157, |
| "grad_norm": 4.5, |
| "learning_rate": 7.178955851033214e-05, |
| "loss": 0.0003, |
| "step": 23350 |
| }, |
| { |
| "epoch": 0.6532640519819515, |
| "grad_norm": 2.90625, |
| "learning_rate": 7.150175559776665e-05, |
| "loss": 0.0003, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.6546599153408873, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 7.121395268520118e-05, |
| "loss": 0.0002, |
| "step": 23450 |
| }, |
| { |
| "epoch": 0.6560557786998231, |
| "grad_norm": 3.78125, |
| "learning_rate": 7.092614977263569e-05, |
| "loss": 0.0002, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.6574516420587588, |
| "grad_norm": 3.234375, |
| "learning_rate": 7.063834686007023e-05, |
| "loss": 0.0003, |
| "step": 23550 |
| }, |
| { |
| "epoch": 0.6588475054176947, |
| "grad_norm": 2.6875, |
| "learning_rate": 7.035054394750476e-05, |
| "loss": 0.0002, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.6602433687766305, |
| "grad_norm": 0.75, |
| "learning_rate": 7.006274103493927e-05, |
| "loss": 0.0003, |
| "step": 23650 |
| }, |
| { |
| "epoch": 0.6616392321355663, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 6.97749381223738e-05, |
| "loss": 0.0002, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.663035095494502, |
| "grad_norm": 0.53515625, |
| "learning_rate": 6.948713520980832e-05, |
| "loss": 0.0002, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.663035095494502, |
| "eval_loss": 1.540686389489565e-05, |
| "eval_mae": 0.0031748104374855757, |
| "eval_rmse": 0.0039251577109098434, |
| "eval_runtime": 315.0516, |
| "eval_samples_per_second": 6.348, |
| "eval_steps_per_second": 6.348, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.6644309588534378, |
| "grad_norm": 2.734375, |
| "learning_rate": 6.919933229724285e-05, |
| "loss": 0.0002, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.6658268222123737, |
| "grad_norm": 2.3125, |
| "learning_rate": 6.891152938467738e-05, |
| "loss": 0.0001, |
| "step": 23850 |
| }, |
| { |
| "epoch": 0.6672226855713094, |
| "grad_norm": 0.34765625, |
| "learning_rate": 6.86237264721119e-05, |
| "loss": 0.0001, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.6686185489302452, |
| "grad_norm": 0.41796875, |
| "learning_rate": 6.833592355954643e-05, |
| "loss": 0.0002, |
| "step": 23950 |
| }, |
| { |
| "epoch": 0.670014412289181, |
| "grad_norm": 4.78125, |
| "learning_rate": 6.804812064698095e-05, |
| "loss": 0.0002, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.6714102756481168, |
| "grad_norm": 2.96875, |
| "learning_rate": 6.776031773441548e-05, |
| "loss": 0.0003, |
| "step": 24050 |
| }, |
| { |
| "epoch": 0.6728061390070526, |
| "grad_norm": 0.7265625, |
| "learning_rate": 6.747251482185e-05, |
| "loss": 0.0002, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.6742020023659884, |
| "grad_norm": 1.1328125, |
| "learning_rate": 6.718471190928452e-05, |
| "loss": 0.0002, |
| "step": 24150 |
| }, |
| { |
| "epoch": 0.6755978657249242, |
| "grad_norm": 0.84765625, |
| "learning_rate": 6.689690899671906e-05, |
| "loss": 0.0003, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.67699372908386, |
| "grad_norm": 0.2421875, |
| "learning_rate": 6.660910608415357e-05, |
| "loss": 0.0002, |
| "step": 24250 |
| }, |
| { |
| "epoch": 0.6783895924427957, |
| "grad_norm": 4.40625, |
| "learning_rate": 6.63213031715881e-05, |
| "loss": 0.0003, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.6797854558017316, |
| "grad_norm": 0.30078125, |
| "learning_rate": 6.603350025902263e-05, |
| "loss": 0.0002, |
| "step": 24350 |
| }, |
| { |
| "epoch": 0.6811813191606674, |
| "grad_norm": 2.78125, |
| "learning_rate": 6.574569734645715e-05, |
| "loss": 0.0002, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.6825771825196032, |
| "grad_norm": 0.984375, |
| "learning_rate": 6.545789443389168e-05, |
| "loss": 0.0002, |
| "step": 24450 |
| }, |
| { |
| "epoch": 0.6839730458785389, |
| "grad_norm": 3.3125, |
| "learning_rate": 6.51700915213262e-05, |
| "loss": 0.0006, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.6853689092374747, |
| "grad_norm": 1.5625, |
| "learning_rate": 6.488228860876072e-05, |
| "loss": 0.0001, |
| "step": 24550 |
| }, |
| { |
| "epoch": 0.6867647725964106, |
| "grad_norm": 2.53125, |
| "learning_rate": 6.459448569619526e-05, |
| "loss": 0.0002, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.6881606359553464, |
| "grad_norm": 2.65625, |
| "learning_rate": 6.430668278362977e-05, |
| "loss": 0.0002, |
| "step": 24650 |
| }, |
| { |
| "epoch": 0.6895564993142821, |
| "grad_norm": 0.6328125, |
| "learning_rate": 6.40188798710643e-05, |
| "loss": 0.0002, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.6909523626732179, |
| "grad_norm": 3.015625, |
| "learning_rate": 6.373107695849882e-05, |
| "loss": 0.0004, |
| "step": 24750 |
| }, |
| { |
| "epoch": 0.6923482260321537, |
| "grad_norm": 3.390625, |
| "learning_rate": 6.344327404593335e-05, |
| "loss": 0.0003, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.6937440893910896, |
| "grad_norm": 0.55078125, |
| "learning_rate": 6.315547113336787e-05, |
| "loss": 0.0003, |
| "step": 24850 |
| }, |
| { |
| "epoch": 0.6951399527500253, |
| "grad_norm": 4.65625, |
| "learning_rate": 6.28676682208024e-05, |
| "loss": 0.0001, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.6965358161089611, |
| "grad_norm": 3.4375, |
| "learning_rate": 6.257986530823691e-05, |
| "loss": 0.0003, |
| "step": 24950 |
| }, |
| { |
| "epoch": 0.6979316794678969, |
| "grad_norm": 1.1953125, |
| "learning_rate": 6.229206239567144e-05, |
| "loss": 0.0003, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.6979316794678969, |
| "eval_loss": 9.060095180757344e-06, |
| "eval_mae": 0.002397725125774741, |
| "eval_rmse": 0.003009999170899391, |
| "eval_runtime": 314.8136, |
| "eval_samples_per_second": 6.353, |
| "eval_steps_per_second": 6.353, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.6993275428268326, |
| "grad_norm": 1.2421875, |
| "learning_rate": 6.200425948310598e-05, |
| "loss": 0.0001, |
| "step": 25050 |
| }, |
| { |
| "epoch": 0.7007234061857684, |
| "grad_norm": 3.671875, |
| "learning_rate": 6.171645657054049e-05, |
| "loss": 0.0002, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.7021192695447043, |
| "grad_norm": 0.8046875, |
| "learning_rate": 6.142865365797502e-05, |
| "loss": 0.0002, |
| "step": 25150 |
| }, |
| { |
| "epoch": 0.7035151329036401, |
| "grad_norm": 0.2890625, |
| "learning_rate": 6.114085074540955e-05, |
| "loss": 0.0001, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.7049109962625758, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.0853047832844065e-05, |
| "loss": 0.0001, |
| "step": 25250 |
| }, |
| { |
| "epoch": 0.7063068596215116, |
| "grad_norm": 1.2265625, |
| "learning_rate": 6.05652449202786e-05, |
| "loss": 0.0001, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.7077027229804475, |
| "grad_norm": 3.21875, |
| "learning_rate": 6.0277442007713124e-05, |
| "loss": 0.0002, |
| "step": 25350 |
| }, |
| { |
| "epoch": 0.7090985863393833, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 5.9989639095147644e-05, |
| "loss": 0.0003, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.710494449698319, |
| "grad_norm": 2.625, |
| "learning_rate": 5.970183618258217e-05, |
| "loss": 0.0003, |
| "step": 25450 |
| }, |
| { |
| "epoch": 0.7118903130572548, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.94140332700167e-05, |
| "loss": 0.0002, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.7132861764161906, |
| "grad_norm": 3.8125, |
| "learning_rate": 5.9126230357451216e-05, |
| "loss": 0.0002, |
| "step": 25550 |
| }, |
| { |
| "epoch": 0.7146820397751265, |
| "grad_norm": 1.0390625, |
| "learning_rate": 5.883842744488575e-05, |
| "loss": 0.0003, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.7160779031340622, |
| "grad_norm": 4.28125, |
| "learning_rate": 5.8550624532320275e-05, |
| "loss": 0.0004, |
| "step": 25650 |
| }, |
| { |
| "epoch": 0.717473766492998, |
| "grad_norm": 1.1015625, |
| "learning_rate": 5.8262821619754795e-05, |
| "loss": 0.0002, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.7188696298519338, |
| "grad_norm": 1.5390625, |
| "learning_rate": 5.797501870718932e-05, |
| "loss": 0.0003, |
| "step": 25750 |
| }, |
| { |
| "epoch": 0.7202654932108696, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.768721579462384e-05, |
| "loss": 0.0002, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.7216613565698053, |
| "grad_norm": 1.9140625, |
| "learning_rate": 5.739941288205837e-05, |
| "loss": 0.0001, |
| "step": 25850 |
| }, |
| { |
| "epoch": 0.7230572199287412, |
| "grad_norm": 1.5234375, |
| "learning_rate": 5.711160996949289e-05, |
| "loss": 0.0002, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.724453083287677, |
| "grad_norm": 2.234375, |
| "learning_rate": 5.682380705692741e-05, |
| "loss": 0.0002, |
| "step": 25950 |
| }, |
| { |
| "epoch": 0.7258489466466128, |
| "grad_norm": 0.392578125, |
| "learning_rate": 5.6536004144361946e-05, |
| "loss": 0.0002, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.7272448100055485, |
| "grad_norm": 1.546875, |
| "learning_rate": 5.624820123179647e-05, |
| "loss": 0.0002, |
| "step": 26050 |
| }, |
| { |
| "epoch": 0.7286406733644843, |
| "grad_norm": 1.2265625, |
| "learning_rate": 5.596039831923099e-05, |
| "loss": 0.0001, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.7300365367234202, |
| "grad_norm": 0.8125, |
| "learning_rate": 5.567259540666552e-05, |
| "loss": 0.0002, |
| "step": 26150 |
| }, |
| { |
| "epoch": 0.731432400082356, |
| "grad_norm": 0.265625, |
| "learning_rate": 5.5384792494100044e-05, |
| "loss": 0.0001, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.7328282634412917, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 5.5096989581534564e-05, |
| "loss": 0.0001, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.7328282634412917, |
| "eval_loss": 9.440889698453248e-06, |
| "eval_mae": 0.0024416493251919746, |
| "eval_rmse": 0.003072603140026331, |
| "eval_runtime": 310.9978, |
| "eval_samples_per_second": 6.431, |
| "eval_steps_per_second": 6.431, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.7342241268002275, |
| "grad_norm": 2.734375, |
| "learning_rate": 5.480918666896909e-05, |
| "loss": 0.0002, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.7356199901591634, |
| "grad_norm": 1.5625, |
| "learning_rate": 5.452138375640362e-05, |
| "loss": 0.0002, |
| "step": 26350 |
| }, |
| { |
| "epoch": 0.7370158535180991, |
| "grad_norm": 3.15625, |
| "learning_rate": 5.4233580843838136e-05, |
| "loss": 0.0001, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.7384117168770349, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 5.394577793127267e-05, |
| "loss": 0.0002, |
| "step": 26450 |
| }, |
| { |
| "epoch": 0.7398075802359707, |
| "grad_norm": 0.035400390625, |
| "learning_rate": 5.3657975018707195e-05, |
| "loss": 0.0001, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.7412034435949065, |
| "grad_norm": 0.0390625, |
| "learning_rate": 5.3370172106141715e-05, |
| "loss": 0.0001, |
| "step": 26550 |
| }, |
| { |
| "epoch": 0.7425993069538422, |
| "grad_norm": 2.171875, |
| "learning_rate": 5.308236919357624e-05, |
| "loss": 0.0001, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.7439951703127781, |
| "grad_norm": 0.796875, |
| "learning_rate": 5.2794566281010774e-05, |
| "loss": 0.0002, |
| "step": 26650 |
| }, |
| { |
| "epoch": 0.7453910336717139, |
| "grad_norm": 2.65625, |
| "learning_rate": 5.250676336844529e-05, |
| "loss": 0.0002, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.7467868970306497, |
| "grad_norm": 0.6171875, |
| "learning_rate": 5.221896045587982e-05, |
| "loss": 0.0002, |
| "step": 26750 |
| }, |
| { |
| "epoch": 0.7481827603895854, |
| "grad_norm": 1.078125, |
| "learning_rate": 5.1931157543314347e-05, |
| "loss": 0.0001, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.7495786237485212, |
| "grad_norm": 1.6875, |
| "learning_rate": 5.1643354630748866e-05, |
| "loss": 0.0001, |
| "step": 26850 |
| }, |
| { |
| "epoch": 0.7509744871074571, |
| "grad_norm": 2.640625, |
| "learning_rate": 5.135555171818339e-05, |
| "loss": 0.0001, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.7523703504663929, |
| "grad_norm": 0.028564453125, |
| "learning_rate": 5.106774880561791e-05, |
| "loss": 0.0002, |
| "step": 26950 |
| }, |
| { |
| "epoch": 0.7537662138253286, |
| "grad_norm": 2.640625, |
| "learning_rate": 5.077994589305244e-05, |
| "loss": 0.0001, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.7551620771842644, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 5.0492142980486965e-05, |
| "loss": 0.0001, |
| "step": 27050 |
| }, |
| { |
| "epoch": 0.7565579405432002, |
| "grad_norm": 0.0478515625, |
| "learning_rate": 5.0204340067921484e-05, |
| "loss": 0.0002, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.7579538039021361, |
| "grad_norm": 0.25390625, |
| "learning_rate": 4.991653715535601e-05, |
| "loss": 0.0001, |
| "step": 27150 |
| }, |
| { |
| "epoch": 0.7593496672610718, |
| "grad_norm": 1.703125, |
| "learning_rate": 4.9628734242790544e-05, |
| "loss": 0.0002, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.7607455306200076, |
| "grad_norm": 1.7578125, |
| "learning_rate": 4.934093133022506e-05, |
| "loss": 0.0002, |
| "step": 27250 |
| }, |
| { |
| "epoch": 0.7621413939789434, |
| "grad_norm": 0.79296875, |
| "learning_rate": 4.905312841765959e-05, |
| "loss": 0.0002, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.7635372573378792, |
| "grad_norm": 0.369140625, |
| "learning_rate": 4.876532550509411e-05, |
| "loss": 0.0001, |
| "step": 27350 |
| }, |
| { |
| "epoch": 0.764933120696815, |
| "grad_norm": 0.6875, |
| "learning_rate": 4.847752259252864e-05, |
| "loss": 0.0001, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.7663289840557508, |
| "grad_norm": 0.53125, |
| "learning_rate": 4.818971967996316e-05, |
| "loss": 0.0001, |
| "step": 27450 |
| }, |
| { |
| "epoch": 0.7677248474146866, |
| "grad_norm": 0.240234375, |
| "learning_rate": 4.790191676739769e-05, |
| "loss": 0.0001, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.7677248474146866, |
| "eval_loss": 1.0368624316470232e-05, |
| "eval_mae": 0.0025949301198124886, |
| "eval_rmse": 0.0032200347632169724, |
| "eval_runtime": 317.1945, |
| "eval_samples_per_second": 6.305, |
| "eval_steps_per_second": 6.305, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.7691207107736223, |
| "grad_norm": 0.5234375, |
| "learning_rate": 4.7614113854832214e-05, |
| "loss": 0.0001, |
| "step": 27550 |
| }, |
| { |
| "epoch": 0.7705165741325581, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.732631094226674e-05, |
| "loss": 0.0002, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.771912437491494, |
| "grad_norm": 0.455078125, |
| "learning_rate": 4.703850802970126e-05, |
| "loss": 0.0002, |
| "step": 27650 |
| }, |
| { |
| "epoch": 0.7733083008504298, |
| "grad_norm": 0.5390625, |
| "learning_rate": 4.675070511713579e-05, |
| "loss": 0.0001, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.7747041642093655, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.646290220457031e-05, |
| "loss": 0.0002, |
| "step": 27750 |
| }, |
| { |
| "epoch": 0.7761000275683013, |
| "grad_norm": 0.73046875, |
| "learning_rate": 4.617509929200484e-05, |
| "loss": 0.0001, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.7774958909272371, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 4.588729637943936e-05, |
| "loss": 0.0001, |
| "step": 27850 |
| }, |
| { |
| "epoch": 0.778891754286173, |
| "grad_norm": 1.25, |
| "learning_rate": 4.559949346687389e-05, |
| "loss": 0.0001, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.7802876176451087, |
| "grad_norm": 3.453125, |
| "learning_rate": 4.531169055430841e-05, |
| "loss": 0.0001, |
| "step": 27950 |
| }, |
| { |
| "epoch": 0.7816834810040445, |
| "grad_norm": 2.296875, |
| "learning_rate": 4.502388764174294e-05, |
| "loss": 0.0002, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.7830793443629803, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.4736084729177464e-05, |
| "loss": 0.0001, |
| "step": 28050 |
| }, |
| { |
| "epoch": 0.7844752077219161, |
| "grad_norm": 1.640625, |
| "learning_rate": 4.444828181661198e-05, |
| "loss": 0.0001, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.7858710710808519, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 4.416047890404651e-05, |
| "loss": 0.0002, |
| "step": 28150 |
| }, |
| { |
| "epoch": 0.7872669344397877, |
| "grad_norm": 2.453125, |
| "learning_rate": 4.3872675991481036e-05, |
| "loss": 0.0002, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.7886627977987235, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.358487307891556e-05, |
| "loss": 0.0001, |
| "step": 28250 |
| }, |
| { |
| "epoch": 0.7900586611576593, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.329707016635008e-05, |
| "loss": 0.0001, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.791454524516595, |
| "grad_norm": 0.02294921875, |
| "learning_rate": 4.3009267253784615e-05, |
| "loss": 0.0001, |
| "step": 28350 |
| }, |
| { |
| "epoch": 0.7928503878755309, |
| "grad_norm": 0.84765625, |
| "learning_rate": 4.2721464341219134e-05, |
| "loss": 0.0001, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.7942462512344667, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.243366142865366e-05, |
| "loss": 0.0001, |
| "step": 28450 |
| }, |
| { |
| "epoch": 0.7956421145934025, |
| "grad_norm": 0.625, |
| "learning_rate": 4.214585851608818e-05, |
| "loss": 0.0001, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.7970379779523382, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.185805560352271e-05, |
| "loss": 0.0001, |
| "step": 28550 |
| }, |
| { |
| "epoch": 0.798433841311274, |
| "grad_norm": 0.66796875, |
| "learning_rate": 4.157025269095723e-05, |
| "loss": 0.0001, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.7998297046702099, |
| "grad_norm": 0.390625, |
| "learning_rate": 4.128244977839176e-05, |
| "loss": 0.0002, |
| "step": 28650 |
| }, |
| { |
| "epoch": 0.8012255680291456, |
| "grad_norm": 2.53125, |
| "learning_rate": 4.0994646865826285e-05, |
| "loss": 0.0001, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.8026214313880814, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.070684395326081e-05, |
| "loss": 0.0001, |
| "step": 28750 |
| }, |
| { |
| "epoch": 0.8026214313880814, |
| "eval_loss": 8.319076187035535e-06, |
| "eval_mae": 0.002299492945894599, |
| "eval_rmse": 0.0028842808678746223, |
| "eval_runtime": 319.4261, |
| "eval_samples_per_second": 6.261, |
| "eval_steps_per_second": 6.261, |
| "step": 28750 |
| }, |
| { |
| "epoch": 0.8040172947470172, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.041904104069533e-05, |
| "loss": 0.0001, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.805413158105953, |
| "grad_norm": 2.203125, |
| "learning_rate": 4.0131238128129864e-05, |
| "loss": 0.0001, |
| "step": 28850 |
| }, |
| { |
| "epoch": 0.8068090214648888, |
| "grad_norm": 3.296875, |
| "learning_rate": 3.9843435215564384e-05, |
| "loss": 0.0001, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.8082048848238246, |
| "grad_norm": 0.671875, |
| "learning_rate": 3.955563230299891e-05, |
| "loss": 0.0001, |
| "step": 28950 |
| }, |
| { |
| "epoch": 0.8096007481827604, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.926782939043343e-05, |
| "loss": 0.0001, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.8109966115416962, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.8980026477867956e-05, |
| "loss": 0.0001, |
| "step": 29050 |
| }, |
| { |
| "epoch": 0.8123924749006319, |
| "grad_norm": 0.89453125, |
| "learning_rate": 3.869222356530248e-05, |
| "loss": 0.0001, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.8137883382595678, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.840442065273701e-05, |
| "loss": 0.0001, |
| "step": 29150 |
| }, |
| { |
| "epoch": 0.8151842016185036, |
| "grad_norm": 0.51953125, |
| "learning_rate": 3.8116617740171535e-05, |
| "loss": 0.0002, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.8165800649774394, |
| "grad_norm": 0.85546875, |
| "learning_rate": 3.7828814827606055e-05, |
| "loss": 0.0001, |
| "step": 29250 |
| }, |
| { |
| "epoch": 0.8179759283363751, |
| "grad_norm": 0.33203125, |
| "learning_rate": 3.754101191504058e-05, |
| "loss": 0.0001, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.8193717916953109, |
| "grad_norm": 0.37109375, |
| "learning_rate": 3.725320900247511e-05, |
| "loss": 0.0001, |
| "step": 29350 |
| }, |
| { |
| "epoch": 0.8207676550542468, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.6965406089909633e-05, |
| "loss": 0.0001, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.8221635184131826, |
| "grad_norm": 0.232421875, |
| "learning_rate": 3.667760317734415e-05, |
| "loss": 0.0001, |
| "step": 29450 |
| }, |
| { |
| "epoch": 0.8235593817721183, |
| "grad_norm": 0.89453125, |
| "learning_rate": 3.6389800264778686e-05, |
| "loss": 0.0001, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.8249552451310541, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.6101997352213206e-05, |
| "loss": 0.0001, |
| "step": 29550 |
| }, |
| { |
| "epoch": 0.8263511084899899, |
| "grad_norm": 0.47265625, |
| "learning_rate": 3.581419443964773e-05, |
| "loss": 0.0001, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.8277469718489258, |
| "grad_norm": 0.70703125, |
| "learning_rate": 3.552639152708225e-05, |
| "loss": 0.0001, |
| "step": 29650 |
| }, |
| { |
| "epoch": 0.8291428352078615, |
| "grad_norm": 0.36328125, |
| "learning_rate": 3.5238588614516785e-05, |
| "loss": 0.0001, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.8305386985667973, |
| "grad_norm": 0.84375, |
| "learning_rate": 3.4950785701951304e-05, |
| "loss": 0.0001, |
| "step": 29750 |
| }, |
| { |
| "epoch": 0.8319345619257331, |
| "grad_norm": 2.5625, |
| "learning_rate": 3.466298278938583e-05, |
| "loss": 0.0001, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.8333304252846688, |
| "grad_norm": 0.029052734375, |
| "learning_rate": 3.437517987682036e-05, |
| "loss": 0.0001, |
| "step": 29850 |
| }, |
| { |
| "epoch": 0.8347262886436047, |
| "grad_norm": 0.84765625, |
| "learning_rate": 3.408737696425488e-05, |
| "loss": 0.0001, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.8361221520025405, |
| "grad_norm": 0.5546875, |
| "learning_rate": 3.37995740516894e-05, |
| "loss": 0.0001, |
| "step": 29950 |
| }, |
| { |
| "epoch": 0.8375180153614763, |
| "grad_norm": 0.302734375, |
| "learning_rate": 3.351177113912393e-05, |
| "loss": 0.0001, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.8375180153614763, |
| "eval_loss": 7.90274134487845e-06, |
| "eval_mae": 0.00223693554289639, |
| "eval_rmse": 0.002811181591823697, |
| "eval_runtime": 359.2158, |
| "eval_samples_per_second": 5.568, |
| "eval_steps_per_second": 5.568, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.838913878720412, |
| "grad_norm": 0.451171875, |
| "learning_rate": 3.3223968226558455e-05, |
| "loss": 0.0001, |
| "step": 30050 |
| }, |
| { |
| "epoch": 0.8403097420793478, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.293616531399298e-05, |
| "loss": 0.0001, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.8417056054382837, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 3.26483624014275e-05, |
| "loss": 0.0001, |
| "step": 30150 |
| }, |
| { |
| "epoch": 0.8431014687972195, |
| "grad_norm": 1.515625, |
| "learning_rate": 3.236055948886203e-05, |
| "loss": 0.0001, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.8444973321561552, |
| "grad_norm": 0.6953125, |
| "learning_rate": 3.2072756576296554e-05, |
| "loss": 0.0001, |
| "step": 30250 |
| }, |
| { |
| "epoch": 0.845893195515091, |
| "grad_norm": 0.23046875, |
| "learning_rate": 3.178495366373107e-05, |
| "loss": 0.0001, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.8472890588740268, |
| "grad_norm": 0.55859375, |
| "learning_rate": 3.1497150751165606e-05, |
| "loss": 0.0001, |
| "step": 30350 |
| }, |
| { |
| "epoch": 0.8486849222329627, |
| "grad_norm": 0.11328125, |
| "learning_rate": 3.1209347838600126e-05, |
| "loss": 0.0001, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.8500807855918984, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.092154492603465e-05, |
| "loss": 0.0001, |
| "step": 30450 |
| }, |
| { |
| "epoch": 0.8514766489508342, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 3.063374201346918e-05, |
| "loss": 0.0001, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.85287251230977, |
| "grad_norm": 0.078125, |
| "learning_rate": 3.03459391009037e-05, |
| "loss": 0.0001, |
| "step": 30550 |
| }, |
| { |
| "epoch": 0.8542683756687058, |
| "grad_norm": 1.125, |
| "learning_rate": 3.0058136188338228e-05, |
| "loss": 0.0001, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.8556642390276415, |
| "grad_norm": 0.70703125, |
| "learning_rate": 2.9770333275772754e-05, |
| "loss": 0.0001, |
| "step": 30650 |
| }, |
| { |
| "epoch": 0.8570601023865774, |
| "grad_norm": 1.5390625, |
| "learning_rate": 2.9482530363207277e-05, |
| "loss": 0.0001, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.8584559657455132, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 2.91947274506418e-05, |
| "loss": 0.0001, |
| "step": 30750 |
| }, |
| { |
| "epoch": 0.859851829104449, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.8906924538076323e-05, |
| "loss": 0.0001, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.8612476924633847, |
| "grad_norm": 0.28515625, |
| "learning_rate": 2.8619121625510852e-05, |
| "loss": 0.0001, |
| "step": 30850 |
| }, |
| { |
| "epoch": 0.8626435558223206, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.8331318712945375e-05, |
| "loss": 0.0001, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.8640394191812564, |
| "grad_norm": 1.125, |
| "learning_rate": 2.80435158003799e-05, |
| "loss": 0.0001, |
| "step": 30950 |
| }, |
| { |
| "epoch": 0.8654352825401921, |
| "grad_norm": 2.421875, |
| "learning_rate": 2.7755712887814428e-05, |
| "loss": 0.0001, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.8668311458991279, |
| "grad_norm": 0.78515625, |
| "learning_rate": 2.746790997524895e-05, |
| "loss": 0.0001, |
| "step": 31050 |
| }, |
| { |
| "epoch": 0.8682270092580637, |
| "grad_norm": 0.083984375, |
| "learning_rate": 2.7180107062683474e-05, |
| "loss": 0.0001, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.8696228726169996, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.6892304150118004e-05, |
| "loss": 0.0001, |
| "step": 31150 |
| }, |
| { |
| "epoch": 0.8710187359759353, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.6604501237552526e-05, |
| "loss": 0.0001, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.8724145993348711, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.631669832498705e-05, |
| "loss": 0.0001, |
| "step": 31250 |
| }, |
| { |
| "epoch": 0.8724145993348711, |
| "eval_loss": 7.70491715229582e-06, |
| "eval_mae": 0.002213448518887162, |
| "eval_rmse": 0.002775773173198104, |
| "eval_runtime": 314.7817, |
| "eval_samples_per_second": 6.354, |
| "eval_steps_per_second": 6.354, |
| "step": 31250 |
| }, |
| { |
| "epoch": 0.8738104626938069, |
| "grad_norm": 1.734375, |
| "learning_rate": 2.6028895412421572e-05, |
| "loss": 0.0001, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.8752063260527427, |
| "grad_norm": 0.5625, |
| "learning_rate": 2.5741092499856102e-05, |
| "loss": 0.0001, |
| "step": 31350 |
| }, |
| { |
| "epoch": 0.8766021894116784, |
| "grad_norm": 2.203125, |
| "learning_rate": 2.5453289587290625e-05, |
| "loss": 0.0001, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.8779980527706143, |
| "grad_norm": 2.921875, |
| "learning_rate": 2.5165486674725148e-05, |
| "loss": 0.0001, |
| "step": 31450 |
| }, |
| { |
| "epoch": 0.8793939161295501, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 2.4877683762159674e-05, |
| "loss": 0.0001, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.8807897794884859, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.45898808495942e-05, |
| "loss": 0.0001, |
| "step": 31550 |
| }, |
| { |
| "epoch": 0.8821856428474216, |
| "grad_norm": 0.8515625, |
| "learning_rate": 2.4302077937028723e-05, |
| "loss": 0.0001, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.8835815062063574, |
| "grad_norm": 0.625, |
| "learning_rate": 2.401427502446325e-05, |
| "loss": 0.0001, |
| "step": 31650 |
| }, |
| { |
| "epoch": 0.8849773695652933, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 2.3726472111897773e-05, |
| "loss": 0.0001, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.8863732329242291, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.3438669199332296e-05, |
| "loss": 0.0001, |
| "step": 31750 |
| }, |
| { |
| "epoch": 0.8877690962831648, |
| "grad_norm": 0.3359375, |
| "learning_rate": 2.3150866286766822e-05, |
| "loss": 0.0001, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.8891649596421006, |
| "grad_norm": 1.2890625, |
| "learning_rate": 2.2863063374201348e-05, |
| "loss": 0.0001, |
| "step": 31850 |
| }, |
| { |
| "epoch": 0.8905608230010365, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.257526046163587e-05, |
| "loss": 0.0001, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.8919566863599723, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.2287457549070397e-05, |
| "loss": 0.0001, |
| "step": 31950 |
| }, |
| { |
| "epoch": 0.893352549718908, |
| "grad_norm": 0.3046875, |
| "learning_rate": 2.1999654636504924e-05, |
| "loss": 0.0001, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.8947484130778438, |
| "grad_norm": 0.146484375, |
| "learning_rate": 2.1711851723939447e-05, |
| "loss": 0.0001, |
| "step": 32050 |
| }, |
| { |
| "epoch": 0.8961442764367796, |
| "grad_norm": 0.166015625, |
| "learning_rate": 2.1424048811373973e-05, |
| "loss": 0.0001, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.8975401397957155, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.1136245898808496e-05, |
| "loss": 0.0001, |
| "step": 32150 |
| }, |
| { |
| "epoch": 0.8989360031546512, |
| "grad_norm": 0.326171875, |
| "learning_rate": 2.0848442986243022e-05, |
| "loss": 0.0001, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.900331866513587, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.056064007367755e-05, |
| "loss": 0.0001, |
| "step": 32250 |
| }, |
| { |
| "epoch": 0.9017277298725228, |
| "grad_norm": 0.33203125, |
| "learning_rate": 2.027283716111207e-05, |
| "loss": 0.0001, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.9031235932314585, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.9985034248546598e-05, |
| "loss": 0.0001, |
| "step": 32350 |
| }, |
| { |
| "epoch": 0.9045194565903943, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.969723133598112e-05, |
| "loss": 0.0001, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.9059153199493302, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.9409428423415647e-05, |
| "loss": 0.0001, |
| "step": 32450 |
| }, |
| { |
| "epoch": 0.907311183308266, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 1.9121625510850173e-05, |
| "loss": 0.0001, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.907311183308266, |
| "eval_loss": 8.019745109777432e-06, |
| "eval_mae": 0.0022684482391923666, |
| "eval_rmse": 0.002831915393471718, |
| "eval_runtime": 314.5488, |
| "eval_samples_per_second": 6.358, |
| "eval_steps_per_second": 6.358, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.9087070466672017, |
| "grad_norm": 0.248046875, |
| "learning_rate": 1.8833822598284696e-05, |
| "loss": 0.0001, |
| "step": 32550 |
| }, |
| { |
| "epoch": 0.9101029100261375, |
| "grad_norm": 0.28125, |
| "learning_rate": 1.8546019685719223e-05, |
| "loss": 0.0001, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.9114987733850733, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.8258216773153745e-05, |
| "loss": 0.0001, |
| "step": 32650 |
| }, |
| { |
| "epoch": 0.9128946367440092, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.797041386058827e-05, |
| "loss": 0.0001, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.9142905001029449, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 1.7682610948022795e-05, |
| "loss": 0.0001, |
| "step": 32750 |
| }, |
| { |
| "epoch": 0.9156863634618807, |
| "grad_norm": 0.6328125, |
| "learning_rate": 1.7394808035457318e-05, |
| "loss": 0.0001, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.9170822268208165, |
| "grad_norm": 0.06591796875, |
| "learning_rate": 1.7107005122891844e-05, |
| "loss": 0.0001, |
| "step": 32850 |
| }, |
| { |
| "epoch": 0.9184780901797523, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.6819202210326367e-05, |
| "loss": 0.0001, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.9198739535386881, |
| "grad_norm": 0.234375, |
| "learning_rate": 1.6531399297760893e-05, |
| "loss": 0.0001, |
| "step": 32950 |
| }, |
| { |
| "epoch": 0.9212698168976239, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.624359638519542e-05, |
| "loss": 0.0001, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.9226656802565597, |
| "grad_norm": 0.74609375, |
| "learning_rate": 1.5955793472629942e-05, |
| "loss": 0.0001, |
| "step": 33050 |
| }, |
| { |
| "epoch": 0.9240615436154955, |
| "grad_norm": 0.58984375, |
| "learning_rate": 1.566799056006447e-05, |
| "loss": 0.0001, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.9254574069744312, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.5380187647498995e-05, |
| "loss": 0.0001, |
| "step": 33150 |
| }, |
| { |
| "epoch": 0.9268532703333671, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.5092384734933518e-05, |
| "loss": 0.0001, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.9282491336923029, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.4804581822368044e-05, |
| "loss": 0.0001, |
| "step": 33250 |
| }, |
| { |
| "epoch": 0.9296449970512387, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.4516778909802567e-05, |
| "loss": 0.0001, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.9310408604101744, |
| "grad_norm": 0.138671875, |
| "learning_rate": 1.4228975997237094e-05, |
| "loss": 0.0001, |
| "step": 33350 |
| }, |
| { |
| "epoch": 0.9324367237691102, |
| "grad_norm": 0.5546875, |
| "learning_rate": 1.3941173084671618e-05, |
| "loss": 0.0001, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.9338325871280461, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 1.3653370172106141e-05, |
| "loss": 0.0001, |
| "step": 33450 |
| }, |
| { |
| "epoch": 0.9352284504869818, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.3365567259540667e-05, |
| "loss": 0.0001, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.9366243138459176, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.307776434697519e-05, |
| "loss": 0.0001, |
| "step": 33550 |
| }, |
| { |
| "epoch": 0.9380201772048534, |
| "grad_norm": 0.66015625, |
| "learning_rate": 1.2789961434409717e-05, |
| "loss": 0.0001, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.9394160405637892, |
| "grad_norm": 1.609375, |
| "learning_rate": 1.2502158521844243e-05, |
| "loss": 0.0001, |
| "step": 33650 |
| }, |
| { |
| "epoch": 0.940811903922725, |
| "grad_norm": 0.265625, |
| "learning_rate": 1.2214355609278766e-05, |
| "loss": 0.0001, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.9422077672816608, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 1.1926552696713292e-05, |
| "loss": 0.0001, |
| "step": 33750 |
| }, |
| { |
| "epoch": 0.9422077672816608, |
| "eval_loss": 7.356254627666203e-06, |
| "eval_mae": 0.0021641200874000788, |
| "eval_rmse": 0.002712241606786847, |
| "eval_runtime": 314.5626, |
| "eval_samples_per_second": 6.358, |
| "eval_steps_per_second": 6.358, |
| "step": 33750 |
| }, |
| { |
| "epoch": 0.9436036306405966, |
| "grad_norm": 0.01300048828125, |
| "learning_rate": 1.1638749784147817e-05, |
| "loss": 0.0001, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.9449994939995324, |
| "grad_norm": 0.056640625, |
| "learning_rate": 1.1350946871582341e-05, |
| "loss": 0.0001, |
| "step": 33850 |
| }, |
| { |
| "epoch": 0.9463953573584681, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.1063143959016866e-05, |
| "loss": 0.0001, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.947791220717404, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.077534104645139e-05, |
| "loss": 0.0001, |
| "step": 33950 |
| }, |
| { |
| "epoch": 0.9491870840763398, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.0487538133885915e-05, |
| "loss": 0.0001, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.9505829474352756, |
| "grad_norm": 0.283203125, |
| "learning_rate": 1.019973522132044e-05, |
| "loss": 0.0001, |
| "step": 34050 |
| }, |
| { |
| "epoch": 0.9519788107942113, |
| "grad_norm": 0.61328125, |
| "learning_rate": 9.911932308754965e-06, |
| "loss": 0.0001, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.9533746741531471, |
| "grad_norm": 0.5546875, |
| "learning_rate": 9.624129396189489e-06, |
| "loss": 0.0001, |
| "step": 34150 |
| }, |
| { |
| "epoch": 0.954770537512083, |
| "grad_norm": 0.400390625, |
| "learning_rate": 9.336326483624015e-06, |
| "loss": 0.0001, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.9561664008710188, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 9.04852357105854e-06, |
| "loss": 0.0001, |
| "step": 34250 |
| }, |
| { |
| "epoch": 0.9575622642299545, |
| "grad_norm": 0.294921875, |
| "learning_rate": 8.760720658493065e-06, |
| "loss": 0.0001, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.9589581275888903, |
| "grad_norm": 0.404296875, |
| "learning_rate": 8.47291774592759e-06, |
| "loss": 0.0001, |
| "step": 34350 |
| }, |
| { |
| "epoch": 0.9603539909478261, |
| "grad_norm": 1.03125, |
| "learning_rate": 8.185114833362114e-06, |
| "loss": 0.0001, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.961749854306762, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 7.897311920796639e-06, |
| "loss": 0.0001, |
| "step": 34450 |
| }, |
| { |
| "epoch": 0.9631457176656977, |
| "grad_norm": 0.341796875, |
| "learning_rate": 7.609509008231164e-06, |
| "loss": 0.0001, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.9645415810246335, |
| "grad_norm": 0.71875, |
| "learning_rate": 7.321706095665689e-06, |
| "loss": 0.0001, |
| "step": 34550 |
| }, |
| { |
| "epoch": 0.9659374443835693, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 7.033903183100212e-06, |
| "loss": 0.0001, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.967333307742505, |
| "grad_norm": 0.052734375, |
| "learning_rate": 6.746100270534739e-06, |
| "loss": 0.0001, |
| "step": 34650 |
| }, |
| { |
| "epoch": 0.9687291711014409, |
| "grad_norm": 0.875, |
| "learning_rate": 6.458297357969263e-06, |
| "loss": 0.0001, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.9701250344603767, |
| "grad_norm": 0.310546875, |
| "learning_rate": 6.170494445403788e-06, |
| "loss": 0.0001, |
| "step": 34750 |
| }, |
| { |
| "epoch": 0.9715208978193125, |
| "grad_norm": 0.453125, |
| "learning_rate": 5.8826915328383125e-06, |
| "loss": 0.0001, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.9729167611782482, |
| "grad_norm": 0.88671875, |
| "learning_rate": 5.594888620272837e-06, |
| "loss": 0.0001, |
| "step": 34850 |
| }, |
| { |
| "epoch": 0.974312624537184, |
| "grad_norm": 0.032470703125, |
| "learning_rate": 5.307085707707362e-06, |
| "loss": 0.0001, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.9757084878961199, |
| "grad_norm": 1.5546875, |
| "learning_rate": 5.019282795141887e-06, |
| "loss": 0.0001, |
| "step": 34950 |
| }, |
| { |
| "epoch": 0.9771043512550557, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.731479882576412e-06, |
| "loss": 0.0001, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.9771043512550557, |
| "eval_loss": 7.189828011178179e-06, |
| "eval_mae": 0.0021512035746127367, |
| "eval_rmse": 0.0026813854929059744, |
| "eval_runtime": 314.4052, |
| "eval_samples_per_second": 6.361, |
| "eval_steps_per_second": 6.361, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.9785002146139914, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.443676970010937e-06, |
| "loss": 0.0001, |
| "step": 35050 |
| }, |
| { |
| "epoch": 0.9798960779729272, |
| "grad_norm": 0.5390625, |
| "learning_rate": 4.155874057445461e-06, |
| "loss": 0.0001, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.981291941331863, |
| "grad_norm": 0.83203125, |
| "learning_rate": 3.8680711448799866e-06, |
| "loss": 0.0001, |
| "step": 35150 |
| }, |
| { |
| "epoch": 0.9826878046907989, |
| "grad_norm": 0.48046875, |
| "learning_rate": 3.580268232314511e-06, |
| "loss": 0.0001, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.9840836680497346, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.292465319749036e-06, |
| "loss": 0.0001, |
| "step": 35250 |
| }, |
| { |
| "epoch": 0.9854795314086704, |
| "grad_norm": 0.3828125, |
| "learning_rate": 3.004662407183561e-06, |
| "loss": 0.0001, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.9868753947676062, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.716859494618086e-06, |
| "loss": 0.0001, |
| "step": 35350 |
| }, |
| { |
| "epoch": 0.988271258126542, |
| "grad_norm": 0.7734375, |
| "learning_rate": 2.4290565820526105e-06, |
| "loss": 0.0001, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.9896671214854778, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.141253669487135e-06, |
| "loss": 0.0001, |
| "step": 35450 |
| }, |
| { |
| "epoch": 0.9910629848444136, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 1.8534507569216602e-06, |
| "loss": 0.0001, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.9924588482033494, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 1.565647844356185e-06, |
| "loss": 0.0001, |
| "step": 35550 |
| }, |
| { |
| "epoch": 0.9938547115622852, |
| "grad_norm": 0.294921875, |
| "learning_rate": 1.2778449317907098e-06, |
| "loss": 0.0001, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.9952505749212209, |
| "grad_norm": 0.2734375, |
| "learning_rate": 9.900420192252346e-07, |
| "loss": 0.0001, |
| "step": 35650 |
| }, |
| { |
| "epoch": 0.9966464382801568, |
| "grad_norm": 0.287109375, |
| "learning_rate": 7.022391066597595e-07, |
| "loss": 0.0001, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.9980423016390926, |
| "grad_norm": 0.98046875, |
| "learning_rate": 4.144361940942842e-07, |
| "loss": 0.0001, |
| "step": 35750 |
| }, |
| { |
| "epoch": 0.9994381649980283, |
| "grad_norm": 0.341796875, |
| "learning_rate": 1.2663328152880908e-07, |
| "loss": 0.0001, |
| "step": 35800 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 35821, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.780150075109409e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|