PEFT
Safetensors
task-13-google-gemma-2-2b / trainer_state.json
ManyingZ's picture
Upload folder using huggingface_hub
7776ece verified
Raw
History Blame Contribute Delete
272 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 146.4307504575961,
"eval_steps": 500,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09762050030506407,
"grad_norm": 1.6181267499923706,
"learning_rate": 4e-05,
"loss": 2.5822,
"step": 20
},
{
"epoch": 0.19524100061012814,
"grad_norm": 1.536891222000122,
"learning_rate": 8e-05,
"loss": 2.2602,
"step": 40
},
{
"epoch": 0.2928615009151922,
"grad_norm": 1.1685199737548828,
"learning_rate": 0.00012,
"loss": 1.7209,
"step": 60
},
{
"epoch": 0.3904820012202563,
"grad_norm": 1.5937044620513916,
"learning_rate": 0.00016,
"loss": 1.6235,
"step": 80
},
{
"epoch": 0.4881025015253203,
"grad_norm": 2.1078925132751465,
"learning_rate": 0.0002,
"loss": 1.5259,
"step": 100
},
{
"epoch": 0.5857230018303844,
"grad_norm": 2.369110345840454,
"learning_rate": 0.0001998688524590164,
"loss": 1.5145,
"step": 120
},
{
"epoch": 0.6833435021354485,
"grad_norm": 2.531956434249878,
"learning_rate": 0.0001997377049180328,
"loss": 1.4408,
"step": 140
},
{
"epoch": 0.7809640024405126,
"grad_norm": 2.254030466079712,
"learning_rate": 0.00019960655737704918,
"loss": 1.3881,
"step": 160
},
{
"epoch": 0.8785845027455765,
"grad_norm": 1.8673783540725708,
"learning_rate": 0.0001994754098360656,
"loss": 1.3656,
"step": 180
},
{
"epoch": 0.9762050030506406,
"grad_norm": 2.6715195178985596,
"learning_rate": 0.00019934426229508198,
"loss": 1.2812,
"step": 200
},
{
"epoch": 1.0738255033557047,
"grad_norm": 1.7909507751464844,
"learning_rate": 0.00019921311475409837,
"loss": 1.3006,
"step": 220
},
{
"epoch": 1.1714460036607688,
"grad_norm": 2.6789026260375977,
"learning_rate": 0.00019908196721311476,
"loss": 1.2214,
"step": 240
},
{
"epoch": 1.2690665039658329,
"grad_norm": 2.88253116607666,
"learning_rate": 0.00019895081967213115,
"loss": 1.2806,
"step": 260
},
{
"epoch": 1.366687004270897,
"grad_norm": 1.9062846899032593,
"learning_rate": 0.00019881967213114757,
"loss": 1.2342,
"step": 280
},
{
"epoch": 1.4643075045759608,
"grad_norm": 2.622042179107666,
"learning_rate": 0.00019868852459016393,
"loss": 1.1657,
"step": 300
},
{
"epoch": 1.561928004881025,
"grad_norm": 2.6899402141571045,
"learning_rate": 0.00019855737704918035,
"loss": 1.1497,
"step": 320
},
{
"epoch": 1.659548505186089,
"grad_norm": 3.025324821472168,
"learning_rate": 0.00019842622950819674,
"loss": 1.0834,
"step": 340
},
{
"epoch": 1.757169005491153,
"grad_norm": 2.729680061340332,
"learning_rate": 0.00019829508196721313,
"loss": 1.1031,
"step": 360
},
{
"epoch": 1.8547895057962172,
"grad_norm": 3.1579582691192627,
"learning_rate": 0.00019816393442622951,
"loss": 1.1726,
"step": 380
},
{
"epoch": 1.9524100061012812,
"grad_norm": 2.698084592819214,
"learning_rate": 0.0001980327868852459,
"loss": 1.1241,
"step": 400
},
{
"epoch": 2.0500305064063453,
"grad_norm": 2.4257960319519043,
"learning_rate": 0.00019790163934426232,
"loss": 1.0619,
"step": 420
},
{
"epoch": 2.1476510067114094,
"grad_norm": 3.122441291809082,
"learning_rate": 0.00019777049180327868,
"loss": 1.0413,
"step": 440
},
{
"epoch": 2.2452715070164735,
"grad_norm": 3.2882649898529053,
"learning_rate": 0.0001976393442622951,
"loss": 0.9949,
"step": 460
},
{
"epoch": 2.3428920073215376,
"grad_norm": 2.3513290882110596,
"learning_rate": 0.0001975081967213115,
"loss": 1.0627,
"step": 480
},
{
"epoch": 2.4405125076266017,
"grad_norm": 2.9709153175354004,
"learning_rate": 0.00019737704918032788,
"loss": 1.0612,
"step": 500
},
{
"epoch": 2.5381330079316657,
"grad_norm": 4.2656569480896,
"learning_rate": 0.00019724590163934427,
"loss": 0.9615,
"step": 520
},
{
"epoch": 2.63575350823673,
"grad_norm": 2.7959163188934326,
"learning_rate": 0.00019711475409836066,
"loss": 0.9915,
"step": 540
},
{
"epoch": 2.733374008541794,
"grad_norm": 2.7856175899505615,
"learning_rate": 0.00019698360655737707,
"loss": 1.0299,
"step": 560
},
{
"epoch": 2.830994508846858,
"grad_norm": 3.0926871299743652,
"learning_rate": 0.00019685245901639344,
"loss": 0.9642,
"step": 580
},
{
"epoch": 2.9286150091519216,
"grad_norm": 4.457515239715576,
"learning_rate": 0.00019672131147540985,
"loss": 0.9922,
"step": 600
},
{
"epoch": 3.026235509456986,
"grad_norm": 3.0343308448791504,
"learning_rate": 0.00019659016393442624,
"loss": 0.9777,
"step": 620
},
{
"epoch": 3.1238560097620502,
"grad_norm": 3.12493896484375,
"learning_rate": 0.00019645901639344263,
"loss": 0.8765,
"step": 640
},
{
"epoch": 3.221476510067114,
"grad_norm": 3.1015610694885254,
"learning_rate": 0.00019632786885245902,
"loss": 0.8882,
"step": 660
},
{
"epoch": 3.319097010372178,
"grad_norm": 3.334591865539551,
"learning_rate": 0.0001961967213114754,
"loss": 0.8958,
"step": 680
},
{
"epoch": 3.416717510677242,
"grad_norm": 3.3690192699432373,
"learning_rate": 0.00019606557377049183,
"loss": 0.954,
"step": 700
},
{
"epoch": 3.514338010982306,
"grad_norm": 3.070910930633545,
"learning_rate": 0.00019593442622950822,
"loss": 0.8964,
"step": 720
},
{
"epoch": 3.61195851128737,
"grad_norm": 3.369615077972412,
"learning_rate": 0.0001958032786885246,
"loss": 0.8758,
"step": 740
},
{
"epoch": 3.7095790115924343,
"grad_norm": 2.3834476470947266,
"learning_rate": 0.000195672131147541,
"loss": 0.9307,
"step": 760
},
{
"epoch": 3.8071995118974984,
"grad_norm": 2.924922466278076,
"learning_rate": 0.00019554098360655738,
"loss": 0.8911,
"step": 780
},
{
"epoch": 3.9048200122025625,
"grad_norm": 3.321655035018921,
"learning_rate": 0.00019540983606557377,
"loss": 0.86,
"step": 800
},
{
"epoch": 4.002440512507627,
"grad_norm": 4.9272074699401855,
"learning_rate": 0.00019527868852459016,
"loss": 0.9365,
"step": 820
},
{
"epoch": 4.100061012812691,
"grad_norm": 3.4161181449890137,
"learning_rate": 0.00019514754098360658,
"loss": 0.804,
"step": 840
},
{
"epoch": 4.197681513117755,
"grad_norm": 2.86344051361084,
"learning_rate": 0.00019501639344262297,
"loss": 0.8029,
"step": 860
},
{
"epoch": 4.295302013422819,
"grad_norm": 3.9372735023498535,
"learning_rate": 0.00019488524590163936,
"loss": 0.7969,
"step": 880
},
{
"epoch": 4.392922513727883,
"grad_norm": 3.2105510234832764,
"learning_rate": 0.00019475409836065575,
"loss": 0.7838,
"step": 900
},
{
"epoch": 4.490543014032947,
"grad_norm": 3.1704697608947754,
"learning_rate": 0.00019462295081967214,
"loss": 0.8246,
"step": 920
},
{
"epoch": 4.588163514338011,
"grad_norm": 2.9047462940216064,
"learning_rate": 0.00019449180327868855,
"loss": 0.79,
"step": 940
},
{
"epoch": 4.685784014643075,
"grad_norm": 3.0593173503875732,
"learning_rate": 0.00019436065573770491,
"loss": 0.8105,
"step": 960
},
{
"epoch": 4.783404514948139,
"grad_norm": 3.4904775619506836,
"learning_rate": 0.00019422950819672133,
"loss": 0.864,
"step": 980
},
{
"epoch": 4.881025015253203,
"grad_norm": 3.080754280090332,
"learning_rate": 0.00019409836065573772,
"loss": 0.8163,
"step": 1000
},
{
"epoch": 4.978645515558267,
"grad_norm": 3.663107395172119,
"learning_rate": 0.0001939672131147541,
"loss": 0.8192,
"step": 1020
},
{
"epoch": 5.0762660158633315,
"grad_norm": 3.986875534057617,
"learning_rate": 0.0001938360655737705,
"loss": 0.7943,
"step": 1040
},
{
"epoch": 5.173886516168396,
"grad_norm": 3.140963554382324,
"learning_rate": 0.0001937049180327869,
"loss": 0.7274,
"step": 1060
},
{
"epoch": 5.27150701647346,
"grad_norm": 4.659877300262451,
"learning_rate": 0.0001935737704918033,
"loss": 0.7494,
"step": 1080
},
{
"epoch": 5.369127516778524,
"grad_norm": 4.54330587387085,
"learning_rate": 0.00019344262295081967,
"loss": 0.6986,
"step": 1100
},
{
"epoch": 5.466748017083588,
"grad_norm": 3.893850088119507,
"learning_rate": 0.00019331147540983608,
"loss": 0.7555,
"step": 1120
},
{
"epoch": 5.564368517388652,
"grad_norm": 3.24857759475708,
"learning_rate": 0.00019318032786885247,
"loss": 0.721,
"step": 1140
},
{
"epoch": 5.661989017693716,
"grad_norm": 4.82593297958374,
"learning_rate": 0.00019304918032786886,
"loss": 0.7523,
"step": 1160
},
{
"epoch": 5.75960951799878,
"grad_norm": 3.055907726287842,
"learning_rate": 0.00019291803278688525,
"loss": 0.7719,
"step": 1180
},
{
"epoch": 5.857230018303844,
"grad_norm": 3.192716121673584,
"learning_rate": 0.00019278688524590164,
"loss": 0.7732,
"step": 1200
},
{
"epoch": 5.954850518608908,
"grad_norm": 3.196037769317627,
"learning_rate": 0.00019265573770491806,
"loss": 0.745,
"step": 1220
},
{
"epoch": 6.052471018913972,
"grad_norm": 3.7806878089904785,
"learning_rate": 0.00019252459016393442,
"loss": 0.713,
"step": 1240
},
{
"epoch": 6.150091519219036,
"grad_norm": 3.6102254390716553,
"learning_rate": 0.00019239344262295084,
"loss": 0.6063,
"step": 1260
},
{
"epoch": 6.2477120195241005,
"grad_norm": 4.120365142822266,
"learning_rate": 0.00019226229508196723,
"loss": 0.6771,
"step": 1280
},
{
"epoch": 6.345332519829164,
"grad_norm": 3.118666887283325,
"learning_rate": 0.00019213114754098362,
"loss": 0.7173,
"step": 1300
},
{
"epoch": 6.442953020134228,
"grad_norm": 3.8055977821350098,
"learning_rate": 0.000192,
"loss": 0.6609,
"step": 1320
},
{
"epoch": 6.540573520439292,
"grad_norm": 3.4207704067230225,
"learning_rate": 0.0001918688524590164,
"loss": 0.7084,
"step": 1340
},
{
"epoch": 6.638194020744356,
"grad_norm": 3.812415838241577,
"learning_rate": 0.0001917377049180328,
"loss": 0.744,
"step": 1360
},
{
"epoch": 6.73581452104942,
"grad_norm": 4.302225112915039,
"learning_rate": 0.00019160655737704917,
"loss": 0.667,
"step": 1380
},
{
"epoch": 6.833435021354484,
"grad_norm": 3.3958733081817627,
"learning_rate": 0.0001914754098360656,
"loss": 0.6769,
"step": 1400
},
{
"epoch": 6.931055521659548,
"grad_norm": 3.6200642585754395,
"learning_rate": 0.00019134426229508198,
"loss": 0.7494,
"step": 1420
},
{
"epoch": 7.028676021964612,
"grad_norm": 3.2580292224884033,
"learning_rate": 0.00019121311475409837,
"loss": 0.6252,
"step": 1440
},
{
"epoch": 7.126296522269676,
"grad_norm": 3.581437826156616,
"learning_rate": 0.00019108196721311476,
"loss": 0.6072,
"step": 1460
},
{
"epoch": 7.22391702257474,
"grad_norm": 3.7347512245178223,
"learning_rate": 0.00019095081967213115,
"loss": 0.5715,
"step": 1480
},
{
"epoch": 7.3215375228798045,
"grad_norm": 3.564328193664551,
"learning_rate": 0.00019081967213114756,
"loss": 0.5937,
"step": 1500
},
{
"epoch": 7.419158023184869,
"grad_norm": 3.8675971031188965,
"learning_rate": 0.00019068852459016395,
"loss": 0.6441,
"step": 1520
},
{
"epoch": 7.516778523489933,
"grad_norm": 3.7394795417785645,
"learning_rate": 0.00019055737704918034,
"loss": 0.6345,
"step": 1540
},
{
"epoch": 7.614399023794997,
"grad_norm": 3.7391791343688965,
"learning_rate": 0.00019042622950819673,
"loss": 0.6334,
"step": 1560
},
{
"epoch": 7.712019524100061,
"grad_norm": 3.2816712856292725,
"learning_rate": 0.00019029508196721312,
"loss": 0.6448,
"step": 1580
},
{
"epoch": 7.809640024405125,
"grad_norm": 3.741111993789673,
"learning_rate": 0.0001901639344262295,
"loss": 0.6605,
"step": 1600
},
{
"epoch": 7.907260524710189,
"grad_norm": 3.5533151626586914,
"learning_rate": 0.0001900327868852459,
"loss": 0.6434,
"step": 1620
},
{
"epoch": 8.004881025015253,
"grad_norm": 3.243546724319458,
"learning_rate": 0.00018990163934426232,
"loss": 0.6923,
"step": 1640
},
{
"epoch": 8.102501525320317,
"grad_norm": 3.860666513442993,
"learning_rate": 0.0001897704918032787,
"loss": 0.5619,
"step": 1660
},
{
"epoch": 8.200122025625381,
"grad_norm": 3.4826905727386475,
"learning_rate": 0.0001896393442622951,
"loss": 0.5446,
"step": 1680
},
{
"epoch": 8.297742525930445,
"grad_norm": 5.119688034057617,
"learning_rate": 0.00018950819672131148,
"loss": 0.5525,
"step": 1700
},
{
"epoch": 8.39536302623551,
"grad_norm": 3.46353816986084,
"learning_rate": 0.00018937704918032787,
"loss": 0.5346,
"step": 1720
},
{
"epoch": 8.492983526540574,
"grad_norm": 4.458425045013428,
"learning_rate": 0.0001892459016393443,
"loss": 0.6127,
"step": 1740
},
{
"epoch": 8.590604026845638,
"grad_norm": 3.592191457748413,
"learning_rate": 0.00018911475409836065,
"loss": 0.5721,
"step": 1760
},
{
"epoch": 8.688224527150702,
"grad_norm": 4.131028652191162,
"learning_rate": 0.00018898360655737707,
"loss": 0.5799,
"step": 1780
},
{
"epoch": 8.785845027455766,
"grad_norm": 3.6534175872802734,
"learning_rate": 0.00018885245901639346,
"loss": 0.6198,
"step": 1800
},
{
"epoch": 8.88346552776083,
"grad_norm": 3.0888559818267822,
"learning_rate": 0.00018872131147540985,
"loss": 0.6309,
"step": 1820
},
{
"epoch": 8.981086028065894,
"grad_norm": 2.7711477279663086,
"learning_rate": 0.00018859016393442624,
"loss": 0.601,
"step": 1840
},
{
"epoch": 9.078706528370958,
"grad_norm": 2.7783164978027344,
"learning_rate": 0.00018845901639344263,
"loss": 0.5696,
"step": 1860
},
{
"epoch": 9.176327028676022,
"grad_norm": 3.6661510467529297,
"learning_rate": 0.00018832786885245904,
"loss": 0.5214,
"step": 1880
},
{
"epoch": 9.273947528981086,
"grad_norm": 3.4614779949188232,
"learning_rate": 0.0001881967213114754,
"loss": 0.5181,
"step": 1900
},
{
"epoch": 9.37156802928615,
"grad_norm": 3.470071315765381,
"learning_rate": 0.00018806557377049182,
"loss": 0.5196,
"step": 1920
},
{
"epoch": 9.469188529591214,
"grad_norm": 3.686056613922119,
"learning_rate": 0.0001879344262295082,
"loss": 0.5432,
"step": 1940
},
{
"epoch": 9.566809029896278,
"grad_norm": 3.4668374061584473,
"learning_rate": 0.0001878032786885246,
"loss": 0.5502,
"step": 1960
},
{
"epoch": 9.664429530201343,
"grad_norm": 4.547699928283691,
"learning_rate": 0.000187672131147541,
"loss": 0.5482,
"step": 1980
},
{
"epoch": 9.762050030506407,
"grad_norm": 3.784132957458496,
"learning_rate": 0.00018754098360655738,
"loss": 0.516,
"step": 2000
},
{
"epoch": 9.85967053081147,
"grad_norm": 4.160193920135498,
"learning_rate": 0.0001874098360655738,
"loss": 0.5658,
"step": 2020
},
{
"epoch": 9.957291031116535,
"grad_norm": 4.115555286407471,
"learning_rate": 0.00018727868852459016,
"loss": 0.5775,
"step": 2040
},
{
"epoch": 10.054911531421599,
"grad_norm": 3.361625909805298,
"learning_rate": 0.00018714754098360657,
"loss": 0.5105,
"step": 2060
},
{
"epoch": 10.152532031726663,
"grad_norm": 3.663980484008789,
"learning_rate": 0.00018701639344262296,
"loss": 0.4556,
"step": 2080
},
{
"epoch": 10.250152532031727,
"grad_norm": 3.6515650749206543,
"learning_rate": 0.00018688524590163935,
"loss": 0.4937,
"step": 2100
},
{
"epoch": 10.347773032336791,
"grad_norm": 3.2449493408203125,
"learning_rate": 0.00018675409836065574,
"loss": 0.481,
"step": 2120
},
{
"epoch": 10.445393532641855,
"grad_norm": 4.262176513671875,
"learning_rate": 0.00018662295081967213,
"loss": 0.4559,
"step": 2140
},
{
"epoch": 10.54301403294692,
"grad_norm": 3.885936737060547,
"learning_rate": 0.00018649180327868855,
"loss": 0.5263,
"step": 2160
},
{
"epoch": 10.640634533251983,
"grad_norm": 3.676922559738159,
"learning_rate": 0.00018636065573770494,
"loss": 0.4942,
"step": 2180
},
{
"epoch": 10.738255033557047,
"grad_norm": 4.31233024597168,
"learning_rate": 0.00018622950819672133,
"loss": 0.5339,
"step": 2200
},
{
"epoch": 10.835875533862112,
"grad_norm": 3.410269260406494,
"learning_rate": 0.00018609836065573772,
"loss": 0.5365,
"step": 2220
},
{
"epoch": 10.933496034167176,
"grad_norm": 3.114283800125122,
"learning_rate": 0.0001859672131147541,
"loss": 0.5252,
"step": 2240
},
{
"epoch": 11.03111653447224,
"grad_norm": 3.4641740322113037,
"learning_rate": 0.0001858360655737705,
"loss": 0.4993,
"step": 2260
},
{
"epoch": 11.128737034777304,
"grad_norm": 5.0364203453063965,
"learning_rate": 0.00018570491803278688,
"loss": 0.4097,
"step": 2280
},
{
"epoch": 11.226357535082368,
"grad_norm": 3.811703681945801,
"learning_rate": 0.0001855737704918033,
"loss": 0.4265,
"step": 2300
},
{
"epoch": 11.323978035387432,
"grad_norm": 3.528463125228882,
"learning_rate": 0.0001854426229508197,
"loss": 0.4542,
"step": 2320
},
{
"epoch": 11.421598535692496,
"grad_norm": 3.013249158859253,
"learning_rate": 0.00018531147540983608,
"loss": 0.4824,
"step": 2340
},
{
"epoch": 11.51921903599756,
"grad_norm": 3.7065834999084473,
"learning_rate": 0.00018518032786885247,
"loss": 0.4724,
"step": 2360
},
{
"epoch": 11.616839536302624,
"grad_norm": 3.271639347076416,
"learning_rate": 0.00018504918032786886,
"loss": 0.4731,
"step": 2380
},
{
"epoch": 11.714460036607688,
"grad_norm": 3.738567590713501,
"learning_rate": 0.00018491803278688527,
"loss": 0.5024,
"step": 2400
},
{
"epoch": 11.812080536912752,
"grad_norm": 3.5277130603790283,
"learning_rate": 0.00018478688524590164,
"loss": 0.4552,
"step": 2420
},
{
"epoch": 11.909701037217816,
"grad_norm": 3.909186840057373,
"learning_rate": 0.00018465573770491805,
"loss": 0.4454,
"step": 2440
},
{
"epoch": 12.00732153752288,
"grad_norm": 3.629971981048584,
"learning_rate": 0.00018452459016393444,
"loss": 0.4928,
"step": 2460
},
{
"epoch": 12.104942037827945,
"grad_norm": 4.8608245849609375,
"learning_rate": 0.0001843934426229508,
"loss": 0.395,
"step": 2480
},
{
"epoch": 12.202562538133009,
"grad_norm": 3.377361536026001,
"learning_rate": 0.00018426229508196722,
"loss": 0.4124,
"step": 2500
},
{
"epoch": 12.300183038438073,
"grad_norm": 3.1160948276519775,
"learning_rate": 0.0001841311475409836,
"loss": 0.4317,
"step": 2520
},
{
"epoch": 12.397803538743137,
"grad_norm": 4.747882843017578,
"learning_rate": 0.00018400000000000003,
"loss": 0.4054,
"step": 2540
},
{
"epoch": 12.495424039048201,
"grad_norm": 3.0192835330963135,
"learning_rate": 0.0001838688524590164,
"loss": 0.4384,
"step": 2560
},
{
"epoch": 12.593044539353265,
"grad_norm": 3.397606372833252,
"learning_rate": 0.0001837377049180328,
"loss": 0.4095,
"step": 2580
},
{
"epoch": 12.690665039658327,
"grad_norm": 3.571641445159912,
"learning_rate": 0.0001836065573770492,
"loss": 0.4526,
"step": 2600
},
{
"epoch": 12.788285539963393,
"grad_norm": 3.9279720783233643,
"learning_rate": 0.00018347540983606558,
"loss": 0.4447,
"step": 2620
},
{
"epoch": 12.885906040268456,
"grad_norm": 5.057145118713379,
"learning_rate": 0.00018334426229508197,
"loss": 0.4285,
"step": 2640
},
{
"epoch": 12.98352654057352,
"grad_norm": 4.515413761138916,
"learning_rate": 0.00018321311475409836,
"loss": 0.4614,
"step": 2660
},
{
"epoch": 13.081147040878584,
"grad_norm": 5.738150119781494,
"learning_rate": 0.00018308196721311478,
"loss": 0.3594,
"step": 2680
},
{
"epoch": 13.178767541183648,
"grad_norm": 4.123675346374512,
"learning_rate": 0.00018295081967213114,
"loss": 0.3604,
"step": 2700
},
{
"epoch": 13.276388041488712,
"grad_norm": 4.020540237426758,
"learning_rate": 0.00018281967213114756,
"loss": 0.3455,
"step": 2720
},
{
"epoch": 13.374008541793776,
"grad_norm": 3.946470260620117,
"learning_rate": 0.00018268852459016395,
"loss": 0.3936,
"step": 2740
},
{
"epoch": 13.47162904209884,
"grad_norm": 3.4082658290863037,
"learning_rate": 0.00018255737704918034,
"loss": 0.3853,
"step": 2760
},
{
"epoch": 13.569249542403904,
"grad_norm": 3.8602256774902344,
"learning_rate": 0.00018242622950819673,
"loss": 0.4138,
"step": 2780
},
{
"epoch": 13.666870042708968,
"grad_norm": 3.4819118976593018,
"learning_rate": 0.00018229508196721312,
"loss": 0.4235,
"step": 2800
},
{
"epoch": 13.764490543014032,
"grad_norm": 4.620802402496338,
"learning_rate": 0.00018216393442622953,
"loss": 0.3917,
"step": 2820
},
{
"epoch": 13.862111043319096,
"grad_norm": 4.1257500648498535,
"learning_rate": 0.00018203278688524592,
"loss": 0.4289,
"step": 2840
},
{
"epoch": 13.95973154362416,
"grad_norm": 4.032405376434326,
"learning_rate": 0.0001819016393442623,
"loss": 0.4501,
"step": 2860
},
{
"epoch": 14.057352043929225,
"grad_norm": 3.9458096027374268,
"learning_rate": 0.0001817704918032787,
"loss": 0.3863,
"step": 2880
},
{
"epoch": 14.154972544234289,
"grad_norm": 5.01477575302124,
"learning_rate": 0.0001816393442622951,
"loss": 0.3421,
"step": 2900
},
{
"epoch": 14.252593044539353,
"grad_norm": 3.396898031234741,
"learning_rate": 0.00018150819672131148,
"loss": 0.3438,
"step": 2920
},
{
"epoch": 14.350213544844417,
"grad_norm": 4.596593856811523,
"learning_rate": 0.00018137704918032787,
"loss": 0.3435,
"step": 2940
},
{
"epoch": 14.44783404514948,
"grad_norm": 3.6386988162994385,
"learning_rate": 0.00018124590163934429,
"loss": 0.3703,
"step": 2960
},
{
"epoch": 14.545454545454545,
"grad_norm": 3.3389110565185547,
"learning_rate": 0.00018111475409836067,
"loss": 0.3727,
"step": 2980
},
{
"epoch": 14.643075045759609,
"grad_norm": 4.675887107849121,
"learning_rate": 0.00018098360655737704,
"loss": 0.3972,
"step": 3000
},
{
"epoch": 14.740695546064673,
"grad_norm": 3.3281068801879883,
"learning_rate": 0.00018085245901639345,
"loss": 0.38,
"step": 3020
},
{
"epoch": 14.838316046369737,
"grad_norm": 3.9550278186798096,
"learning_rate": 0.00018072131147540984,
"loss": 0.3995,
"step": 3040
},
{
"epoch": 14.935936546674801,
"grad_norm": 2.93839168548584,
"learning_rate": 0.00018059016393442626,
"loss": 0.3885,
"step": 3060
},
{
"epoch": 15.033557046979865,
"grad_norm": 3.583588123321533,
"learning_rate": 0.00018045901639344262,
"loss": 0.3407,
"step": 3080
},
{
"epoch": 15.13117754728493,
"grad_norm": 3.6306777000427246,
"learning_rate": 0.00018032786885245904,
"loss": 0.3016,
"step": 3100
},
{
"epoch": 15.228798047589994,
"grad_norm": 3.278693437576294,
"learning_rate": 0.00018019672131147543,
"loss": 0.292,
"step": 3120
},
{
"epoch": 15.326418547895058,
"grad_norm": 2.9507830142974854,
"learning_rate": 0.0001800655737704918,
"loss": 0.3204,
"step": 3140
},
{
"epoch": 15.424039048200122,
"grad_norm": 2.957294464111328,
"learning_rate": 0.0001799344262295082,
"loss": 0.3446,
"step": 3160
},
{
"epoch": 15.521659548505186,
"grad_norm": 3.4953787326812744,
"learning_rate": 0.0001798032786885246,
"loss": 0.3307,
"step": 3180
},
{
"epoch": 15.61928004881025,
"grad_norm": 3.349458694458008,
"learning_rate": 0.000179672131147541,
"loss": 0.3448,
"step": 3200
},
{
"epoch": 15.716900549115314,
"grad_norm": 4.927302837371826,
"learning_rate": 0.00017954098360655737,
"loss": 0.3661,
"step": 3220
},
{
"epoch": 15.814521049420378,
"grad_norm": 4.214022636413574,
"learning_rate": 0.0001794098360655738,
"loss": 0.3595,
"step": 3240
},
{
"epoch": 15.912141549725442,
"grad_norm": 3.7332723140716553,
"learning_rate": 0.00017927868852459018,
"loss": 0.3916,
"step": 3260
},
{
"epoch": 16.009762050030506,
"grad_norm": 3.1130378246307373,
"learning_rate": 0.00017914754098360657,
"loss": 0.365,
"step": 3280
},
{
"epoch": 16.107382550335572,
"grad_norm": 3.9815053939819336,
"learning_rate": 0.00017901639344262296,
"loss": 0.2466,
"step": 3300
},
{
"epoch": 16.205003050640634,
"grad_norm": 4.437533855438232,
"learning_rate": 0.00017888524590163935,
"loss": 0.2917,
"step": 3320
},
{
"epoch": 16.3026235509457,
"grad_norm": 3.869553565979004,
"learning_rate": 0.00017875409836065576,
"loss": 0.2982,
"step": 3340
},
{
"epoch": 16.400244051250763,
"grad_norm": 3.1284611225128174,
"learning_rate": 0.00017862295081967213,
"loss": 0.3146,
"step": 3360
},
{
"epoch": 16.49786455155583,
"grad_norm": 3.837712049484253,
"learning_rate": 0.00017849180327868852,
"loss": 0.3129,
"step": 3380
},
{
"epoch": 16.59548505186089,
"grad_norm": 3.348344326019287,
"learning_rate": 0.00017836065573770493,
"loss": 0.3379,
"step": 3400
},
{
"epoch": 16.693105552165953,
"grad_norm": 3.809512138366699,
"learning_rate": 0.00017822950819672132,
"loss": 0.3193,
"step": 3420
},
{
"epoch": 16.79072605247102,
"grad_norm": 3.3745222091674805,
"learning_rate": 0.0001780983606557377,
"loss": 0.3308,
"step": 3440
},
{
"epoch": 16.888346552776085,
"grad_norm": 4.550785541534424,
"learning_rate": 0.0001779672131147541,
"loss": 0.3609,
"step": 3460
},
{
"epoch": 16.985967053081147,
"grad_norm": 4.031665802001953,
"learning_rate": 0.00017783606557377052,
"loss": 0.3648,
"step": 3480
},
{
"epoch": 17.08358755338621,
"grad_norm": 3.0923168659210205,
"learning_rate": 0.0001777049180327869,
"loss": 0.2731,
"step": 3500
},
{
"epoch": 17.181208053691275,
"grad_norm": 3.291416883468628,
"learning_rate": 0.00017757377049180327,
"loss": 0.2829,
"step": 3520
},
{
"epoch": 17.278828553996338,
"grad_norm": 3.059995651245117,
"learning_rate": 0.00017744262295081969,
"loss": 0.3047,
"step": 3540
},
{
"epoch": 17.376449054301403,
"grad_norm": 3.1089327335357666,
"learning_rate": 0.00017731147540983607,
"loss": 0.3092,
"step": 3560
},
{
"epoch": 17.474069554606466,
"grad_norm": 3.9796876907348633,
"learning_rate": 0.00017718032786885246,
"loss": 0.2647,
"step": 3580
},
{
"epoch": 17.57169005491153,
"grad_norm": 3.587038040161133,
"learning_rate": 0.00017704918032786885,
"loss": 0.308,
"step": 3600
},
{
"epoch": 17.669310555216594,
"grad_norm": 3.7032790184020996,
"learning_rate": 0.00017691803278688527,
"loss": 0.3101,
"step": 3620
},
{
"epoch": 17.76693105552166,
"grad_norm": 3.7440781593322754,
"learning_rate": 0.00017678688524590166,
"loss": 0.2922,
"step": 3640
},
{
"epoch": 17.864551555826722,
"grad_norm": 3.4123542308807373,
"learning_rate": 0.00017665573770491802,
"loss": 0.3097,
"step": 3660
},
{
"epoch": 17.962172056131788,
"grad_norm": 3.958204507827759,
"learning_rate": 0.00017652459016393444,
"loss": 0.2955,
"step": 3680
},
{
"epoch": 18.05979255643685,
"grad_norm": 4.549073696136475,
"learning_rate": 0.00017639344262295083,
"loss": 0.2846,
"step": 3700
},
{
"epoch": 18.157413056741916,
"grad_norm": 3.5509791374206543,
"learning_rate": 0.00017626229508196724,
"loss": 0.2651,
"step": 3720
},
{
"epoch": 18.25503355704698,
"grad_norm": 4.044325828552246,
"learning_rate": 0.0001761311475409836,
"loss": 0.242,
"step": 3740
},
{
"epoch": 18.352654057352044,
"grad_norm": 3.255535125732422,
"learning_rate": 0.00017600000000000002,
"loss": 0.2857,
"step": 3760
},
{
"epoch": 18.450274557657107,
"grad_norm": 3.6761083602905273,
"learning_rate": 0.0001758688524590164,
"loss": 0.285,
"step": 3780
},
{
"epoch": 18.547895057962172,
"grad_norm": 4.1334381103515625,
"learning_rate": 0.00017573770491803277,
"loss": 0.2648,
"step": 3800
},
{
"epoch": 18.645515558267235,
"grad_norm": 3.5164451599121094,
"learning_rate": 0.0001756065573770492,
"loss": 0.2654,
"step": 3820
},
{
"epoch": 18.7431360585723,
"grad_norm": 3.96533465385437,
"learning_rate": 0.00017547540983606558,
"loss": 0.2862,
"step": 3840
},
{
"epoch": 18.840756558877363,
"grad_norm": 3.932554006576538,
"learning_rate": 0.000175344262295082,
"loss": 0.2803,
"step": 3860
},
{
"epoch": 18.93837705918243,
"grad_norm": 4.888127326965332,
"learning_rate": 0.00017521311475409836,
"loss": 0.2902,
"step": 3880
},
{
"epoch": 19.03599755948749,
"grad_norm": 3.475789785385132,
"learning_rate": 0.00017508196721311475,
"loss": 0.2815,
"step": 3900
},
{
"epoch": 19.133618059792557,
"grad_norm": 2.57926607131958,
"learning_rate": 0.00017495081967213116,
"loss": 0.21,
"step": 3920
},
{
"epoch": 19.23123856009762,
"grad_norm": 3.3455469608306885,
"learning_rate": 0.00017481967213114753,
"loss": 0.232,
"step": 3940
},
{
"epoch": 19.328859060402685,
"grad_norm": 3.574331045150757,
"learning_rate": 0.00017468852459016394,
"loss": 0.2506,
"step": 3960
},
{
"epoch": 19.426479560707747,
"grad_norm": 4.422223091125488,
"learning_rate": 0.00017455737704918033,
"loss": 0.2494,
"step": 3980
},
{
"epoch": 19.524100061012813,
"grad_norm": 3.8312060832977295,
"learning_rate": 0.00017442622950819675,
"loss": 0.2758,
"step": 4000
},
{
"epoch": 19.621720561317876,
"grad_norm": 3.464089870452881,
"learning_rate": 0.0001742950819672131,
"loss": 0.2628,
"step": 4020
},
{
"epoch": 19.71934106162294,
"grad_norm": 4.096322536468506,
"learning_rate": 0.0001741639344262295,
"loss": 0.2651,
"step": 4040
},
{
"epoch": 19.816961561928004,
"grad_norm": 5.72092866897583,
"learning_rate": 0.00017403278688524592,
"loss": 0.29,
"step": 4060
},
{
"epoch": 19.91458206223307,
"grad_norm": 3.7189135551452637,
"learning_rate": 0.0001739016393442623,
"loss": 0.2715,
"step": 4080
},
{
"epoch": 20.012202562538132,
"grad_norm": 3.4471986293792725,
"learning_rate": 0.0001737704918032787,
"loss": 0.2762,
"step": 4100
},
{
"epoch": 20.109823062843198,
"grad_norm": 3.046600580215454,
"learning_rate": 0.00017363934426229509,
"loss": 0.2219,
"step": 4120
},
{
"epoch": 20.20744356314826,
"grad_norm": 3.6281490325927734,
"learning_rate": 0.0001735081967213115,
"loss": 0.231,
"step": 4140
},
{
"epoch": 20.305064063453326,
"grad_norm": 2.8515024185180664,
"learning_rate": 0.00017337704918032786,
"loss": 0.2212,
"step": 4160
},
{
"epoch": 20.40268456375839,
"grad_norm": 2.9778804779052734,
"learning_rate": 0.00017324590163934425,
"loss": 0.2327,
"step": 4180
},
{
"epoch": 20.500305064063454,
"grad_norm": 2.92470121383667,
"learning_rate": 0.00017311475409836067,
"loss": 0.2378,
"step": 4200
},
{
"epoch": 20.597925564368516,
"grad_norm": 3.8426191806793213,
"learning_rate": 0.00017298360655737706,
"loss": 0.2652,
"step": 4220
},
{
"epoch": 20.695546064673582,
"grad_norm": 3.8123021125793457,
"learning_rate": 0.00017285245901639345,
"loss": 0.2446,
"step": 4240
},
{
"epoch": 20.793166564978645,
"grad_norm": 3.650644540786743,
"learning_rate": 0.00017272131147540984,
"loss": 0.2331,
"step": 4260
},
{
"epoch": 20.89078706528371,
"grad_norm": 4.259769916534424,
"learning_rate": 0.00017259016393442625,
"loss": 0.2595,
"step": 4280
},
{
"epoch": 20.988407565588773,
"grad_norm": 4.065052509307861,
"learning_rate": 0.00017245901639344264,
"loss": 0.2672,
"step": 4300
},
{
"epoch": 21.08602806589384,
"grad_norm": 3.565068244934082,
"learning_rate": 0.000172327868852459,
"loss": 0.1997,
"step": 4320
},
{
"epoch": 21.1836485661989,
"grad_norm": 3.036924362182617,
"learning_rate": 0.00017219672131147542,
"loss": 0.1936,
"step": 4340
},
{
"epoch": 21.281269066503967,
"grad_norm": 3.2253048419952393,
"learning_rate": 0.0001720655737704918,
"loss": 0.2099,
"step": 4360
},
{
"epoch": 21.37888956680903,
"grad_norm": 3.6241676807403564,
"learning_rate": 0.0001719344262295082,
"loss": 0.2232,
"step": 4380
},
{
"epoch": 21.476510067114095,
"grad_norm": 3.3272531032562256,
"learning_rate": 0.0001718032786885246,
"loss": 0.2103,
"step": 4400
},
{
"epoch": 21.574130567419157,
"grad_norm": 3.6076695919036865,
"learning_rate": 0.00017167213114754098,
"loss": 0.2323,
"step": 4420
},
{
"epoch": 21.671751067724223,
"grad_norm": 3.642751455307007,
"learning_rate": 0.0001715409836065574,
"loss": 0.2385,
"step": 4440
},
{
"epoch": 21.769371568029285,
"grad_norm": 3.1882801055908203,
"learning_rate": 0.00017140983606557376,
"loss": 0.2516,
"step": 4460
},
{
"epoch": 21.86699206833435,
"grad_norm": 3.4758126735687256,
"learning_rate": 0.00017127868852459018,
"loss": 0.2365,
"step": 4480
},
{
"epoch": 21.964612568639414,
"grad_norm": 3.498697519302368,
"learning_rate": 0.00017114754098360656,
"loss": 0.2436,
"step": 4500
},
{
"epoch": 22.06223306894448,
"grad_norm": 3.926394462585449,
"learning_rate": 0.00017101639344262298,
"loss": 0.2101,
"step": 4520
},
{
"epoch": 22.15985356924954,
"grad_norm": 2.982520580291748,
"learning_rate": 0.00017088524590163934,
"loss": 0.1655,
"step": 4540
},
{
"epoch": 22.257474069554608,
"grad_norm": 3.843905448913574,
"learning_rate": 0.00017075409836065573,
"loss": 0.1896,
"step": 4560
},
{
"epoch": 22.35509456985967,
"grad_norm": 2.8714027404785156,
"learning_rate": 0.00017062295081967215,
"loss": 0.2113,
"step": 4580
},
{
"epoch": 22.452715070164736,
"grad_norm": 3.552220344543457,
"learning_rate": 0.0001704918032786885,
"loss": 0.2311,
"step": 4600
},
{
"epoch": 22.550335570469798,
"grad_norm": 4.077561378479004,
"learning_rate": 0.00017036065573770493,
"loss": 0.2205,
"step": 4620
},
{
"epoch": 22.647956070774864,
"grad_norm": 3.475970506668091,
"learning_rate": 0.00017022950819672132,
"loss": 0.2047,
"step": 4640
},
{
"epoch": 22.745576571079926,
"grad_norm": 2.9817659854888916,
"learning_rate": 0.00017009836065573773,
"loss": 0.2312,
"step": 4660
},
{
"epoch": 22.843197071384992,
"grad_norm": 2.8980283737182617,
"learning_rate": 0.0001699672131147541,
"loss": 0.2113,
"step": 4680
},
{
"epoch": 22.940817571690054,
"grad_norm": 2.9917445182800293,
"learning_rate": 0.00016983606557377049,
"loss": 0.2334,
"step": 4700
},
{
"epoch": 23.03843807199512,
"grad_norm": 2.9001080989837646,
"learning_rate": 0.0001697049180327869,
"loss": 0.2096,
"step": 4720
},
{
"epoch": 23.136058572300183,
"grad_norm": 2.7678308486938477,
"learning_rate": 0.0001695737704918033,
"loss": 0.18,
"step": 4740
},
{
"epoch": 23.23367907260525,
"grad_norm": 4.077008247375488,
"learning_rate": 0.00016944262295081968,
"loss": 0.175,
"step": 4760
},
{
"epoch": 23.33129957291031,
"grad_norm": 3.806175708770752,
"learning_rate": 0.00016931147540983607,
"loss": 0.1962,
"step": 4780
},
{
"epoch": 23.428920073215377,
"grad_norm": 3.203763246536255,
"learning_rate": 0.00016918032786885249,
"loss": 0.2072,
"step": 4800
},
{
"epoch": 23.52654057352044,
"grad_norm": 3.717864513397217,
"learning_rate": 0.00016904918032786885,
"loss": 0.203,
"step": 4820
},
{
"epoch": 23.624161073825505,
"grad_norm": 3.1602675914764404,
"learning_rate": 0.00016891803278688524,
"loss": 0.1925,
"step": 4840
},
{
"epoch": 23.721781574130567,
"grad_norm": 4.2820000648498535,
"learning_rate": 0.00016878688524590165,
"loss": 0.2037,
"step": 4860
},
{
"epoch": 23.819402074435633,
"grad_norm": 3.9633703231811523,
"learning_rate": 0.00016865573770491804,
"loss": 0.2143,
"step": 4880
},
{
"epoch": 23.917022574740695,
"grad_norm": 2.801804542541504,
"learning_rate": 0.00016852459016393443,
"loss": 0.1938,
"step": 4900
},
{
"epoch": 24.01464307504576,
"grad_norm": 2.719308614730835,
"learning_rate": 0.00016839344262295082,
"loss": 0.215,
"step": 4920
},
{
"epoch": 24.112263575350823,
"grad_norm": 3.0437123775482178,
"learning_rate": 0.0001682622950819672,
"loss": 0.1806,
"step": 4940
},
{
"epoch": 24.20988407565589,
"grad_norm": 3.314267158508301,
"learning_rate": 0.00016813114754098363,
"loss": 0.169,
"step": 4960
},
{
"epoch": 24.30750457596095,
"grad_norm": 2.26737117767334,
"learning_rate": 0.000168,
"loss": 0.1704,
"step": 4980
},
{
"epoch": 24.405125076266017,
"grad_norm": 4.0000715255737305,
"learning_rate": 0.0001678688524590164,
"loss": 0.176,
"step": 5000
},
{
"epoch": 24.50274557657108,
"grad_norm": 3.358684778213501,
"learning_rate": 0.0001677377049180328,
"loss": 0.1756,
"step": 5020
},
{
"epoch": 24.600366076876146,
"grad_norm": 3.2529423236846924,
"learning_rate": 0.00016760655737704919,
"loss": 0.201,
"step": 5040
},
{
"epoch": 24.697986577181208,
"grad_norm": 4.20505952835083,
"learning_rate": 0.00016747540983606558,
"loss": 0.1863,
"step": 5060
},
{
"epoch": 24.795607077486274,
"grad_norm": 3.8670005798339844,
"learning_rate": 0.00016734426229508196,
"loss": 0.1955,
"step": 5080
},
{
"epoch": 24.893227577791336,
"grad_norm": 3.262305736541748,
"learning_rate": 0.00016721311475409838,
"loss": 0.2145,
"step": 5100
},
{
"epoch": 24.990848078096402,
"grad_norm": 3.3101160526275635,
"learning_rate": 0.00016708196721311474,
"loss": 0.2064,
"step": 5120
},
{
"epoch": 25.088468578401464,
"grad_norm": 2.9248740673065186,
"learning_rate": 0.00016695081967213116,
"loss": 0.1591,
"step": 5140
},
{
"epoch": 25.18608907870653,
"grad_norm": 3.645301103591919,
"learning_rate": 0.00016681967213114755,
"loss": 0.1433,
"step": 5160
},
{
"epoch": 25.283709579011592,
"grad_norm": 3.857302665710449,
"learning_rate": 0.00016668852459016397,
"loss": 0.1636,
"step": 5180
},
{
"epoch": 25.381330079316655,
"grad_norm": 3.101661205291748,
"learning_rate": 0.00016655737704918033,
"loss": 0.1665,
"step": 5200
},
{
"epoch": 25.47895057962172,
"grad_norm": 3.0728678703308105,
"learning_rate": 0.00016642622950819672,
"loss": 0.1769,
"step": 5220
},
{
"epoch": 25.576571079926783,
"grad_norm": 3.7951607704162598,
"learning_rate": 0.00016629508196721313,
"loss": 0.1692,
"step": 5240
},
{
"epoch": 25.67419158023185,
"grad_norm": 2.699662446975708,
"learning_rate": 0.0001661639344262295,
"loss": 0.1983,
"step": 5260
},
{
"epoch": 25.77181208053691,
"grad_norm": 2.861830949783325,
"learning_rate": 0.0001660327868852459,
"loss": 0.1838,
"step": 5280
},
{
"epoch": 25.869432580841977,
"grad_norm": 3.30417799949646,
"learning_rate": 0.0001659016393442623,
"loss": 0.1953,
"step": 5300
},
{
"epoch": 25.96705308114704,
"grad_norm": 3.30916690826416,
"learning_rate": 0.0001657704918032787,
"loss": 0.2021,
"step": 5320
},
{
"epoch": 26.064673581452105,
"grad_norm": 2.5547962188720703,
"learning_rate": 0.00016563934426229508,
"loss": 0.1494,
"step": 5340
},
{
"epoch": 26.162294081757167,
"grad_norm": 2.986764907836914,
"learning_rate": 0.00016550819672131147,
"loss": 0.1569,
"step": 5360
},
{
"epoch": 26.259914582062233,
"grad_norm": 3.37117862701416,
"learning_rate": 0.00016537704918032789,
"loss": 0.1651,
"step": 5380
},
{
"epoch": 26.357535082367296,
"grad_norm": 2.9431986808776855,
"learning_rate": 0.00016524590163934428,
"loss": 0.1497,
"step": 5400
},
{
"epoch": 26.45515558267236,
"grad_norm": 3.107166051864624,
"learning_rate": 0.00016511475409836067,
"loss": 0.1824,
"step": 5420
},
{
"epoch": 26.552776082977424,
"grad_norm": 3.1725735664367676,
"learning_rate": 0.00016498360655737705,
"loss": 0.1624,
"step": 5440
},
{
"epoch": 26.65039658328249,
"grad_norm": 4.063880443572998,
"learning_rate": 0.00016485245901639344,
"loss": 0.1613,
"step": 5460
},
{
"epoch": 26.748017083587552,
"grad_norm": 3.5230929851531982,
"learning_rate": 0.00016472131147540983,
"loss": 0.1753,
"step": 5480
},
{
"epoch": 26.845637583892618,
"grad_norm": 3.2048895359039307,
"learning_rate": 0.00016459016393442622,
"loss": 0.1797,
"step": 5500
},
{
"epoch": 26.94325808419768,
"grad_norm": 4.232358455657959,
"learning_rate": 0.00016445901639344264,
"loss": 0.1771,
"step": 5520
},
{
"epoch": 27.040878584502746,
"grad_norm": 3.4074463844299316,
"learning_rate": 0.00016432786885245903,
"loss": 0.1695,
"step": 5540
},
{
"epoch": 27.13849908480781,
"grad_norm": 2.856752872467041,
"learning_rate": 0.00016419672131147542,
"loss": 0.137,
"step": 5560
},
{
"epoch": 27.236119585112874,
"grad_norm": 3.1748337745666504,
"learning_rate": 0.0001640655737704918,
"loss": 0.1535,
"step": 5580
},
{
"epoch": 27.333740085417936,
"grad_norm": 3.6794328689575195,
"learning_rate": 0.0001639344262295082,
"loss": 0.1598,
"step": 5600
},
{
"epoch": 27.431360585723002,
"grad_norm": 3.439868927001953,
"learning_rate": 0.0001638032786885246,
"loss": 0.1465,
"step": 5620
},
{
"epoch": 27.528981086028065,
"grad_norm": 2.997490644454956,
"learning_rate": 0.00016367213114754098,
"loss": 0.1686,
"step": 5640
},
{
"epoch": 27.62660158633313,
"grad_norm": 4.262759208679199,
"learning_rate": 0.0001635409836065574,
"loss": 0.1589,
"step": 5660
},
{
"epoch": 27.724222086638193,
"grad_norm": 2.9930195808410645,
"learning_rate": 0.00016340983606557378,
"loss": 0.1526,
"step": 5680
},
{
"epoch": 27.82184258694326,
"grad_norm": 3.221529245376587,
"learning_rate": 0.00016327868852459017,
"loss": 0.1649,
"step": 5700
},
{
"epoch": 27.91946308724832,
"grad_norm": 3.318105697631836,
"learning_rate": 0.00016314754098360656,
"loss": 0.1835,
"step": 5720
},
{
"epoch": 28.017083587553387,
"grad_norm": 2.85650372505188,
"learning_rate": 0.00016301639344262295,
"loss": 0.164,
"step": 5740
},
{
"epoch": 28.11470408785845,
"grad_norm": 2.4882328510284424,
"learning_rate": 0.00016288524590163937,
"loss": 0.1325,
"step": 5760
},
{
"epoch": 28.212324588163515,
"grad_norm": 2.730262041091919,
"learning_rate": 0.00016275409836065573,
"loss": 0.1329,
"step": 5780
},
{
"epoch": 28.309945088468577,
"grad_norm": 3.4027230739593506,
"learning_rate": 0.00016262295081967214,
"loss": 0.1471,
"step": 5800
},
{
"epoch": 28.407565588773643,
"grad_norm": 3.170252799987793,
"learning_rate": 0.00016249180327868853,
"loss": 0.1459,
"step": 5820
},
{
"epoch": 28.505186089078705,
"grad_norm": 2.631844997406006,
"learning_rate": 0.00016236065573770492,
"loss": 0.1492,
"step": 5840
},
{
"epoch": 28.60280658938377,
"grad_norm": 3.2945570945739746,
"learning_rate": 0.0001622295081967213,
"loss": 0.1398,
"step": 5860
},
{
"epoch": 28.700427089688834,
"grad_norm": 3.442495822906494,
"learning_rate": 0.0001620983606557377,
"loss": 0.1495,
"step": 5880
},
{
"epoch": 28.7980475899939,
"grad_norm": 3.4794762134552,
"learning_rate": 0.00016196721311475412,
"loss": 0.1697,
"step": 5900
},
{
"epoch": 28.89566809029896,
"grad_norm": 3.2846438884735107,
"learning_rate": 0.00016183606557377048,
"loss": 0.1603,
"step": 5920
},
{
"epoch": 28.993288590604028,
"grad_norm": 2.8020057678222656,
"learning_rate": 0.0001617049180327869,
"loss": 0.1705,
"step": 5940
},
{
"epoch": 29.09090909090909,
"grad_norm": 2.949068069458008,
"learning_rate": 0.0001615737704918033,
"loss": 0.1213,
"step": 5960
},
{
"epoch": 29.188529591214156,
"grad_norm": 2.6021780967712402,
"learning_rate": 0.00016144262295081968,
"loss": 0.1247,
"step": 5980
},
{
"epoch": 29.286150091519218,
"grad_norm": 2.752091884613037,
"learning_rate": 0.00016131147540983607,
"loss": 0.1226,
"step": 6000
},
{
"epoch": 29.383770591824284,
"grad_norm": 3.845365285873413,
"learning_rate": 0.00016118032786885245,
"loss": 0.1452,
"step": 6020
},
{
"epoch": 29.481391092129346,
"grad_norm": 3.541273832321167,
"learning_rate": 0.00016104918032786887,
"loss": 0.1392,
"step": 6040
},
{
"epoch": 29.579011592434412,
"grad_norm": 3.554525375366211,
"learning_rate": 0.00016091803278688526,
"loss": 0.1466,
"step": 6060
},
{
"epoch": 29.676632092739474,
"grad_norm": 3.1420257091522217,
"learning_rate": 0.00016078688524590165,
"loss": 0.1433,
"step": 6080
},
{
"epoch": 29.77425259304454,
"grad_norm": 3.15902042388916,
"learning_rate": 0.00016065573770491804,
"loss": 0.1532,
"step": 6100
},
{
"epoch": 29.871873093349603,
"grad_norm": 2.8925654888153076,
"learning_rate": 0.00016052459016393443,
"loss": 0.1499,
"step": 6120
},
{
"epoch": 29.96949359365467,
"grad_norm": 3.0476958751678467,
"learning_rate": 0.00016039344262295082,
"loss": 0.1706,
"step": 6140
},
{
"epoch": 30.06711409395973,
"grad_norm": 3.0503270626068115,
"learning_rate": 0.0001602622950819672,
"loss": 0.116,
"step": 6160
},
{
"epoch": 30.164734594264797,
"grad_norm": 2.6872098445892334,
"learning_rate": 0.00016013114754098362,
"loss": 0.1177,
"step": 6180
},
{
"epoch": 30.26235509456986,
"grad_norm": 3.1871418952941895,
"learning_rate": 0.00016,
"loss": 0.134,
"step": 6200
},
{
"epoch": 30.359975594874925,
"grad_norm": 2.611163854598999,
"learning_rate": 0.0001598688524590164,
"loss": 0.1363,
"step": 6220
},
{
"epoch": 30.457596095179987,
"grad_norm": 2.7208786010742188,
"learning_rate": 0.0001597377049180328,
"loss": 0.1346,
"step": 6240
},
{
"epoch": 30.555216595485053,
"grad_norm": 3.424114227294922,
"learning_rate": 0.00015960655737704918,
"loss": 0.1336,
"step": 6260
},
{
"epoch": 30.652837095790115,
"grad_norm": 2.9046945571899414,
"learning_rate": 0.0001594754098360656,
"loss": 0.1391,
"step": 6280
},
{
"epoch": 30.75045759609518,
"grad_norm": 3.6124610900878906,
"learning_rate": 0.00015934426229508196,
"loss": 0.147,
"step": 6300
},
{
"epoch": 30.848078096400243,
"grad_norm": 3.3340744972229004,
"learning_rate": 0.00015921311475409838,
"loss": 0.1471,
"step": 6320
},
{
"epoch": 30.94569859670531,
"grad_norm": 3.6494932174682617,
"learning_rate": 0.00015908196721311477,
"loss": 0.1392,
"step": 6340
},
{
"epoch": 31.04331909701037,
"grad_norm": 2.10213303565979,
"learning_rate": 0.00015895081967213116,
"loss": 0.1319,
"step": 6360
},
{
"epoch": 31.140939597315437,
"grad_norm": 2.641988515853882,
"learning_rate": 0.00015881967213114754,
"loss": 0.1124,
"step": 6380
},
{
"epoch": 31.2385600976205,
"grad_norm": 3.3964974880218506,
"learning_rate": 0.00015868852459016393,
"loss": 0.1214,
"step": 6400
},
{
"epoch": 31.336180597925566,
"grad_norm": 3.438147783279419,
"learning_rate": 0.00015855737704918035,
"loss": 0.1302,
"step": 6420
},
{
"epoch": 31.433801098230628,
"grad_norm": 3.079634189605713,
"learning_rate": 0.0001584262295081967,
"loss": 0.1297,
"step": 6440
},
{
"epoch": 31.531421598535694,
"grad_norm": 4.332663536071777,
"learning_rate": 0.00015829508196721313,
"loss": 0.1328,
"step": 6460
},
{
"epoch": 31.629042098840756,
"grad_norm": 3.123251438140869,
"learning_rate": 0.00015816393442622952,
"loss": 0.1201,
"step": 6480
},
{
"epoch": 31.726662599145822,
"grad_norm": 2.725952386856079,
"learning_rate": 0.0001580327868852459,
"loss": 0.1378,
"step": 6500
},
{
"epoch": 31.824283099450884,
"grad_norm": 3.1809983253479004,
"learning_rate": 0.0001579016393442623,
"loss": 0.1395,
"step": 6520
},
{
"epoch": 31.92190359975595,
"grad_norm": 3.050304889678955,
"learning_rate": 0.0001577704918032787,
"loss": 0.1208,
"step": 6540
},
{
"epoch": 32.01952410006101,
"grad_norm": 3.966635227203369,
"learning_rate": 0.0001576393442622951,
"loss": 0.1496,
"step": 6560
},
{
"epoch": 32.117144600366075,
"grad_norm": 2.607820510864258,
"learning_rate": 0.00015750819672131147,
"loss": 0.1028,
"step": 6580
},
{
"epoch": 32.214765100671144,
"grad_norm": 3.476501941680908,
"learning_rate": 0.00015737704918032788,
"loss": 0.1048,
"step": 6600
},
{
"epoch": 32.31238560097621,
"grad_norm": 2.745434284210205,
"learning_rate": 0.00015724590163934427,
"loss": 0.1173,
"step": 6620
},
{
"epoch": 32.41000610128127,
"grad_norm": 3.01653790473938,
"learning_rate": 0.00015711475409836066,
"loss": 0.1256,
"step": 6640
},
{
"epoch": 32.50762660158633,
"grad_norm": 2.9087026119232178,
"learning_rate": 0.00015698360655737705,
"loss": 0.1343,
"step": 6660
},
{
"epoch": 32.6052471018914,
"grad_norm": 2.6568126678466797,
"learning_rate": 0.00015685245901639344,
"loss": 0.1248,
"step": 6680
},
{
"epoch": 32.70286760219646,
"grad_norm": 3.052931547164917,
"learning_rate": 0.00015672131147540986,
"loss": 0.1343,
"step": 6700
},
{
"epoch": 32.800488102501525,
"grad_norm": 3.3897056579589844,
"learning_rate": 0.00015659016393442622,
"loss": 0.136,
"step": 6720
},
{
"epoch": 32.89810860280659,
"grad_norm": 3.269697904586792,
"learning_rate": 0.00015645901639344263,
"loss": 0.1304,
"step": 6740
},
{
"epoch": 32.99572910311166,
"grad_norm": 3.2755305767059326,
"learning_rate": 0.00015632786885245902,
"loss": 0.1368,
"step": 6760
},
{
"epoch": 33.09334960341672,
"grad_norm": 1.8959623575210571,
"learning_rate": 0.0001561967213114754,
"loss": 0.1067,
"step": 6780
},
{
"epoch": 33.19097010372178,
"grad_norm": 2.583085536956787,
"learning_rate": 0.0001560655737704918,
"loss": 0.1031,
"step": 6800
},
{
"epoch": 33.288590604026844,
"grad_norm": 2.4109268188476562,
"learning_rate": 0.0001559344262295082,
"loss": 0.1035,
"step": 6820
},
{
"epoch": 33.38621110433191,
"grad_norm": 3.176445722579956,
"learning_rate": 0.0001558032786885246,
"loss": 0.1153,
"step": 6840
},
{
"epoch": 33.483831604636975,
"grad_norm": 3.5049378871917725,
"learning_rate": 0.000155672131147541,
"loss": 0.1195,
"step": 6860
},
{
"epoch": 33.58145210494204,
"grad_norm": 2.7021915912628174,
"learning_rate": 0.0001555409836065574,
"loss": 0.1205,
"step": 6880
},
{
"epoch": 33.6790726052471,
"grad_norm": 4.213137626647949,
"learning_rate": 0.00015540983606557378,
"loss": 0.1192,
"step": 6900
},
{
"epoch": 33.77669310555217,
"grad_norm": 3.276128053665161,
"learning_rate": 0.00015527868852459017,
"loss": 0.1252,
"step": 6920
},
{
"epoch": 33.87431360585723,
"grad_norm": 3.3642396926879883,
"learning_rate": 0.00015514754098360656,
"loss": 0.1234,
"step": 6940
},
{
"epoch": 33.971934106162294,
"grad_norm": 3.0481181144714355,
"learning_rate": 0.00015501639344262294,
"loss": 0.1464,
"step": 6960
},
{
"epoch": 34.06955460646736,
"grad_norm": 2.147581100463867,
"learning_rate": 0.00015488524590163936,
"loss": 0.1029,
"step": 6980
},
{
"epoch": 34.16717510677242,
"grad_norm": 2.751429557800293,
"learning_rate": 0.00015475409836065575,
"loss": 0.103,
"step": 7000
},
{
"epoch": 34.26479560707749,
"grad_norm": 2.6378426551818848,
"learning_rate": 0.00015462295081967214,
"loss": 0.1146,
"step": 7020
},
{
"epoch": 34.36241610738255,
"grad_norm": 2.744255304336548,
"learning_rate": 0.00015449180327868853,
"loss": 0.106,
"step": 7040
},
{
"epoch": 34.46003660768761,
"grad_norm": 3.518716812133789,
"learning_rate": 0.00015436065573770492,
"loss": 0.1146,
"step": 7060
},
{
"epoch": 34.557657107992675,
"grad_norm": 4.025069236755371,
"learning_rate": 0.00015422950819672133,
"loss": 0.1173,
"step": 7080
},
{
"epoch": 34.655277608297745,
"grad_norm": 2.4882397651672363,
"learning_rate": 0.0001540983606557377,
"loss": 0.1118,
"step": 7100
},
{
"epoch": 34.75289810860281,
"grad_norm": 2.998798131942749,
"learning_rate": 0.00015396721311475411,
"loss": 0.1277,
"step": 7120
},
{
"epoch": 34.85051860890787,
"grad_norm": 3.375034809112549,
"learning_rate": 0.0001538360655737705,
"loss": 0.1137,
"step": 7140
},
{
"epoch": 34.94813910921293,
"grad_norm": 2.8172638416290283,
"learning_rate": 0.0001537049180327869,
"loss": 0.1083,
"step": 7160
},
{
"epoch": 35.045759609518,
"grad_norm": 2.669013500213623,
"learning_rate": 0.00015357377049180328,
"loss": 0.1086,
"step": 7180
},
{
"epoch": 35.14338010982306,
"grad_norm": 3.0433802604675293,
"learning_rate": 0.00015344262295081967,
"loss": 0.0921,
"step": 7200
},
{
"epoch": 35.241000610128125,
"grad_norm": 3.2270560264587402,
"learning_rate": 0.0001533114754098361,
"loss": 0.0991,
"step": 7220
},
{
"epoch": 35.33862111043319,
"grad_norm": 4.046402931213379,
"learning_rate": 0.00015318032786885245,
"loss": 0.0979,
"step": 7240
},
{
"epoch": 35.43624161073826,
"grad_norm": 3.2767364978790283,
"learning_rate": 0.00015304918032786887,
"loss": 0.1098,
"step": 7260
},
{
"epoch": 35.53386211104332,
"grad_norm": 3.503641366958618,
"learning_rate": 0.00015291803278688526,
"loss": 0.1093,
"step": 7280
},
{
"epoch": 35.63148261134838,
"grad_norm": 2.7504665851593018,
"learning_rate": 0.00015278688524590165,
"loss": 0.1087,
"step": 7300
},
{
"epoch": 35.729103111653444,
"grad_norm": 2.6193149089813232,
"learning_rate": 0.00015265573770491803,
"loss": 0.1207,
"step": 7320
},
{
"epoch": 35.82672361195851,
"grad_norm": 3.204240322113037,
"learning_rate": 0.00015252459016393442,
"loss": 0.1189,
"step": 7340
},
{
"epoch": 35.924344112263576,
"grad_norm": 3.0229642391204834,
"learning_rate": 0.00015239344262295084,
"loss": 0.1221,
"step": 7360
},
{
"epoch": 36.02196461256864,
"grad_norm": 2.1176633834838867,
"learning_rate": 0.0001522622950819672,
"loss": 0.1065,
"step": 7380
},
{
"epoch": 36.1195851128737,
"grad_norm": 2.1961264610290527,
"learning_rate": 0.00015213114754098362,
"loss": 0.0946,
"step": 7400
},
{
"epoch": 36.21720561317877,
"grad_norm": 2.6912176609039307,
"learning_rate": 0.000152,
"loss": 0.1052,
"step": 7420
},
{
"epoch": 36.31482611348383,
"grad_norm": 2.8034870624542236,
"learning_rate": 0.0001518688524590164,
"loss": 0.0935,
"step": 7440
},
{
"epoch": 36.412446613788894,
"grad_norm": 2.85784649848938,
"learning_rate": 0.0001517377049180328,
"loss": 0.0984,
"step": 7460
},
{
"epoch": 36.51006711409396,
"grad_norm": 3.637848377227783,
"learning_rate": 0.00015160655737704918,
"loss": 0.1134,
"step": 7480
},
{
"epoch": 36.607687614399026,
"grad_norm": 3.2717106342315674,
"learning_rate": 0.0001514754098360656,
"loss": 0.1059,
"step": 7500
},
{
"epoch": 36.70530811470409,
"grad_norm": 2.9539332389831543,
"learning_rate": 0.00015134426229508198,
"loss": 0.1033,
"step": 7520
},
{
"epoch": 36.80292861500915,
"grad_norm": 3.410210132598877,
"learning_rate": 0.00015121311475409837,
"loss": 0.1107,
"step": 7540
},
{
"epoch": 36.90054911531421,
"grad_norm": 3.643808126449585,
"learning_rate": 0.00015108196721311476,
"loss": 0.118,
"step": 7560
},
{
"epoch": 36.99816961561928,
"grad_norm": 2.9359118938446045,
"learning_rate": 0.00015095081967213115,
"loss": 0.1167,
"step": 7580
},
{
"epoch": 37.095790115924345,
"grad_norm": 2.369500160217285,
"learning_rate": 0.00015081967213114754,
"loss": 0.0853,
"step": 7600
},
{
"epoch": 37.19341061622941,
"grad_norm": 2.8940887451171875,
"learning_rate": 0.00015068852459016393,
"loss": 0.0951,
"step": 7620
},
{
"epoch": 37.29103111653447,
"grad_norm": 2.3819053173065186,
"learning_rate": 0.00015055737704918035,
"loss": 0.1047,
"step": 7640
},
{
"epoch": 37.38865161683954,
"grad_norm": 2.7095139026641846,
"learning_rate": 0.00015042622950819673,
"loss": 0.1004,
"step": 7660
},
{
"epoch": 37.4862721171446,
"grad_norm": 2.892263650894165,
"learning_rate": 0.00015029508196721312,
"loss": 0.1031,
"step": 7680
},
{
"epoch": 37.58389261744966,
"grad_norm": 3.0117244720458984,
"learning_rate": 0.00015016393442622951,
"loss": 0.1036,
"step": 7700
},
{
"epoch": 37.681513117754726,
"grad_norm": 2.048797130584717,
"learning_rate": 0.0001500327868852459,
"loss": 0.0975,
"step": 7720
},
{
"epoch": 37.779133618059795,
"grad_norm": 3.051039218902588,
"learning_rate": 0.00014990163934426232,
"loss": 0.1087,
"step": 7740
},
{
"epoch": 37.87675411836486,
"grad_norm": 2.2224767208099365,
"learning_rate": 0.00014977049180327868,
"loss": 0.1072,
"step": 7760
},
{
"epoch": 37.97437461866992,
"grad_norm": 2.8031728267669678,
"learning_rate": 0.0001496393442622951,
"loss": 0.1017,
"step": 7780
},
{
"epoch": 38.07199511897498,
"grad_norm": 2.108718156814575,
"learning_rate": 0.0001495081967213115,
"loss": 0.0921,
"step": 7800
},
{
"epoch": 38.16961561928005,
"grad_norm": 2.253871202468872,
"learning_rate": 0.00014937704918032788,
"loss": 0.0898,
"step": 7820
},
{
"epoch": 38.267236119585114,
"grad_norm": 2.5459353923797607,
"learning_rate": 0.00014924590163934427,
"loss": 0.0903,
"step": 7840
},
{
"epoch": 38.364856619890176,
"grad_norm": 2.238715171813965,
"learning_rate": 0.00014911475409836066,
"loss": 0.0906,
"step": 7860
},
{
"epoch": 38.46247712019524,
"grad_norm": 2.611955404281616,
"learning_rate": 0.00014898360655737707,
"loss": 0.0882,
"step": 7880
},
{
"epoch": 38.56009762050031,
"grad_norm": 3.3407866954803467,
"learning_rate": 0.00014885245901639343,
"loss": 0.0994,
"step": 7900
},
{
"epoch": 38.65771812080537,
"grad_norm": 2.6214687824249268,
"learning_rate": 0.00014872131147540985,
"loss": 0.0918,
"step": 7920
},
{
"epoch": 38.75533862111043,
"grad_norm": 3.7567331790924072,
"learning_rate": 0.00014859016393442624,
"loss": 0.1039,
"step": 7940
},
{
"epoch": 38.852959121415495,
"grad_norm": 3.809807538986206,
"learning_rate": 0.00014845901639344263,
"loss": 0.1081,
"step": 7960
},
{
"epoch": 38.950579621720564,
"grad_norm": 3.56388258934021,
"learning_rate": 0.00014832786885245902,
"loss": 0.108,
"step": 7980
},
{
"epoch": 39.04820012202563,
"grad_norm": 2.5101027488708496,
"learning_rate": 0.0001481967213114754,
"loss": 0.0941,
"step": 8000
},
{
"epoch": 39.14582062233069,
"grad_norm": 2.393336296081543,
"learning_rate": 0.00014806557377049182,
"loss": 0.0786,
"step": 8020
},
{
"epoch": 39.24344112263575,
"grad_norm": 3.982912063598633,
"learning_rate": 0.0001479344262295082,
"loss": 0.0848,
"step": 8040
},
{
"epoch": 39.34106162294082,
"grad_norm": 2.0777554512023926,
"learning_rate": 0.0001478032786885246,
"loss": 0.0971,
"step": 8060
},
{
"epoch": 39.43868212324588,
"grad_norm": 3.667409658432007,
"learning_rate": 0.000147672131147541,
"loss": 0.0943,
"step": 8080
},
{
"epoch": 39.536302623550945,
"grad_norm": 3.234408378601074,
"learning_rate": 0.00014754098360655738,
"loss": 0.1012,
"step": 8100
},
{
"epoch": 39.63392312385601,
"grad_norm": 2.9830737113952637,
"learning_rate": 0.00014740983606557377,
"loss": 0.0889,
"step": 8120
},
{
"epoch": 39.73154362416108,
"grad_norm": 2.235419750213623,
"learning_rate": 0.00014727868852459016,
"loss": 0.0994,
"step": 8140
},
{
"epoch": 39.82916412446614,
"grad_norm": 2.695769786834717,
"learning_rate": 0.00014714754098360658,
"loss": 0.0952,
"step": 8160
},
{
"epoch": 39.9267846247712,
"grad_norm": 2.6697440147399902,
"learning_rate": 0.00014701639344262297,
"loss": 0.1067,
"step": 8180
},
{
"epoch": 40.024405125076264,
"grad_norm": 2.091428756713867,
"learning_rate": 0.00014688524590163936,
"loss": 0.0915,
"step": 8200
},
{
"epoch": 40.12202562538133,
"grad_norm": 2.332493305206299,
"learning_rate": 0.00014675409836065575,
"loss": 0.0786,
"step": 8220
},
{
"epoch": 40.219646125686396,
"grad_norm": 2.596400260925293,
"learning_rate": 0.00014662295081967214,
"loss": 0.0855,
"step": 8240
},
{
"epoch": 40.31726662599146,
"grad_norm": 2.1886463165283203,
"learning_rate": 0.00014649180327868852,
"loss": 0.0823,
"step": 8260
},
{
"epoch": 40.41488712629652,
"grad_norm": 3.2993671894073486,
"learning_rate": 0.00014636065573770491,
"loss": 0.0892,
"step": 8280
},
{
"epoch": 40.51250762660159,
"grad_norm": 2.4500246047973633,
"learning_rate": 0.00014622950819672133,
"loss": 0.087,
"step": 8300
},
{
"epoch": 40.61012812690665,
"grad_norm": 2.4187991619110107,
"learning_rate": 0.00014609836065573772,
"loss": 0.0911,
"step": 8320
},
{
"epoch": 40.707748627211714,
"grad_norm": 2.3600058555603027,
"learning_rate": 0.0001459672131147541,
"loss": 0.0916,
"step": 8340
},
{
"epoch": 40.80536912751678,
"grad_norm": 2.7269983291625977,
"learning_rate": 0.0001458360655737705,
"loss": 0.0986,
"step": 8360
},
{
"epoch": 40.902989627821846,
"grad_norm": 2.907344102859497,
"learning_rate": 0.0001457049180327869,
"loss": 0.1055,
"step": 8380
},
{
"epoch": 41.00061012812691,
"grad_norm": 2.574118137359619,
"learning_rate": 0.0001455737704918033,
"loss": 0.099,
"step": 8400
},
{
"epoch": 41.09823062843197,
"grad_norm": 2.291900634765625,
"learning_rate": 0.00014544262295081967,
"loss": 0.0676,
"step": 8420
},
{
"epoch": 41.19585112873703,
"grad_norm": 2.241544246673584,
"learning_rate": 0.00014531147540983608,
"loss": 0.0795,
"step": 8440
},
{
"epoch": 41.2934716290421,
"grad_norm": 2.6499416828155518,
"learning_rate": 0.00014518032786885247,
"loss": 0.083,
"step": 8460
},
{
"epoch": 41.391092129347165,
"grad_norm": 2.436126947402954,
"learning_rate": 0.00014504918032786886,
"loss": 0.0927,
"step": 8480
},
{
"epoch": 41.48871262965223,
"grad_norm": 3.5703392028808594,
"learning_rate": 0.00014491803278688525,
"loss": 0.0804,
"step": 8500
},
{
"epoch": 41.58633312995729,
"grad_norm": 2.8533120155334473,
"learning_rate": 0.00014478688524590164,
"loss": 0.0871,
"step": 8520
},
{
"epoch": 41.68395363026236,
"grad_norm": 2.9216787815093994,
"learning_rate": 0.00014465573770491806,
"loss": 0.091,
"step": 8540
},
{
"epoch": 41.78157413056742,
"grad_norm": 2.6128993034362793,
"learning_rate": 0.00014452459016393442,
"loss": 0.099,
"step": 8560
},
{
"epoch": 41.87919463087248,
"grad_norm": 3.376518726348877,
"learning_rate": 0.00014439344262295084,
"loss": 0.0926,
"step": 8580
},
{
"epoch": 41.976815131177545,
"grad_norm": 2.5684266090393066,
"learning_rate": 0.00014426229508196722,
"loss": 0.1061,
"step": 8600
},
{
"epoch": 42.074435631482615,
"grad_norm": 2.173293113708496,
"learning_rate": 0.00014413114754098361,
"loss": 0.0744,
"step": 8620
},
{
"epoch": 42.17205613178768,
"grad_norm": 1.6150739192962646,
"learning_rate": 0.000144,
"loss": 0.0802,
"step": 8640
},
{
"epoch": 42.26967663209274,
"grad_norm": 2.8161544799804688,
"learning_rate": 0.0001438688524590164,
"loss": 0.0825,
"step": 8660
},
{
"epoch": 42.3672971323978,
"grad_norm": 2.475748062133789,
"learning_rate": 0.0001437377049180328,
"loss": 0.0792,
"step": 8680
},
{
"epoch": 42.464917632702864,
"grad_norm": 2.540273427963257,
"learning_rate": 0.00014360655737704917,
"loss": 0.0895,
"step": 8700
},
{
"epoch": 42.56253813300793,
"grad_norm": 3.0831921100616455,
"learning_rate": 0.0001434754098360656,
"loss": 0.0834,
"step": 8720
},
{
"epoch": 42.660158633312996,
"grad_norm": 2.467629909515381,
"learning_rate": 0.00014334426229508198,
"loss": 0.0816,
"step": 8740
},
{
"epoch": 42.75777913361806,
"grad_norm": 2.2306694984436035,
"learning_rate": 0.00014321311475409837,
"loss": 0.0933,
"step": 8760
},
{
"epoch": 42.85539963392312,
"grad_norm": 2.662369728088379,
"learning_rate": 0.00014308196721311476,
"loss": 0.0906,
"step": 8780
},
{
"epoch": 42.95302013422819,
"grad_norm": 3.169682741165161,
"learning_rate": 0.00014295081967213115,
"loss": 0.0888,
"step": 8800
},
{
"epoch": 43.05064063453325,
"grad_norm": 3.4279258251190186,
"learning_rate": 0.00014281967213114756,
"loss": 0.0816,
"step": 8820
},
{
"epoch": 43.148261134838314,
"grad_norm": 2.4985101222991943,
"learning_rate": 0.00014268852459016395,
"loss": 0.0789,
"step": 8840
},
{
"epoch": 43.24588163514338,
"grad_norm": 3.5532596111297607,
"learning_rate": 0.00014255737704918034,
"loss": 0.0711,
"step": 8860
},
{
"epoch": 43.343502135448446,
"grad_norm": 2.270078420639038,
"learning_rate": 0.00014242622950819673,
"loss": 0.0753,
"step": 8880
},
{
"epoch": 43.44112263575351,
"grad_norm": 2.114807367324829,
"learning_rate": 0.00014229508196721312,
"loss": 0.0751,
"step": 8900
},
{
"epoch": 43.53874313605857,
"grad_norm": 3.013507843017578,
"learning_rate": 0.0001421639344262295,
"loss": 0.0858,
"step": 8920
},
{
"epoch": 43.63636363636363,
"grad_norm": 2.022995948791504,
"learning_rate": 0.0001420327868852459,
"loss": 0.0803,
"step": 8940
},
{
"epoch": 43.7339841366687,
"grad_norm": 2.6093649864196777,
"learning_rate": 0.00014190163934426231,
"loss": 0.0828,
"step": 8960
},
{
"epoch": 43.831604636973765,
"grad_norm": 3.3971455097198486,
"learning_rate": 0.0001417704918032787,
"loss": 0.0909,
"step": 8980
},
{
"epoch": 43.92922513727883,
"grad_norm": 2.7203516960144043,
"learning_rate": 0.0001416393442622951,
"loss": 0.0884,
"step": 9000
},
{
"epoch": 44.02684563758389,
"grad_norm": 1.8878543376922607,
"learning_rate": 0.00014150819672131148,
"loss": 0.0782,
"step": 9020
},
{
"epoch": 44.12446613788896,
"grad_norm": 2.146353006362915,
"learning_rate": 0.00014137704918032787,
"loss": 0.0702,
"step": 9040
},
{
"epoch": 44.22208663819402,
"grad_norm": 2.4737651348114014,
"learning_rate": 0.00014124590163934426,
"loss": 0.0762,
"step": 9060
},
{
"epoch": 44.31970713849908,
"grad_norm": 2.3397083282470703,
"learning_rate": 0.00014111475409836065,
"loss": 0.072,
"step": 9080
},
{
"epoch": 44.417327638804146,
"grad_norm": 1.8726561069488525,
"learning_rate": 0.00014098360655737707,
"loss": 0.068,
"step": 9100
},
{
"epoch": 44.514948139109215,
"grad_norm": 2.164581775665283,
"learning_rate": 0.00014085245901639346,
"loss": 0.0767,
"step": 9120
},
{
"epoch": 44.61256863941428,
"grad_norm": 3.2708849906921387,
"learning_rate": 0.00014072131147540985,
"loss": 0.0842,
"step": 9140
},
{
"epoch": 44.71018913971934,
"grad_norm": 3.566901206970215,
"learning_rate": 0.00014059016393442624,
"loss": 0.0813,
"step": 9160
},
{
"epoch": 44.8078096400244,
"grad_norm": 3.0178351402282715,
"learning_rate": 0.00014045901639344262,
"loss": 0.0837,
"step": 9180
},
{
"epoch": 44.90543014032947,
"grad_norm": 3.19291353225708,
"learning_rate": 0.00014032786885245904,
"loss": 0.0886,
"step": 9200
},
{
"epoch": 45.003050640634534,
"grad_norm": 2.640066146850586,
"learning_rate": 0.0001401967213114754,
"loss": 0.0842,
"step": 9220
},
{
"epoch": 45.100671140939596,
"grad_norm": 2.125232458114624,
"learning_rate": 0.00014006557377049182,
"loss": 0.0657,
"step": 9240
},
{
"epoch": 45.19829164124466,
"grad_norm": 2.5191051959991455,
"learning_rate": 0.0001399344262295082,
"loss": 0.0708,
"step": 9260
},
{
"epoch": 45.29591214154973,
"grad_norm": 1.9102871417999268,
"learning_rate": 0.0001398032786885246,
"loss": 0.0665,
"step": 9280
},
{
"epoch": 45.39353264185479,
"grad_norm": 2.4303011894226074,
"learning_rate": 0.000139672131147541,
"loss": 0.0737,
"step": 9300
},
{
"epoch": 45.49115314215985,
"grad_norm": 2.9228925704956055,
"learning_rate": 0.00013954098360655738,
"loss": 0.0731,
"step": 9320
},
{
"epoch": 45.588773642464915,
"grad_norm": 2.2215464115142822,
"learning_rate": 0.0001394098360655738,
"loss": 0.0778,
"step": 9340
},
{
"epoch": 45.686394142769984,
"grad_norm": 3.2468485832214355,
"learning_rate": 0.00013927868852459016,
"loss": 0.0799,
"step": 9360
},
{
"epoch": 45.78401464307505,
"grad_norm": 2.814979076385498,
"learning_rate": 0.00013914754098360657,
"loss": 0.0857,
"step": 9380
},
{
"epoch": 45.88163514338011,
"grad_norm": 2.6760339736938477,
"learning_rate": 0.00013901639344262296,
"loss": 0.0762,
"step": 9400
},
{
"epoch": 45.97925564368517,
"grad_norm": 2.8788065910339355,
"learning_rate": 0.00013888524590163935,
"loss": 0.0881,
"step": 9420
},
{
"epoch": 46.07687614399024,
"grad_norm": 2.4430296421051025,
"learning_rate": 0.00013875409836065574,
"loss": 0.0656,
"step": 9440
},
{
"epoch": 46.1744966442953,
"grad_norm": 2.3718602657318115,
"learning_rate": 0.00013862295081967213,
"loss": 0.0667,
"step": 9460
},
{
"epoch": 46.272117144600365,
"grad_norm": 2.2495276927948,
"learning_rate": 0.00013849180327868855,
"loss": 0.0741,
"step": 9480
},
{
"epoch": 46.36973764490543,
"grad_norm": 3.612171173095703,
"learning_rate": 0.0001383606557377049,
"loss": 0.0751,
"step": 9500
},
{
"epoch": 46.4673581452105,
"grad_norm": 2.656705379486084,
"learning_rate": 0.00013822950819672133,
"loss": 0.0765,
"step": 9520
},
{
"epoch": 46.56497864551556,
"grad_norm": 2.154595136642456,
"learning_rate": 0.00013809836065573771,
"loss": 0.0772,
"step": 9540
},
{
"epoch": 46.66259914582062,
"grad_norm": 1.911001205444336,
"learning_rate": 0.0001379672131147541,
"loss": 0.0708,
"step": 9560
},
{
"epoch": 46.760219646125684,
"grad_norm": 3.0681228637695312,
"learning_rate": 0.0001378360655737705,
"loss": 0.0803,
"step": 9580
},
{
"epoch": 46.85784014643075,
"grad_norm": 2.6590416431427,
"learning_rate": 0.00013770491803278688,
"loss": 0.0784,
"step": 9600
},
{
"epoch": 46.955460646735816,
"grad_norm": 2.7904417514801025,
"learning_rate": 0.0001375737704918033,
"loss": 0.0775,
"step": 9620
},
{
"epoch": 47.05308114704088,
"grad_norm": 1.802498459815979,
"learning_rate": 0.0001374426229508197,
"loss": 0.0747,
"step": 9640
},
{
"epoch": 47.15070164734594,
"grad_norm": 1.8101750612258911,
"learning_rate": 0.00013731147540983608,
"loss": 0.0648,
"step": 9660
},
{
"epoch": 47.24832214765101,
"grad_norm": 3.6735594272613525,
"learning_rate": 0.00013718032786885247,
"loss": 0.0657,
"step": 9680
},
{
"epoch": 47.34594264795607,
"grad_norm": 2.0004684925079346,
"learning_rate": 0.00013704918032786886,
"loss": 0.0628,
"step": 9700
},
{
"epoch": 47.443563148261134,
"grad_norm": 2.4625582695007324,
"learning_rate": 0.00013691803278688525,
"loss": 0.0654,
"step": 9720
},
{
"epoch": 47.5411836485662,
"grad_norm": 2.3431763648986816,
"learning_rate": 0.00013678688524590164,
"loss": 0.0741,
"step": 9740
},
{
"epoch": 47.638804148871266,
"grad_norm": 3.0707414150238037,
"learning_rate": 0.00013665573770491805,
"loss": 0.0734,
"step": 9760
},
{
"epoch": 47.73642464917633,
"grad_norm": 3.3095669746398926,
"learning_rate": 0.00013652459016393444,
"loss": 0.0793,
"step": 9780
},
{
"epoch": 47.83404514948139,
"grad_norm": 2.388031482696533,
"learning_rate": 0.00013639344262295083,
"loss": 0.0789,
"step": 9800
},
{
"epoch": 47.93166564978645,
"grad_norm": 2.9367451667785645,
"learning_rate": 0.00013626229508196722,
"loss": 0.0798,
"step": 9820
},
{
"epoch": 48.02928615009152,
"grad_norm": 3.24287486076355,
"learning_rate": 0.0001361311475409836,
"loss": 0.0754,
"step": 9840
},
{
"epoch": 48.126906650396585,
"grad_norm": 2.868478536605835,
"learning_rate": 0.00013600000000000003,
"loss": 0.056,
"step": 9860
},
{
"epoch": 48.22452715070165,
"grad_norm": 1.8352900743484497,
"learning_rate": 0.0001358688524590164,
"loss": 0.0686,
"step": 9880
},
{
"epoch": 48.32214765100671,
"grad_norm": 1.727157711982727,
"learning_rate": 0.0001357377049180328,
"loss": 0.0618,
"step": 9900
},
{
"epoch": 48.41976815131178,
"grad_norm": 1.8096739053726196,
"learning_rate": 0.0001356065573770492,
"loss": 0.0694,
"step": 9920
},
{
"epoch": 48.51738865161684,
"grad_norm": 2.4526126384735107,
"learning_rate": 0.00013547540983606556,
"loss": 0.0762,
"step": 9940
},
{
"epoch": 48.6150091519219,
"grad_norm": 2.2709124088287354,
"learning_rate": 0.00013534426229508197,
"loss": 0.0764,
"step": 9960
},
{
"epoch": 48.712629652226966,
"grad_norm": 3.1081132888793945,
"learning_rate": 0.00013521311475409836,
"loss": 0.0682,
"step": 9980
},
{
"epoch": 48.810250152532035,
"grad_norm": 2.288539171218872,
"learning_rate": 0.00013508196721311478,
"loss": 0.0737,
"step": 10000
},
{
"epoch": 48.9078706528371,
"grad_norm": 2.9011008739471436,
"learning_rate": 0.00013495081967213114,
"loss": 0.0756,
"step": 10020
},
{
"epoch": 49.00549115314216,
"grad_norm": 2.3980066776275635,
"learning_rate": 0.00013481967213114756,
"loss": 0.0778,
"step": 10040
},
{
"epoch": 49.10311165344722,
"grad_norm": 2.349588394165039,
"learning_rate": 0.00013468852459016395,
"loss": 0.0515,
"step": 10060
},
{
"epoch": 49.20073215375229,
"grad_norm": 1.9128116369247437,
"learning_rate": 0.00013455737704918034,
"loss": 0.06,
"step": 10080
},
{
"epoch": 49.298352654057354,
"grad_norm": 3.1744940280914307,
"learning_rate": 0.00013442622950819673,
"loss": 0.065,
"step": 10100
},
{
"epoch": 49.395973154362416,
"grad_norm": 4.267724990844727,
"learning_rate": 0.00013429508196721311,
"loss": 0.0681,
"step": 10120
},
{
"epoch": 49.49359365466748,
"grad_norm": 2.734342575073242,
"learning_rate": 0.00013416393442622953,
"loss": 0.0696,
"step": 10140
},
{
"epoch": 49.59121415497255,
"grad_norm": 2.0303688049316406,
"learning_rate": 0.0001340327868852459,
"loss": 0.0666,
"step": 10160
},
{
"epoch": 49.68883465527761,
"grad_norm": 2.434067964553833,
"learning_rate": 0.0001339016393442623,
"loss": 0.0742,
"step": 10180
},
{
"epoch": 49.78645515558267,
"grad_norm": 2.633126735687256,
"learning_rate": 0.0001337704918032787,
"loss": 0.0741,
"step": 10200
},
{
"epoch": 49.884075655887735,
"grad_norm": 3.312375545501709,
"learning_rate": 0.0001336393442622951,
"loss": 0.0773,
"step": 10220
},
{
"epoch": 49.981696156192804,
"grad_norm": 2.251260280609131,
"learning_rate": 0.00013350819672131148,
"loss": 0.0795,
"step": 10240
},
{
"epoch": 50.079316656497866,
"grad_norm": 2.0748329162597656,
"learning_rate": 0.00013337704918032787,
"loss": 0.061,
"step": 10260
},
{
"epoch": 50.17693715680293,
"grad_norm": 2.7968759536743164,
"learning_rate": 0.00013324590163934428,
"loss": 0.0634,
"step": 10280
},
{
"epoch": 50.27455765710799,
"grad_norm": 2.5612518787384033,
"learning_rate": 0.00013311475409836067,
"loss": 0.0632,
"step": 10300
},
{
"epoch": 50.37217815741306,
"grad_norm": 3.2175679206848145,
"learning_rate": 0.00013298360655737706,
"loss": 0.0642,
"step": 10320
},
{
"epoch": 50.46979865771812,
"grad_norm": 2.260554790496826,
"learning_rate": 0.00013285245901639345,
"loss": 0.0609,
"step": 10340
},
{
"epoch": 50.567419158023185,
"grad_norm": 2.8259806632995605,
"learning_rate": 0.00013272131147540984,
"loss": 0.0735,
"step": 10360
},
{
"epoch": 50.66503965832825,
"grad_norm": 2.393419027328491,
"learning_rate": 0.00013259016393442623,
"loss": 0.0672,
"step": 10380
},
{
"epoch": 50.76266015863331,
"grad_norm": 2.3658840656280518,
"learning_rate": 0.00013245901639344262,
"loss": 0.0713,
"step": 10400
},
{
"epoch": 50.86028065893838,
"grad_norm": 2.526512384414673,
"learning_rate": 0.00013232786885245904,
"loss": 0.0699,
"step": 10420
},
{
"epoch": 50.95790115924344,
"grad_norm": 2.856234073638916,
"learning_rate": 0.00013219672131147543,
"loss": 0.0713,
"step": 10440
},
{
"epoch": 51.0555216595485,
"grad_norm": 1.5620033740997314,
"learning_rate": 0.0001320655737704918,
"loss": 0.06,
"step": 10460
},
{
"epoch": 51.153142159853566,
"grad_norm": 3.7794861793518066,
"learning_rate": 0.0001319344262295082,
"loss": 0.0581,
"step": 10480
},
{
"epoch": 51.250762660158635,
"grad_norm": 2.4568800926208496,
"learning_rate": 0.0001318032786885246,
"loss": 0.059,
"step": 10500
},
{
"epoch": 51.3483831604637,
"grad_norm": 2.32920503616333,
"learning_rate": 0.000131672131147541,
"loss": 0.0629,
"step": 10520
},
{
"epoch": 51.44600366076876,
"grad_norm": 2.1552681922912598,
"learning_rate": 0.00013154098360655737,
"loss": 0.0646,
"step": 10540
},
{
"epoch": 51.54362416107382,
"grad_norm": 2.146484375,
"learning_rate": 0.0001314098360655738,
"loss": 0.064,
"step": 10560
},
{
"epoch": 51.64124466137889,
"grad_norm": 2.428022623062134,
"learning_rate": 0.00013127868852459018,
"loss": 0.0644,
"step": 10580
},
{
"epoch": 51.738865161683954,
"grad_norm": 3.3938937187194824,
"learning_rate": 0.00013114754098360654,
"loss": 0.0736,
"step": 10600
},
{
"epoch": 51.836485661989016,
"grad_norm": 2.2218217849731445,
"learning_rate": 0.00013101639344262296,
"loss": 0.0699,
"step": 10620
},
{
"epoch": 51.93410616229408,
"grad_norm": 2.2436861991882324,
"learning_rate": 0.00013088524590163935,
"loss": 0.0718,
"step": 10640
},
{
"epoch": 52.03172666259915,
"grad_norm": 1.7175501585006714,
"learning_rate": 0.00013075409836065576,
"loss": 0.0667,
"step": 10660
},
{
"epoch": 52.12934716290421,
"grad_norm": 2.127976417541504,
"learning_rate": 0.00013062295081967213,
"loss": 0.0531,
"step": 10680
},
{
"epoch": 52.22696766320927,
"grad_norm": 1.9709439277648926,
"learning_rate": 0.00013049180327868854,
"loss": 0.0553,
"step": 10700
},
{
"epoch": 52.324588163514335,
"grad_norm": 2.644279718399048,
"learning_rate": 0.00013036065573770493,
"loss": 0.0624,
"step": 10720
},
{
"epoch": 52.422208663819404,
"grad_norm": 1.570968747138977,
"learning_rate": 0.00013022950819672132,
"loss": 0.061,
"step": 10740
},
{
"epoch": 52.51982916412447,
"grad_norm": 2.2433953285217285,
"learning_rate": 0.0001300983606557377,
"loss": 0.0642,
"step": 10760
},
{
"epoch": 52.61744966442953,
"grad_norm": 2.019834280014038,
"learning_rate": 0.0001299672131147541,
"loss": 0.0681,
"step": 10780
},
{
"epoch": 52.71507016473459,
"grad_norm": 2.366257429122925,
"learning_rate": 0.00012983606557377052,
"loss": 0.0697,
"step": 10800
},
{
"epoch": 52.81269066503966,
"grad_norm": 2.3033928871154785,
"learning_rate": 0.00012970491803278688,
"loss": 0.0631,
"step": 10820
},
{
"epoch": 52.91031116534472,
"grad_norm": 2.430032968521118,
"learning_rate": 0.0001295737704918033,
"loss": 0.0676,
"step": 10840
},
{
"epoch": 53.007931665649785,
"grad_norm": 1.5559980869293213,
"learning_rate": 0.00012944262295081968,
"loss": 0.063,
"step": 10860
},
{
"epoch": 53.10555216595485,
"grad_norm": 2.612412452697754,
"learning_rate": 0.00012931147540983607,
"loss": 0.0566,
"step": 10880
},
{
"epoch": 53.20317266625992,
"grad_norm": 2.391444444656372,
"learning_rate": 0.00012918032786885246,
"loss": 0.0565,
"step": 10900
},
{
"epoch": 53.30079316656498,
"grad_norm": 2.6666293144226074,
"learning_rate": 0.00012904918032786885,
"loss": 0.0598,
"step": 10920
},
{
"epoch": 53.39841366687004,
"grad_norm": 2.613943576812744,
"learning_rate": 0.00012891803278688527,
"loss": 0.063,
"step": 10940
},
{
"epoch": 53.496034167175104,
"grad_norm": 2.273446559906006,
"learning_rate": 0.00012878688524590166,
"loss": 0.0588,
"step": 10960
},
{
"epoch": 53.59365466748017,
"grad_norm": 2.058150291442871,
"learning_rate": 0.00012865573770491802,
"loss": 0.0588,
"step": 10980
},
{
"epoch": 53.691275167785236,
"grad_norm": 2.4559085369110107,
"learning_rate": 0.00012852459016393444,
"loss": 0.0648,
"step": 11000
},
{
"epoch": 53.7888956680903,
"grad_norm": 2.1946640014648438,
"learning_rate": 0.00012839344262295083,
"loss": 0.0689,
"step": 11020
},
{
"epoch": 53.88651616839536,
"grad_norm": 2.7223618030548096,
"learning_rate": 0.00012826229508196722,
"loss": 0.0637,
"step": 11040
},
{
"epoch": 53.98413666870043,
"grad_norm": 1.8997637033462524,
"learning_rate": 0.0001281311475409836,
"loss": 0.0624,
"step": 11060
},
{
"epoch": 54.08175716900549,
"grad_norm": 1.6322365999221802,
"learning_rate": 0.00012800000000000002,
"loss": 0.0529,
"step": 11080
},
{
"epoch": 54.179377669310554,
"grad_norm": 1.6490808725357056,
"learning_rate": 0.0001278688524590164,
"loss": 0.0557,
"step": 11100
},
{
"epoch": 54.27699816961562,
"grad_norm": 1.3738045692443848,
"learning_rate": 0.00012773770491803277,
"loss": 0.0553,
"step": 11120
},
{
"epoch": 54.374618669920686,
"grad_norm": 1.990561604499817,
"learning_rate": 0.0001276065573770492,
"loss": 0.0555,
"step": 11140
},
{
"epoch": 54.47223917022575,
"grad_norm": 1.8162143230438232,
"learning_rate": 0.00012747540983606558,
"loss": 0.0572,
"step": 11160
},
{
"epoch": 54.56985967053081,
"grad_norm": 1.980643391609192,
"learning_rate": 0.000127344262295082,
"loss": 0.0689,
"step": 11180
},
{
"epoch": 54.66748017083587,
"grad_norm": 2.3956120014190674,
"learning_rate": 0.00012721311475409836,
"loss": 0.0616,
"step": 11200
},
{
"epoch": 54.76510067114094,
"grad_norm": 3.9318060874938965,
"learning_rate": 0.00012708196721311477,
"loss": 0.0594,
"step": 11220
},
{
"epoch": 54.862721171446005,
"grad_norm": 2.353564739227295,
"learning_rate": 0.00012695081967213116,
"loss": 0.0589,
"step": 11240
},
{
"epoch": 54.96034167175107,
"grad_norm": 2.640501022338867,
"learning_rate": 0.00012681967213114753,
"loss": 0.0617,
"step": 11260
},
{
"epoch": 55.05796217205613,
"grad_norm": 2.2255806922912598,
"learning_rate": 0.00012668852459016394,
"loss": 0.0596,
"step": 11280
},
{
"epoch": 55.1555826723612,
"grad_norm": 1.7652024030685425,
"learning_rate": 0.00012655737704918033,
"loss": 0.0573,
"step": 11300
},
{
"epoch": 55.25320317266626,
"grad_norm": 1.6768336296081543,
"learning_rate": 0.00012642622950819675,
"loss": 0.0533,
"step": 11320
},
{
"epoch": 55.35082367297132,
"grad_norm": 1.9633594751358032,
"learning_rate": 0.0001262950819672131,
"loss": 0.0501,
"step": 11340
},
{
"epoch": 55.448444173276386,
"grad_norm": 3.0632686614990234,
"learning_rate": 0.0001261639344262295,
"loss": 0.0584,
"step": 11360
},
{
"epoch": 55.546064673581455,
"grad_norm": 3.617532968521118,
"learning_rate": 0.00012603278688524592,
"loss": 0.0573,
"step": 11380
},
{
"epoch": 55.64368517388652,
"grad_norm": 2.046466827392578,
"learning_rate": 0.0001259016393442623,
"loss": 0.0596,
"step": 11400
},
{
"epoch": 55.74130567419158,
"grad_norm": 2.1905694007873535,
"learning_rate": 0.0001257704918032787,
"loss": 0.0632,
"step": 11420
},
{
"epoch": 55.83892617449664,
"grad_norm": 3.6722793579101562,
"learning_rate": 0.00012563934426229508,
"loss": 0.0626,
"step": 11440
},
{
"epoch": 55.93654667480171,
"grad_norm": 1.6656643152236938,
"learning_rate": 0.0001255081967213115,
"loss": 0.0637,
"step": 11460
},
{
"epoch": 56.034167175106774,
"grad_norm": 1.9462730884552002,
"learning_rate": 0.00012537704918032786,
"loss": 0.0562,
"step": 11480
},
{
"epoch": 56.131787675411836,
"grad_norm": 2.050899028778076,
"learning_rate": 0.00012524590163934425,
"loss": 0.0499,
"step": 11500
},
{
"epoch": 56.2294081757169,
"grad_norm": 2.114248514175415,
"learning_rate": 0.00012511475409836067,
"loss": 0.0489,
"step": 11520
},
{
"epoch": 56.32702867602197,
"grad_norm": 2.2343602180480957,
"learning_rate": 0.00012498360655737706,
"loss": 0.0547,
"step": 11540
},
{
"epoch": 56.42464917632703,
"grad_norm": 2.2102224826812744,
"learning_rate": 0.00012485245901639345,
"loss": 0.0568,
"step": 11560
},
{
"epoch": 56.52226967663209,
"grad_norm": 9.124307632446289,
"learning_rate": 0.00012472131147540984,
"loss": 0.0639,
"step": 11580
},
{
"epoch": 56.619890176937155,
"grad_norm": 3.184844493865967,
"learning_rate": 0.00012459016393442625,
"loss": 0.0577,
"step": 11600
},
{
"epoch": 56.717510677242224,
"grad_norm": 1.8462156057357788,
"learning_rate": 0.00012445901639344262,
"loss": 0.0549,
"step": 11620
},
{
"epoch": 56.815131177547286,
"grad_norm": 1.667975902557373,
"learning_rate": 0.000124327868852459,
"loss": 0.0534,
"step": 11640
},
{
"epoch": 56.91275167785235,
"grad_norm": 2.60188364982605,
"learning_rate": 0.00012419672131147542,
"loss": 0.0637,
"step": 11660
},
{
"epoch": 57.01037217815741,
"grad_norm": 1.711301565170288,
"learning_rate": 0.0001240655737704918,
"loss": 0.0623,
"step": 11680
},
{
"epoch": 57.10799267846248,
"grad_norm": 3.080031394958496,
"learning_rate": 0.0001239344262295082,
"loss": 0.0438,
"step": 11700
},
{
"epoch": 57.20561317876754,
"grad_norm": 1.9816887378692627,
"learning_rate": 0.0001238032786885246,
"loss": 0.053,
"step": 11720
},
{
"epoch": 57.303233679072605,
"grad_norm": 2.1627466678619385,
"learning_rate": 0.000123672131147541,
"loss": 0.0544,
"step": 11740
},
{
"epoch": 57.40085417937767,
"grad_norm": 2.412473678588867,
"learning_rate": 0.0001235409836065574,
"loss": 0.0498,
"step": 11760
},
{
"epoch": 57.49847467968274,
"grad_norm": 1.8432903289794922,
"learning_rate": 0.00012340983606557376,
"loss": 0.055,
"step": 11780
},
{
"epoch": 57.5960951799878,
"grad_norm": 1.5726666450500488,
"learning_rate": 0.00012327868852459017,
"loss": 0.0555,
"step": 11800
},
{
"epoch": 57.69371568029286,
"grad_norm": 1.6197164058685303,
"learning_rate": 0.00012314754098360656,
"loss": 0.0551,
"step": 11820
},
{
"epoch": 57.79133618059792,
"grad_norm": 2.0963289737701416,
"learning_rate": 0.00012301639344262295,
"loss": 0.0606,
"step": 11840
},
{
"epoch": 57.88895668090299,
"grad_norm": 1.7842280864715576,
"learning_rate": 0.00012288524590163934,
"loss": 0.0625,
"step": 11860
},
{
"epoch": 57.986577181208055,
"grad_norm": 2.8082289695739746,
"learning_rate": 0.00012275409836065573,
"loss": 0.0572,
"step": 11880
},
{
"epoch": 58.08419768151312,
"grad_norm": 2.0027847290039062,
"learning_rate": 0.00012262295081967215,
"loss": 0.0484,
"step": 11900
},
{
"epoch": 58.18181818181818,
"grad_norm": 1.9375332593917847,
"learning_rate": 0.0001224918032786885,
"loss": 0.0503,
"step": 11920
},
{
"epoch": 58.27943868212325,
"grad_norm": 1.6671733856201172,
"learning_rate": 0.00012236065573770493,
"loss": 0.0463,
"step": 11940
},
{
"epoch": 58.37705918242831,
"grad_norm": 2.173567533493042,
"learning_rate": 0.00012222950819672132,
"loss": 0.0535,
"step": 11960
},
{
"epoch": 58.474679682733374,
"grad_norm": 2.869158983230591,
"learning_rate": 0.00012209836065573773,
"loss": 0.0591,
"step": 11980
},
{
"epoch": 58.572300183038436,
"grad_norm": 4.487265586853027,
"learning_rate": 0.0001219672131147541,
"loss": 0.054,
"step": 12000
},
{
"epoch": 58.669920683343506,
"grad_norm": 2.45060133934021,
"learning_rate": 0.0001218360655737705,
"loss": 0.0506,
"step": 12020
},
{
"epoch": 58.76754118364857,
"grad_norm": 1.8584073781967163,
"learning_rate": 0.0001217049180327869,
"loss": 0.0577,
"step": 12040
},
{
"epoch": 58.86516168395363,
"grad_norm": 2.7494070529937744,
"learning_rate": 0.00012157377049180328,
"loss": 0.0611,
"step": 12060
},
{
"epoch": 58.96278218425869,
"grad_norm": 2.600886106491089,
"learning_rate": 0.00012144262295081968,
"loss": 0.0595,
"step": 12080
},
{
"epoch": 59.060402684563755,
"grad_norm": 2.2219817638397217,
"learning_rate": 0.00012131147540983607,
"loss": 0.0496,
"step": 12100
},
{
"epoch": 59.158023184868824,
"grad_norm": 1.4466367959976196,
"learning_rate": 0.00012118032786885247,
"loss": 0.0424,
"step": 12120
},
{
"epoch": 59.25564368517389,
"grad_norm": 1.896466612815857,
"learning_rate": 0.00012104918032786885,
"loss": 0.0496,
"step": 12140
},
{
"epoch": 59.35326418547895,
"grad_norm": 1.5178048610687256,
"learning_rate": 0.00012091803278688525,
"loss": 0.0541,
"step": 12160
},
{
"epoch": 59.45088468578401,
"grad_norm": 2.189962148666382,
"learning_rate": 0.00012078688524590165,
"loss": 0.0558,
"step": 12180
},
{
"epoch": 59.54850518608908,
"grad_norm": 2.055428981781006,
"learning_rate": 0.00012065573770491804,
"loss": 0.0523,
"step": 12200
},
{
"epoch": 59.64612568639414,
"grad_norm": 2.569758415222168,
"learning_rate": 0.00012052459016393443,
"loss": 0.0547,
"step": 12220
},
{
"epoch": 59.743746186699205,
"grad_norm": 2.1483688354492188,
"learning_rate": 0.00012039344262295082,
"loss": 0.0579,
"step": 12240
},
{
"epoch": 59.84136668700427,
"grad_norm": 2.11574649810791,
"learning_rate": 0.00012026229508196722,
"loss": 0.0576,
"step": 12260
},
{
"epoch": 59.93898718730934,
"grad_norm": 2.026974678039551,
"learning_rate": 0.0001201311475409836,
"loss": 0.0556,
"step": 12280
},
{
"epoch": 60.0366076876144,
"grad_norm": 1.9671833515167236,
"learning_rate": 0.00012,
"loss": 0.0575,
"step": 12300
},
{
"epoch": 60.13422818791946,
"grad_norm": 2.3915090560913086,
"learning_rate": 0.0001198688524590164,
"loss": 0.0405,
"step": 12320
},
{
"epoch": 60.231848688224524,
"grad_norm": 2.213895559310913,
"learning_rate": 0.0001197377049180328,
"loss": 0.0455,
"step": 12340
},
{
"epoch": 60.32946918852959,
"grad_norm": 1.6794525384902954,
"learning_rate": 0.00011960655737704917,
"loss": 0.0551,
"step": 12360
},
{
"epoch": 60.427089688834656,
"grad_norm": 3.1766440868377686,
"learning_rate": 0.00011947540983606557,
"loss": 0.0526,
"step": 12380
},
{
"epoch": 60.52471018913972,
"grad_norm": 1.5646824836730957,
"learning_rate": 0.00011934426229508198,
"loss": 0.0523,
"step": 12400
},
{
"epoch": 60.62233068944478,
"grad_norm": 1.8669993877410889,
"learning_rate": 0.00011921311475409838,
"loss": 0.0525,
"step": 12420
},
{
"epoch": 60.71995118974985,
"grad_norm": 3.3208813667297363,
"learning_rate": 0.00011908196721311476,
"loss": 0.05,
"step": 12440
},
{
"epoch": 60.81757169005491,
"grad_norm": 2.8685014247894287,
"learning_rate": 0.00011895081967213116,
"loss": 0.0558,
"step": 12460
},
{
"epoch": 60.915192190359974,
"grad_norm": 2.627858877182007,
"learning_rate": 0.00011881967213114755,
"loss": 0.0554,
"step": 12480
},
{
"epoch": 61.01281269066504,
"grad_norm": 1.5304837226867676,
"learning_rate": 0.00011868852459016392,
"loss": 0.0533,
"step": 12500
},
{
"epoch": 61.110433190970106,
"grad_norm": 2.2873823642730713,
"learning_rate": 0.00011855737704918033,
"loss": 0.0451,
"step": 12520
},
{
"epoch": 61.20805369127517,
"grad_norm": 2.5284764766693115,
"learning_rate": 0.00011842622950819673,
"loss": 0.0496,
"step": 12540
},
{
"epoch": 61.30567419158023,
"grad_norm": 2.3583881855010986,
"learning_rate": 0.00011829508196721313,
"loss": 0.0502,
"step": 12560
},
{
"epoch": 61.40329469188529,
"grad_norm": 1.6773650646209717,
"learning_rate": 0.00011816393442622951,
"loss": 0.0494,
"step": 12580
},
{
"epoch": 61.50091519219036,
"grad_norm": 1.8725978136062622,
"learning_rate": 0.00011803278688524591,
"loss": 0.0496,
"step": 12600
},
{
"epoch": 61.598535692495425,
"grad_norm": 1.988142728805542,
"learning_rate": 0.0001179016393442623,
"loss": 0.0533,
"step": 12620
},
{
"epoch": 61.69615619280049,
"grad_norm": 2.436917304992676,
"learning_rate": 0.0001177704918032787,
"loss": 0.0521,
"step": 12640
},
{
"epoch": 61.79377669310555,
"grad_norm": 2.867424726486206,
"learning_rate": 0.00011763934426229508,
"loss": 0.0512,
"step": 12660
},
{
"epoch": 61.89139719341062,
"grad_norm": 3.100371837615967,
"learning_rate": 0.00011750819672131148,
"loss": 0.0563,
"step": 12680
},
{
"epoch": 61.98901769371568,
"grad_norm": 1.4640679359436035,
"learning_rate": 0.00011737704918032789,
"loss": 0.0512,
"step": 12700
},
{
"epoch": 62.08663819402074,
"grad_norm": 1.7745931148529053,
"learning_rate": 0.00011724590163934426,
"loss": 0.0451,
"step": 12720
},
{
"epoch": 62.184258694325806,
"grad_norm": 2.6083261966705322,
"learning_rate": 0.00011711475409836066,
"loss": 0.0523,
"step": 12740
},
{
"epoch": 62.281879194630875,
"grad_norm": 1.8145304918289185,
"learning_rate": 0.00011698360655737705,
"loss": 0.0469,
"step": 12760
},
{
"epoch": 62.37949969493594,
"grad_norm": 1.8948346376419067,
"learning_rate": 0.00011685245901639346,
"loss": 0.049,
"step": 12780
},
{
"epoch": 62.477120195241,
"grad_norm": 2.2340071201324463,
"learning_rate": 0.00011672131147540983,
"loss": 0.0478,
"step": 12800
},
{
"epoch": 62.57474069554606,
"grad_norm": 1.9108351469039917,
"learning_rate": 0.00011659016393442623,
"loss": 0.0477,
"step": 12820
},
{
"epoch": 62.67236119585113,
"grad_norm": 1.1131486892700195,
"learning_rate": 0.00011645901639344264,
"loss": 0.0541,
"step": 12840
},
{
"epoch": 62.769981696156194,
"grad_norm": 2.0101990699768066,
"learning_rate": 0.00011632786885245903,
"loss": 0.0435,
"step": 12860
},
{
"epoch": 62.867602196461256,
"grad_norm": 1.88633394241333,
"learning_rate": 0.0001161967213114754,
"loss": 0.051,
"step": 12880
},
{
"epoch": 62.96522269676632,
"grad_norm": 3.823934555053711,
"learning_rate": 0.0001160655737704918,
"loss": 0.0558,
"step": 12900
},
{
"epoch": 63.06284319707139,
"grad_norm": 2.754892110824585,
"learning_rate": 0.00011593442622950821,
"loss": 0.0423,
"step": 12920
},
{
"epoch": 63.16046369737645,
"grad_norm": 1.8771121501922607,
"learning_rate": 0.00011580327868852458,
"loss": 0.0434,
"step": 12940
},
{
"epoch": 63.25808419768151,
"grad_norm": 1.098620057106018,
"learning_rate": 0.00011567213114754099,
"loss": 0.0457,
"step": 12960
},
{
"epoch": 63.355704697986575,
"grad_norm": 2.0839200019836426,
"learning_rate": 0.00011554098360655739,
"loss": 0.0516,
"step": 12980
},
{
"epoch": 63.453325198291644,
"grad_norm": 1.4664433002471924,
"learning_rate": 0.00011540983606557378,
"loss": 0.0456,
"step": 13000
},
{
"epoch": 63.550945698596706,
"grad_norm": 1.9635177850723267,
"learning_rate": 0.00011527868852459016,
"loss": 0.0537,
"step": 13020
},
{
"epoch": 63.64856619890177,
"grad_norm": 1.3120068311691284,
"learning_rate": 0.00011514754098360656,
"loss": 0.0492,
"step": 13040
},
{
"epoch": 63.74618669920683,
"grad_norm": 3.893848180770874,
"learning_rate": 0.00011501639344262296,
"loss": 0.0532,
"step": 13060
},
{
"epoch": 63.8438071995119,
"grad_norm": 2.8461520671844482,
"learning_rate": 0.00011488524590163936,
"loss": 0.0525,
"step": 13080
},
{
"epoch": 63.94142769981696,
"grad_norm": 2.197197914123535,
"learning_rate": 0.00011475409836065574,
"loss": 0.0507,
"step": 13100
},
{
"epoch": 64.03904820012202,
"grad_norm": 2.491464138031006,
"learning_rate": 0.00011462295081967214,
"loss": 0.0486,
"step": 13120
},
{
"epoch": 64.1366687004271,
"grad_norm": 1.8550955057144165,
"learning_rate": 0.00011449180327868853,
"loss": 0.0421,
"step": 13140
},
{
"epoch": 64.23428920073215,
"grad_norm": 1.7782377004623413,
"learning_rate": 0.00011436065573770491,
"loss": 0.0448,
"step": 13160
},
{
"epoch": 64.33190970103722,
"grad_norm": 2.042099714279175,
"learning_rate": 0.00011422950819672131,
"loss": 0.0449,
"step": 13180
},
{
"epoch": 64.42953020134229,
"grad_norm": 2.6295580863952637,
"learning_rate": 0.00011409836065573771,
"loss": 0.0509,
"step": 13200
},
{
"epoch": 64.52715070164734,
"grad_norm": 2.4631996154785156,
"learning_rate": 0.00011396721311475412,
"loss": 0.0447,
"step": 13220
},
{
"epoch": 64.62477120195241,
"grad_norm": 1.8422377109527588,
"learning_rate": 0.00011383606557377049,
"loss": 0.0498,
"step": 13240
},
{
"epoch": 64.72239170225747,
"grad_norm": 2.1493983268737793,
"learning_rate": 0.00011370491803278688,
"loss": 0.0536,
"step": 13260
},
{
"epoch": 64.82001220256254,
"grad_norm": 2.355818033218384,
"learning_rate": 0.00011357377049180329,
"loss": 0.0529,
"step": 13280
},
{
"epoch": 64.9176327028676,
"grad_norm": 2.2813539505004883,
"learning_rate": 0.00011344262295081969,
"loss": 0.0514,
"step": 13300
},
{
"epoch": 65.01525320317266,
"grad_norm": 2.1642229557037354,
"learning_rate": 0.00011331147540983606,
"loss": 0.048,
"step": 13320
},
{
"epoch": 65.11287370347773,
"grad_norm": 1.607167363166809,
"learning_rate": 0.00011318032786885247,
"loss": 0.0393,
"step": 13340
},
{
"epoch": 65.2104942037828,
"grad_norm": 1.7760226726531982,
"learning_rate": 0.00011304918032786887,
"loss": 0.0445,
"step": 13360
},
{
"epoch": 65.30811470408786,
"grad_norm": 2.2471165657043457,
"learning_rate": 0.00011291803278688525,
"loss": 0.0388,
"step": 13380
},
{
"epoch": 65.40573520439293,
"grad_norm": 2.517448663711548,
"learning_rate": 0.00011278688524590164,
"loss": 0.0466,
"step": 13400
},
{
"epoch": 65.50335570469798,
"grad_norm": 1.7835204601287842,
"learning_rate": 0.00011265573770491804,
"loss": 0.0457,
"step": 13420
},
{
"epoch": 65.60097620500305,
"grad_norm": 2.7937209606170654,
"learning_rate": 0.00011252459016393444,
"loss": 0.0453,
"step": 13440
},
{
"epoch": 65.69859670530812,
"grad_norm": 1.775601863861084,
"learning_rate": 0.00011239344262295082,
"loss": 0.0515,
"step": 13460
},
{
"epoch": 65.79621720561317,
"grad_norm": 2.0977530479431152,
"learning_rate": 0.00011226229508196722,
"loss": 0.0494,
"step": 13480
},
{
"epoch": 65.89383770591824,
"grad_norm": 2.0289318561553955,
"learning_rate": 0.00011213114754098362,
"loss": 0.0492,
"step": 13500
},
{
"epoch": 65.99145820622331,
"grad_norm": 1.8023489713668823,
"learning_rate": 0.00011200000000000001,
"loss": 0.0491,
"step": 13520
},
{
"epoch": 66.08907870652837,
"grad_norm": 1.4527980089187622,
"learning_rate": 0.00011186885245901639,
"loss": 0.0371,
"step": 13540
},
{
"epoch": 66.18669920683344,
"grad_norm": 1.5598070621490479,
"learning_rate": 0.00011173770491803279,
"loss": 0.0421,
"step": 13560
},
{
"epoch": 66.2843197071385,
"grad_norm": 1.312072992324829,
"learning_rate": 0.0001116065573770492,
"loss": 0.0457,
"step": 13580
},
{
"epoch": 66.38194020744356,
"grad_norm": 2.0999739170074463,
"learning_rate": 0.00011147540983606557,
"loss": 0.0477,
"step": 13600
},
{
"epoch": 66.47956070774863,
"grad_norm": 1.9168506860733032,
"learning_rate": 0.00011134426229508197,
"loss": 0.0428,
"step": 13620
},
{
"epoch": 66.57718120805369,
"grad_norm": 2.1148712635040283,
"learning_rate": 0.00011121311475409838,
"loss": 0.0472,
"step": 13640
},
{
"epoch": 66.67480170835876,
"grad_norm": 1.7266321182250977,
"learning_rate": 0.00011108196721311476,
"loss": 0.0493,
"step": 13660
},
{
"epoch": 66.77242220866383,
"grad_norm": 1.0809521675109863,
"learning_rate": 0.00011095081967213114,
"loss": 0.048,
"step": 13680
},
{
"epoch": 66.87004270896888,
"grad_norm": 2.773946523666382,
"learning_rate": 0.00011081967213114754,
"loss": 0.0481,
"step": 13700
},
{
"epoch": 66.96766320927395,
"grad_norm": 2.187812089920044,
"learning_rate": 0.00011068852459016395,
"loss": 0.054,
"step": 13720
},
{
"epoch": 67.065283709579,
"grad_norm": 1.8684860467910767,
"learning_rate": 0.00011055737704918035,
"loss": 0.0414,
"step": 13740
},
{
"epoch": 67.16290420988408,
"grad_norm": 1.6898576021194458,
"learning_rate": 0.00011042622950819672,
"loss": 0.0405,
"step": 13760
},
{
"epoch": 67.26052471018915,
"grad_norm": 2.749148368835449,
"learning_rate": 0.00011029508196721311,
"loss": 0.0404,
"step": 13780
},
{
"epoch": 67.3581452104942,
"grad_norm": 3.262460231781006,
"learning_rate": 0.00011016393442622952,
"loss": 0.0476,
"step": 13800
},
{
"epoch": 67.45576571079927,
"grad_norm": 1.7177296876907349,
"learning_rate": 0.00011003278688524589,
"loss": 0.0446,
"step": 13820
},
{
"epoch": 67.55338621110434,
"grad_norm": 2.326493978500366,
"learning_rate": 0.0001099016393442623,
"loss": 0.0447,
"step": 13840
},
{
"epoch": 67.6510067114094,
"grad_norm": 1.690735101699829,
"learning_rate": 0.0001097704918032787,
"loss": 0.0475,
"step": 13860
},
{
"epoch": 67.74862721171446,
"grad_norm": 2.243262529373169,
"learning_rate": 0.0001096393442622951,
"loss": 0.0414,
"step": 13880
},
{
"epoch": 67.84624771201952,
"grad_norm": 1.8218213319778442,
"learning_rate": 0.00010950819672131148,
"loss": 0.0475,
"step": 13900
},
{
"epoch": 67.94386821232459,
"grad_norm": 2.037757635116577,
"learning_rate": 0.00010937704918032787,
"loss": 0.0517,
"step": 13920
},
{
"epoch": 68.04148871262966,
"grad_norm": 1.589010238647461,
"learning_rate": 0.00010924590163934427,
"loss": 0.0444,
"step": 13940
},
{
"epoch": 68.13910921293471,
"grad_norm": 1.6589601039886475,
"learning_rate": 0.00010911475409836067,
"loss": 0.0397,
"step": 13960
},
{
"epoch": 68.23672971323978,
"grad_norm": 0.7578281760215759,
"learning_rate": 0.00010898360655737705,
"loss": 0.0424,
"step": 13980
},
{
"epoch": 68.33435021354484,
"grad_norm": 2.5849769115448,
"learning_rate": 0.00010885245901639345,
"loss": 0.0411,
"step": 14000
},
{
"epoch": 68.4319707138499,
"grad_norm": 2.4518473148345947,
"learning_rate": 0.00010872131147540985,
"loss": 0.0463,
"step": 14020
},
{
"epoch": 68.52959121415498,
"grad_norm": 1.6705505847930908,
"learning_rate": 0.00010859016393442623,
"loss": 0.0418,
"step": 14040
},
{
"epoch": 68.62721171446003,
"grad_norm": 2.3943698406219482,
"learning_rate": 0.00010845901639344262,
"loss": 0.0454,
"step": 14060
},
{
"epoch": 68.7248322147651,
"grad_norm": 1.8809058666229248,
"learning_rate": 0.00010832786885245902,
"loss": 0.0417,
"step": 14080
},
{
"epoch": 68.82245271507017,
"grad_norm": 2.2370426654815674,
"learning_rate": 0.00010819672131147543,
"loss": 0.0428,
"step": 14100
},
{
"epoch": 68.92007321537523,
"grad_norm": 2.9675140380859375,
"learning_rate": 0.0001080655737704918,
"loss": 0.0455,
"step": 14120
},
{
"epoch": 69.0176937156803,
"grad_norm": 2.193737506866455,
"learning_rate": 0.0001079344262295082,
"loss": 0.0484,
"step": 14140
},
{
"epoch": 69.11531421598535,
"grad_norm": 1.3249075412750244,
"learning_rate": 0.00010780327868852461,
"loss": 0.0361,
"step": 14160
},
{
"epoch": 69.21293471629042,
"grad_norm": 1.6315033435821533,
"learning_rate": 0.00010767213114754098,
"loss": 0.0392,
"step": 14180
},
{
"epoch": 69.31055521659549,
"grad_norm": 3.6653645038604736,
"learning_rate": 0.00010754098360655737,
"loss": 0.0418,
"step": 14200
},
{
"epoch": 69.40817571690054,
"grad_norm": 1.652737021446228,
"learning_rate": 0.00010740983606557378,
"loss": 0.0408,
"step": 14220
},
{
"epoch": 69.50579621720561,
"grad_norm": 1.7608600854873657,
"learning_rate": 0.00010727868852459018,
"loss": 0.0481,
"step": 14240
},
{
"epoch": 69.60341671751068,
"grad_norm": 1.6108567714691162,
"learning_rate": 0.00010714754098360655,
"loss": 0.0411,
"step": 14260
},
{
"epoch": 69.70103721781574,
"grad_norm": 1.7640299797058105,
"learning_rate": 0.00010701639344262296,
"loss": 0.0433,
"step": 14280
},
{
"epoch": 69.79865771812081,
"grad_norm": 3.2369632720947266,
"learning_rate": 0.00010688524590163935,
"loss": 0.0461,
"step": 14300
},
{
"epoch": 69.89627821842586,
"grad_norm": 1.8078311681747437,
"learning_rate": 0.00010675409836065575,
"loss": 0.0503,
"step": 14320
},
{
"epoch": 69.99389871873093,
"grad_norm": 2.2205686569213867,
"learning_rate": 0.00010662295081967212,
"loss": 0.0446,
"step": 14340
},
{
"epoch": 70.091519219036,
"grad_norm": 1.8968815803527832,
"learning_rate": 0.00010649180327868853,
"loss": 0.0333,
"step": 14360
},
{
"epoch": 70.18913971934106,
"grad_norm": 1.6357848644256592,
"learning_rate": 0.00010636065573770493,
"loss": 0.0393,
"step": 14380
},
{
"epoch": 70.28676021964613,
"grad_norm": 4.227313995361328,
"learning_rate": 0.0001062295081967213,
"loss": 0.0367,
"step": 14400
},
{
"epoch": 70.3843807199512,
"grad_norm": 1.2310267686843872,
"learning_rate": 0.00010609836065573771,
"loss": 0.043,
"step": 14420
},
{
"epoch": 70.48200122025625,
"grad_norm": 1.8939145803451538,
"learning_rate": 0.0001059672131147541,
"loss": 0.0424,
"step": 14440
},
{
"epoch": 70.57962172056132,
"grad_norm": 1.7214269638061523,
"learning_rate": 0.0001058360655737705,
"loss": 0.0455,
"step": 14460
},
{
"epoch": 70.67724222086638,
"grad_norm": 1.5810576677322388,
"learning_rate": 0.00010570491803278688,
"loss": 0.0455,
"step": 14480
},
{
"epoch": 70.77486272117144,
"grad_norm": 2.2829346656799316,
"learning_rate": 0.00010557377049180328,
"loss": 0.0446,
"step": 14500
},
{
"epoch": 70.87248322147651,
"grad_norm": 1.7626519203186035,
"learning_rate": 0.00010544262295081968,
"loss": 0.0465,
"step": 14520
},
{
"epoch": 70.97010372178157,
"grad_norm": 3.180558443069458,
"learning_rate": 0.00010531147540983609,
"loss": 0.0454,
"step": 14540
},
{
"epoch": 71.06772422208664,
"grad_norm": 1.3041974306106567,
"learning_rate": 0.00010518032786885246,
"loss": 0.0396,
"step": 14560
},
{
"epoch": 71.16534472239171,
"grad_norm": 1.648926854133606,
"learning_rate": 0.00010504918032786885,
"loss": 0.0364,
"step": 14580
},
{
"epoch": 71.26296522269676,
"grad_norm": 1.6585657596588135,
"learning_rate": 0.00010491803278688525,
"loss": 0.0365,
"step": 14600
},
{
"epoch": 71.36058572300183,
"grad_norm": 2.1018893718719482,
"learning_rate": 0.00010478688524590163,
"loss": 0.0373,
"step": 14620
},
{
"epoch": 71.45820622330689,
"grad_norm": 2.348642110824585,
"learning_rate": 0.00010465573770491803,
"loss": 0.0403,
"step": 14640
},
{
"epoch": 71.55582672361196,
"grad_norm": 1.8236652612686157,
"learning_rate": 0.00010452459016393444,
"loss": 0.0385,
"step": 14660
},
{
"epoch": 71.65344722391703,
"grad_norm": 1.6800057888031006,
"learning_rate": 0.00010439344262295083,
"loss": 0.0392,
"step": 14680
},
{
"epoch": 71.75106772422208,
"grad_norm": 2.6465699672698975,
"learning_rate": 0.00010426229508196721,
"loss": 0.0474,
"step": 14700
},
{
"epoch": 71.84868822452715,
"grad_norm": 2.384202003479004,
"learning_rate": 0.0001041311475409836,
"loss": 0.0463,
"step": 14720
},
{
"epoch": 71.94630872483222,
"grad_norm": 2.788309335708618,
"learning_rate": 0.00010400000000000001,
"loss": 0.0465,
"step": 14740
},
{
"epoch": 72.04392922513728,
"grad_norm": 2.2746474742889404,
"learning_rate": 0.00010386885245901641,
"loss": 0.0423,
"step": 14760
},
{
"epoch": 72.14154972544235,
"grad_norm": 2.319316864013672,
"learning_rate": 0.00010373770491803279,
"loss": 0.0352,
"step": 14780
},
{
"epoch": 72.2391702257474,
"grad_norm": 1.8821275234222412,
"learning_rate": 0.00010360655737704919,
"loss": 0.0344,
"step": 14800
},
{
"epoch": 72.33679072605247,
"grad_norm": 1.316780686378479,
"learning_rate": 0.00010347540983606558,
"loss": 0.0383,
"step": 14820
},
{
"epoch": 72.43441122635754,
"grad_norm": 1.91647207736969,
"learning_rate": 0.00010334426229508197,
"loss": 0.0375,
"step": 14840
},
{
"epoch": 72.5320317266626,
"grad_norm": 1.4434703588485718,
"learning_rate": 0.00010321311475409836,
"loss": 0.0352,
"step": 14860
},
{
"epoch": 72.62965222696766,
"grad_norm": 1.4991461038589478,
"learning_rate": 0.00010308196721311476,
"loss": 0.0393,
"step": 14880
},
{
"epoch": 72.72727272727273,
"grad_norm": 1.8177014589309692,
"learning_rate": 0.00010295081967213116,
"loss": 0.0424,
"step": 14900
},
{
"epoch": 72.82489322757779,
"grad_norm": 2.2760417461395264,
"learning_rate": 0.00010281967213114754,
"loss": 0.0424,
"step": 14920
},
{
"epoch": 72.92251372788286,
"grad_norm": 1.6984953880310059,
"learning_rate": 0.00010268852459016394,
"loss": 0.044,
"step": 14940
},
{
"epoch": 73.02013422818791,
"grad_norm": 1.3592875003814697,
"learning_rate": 0.00010255737704918033,
"loss": 0.0412,
"step": 14960
},
{
"epoch": 73.11775472849298,
"grad_norm": 1.0483120679855347,
"learning_rate": 0.00010242622950819673,
"loss": 0.0347,
"step": 14980
},
{
"epoch": 73.21537522879805,
"grad_norm": 0.837219774723053,
"learning_rate": 0.00010229508196721311,
"loss": 0.0333,
"step": 15000
},
{
"epoch": 73.31299572910311,
"grad_norm": 1.5951837301254272,
"learning_rate": 0.00010216393442622951,
"loss": 0.0405,
"step": 15020
},
{
"epoch": 73.41061622940818,
"grad_norm": 1.8197298049926758,
"learning_rate": 0.00010203278688524592,
"loss": 0.0402,
"step": 15040
},
{
"epoch": 73.50823672971325,
"grad_norm": 1.4663337469100952,
"learning_rate": 0.00010190163934426229,
"loss": 0.0399,
"step": 15060
},
{
"epoch": 73.6058572300183,
"grad_norm": 1.5924322605133057,
"learning_rate": 0.0001017704918032787,
"loss": 0.0431,
"step": 15080
},
{
"epoch": 73.70347773032337,
"grad_norm": 1.3720799684524536,
"learning_rate": 0.00010163934426229508,
"loss": 0.0396,
"step": 15100
},
{
"epoch": 73.80109823062843,
"grad_norm": 1.6636115312576294,
"learning_rate": 0.00010150819672131149,
"loss": 0.041,
"step": 15120
},
{
"epoch": 73.8987187309335,
"grad_norm": 1.7498186826705933,
"learning_rate": 0.00010137704918032786,
"loss": 0.0415,
"step": 15140
},
{
"epoch": 73.99633923123857,
"grad_norm": 1.410224199295044,
"learning_rate": 0.00010124590163934427,
"loss": 0.0432,
"step": 15160
},
{
"epoch": 74.09395973154362,
"grad_norm": 1.576262354850769,
"learning_rate": 0.00010111475409836067,
"loss": 0.0331,
"step": 15180
},
{
"epoch": 74.19158023184869,
"grad_norm": 0.8504081964492798,
"learning_rate": 0.00010098360655737706,
"loss": 0.0365,
"step": 15200
},
{
"epoch": 74.28920073215376,
"grad_norm": 1.2342356443405151,
"learning_rate": 0.00010085245901639345,
"loss": 0.0374,
"step": 15220
},
{
"epoch": 74.38682123245881,
"grad_norm": 1.6233398914337158,
"learning_rate": 0.00010072131147540984,
"loss": 0.0435,
"step": 15240
},
{
"epoch": 74.48444173276388,
"grad_norm": 3.0741922855377197,
"learning_rate": 0.00010059016393442624,
"loss": 0.0401,
"step": 15260
},
{
"epoch": 74.58206223306894,
"grad_norm": 1.7206604480743408,
"learning_rate": 0.00010045901639344261,
"loss": 0.0387,
"step": 15280
},
{
"epoch": 74.67968273337401,
"grad_norm": 1.7133204936981201,
"learning_rate": 0.00010032786885245902,
"loss": 0.0387,
"step": 15300
},
{
"epoch": 74.77730323367908,
"grad_norm": 3.1250414848327637,
"learning_rate": 0.00010019672131147542,
"loss": 0.04,
"step": 15320
},
{
"epoch": 74.87492373398413,
"grad_norm": 1.7084505558013916,
"learning_rate": 0.00010006557377049181,
"loss": 0.0379,
"step": 15340
},
{
"epoch": 74.9725442342892,
"grad_norm": 2.0808680057525635,
"learning_rate": 9.99344262295082e-05,
"loss": 0.0419,
"step": 15360
},
{
"epoch": 75.07016473459427,
"grad_norm": 1.148889422416687,
"learning_rate": 9.980327868852459e-05,
"loss": 0.0355,
"step": 15380
},
{
"epoch": 75.16778523489933,
"grad_norm": 1.292641520500183,
"learning_rate": 9.967213114754099e-05,
"loss": 0.0354,
"step": 15400
},
{
"epoch": 75.2654057352044,
"grad_norm": 2.2540032863616943,
"learning_rate": 9.954098360655738e-05,
"loss": 0.0384,
"step": 15420
},
{
"epoch": 75.36302623550945,
"grad_norm": 1.2150137424468994,
"learning_rate": 9.940983606557378e-05,
"loss": 0.0358,
"step": 15440
},
{
"epoch": 75.46064673581452,
"grad_norm": 1.647284984588623,
"learning_rate": 9.927868852459017e-05,
"loss": 0.0351,
"step": 15460
},
{
"epoch": 75.55826723611959,
"grad_norm": 2.1576521396636963,
"learning_rate": 9.914754098360656e-05,
"loss": 0.0425,
"step": 15480
},
{
"epoch": 75.65588773642465,
"grad_norm": 1.787636637687683,
"learning_rate": 9.901639344262295e-05,
"loss": 0.0384,
"step": 15500
},
{
"epoch": 75.75350823672972,
"grad_norm": 2.0450475215911865,
"learning_rate": 9.888524590163934e-05,
"loss": 0.0373,
"step": 15520
},
{
"epoch": 75.85112873703477,
"grad_norm": 1.457287073135376,
"learning_rate": 9.875409836065574e-05,
"loss": 0.0411,
"step": 15540
},
{
"epoch": 75.94874923733984,
"grad_norm": 2.2569003105163574,
"learning_rate": 9.862295081967213e-05,
"loss": 0.0401,
"step": 15560
},
{
"epoch": 76.04636973764491,
"grad_norm": 1.942240834236145,
"learning_rate": 9.849180327868854e-05,
"loss": 0.0381,
"step": 15580
},
{
"epoch": 76.14399023794996,
"grad_norm": 1.9063150882720947,
"learning_rate": 9.836065573770493e-05,
"loss": 0.0354,
"step": 15600
},
{
"epoch": 76.24161073825503,
"grad_norm": 1.0408899784088135,
"learning_rate": 9.822950819672132e-05,
"loss": 0.0338,
"step": 15620
},
{
"epoch": 76.3392312385601,
"grad_norm": 1.3950903415679932,
"learning_rate": 9.80983606557377e-05,
"loss": 0.0361,
"step": 15640
},
{
"epoch": 76.43685173886516,
"grad_norm": 1.3238831758499146,
"learning_rate": 9.796721311475411e-05,
"loss": 0.037,
"step": 15660
},
{
"epoch": 76.53447223917023,
"grad_norm": 1.7356709241867065,
"learning_rate": 9.78360655737705e-05,
"loss": 0.0388,
"step": 15680
},
{
"epoch": 76.63209273947528,
"grad_norm": 1.4678503274917603,
"learning_rate": 9.770491803278689e-05,
"loss": 0.0353,
"step": 15700
},
{
"epoch": 76.72971323978035,
"grad_norm": 1.3024065494537354,
"learning_rate": 9.757377049180329e-05,
"loss": 0.041,
"step": 15720
},
{
"epoch": 76.82733374008542,
"grad_norm": 1.2499933242797852,
"learning_rate": 9.744262295081968e-05,
"loss": 0.0341,
"step": 15740
},
{
"epoch": 76.92495424039048,
"grad_norm": 1.7338874340057373,
"learning_rate": 9.731147540983607e-05,
"loss": 0.0432,
"step": 15760
},
{
"epoch": 77.02257474069555,
"grad_norm": 2.667750120162964,
"learning_rate": 9.718032786885246e-05,
"loss": 0.0395,
"step": 15780
},
{
"epoch": 77.12019524100062,
"grad_norm": 1.2692776918411255,
"learning_rate": 9.704918032786886e-05,
"loss": 0.0328,
"step": 15800
},
{
"epoch": 77.21781574130567,
"grad_norm": 1.413238763809204,
"learning_rate": 9.691803278688525e-05,
"loss": 0.0336,
"step": 15820
},
{
"epoch": 77.31543624161074,
"grad_norm": 1.9772465229034424,
"learning_rate": 9.678688524590165e-05,
"loss": 0.0355,
"step": 15840
},
{
"epoch": 77.4130567419158,
"grad_norm": 1.7621837854385376,
"learning_rate": 9.665573770491804e-05,
"loss": 0.0376,
"step": 15860
},
{
"epoch": 77.51067724222086,
"grad_norm": 1.381027102470398,
"learning_rate": 9.652459016393443e-05,
"loss": 0.0423,
"step": 15880
},
{
"epoch": 77.60829774252593,
"grad_norm": 1.5768849849700928,
"learning_rate": 9.639344262295082e-05,
"loss": 0.0346,
"step": 15900
},
{
"epoch": 77.70591824283099,
"grad_norm": 1.5931782722473145,
"learning_rate": 9.626229508196721e-05,
"loss": 0.0344,
"step": 15920
},
{
"epoch": 77.80353874313606,
"grad_norm": 1.40415358543396,
"learning_rate": 9.613114754098361e-05,
"loss": 0.0377,
"step": 15940
},
{
"epoch": 77.90115924344113,
"grad_norm": 1.4353222846984863,
"learning_rate": 9.6e-05,
"loss": 0.0347,
"step": 15960
},
{
"epoch": 77.99877974374618,
"grad_norm": 1.4852492809295654,
"learning_rate": 9.58688524590164e-05,
"loss": 0.0386,
"step": 15980
},
{
"epoch": 78.09640024405125,
"grad_norm": 1.287718415260315,
"learning_rate": 9.57377049180328e-05,
"loss": 0.0325,
"step": 16000
},
{
"epoch": 78.19402074435631,
"grad_norm": 1.597639799118042,
"learning_rate": 9.560655737704918e-05,
"loss": 0.0304,
"step": 16020
},
{
"epoch": 78.29164124466138,
"grad_norm": 2.2354135513305664,
"learning_rate": 9.547540983606557e-05,
"loss": 0.0343,
"step": 16040
},
{
"epoch": 78.38926174496645,
"grad_norm": 1.701223373413086,
"learning_rate": 9.534426229508198e-05,
"loss": 0.035,
"step": 16060
},
{
"epoch": 78.4868822452715,
"grad_norm": 2.1920886039733887,
"learning_rate": 9.521311475409837e-05,
"loss": 0.0362,
"step": 16080
},
{
"epoch": 78.58450274557657,
"grad_norm": 1.5436536073684692,
"learning_rate": 9.508196721311476e-05,
"loss": 0.0388,
"step": 16100
},
{
"epoch": 78.68212324588164,
"grad_norm": 0.7256617546081543,
"learning_rate": 9.495081967213116e-05,
"loss": 0.0364,
"step": 16120
},
{
"epoch": 78.7797437461867,
"grad_norm": 1.3675026893615723,
"learning_rate": 9.481967213114755e-05,
"loss": 0.0383,
"step": 16140
},
{
"epoch": 78.87736424649177,
"grad_norm": 2.07330322265625,
"learning_rate": 9.468852459016394e-05,
"loss": 0.0392,
"step": 16160
},
{
"epoch": 78.97498474679682,
"grad_norm": 1.4074386358261108,
"learning_rate": 9.455737704918033e-05,
"loss": 0.0401,
"step": 16180
},
{
"epoch": 79.07260524710189,
"grad_norm": 1.5689586400985718,
"learning_rate": 9.442622950819673e-05,
"loss": 0.0333,
"step": 16200
},
{
"epoch": 79.17022574740696,
"grad_norm": 1.3256062269210815,
"learning_rate": 9.429508196721312e-05,
"loss": 0.0305,
"step": 16220
},
{
"epoch": 79.26784624771201,
"grad_norm": 1.71085524559021,
"learning_rate": 9.416393442622952e-05,
"loss": 0.0311,
"step": 16240
},
{
"epoch": 79.36546674801708,
"grad_norm": 1.7854918241500854,
"learning_rate": 9.403278688524591e-05,
"loss": 0.0334,
"step": 16260
},
{
"epoch": 79.46308724832215,
"grad_norm": 1.381110668182373,
"learning_rate": 9.39016393442623e-05,
"loss": 0.0355,
"step": 16280
},
{
"epoch": 79.56070774862721,
"grad_norm": 2.068474292755127,
"learning_rate": 9.377049180327869e-05,
"loss": 0.0358,
"step": 16300
},
{
"epoch": 79.65832824893228,
"grad_norm": 1.4812254905700684,
"learning_rate": 9.363934426229508e-05,
"loss": 0.0382,
"step": 16320
},
{
"epoch": 79.75594874923733,
"grad_norm": 2.683461904525757,
"learning_rate": 9.350819672131148e-05,
"loss": 0.0365,
"step": 16340
},
{
"epoch": 79.8535692495424,
"grad_norm": 1.9132243394851685,
"learning_rate": 9.337704918032787e-05,
"loss": 0.0388,
"step": 16360
},
{
"epoch": 79.95118974984747,
"grad_norm": 1.8553367853164673,
"learning_rate": 9.324590163934427e-05,
"loss": 0.0386,
"step": 16380
},
{
"epoch": 80.04881025015253,
"grad_norm": 0.8551103472709656,
"learning_rate": 9.311475409836066e-05,
"loss": 0.0334,
"step": 16400
},
{
"epoch": 80.1464307504576,
"grad_norm": 2.120316743850708,
"learning_rate": 9.298360655737705e-05,
"loss": 0.0324,
"step": 16420
},
{
"epoch": 80.24405125076267,
"grad_norm": 1.456176519393921,
"learning_rate": 9.285245901639344e-05,
"loss": 0.0341,
"step": 16440
},
{
"epoch": 80.34167175106772,
"grad_norm": 1.8826713562011719,
"learning_rate": 9.272131147540985e-05,
"loss": 0.034,
"step": 16460
},
{
"epoch": 80.43929225137279,
"grad_norm": 1.0214563608169556,
"learning_rate": 9.259016393442623e-05,
"loss": 0.0318,
"step": 16480
},
{
"epoch": 80.53691275167785,
"grad_norm": 1.6423603296279907,
"learning_rate": 9.245901639344264e-05,
"loss": 0.0344,
"step": 16500
},
{
"epoch": 80.63453325198292,
"grad_norm": 1.6966345310211182,
"learning_rate": 9.232786885245903e-05,
"loss": 0.0367,
"step": 16520
},
{
"epoch": 80.73215375228799,
"grad_norm": 1.1521140336990356,
"learning_rate": 9.21967213114754e-05,
"loss": 0.0364,
"step": 16540
},
{
"epoch": 80.82977425259304,
"grad_norm": 1.5094974040985107,
"learning_rate": 9.20655737704918e-05,
"loss": 0.0355,
"step": 16560
},
{
"epoch": 80.92739475289811,
"grad_norm": 1.4688690900802612,
"learning_rate": 9.19344262295082e-05,
"loss": 0.0328,
"step": 16580
},
{
"epoch": 81.02501525320318,
"grad_norm": 1.916788935661316,
"learning_rate": 9.18032786885246e-05,
"loss": 0.0353,
"step": 16600
},
{
"epoch": 81.12263575350823,
"grad_norm": 1.6212852001190186,
"learning_rate": 9.167213114754099e-05,
"loss": 0.031,
"step": 16620
},
{
"epoch": 81.2202562538133,
"grad_norm": 1.1060786247253418,
"learning_rate": 9.154098360655739e-05,
"loss": 0.0293,
"step": 16640
},
{
"epoch": 81.31787675411836,
"grad_norm": 1.4581480026245117,
"learning_rate": 9.140983606557378e-05,
"loss": 0.0349,
"step": 16660
},
{
"epoch": 81.41549725442343,
"grad_norm": 1.1661124229431152,
"learning_rate": 9.127868852459017e-05,
"loss": 0.0315,
"step": 16680
},
{
"epoch": 81.5131177547285,
"grad_norm": 1.6387383937835693,
"learning_rate": 9.114754098360656e-05,
"loss": 0.0341,
"step": 16700
},
{
"epoch": 81.61073825503355,
"grad_norm": 1.5041887760162354,
"learning_rate": 9.101639344262296e-05,
"loss": 0.0332,
"step": 16720
},
{
"epoch": 81.70835875533862,
"grad_norm": 1.3553998470306396,
"learning_rate": 9.088524590163935e-05,
"loss": 0.0358,
"step": 16740
},
{
"epoch": 81.80597925564369,
"grad_norm": 1.5116016864776611,
"learning_rate": 9.075409836065574e-05,
"loss": 0.0344,
"step": 16760
},
{
"epoch": 81.90359975594875,
"grad_norm": 1.9211640357971191,
"learning_rate": 9.062295081967214e-05,
"loss": 0.0371,
"step": 16780
},
{
"epoch": 82.00122025625382,
"grad_norm": 3.2523958683013916,
"learning_rate": 9.049180327868852e-05,
"loss": 0.0333,
"step": 16800
},
{
"epoch": 82.09884075655887,
"grad_norm": 1.5402885675430298,
"learning_rate": 9.036065573770492e-05,
"loss": 0.0314,
"step": 16820
},
{
"epoch": 82.19646125686394,
"grad_norm": 1.5037944316864014,
"learning_rate": 9.022950819672131e-05,
"loss": 0.029,
"step": 16840
},
{
"epoch": 82.29408175716901,
"grad_norm": 2.7046449184417725,
"learning_rate": 9.009836065573771e-05,
"loss": 0.031,
"step": 16860
},
{
"epoch": 82.39170225747407,
"grad_norm": 2.5004217624664307,
"learning_rate": 8.99672131147541e-05,
"loss": 0.0318,
"step": 16880
},
{
"epoch": 82.48932275777914,
"grad_norm": 2.3502180576324463,
"learning_rate": 8.98360655737705e-05,
"loss": 0.0335,
"step": 16900
},
{
"epoch": 82.5869432580842,
"grad_norm": 1.3338574171066284,
"learning_rate": 8.97049180327869e-05,
"loss": 0.0346,
"step": 16920
},
{
"epoch": 82.68456375838926,
"grad_norm": 1.4850441217422485,
"learning_rate": 8.957377049180328e-05,
"loss": 0.0355,
"step": 16940
},
{
"epoch": 82.78218425869433,
"grad_norm": 1.3196766376495361,
"learning_rate": 8.944262295081967e-05,
"loss": 0.0354,
"step": 16960
},
{
"epoch": 82.87980475899938,
"grad_norm": 1.2028127908706665,
"learning_rate": 8.931147540983606e-05,
"loss": 0.0377,
"step": 16980
},
{
"epoch": 82.97742525930445,
"grad_norm": 1.5491008758544922,
"learning_rate": 8.918032786885247e-05,
"loss": 0.0325,
"step": 17000
},
{
"epoch": 83.07504575960952,
"grad_norm": 2.5490734577178955,
"learning_rate": 8.904918032786886e-05,
"loss": 0.0289,
"step": 17020
},
{
"epoch": 83.17266625991458,
"grad_norm": 4.350809097290039,
"learning_rate": 8.891803278688526e-05,
"loss": 0.0296,
"step": 17040
},
{
"epoch": 83.27028676021965,
"grad_norm": 1.5075066089630127,
"learning_rate": 8.878688524590163e-05,
"loss": 0.0313,
"step": 17060
},
{
"epoch": 83.36790726052472,
"grad_norm": 1.4814287424087524,
"learning_rate": 8.865573770491804e-05,
"loss": 0.0269,
"step": 17080
},
{
"epoch": 83.46552776082977,
"grad_norm": 1.050759196281433,
"learning_rate": 8.852459016393443e-05,
"loss": 0.0326,
"step": 17100
},
{
"epoch": 83.56314826113484,
"grad_norm": 1.5208618640899658,
"learning_rate": 8.839344262295083e-05,
"loss": 0.0329,
"step": 17120
},
{
"epoch": 83.6607687614399,
"grad_norm": 1.6846823692321777,
"learning_rate": 8.826229508196722e-05,
"loss": 0.031,
"step": 17140
},
{
"epoch": 83.75838926174497,
"grad_norm": 1.9100298881530762,
"learning_rate": 8.813114754098362e-05,
"loss": 0.0364,
"step": 17160
},
{
"epoch": 83.85600976205004,
"grad_norm": 1.557652235031128,
"learning_rate": 8.800000000000001e-05,
"loss": 0.0367,
"step": 17180
},
{
"epoch": 83.95363026235509,
"grad_norm": 1.104952335357666,
"learning_rate": 8.786885245901639e-05,
"loss": 0.0355,
"step": 17200
},
{
"epoch": 84.05125076266016,
"grad_norm": 2.2244253158569336,
"learning_rate": 8.773770491803279e-05,
"loss": 0.0308,
"step": 17220
},
{
"epoch": 84.14887126296523,
"grad_norm": 2.745600938796997,
"learning_rate": 8.760655737704918e-05,
"loss": 0.0284,
"step": 17240
},
{
"epoch": 84.24649176327028,
"grad_norm": 1.7342115640640259,
"learning_rate": 8.747540983606558e-05,
"loss": 0.0305,
"step": 17260
},
{
"epoch": 84.34411226357535,
"grad_norm": 0.972453773021698,
"learning_rate": 8.734426229508197e-05,
"loss": 0.0313,
"step": 17280
},
{
"epoch": 84.44173276388041,
"grad_norm": 1.6197409629821777,
"learning_rate": 8.721311475409837e-05,
"loss": 0.0338,
"step": 17300
},
{
"epoch": 84.53935326418548,
"grad_norm": 1.2944082021713257,
"learning_rate": 8.708196721311475e-05,
"loss": 0.0365,
"step": 17320
},
{
"epoch": 84.63697376449055,
"grad_norm": 2.3329808712005615,
"learning_rate": 8.695081967213115e-05,
"loss": 0.0319,
"step": 17340
},
{
"epoch": 84.7345942647956,
"grad_norm": 2.8675897121429443,
"learning_rate": 8.681967213114754e-05,
"loss": 0.0325,
"step": 17360
},
{
"epoch": 84.83221476510067,
"grad_norm": 2.0623087882995605,
"learning_rate": 8.668852459016393e-05,
"loss": 0.0335,
"step": 17380
},
{
"epoch": 84.92983526540573,
"grad_norm": 1.3979312181472778,
"learning_rate": 8.655737704918033e-05,
"loss": 0.0332,
"step": 17400
},
{
"epoch": 85.0274557657108,
"grad_norm": 1.630370855331421,
"learning_rate": 8.642622950819672e-05,
"loss": 0.0328,
"step": 17420
},
{
"epoch": 85.12507626601587,
"grad_norm": 1.1962217092514038,
"learning_rate": 8.629508196721313e-05,
"loss": 0.0317,
"step": 17440
},
{
"epoch": 85.22269676632092,
"grad_norm": 1.3756200075149536,
"learning_rate": 8.61639344262295e-05,
"loss": 0.0247,
"step": 17460
},
{
"epoch": 85.32031726662599,
"grad_norm": 1.2209444046020508,
"learning_rate": 8.60327868852459e-05,
"loss": 0.0306,
"step": 17480
},
{
"epoch": 85.41793776693106,
"grad_norm": 2.080512046813965,
"learning_rate": 8.59016393442623e-05,
"loss": 0.029,
"step": 17500
},
{
"epoch": 85.51555826723612,
"grad_norm": 1.3376110792160034,
"learning_rate": 8.57704918032787e-05,
"loss": 0.0302,
"step": 17520
},
{
"epoch": 85.61317876754119,
"grad_norm": 1.5291906595230103,
"learning_rate": 8.563934426229509e-05,
"loss": 0.0342,
"step": 17540
},
{
"epoch": 85.71079926784624,
"grad_norm": 0.9164462089538574,
"learning_rate": 8.550819672131149e-05,
"loss": 0.0314,
"step": 17560
},
{
"epoch": 85.80841976815131,
"grad_norm": 1.3751301765441895,
"learning_rate": 8.537704918032787e-05,
"loss": 0.0342,
"step": 17580
},
{
"epoch": 85.90604026845638,
"grad_norm": 1.4068491458892822,
"learning_rate": 8.524590163934426e-05,
"loss": 0.0353,
"step": 17600
},
{
"epoch": 86.00366076876143,
"grad_norm": 1.103683590888977,
"learning_rate": 8.511475409836066e-05,
"loss": 0.031,
"step": 17620
},
{
"epoch": 86.1012812690665,
"grad_norm": 1.2004616260528564,
"learning_rate": 8.498360655737705e-05,
"loss": 0.0249,
"step": 17640
},
{
"epoch": 86.19890176937157,
"grad_norm": 1.8739843368530273,
"learning_rate": 8.485245901639345e-05,
"loss": 0.0271,
"step": 17660
},
{
"epoch": 86.29652226967663,
"grad_norm": 0.8995428085327148,
"learning_rate": 8.472131147540984e-05,
"loss": 0.0286,
"step": 17680
},
{
"epoch": 86.3941427699817,
"grad_norm": 2.4829764366149902,
"learning_rate": 8.459016393442624e-05,
"loss": 0.0291,
"step": 17700
},
{
"epoch": 86.49176327028675,
"grad_norm": 1.098775863647461,
"learning_rate": 8.445901639344262e-05,
"loss": 0.033,
"step": 17720
},
{
"epoch": 86.58938377059182,
"grad_norm": 1.3387798070907593,
"learning_rate": 8.432786885245902e-05,
"loss": 0.0297,
"step": 17740
},
{
"epoch": 86.68700427089689,
"grad_norm": 2.0622024536132812,
"learning_rate": 8.419672131147541e-05,
"loss": 0.034,
"step": 17760
},
{
"epoch": 86.78462477120195,
"grad_norm": 1.90251624584198,
"learning_rate": 8.406557377049181e-05,
"loss": 0.0314,
"step": 17780
},
{
"epoch": 86.88224527150702,
"grad_norm": 0.5546866059303284,
"learning_rate": 8.39344262295082e-05,
"loss": 0.0315,
"step": 17800
},
{
"epoch": 86.97986577181209,
"grad_norm": 1.1995351314544678,
"learning_rate": 8.380327868852459e-05,
"loss": 0.0357,
"step": 17820
},
{
"epoch": 87.07748627211714,
"grad_norm": 1.374808430671692,
"learning_rate": 8.367213114754098e-05,
"loss": 0.0274,
"step": 17840
},
{
"epoch": 87.17510677242221,
"grad_norm": 1.2104483842849731,
"learning_rate": 8.354098360655737e-05,
"loss": 0.0261,
"step": 17860
},
{
"epoch": 87.27272727272727,
"grad_norm": 1.2082188129425049,
"learning_rate": 8.340983606557377e-05,
"loss": 0.0327,
"step": 17880
},
{
"epoch": 87.37034777303234,
"grad_norm": 1.6042877435684204,
"learning_rate": 8.327868852459016e-05,
"loss": 0.0298,
"step": 17900
},
{
"epoch": 87.4679682733374,
"grad_norm": 0.9819115400314331,
"learning_rate": 8.314754098360657e-05,
"loss": 0.0289,
"step": 17920
},
{
"epoch": 87.56558877364246,
"grad_norm": 0.9918608665466309,
"learning_rate": 8.301639344262296e-05,
"loss": 0.0284,
"step": 17940
},
{
"epoch": 87.66320927394753,
"grad_norm": 0.5345699191093445,
"learning_rate": 8.288524590163935e-05,
"loss": 0.0304,
"step": 17960
},
{
"epoch": 87.7608297742526,
"grad_norm": 1.5290710926055908,
"learning_rate": 8.275409836065573e-05,
"loss": 0.0313,
"step": 17980
},
{
"epoch": 87.85845027455765,
"grad_norm": 2.701918363571167,
"learning_rate": 8.262295081967214e-05,
"loss": 0.0319,
"step": 18000
},
{
"epoch": 87.95607077486272,
"grad_norm": 1.9246459007263184,
"learning_rate": 8.249180327868853e-05,
"loss": 0.0332,
"step": 18020
},
{
"epoch": 88.05369127516778,
"grad_norm": 1.6299734115600586,
"learning_rate": 8.236065573770492e-05,
"loss": 0.0278,
"step": 18040
},
{
"epoch": 88.15131177547285,
"grad_norm": 1.8780626058578491,
"learning_rate": 8.222950819672132e-05,
"loss": 0.0303,
"step": 18060
},
{
"epoch": 88.24893227577792,
"grad_norm": 1.2770717144012451,
"learning_rate": 8.209836065573771e-05,
"loss": 0.0286,
"step": 18080
},
{
"epoch": 88.34655277608297,
"grad_norm": 2.3574554920196533,
"learning_rate": 8.19672131147541e-05,
"loss": 0.0245,
"step": 18100
},
{
"epoch": 88.44417327638804,
"grad_norm": 1.6058595180511475,
"learning_rate": 8.183606557377049e-05,
"loss": 0.0322,
"step": 18120
},
{
"epoch": 88.54179377669311,
"grad_norm": 2.50612211227417,
"learning_rate": 8.170491803278689e-05,
"loss": 0.0313,
"step": 18140
},
{
"epoch": 88.63941427699817,
"grad_norm": 1.8343908786773682,
"learning_rate": 8.157377049180328e-05,
"loss": 0.0314,
"step": 18160
},
{
"epoch": 88.73703477730324,
"grad_norm": 1.3419734239578247,
"learning_rate": 8.144262295081968e-05,
"loss": 0.0306,
"step": 18180
},
{
"epoch": 88.83465527760829,
"grad_norm": 0.9935535192489624,
"learning_rate": 8.131147540983607e-05,
"loss": 0.0319,
"step": 18200
},
{
"epoch": 88.93227577791336,
"grad_norm": 1.108636498451233,
"learning_rate": 8.118032786885246e-05,
"loss": 0.0316,
"step": 18220
},
{
"epoch": 89.02989627821843,
"grad_norm": 0.9274991154670715,
"learning_rate": 8.104918032786885e-05,
"loss": 0.0299,
"step": 18240
},
{
"epoch": 89.12751677852349,
"grad_norm": 2.000669002532959,
"learning_rate": 8.091803278688524e-05,
"loss": 0.0251,
"step": 18260
},
{
"epoch": 89.22513727882856,
"grad_norm": 0.8616065382957458,
"learning_rate": 8.078688524590164e-05,
"loss": 0.0283,
"step": 18280
},
{
"epoch": 89.32275777913362,
"grad_norm": 0.7986624836921692,
"learning_rate": 8.065573770491803e-05,
"loss": 0.0263,
"step": 18300
},
{
"epoch": 89.42037827943868,
"grad_norm": 1.0566186904907227,
"learning_rate": 8.052459016393444e-05,
"loss": 0.0282,
"step": 18320
},
{
"epoch": 89.51799877974375,
"grad_norm": 2.450927734375,
"learning_rate": 8.039344262295082e-05,
"loss": 0.0304,
"step": 18340
},
{
"epoch": 89.6156192800488,
"grad_norm": 1.190073847770691,
"learning_rate": 8.026229508196721e-05,
"loss": 0.0338,
"step": 18360
},
{
"epoch": 89.71323978035387,
"grad_norm": 0.7991436719894409,
"learning_rate": 8.01311475409836e-05,
"loss": 0.0279,
"step": 18380
},
{
"epoch": 89.81086028065894,
"grad_norm": 1.010593295097351,
"learning_rate": 8e-05,
"loss": 0.0321,
"step": 18400
},
{
"epoch": 89.908480780964,
"grad_norm": 1.585942029953003,
"learning_rate": 7.98688524590164e-05,
"loss": 0.0308,
"step": 18420
},
{
"epoch": 90.00610128126907,
"grad_norm": 0.8515540957450867,
"learning_rate": 7.97377049180328e-05,
"loss": 0.0336,
"step": 18440
},
{
"epoch": 90.10372178157414,
"grad_norm": 0.7114633917808533,
"learning_rate": 7.960655737704919e-05,
"loss": 0.0245,
"step": 18460
},
{
"epoch": 90.20134228187919,
"grad_norm": 1.962902545928955,
"learning_rate": 7.947540983606558e-05,
"loss": 0.0277,
"step": 18480
},
{
"epoch": 90.29896278218426,
"grad_norm": 1.3741369247436523,
"learning_rate": 7.934426229508197e-05,
"loss": 0.0266,
"step": 18500
},
{
"epoch": 90.39658328248932,
"grad_norm": 1.7575631141662598,
"learning_rate": 7.921311475409836e-05,
"loss": 0.0251,
"step": 18520
},
{
"epoch": 90.49420378279439,
"grad_norm": 1.7947065830230713,
"learning_rate": 7.908196721311476e-05,
"loss": 0.0268,
"step": 18540
},
{
"epoch": 90.59182428309946,
"grad_norm": 1.5639177560806274,
"learning_rate": 7.895081967213115e-05,
"loss": 0.0276,
"step": 18560
},
{
"epoch": 90.68944478340451,
"grad_norm": 1.936145544052124,
"learning_rate": 7.881967213114755e-05,
"loss": 0.0285,
"step": 18580
},
{
"epoch": 90.78706528370958,
"grad_norm": 1.1903581619262695,
"learning_rate": 7.868852459016394e-05,
"loss": 0.0295,
"step": 18600
},
{
"epoch": 90.88468578401465,
"grad_norm": 1.4474736452102661,
"learning_rate": 7.855737704918033e-05,
"loss": 0.0304,
"step": 18620
},
{
"epoch": 90.9823062843197,
"grad_norm": 1.0843713283538818,
"learning_rate": 7.842622950819672e-05,
"loss": 0.0326,
"step": 18640
},
{
"epoch": 91.07992678462477,
"grad_norm": 1.3280309438705444,
"learning_rate": 7.829508196721311e-05,
"loss": 0.0251,
"step": 18660
},
{
"epoch": 91.17754728492983,
"grad_norm": 1.297050952911377,
"learning_rate": 7.816393442622951e-05,
"loss": 0.0223,
"step": 18680
},
{
"epoch": 91.2751677852349,
"grad_norm": 0.6845425367355347,
"learning_rate": 7.80327868852459e-05,
"loss": 0.0248,
"step": 18700
},
{
"epoch": 91.37278828553997,
"grad_norm": 0.9885107278823853,
"learning_rate": 7.79016393442623e-05,
"loss": 0.0272,
"step": 18720
},
{
"epoch": 91.47040878584502,
"grad_norm": 1.4001471996307373,
"learning_rate": 7.77704918032787e-05,
"loss": 0.0279,
"step": 18740
},
{
"epoch": 91.5680292861501,
"grad_norm": 1.0209612846374512,
"learning_rate": 7.763934426229508e-05,
"loss": 0.0282,
"step": 18760
},
{
"epoch": 91.66564978645516,
"grad_norm": 0.8737853765487671,
"learning_rate": 7.750819672131147e-05,
"loss": 0.0278,
"step": 18780
},
{
"epoch": 91.76327028676022,
"grad_norm": 2.5949928760528564,
"learning_rate": 7.737704918032788e-05,
"loss": 0.031,
"step": 18800
},
{
"epoch": 91.86089078706529,
"grad_norm": 1.274043083190918,
"learning_rate": 7.724590163934426e-05,
"loss": 0.03,
"step": 18820
},
{
"epoch": 91.95851128737034,
"grad_norm": 1.286889672279358,
"learning_rate": 7.711475409836067e-05,
"loss": 0.0322,
"step": 18840
},
{
"epoch": 92.05613178767541,
"grad_norm": 1.144182801246643,
"learning_rate": 7.698360655737706e-05,
"loss": 0.0285,
"step": 18860
},
{
"epoch": 92.15375228798048,
"grad_norm": 0.7334594130516052,
"learning_rate": 7.685245901639345e-05,
"loss": 0.0239,
"step": 18880
},
{
"epoch": 92.25137278828554,
"grad_norm": 0.7854740619659424,
"learning_rate": 7.672131147540984e-05,
"loss": 0.0246,
"step": 18900
},
{
"epoch": 92.3489932885906,
"grad_norm": 1.5289876461029053,
"learning_rate": 7.659016393442622e-05,
"loss": 0.0287,
"step": 18920
},
{
"epoch": 92.44661378889568,
"grad_norm": 1.1184567213058472,
"learning_rate": 7.645901639344263e-05,
"loss": 0.0255,
"step": 18940
},
{
"epoch": 92.54423428920073,
"grad_norm": 1.2199037075042725,
"learning_rate": 7.632786885245902e-05,
"loss": 0.0296,
"step": 18960
},
{
"epoch": 92.6418547895058,
"grad_norm": 1.8938370943069458,
"learning_rate": 7.619672131147542e-05,
"loss": 0.0269,
"step": 18980
},
{
"epoch": 92.73947528981085,
"grad_norm": 1.4412243366241455,
"learning_rate": 7.606557377049181e-05,
"loss": 0.0288,
"step": 19000
},
{
"epoch": 92.83709579011592,
"grad_norm": 1.6484626531600952,
"learning_rate": 7.59344262295082e-05,
"loss": 0.0285,
"step": 19020
},
{
"epoch": 92.934716290421,
"grad_norm": 1.5925848484039307,
"learning_rate": 7.580327868852459e-05,
"loss": 0.0308,
"step": 19040
},
{
"epoch": 93.03233679072605,
"grad_norm": 3.0019991397857666,
"learning_rate": 7.567213114754099e-05,
"loss": 0.0326,
"step": 19060
},
{
"epoch": 93.12995729103112,
"grad_norm": 1.6370418071746826,
"learning_rate": 7.554098360655738e-05,
"loss": 0.0242,
"step": 19080
},
{
"epoch": 93.22757779133617,
"grad_norm": 1.029890537261963,
"learning_rate": 7.540983606557377e-05,
"loss": 0.0268,
"step": 19100
},
{
"epoch": 93.32519829164124,
"grad_norm": 0.983168363571167,
"learning_rate": 7.527868852459017e-05,
"loss": 0.025,
"step": 19120
},
{
"epoch": 93.42281879194631,
"grad_norm": 0.7974419593811035,
"learning_rate": 7.514754098360656e-05,
"loss": 0.0278,
"step": 19140
},
{
"epoch": 93.52043929225137,
"grad_norm": 1.0815564393997192,
"learning_rate": 7.501639344262295e-05,
"loss": 0.0256,
"step": 19160
},
{
"epoch": 93.61805979255644,
"grad_norm": 1.217862844467163,
"learning_rate": 7.488524590163934e-05,
"loss": 0.0278,
"step": 19180
},
{
"epoch": 93.7156802928615,
"grad_norm": 1.0961949825286865,
"learning_rate": 7.475409836065574e-05,
"loss": 0.0259,
"step": 19200
},
{
"epoch": 93.81330079316656,
"grad_norm": 0.7110977172851562,
"learning_rate": 7.462295081967213e-05,
"loss": 0.0282,
"step": 19220
},
{
"epoch": 93.91092129347163,
"grad_norm": 1.6820802688598633,
"learning_rate": 7.449180327868854e-05,
"loss": 0.0277,
"step": 19240
},
{
"epoch": 94.00854179377669,
"grad_norm": 1.1288400888442993,
"learning_rate": 7.436065573770493e-05,
"loss": 0.0288,
"step": 19260
},
{
"epoch": 94.10616229408176,
"grad_norm": 1.01132071018219,
"learning_rate": 7.422950819672131e-05,
"loss": 0.0224,
"step": 19280
},
{
"epoch": 94.20378279438683,
"grad_norm": 0.8945777416229248,
"learning_rate": 7.40983606557377e-05,
"loss": 0.0254,
"step": 19300
},
{
"epoch": 94.30140329469188,
"grad_norm": 0.9037290811538696,
"learning_rate": 7.39672131147541e-05,
"loss": 0.026,
"step": 19320
},
{
"epoch": 94.39902379499695,
"grad_norm": 1.0359045267105103,
"learning_rate": 7.38360655737705e-05,
"loss": 0.0236,
"step": 19340
},
{
"epoch": 94.49664429530202,
"grad_norm": 1.0174087285995483,
"learning_rate": 7.370491803278689e-05,
"loss": 0.0259,
"step": 19360
},
{
"epoch": 94.59426479560707,
"grad_norm": 1.7219215631484985,
"learning_rate": 7.357377049180329e-05,
"loss": 0.0256,
"step": 19380
},
{
"epoch": 94.69188529591214,
"grad_norm": 1.2760013341903687,
"learning_rate": 7.344262295081968e-05,
"loss": 0.0268,
"step": 19400
},
{
"epoch": 94.7895057962172,
"grad_norm": 1.4698799848556519,
"learning_rate": 7.331147540983607e-05,
"loss": 0.0301,
"step": 19420
},
{
"epoch": 94.88712629652227,
"grad_norm": 1.4160206317901611,
"learning_rate": 7.318032786885246e-05,
"loss": 0.0288,
"step": 19440
},
{
"epoch": 94.98474679682734,
"grad_norm": 1.3946982622146606,
"learning_rate": 7.304918032786886e-05,
"loss": 0.0295,
"step": 19460
},
{
"epoch": 95.0823672971324,
"grad_norm": 1.3507988452911377,
"learning_rate": 7.291803278688525e-05,
"loss": 0.0216,
"step": 19480
},
{
"epoch": 95.17998779743746,
"grad_norm": 1.090182900428772,
"learning_rate": 7.278688524590165e-05,
"loss": 0.0246,
"step": 19500
},
{
"epoch": 95.27760829774253,
"grad_norm": 0.942014217376709,
"learning_rate": 7.265573770491804e-05,
"loss": 0.0257,
"step": 19520
},
{
"epoch": 95.37522879804759,
"grad_norm": 1.1440125703811646,
"learning_rate": 7.252459016393443e-05,
"loss": 0.0247,
"step": 19540
},
{
"epoch": 95.47284929835266,
"grad_norm": 1.1401231288909912,
"learning_rate": 7.239344262295082e-05,
"loss": 0.0249,
"step": 19560
},
{
"epoch": 95.57046979865771,
"grad_norm": 1.6751883029937744,
"learning_rate": 7.226229508196721e-05,
"loss": 0.0302,
"step": 19580
},
{
"epoch": 95.66809029896278,
"grad_norm": 0.9582850337028503,
"learning_rate": 7.213114754098361e-05,
"loss": 0.0252,
"step": 19600
},
{
"epoch": 95.76571079926785,
"grad_norm": 0.8516545295715332,
"learning_rate": 7.2e-05,
"loss": 0.0246,
"step": 19620
},
{
"epoch": 95.8633312995729,
"grad_norm": 1.0861225128173828,
"learning_rate": 7.18688524590164e-05,
"loss": 0.0288,
"step": 19640
},
{
"epoch": 95.96095179987798,
"grad_norm": 1.4560004472732544,
"learning_rate": 7.17377049180328e-05,
"loss": 0.0297,
"step": 19660
},
{
"epoch": 96.05857230018304,
"grad_norm": 1.5447992086410522,
"learning_rate": 7.160655737704918e-05,
"loss": 0.024,
"step": 19680
},
{
"epoch": 96.1561928004881,
"grad_norm": 1.0061029195785522,
"learning_rate": 7.147540983606557e-05,
"loss": 0.0231,
"step": 19700
},
{
"epoch": 96.25381330079317,
"grad_norm": 0.918874979019165,
"learning_rate": 7.134426229508198e-05,
"loss": 0.0213,
"step": 19720
},
{
"epoch": 96.35143380109822,
"grad_norm": 1.3430179357528687,
"learning_rate": 7.121311475409837e-05,
"loss": 0.0278,
"step": 19740
},
{
"epoch": 96.4490543014033,
"grad_norm": 1.0082734823226929,
"learning_rate": 7.108196721311475e-05,
"loss": 0.0267,
"step": 19760
},
{
"epoch": 96.54667480170836,
"grad_norm": 1.480941653251648,
"learning_rate": 7.095081967213116e-05,
"loss": 0.0261,
"step": 19780
},
{
"epoch": 96.64429530201342,
"grad_norm": 1.3514058589935303,
"learning_rate": 7.081967213114755e-05,
"loss": 0.0243,
"step": 19800
},
{
"epoch": 96.74191580231849,
"grad_norm": 1.8436918258666992,
"learning_rate": 7.068852459016394e-05,
"loss": 0.0233,
"step": 19820
},
{
"epoch": 96.83953630262356,
"grad_norm": 0.7598877549171448,
"learning_rate": 7.055737704918033e-05,
"loss": 0.0273,
"step": 19840
},
{
"epoch": 96.93715680292861,
"grad_norm": 1.1681586503982544,
"learning_rate": 7.042622950819673e-05,
"loss": 0.0272,
"step": 19860
},
{
"epoch": 97.03477730323368,
"grad_norm": 2.10929012298584,
"learning_rate": 7.029508196721312e-05,
"loss": 0.0249,
"step": 19880
},
{
"epoch": 97.13239780353874,
"grad_norm": 1.3854628801345825,
"learning_rate": 7.016393442622952e-05,
"loss": 0.0222,
"step": 19900
},
{
"epoch": 97.2300183038438,
"grad_norm": 0.7279977798461914,
"learning_rate": 7.003278688524591e-05,
"loss": 0.0234,
"step": 19920
},
{
"epoch": 97.32763880414888,
"grad_norm": 0.9051561951637268,
"learning_rate": 6.99016393442623e-05,
"loss": 0.0241,
"step": 19940
},
{
"epoch": 97.42525930445393,
"grad_norm": 0.7423291802406311,
"learning_rate": 6.977049180327869e-05,
"loss": 0.0234,
"step": 19960
},
{
"epoch": 97.522879804759,
"grad_norm": 1.4373456239700317,
"learning_rate": 6.963934426229508e-05,
"loss": 0.0253,
"step": 19980
},
{
"epoch": 97.62050030506407,
"grad_norm": 0.6892008781433105,
"learning_rate": 6.950819672131148e-05,
"loss": 0.0247,
"step": 20000
},
{
"epoch": 97.71812080536913,
"grad_norm": 1.0047869682312012,
"learning_rate": 6.937704918032787e-05,
"loss": 0.0249,
"step": 20020
},
{
"epoch": 97.8157413056742,
"grad_norm": 1.460539698600769,
"learning_rate": 6.924590163934427e-05,
"loss": 0.0276,
"step": 20040
},
{
"epoch": 97.91336180597925,
"grad_norm": 0.9892900586128235,
"learning_rate": 6.911475409836066e-05,
"loss": 0.0274,
"step": 20060
},
{
"epoch": 98.01098230628432,
"grad_norm": 1.0830744504928589,
"learning_rate": 6.898360655737705e-05,
"loss": 0.0264,
"step": 20080
},
{
"epoch": 98.10860280658939,
"grad_norm": 1.9523261785507202,
"learning_rate": 6.885245901639344e-05,
"loss": 0.0239,
"step": 20100
},
{
"epoch": 98.20622330689444,
"grad_norm": 1.0463730096817017,
"learning_rate": 6.872131147540984e-05,
"loss": 0.0246,
"step": 20120
},
{
"epoch": 98.30384380719951,
"grad_norm": 0.9709805250167847,
"learning_rate": 6.859016393442623e-05,
"loss": 0.0219,
"step": 20140
},
{
"epoch": 98.40146430750458,
"grad_norm": 1.2519688606262207,
"learning_rate": 6.845901639344262e-05,
"loss": 0.0265,
"step": 20160
},
{
"epoch": 98.49908480780964,
"grad_norm": 0.8213618993759155,
"learning_rate": 6.832786885245903e-05,
"loss": 0.0233,
"step": 20180
},
{
"epoch": 98.59670530811471,
"grad_norm": 1.1715772151947021,
"learning_rate": 6.819672131147542e-05,
"loss": 0.0218,
"step": 20200
},
{
"epoch": 98.69432580841976,
"grad_norm": 0.843437671661377,
"learning_rate": 6.80655737704918e-05,
"loss": 0.0272,
"step": 20220
},
{
"epoch": 98.79194630872483,
"grad_norm": 1.089414358139038,
"learning_rate": 6.79344262295082e-05,
"loss": 0.0277,
"step": 20240
},
{
"epoch": 98.8895668090299,
"grad_norm": 1.1498339176177979,
"learning_rate": 6.78032786885246e-05,
"loss": 0.0262,
"step": 20260
},
{
"epoch": 98.98718730933496,
"grad_norm": 0.7882099747657776,
"learning_rate": 6.767213114754099e-05,
"loss": 0.0261,
"step": 20280
},
{
"epoch": 99.08480780964003,
"grad_norm": 2.353572368621826,
"learning_rate": 6.754098360655739e-05,
"loss": 0.0232,
"step": 20300
},
{
"epoch": 99.1824283099451,
"grad_norm": 1.716091513633728,
"learning_rate": 6.740983606557378e-05,
"loss": 0.0232,
"step": 20320
},
{
"epoch": 99.28004881025015,
"grad_norm": 1.1512521505355835,
"learning_rate": 6.727868852459017e-05,
"loss": 0.0216,
"step": 20340
},
{
"epoch": 99.37766931055522,
"grad_norm": 1.532551884651184,
"learning_rate": 6.714754098360656e-05,
"loss": 0.0229,
"step": 20360
},
{
"epoch": 99.47528981086027,
"grad_norm": 1.1673088073730469,
"learning_rate": 6.701639344262295e-05,
"loss": 0.0225,
"step": 20380
},
{
"epoch": 99.57291031116534,
"grad_norm": 1.0088196992874146,
"learning_rate": 6.688524590163935e-05,
"loss": 0.0265,
"step": 20400
},
{
"epoch": 99.67053081147041,
"grad_norm": 1.236024260520935,
"learning_rate": 6.675409836065574e-05,
"loss": 0.0245,
"step": 20420
},
{
"epoch": 99.76815131177547,
"grad_norm": 1.9302829504013062,
"learning_rate": 6.662295081967214e-05,
"loss": 0.0243,
"step": 20440
},
{
"epoch": 99.86577181208054,
"grad_norm": 0.8187095522880554,
"learning_rate": 6.649180327868853e-05,
"loss": 0.0241,
"step": 20460
},
{
"epoch": 99.96339231238561,
"grad_norm": 1.3263179063796997,
"learning_rate": 6.636065573770492e-05,
"loss": 0.0274,
"step": 20480
},
{
"epoch": 100.06101281269066,
"grad_norm": 1.1137028932571411,
"learning_rate": 6.622950819672131e-05,
"loss": 0.0221,
"step": 20500
},
{
"epoch": 100.15863331299573,
"grad_norm": 1.171851396560669,
"learning_rate": 6.609836065573771e-05,
"loss": 0.0193,
"step": 20520
},
{
"epoch": 100.25625381330079,
"grad_norm": 1.50115966796875,
"learning_rate": 6.59672131147541e-05,
"loss": 0.0217,
"step": 20540
},
{
"epoch": 100.35387431360586,
"grad_norm": 0.9678937792778015,
"learning_rate": 6.58360655737705e-05,
"loss": 0.0241,
"step": 20560
},
{
"epoch": 100.45149481391093,
"grad_norm": 1.0585274696350098,
"learning_rate": 6.57049180327869e-05,
"loss": 0.0257,
"step": 20580
},
{
"epoch": 100.54911531421598,
"grad_norm": 0.9907383322715759,
"learning_rate": 6.557377049180327e-05,
"loss": 0.0238,
"step": 20600
},
{
"epoch": 100.64673581452105,
"grad_norm": 1.7532027959823608,
"learning_rate": 6.544262295081967e-05,
"loss": 0.0245,
"step": 20620
},
{
"epoch": 100.74435631482612,
"grad_norm": 1.7263871431350708,
"learning_rate": 6.531147540983606e-05,
"loss": 0.0274,
"step": 20640
},
{
"epoch": 100.84197681513118,
"grad_norm": 0.698143482208252,
"learning_rate": 6.518032786885247e-05,
"loss": 0.0247,
"step": 20660
},
{
"epoch": 100.93959731543625,
"grad_norm": 0.9223474860191345,
"learning_rate": 6.504918032786886e-05,
"loss": 0.0236,
"step": 20680
},
{
"epoch": 101.0372178157413,
"grad_norm": 2.3332505226135254,
"learning_rate": 6.491803278688526e-05,
"loss": 0.0278,
"step": 20700
},
{
"epoch": 101.13483831604637,
"grad_norm": 1.0863476991653442,
"learning_rate": 6.478688524590165e-05,
"loss": 0.0218,
"step": 20720
},
{
"epoch": 101.23245881635144,
"grad_norm": 1.3872435092926025,
"learning_rate": 6.465573770491804e-05,
"loss": 0.0213,
"step": 20740
},
{
"epoch": 101.3300793166565,
"grad_norm": 1.7096153497695923,
"learning_rate": 6.452459016393443e-05,
"loss": 0.0224,
"step": 20760
},
{
"epoch": 101.42769981696156,
"grad_norm": 1.117743968963623,
"learning_rate": 6.439344262295083e-05,
"loss": 0.022,
"step": 20780
},
{
"epoch": 101.52532031726662,
"grad_norm": 0.7065951824188232,
"learning_rate": 6.426229508196722e-05,
"loss": 0.023,
"step": 20800
},
{
"epoch": 101.62294081757169,
"grad_norm": 1.945495843887329,
"learning_rate": 6.413114754098361e-05,
"loss": 0.024,
"step": 20820
},
{
"epoch": 101.72056131787676,
"grad_norm": 2.141911268234253,
"learning_rate": 6.400000000000001e-05,
"loss": 0.0243,
"step": 20840
},
{
"epoch": 101.81818181818181,
"grad_norm": 1.0459177494049072,
"learning_rate": 6.386885245901639e-05,
"loss": 0.0261,
"step": 20860
},
{
"epoch": 101.91580231848688,
"grad_norm": 1.3487838506698608,
"learning_rate": 6.373770491803279e-05,
"loss": 0.0235,
"step": 20880
},
{
"epoch": 102.01342281879195,
"grad_norm": 0.9669828414916992,
"learning_rate": 6.360655737704918e-05,
"loss": 0.0262,
"step": 20900
},
{
"epoch": 102.111043319097,
"grad_norm": 0.8470537662506104,
"learning_rate": 6.347540983606558e-05,
"loss": 0.0209,
"step": 20920
},
{
"epoch": 102.20866381940208,
"grad_norm": 0.879011332988739,
"learning_rate": 6.334426229508197e-05,
"loss": 0.0203,
"step": 20940
},
{
"epoch": 102.30628431970713,
"grad_norm": 1.5658769607543945,
"learning_rate": 6.321311475409837e-05,
"loss": 0.0215,
"step": 20960
},
{
"epoch": 102.4039048200122,
"grad_norm": 0.6140362620353699,
"learning_rate": 6.308196721311475e-05,
"loss": 0.0219,
"step": 20980
},
{
"epoch": 102.50152532031727,
"grad_norm": 1.0868732929229736,
"learning_rate": 6.295081967213115e-05,
"loss": 0.0224,
"step": 21000
},
{
"epoch": 102.59914582062233,
"grad_norm": 1.1535019874572754,
"learning_rate": 6.281967213114754e-05,
"loss": 0.0274,
"step": 21020
},
{
"epoch": 102.6967663209274,
"grad_norm": 1.6501412391662598,
"learning_rate": 6.268852459016393e-05,
"loss": 0.0239,
"step": 21040
},
{
"epoch": 102.79438682123246,
"grad_norm": 1.5148719549179077,
"learning_rate": 6.255737704918033e-05,
"loss": 0.0213,
"step": 21060
},
{
"epoch": 102.89200732153752,
"grad_norm": 0.9506848454475403,
"learning_rate": 6.242622950819672e-05,
"loss": 0.0255,
"step": 21080
},
{
"epoch": 102.98962782184259,
"grad_norm": 1.6493301391601562,
"learning_rate": 6.229508196721313e-05,
"loss": 0.0271,
"step": 21100
},
{
"epoch": 103.08724832214764,
"grad_norm": 1.0388059616088867,
"learning_rate": 6.21639344262295e-05,
"loss": 0.0196,
"step": 21120
},
{
"epoch": 103.18486882245271,
"grad_norm": 0.6513581871986389,
"learning_rate": 6.20327868852459e-05,
"loss": 0.0204,
"step": 21140
},
{
"epoch": 103.28248932275778,
"grad_norm": 0.791163980960846,
"learning_rate": 6.19016393442623e-05,
"loss": 0.0221,
"step": 21160
},
{
"epoch": 103.38010982306284,
"grad_norm": 1.2468329668045044,
"learning_rate": 6.17704918032787e-05,
"loss": 0.0198,
"step": 21180
},
{
"epoch": 103.47773032336791,
"grad_norm": 0.9693405032157898,
"learning_rate": 6.163934426229509e-05,
"loss": 0.0239,
"step": 21200
},
{
"epoch": 103.57535082367298,
"grad_norm": 0.8478600382804871,
"learning_rate": 6.150819672131148e-05,
"loss": 0.0236,
"step": 21220
},
{
"epoch": 103.67297132397803,
"grad_norm": 1.0516799688339233,
"learning_rate": 6.137704918032787e-05,
"loss": 0.0236,
"step": 21240
},
{
"epoch": 103.7705918242831,
"grad_norm": 1.1466089487075806,
"learning_rate": 6.124590163934426e-05,
"loss": 0.0232,
"step": 21260
},
{
"epoch": 103.86821232458816,
"grad_norm": 2.5929698944091797,
"learning_rate": 6.111475409836066e-05,
"loss": 0.0213,
"step": 21280
},
{
"epoch": 103.96583282489323,
"grad_norm": 0.7544173002243042,
"learning_rate": 6.098360655737705e-05,
"loss": 0.0242,
"step": 21300
},
{
"epoch": 104.0634533251983,
"grad_norm": 1.013609766960144,
"learning_rate": 6.085245901639345e-05,
"loss": 0.0217,
"step": 21320
},
{
"epoch": 104.16107382550335,
"grad_norm": 1.933367133140564,
"learning_rate": 6.072131147540984e-05,
"loss": 0.0205,
"step": 21340
},
{
"epoch": 104.25869432580842,
"grad_norm": 0.9371503591537476,
"learning_rate": 6.0590163934426236e-05,
"loss": 0.0224,
"step": 21360
},
{
"epoch": 104.35631482611349,
"grad_norm": 0.8268026113510132,
"learning_rate": 6.0459016393442625e-05,
"loss": 0.0217,
"step": 21380
},
{
"epoch": 104.45393532641855,
"grad_norm": 2.129154682159424,
"learning_rate": 6.032786885245902e-05,
"loss": 0.019,
"step": 21400
},
{
"epoch": 104.55155582672361,
"grad_norm": 0.7515645623207092,
"learning_rate": 6.019672131147541e-05,
"loss": 0.0236,
"step": 21420
},
{
"epoch": 104.64917632702867,
"grad_norm": 0.7831740975379944,
"learning_rate": 6.00655737704918e-05,
"loss": 0.0205,
"step": 21440
},
{
"epoch": 104.74679682733374,
"grad_norm": 1.9743825197219849,
"learning_rate": 5.99344262295082e-05,
"loss": 0.0232,
"step": 21460
},
{
"epoch": 104.84441732763881,
"grad_norm": 1.7434648275375366,
"learning_rate": 5.9803278688524586e-05,
"loss": 0.0247,
"step": 21480
},
{
"epoch": 104.94203782794386,
"grad_norm": 1.324744701385498,
"learning_rate": 5.967213114754099e-05,
"loss": 0.0248,
"step": 21500
},
{
"epoch": 105.03965832824893,
"grad_norm": 0.7877782583236694,
"learning_rate": 5.954098360655738e-05,
"loss": 0.0218,
"step": 21520
},
{
"epoch": 105.137278828554,
"grad_norm": 1.0976682901382446,
"learning_rate": 5.9409836065573774e-05,
"loss": 0.0212,
"step": 21540
},
{
"epoch": 105.23489932885906,
"grad_norm": 1.2036750316619873,
"learning_rate": 5.927868852459016e-05,
"loss": 0.0208,
"step": 21560
},
{
"epoch": 105.33251982916413,
"grad_norm": 1.7270026206970215,
"learning_rate": 5.9147540983606566e-05,
"loss": 0.0211,
"step": 21580
},
{
"epoch": 105.43014032946918,
"grad_norm": 2.558453321456909,
"learning_rate": 5.9016393442622956e-05,
"loss": 0.0236,
"step": 21600
},
{
"epoch": 105.52776082977425,
"grad_norm": 1.4443696737289429,
"learning_rate": 5.888524590163935e-05,
"loss": 0.0228,
"step": 21620
},
{
"epoch": 105.62538133007932,
"grad_norm": 0.9000318050384521,
"learning_rate": 5.875409836065574e-05,
"loss": 0.0223,
"step": 21640
},
{
"epoch": 105.72300183038438,
"grad_norm": 1.0460553169250488,
"learning_rate": 5.862295081967213e-05,
"loss": 0.0235,
"step": 21660
},
{
"epoch": 105.82062233068945,
"grad_norm": 1.0731499195098877,
"learning_rate": 5.849180327868853e-05,
"loss": 0.0249,
"step": 21680
},
{
"epoch": 105.91824283099452,
"grad_norm": 2.9348509311676025,
"learning_rate": 5.8360655737704916e-05,
"loss": 0.0233,
"step": 21700
},
{
"epoch": 106.01586333129957,
"grad_norm": 0.761085033416748,
"learning_rate": 5.822950819672132e-05,
"loss": 0.0252,
"step": 21720
},
{
"epoch": 106.11348383160464,
"grad_norm": 0.7206617593765259,
"learning_rate": 5.80983606557377e-05,
"loss": 0.021,
"step": 21740
},
{
"epoch": 106.2111043319097,
"grad_norm": 0.8830762505531311,
"learning_rate": 5.7967213114754104e-05,
"loss": 0.0222,
"step": 21760
},
{
"epoch": 106.30872483221476,
"grad_norm": 1.1715725660324097,
"learning_rate": 5.7836065573770494e-05,
"loss": 0.0211,
"step": 21780
},
{
"epoch": 106.40634533251983,
"grad_norm": 0.6775236129760742,
"learning_rate": 5.770491803278689e-05,
"loss": 0.0207,
"step": 21800
},
{
"epoch": 106.50396583282489,
"grad_norm": 0.787869930267334,
"learning_rate": 5.757377049180328e-05,
"loss": 0.0201,
"step": 21820
},
{
"epoch": 106.60158633312996,
"grad_norm": 0.880957305431366,
"learning_rate": 5.744262295081968e-05,
"loss": 0.0227,
"step": 21840
},
{
"epoch": 106.69920683343503,
"grad_norm": 1.0342841148376465,
"learning_rate": 5.731147540983607e-05,
"loss": 0.0204,
"step": 21860
},
{
"epoch": 106.79682733374008,
"grad_norm": 1.2203606367111206,
"learning_rate": 5.7180327868852454e-05,
"loss": 0.0217,
"step": 21880
},
{
"epoch": 106.89444783404515,
"grad_norm": 1.1879390478134155,
"learning_rate": 5.704918032786886e-05,
"loss": 0.0229,
"step": 21900
},
{
"epoch": 106.99206833435021,
"grad_norm": 1.9363093376159668,
"learning_rate": 5.6918032786885246e-05,
"loss": 0.0227,
"step": 21920
},
{
"epoch": 107.08968883465528,
"grad_norm": 1.5606794357299805,
"learning_rate": 5.678688524590164e-05,
"loss": 0.0196,
"step": 21940
},
{
"epoch": 107.18730933496035,
"grad_norm": 1.0373190641403198,
"learning_rate": 5.665573770491803e-05,
"loss": 0.0218,
"step": 21960
},
{
"epoch": 107.2849298352654,
"grad_norm": 0.7485927939414978,
"learning_rate": 5.6524590163934435e-05,
"loss": 0.019,
"step": 21980
},
{
"epoch": 107.38255033557047,
"grad_norm": 1.0232642889022827,
"learning_rate": 5.639344262295082e-05,
"loss": 0.0209,
"step": 22000
},
{
"epoch": 107.48017083587554,
"grad_norm": 0.7585027813911438,
"learning_rate": 5.626229508196722e-05,
"loss": 0.0228,
"step": 22020
},
{
"epoch": 107.5777913361806,
"grad_norm": 1.87349271774292,
"learning_rate": 5.613114754098361e-05,
"loss": 0.0226,
"step": 22040
},
{
"epoch": 107.67541183648567,
"grad_norm": 0.7697243094444275,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.0201,
"step": 22060
},
{
"epoch": 107.77303233679072,
"grad_norm": 0.8757901787757874,
"learning_rate": 5.5868852459016395e-05,
"loss": 0.0212,
"step": 22080
},
{
"epoch": 107.87065283709579,
"grad_norm": 0.6697644591331482,
"learning_rate": 5.5737704918032785e-05,
"loss": 0.0214,
"step": 22100
},
{
"epoch": 107.96827333740086,
"grad_norm": 0.9846328496932983,
"learning_rate": 5.560655737704919e-05,
"loss": 0.0224,
"step": 22120
},
{
"epoch": 108.06589383770591,
"grad_norm": 0.975273609161377,
"learning_rate": 5.547540983606557e-05,
"loss": 0.0228,
"step": 22140
},
{
"epoch": 108.16351433801098,
"grad_norm": 0.671629011631012,
"learning_rate": 5.534426229508197e-05,
"loss": 0.0183,
"step": 22160
},
{
"epoch": 108.26113483831605,
"grad_norm": 0.7465048432350159,
"learning_rate": 5.521311475409836e-05,
"loss": 0.0196,
"step": 22180
},
{
"epoch": 108.35875533862111,
"grad_norm": 0.9785251021385193,
"learning_rate": 5.508196721311476e-05,
"loss": 0.0234,
"step": 22200
},
{
"epoch": 108.45637583892618,
"grad_norm": 1.2198201417922974,
"learning_rate": 5.495081967213115e-05,
"loss": 0.0215,
"step": 22220
},
{
"epoch": 108.55399633923123,
"grad_norm": 1.261461853981018,
"learning_rate": 5.481967213114755e-05,
"loss": 0.0212,
"step": 22240
},
{
"epoch": 108.6516168395363,
"grad_norm": 1.1784855127334595,
"learning_rate": 5.4688524590163933e-05,
"loss": 0.021,
"step": 22260
},
{
"epoch": 108.74923733984137,
"grad_norm": 1.4498306512832642,
"learning_rate": 5.4557377049180336e-05,
"loss": 0.0215,
"step": 22280
},
{
"epoch": 108.84685784014643,
"grad_norm": 1.6949442625045776,
"learning_rate": 5.4426229508196726e-05,
"loss": 0.0214,
"step": 22300
},
{
"epoch": 108.9444783404515,
"grad_norm": 0.8981263637542725,
"learning_rate": 5.4295081967213115e-05,
"loss": 0.0229,
"step": 22320
},
{
"epoch": 109.04209884075657,
"grad_norm": 0.6954344511032104,
"learning_rate": 5.416393442622951e-05,
"loss": 0.0203,
"step": 22340
},
{
"epoch": 109.13971934106162,
"grad_norm": 1.6094328165054321,
"learning_rate": 5.40327868852459e-05,
"loss": 0.0186,
"step": 22360
},
{
"epoch": 109.23733984136669,
"grad_norm": 0.983757734298706,
"learning_rate": 5.3901639344262304e-05,
"loss": 0.0189,
"step": 22380
},
{
"epoch": 109.33496034167175,
"grad_norm": 0.5290912985801697,
"learning_rate": 5.3770491803278686e-05,
"loss": 0.0227,
"step": 22400
},
{
"epoch": 109.43258084197682,
"grad_norm": 0.7315634489059448,
"learning_rate": 5.363934426229509e-05,
"loss": 0.019,
"step": 22420
},
{
"epoch": 109.53020134228188,
"grad_norm": 2.0914411544799805,
"learning_rate": 5.350819672131148e-05,
"loss": 0.0201,
"step": 22440
},
{
"epoch": 109.62782184258694,
"grad_norm": 3.1637024879455566,
"learning_rate": 5.3377049180327875e-05,
"loss": 0.0208,
"step": 22460
},
{
"epoch": 109.72544234289201,
"grad_norm": 0.7364550828933716,
"learning_rate": 5.3245901639344264e-05,
"loss": 0.0224,
"step": 22480
},
{
"epoch": 109.82306284319706,
"grad_norm": 0.9041069746017456,
"learning_rate": 5.311475409836065e-05,
"loss": 0.0202,
"step": 22500
},
{
"epoch": 109.92068334350213,
"grad_norm": 1.2586231231689453,
"learning_rate": 5.298360655737705e-05,
"loss": 0.0251,
"step": 22520
},
{
"epoch": 110.0183038438072,
"grad_norm": 0.5133557915687561,
"learning_rate": 5.285245901639344e-05,
"loss": 0.0206,
"step": 22540
},
{
"epoch": 110.11592434411226,
"grad_norm": 0.9251378178596497,
"learning_rate": 5.272131147540984e-05,
"loss": 0.017,
"step": 22560
},
{
"epoch": 110.21354484441733,
"grad_norm": 0.8834558129310608,
"learning_rate": 5.259016393442623e-05,
"loss": 0.0182,
"step": 22580
},
{
"epoch": 110.3111653447224,
"grad_norm": 0.7891305685043335,
"learning_rate": 5.245901639344263e-05,
"loss": 0.0189,
"step": 22600
},
{
"epoch": 110.40878584502745,
"grad_norm": 1.17208993434906,
"learning_rate": 5.2327868852459017e-05,
"loss": 0.0208,
"step": 22620
},
{
"epoch": 110.50640634533252,
"grad_norm": 0.9067583084106445,
"learning_rate": 5.219672131147541e-05,
"loss": 0.0186,
"step": 22640
},
{
"epoch": 110.60402684563758,
"grad_norm": 1.1163853406906128,
"learning_rate": 5.20655737704918e-05,
"loss": 0.0215,
"step": 22660
},
{
"epoch": 110.70164734594265,
"grad_norm": 1.3030050992965698,
"learning_rate": 5.1934426229508205e-05,
"loss": 0.0243,
"step": 22680
},
{
"epoch": 110.79926784624772,
"grad_norm": 0.6330472230911255,
"learning_rate": 5.1803278688524594e-05,
"loss": 0.0215,
"step": 22700
},
{
"epoch": 110.89688834655277,
"grad_norm": 0.7558477520942688,
"learning_rate": 5.1672131147540984e-05,
"loss": 0.021,
"step": 22720
},
{
"epoch": 110.99450884685784,
"grad_norm": 1.7759897708892822,
"learning_rate": 5.154098360655738e-05,
"loss": 0.021,
"step": 22740
},
{
"epoch": 111.09212934716291,
"grad_norm": 3.0700387954711914,
"learning_rate": 5.140983606557377e-05,
"loss": 0.0175,
"step": 22760
},
{
"epoch": 111.18974984746797,
"grad_norm": 0.5508354902267456,
"learning_rate": 5.1278688524590165e-05,
"loss": 0.0174,
"step": 22780
},
{
"epoch": 111.28737034777303,
"grad_norm": 0.8367868661880493,
"learning_rate": 5.1147540983606555e-05,
"loss": 0.0188,
"step": 22800
},
{
"epoch": 111.38499084807809,
"grad_norm": 0.8336710333824158,
"learning_rate": 5.101639344262296e-05,
"loss": 0.0178,
"step": 22820
},
{
"epoch": 111.48261134838316,
"grad_norm": 0.8700584173202515,
"learning_rate": 5.088524590163935e-05,
"loss": 0.0215,
"step": 22840
},
{
"epoch": 111.58023184868823,
"grad_norm": 1.4950515031814575,
"learning_rate": 5.075409836065574e-05,
"loss": 0.0198,
"step": 22860
},
{
"epoch": 111.67785234899328,
"grad_norm": 1.4095662832260132,
"learning_rate": 5.062295081967213e-05,
"loss": 0.0199,
"step": 22880
},
{
"epoch": 111.77547284929835,
"grad_norm": 0.6769533157348633,
"learning_rate": 5.049180327868853e-05,
"loss": 0.0219,
"step": 22900
},
{
"epoch": 111.87309334960342,
"grad_norm": 0.6953234076499939,
"learning_rate": 5.036065573770492e-05,
"loss": 0.0214,
"step": 22920
},
{
"epoch": 111.97071384990848,
"grad_norm": 1.0300958156585693,
"learning_rate": 5.022950819672131e-05,
"loss": 0.0216,
"step": 22940
},
{
"epoch": 112.06833435021355,
"grad_norm": 1.323189377784729,
"learning_rate": 5.009836065573771e-05,
"loss": 0.0207,
"step": 22960
},
{
"epoch": 112.1659548505186,
"grad_norm": 1.1096831560134888,
"learning_rate": 4.99672131147541e-05,
"loss": 0.0189,
"step": 22980
},
{
"epoch": 112.26357535082367,
"grad_norm": 0.9971668124198914,
"learning_rate": 4.9836065573770496e-05,
"loss": 0.0194,
"step": 23000
},
{
"epoch": 112.36119585112874,
"grad_norm": 0.7574054002761841,
"learning_rate": 4.970491803278689e-05,
"loss": 0.0191,
"step": 23020
},
{
"epoch": 112.4588163514338,
"grad_norm": 0.9944175481796265,
"learning_rate": 4.957377049180328e-05,
"loss": 0.0183,
"step": 23040
},
{
"epoch": 112.55643685173887,
"grad_norm": 1.417324423789978,
"learning_rate": 4.944262295081967e-05,
"loss": 0.0203,
"step": 23060
},
{
"epoch": 112.65405735204394,
"grad_norm": 1.0666751861572266,
"learning_rate": 4.931147540983607e-05,
"loss": 0.0188,
"step": 23080
},
{
"epoch": 112.75167785234899,
"grad_norm": 0.9484390020370483,
"learning_rate": 4.918032786885246e-05,
"loss": 0.0198,
"step": 23100
},
{
"epoch": 112.84929835265406,
"grad_norm": 0.779443621635437,
"learning_rate": 4.904918032786885e-05,
"loss": 0.0222,
"step": 23120
},
{
"epoch": 112.94691885295912,
"grad_norm": 0.7193658351898193,
"learning_rate": 4.891803278688525e-05,
"loss": 0.0193,
"step": 23140
},
{
"epoch": 113.04453935326418,
"grad_norm": 1.551416039466858,
"learning_rate": 4.8786885245901645e-05,
"loss": 0.0185,
"step": 23160
},
{
"epoch": 113.14215985356925,
"grad_norm": 0.720435380935669,
"learning_rate": 4.8655737704918034e-05,
"loss": 0.0173,
"step": 23180
},
{
"epoch": 113.23978035387431,
"grad_norm": 1.4221702814102173,
"learning_rate": 4.852459016393443e-05,
"loss": 0.0166,
"step": 23200
},
{
"epoch": 113.33740085417938,
"grad_norm": 0.6794512271881104,
"learning_rate": 4.8393442622950826e-05,
"loss": 0.0176,
"step": 23220
},
{
"epoch": 113.43502135448445,
"grad_norm": 1.7901623249053955,
"learning_rate": 4.8262295081967216e-05,
"loss": 0.0178,
"step": 23240
},
{
"epoch": 113.5326418547895,
"grad_norm": 0.8727052211761475,
"learning_rate": 4.8131147540983605e-05,
"loss": 0.0206,
"step": 23260
},
{
"epoch": 113.63026235509457,
"grad_norm": 1.1418622732162476,
"learning_rate": 4.8e-05,
"loss": 0.0197,
"step": 23280
},
{
"epoch": 113.72788285539963,
"grad_norm": 1.1590226888656616,
"learning_rate": 4.78688524590164e-05,
"loss": 0.0211,
"step": 23300
},
{
"epoch": 113.8255033557047,
"grad_norm": 1.1312569379806519,
"learning_rate": 4.773770491803279e-05,
"loss": 0.0205,
"step": 23320
},
{
"epoch": 113.92312385600977,
"grad_norm": 0.8305485248565674,
"learning_rate": 4.760655737704918e-05,
"loss": 0.0213,
"step": 23340
},
{
"epoch": 114.02074435631482,
"grad_norm": 0.4991360902786255,
"learning_rate": 4.747540983606558e-05,
"loss": 0.0206,
"step": 23360
},
{
"epoch": 114.11836485661989,
"grad_norm": 0.5438668131828308,
"learning_rate": 4.734426229508197e-05,
"loss": 0.0158,
"step": 23380
},
{
"epoch": 114.21598535692496,
"grad_norm": 1.0973989963531494,
"learning_rate": 4.7213114754098365e-05,
"loss": 0.0175,
"step": 23400
},
{
"epoch": 114.31360585723002,
"grad_norm": 0.8698163032531738,
"learning_rate": 4.708196721311476e-05,
"loss": 0.0172,
"step": 23420
},
{
"epoch": 114.41122635753509,
"grad_norm": 0.7283509969711304,
"learning_rate": 4.695081967213115e-05,
"loss": 0.0188,
"step": 23440
},
{
"epoch": 114.50884685784014,
"grad_norm": 2.213822364807129,
"learning_rate": 4.681967213114754e-05,
"loss": 0.0195,
"step": 23460
},
{
"epoch": 114.60646735814521,
"grad_norm": 1.0742311477661133,
"learning_rate": 4.6688524590163936e-05,
"loss": 0.0196,
"step": 23480
},
{
"epoch": 114.70408785845028,
"grad_norm": 1.0777767896652222,
"learning_rate": 4.655737704918033e-05,
"loss": 0.0199,
"step": 23500
},
{
"epoch": 114.80170835875533,
"grad_norm": 1.4363033771514893,
"learning_rate": 4.642622950819672e-05,
"loss": 0.0212,
"step": 23520
},
{
"epoch": 114.8993288590604,
"grad_norm": 0.7380108833312988,
"learning_rate": 4.629508196721312e-05,
"loss": 0.0183,
"step": 23540
},
{
"epoch": 114.99694935936547,
"grad_norm": 0.7476310133934021,
"learning_rate": 4.616393442622951e-05,
"loss": 0.017,
"step": 23560
},
{
"epoch": 115.09456985967053,
"grad_norm": 0.7546923160552979,
"learning_rate": 4.60327868852459e-05,
"loss": 0.017,
"step": 23580
},
{
"epoch": 115.1921903599756,
"grad_norm": 1.3657608032226562,
"learning_rate": 4.59016393442623e-05,
"loss": 0.0178,
"step": 23600
},
{
"epoch": 115.28981086028065,
"grad_norm": 0.949591338634491,
"learning_rate": 4.5770491803278695e-05,
"loss": 0.0172,
"step": 23620
},
{
"epoch": 115.38743136058572,
"grad_norm": 0.6427412629127502,
"learning_rate": 4.5639344262295084e-05,
"loss": 0.0172,
"step": 23640
},
{
"epoch": 115.48505186089079,
"grad_norm": 1.0137767791748047,
"learning_rate": 4.550819672131148e-05,
"loss": 0.0161,
"step": 23660
},
{
"epoch": 115.58267236119585,
"grad_norm": 0.5349763035774231,
"learning_rate": 4.537704918032787e-05,
"loss": 0.02,
"step": 23680
},
{
"epoch": 115.68029286150092,
"grad_norm": 2.2959110736846924,
"learning_rate": 4.524590163934426e-05,
"loss": 0.023,
"step": 23700
},
{
"epoch": 115.77791336180599,
"grad_norm": 0.7627232074737549,
"learning_rate": 4.5114754098360655e-05,
"loss": 0.0188,
"step": 23720
},
{
"epoch": 115.87553386211104,
"grad_norm": 0.5804703831672668,
"learning_rate": 4.498360655737705e-05,
"loss": 0.0198,
"step": 23740
},
{
"epoch": 115.97315436241611,
"grad_norm": 0.5652183294296265,
"learning_rate": 4.485245901639345e-05,
"loss": 0.0193,
"step": 23760
},
{
"epoch": 116.07077486272117,
"grad_norm": 1.606610655784607,
"learning_rate": 4.472131147540984e-05,
"loss": 0.0183,
"step": 23780
},
{
"epoch": 116.16839536302624,
"grad_norm": 0.7952703237533569,
"learning_rate": 4.459016393442623e-05,
"loss": 0.0149,
"step": 23800
},
{
"epoch": 116.2660158633313,
"grad_norm": 0.6120955944061279,
"learning_rate": 4.445901639344263e-05,
"loss": 0.0174,
"step": 23820
},
{
"epoch": 116.36363636363636,
"grad_norm": 1.0184078216552734,
"learning_rate": 4.432786885245902e-05,
"loss": 0.0157,
"step": 23840
},
{
"epoch": 116.46125686394143,
"grad_norm": 1.3023227453231812,
"learning_rate": 4.4196721311475415e-05,
"loss": 0.0173,
"step": 23860
},
{
"epoch": 116.5588773642465,
"grad_norm": 2.9271557331085205,
"learning_rate": 4.406557377049181e-05,
"loss": 0.0192,
"step": 23880
},
{
"epoch": 116.65649786455155,
"grad_norm": 0.6684398651123047,
"learning_rate": 4.3934426229508194e-05,
"loss": 0.0186,
"step": 23900
},
{
"epoch": 116.75411836485662,
"grad_norm": 0.843756914138794,
"learning_rate": 4.380327868852459e-05,
"loss": 0.0179,
"step": 23920
},
{
"epoch": 116.85173886516168,
"grad_norm": 0.8446621894836426,
"learning_rate": 4.3672131147540986e-05,
"loss": 0.0188,
"step": 23940
},
{
"epoch": 116.94935936546675,
"grad_norm": 0.8657192587852478,
"learning_rate": 4.3540983606557375e-05,
"loss": 0.0226,
"step": 23960
},
{
"epoch": 117.04697986577182,
"grad_norm": 0.9363495707511902,
"learning_rate": 4.340983606557377e-05,
"loss": 0.0176,
"step": 23980
},
{
"epoch": 117.14460036607687,
"grad_norm": 0.8379567861557007,
"learning_rate": 4.327868852459017e-05,
"loss": 0.0157,
"step": 24000
},
{
"epoch": 117.24222086638194,
"grad_norm": 0.9036583304405212,
"learning_rate": 4.3147540983606564e-05,
"loss": 0.0187,
"step": 24020
},
{
"epoch": 117.33984136668701,
"grad_norm": 1.0149122476577759,
"learning_rate": 4.301639344262295e-05,
"loss": 0.0167,
"step": 24040
},
{
"epoch": 117.43746186699207,
"grad_norm": 0.9107224345207214,
"learning_rate": 4.288524590163935e-05,
"loss": 0.0169,
"step": 24060
},
{
"epoch": 117.53508236729714,
"grad_norm": 0.8786804676055908,
"learning_rate": 4.2754098360655745e-05,
"loss": 0.0179,
"step": 24080
},
{
"epoch": 117.63270286760219,
"grad_norm": 0.6965748071670532,
"learning_rate": 4.262295081967213e-05,
"loss": 0.0192,
"step": 24100
},
{
"epoch": 117.73032336790726,
"grad_norm": 1.9840143918991089,
"learning_rate": 4.2491803278688524e-05,
"loss": 0.0199,
"step": 24120
},
{
"epoch": 117.82794386821233,
"grad_norm": 0.770664632320404,
"learning_rate": 4.236065573770492e-05,
"loss": 0.0224,
"step": 24140
},
{
"epoch": 117.92556436851739,
"grad_norm": 0.6265982985496521,
"learning_rate": 4.222950819672131e-05,
"loss": 0.0201,
"step": 24160
},
{
"epoch": 118.02318486882245,
"grad_norm": 0.6708711385726929,
"learning_rate": 4.2098360655737706e-05,
"loss": 0.0181,
"step": 24180
},
{
"epoch": 118.12080536912751,
"grad_norm": 0.6563631892204285,
"learning_rate": 4.19672131147541e-05,
"loss": 0.0143,
"step": 24200
},
{
"epoch": 118.21842586943258,
"grad_norm": 0.6583575010299683,
"learning_rate": 4.183606557377049e-05,
"loss": 0.0166,
"step": 24220
},
{
"epoch": 118.31604636973765,
"grad_norm": 0.9662224650382996,
"learning_rate": 4.170491803278689e-05,
"loss": 0.0155,
"step": 24240
},
{
"epoch": 118.4136668700427,
"grad_norm": 0.5282565355300903,
"learning_rate": 4.1573770491803283e-05,
"loss": 0.017,
"step": 24260
},
{
"epoch": 118.51128737034777,
"grad_norm": 0.7242906093597412,
"learning_rate": 4.144262295081967e-05,
"loss": 0.017,
"step": 24280
},
{
"epoch": 118.60890787065284,
"grad_norm": 1.0325088500976562,
"learning_rate": 4.131147540983607e-05,
"loss": 0.0184,
"step": 24300
},
{
"epoch": 118.7065283709579,
"grad_norm": 0.7789930105209351,
"learning_rate": 4.118032786885246e-05,
"loss": 0.0191,
"step": 24320
},
{
"epoch": 118.80414887126297,
"grad_norm": 2.7775182723999023,
"learning_rate": 4.1049180327868854e-05,
"loss": 0.0187,
"step": 24340
},
{
"epoch": 118.90176937156802,
"grad_norm": 0.8794859647750854,
"learning_rate": 4.0918032786885244e-05,
"loss": 0.0194,
"step": 24360
},
{
"epoch": 118.99938987187309,
"grad_norm": 1.1197643280029297,
"learning_rate": 4.078688524590164e-05,
"loss": 0.022,
"step": 24380
},
{
"epoch": 119.09701037217816,
"grad_norm": 0.5833938121795654,
"learning_rate": 4.0655737704918036e-05,
"loss": 0.015,
"step": 24400
},
{
"epoch": 119.19463087248322,
"grad_norm": 0.7687615156173706,
"learning_rate": 4.0524590163934425e-05,
"loss": 0.0155,
"step": 24420
},
{
"epoch": 119.29225137278829,
"grad_norm": 0.7922354936599731,
"learning_rate": 4.039344262295082e-05,
"loss": 0.016,
"step": 24440
},
{
"epoch": 119.38987187309336,
"grad_norm": 0.5952595472335815,
"learning_rate": 4.026229508196722e-05,
"loss": 0.0179,
"step": 24460
},
{
"epoch": 119.48749237339841,
"grad_norm": 0.5200309157371521,
"learning_rate": 4.013114754098361e-05,
"loss": 0.0159,
"step": 24480
},
{
"epoch": 119.58511287370348,
"grad_norm": 0.6342631578445435,
"learning_rate": 4e-05,
"loss": 0.0161,
"step": 24500
},
{
"epoch": 119.68273337400854,
"grad_norm": 0.5420534610748291,
"learning_rate": 3.98688524590164e-05,
"loss": 0.0208,
"step": 24520
},
{
"epoch": 119.7803538743136,
"grad_norm": 2.066472291946411,
"learning_rate": 3.973770491803279e-05,
"loss": 0.02,
"step": 24540
},
{
"epoch": 119.87797437461867,
"grad_norm": 0.8765552043914795,
"learning_rate": 3.960655737704918e-05,
"loss": 0.0187,
"step": 24560
},
{
"epoch": 119.97559487492373,
"grad_norm": 0.7948490977287292,
"learning_rate": 3.9475409836065574e-05,
"loss": 0.021,
"step": 24580
},
{
"epoch": 120.0732153752288,
"grad_norm": 0.6576656699180603,
"learning_rate": 3.934426229508197e-05,
"loss": 0.018,
"step": 24600
},
{
"epoch": 120.17083587553387,
"grad_norm": 1.538063883781433,
"learning_rate": 3.921311475409836e-05,
"loss": 0.0172,
"step": 24620
},
{
"epoch": 120.26845637583892,
"grad_norm": 0.6647894382476807,
"learning_rate": 3.9081967213114756e-05,
"loss": 0.0142,
"step": 24640
},
{
"epoch": 120.36607687614399,
"grad_norm": 0.9566786289215088,
"learning_rate": 3.895081967213115e-05,
"loss": 0.0177,
"step": 24660
},
{
"epoch": 120.46369737644905,
"grad_norm": 0.9630607962608337,
"learning_rate": 3.881967213114754e-05,
"loss": 0.0167,
"step": 24680
},
{
"epoch": 120.56131787675412,
"grad_norm": 0.8539407849311829,
"learning_rate": 3.868852459016394e-05,
"loss": 0.02,
"step": 24700
},
{
"epoch": 120.65893837705919,
"grad_norm": 1.4023864269256592,
"learning_rate": 3.8557377049180334e-05,
"loss": 0.0161,
"step": 24720
},
{
"epoch": 120.75655887736424,
"grad_norm": 1.2014553546905518,
"learning_rate": 3.842622950819672e-05,
"loss": 0.0176,
"step": 24740
},
{
"epoch": 120.85417937766931,
"grad_norm": 0.828676164150238,
"learning_rate": 3.829508196721311e-05,
"loss": 0.0184,
"step": 24760
},
{
"epoch": 120.95179987797438,
"grad_norm": 0.7894257307052612,
"learning_rate": 3.816393442622951e-05,
"loss": 0.0186,
"step": 24780
},
{
"epoch": 121.04942037827944,
"grad_norm": 0.642427384853363,
"learning_rate": 3.8032786885245905e-05,
"loss": 0.0174,
"step": 24800
},
{
"epoch": 121.1470408785845,
"grad_norm": 0.7018671631813049,
"learning_rate": 3.7901639344262294e-05,
"loss": 0.0147,
"step": 24820
},
{
"epoch": 121.24466137888956,
"grad_norm": 0.7113062739372253,
"learning_rate": 3.777049180327869e-05,
"loss": 0.0163,
"step": 24840
},
{
"epoch": 121.34228187919463,
"grad_norm": 0.395190566778183,
"learning_rate": 3.7639344262295086e-05,
"loss": 0.0162,
"step": 24860
},
{
"epoch": 121.4399023794997,
"grad_norm": 0.6177274584770203,
"learning_rate": 3.7508196721311476e-05,
"loss": 0.0197,
"step": 24880
},
{
"epoch": 121.53752287980475,
"grad_norm": 0.8992127776145935,
"learning_rate": 3.737704918032787e-05,
"loss": 0.0171,
"step": 24900
},
{
"epoch": 121.63514338010982,
"grad_norm": 0.6104313135147095,
"learning_rate": 3.724590163934427e-05,
"loss": 0.0177,
"step": 24920
},
{
"epoch": 121.7327638804149,
"grad_norm": 1.0897140502929688,
"learning_rate": 3.711475409836066e-05,
"loss": 0.0191,
"step": 24940
},
{
"epoch": 121.83038438071995,
"grad_norm": 0.6623597145080566,
"learning_rate": 3.698360655737705e-05,
"loss": 0.0178,
"step": 24960
},
{
"epoch": 121.92800488102502,
"grad_norm": 0.6479583382606506,
"learning_rate": 3.685245901639344e-05,
"loss": 0.0175,
"step": 24980
},
{
"epoch": 122.02562538133007,
"grad_norm": 0.504220187664032,
"learning_rate": 3.672131147540984e-05,
"loss": 0.0179,
"step": 25000
},
{
"epoch": 122.12324588163514,
"grad_norm": 0.5783191323280334,
"learning_rate": 3.659016393442623e-05,
"loss": 0.0159,
"step": 25020
},
{
"epoch": 122.22086638194021,
"grad_norm": 0.6851247549057007,
"learning_rate": 3.6459016393442625e-05,
"loss": 0.0167,
"step": 25040
},
{
"epoch": 122.31848688224527,
"grad_norm": 0.5190562009811401,
"learning_rate": 3.632786885245902e-05,
"loss": 0.0168,
"step": 25060
},
{
"epoch": 122.41610738255034,
"grad_norm": 0.8032932281494141,
"learning_rate": 3.619672131147541e-05,
"loss": 0.0173,
"step": 25080
},
{
"epoch": 122.5137278828554,
"grad_norm": 1.162681221961975,
"learning_rate": 3.6065573770491806e-05,
"loss": 0.0175,
"step": 25100
},
{
"epoch": 122.61134838316046,
"grad_norm": 0.9898841977119446,
"learning_rate": 3.59344262295082e-05,
"loss": 0.0154,
"step": 25120
},
{
"epoch": 122.70896888346553,
"grad_norm": 0.7703188061714172,
"learning_rate": 3.580327868852459e-05,
"loss": 0.0177,
"step": 25140
},
{
"epoch": 122.80658938377059,
"grad_norm": 0.6557360291481018,
"learning_rate": 3.567213114754099e-05,
"loss": 0.0187,
"step": 25160
},
{
"epoch": 122.90420988407566,
"grad_norm": 0.6278268694877625,
"learning_rate": 3.554098360655738e-05,
"loss": 0.0173,
"step": 25180
},
{
"epoch": 123.00183038438072,
"grad_norm": 0.5595793128013611,
"learning_rate": 3.5409836065573773e-05,
"loss": 0.02,
"step": 25200
},
{
"epoch": 123.09945088468578,
"grad_norm": 0.8069674968719482,
"learning_rate": 3.527868852459016e-05,
"loss": 0.0157,
"step": 25220
},
{
"epoch": 123.19707138499085,
"grad_norm": 0.5641182661056519,
"learning_rate": 3.514754098360656e-05,
"loss": 0.0162,
"step": 25240
},
{
"epoch": 123.29469188529592,
"grad_norm": 1.641262412071228,
"learning_rate": 3.5016393442622955e-05,
"loss": 0.0153,
"step": 25260
},
{
"epoch": 123.39231238560097,
"grad_norm": 0.828906238079071,
"learning_rate": 3.4885245901639344e-05,
"loss": 0.0165,
"step": 25280
},
{
"epoch": 123.48993288590604,
"grad_norm": 0.4439915418624878,
"learning_rate": 3.475409836065574e-05,
"loss": 0.0175,
"step": 25300
},
{
"epoch": 123.5875533862111,
"grad_norm": 0.5250588059425354,
"learning_rate": 3.462295081967214e-05,
"loss": 0.016,
"step": 25320
},
{
"epoch": 123.68517388651617,
"grad_norm": 1.8672527074813843,
"learning_rate": 3.4491803278688526e-05,
"loss": 0.0168,
"step": 25340
},
{
"epoch": 123.78279438682124,
"grad_norm": 0.905852735042572,
"learning_rate": 3.436065573770492e-05,
"loss": 0.0166,
"step": 25360
},
{
"epoch": 123.88041488712629,
"grad_norm": 1.0820852518081665,
"learning_rate": 3.422950819672131e-05,
"loss": 0.018,
"step": 25380
},
{
"epoch": 123.97803538743136,
"grad_norm": 0.901567816734314,
"learning_rate": 3.409836065573771e-05,
"loss": 0.0175,
"step": 25400
},
{
"epoch": 124.07565588773643,
"grad_norm": 0.5788626074790955,
"learning_rate": 3.39672131147541e-05,
"loss": 0.0153,
"step": 25420
},
{
"epoch": 124.17327638804149,
"grad_norm": 2.4590864181518555,
"learning_rate": 3.383606557377049e-05,
"loss": 0.0156,
"step": 25440
},
{
"epoch": 124.27089688834656,
"grad_norm": 0.5568335056304932,
"learning_rate": 3.370491803278689e-05,
"loss": 0.0145,
"step": 25460
},
{
"epoch": 124.36851738865161,
"grad_norm": 0.9648571014404297,
"learning_rate": 3.357377049180328e-05,
"loss": 0.0139,
"step": 25480
},
{
"epoch": 124.46613788895668,
"grad_norm": 1.7256869077682495,
"learning_rate": 3.3442622950819675e-05,
"loss": 0.0161,
"step": 25500
},
{
"epoch": 124.56375838926175,
"grad_norm": 0.7551959753036499,
"learning_rate": 3.331147540983607e-05,
"loss": 0.0165,
"step": 25520
},
{
"epoch": 124.6613788895668,
"grad_norm": 0.6856973767280579,
"learning_rate": 3.318032786885246e-05,
"loss": 0.0167,
"step": 25540
},
{
"epoch": 124.75899938987187,
"grad_norm": 0.6650362610816956,
"learning_rate": 3.3049180327868857e-05,
"loss": 0.0164,
"step": 25560
},
{
"epoch": 124.85661989017694,
"grad_norm": 1.0952746868133545,
"learning_rate": 3.291803278688525e-05,
"loss": 0.0181,
"step": 25580
},
{
"epoch": 124.954240390482,
"grad_norm": 0.8695099353790283,
"learning_rate": 3.2786885245901635e-05,
"loss": 0.0167,
"step": 25600
},
{
"epoch": 125.05186089078707,
"grad_norm": 0.5697212219238281,
"learning_rate": 3.265573770491803e-05,
"loss": 0.0166,
"step": 25620
},
{
"epoch": 125.14948139109212,
"grad_norm": 0.6281394958496094,
"learning_rate": 3.252459016393443e-05,
"loss": 0.0138,
"step": 25640
},
{
"epoch": 125.2471018913972,
"grad_norm": 0.7632110118865967,
"learning_rate": 3.2393442622950824e-05,
"loss": 0.0163,
"step": 25660
},
{
"epoch": 125.34472239170226,
"grad_norm": 0.587164580821991,
"learning_rate": 3.226229508196721e-05,
"loss": 0.0169,
"step": 25680
},
{
"epoch": 125.44234289200732,
"grad_norm": 0.8123992681503296,
"learning_rate": 3.213114754098361e-05,
"loss": 0.0156,
"step": 25700
},
{
"epoch": 125.53996339231239,
"grad_norm": 0.7210849523544312,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0161,
"step": 25720
},
{
"epoch": 125.63758389261746,
"grad_norm": 0.6011385917663574,
"learning_rate": 3.1868852459016395e-05,
"loss": 0.0178,
"step": 25740
},
{
"epoch": 125.73520439292251,
"grad_norm": 0.8048945665359497,
"learning_rate": 3.173770491803279e-05,
"loss": 0.0172,
"step": 25760
},
{
"epoch": 125.83282489322758,
"grad_norm": 0.5456706285476685,
"learning_rate": 3.160655737704919e-05,
"loss": 0.0188,
"step": 25780
},
{
"epoch": 125.93044539353264,
"grad_norm": 1.419385313987732,
"learning_rate": 3.1475409836065576e-05,
"loss": 0.0187,
"step": 25800
},
{
"epoch": 126.0280658938377,
"grad_norm": 0.8208538293838501,
"learning_rate": 3.1344262295081966e-05,
"loss": 0.0149,
"step": 25820
},
{
"epoch": 126.12568639414278,
"grad_norm": 0.45135247707366943,
"learning_rate": 3.121311475409836e-05,
"loss": 0.0139,
"step": 25840
},
{
"epoch": 126.22330689444783,
"grad_norm": 0.565280556678772,
"learning_rate": 3.108196721311475e-05,
"loss": 0.0123,
"step": 25860
},
{
"epoch": 126.3209273947529,
"grad_norm": 0.742659866809845,
"learning_rate": 3.095081967213115e-05,
"loss": 0.0151,
"step": 25880
},
{
"epoch": 126.41854789505797,
"grad_norm": 1.5381386280059814,
"learning_rate": 3.0819672131147544e-05,
"loss": 0.0157,
"step": 25900
},
{
"epoch": 126.51616839536302,
"grad_norm": 0.626524031162262,
"learning_rate": 3.068852459016393e-05,
"loss": 0.0151,
"step": 25920
},
{
"epoch": 126.6137888956681,
"grad_norm": 0.6463727355003357,
"learning_rate": 3.055737704918033e-05,
"loss": 0.0174,
"step": 25940
},
{
"epoch": 126.71140939597315,
"grad_norm": 0.48679399490356445,
"learning_rate": 3.0426229508196725e-05,
"loss": 0.0154,
"step": 25960
},
{
"epoch": 126.80902989627822,
"grad_norm": 0.9534430503845215,
"learning_rate": 3.0295081967213118e-05,
"loss": 0.0185,
"step": 25980
},
{
"epoch": 126.90665039658329,
"grad_norm": 0.571997344493866,
"learning_rate": 3.016393442622951e-05,
"loss": 0.0174,
"step": 26000
},
{
"epoch": 127.00427089688834,
"grad_norm": 0.8983253836631775,
"learning_rate": 3.00327868852459e-05,
"loss": 0.0183,
"step": 26020
},
{
"epoch": 127.10189139719341,
"grad_norm": 0.37496012449264526,
"learning_rate": 2.9901639344262293e-05,
"loss": 0.0135,
"step": 26040
},
{
"epoch": 127.19951189749847,
"grad_norm": 0.7320930361747742,
"learning_rate": 2.977049180327869e-05,
"loss": 0.0138,
"step": 26060
},
{
"epoch": 127.29713239780354,
"grad_norm": 1.5510950088500977,
"learning_rate": 2.963934426229508e-05,
"loss": 0.0181,
"step": 26080
},
{
"epoch": 127.3947528981086,
"grad_norm": 0.25900664925575256,
"learning_rate": 2.9508196721311478e-05,
"loss": 0.0145,
"step": 26100
},
{
"epoch": 127.49237339841366,
"grad_norm": 0.7860931754112244,
"learning_rate": 2.937704918032787e-05,
"loss": 0.0149,
"step": 26120
},
{
"epoch": 127.58999389871873,
"grad_norm": 1.5779728889465332,
"learning_rate": 2.9245901639344263e-05,
"loss": 0.0168,
"step": 26140
},
{
"epoch": 127.6876143990238,
"grad_norm": 0.8237743377685547,
"learning_rate": 2.911475409836066e-05,
"loss": 0.0173,
"step": 26160
},
{
"epoch": 127.78523489932886,
"grad_norm": 0.5260995626449585,
"learning_rate": 2.8983606557377052e-05,
"loss": 0.0148,
"step": 26180
},
{
"epoch": 127.88285539963393,
"grad_norm": 1.158836007118225,
"learning_rate": 2.8852459016393445e-05,
"loss": 0.0179,
"step": 26200
},
{
"epoch": 127.98047589993898,
"grad_norm": 1.7822519540786743,
"learning_rate": 2.872131147540984e-05,
"loss": 0.0153,
"step": 26220
},
{
"epoch": 128.07809640024405,
"grad_norm": 0.6409000158309937,
"learning_rate": 2.8590163934426227e-05,
"loss": 0.0178,
"step": 26240
},
{
"epoch": 128.1757169005491,
"grad_norm": 0.7198218107223511,
"learning_rate": 2.8459016393442623e-05,
"loss": 0.0143,
"step": 26260
},
{
"epoch": 128.2733374008542,
"grad_norm": 0.9570964574813843,
"learning_rate": 2.8327868852459016e-05,
"loss": 0.0132,
"step": 26280
},
{
"epoch": 128.37095790115924,
"grad_norm": 0.40788573026657104,
"learning_rate": 2.819672131147541e-05,
"loss": 0.0151,
"step": 26300
},
{
"epoch": 128.4685784014643,
"grad_norm": 1.0642712116241455,
"learning_rate": 2.8065573770491805e-05,
"loss": 0.0154,
"step": 26320
},
{
"epoch": 128.56619890176938,
"grad_norm": 0.5972766280174255,
"learning_rate": 2.7934426229508198e-05,
"loss": 0.015,
"step": 26340
},
{
"epoch": 128.66381940207444,
"grad_norm": 0.5974799990653992,
"learning_rate": 2.7803278688524594e-05,
"loss": 0.0144,
"step": 26360
},
{
"epoch": 128.7614399023795,
"grad_norm": 0.8131697773933411,
"learning_rate": 2.7672131147540987e-05,
"loss": 0.0166,
"step": 26380
},
{
"epoch": 128.85906040268458,
"grad_norm": 0.8219912648200989,
"learning_rate": 2.754098360655738e-05,
"loss": 0.0186,
"step": 26400
},
{
"epoch": 128.95668090298963,
"grad_norm": 0.7310410737991333,
"learning_rate": 2.7409836065573775e-05,
"loss": 0.0158,
"step": 26420
},
{
"epoch": 129.0543014032947,
"grad_norm": 0.7448714375495911,
"learning_rate": 2.7278688524590168e-05,
"loss": 0.0161,
"step": 26440
},
{
"epoch": 129.15192190359974,
"grad_norm": 1.0379338264465332,
"learning_rate": 2.7147540983606558e-05,
"loss": 0.0129,
"step": 26460
},
{
"epoch": 129.24954240390483,
"grad_norm": 0.4505363404750824,
"learning_rate": 2.701639344262295e-05,
"loss": 0.0139,
"step": 26480
},
{
"epoch": 129.34716290420988,
"grad_norm": 0.49264198541641235,
"learning_rate": 2.6885245901639343e-05,
"loss": 0.0141,
"step": 26500
},
{
"epoch": 129.44478340451494,
"grad_norm": 0.38399410247802734,
"learning_rate": 2.675409836065574e-05,
"loss": 0.0148,
"step": 26520
},
{
"epoch": 129.54240390482002,
"grad_norm": 0.8914321660995483,
"learning_rate": 2.6622950819672132e-05,
"loss": 0.0159,
"step": 26540
},
{
"epoch": 129.64002440512508,
"grad_norm": 0.8293542265892029,
"learning_rate": 2.6491803278688525e-05,
"loss": 0.0157,
"step": 26560
},
{
"epoch": 129.73764490543013,
"grad_norm": 0.5534564256668091,
"learning_rate": 2.636065573770492e-05,
"loss": 0.0158,
"step": 26580
},
{
"epoch": 129.8352654057352,
"grad_norm": 0.7157993912696838,
"learning_rate": 2.6229508196721314e-05,
"loss": 0.016,
"step": 26600
},
{
"epoch": 129.93288590604027,
"grad_norm": 0.7746397256851196,
"learning_rate": 2.6098360655737706e-05,
"loss": 0.0192,
"step": 26620
},
{
"epoch": 130.03050640634532,
"grad_norm": 0.7727970480918884,
"learning_rate": 2.5967213114754103e-05,
"loss": 0.0151,
"step": 26640
},
{
"epoch": 130.1281269066504,
"grad_norm": 0.514680802822113,
"learning_rate": 2.5836065573770492e-05,
"loss": 0.0159,
"step": 26660
},
{
"epoch": 130.22574740695546,
"grad_norm": 0.87467360496521,
"learning_rate": 2.5704918032786885e-05,
"loss": 0.0137,
"step": 26680
},
{
"epoch": 130.32336790726052,
"grad_norm": 0.7342318296432495,
"learning_rate": 2.5573770491803277e-05,
"loss": 0.0164,
"step": 26700
},
{
"epoch": 130.4209884075656,
"grad_norm": 0.46169203519821167,
"learning_rate": 2.5442622950819674e-05,
"loss": 0.0148,
"step": 26720
},
{
"epoch": 130.51860890787066,
"grad_norm": 0.5552070140838623,
"learning_rate": 2.5311475409836066e-05,
"loss": 0.0146,
"step": 26740
},
{
"epoch": 130.6162294081757,
"grad_norm": 2.3732874393463135,
"learning_rate": 2.518032786885246e-05,
"loss": 0.0151,
"step": 26760
},
{
"epoch": 130.71384990848077,
"grad_norm": 0.7399420142173767,
"learning_rate": 2.5049180327868855e-05,
"loss": 0.0136,
"step": 26780
},
{
"epoch": 130.81147040878585,
"grad_norm": 0.7631209492683411,
"learning_rate": 2.4918032786885248e-05,
"loss": 0.0168,
"step": 26800
},
{
"epoch": 130.9090909090909,
"grad_norm": 0.4778473675251007,
"learning_rate": 2.478688524590164e-05,
"loss": 0.0144,
"step": 26820
},
{
"epoch": 131.00671140939596,
"grad_norm": 0.48981741070747375,
"learning_rate": 2.4655737704918033e-05,
"loss": 0.0174,
"step": 26840
},
{
"epoch": 131.10433190970105,
"grad_norm": 0.550786018371582,
"learning_rate": 2.4524590163934426e-05,
"loss": 0.0144,
"step": 26860
},
{
"epoch": 131.2019524100061,
"grad_norm": 1.1115200519561768,
"learning_rate": 2.4393442622950822e-05,
"loss": 0.0137,
"step": 26880
},
{
"epoch": 131.29957291031116,
"grad_norm": 0.7832316160202026,
"learning_rate": 2.4262295081967215e-05,
"loss": 0.0138,
"step": 26900
},
{
"epoch": 131.39719341061624,
"grad_norm": 0.7918095588684082,
"learning_rate": 2.4131147540983608e-05,
"loss": 0.0153,
"step": 26920
},
{
"epoch": 131.4948139109213,
"grad_norm": 0.5915355682373047,
"learning_rate": 2.4e-05,
"loss": 0.0159,
"step": 26940
},
{
"epoch": 131.59243441122635,
"grad_norm": 0.6909199357032776,
"learning_rate": 2.3868852459016393e-05,
"loss": 0.0159,
"step": 26960
},
{
"epoch": 131.69005491153143,
"grad_norm": 1.0566037893295288,
"learning_rate": 2.373770491803279e-05,
"loss": 0.0147,
"step": 26980
},
{
"epoch": 131.7876754118365,
"grad_norm": 1.6122446060180664,
"learning_rate": 2.3606557377049182e-05,
"loss": 0.0141,
"step": 27000
},
{
"epoch": 131.88529591214154,
"grad_norm": 0.8080132007598877,
"learning_rate": 2.3475409836065575e-05,
"loss": 0.0155,
"step": 27020
},
{
"epoch": 131.98291641244663,
"grad_norm": 0.45939984917640686,
"learning_rate": 2.3344262295081968e-05,
"loss": 0.0166,
"step": 27040
},
{
"epoch": 132.08053691275168,
"grad_norm": 0.8284308314323425,
"learning_rate": 2.321311475409836e-05,
"loss": 0.015,
"step": 27060
},
{
"epoch": 132.17815741305674,
"grad_norm": 0.6223374605178833,
"learning_rate": 2.3081967213114757e-05,
"loss": 0.0155,
"step": 27080
},
{
"epoch": 132.2757779133618,
"grad_norm": 1.6535650491714478,
"learning_rate": 2.295081967213115e-05,
"loss": 0.015,
"step": 27100
},
{
"epoch": 132.37339841366688,
"grad_norm": 0.6285653710365295,
"learning_rate": 2.2819672131147542e-05,
"loss": 0.0158,
"step": 27120
},
{
"epoch": 132.47101891397193,
"grad_norm": 0.6470975279808044,
"learning_rate": 2.2688524590163935e-05,
"loss": 0.0131,
"step": 27140
},
{
"epoch": 132.568639414277,
"grad_norm": 0.6603531241416931,
"learning_rate": 2.2557377049180328e-05,
"loss": 0.0155,
"step": 27160
},
{
"epoch": 132.66625991458207,
"grad_norm": 0.9789283275604248,
"learning_rate": 2.2426229508196724e-05,
"loss": 0.014,
"step": 27180
},
{
"epoch": 132.76388041488713,
"grad_norm": 0.7158600687980652,
"learning_rate": 2.2295081967213117e-05,
"loss": 0.0149,
"step": 27200
},
{
"epoch": 132.86150091519218,
"grad_norm": 0.4593288004398346,
"learning_rate": 2.216393442622951e-05,
"loss": 0.0149,
"step": 27220
},
{
"epoch": 132.95912141549726,
"grad_norm": 0.7383930087089539,
"learning_rate": 2.2032786885245905e-05,
"loss": 0.0158,
"step": 27240
},
{
"epoch": 133.05674191580232,
"grad_norm": 0.8438706398010254,
"learning_rate": 2.1901639344262295e-05,
"loss": 0.0152,
"step": 27260
},
{
"epoch": 133.15436241610738,
"grad_norm": 0.3977959156036377,
"learning_rate": 2.1770491803278688e-05,
"loss": 0.0135,
"step": 27280
},
{
"epoch": 133.25198291641246,
"grad_norm": 0.5032092332839966,
"learning_rate": 2.1639344262295084e-05,
"loss": 0.0144,
"step": 27300
},
{
"epoch": 133.3496034167175,
"grad_norm": 0.8900758028030396,
"learning_rate": 2.1508196721311476e-05,
"loss": 0.0142,
"step": 27320
},
{
"epoch": 133.44722391702257,
"grad_norm": 0.6694475412368774,
"learning_rate": 2.1377049180327873e-05,
"loss": 0.0148,
"step": 27340
},
{
"epoch": 133.54484441732765,
"grad_norm": 0.6150327920913696,
"learning_rate": 2.1245901639344262e-05,
"loss": 0.0137,
"step": 27360
},
{
"epoch": 133.6424649176327,
"grad_norm": 0.3980708718299866,
"learning_rate": 2.1114754098360655e-05,
"loss": 0.0131,
"step": 27380
},
{
"epoch": 133.74008541793776,
"grad_norm": 0.556053876876831,
"learning_rate": 2.098360655737705e-05,
"loss": 0.0173,
"step": 27400
},
{
"epoch": 133.83770591824282,
"grad_norm": 0.7154746055603027,
"learning_rate": 2.0852459016393444e-05,
"loss": 0.0142,
"step": 27420
},
{
"epoch": 133.9353264185479,
"grad_norm": 0.585117757320404,
"learning_rate": 2.0721311475409836e-05,
"loss": 0.0148,
"step": 27440
},
{
"epoch": 134.03294691885296,
"grad_norm": 0.4688512682914734,
"learning_rate": 2.059016393442623e-05,
"loss": 0.0139,
"step": 27460
},
{
"epoch": 134.130567419158,
"grad_norm": 0.3597017824649811,
"learning_rate": 2.0459016393442622e-05,
"loss": 0.0125,
"step": 27480
},
{
"epoch": 134.2281879194631,
"grad_norm": 0.6201938986778259,
"learning_rate": 2.0327868852459018e-05,
"loss": 0.014,
"step": 27500
},
{
"epoch": 134.32580841976815,
"grad_norm": 0.6969265341758728,
"learning_rate": 2.019672131147541e-05,
"loss": 0.0133,
"step": 27520
},
{
"epoch": 134.4234289200732,
"grad_norm": 0.6457026600837708,
"learning_rate": 2.0065573770491804e-05,
"loss": 0.0152,
"step": 27540
},
{
"epoch": 134.5210494203783,
"grad_norm": 0.7583892941474915,
"learning_rate": 1.99344262295082e-05,
"loss": 0.0148,
"step": 27560
},
{
"epoch": 134.61866992068335,
"grad_norm": 0.41781967878341675,
"learning_rate": 1.980327868852459e-05,
"loss": 0.0145,
"step": 27580
},
{
"epoch": 134.7162904209884,
"grad_norm": 1.2802424430847168,
"learning_rate": 1.9672131147540985e-05,
"loss": 0.0158,
"step": 27600
},
{
"epoch": 134.81391092129348,
"grad_norm": 0.3811515271663666,
"learning_rate": 1.9540983606557378e-05,
"loss": 0.0136,
"step": 27620
},
{
"epoch": 134.91153142159854,
"grad_norm": 0.41068577766418457,
"learning_rate": 1.940983606557377e-05,
"loss": 0.0166,
"step": 27640
},
{
"epoch": 135.0091519219036,
"grad_norm": 0.690075695514679,
"learning_rate": 1.9278688524590167e-05,
"loss": 0.0152,
"step": 27660
},
{
"epoch": 135.10677242220865,
"grad_norm": 0.6945540308952332,
"learning_rate": 1.9147540983606556e-05,
"loss": 0.0125,
"step": 27680
},
{
"epoch": 135.20439292251373,
"grad_norm": 0.9262276291847229,
"learning_rate": 1.9016393442622952e-05,
"loss": 0.014,
"step": 27700
},
{
"epoch": 135.3020134228188,
"grad_norm": 0.5992072224617004,
"learning_rate": 1.8885245901639345e-05,
"loss": 0.0132,
"step": 27720
},
{
"epoch": 135.39963392312384,
"grad_norm": 0.6684610247612,
"learning_rate": 1.8754098360655738e-05,
"loss": 0.0147,
"step": 27740
},
{
"epoch": 135.49725442342893,
"grad_norm": 0.647719144821167,
"learning_rate": 1.8622950819672134e-05,
"loss": 0.0168,
"step": 27760
},
{
"epoch": 135.59487492373398,
"grad_norm": 1.5291879177093506,
"learning_rate": 1.8491803278688523e-05,
"loss": 0.0159,
"step": 27780
},
{
"epoch": 135.69249542403904,
"grad_norm": 0.7436932325363159,
"learning_rate": 1.836065573770492e-05,
"loss": 0.0136,
"step": 27800
},
{
"epoch": 135.79011592434412,
"grad_norm": 0.38243773579597473,
"learning_rate": 1.8229508196721312e-05,
"loss": 0.0145,
"step": 27820
},
{
"epoch": 135.88773642464918,
"grad_norm": 0.6765353679656982,
"learning_rate": 1.8098360655737705e-05,
"loss": 0.0139,
"step": 27840
},
{
"epoch": 135.98535692495423,
"grad_norm": 0.3190823495388031,
"learning_rate": 1.79672131147541e-05,
"loss": 0.0152,
"step": 27860
},
{
"epoch": 136.08297742525932,
"grad_norm": 2.0219767093658447,
"learning_rate": 1.7836065573770494e-05,
"loss": 0.0143,
"step": 27880
},
{
"epoch": 136.18059792556437,
"grad_norm": 0.776849627494812,
"learning_rate": 1.7704918032786887e-05,
"loss": 0.0135,
"step": 27900
},
{
"epoch": 136.27821842586943,
"grad_norm": 0.5274736285209656,
"learning_rate": 1.757377049180328e-05,
"loss": 0.0123,
"step": 27920
},
{
"epoch": 136.3758389261745,
"grad_norm": 0.886225700378418,
"learning_rate": 1.7442622950819672e-05,
"loss": 0.0146,
"step": 27940
},
{
"epoch": 136.47345942647956,
"grad_norm": 0.5282070636749268,
"learning_rate": 1.731147540983607e-05,
"loss": 0.0137,
"step": 27960
},
{
"epoch": 136.57107992678462,
"grad_norm": 0.6784070730209351,
"learning_rate": 1.718032786885246e-05,
"loss": 0.0143,
"step": 27980
},
{
"epoch": 136.66870042708968,
"grad_norm": 1.7534900903701782,
"learning_rate": 1.7049180327868854e-05,
"loss": 0.0137,
"step": 28000
},
{
"epoch": 136.76632092739476,
"grad_norm": 0.40347975492477417,
"learning_rate": 1.6918032786885247e-05,
"loss": 0.0157,
"step": 28020
},
{
"epoch": 136.8639414276998,
"grad_norm": 1.0218480825424194,
"learning_rate": 1.678688524590164e-05,
"loss": 0.0145,
"step": 28040
},
{
"epoch": 136.96156192800487,
"grad_norm": 0.2875036597251892,
"learning_rate": 1.6655737704918036e-05,
"loss": 0.014,
"step": 28060
},
{
"epoch": 137.05918242830995,
"grad_norm": 1.5968719720840454,
"learning_rate": 1.6524590163934428e-05,
"loss": 0.0132,
"step": 28080
},
{
"epoch": 137.156802928615,
"grad_norm": 0.39140036702156067,
"learning_rate": 1.6393442622950818e-05,
"loss": 0.0138,
"step": 28100
},
{
"epoch": 137.25442342892006,
"grad_norm": 0.36571571230888367,
"learning_rate": 1.6262295081967214e-05,
"loss": 0.0134,
"step": 28120
},
{
"epoch": 137.35204392922515,
"grad_norm": 0.6531932950019836,
"learning_rate": 1.6131147540983607e-05,
"loss": 0.0146,
"step": 28140
},
{
"epoch": 137.4496644295302,
"grad_norm": 0.46148520708084106,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0136,
"step": 28160
},
{
"epoch": 137.54728492983526,
"grad_norm": 0.5359562635421753,
"learning_rate": 1.5868852459016395e-05,
"loss": 0.0128,
"step": 28180
},
{
"epoch": 137.64490543014034,
"grad_norm": 0.5632950663566589,
"learning_rate": 1.5737704918032788e-05,
"loss": 0.0148,
"step": 28200
},
{
"epoch": 137.7425259304454,
"grad_norm": 0.7229663729667664,
"learning_rate": 1.560655737704918e-05,
"loss": 0.0147,
"step": 28220
},
{
"epoch": 137.84014643075045,
"grad_norm": 0.5531187653541565,
"learning_rate": 1.5475409836065574e-05,
"loss": 0.014,
"step": 28240
},
{
"epoch": 137.93776693105553,
"grad_norm": 0.6305696964263916,
"learning_rate": 1.5344262295081966e-05,
"loss": 0.0157,
"step": 28260
},
{
"epoch": 138.0353874313606,
"grad_norm": 0.8933548331260681,
"learning_rate": 1.5213114754098363e-05,
"loss": 0.0156,
"step": 28280
},
{
"epoch": 138.13300793166565,
"grad_norm": 0.39126649498939514,
"learning_rate": 1.5081967213114755e-05,
"loss": 0.011,
"step": 28300
},
{
"epoch": 138.2306284319707,
"grad_norm": 0.6234102249145508,
"learning_rate": 1.4950819672131146e-05,
"loss": 0.0133,
"step": 28320
},
{
"epoch": 138.32824893227578,
"grad_norm": 0.5867244005203247,
"learning_rate": 1.481967213114754e-05,
"loss": 0.0138,
"step": 28340
},
{
"epoch": 138.42586943258084,
"grad_norm": 0.6564351916313171,
"learning_rate": 1.4688524590163935e-05,
"loss": 0.014,
"step": 28360
},
{
"epoch": 138.5234899328859,
"grad_norm": 1.0982993841171265,
"learning_rate": 1.455737704918033e-05,
"loss": 0.0133,
"step": 28380
},
{
"epoch": 138.62111043319098,
"grad_norm": 0.8140943646430969,
"learning_rate": 1.4426229508196722e-05,
"loss": 0.0149,
"step": 28400
},
{
"epoch": 138.71873093349603,
"grad_norm": 0.7273306846618652,
"learning_rate": 1.4295081967213114e-05,
"loss": 0.0149,
"step": 28420
},
{
"epoch": 138.8163514338011,
"grad_norm": 0.46543437242507935,
"learning_rate": 1.4163934426229508e-05,
"loss": 0.0143,
"step": 28440
},
{
"epoch": 138.91397193410617,
"grad_norm": 1.823746681213379,
"learning_rate": 1.4032786885245902e-05,
"loss": 0.0141,
"step": 28460
},
{
"epoch": 139.01159243441123,
"grad_norm": 0.602825939655304,
"learning_rate": 1.3901639344262297e-05,
"loss": 0.0129,
"step": 28480
},
{
"epoch": 139.10921293471628,
"grad_norm": 0.30030643939971924,
"learning_rate": 1.377049180327869e-05,
"loss": 0.0108,
"step": 28500
},
{
"epoch": 139.20683343502137,
"grad_norm": 0.7023382186889648,
"learning_rate": 1.3639344262295084e-05,
"loss": 0.013,
"step": 28520
},
{
"epoch": 139.30445393532642,
"grad_norm": 0.8771500587463379,
"learning_rate": 1.3508196721311475e-05,
"loss": 0.0144,
"step": 28540
},
{
"epoch": 139.40207443563148,
"grad_norm": 0.6988272666931152,
"learning_rate": 1.337704918032787e-05,
"loss": 0.0125,
"step": 28560
},
{
"epoch": 139.49969493593656,
"grad_norm": 0.8657557368278503,
"learning_rate": 1.3245901639344262e-05,
"loss": 0.0138,
"step": 28580
},
{
"epoch": 139.59731543624162,
"grad_norm": 0.6832662224769592,
"learning_rate": 1.3114754098360657e-05,
"loss": 0.0127,
"step": 28600
},
{
"epoch": 139.69493593654667,
"grad_norm": 0.9065951108932495,
"learning_rate": 1.2983606557377051e-05,
"loss": 0.015,
"step": 28620
},
{
"epoch": 139.79255643685173,
"grad_norm": 0.9211568236351013,
"learning_rate": 1.2852459016393442e-05,
"loss": 0.0131,
"step": 28640
},
{
"epoch": 139.8901769371568,
"grad_norm": 0.6160862445831299,
"learning_rate": 1.2721311475409837e-05,
"loss": 0.0163,
"step": 28660
},
{
"epoch": 139.98779743746186,
"grad_norm": 0.8593130111694336,
"learning_rate": 1.259016393442623e-05,
"loss": 0.0135,
"step": 28680
},
{
"epoch": 140.08541793776692,
"grad_norm": 0.7746515274047852,
"learning_rate": 1.2459016393442624e-05,
"loss": 0.0141,
"step": 28700
},
{
"epoch": 140.183038438072,
"grad_norm": 0.7830790877342224,
"learning_rate": 1.2327868852459017e-05,
"loss": 0.0126,
"step": 28720
},
{
"epoch": 140.28065893837706,
"grad_norm": 0.49005040526390076,
"learning_rate": 1.2196721311475411e-05,
"loss": 0.0127,
"step": 28740
},
{
"epoch": 140.3782794386821,
"grad_norm": 0.9640679359436035,
"learning_rate": 1.2065573770491804e-05,
"loss": 0.0125,
"step": 28760
},
{
"epoch": 140.4758999389872,
"grad_norm": 0.8114829659461975,
"learning_rate": 1.1934426229508197e-05,
"loss": 0.0138,
"step": 28780
},
{
"epoch": 140.57352043929225,
"grad_norm": 0.8460706472396851,
"learning_rate": 1.1803278688524591e-05,
"loss": 0.0148,
"step": 28800
},
{
"epoch": 140.6711409395973,
"grad_norm": 0.4882986843585968,
"learning_rate": 1.1672131147540984e-05,
"loss": 0.0141,
"step": 28820
},
{
"epoch": 140.7687614399024,
"grad_norm": 1.0322729349136353,
"learning_rate": 1.1540983606557378e-05,
"loss": 0.0148,
"step": 28840
},
{
"epoch": 140.86638194020745,
"grad_norm": 1.2970582246780396,
"learning_rate": 1.1409836065573771e-05,
"loss": 0.0144,
"step": 28860
},
{
"epoch": 140.9640024405125,
"grad_norm": 0.9063767790794373,
"learning_rate": 1.1278688524590164e-05,
"loss": 0.0123,
"step": 28880
},
{
"epoch": 141.06162294081759,
"grad_norm": 0.60384202003479,
"learning_rate": 1.1147540983606558e-05,
"loss": 0.0122,
"step": 28900
},
{
"epoch": 141.15924344112264,
"grad_norm": 0.5142499804496765,
"learning_rate": 1.1016393442622953e-05,
"loss": 0.0125,
"step": 28920
},
{
"epoch": 141.2568639414277,
"grad_norm": 0.6854032874107361,
"learning_rate": 1.0885245901639344e-05,
"loss": 0.0132,
"step": 28940
},
{
"epoch": 141.35448444173275,
"grad_norm": 1.138895034790039,
"learning_rate": 1.0754098360655738e-05,
"loss": 0.0138,
"step": 28960
},
{
"epoch": 141.45210494203783,
"grad_norm": 0.5815340280532837,
"learning_rate": 1.0622950819672131e-05,
"loss": 0.0115,
"step": 28980
},
{
"epoch": 141.5497254423429,
"grad_norm": 0.6024242639541626,
"learning_rate": 1.0491803278688525e-05,
"loss": 0.0127,
"step": 29000
},
{
"epoch": 141.64734594264795,
"grad_norm": 0.44016191363334656,
"learning_rate": 1.0360655737704918e-05,
"loss": 0.0146,
"step": 29020
},
{
"epoch": 141.74496644295303,
"grad_norm": 2.051720142364502,
"learning_rate": 1.0229508196721311e-05,
"loss": 0.0151,
"step": 29040
},
{
"epoch": 141.84258694325808,
"grad_norm": 0.6961409449577332,
"learning_rate": 1.0098360655737705e-05,
"loss": 0.013,
"step": 29060
},
{
"epoch": 141.94020744356314,
"grad_norm": 1.1912919282913208,
"learning_rate": 9.9672131147541e-06,
"loss": 0.0131,
"step": 29080
},
{
"epoch": 142.03782794386822,
"grad_norm": 0.6203546524047852,
"learning_rate": 9.836065573770493e-06,
"loss": 0.013,
"step": 29100
},
{
"epoch": 142.13544844417328,
"grad_norm": 0.5386860966682434,
"learning_rate": 9.704918032786885e-06,
"loss": 0.0123,
"step": 29120
},
{
"epoch": 142.23306894447833,
"grad_norm": 0.5639663934707642,
"learning_rate": 9.573770491803278e-06,
"loss": 0.0123,
"step": 29140
},
{
"epoch": 142.33068944478342,
"grad_norm": 0.577315628528595,
"learning_rate": 9.442622950819673e-06,
"loss": 0.0125,
"step": 29160
},
{
"epoch": 142.42830994508847,
"grad_norm": 0.5142390727996826,
"learning_rate": 9.311475409836067e-06,
"loss": 0.0133,
"step": 29180
},
{
"epoch": 142.52593044539353,
"grad_norm": 0.7933589816093445,
"learning_rate": 9.18032786885246e-06,
"loss": 0.0151,
"step": 29200
},
{
"epoch": 142.6235509456986,
"grad_norm": 0.8499199151992798,
"learning_rate": 9.049180327868853e-06,
"loss": 0.0136,
"step": 29220
},
{
"epoch": 142.72117144600367,
"grad_norm": 0.6795129179954529,
"learning_rate": 8.918032786885247e-06,
"loss": 0.0136,
"step": 29240
},
{
"epoch": 142.81879194630872,
"grad_norm": 0.3827701210975647,
"learning_rate": 8.78688524590164e-06,
"loss": 0.0122,
"step": 29260
},
{
"epoch": 142.91641244661378,
"grad_norm": 0.6248555779457092,
"learning_rate": 8.655737704918034e-06,
"loss": 0.0113,
"step": 29280
},
{
"epoch": 143.01403294691886,
"grad_norm": 0.9943171739578247,
"learning_rate": 8.524590163934427e-06,
"loss": 0.0145,
"step": 29300
},
{
"epoch": 143.11165344722392,
"grad_norm": 0.3848264217376709,
"learning_rate": 8.39344262295082e-06,
"loss": 0.0119,
"step": 29320
},
{
"epoch": 143.20927394752897,
"grad_norm": 1.02989661693573,
"learning_rate": 8.262295081967214e-06,
"loss": 0.0123,
"step": 29340
},
{
"epoch": 143.30689444783405,
"grad_norm": 0.5843254923820496,
"learning_rate": 8.131147540983607e-06,
"loss": 0.0124,
"step": 29360
},
{
"epoch": 143.4045149481391,
"grad_norm": 0.5134753584861755,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0122,
"step": 29380
},
{
"epoch": 143.50213544844416,
"grad_norm": 0.4464253783226013,
"learning_rate": 7.868852459016394e-06,
"loss": 0.0116,
"step": 29400
},
{
"epoch": 143.59975594874925,
"grad_norm": 0.445730060338974,
"learning_rate": 7.737704918032787e-06,
"loss": 0.0116,
"step": 29420
},
{
"epoch": 143.6973764490543,
"grad_norm": 0.7831693887710571,
"learning_rate": 7.606557377049181e-06,
"loss": 0.0122,
"step": 29440
},
{
"epoch": 143.79499694935936,
"grad_norm": 0.33939194679260254,
"learning_rate": 7.475409836065573e-06,
"loss": 0.0131,
"step": 29460
},
{
"epoch": 143.89261744966444,
"grad_norm": 0.36323612928390503,
"learning_rate": 7.344262295081968e-06,
"loss": 0.0157,
"step": 29480
},
{
"epoch": 143.9902379499695,
"grad_norm": 0.6487870216369629,
"learning_rate": 7.213114754098361e-06,
"loss": 0.0157,
"step": 29500
},
{
"epoch": 144.08785845027455,
"grad_norm": 0.3841145932674408,
"learning_rate": 7.081967213114754e-06,
"loss": 0.0106,
"step": 29520
},
{
"epoch": 144.1854789505796,
"grad_norm": 1.0142998695373535,
"learning_rate": 6.9508196721311484e-06,
"loss": 0.0129,
"step": 29540
},
{
"epoch": 144.2830994508847,
"grad_norm": 1.5330740213394165,
"learning_rate": 6.819672131147542e-06,
"loss": 0.0142,
"step": 29560
},
{
"epoch": 144.38071995118975,
"grad_norm": 2.0231475830078125,
"learning_rate": 6.688524590163935e-06,
"loss": 0.0115,
"step": 29580
},
{
"epoch": 144.4783404514948,
"grad_norm": 0.542549192905426,
"learning_rate": 6.557377049180328e-06,
"loss": 0.0131,
"step": 29600
},
{
"epoch": 144.57596095179989,
"grad_norm": 0.6942082047462463,
"learning_rate": 6.426229508196721e-06,
"loss": 0.013,
"step": 29620
},
{
"epoch": 144.67358145210494,
"grad_norm": 0.4934479296207428,
"learning_rate": 6.295081967213115e-06,
"loss": 0.0124,
"step": 29640
},
{
"epoch": 144.77120195241,
"grad_norm": 0.9981206655502319,
"learning_rate": 6.163934426229508e-06,
"loss": 0.013,
"step": 29660
},
{
"epoch": 144.86882245271508,
"grad_norm": 0.5263285636901855,
"learning_rate": 6.032786885245902e-06,
"loss": 0.013,
"step": 29680
},
{
"epoch": 144.96644295302013,
"grad_norm": 0.4131539762020111,
"learning_rate": 5.9016393442622956e-06,
"loss": 0.0132,
"step": 29700
},
{
"epoch": 145.0640634533252,
"grad_norm": 0.9396491646766663,
"learning_rate": 5.770491803278689e-06,
"loss": 0.012,
"step": 29720
},
{
"epoch": 145.16168395363027,
"grad_norm": 0.37081795930862427,
"learning_rate": 5.639344262295082e-06,
"loss": 0.0118,
"step": 29740
},
{
"epoch": 145.25930445393533,
"grad_norm": 0.5653529167175293,
"learning_rate": 5.508196721311476e-06,
"loss": 0.0122,
"step": 29760
},
{
"epoch": 145.35692495424038,
"grad_norm": 0.49712416529655457,
"learning_rate": 5.377049180327869e-06,
"loss": 0.012,
"step": 29780
},
{
"epoch": 145.45454545454547,
"grad_norm": 0.6723568439483643,
"learning_rate": 5.245901639344263e-06,
"loss": 0.0132,
"step": 29800
},
{
"epoch": 145.55216595485052,
"grad_norm": 0.6191849708557129,
"learning_rate": 5.1147540983606555e-06,
"loss": 0.0142,
"step": 29820
},
{
"epoch": 145.64978645515558,
"grad_norm": 0.8201606273651123,
"learning_rate": 4.98360655737705e-06,
"loss": 0.014,
"step": 29840
},
{
"epoch": 145.74740695546063,
"grad_norm": 0.4357975423336029,
"learning_rate": 4.852459016393443e-06,
"loss": 0.0119,
"step": 29860
},
{
"epoch": 145.84502745576572,
"grad_norm": 0.5062920451164246,
"learning_rate": 4.721311475409836e-06,
"loss": 0.0112,
"step": 29880
},
{
"epoch": 145.94264795607077,
"grad_norm": 0.6272954940795898,
"learning_rate": 4.59016393442623e-06,
"loss": 0.0121,
"step": 29900
},
{
"epoch": 146.04026845637583,
"grad_norm": 0.3578208088874817,
"learning_rate": 4.4590163934426235e-06,
"loss": 0.0137,
"step": 29920
},
{
"epoch": 146.1378889566809,
"grad_norm": 0.4044102132320404,
"learning_rate": 4.327868852459017e-06,
"loss": 0.0133,
"step": 29940
},
{
"epoch": 146.23550945698597,
"grad_norm": 0.4162692725658417,
"learning_rate": 4.19672131147541e-06,
"loss": 0.013,
"step": 29960
},
{
"epoch": 146.33312995729102,
"grad_norm": 0.6349827647209167,
"learning_rate": 4.0655737704918034e-06,
"loss": 0.0138,
"step": 29980
},
{
"epoch": 146.4307504575961,
"grad_norm": 0.6992813348770142,
"learning_rate": 3.934426229508197e-06,
"loss": 0.0142,
"step": 30000
}
],
"logging_steps": 20,
"max_steps": 30600,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.9434336130018816e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}