FastLongSpeech / trainer_state.json

Upload folder using huggingface_hub

567160a verified 9 months ago

283 kB

	{
	"best_metric": 0.443807452917099,
	"best_model_checkpoint": "CTCLLMs_self_tokenizer/checkpoints/LongSpeech_CTC-Shrink_augment_data_self_tokenizer_addMLS_projector_restore/checkpoint-30000",
	"epoch": 1.0,
	"eval_steps": 1000,
	"global_step": 31479,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0006353441977191143,
	"grad_norm": 45.06840896606445,
	"learning_rate": 3.597883597883598e-06,
	"loss": 72.1477,
	"step": 20
	},
	{
	"epoch": 0.0012706883954382287,
	"grad_norm": 56.45563507080078,
	"learning_rate": 7.830687830687831e-06,
	"loss": 71.8917,
	"step": 40
	},
	{
	"epoch": 0.001906032593157343,
	"grad_norm": 62.59088897705078,
	"learning_rate": 1.1851851851851853e-05,
	"loss": 71.7764,
	"step": 60
	},
	{
	"epoch": 0.0025413767908764573,
	"grad_norm": 75.64707946777344,
	"learning_rate": 1.6084656084656086e-05,
	"loss": 70.9277,
	"step": 80
	},
	{
	"epoch": 0.003176720988595572,
	"grad_norm": 73.5933837890625,
	"learning_rate": 2.031746031746032e-05,
	"loss": 68.0688,
	"step": 100
	},
	{
	"epoch": 0.003812065186314686,
	"grad_norm": 77.9434814453125,
	"learning_rate": 2.4550264550264552e-05,
	"loss": 65.4844,
	"step": 120
	},
	{
	"epoch": 0.004447409384033801,
	"grad_norm": 81.92144775390625,
	"learning_rate": 2.8783068783068785e-05,
	"loss": 61.2486,
	"step": 140
	},
	{
	"epoch": 0.005082753581752915,
	"grad_norm": 91.82105255126953,
	"learning_rate": 3.3015873015873014e-05,
	"loss": 55.9783,
	"step": 160
	},
	{
	"epoch": 0.005718097779472029,
	"grad_norm": 103.17108917236328,
	"learning_rate": 3.724867724867725e-05,
	"loss": 51.7487,
	"step": 180
	},
	{
	"epoch": 0.006353441977191144,
	"grad_norm": 98.97240447998047,
	"learning_rate": 4.148148148148148e-05,
	"loss": 45.0213,
	"step": 200
	},
	{
	"epoch": 0.006988786174910258,
	"grad_norm": 81.4900894165039,
	"learning_rate": 4.5714285714285716e-05,
	"loss": 38.3125,
	"step": 220
	},
	{
	"epoch": 0.007624130372629372,
	"grad_norm": 71.47420501708984,
	"learning_rate": 4.9947089947089946e-05,
	"loss": 33.2395,
	"step": 240
	},
	{
	"epoch": 0.008259474570348486,
	"grad_norm": 63.618309020996094,
	"learning_rate": 5.417989417989419e-05,
	"loss": 28.4421,
	"step": 260
	},
	{
	"epoch": 0.008894818768067601,
	"grad_norm": 58.004974365234375,
	"learning_rate": 5.841269841269842e-05,
	"loss": 25.048,
	"step": 280
	},
	{
	"epoch": 0.009530162965786714,
	"grad_norm": 46.489200592041016,
	"learning_rate": 6.264550264550265e-05,
	"loss": 21.9312,
	"step": 300
	},
	{
	"epoch": 0.01016550716350583,
	"grad_norm": 37.90148162841797,
	"learning_rate": 6.687830687830688e-05,
	"loss": 19.0696,
	"step": 320
	},
	{
	"epoch": 0.010800851361224944,
	"grad_norm": 36.47368240356445,
	"learning_rate": 7.111111111111112e-05,
	"loss": 17.0151,
	"step": 340
	},
	{
	"epoch": 0.011436195558944057,
	"grad_norm": 32.80181884765625,
	"learning_rate": 7.534391534391536e-05,
	"loss": 15.5522,
	"step": 360
	},
	{
	"epoch": 0.012071539756663172,
	"grad_norm": 25.543760299682617,
	"learning_rate": 7.957671957671958e-05,
	"loss": 14.1982,
	"step": 380
	},
	{
	"epoch": 0.012706883954382287,
	"grad_norm": 22.31871223449707,
	"learning_rate": 8.380952380952382e-05,
	"loss": 13.2314,
	"step": 400
	},
	{
	"epoch": 0.0133422281521014,
	"grad_norm": 18.374950408935547,
	"learning_rate": 8.804232804232805e-05,
	"loss": 12.4637,
	"step": 420
	},
	{
	"epoch": 0.013977572349820515,
	"grad_norm": 18.497610092163086,
	"learning_rate": 9.227513227513229e-05,
	"loss": 11.9765,
	"step": 440
	},
	{
	"epoch": 0.01461291654753963,
	"grad_norm": 14.529912948608398,
	"learning_rate": 9.650793650793651e-05,
	"loss": 11.2678,
	"step": 460
	},
	{
	"epoch": 0.015248260745258743,
	"grad_norm": 12.937056541442871,
	"learning_rate": 0.00010074074074074073,
	"loss": 10.6223,
	"step": 480
	},
	{
	"epoch": 0.015883604942977858,
	"grad_norm": 12.284934043884277,
	"learning_rate": 0.00010497354497354497,
	"loss": 10.189,
	"step": 500
	},
	{
	"epoch": 0.016518949140696973,
	"grad_norm": 9.824132919311523,
	"learning_rate": 0.0001092063492063492,
	"loss": 9.8138,
	"step": 520
	},
	{
	"epoch": 0.017154293338416088,
	"grad_norm": 8.129488945007324,
	"learning_rate": 0.00011343915343915343,
	"loss": 9.4242,
	"step": 540
	},
	{
	"epoch": 0.017789637536135203,
	"grad_norm": 9.27999496459961,
	"learning_rate": 0.00011767195767195766,
	"loss": 9.1365,
	"step": 560
	},
	{
	"epoch": 0.018424981733854314,
	"grad_norm": 5.250537872314453,
	"learning_rate": 0.00012190476190476193,
	"loss": 8.8276,
	"step": 580
	},
	{
	"epoch": 0.01906032593157343,
	"grad_norm": 5.430091381072998,
	"learning_rate": 0.00012613756613756615,
	"loss": 8.5892,
	"step": 600
	},
	{
	"epoch": 0.019695670129292544,
	"grad_norm": 3.3930234909057617,
	"learning_rate": 0.0001303703703703704,
	"loss": 8.3652,
	"step": 620
	},
	{
	"epoch": 0.02033101432701166,
	"grad_norm": 2.841287136077881,
	"learning_rate": 0.00013460317460317462,
	"loss": 8.1527,
	"step": 640
	},
	{
	"epoch": 0.020966358524730774,
	"grad_norm": 2.188707113265991,
	"learning_rate": 0.00013883597883597885,
	"loss": 7.9891,
	"step": 660
	},
	{
	"epoch": 0.02160170272244989,
	"grad_norm": 2.6337716579437256,
	"learning_rate": 0.0001430687830687831,
	"loss": 7.8345,
	"step": 680
	},
	{
	"epoch": 0.022237046920169,
	"grad_norm": 1.7390124797821045,
	"learning_rate": 0.00014730158730158732,
	"loss": 7.6817,
	"step": 700
	},
	{
	"epoch": 0.022872391117888115,
	"grad_norm": 1.6422362327575684,
	"learning_rate": 0.00015153439153439154,
	"loss": 7.5748,
	"step": 720
	},
	{
	"epoch": 0.02350773531560723,
	"grad_norm": 1.6876453161239624,
	"learning_rate": 0.0001557671957671958,
	"loss": 7.3896,
	"step": 740
	},
	{
	"epoch": 0.024143079513326345,
	"grad_norm": 1.230586290359497,
	"learning_rate": 0.00016,
	"loss": 7.3337,
	"step": 760
	},
	{
	"epoch": 0.02477842371104546,
	"grad_norm": 1.2059415578842163,
	"learning_rate": 0.00016423280423280424,
	"loss": 7.2545,
	"step": 780
	},
	{
	"epoch": 0.025413767908764574,
	"grad_norm": 1.5651260614395142,
	"learning_rate": 0.00016846560846560849,
	"loss": 7.1927,
	"step": 800
	},
	{
	"epoch": 0.02604911210648369,
	"grad_norm": 2.234393358230591,
	"learning_rate": 0.0001726984126984127,
	"loss": 7.1617,
	"step": 820
	},
	{
	"epoch": 0.0266844563042028,
	"grad_norm": 1.6703732013702393,
	"learning_rate": 0.00017693121693121696,
	"loss": 7.093,
	"step": 840
	},
	{
	"epoch": 0.027319800501921915,
	"grad_norm": 0.796870231628418,
	"learning_rate": 0.00018116402116402118,
	"loss": 7.0105,
	"step": 860
	},
	{
	"epoch": 0.02795514469964103,
	"grad_norm": 1.0919573307037354,
	"learning_rate": 0.0001853968253968254,
	"loss": 6.9911,
	"step": 880
	},
	{
	"epoch": 0.028590488897360145,
	"grad_norm": 1.3225408792495728,
	"learning_rate": 0.00018962962962962965,
	"loss": 6.9353,
	"step": 900
	},
	{
	"epoch": 0.02922583309507926,
	"grad_norm": 0.9445711970329285,
	"learning_rate": 0.00019386243386243388,
	"loss": 6.9075,
	"step": 920
	},
	{
	"epoch": 0.029861177292798375,
	"grad_norm": 1.0021796226501465,
	"learning_rate": 0.0001980952380952381,
	"loss": 6.8545,
	"step": 940
	},
	{
	"epoch": 0.030496521490517486,
	"grad_norm": 1.147709608078003,
	"learning_rate": 0.00019999993595464,
	"loss": 6.8145,
	"step": 960
	},
	{
	"epoch": 0.0311318656882366,
	"grad_norm": 1.4438824653625488,
	"learning_rate": 0.00019999949134260042,
	"loss": 6.7156,
	"step": 980
	},
	{
	"epoch": 0.031767209885955716,
	"grad_norm": 1.4000093936920166,
	"learning_rate": 0.0001999986232924222,
	"loss": 6.6363,
	"step": 1000
	},
	{
	"epoch": 0.031767209885955716,
	"eval_loss": 6.87591028213501,
	"eval_runtime": 46.4669,
	"eval_samples_per_second": 58.17,
	"eval_steps_per_second": 29.096,
	"step": 1000
	},
	{
	"epoch": 0.03240255408367483,
	"grad_norm": 2.151993989944458,
	"learning_rate": 0.00019999733180778103,
	"loss": 6.5176,
	"step": 1020
	},
	{
	"epoch": 0.033037898281393946,
	"grad_norm": 1.611135721206665,
	"learning_rate": 0.00019999561689414561,
	"loss": 6.4132,
	"step": 1040
	},
	{
	"epoch": 0.03367324247911306,
	"grad_norm": 2.1010184288024902,
	"learning_rate": 0.00019999347855877755,
	"loss": 6.2465,
	"step": 1060
	},
	{
	"epoch": 0.034308586676832176,
	"grad_norm": 1.5021122694015503,
	"learning_rate": 0.0001999909168107314,
	"loss": 6.1662,
	"step": 1080
	},
	{
	"epoch": 0.03494393087455129,
	"grad_norm": 1.4672967195510864,
	"learning_rate": 0.0001999879316608547,
	"loss": 6.0509,
	"step": 1100
	},
	{
	"epoch": 0.035579275072270405,
	"grad_norm": 1.4146413803100586,
	"learning_rate": 0.0001999845231217877,
	"loss": 5.9012,
	"step": 1120
	},
	{
	"epoch": 0.03621461926998951,
	"grad_norm": 1.252382755279541,
	"learning_rate": 0.00019998069120796358,
	"loss": 5.815,
	"step": 1140
	},
	{
	"epoch": 0.03684996346770863,
	"grad_norm": 1.6317933797836304,
	"learning_rate": 0.0001999764359356082,
	"loss": 5.771,
	"step": 1160
	},
	{
	"epoch": 0.03748530766542774,
	"grad_norm": 1.2354493141174316,
	"learning_rate": 0.0001999717573227401,
	"loss": 5.6189,
	"step": 1180
	},
	{
	"epoch": 0.03812065186314686,
	"grad_norm": 1.1442275047302246,
	"learning_rate": 0.0001999666553891704,
	"loss": 5.5078,
	"step": 1200
	},
	{
	"epoch": 0.03875599606086597,
	"grad_norm": 1.3596833944320679,
	"learning_rate": 0.0001999611301565027,
	"loss": 5.4507,
	"step": 1220
	},
	{
	"epoch": 0.03939134025858509,
	"grad_norm": 1.5420782566070557,
	"learning_rate": 0.00019995518164813315,
	"loss": 5.3225,
	"step": 1240
	},
	{
	"epoch": 0.0400266844563042,
	"grad_norm": 2.335935354232788,
	"learning_rate": 0.00019994880988925007,
	"loss": 5.3398,
	"step": 1260
	},
	{
	"epoch": 0.04066202865402332,
	"grad_norm": 1.2030448913574219,
	"learning_rate": 0.00019994201490683406,
	"loss": 5.2367,
	"step": 1280
	},
	{
	"epoch": 0.04129737285174243,
	"grad_norm": 1.1881422996520996,
	"learning_rate": 0.00019993479672965783,
	"loss": 5.2073,
	"step": 1300
	},
	{
	"epoch": 0.04193271704946155,
	"grad_norm": 1.2961896657943726,
	"learning_rate": 0.00019992715538828609,
	"loss": 5.157,
	"step": 1320
	},
	{
	"epoch": 0.04256806124718066,
	"grad_norm": 0.9343932271003723,
	"learning_rate": 0.00019991909091507525,
	"loss": 5.0156,
	"step": 1340
	},
	{
	"epoch": 0.04320340544489978,
	"grad_norm": 0.9654686450958252,
	"learning_rate": 0.00019991060334417364,
	"loss": 5.054,
	"step": 1360
	},
	{
	"epoch": 0.04383874964261889,
	"grad_norm": 1.4537482261657715,
	"learning_rate": 0.00019990169271152098,
	"loss": 4.9824,
	"step": 1380
	},
	{
	"epoch": 0.044474093840338,
	"grad_norm": 1.0155112743377686,
	"learning_rate": 0.00019989235905484853,
	"loss": 4.8496,
	"step": 1400
	},
	{
	"epoch": 0.045109438038057115,
	"grad_norm": 0.8903729915618896,
	"learning_rate": 0.00019988260241367875,
	"loss": 4.8407,
	"step": 1420
	},
	{
	"epoch": 0.04574478223577623,
	"grad_norm": 1.0020333528518677,
	"learning_rate": 0.00019987242282932518,
	"loss": 4.7753,
	"step": 1440
	},
	{
	"epoch": 0.046380126433495344,
	"grad_norm": 1.2074095010757446,
	"learning_rate": 0.0001998618203448923,
	"loss": 4.6939,
	"step": 1460
	},
	{
	"epoch": 0.04701547063121446,
	"grad_norm": 2.5281686782836914,
	"learning_rate": 0.00019985079500527527,
	"loss": 4.6567,
	"step": 1480
	},
	{
	"epoch": 0.047650814828933574,
	"grad_norm": 1.257580280303955,
	"learning_rate": 0.00019983934685715982,
	"loss": 4.5615,
	"step": 1500
	},
	{
	"epoch": 0.04828615902665269,
	"grad_norm": 1.5581581592559814,
	"learning_rate": 0.00019982747594902203,
	"loss": 4.6081,
	"step": 1520
	},
	{
	"epoch": 0.048921503224371804,
	"grad_norm": 1.029440999031067,
	"learning_rate": 0.0001998151823311281,
	"loss": 4.491,
	"step": 1540
	},
	{
	"epoch": 0.04955684742209092,
	"grad_norm": 0.9729529023170471,
	"learning_rate": 0.0001998024660555342,
	"loss": 4.4692,
	"step": 1560
	},
	{
	"epoch": 0.050192191619810034,
	"grad_norm": 1.1230270862579346,
	"learning_rate": 0.00019978932717608613,
	"loss": 4.3839,
	"step": 1580
	},
	{
	"epoch": 0.05082753581752915,
	"grad_norm": 1.048663854598999,
	"learning_rate": 0.0001997757657484192,
	"loss": 4.3907,
	"step": 1600
	},
	{
	"epoch": 0.051462880015248263,
	"grad_norm": 1.2080233097076416,
	"learning_rate": 0.000199761781829958,
	"loss": 4.3147,
	"step": 1620
	},
	{
	"epoch": 0.05209822421296738,
	"grad_norm": 1.1026450395584106,
	"learning_rate": 0.000199747375479916,
	"loss": 4.2496,
	"step": 1640
	},
	{
	"epoch": 0.052733568410686486,
	"grad_norm": 1.037937879562378,
	"learning_rate": 0.00019973254675929554,
	"loss": 4.2614,
	"step": 1660
	},
	{
	"epoch": 0.0533689126084056,
	"grad_norm": 1.1000276803970337,
	"learning_rate": 0.00019971729573088742,
	"loss": 4.1367,
	"step": 1680
	},
	{
	"epoch": 0.054004256806124716,
	"grad_norm": 1.4259387254714966,
	"learning_rate": 0.0001997016224592706,
	"loss": 4.1126,
	"step": 1700
	},
	{
	"epoch": 0.05463960100384383,
	"grad_norm": 1.2918739318847656,
	"learning_rate": 0.00019968552701081203,
	"loss": 4.0945,
	"step": 1720
	},
	{
	"epoch": 0.055274945201562946,
	"grad_norm": 1.0148296356201172,
	"learning_rate": 0.00019966900945366634,
	"loss": 3.9981,
	"step": 1740
	},
	{
	"epoch": 0.05591028939928206,
	"grad_norm": 1.4177788496017456,
	"learning_rate": 0.0001996520698577755,
	"loss": 3.9247,
	"step": 1760
	},
	{
	"epoch": 0.056545633597001176,
	"grad_norm": 1.1384249925613403,
	"learning_rate": 0.00019963470829486858,
	"loss": 3.9204,
	"step": 1780
	},
	{
	"epoch": 0.05718097779472029,
	"grad_norm": 1.2175607681274414,
	"learning_rate": 0.0001996169248384615,
	"loss": 3.9023,
	"step": 1800
	},
	{
	"epoch": 0.057816321992439405,
	"grad_norm": 1.7040660381317139,
	"learning_rate": 0.0001995987195638565,
	"loss": 3.8349,
	"step": 1820
	},
	{
	"epoch": 0.05845166619015852,
	"grad_norm": 1.4229464530944824,
	"learning_rate": 0.0001995800925481421,
	"loss": 3.7969,
	"step": 1840
	},
	{
	"epoch": 0.059087010387877635,
	"grad_norm": 1.1412523984909058,
	"learning_rate": 0.0001995610438701925,
	"loss": 3.6494,
	"step": 1860
	},
	{
	"epoch": 0.05972235458559675,
	"grad_norm": 1.3119606971740723,
	"learning_rate": 0.00019954157361066764,
	"loss": 3.6137,
	"step": 1880
	},
	{
	"epoch": 0.06035769878331586,
	"grad_norm": 1.260469675064087,
	"learning_rate": 0.0001995216818520123,
	"loss": 3.5703,
	"step": 1900
	},
	{
	"epoch": 0.06099304298103497,
	"grad_norm": 1.6222745180130005,
	"learning_rate": 0.00019950136867845627,
	"loss": 3.4526,
	"step": 1920
	},
	{
	"epoch": 0.06162838717875409,
	"grad_norm": 1.399109125137329,
	"learning_rate": 0.00019948063417601369,
	"loss": 3.4467,
	"step": 1940
	},
	{
	"epoch": 0.0622637313764732,
	"grad_norm": 1.1804718971252441,
	"learning_rate": 0.00019945947843248276,
	"loss": 3.3017,
	"step": 1960
	},
	{
	"epoch": 0.06289907557419232,
	"grad_norm": 1.1146492958068848,
	"learning_rate": 0.0001994379015374455,
	"loss": 3.2564,
	"step": 1980
	},
	{
	"epoch": 0.06353441977191143,
	"grad_norm": 1.3201006650924683,
	"learning_rate": 0.00019941590358226713,
	"loss": 3.2076,
	"step": 2000
	},
	{
	"epoch": 0.06353441977191143,
	"eval_loss": 3.1886417865753174,
	"eval_runtime": 45.0925,
	"eval_samples_per_second": 59.943,
	"eval_steps_per_second": 29.983,
	"step": 2000
	},
	{
	"epoch": 0.06416976396963055,
	"grad_norm": 1.4352892637252808,
	"learning_rate": 0.00019939348466009588,
	"loss": 3.1246,
	"step": 2020
	},
	{
	"epoch": 0.06480510816734966,
	"grad_norm": 1.4391227960586548,
	"learning_rate": 0.0001993706448658625,
	"loss": 3.1187,
	"step": 2040
	},
	{
	"epoch": 0.06544045236506878,
	"grad_norm": 1.2951711416244507,
	"learning_rate": 0.0001993473842962798,
	"loss": 3.0175,
	"step": 2060
	},
	{
	"epoch": 0.06607579656278789,
	"grad_norm": 1.559552550315857,
	"learning_rate": 0.00019932370304984255,
	"loss": 2.8894,
	"step": 2080
	},
	{
	"epoch": 0.066711140760507,
	"grad_norm": 1.2822929620742798,
	"learning_rate": 0.00019929960122682655,
	"loss": 2.8483,
	"step": 2100
	},
	{
	"epoch": 0.06734648495822612,
	"grad_norm": 1.4227052927017212,
	"learning_rate": 0.00019927507892928873,
	"loss": 2.8691,
	"step": 2120
	},
	{
	"epoch": 0.06798182915594524,
	"grad_norm": 1.643660306930542,
	"learning_rate": 0.00019925013626106633,
	"loss": 2.8578,
	"step": 2140
	},
	{
	"epoch": 0.06861717335366435,
	"grad_norm": 1.1360414028167725,
	"learning_rate": 0.00019922477332777664,
	"loss": 2.7094,
	"step": 2160
	},
	{
	"epoch": 0.06925251755138347,
	"grad_norm": 1.224853277206421,
	"learning_rate": 0.00019919899023681658,
	"loss": 2.6953,
	"step": 2180
	},
	{
	"epoch": 0.06988786174910258,
	"grad_norm": 1.093682885169983,
	"learning_rate": 0.00019917278709736212,
	"loss": 2.6255,
	"step": 2200
	},
	{
	"epoch": 0.0705232059468217,
	"grad_norm": 1.238864779472351,
	"learning_rate": 0.00019914616402036796,
	"loss": 2.5893,
	"step": 2220
	},
	{
	"epoch": 0.07115855014454081,
	"grad_norm": 1.1016559600830078,
	"learning_rate": 0.00019911912111856688,
	"loss": 2.4743,
	"step": 2240
	},
	{
	"epoch": 0.07179389434225993,
	"grad_norm": 1.12881600856781,
	"learning_rate": 0.00019909165850646941,
	"loss": 2.5057,
	"step": 2260
	},
	{
	"epoch": 0.07242923853997903,
	"grad_norm": 1.216238021850586,
	"learning_rate": 0.00019906377630036338,
	"loss": 2.4624,
	"step": 2280
	},
	{
	"epoch": 0.07306458273769814,
	"grad_norm": 1.1429589986801147,
	"learning_rate": 0.00019903547461831323,
	"loss": 2.3835,
	"step": 2300
	},
	{
	"epoch": 0.07369992693541726,
	"grad_norm": 0.9367678165435791,
	"learning_rate": 0.00019900675358015967,
	"loss": 2.3971,
	"step": 2320
	},
	{
	"epoch": 0.07433527113313637,
	"grad_norm": 1.0869677066802979,
	"learning_rate": 0.00019897761330751922,
	"loss": 2.3241,
	"step": 2340
	},
	{
	"epoch": 0.07497061533085549,
	"grad_norm": 0.958840548992157,
	"learning_rate": 0.0001989480539237835,
	"loss": 2.2828,
	"step": 2360
	},
	{
	"epoch": 0.0756059595285746,
	"grad_norm": 0.9724891781806946,
	"learning_rate": 0.00019891807555411884,
	"loss": 2.2858,
	"step": 2380
	},
	{
	"epoch": 0.07624130372629372,
	"grad_norm": 1.045828104019165,
	"learning_rate": 0.00019888767832546572,
	"loss": 2.2949,
	"step": 2400
	},
	{
	"epoch": 0.07687664792401283,
	"grad_norm": 1.0283712148666382,
	"learning_rate": 0.0001988568623665383,
	"loss": 2.2034,
	"step": 2420
	},
	{
	"epoch": 0.07751199212173195,
	"grad_norm": 1.0930371284484863,
	"learning_rate": 0.00019882562780782376,
	"loss": 2.2283,
	"step": 2440
	},
	{
	"epoch": 0.07814733631945106,
	"grad_norm": 0.892132580280304,
	"learning_rate": 0.00019879397478158177,
	"loss": 2.1872,
	"step": 2460
	},
	{
	"epoch": 0.07878268051717018,
	"grad_norm": 1.0107035636901855,
	"learning_rate": 0.00019876190342184402,
	"loss": 2.1874,
	"step": 2480
	},
	{
	"epoch": 0.07941802471488929,
	"grad_norm": 1.1195555925369263,
	"learning_rate": 0.00019872941386441358,
	"loss": 2.0823,
	"step": 2500
	},
	{
	"epoch": 0.0800533689126084,
	"grad_norm": 1.2803888320922852,
	"learning_rate": 0.0001986965062468643,
	"loss": 2.0905,
	"step": 2520
	},
	{
	"epoch": 0.08068871311032752,
	"grad_norm": 1.0955703258514404,
	"learning_rate": 0.00019866318070854033,
	"loss": 2.0645,
	"step": 2540
	},
	{
	"epoch": 0.08132405730804664,
	"grad_norm": 1.117477297782898,
	"learning_rate": 0.00019862943739055536,
	"loss": 2.0259,
	"step": 2560
	},
	{
	"epoch": 0.08195940150576575,
	"grad_norm": 0.9660820960998535,
	"learning_rate": 0.0001985952764357923,
	"loss": 1.9881,
	"step": 2580
	},
	{
	"epoch": 0.08259474570348486,
	"grad_norm": 0.9186820983886719,
	"learning_rate": 0.0001985606979889023,
	"loss": 1.9571,
	"step": 2600
	},
	{
	"epoch": 0.08323008990120398,
	"grad_norm": 1.1236801147460938,
	"learning_rate": 0.00019852570219630445,
	"loss": 1.9506,
	"step": 2620
	},
	{
	"epoch": 0.0838654340989231,
	"grad_norm": 0.9719575047492981,
	"learning_rate": 0.0001984902892061851,
	"loss": 1.9359,
	"step": 2640
	},
	{
	"epoch": 0.08450077829664221,
	"grad_norm": 1.3401118516921997,
	"learning_rate": 0.00019845445916849704,
	"loss": 1.9707,
	"step": 2660
	},
	{
	"epoch": 0.08513612249436132,
	"grad_norm": 0.980446457862854,
	"learning_rate": 0.00019841821223495916,
	"loss": 1.88,
	"step": 2680
	},
	{
	"epoch": 0.08577146669208044,
	"grad_norm": 1.178143858909607,
	"learning_rate": 0.00019838154855905552,
	"loss": 1.8629,
	"step": 2700
	},
	{
	"epoch": 0.08640681088979955,
	"grad_norm": 0.9232170581817627,
	"learning_rate": 0.00019834446829603494,
	"loss": 1.8467,
	"step": 2720
	},
	{
	"epoch": 0.08704215508751867,
	"grad_norm": 1.7343891859054565,
	"learning_rate": 0.00019830697160291017,
	"loss": 1.8194,
	"step": 2740
	},
	{
	"epoch": 0.08767749928523778,
	"grad_norm": 0.878983199596405,
	"learning_rate": 0.0001982690586384573,
	"loss": 1.8232,
	"step": 2760
	},
	{
	"epoch": 0.0883128434829569,
	"grad_norm": 1.0917317867279053,
	"learning_rate": 0.00019823072956321513,
	"loss": 1.7668,
	"step": 2780
	},
	{
	"epoch": 0.088948187680676,
	"grad_norm": 1.0753387212753296,
	"learning_rate": 0.00019819198453948443,
	"loss": 1.7968,
	"step": 2800
	},
	{
	"epoch": 0.08958353187839511,
	"grad_norm": 1.0904388427734375,
	"learning_rate": 0.00019815282373132718,
	"loss": 1.7834,
	"step": 2820
	},
	{
	"epoch": 0.09021887607611423,
	"grad_norm": 0.9622576236724854,
	"learning_rate": 0.00019811324730456607,
	"loss": 1.7773,
	"step": 2840
	},
	{
	"epoch": 0.09085422027383334,
	"grad_norm": 0.8677240610122681,
	"learning_rate": 0.0001980732554267836,
	"loss": 1.7322,
	"step": 2860
	},
	{
	"epoch": 0.09148956447155246,
	"grad_norm": 1.0953987836837769,
	"learning_rate": 0.0001980328482673215,
	"loss": 1.7123,
	"step": 2880
	},
	{
	"epoch": 0.09212490866927157,
	"grad_norm": 1.0277127027511597,
	"learning_rate": 0.00019799202599727998,
	"loss": 1.7558,
	"step": 2900
	},
	{
	"epoch": 0.09276025286699069,
	"grad_norm": 1.1174383163452148,
	"learning_rate": 0.000197950788789517,
	"loss": 1.7222,
	"step": 2920
	},
	{
	"epoch": 0.0933955970647098,
	"grad_norm": 0.9651451706886292,
	"learning_rate": 0.00019790913681864747,
	"loss": 1.6652,
	"step": 2940
	},
	{
	"epoch": 0.09403094126242892,
	"grad_norm": 0.9669461250305176,
	"learning_rate": 0.00019786707026104265,
	"loss": 1.6381,
	"step": 2960
	},
	{
	"epoch": 0.09466628546014803,
	"grad_norm": 0.9406834244728088,
	"learning_rate": 0.0001978245892948293,
	"loss": 1.6276,
	"step": 2980
	},
	{
	"epoch": 0.09530162965786715,
	"grad_norm": 0.9768303632736206,
	"learning_rate": 0.0001977816940998889,
	"loss": 1.6071,
	"step": 3000
	},
	{
	"epoch": 0.09530162965786715,
	"eval_loss": 1.5878759622573853,
	"eval_runtime": 45.9166,
	"eval_samples_per_second": 58.868,
	"eval_steps_per_second": 29.445,
	"step": 3000
	},
	{
	"epoch": 0.09593697385558626,
	"grad_norm": 1.083208441734314,
	"learning_rate": 0.00019773838485785702,
	"loss": 1.6341,
	"step": 3020
	},
	{
	"epoch": 0.09657231805330538,
	"grad_norm": 0.9333330988883972,
	"learning_rate": 0.00019769466175212244,
	"loss": 1.5931,
	"step": 3040
	},
	{
	"epoch": 0.0972076622510245,
	"grad_norm": 0.9718533754348755,
	"learning_rate": 0.00019765052496782638,
	"loss": 1.5735,
	"step": 3060
	},
	{
	"epoch": 0.09784300644874361,
	"grad_norm": 1.2169800996780396,
	"learning_rate": 0.00019760597469186184,
	"loss": 1.5507,
	"step": 3080
	},
	{
	"epoch": 0.09847835064646272,
	"grad_norm": 0.9822967648506165,
	"learning_rate": 0.00019756101111287257,
	"loss": 1.5784,
	"step": 3100
	},
	{
	"epoch": 0.09911369484418184,
	"grad_norm": 0.9830970168113708,
	"learning_rate": 0.0001975156344212525,
	"loss": 1.5473,
	"step": 3120
	},
	{
	"epoch": 0.09974903904190095,
	"grad_norm": 0.8926035761833191,
	"learning_rate": 0.00019746984480914484,
	"loss": 1.5141,
	"step": 3140
	},
	{
	"epoch": 0.10038438323962007,
	"grad_norm": 0.8814927339553833,
	"learning_rate": 0.00019742364247044125,
	"loss": 1.5164,
	"step": 3160
	},
	{
	"epoch": 0.10101972743733918,
	"grad_norm": 0.8626115322113037,
	"learning_rate": 0.00019737702760078105,
	"loss": 1.4495,
	"step": 3180
	},
	{
	"epoch": 0.1016550716350583,
	"grad_norm": 1.0857669115066528,
	"learning_rate": 0.00019733000039755036,
	"loss": 1.511,
	"step": 3200
	},
	{
	"epoch": 0.10229041583277741,
	"grad_norm": 0.8834457397460938,
	"learning_rate": 0.00019728256105988132,
	"loss": 1.4764,
	"step": 3220
	},
	{
	"epoch": 0.10292576003049653,
	"grad_norm": 0.8241048455238342,
	"learning_rate": 0.00019723470978865118,
	"loss": 1.4253,
	"step": 3240
	},
	{
	"epoch": 0.10356110422821564,
	"grad_norm": 0.9844352006912231,
	"learning_rate": 0.00019718644678648158,
	"loss": 1.4595,
	"step": 3260
	},
	{
	"epoch": 0.10419644842593476,
	"grad_norm": 0.8982945084571838,
	"learning_rate": 0.00019713777225773745,
	"loss": 1.3535,
	"step": 3280
	},
	{
	"epoch": 0.10483179262365386,
	"grad_norm": 1.2204469442367554,
	"learning_rate": 0.0001970886864085263,
	"loss": 1.4283,
	"step": 3300
	},
	{
	"epoch": 0.10546713682137297,
	"grad_norm": 1.0676652193069458,
	"learning_rate": 0.00019703918944669754,
	"loss": 1.3858,
	"step": 3320
	},
	{
	"epoch": 0.10610248101909209,
	"grad_norm": 1.17191743850708,
	"learning_rate": 0.00019698928158184116,
	"loss": 1.4426,
	"step": 3340
	},
	{
	"epoch": 0.1067378252168112,
	"grad_norm": 0.9601316452026367,
	"learning_rate": 0.00019693896302528716,
	"loss": 1.3621,
	"step": 3360
	},
	{
	"epoch": 0.10737316941453032,
	"grad_norm": 0.9755037426948547,
	"learning_rate": 0.00019688823399010463,
	"loss": 1.3901,
	"step": 3380
	},
	{
	"epoch": 0.10800851361224943,
	"grad_norm": 1.0111849308013916,
	"learning_rate": 0.0001968370946911007,
	"loss": 1.3748,
	"step": 3400
	},
	{
	"epoch": 0.10864385780996855,
	"grad_norm": 0.8471179604530334,
	"learning_rate": 0.00019678554534481978,
	"loss": 1.3227,
	"step": 3420
	},
	{
	"epoch": 0.10927920200768766,
	"grad_norm": 0.9206441640853882,
	"learning_rate": 0.0001967335861695426,
	"loss": 1.3493,
	"step": 3440
	},
	{
	"epoch": 0.10991454620540678,
	"grad_norm": 1.055109977722168,
	"learning_rate": 0.0001966812173852852,
	"loss": 1.3549,
	"step": 3460
	},
	{
	"epoch": 0.11054989040312589,
	"grad_norm": 0.995614767074585,
	"learning_rate": 0.00019662843921379816,
	"loss": 1.3468,
	"step": 3480
	},
	{
	"epoch": 0.111185234600845,
	"grad_norm": 0.8873100876808167,
	"learning_rate": 0.0001965752518785655,
	"loss": 1.3129,
	"step": 3500
	},
	{
	"epoch": 0.11182057879856412,
	"grad_norm": 0.9802286624908447,
	"learning_rate": 0.00019652165560480383,
	"loss": 1.34,
	"step": 3520
	},
	{
	"epoch": 0.11245592299628324,
	"grad_norm": 0.9177120923995972,
	"learning_rate": 0.00019646765061946133,
	"loss": 1.3321,
	"step": 3540
	},
	{
	"epoch": 0.11309126719400235,
	"grad_norm": 1.0982646942138672,
	"learning_rate": 0.00019641323715121692,
	"loss": 1.292,
	"step": 3560
	},
	{
	"epoch": 0.11372661139172147,
	"grad_norm": 1.1567240953445435,
	"learning_rate": 0.00019635841543047918,
	"loss": 1.3052,
	"step": 3580
	},
	{
	"epoch": 0.11436195558944058,
	"grad_norm": 0.8516421914100647,
	"learning_rate": 0.00019630318568938528,
	"loss": 1.3189,
	"step": 3600
	},
	{
	"epoch": 0.1149972997871597,
	"grad_norm": 0.9710924029350281,
	"learning_rate": 0.00019624754816180022,
	"loss": 1.2644,
	"step": 3620
	},
	{
	"epoch": 0.11563264398487881,
	"grad_norm": 0.9252649545669556,
	"learning_rate": 0.00019619150308331572,
	"loss": 1.2517,
	"step": 3640
	},
	{
	"epoch": 0.11626798818259793,
	"grad_norm": 0.973948061466217,
	"learning_rate": 0.0001961350506912493,
	"loss": 1.2651,
	"step": 3660
	},
	{
	"epoch": 0.11690333238031704,
	"grad_norm": 0.9078177213668823,
	"learning_rate": 0.000196078191224643,
	"loss": 1.2089,
	"step": 3680
	},
	{
	"epoch": 0.11753867657803616,
	"grad_norm": 0.8456325531005859,
	"learning_rate": 0.0001960209249242628,
	"loss": 1.2503,
	"step": 3700
	},
	{
	"epoch": 0.11817402077575527,
	"grad_norm": 1.2014869451522827,
	"learning_rate": 0.00019596325203259722,
	"loss": 1.2287,
	"step": 3720
	},
	{
	"epoch": 0.11880936497347439,
	"grad_norm": 0.903296172618866,
	"learning_rate": 0.0001959051727938566,
	"loss": 1.1999,
	"step": 3740
	},
	{
	"epoch": 0.1194447091711935,
	"grad_norm": 0.9159349799156189,
	"learning_rate": 0.00019584668745397182,
	"loss": 1.2077,
	"step": 3760
	},
	{
	"epoch": 0.12008005336891261,
	"grad_norm": 1.0457518100738525,
	"learning_rate": 0.00019578779626059332,
	"loss": 1.2395,
	"step": 3780
	},
	{
	"epoch": 0.12071539756663172,
	"grad_norm": 0.8328551650047302,
	"learning_rate": 0.0001957284994630902,
	"loss": 1.2039,
	"step": 3800
	},
	{
	"epoch": 0.12135074176435083,
	"grad_norm": 0.9112881422042847,
	"learning_rate": 0.00019566879731254902,
	"loss": 1.1987,
	"step": 3820
	},
	{
	"epoch": 0.12198608596206995,
	"grad_norm": 2.0256752967834473,
	"learning_rate": 0.00019560869006177262,
	"loss": 1.1923,
	"step": 3840
	},
	{
	"epoch": 0.12262143015978906,
	"grad_norm": 0.9714537262916565,
	"learning_rate": 0.00019554817796527943,
	"loss": 1.1726,
	"step": 3860
	},
	{
	"epoch": 0.12325677435750818,
	"grad_norm": 0.8522310256958008,
	"learning_rate": 0.00019548726127930198,
	"loss": 1.1985,
	"step": 3880
	},
	{
	"epoch": 0.12389211855522729,
	"grad_norm": 0.8728988766670227,
	"learning_rate": 0.00019542594026178612,
	"loss": 1.1662,
	"step": 3900
	},
	{
	"epoch": 0.1245274627529464,
	"grad_norm": 0.9155168533325195,
	"learning_rate": 0.00019536421517238973,
	"loss": 1.1529,
	"step": 3920
	},
	{
	"epoch": 0.12516280695066553,
	"grad_norm": 1.05704665184021,
	"learning_rate": 0.0001953020862724817,
	"loss": 1.1415,
	"step": 3940
	},
	{
	"epoch": 0.12579815114838463,
	"grad_norm": 0.7793872952461243,
	"learning_rate": 0.0001952395538251408,
	"loss": 1.1387,
	"step": 3960
	},
	{
	"epoch": 0.12643349534610376,
	"grad_norm": 0.9358331561088562,
	"learning_rate": 0.00019517661809515465,
	"loss": 1.1816,
	"step": 3980
	},
	{
	"epoch": 0.12706883954382286,
	"grad_norm": 0.8175097107887268,
	"learning_rate": 0.00019511327934901846,
	"loss": 1.126,
	"step": 4000
	},
	{
	"epoch": 0.12706883954382286,
	"eval_loss": 1.1081569194793701,
	"eval_runtime": 128.6977,
	"eval_samples_per_second": 21.003,
	"eval_steps_per_second": 10.505,
	"step": 4000
	},
	{
	"epoch": 0.127704183741542,
	"grad_norm": 0.9568232893943787,
	"learning_rate": 0.000195049537854934,
	"loss": 1.1002,
	"step": 4020
	},
	{
	"epoch": 0.1283395279392611,
	"grad_norm": 0.9011651277542114,
	"learning_rate": 0.00019498539388280848,
	"loss": 1.129,
	"step": 4040
	},
	{
	"epoch": 0.1289748721369802,
	"grad_norm": 1.045811653137207,
	"learning_rate": 0.00019492084770425327,
	"loss": 1.0945,
	"step": 4060
	},
	{
	"epoch": 0.12961021633469932,
	"grad_norm": 0.8668608069419861,
	"learning_rate": 0.00019485589959258292,
	"loss": 1.0601,
	"step": 4080
	},
	{
	"epoch": 0.13024556053241843,
	"grad_norm": 0.9976728558540344,
	"learning_rate": 0.00019479054982281393,
	"loss": 1.1127,
	"step": 4100
	},
	{
	"epoch": 0.13088090473013755,
	"grad_norm": 0.9135074019432068,
	"learning_rate": 0.00019472479867166354,
	"loss": 1.0708,
	"step": 4120
	},
	{
	"epoch": 0.13151624892785665,
	"grad_norm": 0.8302998542785645,
	"learning_rate": 0.0001946586464175486,
	"loss": 1.0925,
	"step": 4140
	},
	{
	"epoch": 0.13215159312557578,
	"grad_norm": 0.9594709277153015,
	"learning_rate": 0.0001945920933405844,
	"loss": 1.0879,
	"step": 4160
	},
	{
	"epoch": 0.13278693732329488,
	"grad_norm": 1.3145122528076172,
	"learning_rate": 0.00019452513972258352,
	"loss": 1.0706,
	"step": 4180
	},
	{
	"epoch": 0.133422281521014,
	"grad_norm": 1.0521440505981445,
	"learning_rate": 0.00019445778584705452,
	"loss": 1.1089,
	"step": 4200
	},
	{
	"epoch": 0.13405762571873311,
	"grad_norm": 1.1046104431152344,
	"learning_rate": 0.00019439003199920088,
	"loss": 1.0965,
	"step": 4220
	},
	{
	"epoch": 0.13469296991645224,
	"grad_norm": 1.1228617429733276,
	"learning_rate": 0.00019432187846591967,
	"loss": 1.0747,
	"step": 4240
	},
	{
	"epoch": 0.13532831411417134,
	"grad_norm": 0.8399156332015991,
	"learning_rate": 0.00019425332553580044,
	"loss": 1.0239,
	"step": 4260
	},
	{
	"epoch": 0.13596365831189047,
	"grad_norm": 0.9118017554283142,
	"learning_rate": 0.00019418437349912385,
	"loss": 1.0557,
	"step": 4280
	},
	{
	"epoch": 0.13659900250960957,
	"grad_norm": 1.1154282093048096,
	"learning_rate": 0.00019411502264786069,
	"loss": 1.0846,
	"step": 4300
	},
	{
	"epoch": 0.1372343467073287,
	"grad_norm": 0.8457648158073425,
	"learning_rate": 0.00019404527327567035,
	"loss": 1.0438,
	"step": 4320
	},
	{
	"epoch": 0.1378696909050478,
	"grad_norm": 0.9336498975753784,
	"learning_rate": 0.0001939751256778998,
	"loss": 1.0403,
	"step": 4340
	},
	{
	"epoch": 0.13850503510276693,
	"grad_norm": 0.9318077564239502,
	"learning_rate": 0.0001939045801515822,
	"loss": 1.0375,
	"step": 4360
	},
	{
	"epoch": 0.13914037930048603,
	"grad_norm": 0.9146689176559448,
	"learning_rate": 0.0001938336369954358,
	"loss": 1.0394,
	"step": 4380
	},
	{
	"epoch": 0.13977572349820516,
	"grad_norm": 1.2244622707366943,
	"learning_rate": 0.00019376229650986245,
	"loss": 1.0305,
	"step": 4400
	},
	{
	"epoch": 0.14041106769592426,
	"grad_norm": 0.9721834659576416,
	"learning_rate": 0.00019369055899694652,
	"loss": 1.0133,
	"step": 4420
	},
	{
	"epoch": 0.1410464118936434,
	"grad_norm": 0.8538774251937866,
	"learning_rate": 0.00019361842476045356,
	"loss": 1.0272,
	"step": 4440
	},
	{
	"epoch": 0.1416817560913625,
	"grad_norm": 0.7733943462371826,
	"learning_rate": 0.000193545894105829,
	"loss": 1.0328,
	"step": 4460
	},
	{
	"epoch": 0.14231710028908162,
	"grad_norm": 1.0937755107879639,
	"learning_rate": 0.00019347296734019683,
	"loss": 1.0501,
	"step": 4480
	},
	{
	"epoch": 0.14295244448680072,
	"grad_norm": 0.8855345845222473,
	"learning_rate": 0.00019339964477235836,
	"loss": 0.9979,
	"step": 4500
	},
	{
	"epoch": 0.14358778868451985,
	"grad_norm": 0.9113184213638306,
	"learning_rate": 0.0001933259267127909,
	"loss": 0.967,
	"step": 4520
	},
	{
	"epoch": 0.14422313288223895,
	"grad_norm": 0.9671328663825989,
	"learning_rate": 0.00019325181347364643,
	"loss": 1.016,
	"step": 4540
	},
	{
	"epoch": 0.14485847707995805,
	"grad_norm": 0.8655368685722351,
	"learning_rate": 0.00019317730536875022,
	"loss": 1.0005,
	"step": 4560
	},
	{
	"epoch": 0.14549382127767718,
	"grad_norm": 0.8673165440559387,
	"learning_rate": 0.00019310240271359967,
	"loss": 0.9697,
	"step": 4580
	},
	{
	"epoch": 0.14612916547539628,
	"grad_norm": 1.0993086099624634,
	"learning_rate": 0.00019302710582536276,
	"loss": 0.9832,
	"step": 4600
	},
	{
	"epoch": 0.1467645096731154,
	"grad_norm": 1.1561827659606934,
	"learning_rate": 0.00019295141502287687,
	"loss": 0.9603,
	"step": 4620
	},
	{
	"epoch": 0.1473998538708345,
	"grad_norm": 1.0052567720413208,
	"learning_rate": 0.00019287533062664733,
	"loss": 0.9808,
	"step": 4640
	},
	{
	"epoch": 0.14803519806855364,
	"grad_norm": 0.9202858209609985,
	"learning_rate": 0.00019279885295884618,
	"loss": 0.9564,
	"step": 4660
	},
	{
	"epoch": 0.14867054226627274,
	"grad_norm": 0.8606549501419067,
	"learning_rate": 0.0001927219823433106,
	"loss": 0.9936,
	"step": 4680
	},
	{
	"epoch": 0.14930588646399187,
	"grad_norm": 0.9188569784164429,
	"learning_rate": 0.00019264471910554183,
	"loss": 0.9833,
	"step": 4700
	},
	{
	"epoch": 0.14994123066171097,
	"grad_norm": 0.7773941159248352,
	"learning_rate": 0.0001925670635727035,
	"loss": 0.9272,
	"step": 4720
	},
	{
	"epoch": 0.1505765748594301,
	"grad_norm": 0.8689327836036682,
	"learning_rate": 0.00019248901607362047,
	"loss": 0.9462,
	"step": 4740
	},
	{
	"epoch": 0.1512119190571492,
	"grad_norm": 0.800255298614502,
	"learning_rate": 0.00019241057693877725,
	"loss": 0.9222,
	"step": 4760
	},
	{
	"epoch": 0.15184726325486833,
	"grad_norm": 0.9326597452163696,
	"learning_rate": 0.0001923317465003168,
	"loss": 0.961,
	"step": 4780
	},
	{
	"epoch": 0.15248260745258743,
	"grad_norm": 1.072416067123413,
	"learning_rate": 0.00019225252509203888,
	"loss": 0.9464,
	"step": 4800
	},
	{
	"epoch": 0.15311795165030656,
	"grad_norm": 0.9187152981758118,
	"learning_rate": 0.0001921729130493989,
	"loss": 0.9461,
	"step": 4820
	},
	{
	"epoch": 0.15375329584802566,
	"grad_norm": 0.8737976551055908,
	"learning_rate": 0.00019209291070950633,
	"loss": 0.8771,
	"step": 4840
	},
	{
	"epoch": 0.1543886400457448,
	"grad_norm": 0.9321054220199585,
	"learning_rate": 0.0001920125184111233,
	"loss": 0.9179,
	"step": 4860
	},
	{
	"epoch": 0.1550239842434639,
	"grad_norm": 0.7673978209495544,
	"learning_rate": 0.00019193173649466322,
	"loss": 0.8711,
	"step": 4880
	},
	{
	"epoch": 0.15565932844118302,
	"grad_norm": 1.0326552391052246,
	"learning_rate": 0.00019185056530218923,
	"loss": 0.9494,
	"step": 4900
	},
	{
	"epoch": 0.15629467263890212,
	"grad_norm": 0.8184536695480347,
	"learning_rate": 0.0001917690051774129,
	"loss": 0.9201,
	"step": 4920
	},
	{
	"epoch": 0.15693001683662125,
	"grad_norm": 0.8319898247718811,
	"learning_rate": 0.0001916870564656926,
	"loss": 0.9167,
	"step": 4940
	},
	{
	"epoch": 0.15756536103434035,
	"grad_norm": 1.0563160181045532,
	"learning_rate": 0.0001916047195140323,
	"loss": 0.8993,
	"step": 4960
	},
	{
	"epoch": 0.15820070523205948,
	"grad_norm": 0.8466194868087769,
	"learning_rate": 0.00019152199467107974,
	"loss": 0.9198,
	"step": 4980
	},
	{
	"epoch": 0.15883604942977858,
	"grad_norm": 1.1115593910217285,
	"learning_rate": 0.00019143888228712527,
	"loss": 0.8749,
	"step": 5000
	},
	{
	"epoch": 0.15883604942977858,
	"eval_loss": 0.8843944668769836,
	"eval_runtime": 127.8707,
	"eval_samples_per_second": 21.139,
	"eval_steps_per_second": 10.573,
	"step": 5000
	},
	{
	"epoch": 0.1594713936274977,
	"grad_norm": 0.9679493308067322,
	"learning_rate": 0.00019135538271410022,
	"loss": 0.9212,
	"step": 5020
	},
	{
	"epoch": 0.1601067378252168,
	"grad_norm": 0.8485816121101379,
	"learning_rate": 0.0001912714963055754,
	"loss": 0.9054,
	"step": 5040
	},
	{
	"epoch": 0.16074208202293594,
	"grad_norm": 1.0210843086242676,
	"learning_rate": 0.0001911872234167597,
	"loss": 0.917,
	"step": 5060
	},
	{
	"epoch": 0.16137742622065504,
	"grad_norm": 1.0072481632232666,
	"learning_rate": 0.00019110256440449844,
	"loss": 0.9014,
	"step": 5080
	},
	{
	"epoch": 0.16201277041837414,
	"grad_norm": 0.9833612442016602,
	"learning_rate": 0.00019101751962727204,
	"loss": 0.891,
	"step": 5100
	},
	{
	"epoch": 0.16264811461609327,
	"grad_norm": 1.0564861297607422,
	"learning_rate": 0.0001909320894451943,
	"loss": 0.8581,
	"step": 5120
	},
	{
	"epoch": 0.16328345881381237,
	"grad_norm": 1.1205075979232788,
	"learning_rate": 0.0001908462742200111,
	"loss": 0.8884,
	"step": 5140
	},
	{
	"epoch": 0.1639188030115315,
	"grad_norm": 0.9841699004173279,
	"learning_rate": 0.0001907600743150986,
	"loss": 0.8815,
	"step": 5160
	},
	{
	"epoch": 0.1645541472092506,
	"grad_norm": 0.852820098400116,
	"learning_rate": 0.00019067349009546197,
	"loss": 0.8594,
	"step": 5180
	},
	{
	"epoch": 0.16518949140696973,
	"grad_norm": 0.8630360960960388,
	"learning_rate": 0.00019058652192773372,
	"loss": 0.8653,
	"step": 5200
	},
	{
	"epoch": 0.16582483560468883,
	"grad_norm": 1.0112591981887817,
	"learning_rate": 0.00019049917018017207,
	"loss": 0.8715,
	"step": 5220
	},
	{
	"epoch": 0.16646017980240796,
	"grad_norm": 0.9182717204093933,
	"learning_rate": 0.00019041143522265948,
	"loss": 0.8875,
	"step": 5240
	},
	{
	"epoch": 0.16709552400012706,
	"grad_norm": 1.190596103668213,
	"learning_rate": 0.0001903233174267012,
	"loss": 0.9027,
	"step": 5260
	},
	{
	"epoch": 0.1677308681978462,
	"grad_norm": 0.8345910310745239,
	"learning_rate": 0.00019023481716542342,
	"loss": 0.8819,
	"step": 5280
	},
	{
	"epoch": 0.1683662123955653,
	"grad_norm": 0.8964826464653015,
	"learning_rate": 0.00019014593481357192,
	"loss": 0.845,
	"step": 5300
	},
	{
	"epoch": 0.16900155659328442,
	"grad_norm": 1.1423965692520142,
	"learning_rate": 0.0001900566707475104,
	"loss": 0.8463,
	"step": 5320
	},
	{
	"epoch": 0.16963690079100352,
	"grad_norm": 0.895899772644043,
	"learning_rate": 0.00018996702534521888,
	"loss": 0.8631,
	"step": 5340
	},
	{
	"epoch": 0.17027224498872265,
	"grad_norm": 1.0254230499267578,
	"learning_rate": 0.00018987699898629208,
	"loss": 0.8489,
	"step": 5360
	},
	{
	"epoch": 0.17090758918644175,
	"grad_norm": 0.9370276927947998,
	"learning_rate": 0.00018978659205193794,
	"loss": 0.8822,
	"step": 5380
	},
	{
	"epoch": 0.17154293338416088,
	"grad_norm": 1.1030024290084839,
	"learning_rate": 0.00018969580492497577,
	"loss": 0.8834,
	"step": 5400
	},
	{
	"epoch": 0.17217827758187998,
	"grad_norm": 0.9148856997489929,
	"learning_rate": 0.00018960463798983494,
	"loss": 0.8198,
	"step": 5420
	},
	{
	"epoch": 0.1728136217795991,
	"grad_norm": 0.8851357102394104,
	"learning_rate": 0.00018951309163255288,
	"loss": 0.8077,
	"step": 5440
	},
	{
	"epoch": 0.1734489659773182,
	"grad_norm": 0.9701651334762573,
	"learning_rate": 0.00018942116624077386,
	"loss": 0.8687,
	"step": 5460
	},
	{
	"epoch": 0.17408431017503734,
	"grad_norm": 0.9508700966835022,
	"learning_rate": 0.00018932886220374696,
	"loss": 0.8764,
	"step": 5480
	},
	{
	"epoch": 0.17471965437275644,
	"grad_norm": 0.9914870858192444,
	"learning_rate": 0.00018923617991232466,
	"loss": 0.8157,
	"step": 5500
	},
	{
	"epoch": 0.17535499857047557,
	"grad_norm": 1.010511040687561,
	"learning_rate": 0.00018914311975896117,
	"loss": 0.839,
	"step": 5520
	},
	{
	"epoch": 0.17599034276819467,
	"grad_norm": 0.8063015937805176,
	"learning_rate": 0.00018904968213771065,
	"loss": 0.8308,
	"step": 5540
	},
	{
	"epoch": 0.1766256869659138,
	"grad_norm": 0.8653827905654907,
	"learning_rate": 0.00018895586744422564,
	"loss": 0.8304,
	"step": 5560
	},
	{
	"epoch": 0.1772610311636329,
	"grad_norm": 1.0596357583999634,
	"learning_rate": 0.00018886167607575532,
	"loss": 0.8346,
	"step": 5580
	},
	{
	"epoch": 0.177896375361352,
	"grad_norm": 1.0251786708831787,
	"learning_rate": 0.00018876710843114398,
	"loss": 0.8639,
	"step": 5600
	},
	{
	"epoch": 0.17853171955907113,
	"grad_norm": 0.8897235989570618,
	"learning_rate": 0.00018867216491082905,
	"loss": 0.8286,
	"step": 5620
	},
	{
	"epoch": 0.17916706375679023,
	"grad_norm": 0.8118072748184204,
	"learning_rate": 0.00018857684591683967,
	"loss": 0.8597,
	"step": 5640
	},
	{
	"epoch": 0.17980240795450936,
	"grad_norm": 0.8698698878288269,
	"learning_rate": 0.0001884811518527949,
	"loss": 0.7894,
	"step": 5660
	},
	{
	"epoch": 0.18043775215222846,
	"grad_norm": 0.8228470087051392,
	"learning_rate": 0.00018838508312390192,
	"loss": 0.8302,
	"step": 5680
	},
	{
	"epoch": 0.1810730963499476,
	"grad_norm": 1.1411319971084595,
	"learning_rate": 0.00018828864013695448,
	"loss": 0.8313,
	"step": 5700
	},
	{
	"epoch": 0.1817084405476667,
	"grad_norm": 0.8076447248458862,
	"learning_rate": 0.00018819182330033103,
	"loss": 0.798,
	"step": 5720
	},
	{
	"epoch": 0.18234378474538582,
	"grad_norm": 0.8669622540473938,
	"learning_rate": 0.00018809463302399304,
	"loss": 0.7911,
	"step": 5740
	},
	{
	"epoch": 0.18297912894310492,
	"grad_norm": 0.8435181975364685,
	"learning_rate": 0.0001879970697194833,
	"loss": 0.7951,
	"step": 5760
	},
	{
	"epoch": 0.18361447314082405,
	"grad_norm": 1.1023324728012085,
	"learning_rate": 0.00018789913379992418,
	"loss": 0.8253,
	"step": 5780
	},
	{
	"epoch": 0.18424981733854315,
	"grad_norm": 0.9319256544113159,
	"learning_rate": 0.00018780082568001585,
	"loss": 0.7625,
	"step": 5800
	},
	{
	"epoch": 0.18488516153626228,
	"grad_norm": 0.8259923458099365,
	"learning_rate": 0.00018770214577603443,
	"loss": 0.8079,
	"step": 5820
	},
	{
	"epoch": 0.18552050573398138,
	"grad_norm": 0.8953514695167542,
	"learning_rate": 0.00018760309450583043,
	"loss": 0.7647,
	"step": 5840
	},
	{
	"epoch": 0.1861558499317005,
	"grad_norm": 0.8347587585449219,
	"learning_rate": 0.00018750367228882685,
	"loss": 0.8089,
	"step": 5860
	},
	{
	"epoch": 0.1867911941294196,
	"grad_norm": 0.9788545966148376,
	"learning_rate": 0.00018740387954601742,
	"loss": 0.7737,
	"step": 5880
	},
	{
	"epoch": 0.18742653832713874,
	"grad_norm": 0.9509750008583069,
	"learning_rate": 0.00018730371669996478,
	"loss": 0.8073,
	"step": 5900
	},
	{
	"epoch": 0.18806188252485784,
	"grad_norm": 0.9388551115989685,
	"learning_rate": 0.0001872031841747988,
	"loss": 0.7585,
	"step": 5920
	},
	{
	"epoch": 0.18869722672257697,
	"grad_norm": 0.8342726826667786,
	"learning_rate": 0.00018710228239621476,
	"loss": 0.8025,
	"step": 5940
	},
	{
	"epoch": 0.18933257092029607,
	"grad_norm": 1.0455151796340942,
	"learning_rate": 0.00018700101179147134,
	"loss": 0.7603,
	"step": 5960
	},
	{
	"epoch": 0.1899679151180152,
	"grad_norm": 0.820931077003479,
	"learning_rate": 0.00018689937278938915,
	"loss": 0.7972,
	"step": 5980
	},
	{
	"epoch": 0.1906032593157343,
	"grad_norm": 0.8494334816932678,
	"learning_rate": 0.00018679736582034867,
	"loss": 0.7663,
	"step": 6000
	},
	{
	"epoch": 0.1906032593157343,
	"eval_loss": 0.7605160474777222,
	"eval_runtime": 45.0866,
	"eval_samples_per_second": 59.951,
	"eval_steps_per_second": 29.987,
	"step": 6000
	},
	{
	"epoch": 0.19123860351345343,
	"grad_norm": 0.9915199279785156,
	"learning_rate": 0.00018669499131628847,
	"loss": 0.7911,
	"step": 6020
	},
	{
	"epoch": 0.19187394771117253,
	"grad_norm": 1.009752869606018,
	"learning_rate": 0.00018659739550293418,
	"loss": 0.7791,
	"step": 6040
	},
	{
	"epoch": 0.19250929190889166,
	"grad_norm": 1.008296012878418,
	"learning_rate": 0.00018649430555384115,
	"loss": 0.7741,
	"step": 6060
	},
	{
	"epoch": 0.19314463610661076,
	"grad_norm": 0.9730678200721741,
	"learning_rate": 0.0001863908493530077,
	"loss": 0.8028,
	"step": 6080
	},
	{
	"epoch": 0.19377998030432986,
	"grad_norm": 0.8386117815971375,
	"learning_rate": 0.0001862870273385091,
	"loss": 0.789,
	"step": 6100
	},
	{
	"epoch": 0.194415324502049,
	"grad_norm": 0.8517867922782898,
	"learning_rate": 0.00018618283994996954,
	"loss": 0.7472,
	"step": 6120
	},
	{
	"epoch": 0.1950506686997681,
	"grad_norm": 0.8791770339012146,
	"learning_rate": 0.00018607828762856046,
	"loss": 0.7871,
	"step": 6140
	},
	{
	"epoch": 0.19568601289748722,
	"grad_norm": 0.9248822331428528,
	"learning_rate": 0.00018597337081699848,
	"loss": 0.762,
	"step": 6160
	},
	{
	"epoch": 0.19632135709520632,
	"grad_norm": 0.8059686422348022,
	"learning_rate": 0.00018586808995954367,
	"loss": 0.7345,
	"step": 6180
	},
	{
	"epoch": 0.19695670129292545,
	"grad_norm": 0.7610188126564026,
	"learning_rate": 0.00018576244550199758,
	"loss": 0.7478,
	"step": 6200
	},
	{
	"epoch": 0.19759204549064455,
	"grad_norm": 0.7763079404830933,
	"learning_rate": 0.00018565643789170144,
	"loss": 0.7552,
	"step": 6220
	},
	{
	"epoch": 0.19822738968836368,
	"grad_norm": 1.1734811067581177,
	"learning_rate": 0.00018555006757753418,
	"loss": 0.7645,
	"step": 6240
	},
	{
	"epoch": 0.19886273388608278,
	"grad_norm": 0.7641186714172363,
	"learning_rate": 0.00018544333500991053,
	"loss": 0.7267,
	"step": 6260
	},
	{
	"epoch": 0.1994980780838019,
	"grad_norm": 0.8322380781173706,
	"learning_rate": 0.00018533624064077922,
	"loss": 0.7601,
	"step": 6280
	},
	{
	"epoch": 0.200133422281521,
	"grad_norm": 0.9059064388275146,
	"learning_rate": 0.00018522878492362096,
	"loss": 0.7716,
	"step": 6300
	},
	{
	"epoch": 0.20076876647924013,
	"grad_norm": 0.7728195786476135,
	"learning_rate": 0.00018512096831344653,
	"loss": 0.7435,
	"step": 6320
	},
	{
	"epoch": 0.20140411067695924,
	"grad_norm": 0.9880885481834412,
	"learning_rate": 0.00018501279126679495,
	"loss": 0.7378,
	"step": 6340
	},
	{
	"epoch": 0.20203945487467836,
	"grad_norm": 0.8192346096038818,
	"learning_rate": 0.00018490425424173138,
	"loss": 0.7376,
	"step": 6360
	},
	{
	"epoch": 0.20267479907239747,
	"grad_norm": 1.175627589225769,
	"learning_rate": 0.0001847953576978453,
	"loss": 0.7672,
	"step": 6380
	},
	{
	"epoch": 0.2033101432701166,
	"grad_norm": 0.7959802746772766,
	"learning_rate": 0.0001846861020962486,
	"loss": 0.7331,
	"step": 6400
	},
	{
	"epoch": 0.2039454874678357,
	"grad_norm": 0.8343777060508728,
	"learning_rate": 0.0001845764878995735,
	"loss": 0.7142,
	"step": 6420
	},
	{
	"epoch": 0.20458083166555482,
	"grad_norm": 0.9900172352790833,
	"learning_rate": 0.00018446651557197066,
	"loss": 0.7819,
	"step": 6440
	},
	{
	"epoch": 0.20521617586327393,
	"grad_norm": 1.111018180847168,
	"learning_rate": 0.00018435618557910725,
	"loss": 0.7226,
	"step": 6460
	},
	{
	"epoch": 0.20585152006099305,
	"grad_norm": 0.9301121830940247,
	"learning_rate": 0.00018424549838816492,
	"loss": 0.7295,
	"step": 6480
	},
	{
	"epoch": 0.20648686425871215,
	"grad_norm": 0.894797146320343,
	"learning_rate": 0.0001841344544678378,
	"loss": 0.7199,
	"step": 6500
	},
	{
	"epoch": 0.20712220845643128,
	"grad_norm": 1.041779637336731,
	"learning_rate": 0.0001840230542883306,
	"loss": 0.7213,
	"step": 6520
	},
	{
	"epoch": 0.20775755265415038,
	"grad_norm": 0.9267428517341614,
	"learning_rate": 0.00018391129832135659,
	"loss": 0.7463,
	"step": 6540
	},
	{
	"epoch": 0.2083928968518695,
	"grad_norm": 0.8043299913406372,
	"learning_rate": 0.00018379918704013556,
	"loss": 0.6909,
	"step": 6560
	},
	{
	"epoch": 0.20902824104958861,
	"grad_norm": 0.8037667870521545,
	"learning_rate": 0.0001836867209193918,
	"loss": 0.7307,
	"step": 6580
	},
	{
	"epoch": 0.20966358524730772,
	"grad_norm": 0.9795257449150085,
	"learning_rate": 0.00018357390043535228,
	"loss": 0.7625,
	"step": 6600
	},
	{
	"epoch": 0.21029892944502684,
	"grad_norm": 1.0763206481933594,
	"learning_rate": 0.0001834607260657443,
	"loss": 0.7457,
	"step": 6620
	},
	{
	"epoch": 0.21093427364274595,
	"grad_norm": 0.8083770275115967,
	"learning_rate": 0.00018334719828979373,
	"loss": 0.7398,
	"step": 6640
	},
	{
	"epoch": 0.21156961784046507,
	"grad_norm": 0.8648799657821655,
	"learning_rate": 0.00018323331758822299,
	"loss": 0.7392,
	"step": 6660
	},
	{
	"epoch": 0.21220496203818418,
	"grad_norm": 1.322874903678894,
	"learning_rate": 0.0001831190844432488,
	"loss": 0.767,
	"step": 6680
	},
	{
	"epoch": 0.2128403062359033,
	"grad_norm": 0.8415853977203369,
	"learning_rate": 0.00018300449933858034,
	"loss": 0.7123,
	"step": 6700
	},
	{
	"epoch": 0.2134756504336224,
	"grad_norm": 0.8832991123199463,
	"learning_rate": 0.00018288956275941713,
	"loss": 0.7329,
	"step": 6720
	},
	{
	"epoch": 0.21411099463134153,
	"grad_norm": 0.8079715967178345,
	"learning_rate": 0.00018277427519244692,
	"loss": 0.6988,
	"step": 6740
	},
	{
	"epoch": 0.21474633882906063,
	"grad_norm": 0.9029518365859985,
	"learning_rate": 0.00018265863712584377,
	"loss": 0.6943,
	"step": 6760
	},
	{
	"epoch": 0.21538168302677976,
	"grad_norm": 0.9082062244415283,
	"learning_rate": 0.0001825426490492658,
	"loss": 0.7517,
	"step": 6780
	},
	{
	"epoch": 0.21601702722449886,
	"grad_norm": 0.9031996726989746,
	"learning_rate": 0.00018242631145385329,
	"loss": 0.7108,
	"step": 6800
	},
	{
	"epoch": 0.216652371422218,
	"grad_norm": 0.9114848375320435,
	"learning_rate": 0.00018230962483222648,
	"loss": 0.7151,
	"step": 6820
	},
	{
	"epoch": 0.2172877156199371,
	"grad_norm": 0.8056477308273315,
	"learning_rate": 0.00018219258967848355,
	"loss": 0.7154,
	"step": 6840
	},
	{
	"epoch": 0.21792305981765622,
	"grad_norm": 0.9029595255851746,
	"learning_rate": 0.0001820752064881985,
	"loss": 0.728,
	"step": 6860
	},
	{
	"epoch": 0.21855840401537532,
	"grad_norm": 0.9304366707801819,
	"learning_rate": 0.00018195747575841905,
	"loss": 0.7298,
	"step": 6880
	},
	{
	"epoch": 0.21919374821309445,
	"grad_norm": 1.2549713850021362,
	"learning_rate": 0.00018183939798766452,
	"loss": 0.7166,
	"step": 6900
	},
	{
	"epoch": 0.21982909241081355,
	"grad_norm": 0.8609549403190613,
	"learning_rate": 0.0001817209736759238,
	"loss": 0.7222,
	"step": 6920
	},
	{
	"epoch": 0.22046443660853268,
	"grad_norm": 0.9668901562690735,
	"learning_rate": 0.00018160220332465315,
	"loss": 0.706,
	"step": 6940
	},
	{
	"epoch": 0.22109978080625178,
	"grad_norm": 0.9426187872886658,
	"learning_rate": 0.00018148308743677407,
	"loss": 0.7549,
	"step": 6960
	},
	{
	"epoch": 0.2217351250039709,
	"grad_norm": 1.0274590253829956,
	"learning_rate": 0.00018136362651667123,
	"loss": 0.7118,
	"step": 6980
	},
	{
	"epoch": 0.22237046920169,
	"grad_norm": 1.0056123733520508,
	"learning_rate": 0.00018124382107019028,
	"loss": 0.7284,
	"step": 7000
	},
	{
	"epoch": 0.22237046920169,
	"eval_loss": 0.6820850968360901,
	"eval_runtime": 44.1137,
	"eval_samples_per_second": 61.274,
	"eval_steps_per_second": 30.648,
	"step": 7000
	},
	{
	"epoch": 0.22300581339940914,
	"grad_norm": 1.01372492313385,
	"learning_rate": 0.0001811236716046358,
	"loss": 0.7306,
	"step": 7020
	},
	{
	"epoch": 0.22364115759712824,
	"grad_norm": 0.8217781782150269,
	"learning_rate": 0.000181003178628769,
	"loss": 0.7216,
	"step": 7040
	},
	{
	"epoch": 0.22427650179484737,
	"grad_norm": 0.9484082460403442,
	"learning_rate": 0.00018088234265280573,
	"loss": 0.7164,
	"step": 7060
	},
	{
	"epoch": 0.22491184599256647,
	"grad_norm": 1.2144994735717773,
	"learning_rate": 0.0001807672312378185,
	"loss": 0.7248,
	"step": 7080
	},
	{
	"epoch": 0.22554719019028557,
	"grad_norm": 0.9574259519577026,
	"learning_rate": 0.00018064572788467363,
	"loss": 0.689,
	"step": 7100
	},
	{
	"epoch": 0.2261825343880047,
	"grad_norm": 0.7626876831054688,
	"learning_rate": 0.00018052998338935085,
	"loss": 0.748,
	"step": 7120
	},
	{
	"epoch": 0.2268178785857238,
	"grad_norm": 0.8534376621246338,
	"learning_rate": 0.00018040781461538648,
	"loss": 0.6947,
	"step": 7140
	},
	{
	"epoch": 0.22745322278344293,
	"grad_norm": 1.0029544830322266,
	"learning_rate": 0.00018028530536233676,
	"loss": 0.7319,
	"step": 7160
	},
	{
	"epoch": 0.22808856698116203,
	"grad_norm": 0.925713300704956,
	"learning_rate": 0.00018016245614895518,
	"loss": 0.7092,
	"step": 7180
	},
	{
	"epoch": 0.22872391117888116,
	"grad_norm": 0.8006899952888489,
	"learning_rate": 0.00018003926749543488,
	"loss": 0.6879,
	"step": 7200
	},
	{
	"epoch": 0.22935925537660026,
	"grad_norm": 0.8886255025863647,
	"learning_rate": 0.00017991573992340616,
	"loss": 0.6784,
	"step": 7220
	},
	{
	"epoch": 0.2299945995743194,
	"grad_norm": 0.8108293414115906,
	"learning_rate": 0.00017979187395593459,
	"loss": 0.7094,
	"step": 7240
	},
	{
	"epoch": 0.2306299437720385,
	"grad_norm": 1.0475900173187256,
	"learning_rate": 0.00017966767011751858,
	"loss": 0.696,
	"step": 7260
	},
	{
	"epoch": 0.23126528796975762,
	"grad_norm": 0.9214044809341431,
	"learning_rate": 0.0001795431289340872,
	"loss": 0.7125,
	"step": 7280
	},
	{
	"epoch": 0.23190063216747672,
	"grad_norm": 0.996101975440979,
	"learning_rate": 0.00017941825093299802,
	"loss": 0.6635,
	"step": 7300
	},
	{
	"epoch": 0.23253597636519585,
	"grad_norm": 0.9577082991600037,
	"learning_rate": 0.00017929303664303482,
	"loss": 0.6753,
	"step": 7320
	},
	{
	"epoch": 0.23317132056291495,
	"grad_norm": 1.0278524160385132,
	"learning_rate": 0.00017916748659440533,
	"loss": 0.7024,
	"step": 7340
	},
	{
	"epoch": 0.23380666476063408,
	"grad_norm": 0.758007287979126,
	"learning_rate": 0.00017904160131873906,
	"loss": 0.6877,
	"step": 7360
	},
	{
	"epoch": 0.23444200895835318,
	"grad_norm": 0.8926889300346375,
	"learning_rate": 0.00017891538134908502,
	"loss": 0.7123,
	"step": 7380
	},
	{
	"epoch": 0.2350773531560723,
	"grad_norm": 0.8747749924659729,
	"learning_rate": 0.00017878882721990936,
	"loss": 0.656,
	"step": 7400
	},
	{
	"epoch": 0.2357126973537914,
	"grad_norm": 1.012324333190918,
	"learning_rate": 0.00017866193946709327,
	"loss": 0.6885,
	"step": 7420
	},
	{
	"epoch": 0.23634804155151054,
	"grad_norm": 0.7973082065582275,
	"learning_rate": 0.00017853471862793068,
	"loss": 0.6627,
	"step": 7440
	},
	{
	"epoch": 0.23698338574922964,
	"grad_norm": 0.8259735107421875,
	"learning_rate": 0.00017840716524112582,
	"loss": 0.6861,
	"step": 7460
	},
	{
	"epoch": 0.23761872994694877,
	"grad_norm": 0.7817295789718628,
	"learning_rate": 0.00017827927984679113,
	"loss": 0.6808,
	"step": 7480
	},
	{
	"epoch": 0.23825407414466787,
	"grad_norm": 0.8139945864677429,
	"learning_rate": 0.00017815106298644495,
	"loss": 0.6891,
	"step": 7500
	},
	{
	"epoch": 0.238889418342387,
	"grad_norm": 1.0507733821868896,
	"learning_rate": 0.00017802251520300906,
	"loss": 0.6936,
	"step": 7520
	},
	{
	"epoch": 0.2395247625401061,
	"grad_norm": 0.929937481880188,
	"learning_rate": 0.0001778936370408066,
	"loss": 0.687,
	"step": 7540
	},
	{
	"epoch": 0.24016010673782523,
	"grad_norm": 1.0632777214050293,
	"learning_rate": 0.00017776442904555962,
	"loss": 0.6656,
	"step": 7560
	},
	{
	"epoch": 0.24079545093554433,
	"grad_norm": 1.1247339248657227,
	"learning_rate": 0.00017763489176438686,
	"loss": 0.6645,
	"step": 7580
	},
	{
	"epoch": 0.24143079513326343,
	"grad_norm": 0.8897901773452759,
	"learning_rate": 0.00017750502574580135,
	"loss": 0.6832,
	"step": 7600
	},
	{
	"epoch": 0.24206613933098256,
	"grad_norm": 0.9285283088684082,
	"learning_rate": 0.00017737483153970816,
	"loss": 0.6841,
	"step": 7620
	},
	{
	"epoch": 0.24270148352870166,
	"grad_norm": 0.8733476400375366,
	"learning_rate": 0.00017724430969740196,
	"loss": 0.6567,
	"step": 7640
	},
	{
	"epoch": 0.2433368277264208,
	"grad_norm": 0.9532790184020996,
	"learning_rate": 0.0001771134607715649,
	"loss": 0.6795,
	"step": 7660
	},
	{
	"epoch": 0.2439721719241399,
	"grad_norm": 1.0881035327911377,
	"learning_rate": 0.00017698228531626398,
	"loss": 0.693,
	"step": 7680
	},
	{
	"epoch": 0.24460751612185902,
	"grad_norm": 1.0936851501464844,
	"learning_rate": 0.00017685078388694897,
	"loss": 0.6852,
	"step": 7700
	},
	{
	"epoch": 0.24524286031957812,
	"grad_norm": 1.0439817905426025,
	"learning_rate": 0.0001767189570404499,
	"loss": 0.6746,
	"step": 7720
	},
	{
	"epoch": 0.24587820451729725,
	"grad_norm": 0.8599082231521606,
	"learning_rate": 0.00017658680533497477,
	"loss": 0.6719,
	"step": 7740
	},
	{
	"epoch": 0.24651354871501635,
	"grad_norm": 0.9633190035820007,
	"learning_rate": 0.00017645432933010712,
	"loss": 0.7091,
	"step": 7760
	},
	{
	"epoch": 0.24714889291273548,
	"grad_norm": 0.8989465236663818,
	"learning_rate": 0.00017632152958680378,
	"loss": 0.6649,
	"step": 7780
	},
	{
	"epoch": 0.24778423711045458,
	"grad_norm": 0.8468721508979797,
	"learning_rate": 0.00017618840666739228,
	"loss": 0.6789,
	"step": 7800
	},
	{
	"epoch": 0.2484195813081737,
	"grad_norm": 0.8482181429862976,
	"learning_rate": 0.00017605496113556882,
	"loss": 0.6902,
	"step": 7820
	},
	{
	"epoch": 0.2490549255058928,
	"grad_norm": 0.8012595176696777,
	"learning_rate": 0.00017592119355639544,
	"loss": 0.6733,
	"step": 7840
	},
	{
	"epoch": 0.24969026970361194,
	"grad_norm": 0.8117650151252747,
	"learning_rate": 0.00017578710449629804,
	"loss": 0.6916,
	"step": 7860
	},
	{
	"epoch": 0.25032561390133107,
	"grad_norm": 0.9711939096450806,
	"learning_rate": 0.00017565269452306364,
	"loss": 0.6701,
	"step": 7880
	},
	{
	"epoch": 0.25096095809905017,
	"grad_norm": 0.8234876394271851,
	"learning_rate": 0.00017551796420583833,
	"loss": 0.62,
	"step": 7900
	},
	{
	"epoch": 0.25159630229676927,
	"grad_norm": 0.8263707756996155,
	"learning_rate": 0.00017538967420545803,
	"loss": 0.6907,
	"step": 7920
	},
	{
	"epoch": 0.25223164649448837,
	"grad_norm": 1.2548505067825317,
	"learning_rate": 0.00017525432085959138,
	"loss": 0.6644,
	"step": 7940
	},
	{
	"epoch": 0.2528669906922075,
	"grad_norm": 1.1948567628860474,
	"learning_rate": 0.00017511864885660835,
	"loss": 0.6609,
	"step": 7960
	},
	{
	"epoch": 0.25350233488992663,
	"grad_norm": 0.9310169219970703,
	"learning_rate": 0.0001749826587709989,
	"loss": 0.6757,
	"step": 7980
	},
	{
	"epoch": 0.25413767908764573,
	"grad_norm": 0.8832531571388245,
	"learning_rate": 0.00017484635117859983,
	"loss": 0.6552,
	"step": 8000
	},
	{
	"epoch": 0.25413767908764573,
	"eval_loss": 0.6333429217338562,
	"eval_runtime": 44.406,
	"eval_samples_per_second": 60.87,
	"eval_steps_per_second": 30.446,
	"step": 8000
	},
	{
	"epoch": 0.25477302328536483,
	"grad_norm": 0.7624004483222961,
	"learning_rate": 0.00017470972665659245,
	"loss": 0.6567,
	"step": 8020
	},
	{
	"epoch": 0.255408367483084,
	"grad_norm": 0.9134401082992554,
	"learning_rate": 0.00017457278578350002,
	"loss": 0.6681,
	"step": 8040
	},
	{
	"epoch": 0.2560437116808031,
	"grad_norm": 0.9597674608230591,
	"learning_rate": 0.00017443552913918534,
	"loss": 0.6818,
	"step": 8060
	},
	{
	"epoch": 0.2566790558785222,
	"grad_norm": 0.961934506893158,
	"learning_rate": 0.00017429795730484836,
	"loss": 0.6833,
	"step": 8080
	},
	{
	"epoch": 0.2573144000762413,
	"grad_norm": 0.9118033647537231,
	"learning_rate": 0.00017416007086302367,
	"loss": 0.6607,
	"step": 8100
	},
	{
	"epoch": 0.2579497442739604,
	"grad_norm": 0.8447214961051941,
	"learning_rate": 0.00017402187039757805,
	"loss": 0.6409,
	"step": 8120
	},
	{
	"epoch": 0.25858508847167955,
	"grad_norm": 1.010040044784546,
	"learning_rate": 0.0001738833564937079,
	"loss": 0.6761,
	"step": 8140
	},
	{
	"epoch": 0.25922043266939865,
	"grad_norm": 0.8686466217041016,
	"learning_rate": 0.00017374452973793693,
	"loss": 0.6575,
	"step": 8160
	},
	{
	"epoch": 0.25985577686711775,
	"grad_norm": 1.0445839166641235,
	"learning_rate": 0.00017360539071811356,
	"loss": 0.667,
	"step": 8180
	},
	{
	"epoch": 0.26049112106483685,
	"grad_norm": 1.1015607118606567,
	"learning_rate": 0.00017346594002340843,
	"loss": 0.6468,
	"step": 8200
	},
	{
	"epoch": 0.261126465262556,
	"grad_norm": 1.4550483226776123,
	"learning_rate": 0.00017332617824431204,
	"loss": 0.6642,
	"step": 8220
	},
	{
	"epoch": 0.2617618094602751,
	"grad_norm": 0.8968580961227417,
	"learning_rate": 0.000173186105972632,
	"loss": 0.6695,
	"step": 8240
	},
	{
	"epoch": 0.2623971536579942,
	"grad_norm": 0.9802786111831665,
	"learning_rate": 0.00017304572380149078,
	"loss": 0.6516,
	"step": 8260
	},
	{
	"epoch": 0.2630324978557133,
	"grad_norm": 0.8785617351531982,
	"learning_rate": 0.00017290503232532305,
	"loss": 0.6857,
	"step": 8280
	},
	{
	"epoch": 0.26366784205343247,
	"grad_norm": 0.8675135970115662,
	"learning_rate": 0.00017276403213987323,
	"loss": 0.6493,
	"step": 8300
	},
	{
	"epoch": 0.26430318625115157,
	"grad_norm": 0.8159687519073486,
	"learning_rate": 0.0001726227238421929,
	"loss": 0.6445,
	"step": 8320
	},
	{
	"epoch": 0.26493853044887067,
	"grad_norm": 0.8598359823226929,
	"learning_rate": 0.00017248110803063833,
	"loss": 0.6515,
	"step": 8340
	},
	{
	"epoch": 0.26557387464658977,
	"grad_norm": 1.0304324626922607,
	"learning_rate": 0.00017233918530486792,
	"loss": 0.6431,
	"step": 8360
	},
	{
	"epoch": 0.2662092188443089,
	"grad_norm": 0.933110773563385,
	"learning_rate": 0.0001722040749834389,
	"loss": 0.6958,
	"step": 8380
	},
	{
	"epoch": 0.266844563042028,
	"grad_norm": 0.9690568447113037,
	"learning_rate": 0.0001720615555046345,
	"loss": 0.5922,
	"step": 8400
	},
	{
	"epoch": 0.26747990723974713,
	"grad_norm": 0.9293822646141052,
	"learning_rate": 0.0001719187308881687,
	"loss": 0.6407,
	"step": 8420
	},
	{
	"epoch": 0.26811525143746623,
	"grad_norm": 0.8957870602607727,
	"learning_rate": 0.00017177560173881846,
	"loss": 0.662,
	"step": 8440
	},
	{
	"epoch": 0.2687505956351854,
	"grad_norm": 1.0288225412368774,
	"learning_rate": 0.0001716321686626503,
	"loss": 0.6395,
	"step": 8460
	},
	{
	"epoch": 0.2693859398329045,
	"grad_norm": 0.838657021522522,
	"learning_rate": 0.00017148843226701764,
	"loss": 0.6313,
	"step": 8480
	},
	{
	"epoch": 0.2700212840306236,
	"grad_norm": 0.8575971722602844,
	"learning_rate": 0.00017134439316055834,
	"loss": 0.6655,
	"step": 8500
	},
	{
	"epoch": 0.2706566282283427,
	"grad_norm": 0.9840354919433594,
	"learning_rate": 0.00017120005195319195,
	"loss": 0.6646,
	"step": 8520
	},
	{
	"epoch": 0.27129197242606184,
	"grad_norm": 0.8279704451560974,
	"learning_rate": 0.00017105540925611737,
	"loss": 0.6259,
	"step": 8540
	},
	{
	"epoch": 0.27192731662378095,
	"grad_norm": 1.0609900951385498,
	"learning_rate": 0.00017091046568180996,
	"loss": 0.6561,
	"step": 8560
	},
	{
	"epoch": 0.27256266082150005,
	"grad_norm": 0.890514612197876,
	"learning_rate": 0.0001707652218440193,
	"loss": 0.6324,
	"step": 8580
	},
	{
	"epoch": 0.27319800501921915,
	"grad_norm": 0.9357948303222656,
	"learning_rate": 0.0001706196783577663,
	"loss": 0.6116,
	"step": 8600
	},
	{
	"epoch": 0.27383334921693825,
	"grad_norm": 0.9577456116676331,
	"learning_rate": 0.0001704738358393407,
	"loss": 0.6764,
	"step": 8620
	},
	{
	"epoch": 0.2744686934146574,
	"grad_norm": 0.834900438785553,
	"learning_rate": 0.0001703276949062985,
	"loss": 0.6324,
	"step": 8640
	},
	{
	"epoch": 0.2751040376123765,
	"grad_norm": 0.8283354043960571,
	"learning_rate": 0.00017018125617745933,
	"loss": 0.6187,
	"step": 8660
	},
	{
	"epoch": 0.2757393818100956,
	"grad_norm": 0.854200541973114,
	"learning_rate": 0.00017003452027290373,
	"loss": 0.6294,
	"step": 8680
	},
	{
	"epoch": 0.2763747260078147,
	"grad_norm": 0.8695046901702881,
	"learning_rate": 0.00016988748781397064,
	"loss": 0.6377,
	"step": 8700
	},
	{
	"epoch": 0.27701007020553386,
	"grad_norm": 0.7802212238311768,
	"learning_rate": 0.00016974015942325475,
	"loss": 0.6051,
	"step": 8720
	},
	{
	"epoch": 0.27764541440325297,
	"grad_norm": 1.0842890739440918,
	"learning_rate": 0.00016959253572460382,
	"loss": 0.6352,
	"step": 8740
	},
	{
	"epoch": 0.27828075860097207,
	"grad_norm": 0.8472367525100708,
	"learning_rate": 0.0001694446173431161,
	"loss": 0.5907,
	"step": 8760
	},
	{
	"epoch": 0.27891610279869117,
	"grad_norm": 0.8548029661178589,
	"learning_rate": 0.0001692964049051376,
	"loss": 0.6434,
	"step": 8780
	},
	{
	"epoch": 0.2795514469964103,
	"grad_norm": 0.9771581888198853,
	"learning_rate": 0.00016914789903825945,
	"loss": 0.6381,
	"step": 8800
	},
	{
	"epoch": 0.2801867911941294,
	"grad_norm": 0.9199798703193665,
	"learning_rate": 0.0001689991003713154,
	"loss": 0.6589,
	"step": 8820
	},
	{
	"epoch": 0.2808221353918485,
	"grad_norm": 1.0753369331359863,
	"learning_rate": 0.00016885000953437894,
	"loss": 0.6413,
	"step": 8840
	},
	{
	"epoch": 0.2814574795895676,
	"grad_norm": 1.0925753116607666,
	"learning_rate": 0.00016870062715876075,
	"loss": 0.6234,
	"step": 8860
	},
	{
	"epoch": 0.2820928237872868,
	"grad_norm": 1.0023586750030518,
	"learning_rate": 0.00016855095387700598,
	"loss": 0.6104,
	"step": 8880
	},
	{
	"epoch": 0.2827281679850059,
	"grad_norm": 0.9077417254447937,
	"learning_rate": 0.00016840099032289162,
	"loss": 0.602,
	"step": 8900
	},
	{
	"epoch": 0.283363512182725,
	"grad_norm": 0.8238940238952637,
	"learning_rate": 0.00016825073713142374,
	"loss": 0.6157,
	"step": 8920
	},
	{
	"epoch": 0.2839988563804441,
	"grad_norm": 1.111948847770691,
	"learning_rate": 0.000168100194938835,
	"loss": 0.6092,
	"step": 8940
	},
	{
	"epoch": 0.28463420057816324,
	"grad_norm": 1.0630967617034912,
	"learning_rate": 0.0001679493643825816,
	"loss": 0.5904,
	"step": 8960
	},
	{
	"epoch": 0.28526954477588234,
	"grad_norm": 0.8827186822891235,
	"learning_rate": 0.00016779824610134092,
	"loss": 0.6166,
	"step": 8980
	},
	{
	"epoch": 0.28590488897360145,
	"grad_norm": 0.9229192137718201,
	"learning_rate": 0.00016764684073500866,
	"loss": 0.6178,
	"step": 9000
	},
	{
	"epoch": 0.28590488897360145,
	"eval_loss": 0.5966877341270447,
	"eval_runtime": 44.6044,
	"eval_samples_per_second": 60.599,
	"eval_steps_per_second": 30.311,
	"step": 9000
	},
	{
	"epoch": 0.28654023317132055,
	"grad_norm": 0.8136707544326782,
	"learning_rate": 0.00016749514892469615,
	"loss": 0.6366,
	"step": 9020
	},
	{
	"epoch": 0.2871755773690397,
	"grad_norm": 0.8175415992736816,
	"learning_rate": 0.00016734317131272762,
	"loss": 0.6177,
	"step": 9040
	},
	{
	"epoch": 0.2878109215667588,
	"grad_norm": 0.929182767868042,
	"learning_rate": 0.00016719090854263753,
	"loss": 0.646,
	"step": 9060
	},
	{
	"epoch": 0.2884462657644779,
	"grad_norm": 0.9779849052429199,
	"learning_rate": 0.0001670383612591678,
	"loss": 0.6362,
	"step": 9080
	},
	{
	"epoch": 0.289081609962197,
	"grad_norm": 0.8542407751083374,
	"learning_rate": 0.00016688553010826506,
	"loss": 0.6076,
	"step": 9100
	},
	{
	"epoch": 0.2897169541599161,
	"grad_norm": 0.8885607719421387,
	"learning_rate": 0.00016673241573707804,
	"loss": 0.6055,
	"step": 9120
	},
	{
	"epoch": 0.29035229835763526,
	"grad_norm": 0.876097559928894,
	"learning_rate": 0.0001665790187939546,
	"loss": 0.6196,
	"step": 9140
	},
	{
	"epoch": 0.29098764255535436,
	"grad_norm": 1.0198227167129517,
	"learning_rate": 0.0001664253399284393,
	"loss": 0.6374,
	"step": 9160
	},
	{
	"epoch": 0.29162298675307347,
	"grad_norm": 0.8938513994216919,
	"learning_rate": 0.00016627137979127033,
	"loss": 0.6254,
	"step": 9180
	},
	{
	"epoch": 0.29225833095079257,
	"grad_norm": 0.7427443861961365,
	"learning_rate": 0.00016611713903437692,
	"loss": 0.6099,
	"step": 9200
	},
	{
	"epoch": 0.2928936751485117,
	"grad_norm": 0.9959378242492676,
	"learning_rate": 0.00016596261831087661,
	"loss": 0.648,
	"step": 9220
	},
	{
	"epoch": 0.2935290193462308,
	"grad_norm": 1.048519253730774,
	"learning_rate": 0.00016580781827507242,
	"loss": 0.6292,
	"step": 9240
	},
	{
	"epoch": 0.2941643635439499,
	"grad_norm": 0.858858585357666,
	"learning_rate": 0.00016565273958245002,
	"loss": 0.6252,
	"step": 9260
	},
	{
	"epoch": 0.294799707741669,
	"grad_norm": 0.8437022566795349,
	"learning_rate": 0.00016549738288967514,
	"loss": 0.6188,
	"step": 9280
	},
	{
	"epoch": 0.2954350519393882,
	"grad_norm": 0.8608834743499756,
	"learning_rate": 0.00016534174885459056,
	"loss": 0.6509,
	"step": 9300
	},
	{
	"epoch": 0.2960703961371073,
	"grad_norm": 1.083897590637207,
	"learning_rate": 0.00016518583813621357,
	"loss": 0.6193,
	"step": 9320
	},
	{
	"epoch": 0.2967057403348264,
	"grad_norm": 0.9606235027313232,
	"learning_rate": 0.0001650296513947329,
	"loss": 0.6287,
	"step": 9340
	},
	{
	"epoch": 0.2973410845325455,
	"grad_norm": 1.0519804954528809,
	"learning_rate": 0.00016487318929150617,
	"loss": 0.6097,
	"step": 9360
	},
	{
	"epoch": 0.29797642873026464,
	"grad_norm": 1.3490453958511353,
	"learning_rate": 0.000164716452489057,
	"loss": 0.6043,
	"step": 9380
	},
	{
	"epoch": 0.29861177292798374,
	"grad_norm": 1.1292142868041992,
	"learning_rate": 0.00016455944165107207,
	"loss": 0.5896,
	"step": 9400
	},
	{
	"epoch": 0.29924711712570284,
	"grad_norm": 0.9570278525352478,
	"learning_rate": 0.00016440215744239865,
	"loss": 0.6087,
	"step": 9420
	},
	{
	"epoch": 0.29988246132342194,
	"grad_norm": 0.8570756316184998,
	"learning_rate": 0.00016424460052904137,
	"loss": 0.6036,
	"step": 9440
	},
	{
	"epoch": 0.3005178055211411,
	"grad_norm": 0.9214951395988464,
	"learning_rate": 0.00016408677157815974,
	"loss": 0.6519,
	"step": 9460
	},
	{
	"epoch": 0.3011531497188602,
	"grad_norm": 1.1580623388290405,
	"learning_rate": 0.00016392867125806504,
	"loss": 0.5991,
	"step": 9480
	},
	{
	"epoch": 0.3017884939165793,
	"grad_norm": 1.1025846004486084,
	"learning_rate": 0.00016377030023821782,
	"loss": 0.6416,
	"step": 9500
	},
	{
	"epoch": 0.3024238381142984,
	"grad_norm": 0.8918984532356262,
	"learning_rate": 0.00016361165918922477,
	"loss": 0.6165,
	"step": 9520
	},
	{
	"epoch": 0.30305918231201756,
	"grad_norm": 0.8747968673706055,
	"learning_rate": 0.000163452748782836,
	"loss": 0.6094,
	"step": 9540
	},
	{
	"epoch": 0.30369452650973666,
	"grad_norm": 0.7480270862579346,
	"learning_rate": 0.0001632935696919422,
	"loss": 0.5987,
	"step": 9560
	},
	{
	"epoch": 0.30432987070745576,
	"grad_norm": 0.8854328393936157,
	"learning_rate": 0.00016313412259057178,
	"loss": 0.6514,
	"step": 9580
	},
	{
	"epoch": 0.30496521490517486,
	"grad_norm": 1.0659030675888062,
	"learning_rate": 0.00016297440815388802,
	"loss": 0.5796,
	"step": 9600
	},
	{
	"epoch": 0.305600559102894,
	"grad_norm": 0.9668769240379333,
	"learning_rate": 0.00016281442705818618,
	"loss": 0.6147,
	"step": 9620
	},
	{
	"epoch": 0.3062359033006131,
	"grad_norm": 0.939028263092041,
	"learning_rate": 0.00016265417998089068,
	"loss": 0.6241,
	"step": 9640
	},
	{
	"epoch": 0.3068712474983322,
	"grad_norm": 0.8955005407333374,
	"learning_rate": 0.00016249366760055222,
	"loss": 0.5832,
	"step": 9660
	},
	{
	"epoch": 0.3075065916960513,
	"grad_norm": 0.7991370558738708,
	"learning_rate": 0.00016233289059684492,
	"loss": 0.5799,
	"step": 9680
	},
	{
	"epoch": 0.3081419358937704,
	"grad_norm": 0.8115846514701843,
	"learning_rate": 0.00016217184965056336,
	"loss": 0.6109,
	"step": 9700
	},
	{
	"epoch": 0.3087772800914896,
	"grad_norm": 0.7488042712211609,
	"learning_rate": 0.00016201054544361977,
	"loss": 0.6166,
	"step": 9720
	},
	{
	"epoch": 0.3094126242892087,
	"grad_norm": 0.8463062644004822,
	"learning_rate": 0.00016184897865904123,
	"loss": 0.5779,
	"step": 9740
	},
	{
	"epoch": 0.3100479684869278,
	"grad_norm": 1.083001732826233,
	"learning_rate": 0.00016168714998096654,
	"loss": 0.6175,
	"step": 9760
	},
	{
	"epoch": 0.3106833126846469,
	"grad_norm": 0.8545092940330505,
	"learning_rate": 0.00016152506009464357,
	"loss": 0.6104,
	"step": 9780
	},
	{
	"epoch": 0.31131865688236604,
	"grad_norm": 0.9297589063644409,
	"learning_rate": 0.00016136270968642618,
	"loss": 0.5831,
	"step": 9800
	},
	{
	"epoch": 0.31195400108008514,
	"grad_norm": 0.7775977253913879,
	"learning_rate": 0.0001612000994437714,
	"loss": 0.6001,
	"step": 9820
	},
	{
	"epoch": 0.31258934527780424,
	"grad_norm": 0.943267822265625,
	"learning_rate": 0.0001610372300552366,
	"loss": 0.6089,
	"step": 9840
	},
	{
	"epoch": 0.31322468947552334,
	"grad_norm": 0.8398995399475098,
	"learning_rate": 0.0001608741022104763,
	"loss": 0.5929,
	"step": 9860
	},
	{
	"epoch": 0.3138600336732425,
	"grad_norm": 1.0078269243240356,
	"learning_rate": 0.00016071071660023954,
	"loss": 0.6215,
	"step": 9880
	},
	{
	"epoch": 0.3144953778709616,
	"grad_norm": 0.9710105657577515,
	"learning_rate": 0.0001605470739163669,
	"loss": 0.5983,
	"step": 9900
	},
	{
	"epoch": 0.3151307220686807,
	"grad_norm": 0.8864800333976746,
	"learning_rate": 0.00016038317485178734,
	"loss": 0.5812,
	"step": 9920
	},
	{
	"epoch": 0.3157660662663998,
	"grad_norm": 0.9775105118751526,
	"learning_rate": 0.0001602190201005156,
	"loss": 0.5899,
	"step": 9940
	},
	{
	"epoch": 0.31640141046411896,
	"grad_norm": 0.8554601669311523,
	"learning_rate": 0.00016005461035764902,
	"loss": 0.5989,
	"step": 9960
	},
	{
	"epoch": 0.31703675466183806,
	"grad_norm": 0.8149896264076233,
	"learning_rate": 0.0001598899463193647,
	"loss": 0.6383,
	"step": 9980
	},
	{
	"epoch": 0.31767209885955716,
	"grad_norm": 1.1985602378845215,
	"learning_rate": 0.00015972502868291652,
	"loss": 0.604,
	"step": 10000
	},
	{
	"epoch": 0.31767209885955716,
	"eval_loss": 0.5633410811424255,
	"eval_runtime": 44.2566,
	"eval_samples_per_second": 61.076,
	"eval_steps_per_second": 30.549,
	"step": 10000
	},
	{
	"epoch": 0.31830744305727626,
	"grad_norm": 0.9848890900611877,
	"learning_rate": 0.0001595598581466322,
	"loss": 0.5741,
	"step": 10020
	},
	{
	"epoch": 0.3189427872549954,
	"grad_norm": 1.0653225183486938,
	"learning_rate": 0.00015939443540991034,
	"loss": 0.6154,
	"step": 10040
	},
	{
	"epoch": 0.3195781314527145,
	"grad_norm": 0.8440039157867432,
	"learning_rate": 0.0001592287611732175,
	"loss": 0.6077,
	"step": 10060
	},
	{
	"epoch": 0.3202134756504336,
	"grad_norm": 0.8706631660461426,
	"learning_rate": 0.00015906283613808508,
	"loss": 0.6143,
	"step": 10080
	},
	{
	"epoch": 0.3208488198481527,
	"grad_norm": 1.0338808298110962,
	"learning_rate": 0.00015889666100710659,
	"loss": 0.5697,
	"step": 10100
	},
	{
	"epoch": 0.3214841640458719,
	"grad_norm": 0.8499680757522583,
	"learning_rate": 0.00015873023648393448,
	"loss": 0.5968,
	"step": 10120
	},
	{
	"epoch": 0.322119508243591,
	"grad_norm": 1.0106873512268066,
	"learning_rate": 0.00015856356327327724,
	"loss": 0.5657,
	"step": 10140
	},
	{
	"epoch": 0.3227548524413101,
	"grad_norm": 0.9771645665168762,
	"learning_rate": 0.00015839664208089634,
	"loss": 0.5989,
	"step": 10160
	},
	{
	"epoch": 0.3233901966390292,
	"grad_norm": 0.9425153136253357,
	"learning_rate": 0.0001582294736136035,
	"loss": 0.6314,
	"step": 10180
	},
	{
	"epoch": 0.3240255408367483,
	"grad_norm": 1.1419885158538818,
	"learning_rate": 0.0001580620585792572,
	"loss": 0.6137,
	"step": 10200
	},
	{
	"epoch": 0.32466088503446744,
	"grad_norm": 0.8356417417526245,
	"learning_rate": 0.00015789439768676032,
	"loss": 0.6189,
	"step": 10220
	},
	{
	"epoch": 0.32529622923218654,
	"grad_norm": 0.9876666069030762,
	"learning_rate": 0.00015772649164605648,
	"loss": 0.6069,
	"step": 10240
	},
	{
	"epoch": 0.32593157342990564,
	"grad_norm": 1.0510075092315674,
	"learning_rate": 0.0001575583411681276,
	"loss": 0.5996,
	"step": 10260
	},
	{
	"epoch": 0.32656691762762474,
	"grad_norm": 0.91109299659729,
	"learning_rate": 0.00015738994696499055,
	"loss": 0.5996,
	"step": 10280
	},
	{
	"epoch": 0.3272022618253439,
	"grad_norm": 0.8995181322097778,
	"learning_rate": 0.00015722130974969421,
	"loss": 0.5798,
	"step": 10300
	},
	{
	"epoch": 0.327837606023063,
	"grad_norm": 1.1067475080490112,
	"learning_rate": 0.00015705243023631652,
	"loss": 0.5983,
	"step": 10320
	},
	{
	"epoch": 0.3284729502207821,
	"grad_norm": 1.0324633121490479,
	"learning_rate": 0.00015688330913996135,
	"loss": 0.6011,
	"step": 10340
	},
	{
	"epoch": 0.3291082944185012,
	"grad_norm": 1.0662481784820557,
	"learning_rate": 0.0001567139471767556,
	"loss": 0.6254,
	"step": 10360
	},
	{
	"epoch": 0.32974363861622036,
	"grad_norm": 0.9539555907249451,
	"learning_rate": 0.00015654434506384607,
	"loss": 0.6176,
	"step": 10380
	},
	{
	"epoch": 0.33037898281393946,
	"grad_norm": 0.7341588139533997,
	"learning_rate": 0.00015637450351939637,
	"loss": 0.5852,
	"step": 10400
	},
	{
	"epoch": 0.33101432701165856,
	"grad_norm": 0.9077139496803284,
	"learning_rate": 0.00015620442326258414,
	"loss": 0.609,
	"step": 10420
	},
	{
	"epoch": 0.33164967120937766,
	"grad_norm": 1.083999752998352,
	"learning_rate": 0.00015603410501359766,
	"loss": 0.5768,
	"step": 10440
	},
	{
	"epoch": 0.3322850154070968,
	"grad_norm": 0.9190422296524048,
	"learning_rate": 0.000155863549493633,
	"loss": 0.5845,
	"step": 10460
	},
	{
	"epoch": 0.3329203596048159,
	"grad_norm": 1.0731889009475708,
	"learning_rate": 0.000155692757424891,
	"loss": 0.5988,
	"step": 10480
	},
	{
	"epoch": 0.333555703802535,
	"grad_norm": 0.9898316264152527,
	"learning_rate": 0.00015552172953057407,
	"loss": 0.5918,
	"step": 10500
	},
	{
	"epoch": 0.3341910480002541,
	"grad_norm": 1.135695219039917,
	"learning_rate": 0.00015535046653488322,
	"loss": 0.5882,
	"step": 10520
	},
	{
	"epoch": 0.3348263921979733,
	"grad_norm": 1.0453022718429565,
	"learning_rate": 0.000155178969163015,
	"loss": 0.609,
	"step": 10540
	},
	{
	"epoch": 0.3354617363956924,
	"grad_norm": 0.9859703183174133,
	"learning_rate": 0.00015500723814115835,
	"loss": 0.5899,
	"step": 10560
	},
	{
	"epoch": 0.3360970805934115,
	"grad_norm": 1.031168818473816,
	"learning_rate": 0.00015483527419649163,
	"loss": 0.5987,
	"step": 10580
	},
	{
	"epoch": 0.3367324247911306,
	"grad_norm": 1.1591908931732178,
	"learning_rate": 0.00015466307805717951,
	"loss": 0.6191,
	"step": 10600
	},
	{
	"epoch": 0.33736776898884974,
	"grad_norm": 0.8246921896934509,
	"learning_rate": 0.00015449065045236977,
	"loss": 0.6098,
	"step": 10620
	},
	{
	"epoch": 0.33800311318656884,
	"grad_norm": 0.8392571210861206,
	"learning_rate": 0.0001543179921121904,
	"loss": 0.5675,
	"step": 10640
	},
	{
	"epoch": 0.33863845738428794,
	"grad_norm": 0.8678343892097473,
	"learning_rate": 0.00015414510376774633,
	"loss": 0.5721,
	"step": 10660
	},
	{
	"epoch": 0.33927380158200704,
	"grad_norm": 0.8436061143875122,
	"learning_rate": 0.00015397198615111653,
	"loss": 0.5703,
	"step": 10680
	},
	{
	"epoch": 0.33990914577972614,
	"grad_norm": 0.9926438927650452,
	"learning_rate": 0.00015379863999535074,
	"loss": 0.6049,
	"step": 10700
	},
	{
	"epoch": 0.3405444899774453,
	"grad_norm": 1.098764419555664,
	"learning_rate": 0.00015362506603446637,
	"loss": 0.6007,
	"step": 10720
	},
	{
	"epoch": 0.3411798341751644,
	"grad_norm": 1.052038311958313,
	"learning_rate": 0.00015345126500344554,
	"loss": 0.5865,
	"step": 10740
	},
	{
	"epoch": 0.3418151783728835,
	"grad_norm": 0.8772541880607605,
	"learning_rate": 0.00015327723763823188,
	"loss": 0.6066,
	"step": 10760
	},
	{
	"epoch": 0.3424505225706026,
	"grad_norm": 0.7938296794891357,
	"learning_rate": 0.00015310298467572733,
	"loss": 0.5467,
	"step": 10780
	},
	{
	"epoch": 0.34308586676832176,
	"grad_norm": 1.0938440561294556,
	"learning_rate": 0.00015292850685378915,
	"loss": 0.5916,
	"step": 10800
	},
	{
	"epoch": 0.34372121096604086,
	"grad_norm": 0.8460657000541687,
	"learning_rate": 0.00015275380491122672,
	"loss": 0.603,
	"step": 10820
	},
	{
	"epoch": 0.34435655516375996,
	"grad_norm": 0.8238389492034912,
	"learning_rate": 0.00015257887958779854,
	"loss": 0.5808,
	"step": 10840
	},
	{
	"epoch": 0.34499189936147906,
	"grad_norm": 0.8064368367195129,
	"learning_rate": 0.0001524037316242088,
	"loss": 0.5862,
	"step": 10860
	},
	{
	"epoch": 0.3456272435591982,
	"grad_norm": 1.2068203687667847,
	"learning_rate": 0.00015222836176210467,
	"loss": 0.5694,
	"step": 10880
	},
	{
	"epoch": 0.3462625877569173,
	"grad_norm": 0.9752914309501648,
	"learning_rate": 0.00015205277074407266,
	"loss": 0.5367,
	"step": 10900
	},
	{
	"epoch": 0.3468979319546364,
	"grad_norm": 0.9989959597587585,
	"learning_rate": 0.00015187695931363602,
	"loss": 0.5712,
	"step": 10920
	},
	{
	"epoch": 0.3475332761523555,
	"grad_norm": 0.8734492659568787,
	"learning_rate": 0.00015170092821525114,
	"loss": 0.6029,
	"step": 10940
	},
	{
	"epoch": 0.3481686203500747,
	"grad_norm": 0.8759735822677612,
	"learning_rate": 0.00015152467819430458,
	"loss": 0.5676,
	"step": 10960
	},
	{
	"epoch": 0.3488039645477938,
	"grad_norm": 0.8554444909095764,
	"learning_rate": 0.00015134820999711,
	"loss": 0.5664,
	"step": 10980
	},
	{
	"epoch": 0.3494393087455129,
	"grad_norm": 0.730451762676239,
	"learning_rate": 0.00015117152437090482,
	"loss": 0.5735,
	"step": 11000
	},
	{
	"epoch": 0.3494393087455129,
	"eval_loss": 0.5449489951133728,
	"eval_runtime": 44.9152,
	"eval_samples_per_second": 60.18,
	"eval_steps_per_second": 30.101,
	"step": 11000
	},
	{
	"epoch": 0.350074652943232,
	"grad_norm": 0.7964712381362915,
	"learning_rate": 0.00015099462206384718,
	"loss": 0.5943,
	"step": 11020
	},
	{
	"epoch": 0.35070999714095114,
	"grad_norm": 0.809177577495575,
	"learning_rate": 0.00015081750382501277,
	"loss": 0.5986,
	"step": 11040
	},
	{
	"epoch": 0.35134534133867024,
	"grad_norm": 0.9207815527915955,
	"learning_rate": 0.00015064017040439148,
	"loss": 0.559,
	"step": 11060
	},
	{
	"epoch": 0.35198068553638934,
	"grad_norm": 0.9813947677612305,
	"learning_rate": 0.0001504626225528845,
	"loss": 0.5529,
	"step": 11080
	},
	{
	"epoch": 0.35261602973410844,
	"grad_norm": 0.9409967660903931,
	"learning_rate": 0.00015028486102230105,
	"loss": 0.5725,
	"step": 11100
	},
	{
	"epoch": 0.3532513739318276,
	"grad_norm": 0.9317089319229126,
	"learning_rate": 0.000150106886565355,
	"loss": 0.5568,
	"step": 11120
	},
	{
	"epoch": 0.3538867181295467,
	"grad_norm": 1.025341510772705,
	"learning_rate": 0.00014992869993566194,
	"loss": 0.5555,
	"step": 11140
	},
	{
	"epoch": 0.3545220623272658,
	"grad_norm": 1.0014809370040894,
	"learning_rate": 0.00014975030188773585,
	"loss": 0.5922,
	"step": 11160
	},
	{
	"epoch": 0.3551574065249849,
	"grad_norm": 0.9769735336303711,
	"learning_rate": 0.00014957169317698593,
	"loss": 0.583,
	"step": 11180
	},
	{
	"epoch": 0.355792750722704,
	"grad_norm": 0.8555041551589966,
	"learning_rate": 0.0001493928745597134,
	"loss": 0.5609,
	"step": 11200
	},
	{
	"epoch": 0.35642809492042316,
	"grad_norm": 0.9463367462158203,
	"learning_rate": 0.0001492138467931084,
	"loss": 0.5783,
	"step": 11220
	},
	{
	"epoch": 0.35706343911814226,
	"grad_norm": 0.9429970979690552,
	"learning_rate": 0.00014903461063524661,
	"loss": 0.5934,
	"step": 11240
	},
	{
	"epoch": 0.35769878331586136,
	"grad_norm": 1.4683854579925537,
	"learning_rate": 0.00014885516684508612,
	"loss": 0.5939,
	"step": 11260
	},
	{
	"epoch": 0.35833412751358046,
	"grad_norm": 0.825720489025116,
	"learning_rate": 0.00014867551618246428,
	"loss": 0.5685,
	"step": 11280
	},
	{
	"epoch": 0.3589694717112996,
	"grad_norm": 1.001832127571106,
	"learning_rate": 0.00014849565940809432,
	"loss": 0.5837,
	"step": 11300
	},
	{
	"epoch": 0.3596048159090187,
	"grad_norm": 0.9406988024711609,
	"learning_rate": 0.00014831559728356234,
	"loss": 0.5864,
	"step": 11320
	},
	{
	"epoch": 0.3602401601067378,
	"grad_norm": 0.7483388185501099,
	"learning_rate": 0.00014813533057132393,
	"loss": 0.5991,
	"step": 11340
	},
	{
	"epoch": 0.3608755043044569,
	"grad_norm": 0.8849460482597351,
	"learning_rate": 0.00014795486003470093,
	"loss": 0.5821,
	"step": 11360
	},
	{
	"epoch": 0.3615108485021761,
	"grad_norm": 0.7930045127868652,
	"learning_rate": 0.00014777418643787836,
	"loss": 0.5395,
	"step": 11380
	},
	{
	"epoch": 0.3621461926998952,
	"grad_norm": 0.9285226464271545,
	"learning_rate": 0.000147593310545901,
	"loss": 0.5713,
	"step": 11400
	},
	{
	"epoch": 0.3627815368976143,
	"grad_norm": 1.0233609676361084,
	"learning_rate": 0.00014741223312467026,
	"loss": 0.5875,
	"step": 11420
	},
	{
	"epoch": 0.3634168810953334,
	"grad_norm": 1.033948302268982,
	"learning_rate": 0.00014723095494094092,
	"loss": 0.5993,
	"step": 11440
	},
	{
	"epoch": 0.36405222529305253,
	"grad_norm": 0.9479451179504395,
	"learning_rate": 0.00014704947676231784,
	"loss": 0.571,
	"step": 11460
	},
	{
	"epoch": 0.36468756949077163,
	"grad_norm": 0.7781844735145569,
	"learning_rate": 0.0001468677993572528,
	"loss": 0.5503,
	"step": 11480
	},
	{
	"epoch": 0.36532291368849074,
	"grad_norm": 0.9249241352081299,
	"learning_rate": 0.00014668592349504101,
	"loss": 0.574,
	"step": 11500
	},
	{
	"epoch": 0.36595825788620984,
	"grad_norm": 0.9108446836471558,
	"learning_rate": 0.00014650384994581824,
	"loss": 0.557,
	"step": 11520
	},
	{
	"epoch": 0.366593602083929,
	"grad_norm": 1.0099608898162842,
	"learning_rate": 0.0001463215794805573,
	"loss": 0.5605,
	"step": 11540
	},
	{
	"epoch": 0.3672289462816481,
	"grad_norm": 0.8376953601837158,
	"learning_rate": 0.00014613911287106467,
	"loss": 0.538,
	"step": 11560
	},
	{
	"epoch": 0.3678642904793672,
	"grad_norm": 0.8893873691558838,
	"learning_rate": 0.00014595645088997757,
	"loss": 0.5606,
	"step": 11580
	},
	{
	"epoch": 0.3684996346770863,
	"grad_norm": 1.1310006380081177,
	"learning_rate": 0.00014577359431076046,
	"loss": 0.5612,
	"step": 11600
	},
	{
	"epoch": 0.36913497887480545,
	"grad_norm": 0.8577033281326294,
	"learning_rate": 0.00014559054390770167,
	"loss": 0.5688,
	"step": 11620
	},
	{
	"epoch": 0.36977032307252455,
	"grad_norm": 0.9386855959892273,
	"learning_rate": 0.00014540730045591044,
	"loss": 0.5614,
	"step": 11640
	},
	{
	"epoch": 0.37040566727024365,
	"grad_norm": 0.9492216110229492,
	"learning_rate": 0.00014522386473131332,
	"loss": 0.5878,
	"step": 11660
	},
	{
	"epoch": 0.37104101146796276,
	"grad_norm": 0.853327751159668,
	"learning_rate": 0.00014504023751065115,
	"loss": 0.5568,
	"step": 11680
	},
	{
	"epoch": 0.37167635566568186,
	"grad_norm": 0.7977784872055054,
	"learning_rate": 0.00014485641957147553,
	"loss": 0.5428,
	"step": 11700
	},
	{
	"epoch": 0.372311699863401,
	"grad_norm": 1.1006829738616943,
	"learning_rate": 0.00014467241169214567,
	"loss": 0.559,
	"step": 11720
	},
	{
	"epoch": 0.3729470440611201,
	"grad_norm": 1.08724045753479,
	"learning_rate": 0.0001444882146518251,
	"loss": 0.5642,
	"step": 11740
	},
	{
	"epoch": 0.3735823882588392,
	"grad_norm": 1.0295459032058716,
	"learning_rate": 0.00014430382923047831,
	"loss": 0.5969,
	"step": 11760
	},
	{
	"epoch": 0.3742177324565583,
	"grad_norm": 1.1096023321151733,
	"learning_rate": 0.00014411925620886742,
	"loss": 0.5678,
	"step": 11780
	},
	{
	"epoch": 0.3748530766542775,
	"grad_norm": 0.9315259456634521,
	"learning_rate": 0.000143934496368549,
	"loss": 0.5728,
	"step": 11800
	},
	{
	"epoch": 0.3754884208519966,
	"grad_norm": 0.9581449031829834,
	"learning_rate": 0.00014374955049187066,
	"loss": 0.5485,
	"step": 11820
	},
	{
	"epoch": 0.3761237650497157,
	"grad_norm": 1.472161054611206,
	"learning_rate": 0.00014356441936196776,
	"loss": 0.5931,
	"step": 11840
	},
	{
	"epoch": 0.3767591092474348,
	"grad_norm": 1.0234733819961548,
	"learning_rate": 0.00014337910376276011,
	"loss": 0.5635,
	"step": 11860
	},
	{
	"epoch": 0.37739445344515393,
	"grad_norm": 0.9299212694168091,
	"learning_rate": 0.00014319360447894862,
	"loss": 0.5802,
	"step": 11880
	},
	{
	"epoch": 0.37802979764287303,
	"grad_norm": 0.853388786315918,
	"learning_rate": 0.00014300792229601198,
	"loss": 0.5645,
	"step": 11900
	},
	{
	"epoch": 0.37866514184059213,
	"grad_norm": 0.9909472465515137,
	"learning_rate": 0.0001428220580002034,
	"loss": 0.5451,
	"step": 11920
	},
	{
	"epoch": 0.37930048603831124,
	"grad_norm": 0.8121063113212585,
	"learning_rate": 0.00014263601237854716,
	"loss": 0.5514,
	"step": 11940
	},
	{
	"epoch": 0.3799358302360304,
	"grad_norm": 0.9053930044174194,
	"learning_rate": 0.00014244978621883543,
	"loss": 0.5371,
	"step": 11960
	},
	{
	"epoch": 0.3805711744337495,
	"grad_norm": 1.0551111698150635,
	"learning_rate": 0.00014226338030962475,
	"loss": 0.5862,
	"step": 11980
	},
	{
	"epoch": 0.3812065186314686,
	"grad_norm": 0.8897386193275452,
	"learning_rate": 0.0001420767954402329,
	"loss": 0.5439,
	"step": 12000
	},
	{
	"epoch": 0.3812065186314686,
	"eval_loss": 0.5259391665458679,
	"eval_runtime": 45.0289,
	"eval_samples_per_second": 60.028,
	"eval_steps_per_second": 30.025,
	"step": 12000
	},
	{
	"epoch": 0.3818418628291877,
	"grad_norm": 0.8436812162399292,
	"learning_rate": 0.00014189003240073535,
	"loss": 0.5684,
	"step": 12020
	},
	{
	"epoch": 0.38247720702690685,
	"grad_norm": 1.2769359350204468,
	"learning_rate": 0.0001417030919819621,
	"loss": 0.5483,
	"step": 12040
	},
	{
	"epoch": 0.38311255122462595,
	"grad_norm": 0.8915470838546753,
	"learning_rate": 0.0001415159749754942,
	"loss": 0.5674,
	"step": 12060
	},
	{
	"epoch": 0.38374789542234505,
	"grad_norm": 1.1026362180709839,
	"learning_rate": 0.00014132868217366044,
	"loss": 0.5868,
	"step": 12080
	},
	{
	"epoch": 0.38438323962006415,
	"grad_norm": 0.92413729429245,
	"learning_rate": 0.00014114121436953402,
	"loss": 0.5602,
	"step": 12100
	},
	{
	"epoch": 0.3850185838177833,
	"grad_norm": 0.8880215287208557,
	"learning_rate": 0.0001409535723569291,
	"loss": 0.563,
	"step": 12120
	},
	{
	"epoch": 0.3856539280155024,
	"grad_norm": 0.7865646481513977,
	"learning_rate": 0.00014076575693039767,
	"loss": 0.5731,
	"step": 12140
	},
	{
	"epoch": 0.3862892722132215,
	"grad_norm": 0.8817760348320007,
	"learning_rate": 0.00014057776888522583,
	"loss": 0.5205,
	"step": 12160
	},
	{
	"epoch": 0.3869246164109406,
	"grad_norm": 0.7473212480545044,
	"learning_rate": 0.0001403896090174307,
	"loss": 0.5494,
	"step": 12180
	},
	{
	"epoch": 0.3875599606086597,
	"grad_norm": 0.9429736137390137,
	"learning_rate": 0.0001402012781237571,
	"loss": 0.551,
	"step": 12200
	},
	{
	"epoch": 0.38819530480637887,
	"grad_norm": 0.9144492149353027,
	"learning_rate": 0.00014001277700167382,
	"loss": 0.529,
	"step": 12220
	},
	{
	"epoch": 0.388830649004098,
	"grad_norm": 0.8465405702590942,
	"learning_rate": 0.00013982410644937057,
	"loss": 0.566,
	"step": 12240
	},
	{
	"epoch": 0.3894659932018171,
	"grad_norm": 0.8520842790603638,
	"learning_rate": 0.00013963526726575446,
	"loss": 0.61,
	"step": 12260
	},
	{
	"epoch": 0.3901013373995362,
	"grad_norm": 0.8384197354316711,
	"learning_rate": 0.00013944626025044673,
	"loss": 0.563,
	"step": 12280
	},
	{
	"epoch": 0.39073668159725533,
	"grad_norm": 0.9083155989646912,
	"learning_rate": 0.00013925708620377927,
	"loss": 0.5433,
	"step": 12300
	},
	{
	"epoch": 0.39137202579497443,
	"grad_norm": 1.0582692623138428,
	"learning_rate": 0.00013906774592679116,
	"loss": 0.5368,
	"step": 12320
	},
	{
	"epoch": 0.39200736999269353,
	"grad_norm": 0.8538171648979187,
	"learning_rate": 0.00013887824022122537,
	"loss": 0.5217,
	"step": 12340
	},
	{
	"epoch": 0.39264271419041263,
	"grad_norm": 0.8264597058296204,
	"learning_rate": 0.00013868856988952556,
	"loss": 0.5564,
	"step": 12360
	},
	{
	"epoch": 0.3932780583881318,
	"grad_norm": 0.8192921280860901,
	"learning_rate": 0.00013849873573483222,
	"loss": 0.6058,
	"step": 12380
	},
	{
	"epoch": 0.3939134025858509,
	"grad_norm": 0.8523415923118591,
	"learning_rate": 0.00013830873856097964,
	"loss": 0.5565,
	"step": 12400
	},
	{
	"epoch": 0.39454874678357,
	"grad_norm": 1.0821831226348877,
	"learning_rate": 0.00013811857917249253,
	"loss": 0.5617,
	"step": 12420
	},
	{
	"epoch": 0.3951840909812891,
	"grad_norm": 0.8053098917007446,
	"learning_rate": 0.00013792825837458225,
	"loss": 0.579,
	"step": 12440
	},
	{
	"epoch": 0.39581943517900825,
	"grad_norm": 0.9511120319366455,
	"learning_rate": 0.00013773777697314378,
	"loss": 0.5417,
	"step": 12460
	},
	{
	"epoch": 0.39645477937672735,
	"grad_norm": 1.0273131132125854,
	"learning_rate": 0.00013754713577475213,
	"loss": 0.582,
	"step": 12480
	},
	{
	"epoch": 0.39709012357444645,
	"grad_norm": 1.0347099304199219,
	"learning_rate": 0.00013735633558665893,
	"loss": 0.5679,
	"step": 12500
	},
	{
	"epoch": 0.39772546777216555,
	"grad_norm": 1.0762611627578735,
	"learning_rate": 0.00013716537721678907,
	"loss": 0.5483,
	"step": 12520
	},
	{
	"epoch": 0.3983608119698847,
	"grad_norm": 1.4243688583374023,
	"learning_rate": 0.00013697426147373721,
	"loss": 0.5558,
	"step": 12540
	},
	{
	"epoch": 0.3989961561676038,
	"grad_norm": 0.7539466023445129,
	"learning_rate": 0.00013678298916676445,
	"loss": 0.5404,
	"step": 12560
	},
	{
	"epoch": 0.3996315003653229,
	"grad_norm": 0.7736854553222656,
	"learning_rate": 0.00013659156110579476,
	"loss": 0.5578,
	"step": 12580
	},
	{
	"epoch": 0.400266844563042,
	"grad_norm": 0.9489171504974365,
	"learning_rate": 0.0001363999781014117,
	"loss": 0.5668,
	"step": 12600
	},
	{
	"epoch": 0.40090218876076117,
	"grad_norm": 0.9692643880844116,
	"learning_rate": 0.00013621783146979094,
	"loss": 0.5663,
	"step": 12620
	},
	{
	"epoch": 0.40153753295848027,
	"grad_norm": 1.0705336332321167,
	"learning_rate": 0.00013602594865967435,
	"loss": 0.5293,
	"step": 12640
	},
	{
	"epoch": 0.40217287715619937,
	"grad_norm": 1.0149205923080444,
	"learning_rate": 0.00013583391330117533,
	"loss": 0.5348,
	"step": 12660
	},
	{
	"epoch": 0.40280822135391847,
	"grad_norm": 0.9088581204414368,
	"learning_rate": 0.00013564172620744906,
	"loss": 0.5677,
	"step": 12680
	},
	{
	"epoch": 0.4034435655516376,
	"grad_norm": 1.1513986587524414,
	"learning_rate": 0.00013544938819229306,
	"loss": 0.569,
	"step": 12700
	},
	{
	"epoch": 0.40407890974935673,
	"grad_norm": 0.8725998401641846,
	"learning_rate": 0.00013525690007014406,
	"loss": 0.5692,
	"step": 12720
	},
	{
	"epoch": 0.40471425394707583,
	"grad_norm": 1.0663046836853027,
	"learning_rate": 0.00013506426265607425,
	"loss": 0.567,
	"step": 12740
	},
	{
	"epoch": 0.40534959814479493,
	"grad_norm": 0.9139559864997864,
	"learning_rate": 0.00013487147676578812,
	"loss": 0.5465,
	"step": 12760
	},
	{
	"epoch": 0.40598494234251403,
	"grad_norm": 1.3140777349472046,
	"learning_rate": 0.00013467854321561878,
	"loss": 0.5407,
	"step": 12780
	},
	{
	"epoch": 0.4066202865402332,
	"grad_norm": 0.8671903610229492,
	"learning_rate": 0.00013448546282252458,
	"loss": 0.5303,
	"step": 12800
	},
	{
	"epoch": 0.4072556307379523,
	"grad_norm": 0.692545473575592,
	"learning_rate": 0.00013429223640408578,
	"loss": 0.5333,
	"step": 12820
	},
	{
	"epoch": 0.4078909749356714,
	"grad_norm": 1.1087654829025269,
	"learning_rate": 0.00013409886477850087,
	"loss": 0.5493,
	"step": 12840
	},
	{
	"epoch": 0.4085263191333905,
	"grad_norm": 0.9659181833267212,
	"learning_rate": 0.00013390534876458319,
	"loss": 0.5902,
	"step": 12860
	},
	{
	"epoch": 0.40916166333110965,
	"grad_norm": 0.7794270515441895,
	"learning_rate": 0.00013371168918175754,
	"loss": 0.5647,
	"step": 12880
	},
	{
	"epoch": 0.40979700752882875,
	"grad_norm": 0.910505473613739,
	"learning_rate": 0.00013351788685005662,
	"loss": 0.5752,
	"step": 12900
	},
	{
	"epoch": 0.41043235172654785,
	"grad_norm": 0.9549837112426758,
	"learning_rate": 0.00013332394259011758,
	"loss": 0.5424,
	"step": 12920
	},
	{
	"epoch": 0.41106769592426695,
	"grad_norm": 1.2679826021194458,
	"learning_rate": 0.00013312985722317862,
	"loss": 0.5285,
	"step": 12940
	},
	{
	"epoch": 0.4117030401219861,
	"grad_norm": 0.8822807669639587,
	"learning_rate": 0.0001329356315710753,
	"loss": 0.5662,
	"step": 12960
	},
	{
	"epoch": 0.4123383843197052,
	"grad_norm": 0.8247064352035522,
	"learning_rate": 0.0001327412664562373,
	"loss": 0.5338,
	"step": 12980
	},
	{
	"epoch": 0.4129737285174243,
	"grad_norm": 0.8655696511268616,
	"learning_rate": 0.0001325467627016849,
	"loss": 0.5563,
	"step": 13000
	},
	{
	"epoch": 0.4129737285174243,
	"eval_loss": 0.5103311538696289,
	"eval_runtime": 44.4811,
	"eval_samples_per_second": 60.767,
	"eval_steps_per_second": 30.395,
	"step": 13000
	},
	{
	"epoch": 0.4136090727151434,
	"grad_norm": 1.1745620965957642,
	"learning_rate": 0.00013235212113102532,
	"loss": 0.5432,
	"step": 13020
	},
	{
	"epoch": 0.41424441691286257,
	"grad_norm": 1.375957727432251,
	"learning_rate": 0.0001321573425684494,
	"loss": 0.5518,
	"step": 13040
	},
	{
	"epoch": 0.41487976111058167,
	"grad_norm": 1.2425376176834106,
	"learning_rate": 0.00013196242783872805,
	"loss": 0.5667,
	"step": 13060
	},
	{
	"epoch": 0.41551510530830077,
	"grad_norm": 0.9375765919685364,
	"learning_rate": 0.00013176737776720876,
	"loss": 0.5629,
	"step": 13080
	},
	{
	"epoch": 0.41615044950601987,
	"grad_norm": 0.9392895698547363,
	"learning_rate": 0.00013157219317981217,
	"loss": 0.5577,
	"step": 13100
	},
	{
	"epoch": 0.416785793703739,
	"grad_norm": 0.9028527140617371,
	"learning_rate": 0.00013137687490302844,
	"loss": 0.5358,
	"step": 13120
	},
	{
	"epoch": 0.41742113790145813,
	"grad_norm": 0.9373983144760132,
	"learning_rate": 0.00013118142376391381,
	"loss": 0.5517,
	"step": 13140
	},
	{
	"epoch": 0.41805648209917723,
	"grad_norm": 1.3339825868606567,
	"learning_rate": 0.00013098584059008725,
	"loss": 0.5512,
	"step": 13160
	},
	{
	"epoch": 0.41869182629689633,
	"grad_norm": 0.7137243747711182,
	"learning_rate": 0.00013079012620972663,
	"loss": 0.5464,
	"step": 13180
	},
	{
	"epoch": 0.41932717049461543,
	"grad_norm": 1.1450612545013428,
	"learning_rate": 0.00013059428145156555,
	"loss": 0.564,
	"step": 13200
	},
	{
	"epoch": 0.4199625146923346,
	"grad_norm": 1.2148438692092896,
	"learning_rate": 0.00013039830714488965,
	"loss": 0.5555,
	"step": 13220
	},
	{
	"epoch": 0.4205978588900537,
	"grad_norm": 1.277346134185791,
	"learning_rate": 0.00013020220411953304,
	"loss": 0.5898,
	"step": 13240
	},
	{
	"epoch": 0.4212332030877728,
	"grad_norm": 1.0933984518051147,
	"learning_rate": 0.00013000597320587492,
	"loss": 0.553,
	"step": 13260
	},
	{
	"epoch": 0.4218685472854919,
	"grad_norm": 0.7297493815422058,
	"learning_rate": 0.00012980961523483616,
	"loss": 0.5626,
	"step": 13280
	},
	{
	"epoch": 0.42250389148321105,
	"grad_norm": 0.8859849572181702,
	"learning_rate": 0.00012961313103787548,
	"loss": 0.5455,
	"step": 13300
	},
	{
	"epoch": 0.42313923568093015,
	"grad_norm": 0.9647216200828552,
	"learning_rate": 0.00012941652144698608,
	"loss": 0.5157,
	"step": 13320
	},
	{
	"epoch": 0.42377457987864925,
	"grad_norm": 0.9097155332565308,
	"learning_rate": 0.00012921978729469222,
	"loss": 0.542,
	"step": 13340
	},
	{
	"epoch": 0.42440992407636835,
	"grad_norm": 1.0074721574783325,
	"learning_rate": 0.0001290229294140456,
	"loss": 0.5319,
	"step": 13360
	},
	{
	"epoch": 0.4250452682740875,
	"grad_norm": 0.7759230732917786,
	"learning_rate": 0.0001288259486386218,
	"loss": 0.4939,
	"step": 13380
	},
	{
	"epoch": 0.4256806124718066,
	"grad_norm": 0.8912795782089233,
	"learning_rate": 0.00012862884580251675,
	"loss": 0.5276,
	"step": 13400
	},
	{
	"epoch": 0.4263159566695257,
	"grad_norm": 1.090395450592041,
	"learning_rate": 0.00012843162174034332,
	"loss": 0.5227,
	"step": 13420
	},
	{
	"epoch": 0.4269513008672448,
	"grad_norm": 0.8524248003959656,
	"learning_rate": 0.00012823427728722762,
	"loss": 0.5438,
	"step": 13440
	},
	{
	"epoch": 0.42758664506496397,
	"grad_norm": 1.209073543548584,
	"learning_rate": 0.0001280368132788056,
	"loss": 0.5495,
	"step": 13460
	},
	{
	"epoch": 0.42822198926268307,
	"grad_norm": 0.9301733374595642,
	"learning_rate": 0.00012783923055121945,
	"loss": 0.5411,
	"step": 13480
	},
	{
	"epoch": 0.42885733346040217,
	"grad_norm": 0.916028618812561,
	"learning_rate": 0.000127641529941114,
	"loss": 0.5674,
	"step": 13500
	},
	{
	"epoch": 0.42949267765812127,
	"grad_norm": 0.9181066751480103,
	"learning_rate": 0.00012744371228563334,
	"loss": 0.5522,
	"step": 13520
	},
	{
	"epoch": 0.4301280218558404,
	"grad_norm": 1.2208302021026611,
	"learning_rate": 0.0001272457784224171,
	"loss": 0.5428,
	"step": 13540
	},
	{
	"epoch": 0.4307633660535595,
	"grad_norm": 0.8382121920585632,
	"learning_rate": 0.00012704772918959706,
	"loss": 0.5347,
	"step": 13560
	},
	{
	"epoch": 0.4313987102512786,
	"grad_norm": 0.7942314147949219,
	"learning_rate": 0.0001268495654257934,
	"loss": 0.5455,
	"step": 13580
	},
	{
	"epoch": 0.43203405444899773,
	"grad_norm": 1.0586442947387695,
	"learning_rate": 0.00012665128797011138,
	"loss": 0.5588,
	"step": 13600
	},
	{
	"epoch": 0.4326693986467169,
	"grad_norm": 0.9026583433151245,
	"learning_rate": 0.00012645289766213764,
	"loss": 0.5448,
	"step": 13620
	},
	{
	"epoch": 0.433304742844436,
	"grad_norm": 1.107459545135498,
	"learning_rate": 0.0001262643231052632,
	"loss": 0.5226,
	"step": 13640
	},
	{
	"epoch": 0.4339400870421551,
	"grad_norm": 0.7181698679924011,
	"learning_rate": 0.00012606571515198816,
	"loss": 0.5587,
	"step": 13660
	},
	{
	"epoch": 0.4345754312398742,
	"grad_norm": 0.850642740726471,
	"learning_rate": 0.0001258669968259726,
	"loss": 0.5514,
	"step": 13680
	},
	{
	"epoch": 0.4352107754375933,
	"grad_norm": 0.9803110957145691,
	"learning_rate": 0.00012567811294990802,
	"loss": 0.5612,
	"step": 13700
	},
	{
	"epoch": 0.43584611963531245,
	"grad_norm": 0.8320556282997131,
	"learning_rate": 0.00012547918181770158,
	"loss": 0.5464,
	"step": 13720
	},
	{
	"epoch": 0.43648146383303155,
	"grad_norm": 0.9645776152610779,
	"learning_rate": 0.0001252801427963731,
	"loss": 0.5394,
	"step": 13740
	},
	{
	"epoch": 0.43711680803075065,
	"grad_norm": 0.981066107749939,
	"learning_rate": 0.00012508099672873401,
	"loss": 0.5518,
	"step": 13760
	},
	{
	"epoch": 0.43775215222846975,
	"grad_norm": 0.950231671333313,
	"learning_rate": 0.00012488174445804905,
	"loss": 0.5628,
	"step": 13780
	},
	{
	"epoch": 0.4383874964261889,
	"grad_norm": 0.7942489981651306,
	"learning_rate": 0.00012468238682803256,
	"loss": 0.5682,
	"step": 13800
	},
	{
	"epoch": 0.439022840623908,
	"grad_norm": 0.9598709940910339,
	"learning_rate": 0.0001244829246828451,
	"loss": 0.5398,
	"step": 13820
	},
	{
	"epoch": 0.4396581848216271,
	"grad_norm": 0.9328323602676392,
	"learning_rate": 0.0001242833588670898,
	"loss": 0.5465,
	"step": 13840
	},
	{
	"epoch": 0.4402935290193462,
	"grad_norm": 0.9036662578582764,
	"learning_rate": 0.00012408369022580865,
	"loss": 0.5307,
	"step": 13860
	},
	{
	"epoch": 0.44092887321706536,
	"grad_norm": 1.1593483686447144,
	"learning_rate": 0.0001238839196044792,
	"loss": 0.5838,
	"step": 13880
	},
	{
	"epoch": 0.44156421741478447,
	"grad_norm": 0.9283963441848755,
	"learning_rate": 0.0001236840478490107,
	"loss": 0.5112,
	"step": 13900
	},
	{
	"epoch": 0.44219956161250357,
	"grad_norm": 1.1374804973602295,
	"learning_rate": 0.00012348407580574068,
	"loss": 0.5616,
	"step": 13920
	},
	{
	"epoch": 0.44283490581022267,
	"grad_norm": 0.8757379055023193,
	"learning_rate": 0.00012328400432143143,
	"loss": 0.5409,
	"step": 13940
	},
	{
	"epoch": 0.4434702500079418,
	"grad_norm": 0.9971847534179688,
	"learning_rate": 0.00012308383424326617,
	"loss": 0.5573,
	"step": 13960
	},
	{
	"epoch": 0.4441055942056609,
	"grad_norm": 0.8985651135444641,
	"learning_rate": 0.00012288356641884567,
	"loss": 0.5602,
	"step": 13980
	},
	{
	"epoch": 0.44474093840338,
	"grad_norm": 0.8877219557762146,
	"learning_rate": 0.0001226832016961846,
	"loss": 0.5418,
	"step": 14000
	},
	{
	"epoch": 0.44474093840338,
	"eval_loss": 0.49767744541168213,
	"eval_runtime": 45.8378,
	"eval_samples_per_second": 58.969,
	"eval_steps_per_second": 29.495,
	"step": 14000
	},
	{
	"epoch": 0.4453762826010991,
	"grad_norm": 0.9760685563087463,
	"learning_rate": 0.00012248274092370795,
	"loss": 0.5386,
	"step": 14020
	},
	{
	"epoch": 0.4460116267988183,
	"grad_norm": 0.9159601330757141,
	"learning_rate": 0.00012228218495024734,
	"loss": 0.5658,
	"step": 14040
	},
	{
	"epoch": 0.4466469709965374,
	"grad_norm": 0.9726976752281189,
	"learning_rate": 0.00012208153462503764,
	"loss": 0.5619,
	"step": 14060
	},
	{
	"epoch": 0.4472823151942565,
	"grad_norm": 0.8647946715354919,
	"learning_rate": 0.00012188079079771311,
	"loss": 0.5312,
	"step": 14080
	},
	{
	"epoch": 0.4479176593919756,
	"grad_norm": 0.8291323781013489,
	"learning_rate": 0.00012167995431830404,
	"loss": 0.5555,
	"step": 14100
	},
	{
	"epoch": 0.44855300358969474,
	"grad_norm": 1.1393893957138062,
	"learning_rate": 0.00012147902603723302,
	"loss": 0.5368,
	"step": 14120
	},
	{
	"epoch": 0.44918834778741384,
	"grad_norm": 0.9214714169502258,
	"learning_rate": 0.00012127800680531129,
	"loss": 0.5312,
	"step": 14140
	},
	{
	"epoch": 0.44982369198513295,
	"grad_norm": 0.7314972877502441,
	"learning_rate": 0.00012107689747373533,
	"loss": 0.5306,
	"step": 14160
	},
	{
	"epoch": 0.45045903618285205,
	"grad_norm": 0.9739118218421936,
	"learning_rate": 0.00012087569889408308,
	"loss": 0.5474,
	"step": 14180
	},
	{
	"epoch": 0.45109438038057115,
	"grad_norm": 1.1331558227539062,
	"learning_rate": 0.00012067441191831035,
	"loss": 0.5251,
	"step": 14200
	},
	{
	"epoch": 0.4517297245782903,
	"grad_norm": 0.9672099947929382,
	"learning_rate": 0.00012047303739874733,
	"loss": 0.5638,
	"step": 14220
	},
	{
	"epoch": 0.4523650687760094,
	"grad_norm": 0.9430161118507385,
	"learning_rate": 0.00012027157618809488,
	"loss": 0.5473,
	"step": 14240
	},
	{
	"epoch": 0.4530004129737285,
	"grad_norm": 0.9385126233100891,
	"learning_rate": 0.00012007002913942092,
	"loss": 0.5305,
	"step": 14260
	},
	{
	"epoch": 0.4536357571714476,
	"grad_norm": 1.2930362224578857,
	"learning_rate": 0.00011986839710615689,
	"loss": 0.5264,
	"step": 14280
	},
	{
	"epoch": 0.45427110136916676,
	"grad_norm": 1.098981499671936,
	"learning_rate": 0.00011966668094209401,
	"loss": 0.5945,
	"step": 14300
	},
	{
	"epoch": 0.45490644556688586,
	"grad_norm": 1.016724944114685,
	"learning_rate": 0.00011946488150137987,
	"loss": 0.5423,
	"step": 14320
	},
	{
	"epoch": 0.45554178976460497,
	"grad_norm": 1.3441358804702759,
	"learning_rate": 0.00011926299963851455,
	"loss": 0.5311,
	"step": 14340
	},
	{
	"epoch": 0.45617713396232407,
	"grad_norm": 0.8672164678573608,
	"learning_rate": 0.00011906103620834721,
	"loss": 0.5377,
	"step": 14360
	},
	{
	"epoch": 0.4568124781600432,
	"grad_norm": 0.8844342231750488,
	"learning_rate": 0.00011885899206607243,
	"loss": 0.5539,
	"step": 14380
	},
	{
	"epoch": 0.4574478223577623,
	"grad_norm": 1.0755807161331177,
	"learning_rate": 0.00011865686806722647,
	"loss": 0.5489,
	"step": 14400
	},
	{
	"epoch": 0.4580831665554814,
	"grad_norm": 0.8909132480621338,
	"learning_rate": 0.00011845466506768379,
	"loss": 0.5492,
	"step": 14420
	},
	{
	"epoch": 0.4587185107532005,
	"grad_norm": 0.7222205996513367,
	"learning_rate": 0.00011826249982356501,
	"loss": 0.5452,
	"step": 14440
	},
	{
	"epoch": 0.4593538549509197,
	"grad_norm": 0.8589527606964111,
	"learning_rate": 0.00011806014523563623,
	"loss": 0.5553,
	"step": 14460
	},
	{
	"epoch": 0.4599891991486388,
	"grad_norm": 0.8546582460403442,
	"learning_rate": 0.00011785771417377567,
	"loss": 0.518,
	"step": 14480
	},
	{
	"epoch": 0.4606245433463579,
	"grad_norm": 0.7938315272331238,
	"learning_rate": 0.00011765520749515795,
	"loss": 0.5732,
	"step": 14500
	},
	{
	"epoch": 0.461259887544077,
	"grad_norm": 1.030897617340088,
	"learning_rate": 0.000117452626057278,
	"loss": 0.5293,
	"step": 14520
	},
	{
	"epoch": 0.46189523174179614,
	"grad_norm": 0.9275230765342712,
	"learning_rate": 0.00011724997071794722,
	"loss": 0.5453,
	"step": 14540
	},
	{
	"epoch": 0.46253057593951524,
	"grad_norm": 0.8049765825271606,
	"learning_rate": 0.00011704724233528997,
	"loss": 0.5237,
	"step": 14560
	},
	{
	"epoch": 0.46316592013723434,
	"grad_norm": 0.9411914348602295,
	"learning_rate": 0.00011684444176773994,
	"loss": 0.5529,
	"step": 14580
	},
	{
	"epoch": 0.46380126433495344,
	"grad_norm": 1.0553874969482422,
	"learning_rate": 0.0001166415698740364,
	"loss": 0.5107,
	"step": 14600
	},
	{
	"epoch": 0.4644366085326726,
	"grad_norm": 1.1203105449676514,
	"learning_rate": 0.00011643862751322072,
	"loss": 0.5503,
	"step": 14620
	},
	{
	"epoch": 0.4650719527303917,
	"grad_norm": 0.9356998801231384,
	"learning_rate": 0.00011623561554463263,
	"loss": 0.5388,
	"step": 14640
	},
	{
	"epoch": 0.4657072969281108,
	"grad_norm": 1.0603325366973877,
	"learning_rate": 0.00011603253482790657,
	"loss": 0.5379,
	"step": 14660
	},
	{
	"epoch": 0.4663426411258299,
	"grad_norm": 0.7650070786476135,
	"learning_rate": 0.00011582938622296818,
	"loss": 0.5175,
	"step": 14680
	},
	{
	"epoch": 0.466977985323549,
	"grad_norm": 1.1926647424697876,
	"learning_rate": 0.00011562617059003044,
	"loss": 0.5558,
	"step": 14700
	},
	{
	"epoch": 0.46761332952126816,
	"grad_norm": 0.9466400742530823,
	"learning_rate": 0.00011542288878959025,
	"loss": 0.5288,
	"step": 14720
	},
	{
	"epoch": 0.46824867371898726,
	"grad_norm": 1.036163091659546,
	"learning_rate": 0.0001152195416824247,
	"loss": 0.5322,
	"step": 14740
	},
	{
	"epoch": 0.46888401791670636,
	"grad_norm": 0.8458572626113892,
	"learning_rate": 0.00011501613012958729,
	"loss": 0.5358,
	"step": 14760
	},
	{
	"epoch": 0.46951936211442546,
	"grad_norm": 0.789557695388794,
	"learning_rate": 0.00011481265499240455,
	"loss": 0.5067,
	"step": 14780
	},
	{
	"epoch": 0.4701547063121446,
	"grad_norm": 0.845371425151825,
	"learning_rate": 0.00011460911713247222,
	"loss": 0.5433,
	"step": 14800
	},
	{
	"epoch": 0.4707900505098637,
	"grad_norm": 0.8561549782752991,
	"learning_rate": 0.00011440551741165156,
	"loss": 0.5362,
	"step": 14820
	},
	{
	"epoch": 0.4714253947075828,
	"grad_norm": 0.921575665473938,
	"learning_rate": 0.00011420185669206582,
	"loss": 0.5093,
	"step": 14840
	},
	{
	"epoch": 0.4720607389053019,
	"grad_norm": 0.9392147660255432,
	"learning_rate": 0.0001139981358360966,
	"loss": 0.5419,
	"step": 14860
	},
	{
	"epoch": 0.4726960831030211,
	"grad_norm": 0.859464168548584,
	"learning_rate": 0.00011379435570638002,
	"loss": 0.5329,
	"step": 14880
	},
	{
	"epoch": 0.4733314273007402,
	"grad_norm": 0.9370890259742737,
	"learning_rate": 0.00011359051716580331,
	"loss": 0.516,
	"step": 14900
	},
	{
	"epoch": 0.4739667714984593,
	"grad_norm": 0.8993077278137207,
	"learning_rate": 0.00011338662107750098,
	"loss": 0.4785,
	"step": 14920
	},
	{
	"epoch": 0.4746021156961784,
	"grad_norm": 0.7652683854103088,
	"learning_rate": 0.00011318266830485119,
	"loss": 0.5348,
	"step": 14940
	},
	{
	"epoch": 0.47523745989389754,
	"grad_norm": 1.0513384342193604,
	"learning_rate": 0.00011297865971147217,
	"loss": 0.5181,
	"step": 14960
	},
	{
	"epoch": 0.47587280409161664,
	"grad_norm": 0.8159809112548828,
	"learning_rate": 0.00011277459616121851,
	"loss": 0.5368,
	"step": 14980
	},
	{
	"epoch": 0.47650814828933574,
	"grad_norm": 1.0844529867172241,
	"learning_rate": 0.00011257047851817748,
	"loss": 0.5497,
	"step": 15000
	},
	{
	"epoch": 0.47650814828933574,
	"eval_loss": 0.4893677234649658,
	"eval_runtime": 45.7511,
	"eval_samples_per_second": 59.081,
	"eval_steps_per_second": 29.551,
	"step": 15000
	},
	{
	"epoch": 0.47714349248705484,
	"grad_norm": 0.7700105309486389,
	"learning_rate": 0.0001123663076466655,
	"loss": 0.5354,
	"step": 15020
	},
	{
	"epoch": 0.477778836684774,
	"grad_norm": 0.872631847858429,
	"learning_rate": 0.0001121620844112242,
	"loss": 0.5243,
	"step": 15040
	},
	{
	"epoch": 0.4784141808824931,
	"grad_norm": 1.1037932634353638,
	"learning_rate": 0.0001119578096766171,
	"loss": 0.5412,
	"step": 15060
	},
	{
	"epoch": 0.4790495250802122,
	"grad_norm": 0.9620169997215271,
	"learning_rate": 0.00011175348430782579,
	"loss": 0.5137,
	"step": 15080
	},
	{
	"epoch": 0.4796848692779313,
	"grad_norm": 0.7465859055519104,
	"learning_rate": 0.0001115491091700461,
	"loss": 0.5213,
	"step": 15100
	},
	{
	"epoch": 0.48032021347565046,
	"grad_norm": 0.7287941575050354,
	"learning_rate": 0.00011134468512868479,
	"loss": 0.5184,
	"step": 15120
	},
	{
	"epoch": 0.48095555767336956,
	"grad_norm": 0.9596436023712158,
	"learning_rate": 0.00011114021304935558,
	"loss": 0.5471,
	"step": 15140
	},
	{
	"epoch": 0.48159090187108866,
	"grad_norm": 0.869172215461731,
	"learning_rate": 0.00011093569379787563,
	"loss": 0.5074,
	"step": 15160
	},
	{
	"epoch": 0.48222624606880776,
	"grad_norm": 1.0704097747802734,
	"learning_rate": 0.00011073112824026191,
	"loss": 0.544,
	"step": 15180
	},
	{
	"epoch": 0.48286159026652686,
	"grad_norm": 0.896312415599823,
	"learning_rate": 0.00011052651724272736,
	"loss": 0.5261,
	"step": 15200
	},
	{
	"epoch": 0.483496934464246,
	"grad_norm": 1.010606288909912,
	"learning_rate": 0.00011032186167167741,
	"loss": 0.5112,
	"step": 15220
	},
	{
	"epoch": 0.4841322786619651,
	"grad_norm": 0.980171263217926,
	"learning_rate": 0.00011011716239370625,
	"loss": 0.5414,
	"step": 15240
	},
	{
	"epoch": 0.4847676228596842,
	"grad_norm": 0.7417489290237427,
	"learning_rate": 0.00010991242027559301,
	"loss": 0.5019,
	"step": 15260
	},
	{
	"epoch": 0.4854029670574033,
	"grad_norm": 0.9232955574989319,
	"learning_rate": 0.0001097076361842984,
	"loss": 0.5293,
	"step": 15280
	},
	{
	"epoch": 0.4860383112551225,
	"grad_norm": 0.8391673564910889,
	"learning_rate": 0.00010950281098696072,
	"loss": 0.5397,
	"step": 15300
	},
	{
	"epoch": 0.4866736554528416,
	"grad_norm": 1.0795869827270508,
	"learning_rate": 0.00010929794555089239,
	"loss": 0.5293,
	"step": 15320
	},
	{
	"epoch": 0.4873089996505607,
	"grad_norm": 0.9179370403289795,
	"learning_rate": 0.00010909304074357627,
	"loss": 0.5089,
	"step": 15340
	},
	{
	"epoch": 0.4879443438482798,
	"grad_norm": 0.9346722960472107,
	"learning_rate": 0.0001088880974326618,
	"loss": 0.4981,
	"step": 15360
	},
	{
	"epoch": 0.48857968804599894,
	"grad_norm": 0.9835326075553894,
	"learning_rate": 0.00010868311648596157,
	"loss": 0.52,
	"step": 15380
	},
	{
	"epoch": 0.48921503224371804,
	"grad_norm": 0.8709509968757629,
	"learning_rate": 0.0001084780987714475,
	"loss": 0.5507,
	"step": 15400
	},
	{
	"epoch": 0.48985037644143714,
	"grad_norm": 1.0125563144683838,
	"learning_rate": 0.00010827304515724719,
	"loss": 0.5522,
	"step": 15420
	},
	{
	"epoch": 0.49048572063915624,
	"grad_norm": 0.9726683497428894,
	"learning_rate": 0.00010806795651164026,
	"loss": 0.5195,
	"step": 15440
	},
	{
	"epoch": 0.4911210648368754,
	"grad_norm": 0.9348143935203552,
	"learning_rate": 0.0001078628337030547,
	"loss": 0.5376,
	"step": 15460
	},
	{
	"epoch": 0.4917564090345945,
	"grad_norm": 1.247452735900879,
	"learning_rate": 0.00010765767760006308,
	"loss": 0.5238,
	"step": 15480
	},
	{
	"epoch": 0.4923917532323136,
	"grad_norm": 1.2584036588668823,
	"learning_rate": 0.00010745248907137906,
	"loss": 0.539,
	"step": 15500
	},
	{
	"epoch": 0.4930270974300327,
	"grad_norm": 0.9565659165382385,
	"learning_rate": 0.00010724726898585353,
	"loss": 0.546,
	"step": 15520
	},
	{
	"epoch": 0.49366244162775186,
	"grad_norm": 0.9646620750427246,
	"learning_rate": 0.000107042018212471,
	"loss": 0.5094,
	"step": 15540
	},
	{
	"epoch": 0.49429778582547096,
	"grad_norm": 0.7045026421546936,
	"learning_rate": 0.00010683673762034594,
	"loss": 0.5708,
	"step": 15560
	},
	{
	"epoch": 0.49493313002319006,
	"grad_norm": 1.1588184833526611,
	"learning_rate": 0.00010663142807871911,
	"loss": 0.5681,
	"step": 15580
	},
	{
	"epoch": 0.49556847422090916,
	"grad_norm": 0.8272905349731445,
	"learning_rate": 0.00010642609045695382,
	"loss": 0.5239,
	"step": 15600
	},
	{
	"epoch": 0.4962038184186283,
	"grad_norm": 0.9670738577842712,
	"learning_rate": 0.00010622072562453234,
	"loss": 0.486,
	"step": 15620
	},
	{
	"epoch": 0.4968391626163474,
	"grad_norm": 0.8635004162788391,
	"learning_rate": 0.00010601533445105205,
	"loss": 0.5419,
	"step": 15640
	},
	{
	"epoch": 0.4974745068140665,
	"grad_norm": 1.0769212245941162,
	"learning_rate": 0.00010580991780622196,
	"loss": 0.5252,
	"step": 15660
	},
	{
	"epoch": 0.4981098510117856,
	"grad_norm": 0.9688665270805359,
	"learning_rate": 0.00010560447655985894,
	"loss": 0.5559,
	"step": 15680
	},
	{
	"epoch": 0.4987451952095048,
	"grad_norm": 0.9587375521659851,
	"learning_rate": 0.00010539901158188398,
	"loss": 0.5136,
	"step": 15700
	},
	{
	"epoch": 0.4993805394072239,
	"grad_norm": 0.870891273021698,
	"learning_rate": 0.0001051935237423186,
	"loss": 0.5274,
	"step": 15720
	},
	{
	"epoch": 0.500015883604943,
	"grad_norm": 1.1741816997528076,
	"learning_rate": 0.00010498801391128108,
	"loss": 0.5274,
	"step": 15740
	},
	{
	"epoch": 0.5006512278026621,
	"grad_norm": 1.074429988861084,
	"learning_rate": 0.00010478248295898285,
	"loss": 0.5049,
	"step": 15760
	},
	{
	"epoch": 0.5012865720003812,
	"grad_norm": 0.7894431352615356,
	"learning_rate": 0.00010457693175572483,
	"loss": 0.5141,
	"step": 15780
	},
	{
	"epoch": 0.5019219161981003,
	"grad_norm": 0.8638029098510742,
	"learning_rate": 0.00010437136117189356,
	"loss": 0.5053,
	"step": 15800
	},
	{
	"epoch": 0.5025572603958194,
	"grad_norm": 0.9749894142150879,
	"learning_rate": 0.00010416577207795776,
	"loss": 0.5319,
	"step": 15820
	},
	{
	"epoch": 0.5031926045935385,
	"grad_norm": 0.9491709470748901,
	"learning_rate": 0.00010396016534446451,
	"loss": 0.4968,
	"step": 15840
	},
	{
	"epoch": 0.5038279487912577,
	"grad_norm": 0.880732536315918,
	"learning_rate": 0.00010375454184203555,
	"loss": 0.5292,
	"step": 15860
	},
	{
	"epoch": 0.5044632929889767,
	"grad_norm": 1.22807776927948,
	"learning_rate": 0.00010354890244136361,
	"loss": 0.5228,
	"step": 15880
	},
	{
	"epoch": 0.5050986371866959,
	"grad_norm": 0.8567366003990173,
	"learning_rate": 0.00010334324801320881,
	"loss": 0.558,
	"step": 15900
	},
	{
	"epoch": 0.505733981384415,
	"grad_norm": 0.8203198909759521,
	"learning_rate": 0.00010313757942839482,
	"loss": 0.5061,
	"step": 15920
	},
	{
	"epoch": 0.5063693255821341,
	"grad_norm": 0.9894897937774658,
	"learning_rate": 0.00010293189755780535,
	"loss": 0.5322,
	"step": 15940
	},
	{
	"epoch": 0.5070046697798533,
	"grad_norm": 1.0645695924758911,
	"learning_rate": 0.0001027262032723803,
	"loss": 0.536,
	"step": 15960
	},
	{
	"epoch": 0.5076400139775723,
	"grad_norm": 0.9940254092216492,
	"learning_rate": 0.0001025204974431121,
	"loss": 0.5211,
	"step": 15980
	},
	{
	"epoch": 0.5082753581752915,
	"grad_norm": 0.7856065630912781,
	"learning_rate": 0.00010231478094104216,
	"loss": 0.5137,
	"step": 16000
	},
	{
	"epoch": 0.5082753581752915,
	"eval_loss": 0.48191481828689575,
	"eval_runtime": 44.2211,
	"eval_samples_per_second": 61.125,
	"eval_steps_per_second": 30.574,
	"step": 16000
	},
	{
	"epoch": 0.5089107023730106,
	"grad_norm": 0.9363443851470947,
	"learning_rate": 0.00010210905463725703,
	"loss": 0.5426,
	"step": 16020
	},
	{
	"epoch": 0.5095460465707297,
	"grad_norm": 0.8720065355300903,
	"learning_rate": 0.0001019033194028848,
	"loss": 0.525,
	"step": 16040
	},
	{
	"epoch": 0.5101813907684488,
	"grad_norm": 0.9192999005317688,
	"learning_rate": 0.00010169757610909131,
	"loss": 0.5265,
	"step": 16060
	},
	{
	"epoch": 0.510816734966168,
	"grad_norm": 1.089529037475586,
	"learning_rate": 0.00010149182562707657,
	"loss": 0.5148,
	"step": 16080
	},
	{
	"epoch": 0.511452079163887,
	"grad_norm": 0.8161883354187012,
	"learning_rate": 0.00010128606882807106,
	"loss": 0.5441,
	"step": 16100
	},
	{
	"epoch": 0.5120874233616062,
	"grad_norm": 0.8635348081588745,
	"learning_rate": 0.00010108030658333192,
	"loss": 0.4981,
	"step": 16120
	},
	{
	"epoch": 0.5127227675593252,
	"grad_norm": 0.9366866946220398,
	"learning_rate": 0.00010087453976413943,
	"loss": 0.5155,
	"step": 16140
	},
	{
	"epoch": 0.5133581117570444,
	"grad_norm": 0.8161008954048157,
	"learning_rate": 0.00010066876924179321,
	"loss": 0.5178,
	"step": 16160
	},
	{
	"epoch": 0.5139934559547635,
	"grad_norm": 1.2926280498504639,
	"learning_rate": 0.00010046299588760855,
	"loss": 0.5409,
	"step": 16180
	},
	{
	"epoch": 0.5146288001524826,
	"grad_norm": 0.9963902235031128,
	"learning_rate": 0.00010025722057291273,
	"loss": 0.514,
	"step": 16200
	},
	{
	"epoch": 0.5152641443502017,
	"grad_norm": 0.7572094202041626,
	"learning_rate": 0.0001000514441690414,
	"loss": 0.5142,
	"step": 16220
	},
	{
	"epoch": 0.5158994885479208,
	"grad_norm": 0.7842695713043213,
	"learning_rate": 9.984566754733471e-05,
	"loss": 0.5419,
	"step": 16240
	},
	{
	"epoch": 0.5165348327456399,
	"grad_norm": 0.8259790539741516,
	"learning_rate": 9.96398915791338e-05,
	"loss": 0.5053,
	"step": 16260
	},
	{
	"epoch": 0.5171701769433591,
	"grad_norm": 0.7848758697509766,
	"learning_rate": 9.943411713577707e-05,
	"loss": 0.5129,
	"step": 16280
	},
	{
	"epoch": 0.5178055211410781,
	"grad_norm": 0.9001737236976624,
	"learning_rate": 9.922834508859636e-05,
	"loss": 0.5095,
	"step": 16300
	},
	{
	"epoch": 0.5184408653387973,
	"grad_norm": 1.2547895908355713,
	"learning_rate": 9.90225763089135e-05,
	"loss": 0.5402,
	"step": 16320
	},
	{
	"epoch": 0.5190762095365165,
	"grad_norm": 1.0412747859954834,
	"learning_rate": 9.881681166803634e-05,
	"loss": 0.5039,
	"step": 16340
	},
	{
	"epoch": 0.5197115537342355,
	"grad_norm": 0.8408613204956055,
	"learning_rate": 9.861105203725533e-05,
	"loss": 0.5256,
	"step": 16360
	},
	{
	"epoch": 0.5203468979319547,
	"grad_norm": 0.7325016856193542,
	"learning_rate": 9.840529828783965e-05,
	"loss": 0.5055,
	"step": 16380
	},
	{
	"epoch": 0.5209822421296737,
	"grad_norm": 1.3417218923568726,
	"learning_rate": 9.819955129103355e-05,
	"loss": 0.5336,
	"step": 16400
	},
	{
	"epoch": 0.5216175863273929,
	"grad_norm": 0.8016658425331116,
	"learning_rate": 9.799381191805272e-05,
	"loss": 0.5285,
	"step": 16420
	},
	{
	"epoch": 0.522252930525112,
	"grad_norm": 0.7678484916687012,
	"learning_rate": 9.778808104008059e-05,
	"loss": 0.5243,
	"step": 16440
	},
	{
	"epoch": 0.5228882747228311,
	"grad_norm": 1.0348572731018066,
	"learning_rate": 9.760293123314227e-05,
	"loss": 0.5305,
	"step": 16460
	},
	{
	"epoch": 0.5235236189205502,
	"grad_norm": 0.891635537147522,
	"learning_rate": 9.739721889566509e-05,
	"loss": 0.5258,
	"step": 16480
	},
	{
	"epoch": 0.5241589631182694,
	"grad_norm": 0.9525818824768066,
	"learning_rate": 9.719151757941184e-05,
	"loss": 0.5405,
	"step": 16500
	},
	{
	"epoch": 0.5247943073159884,
	"grad_norm": 0.8067079186439514,
	"learning_rate": 9.698582815540476e-05,
	"loss": 0.5058,
	"step": 16520
	},
	{
	"epoch": 0.5254296515137076,
	"grad_norm": 0.8525674939155579,
	"learning_rate": 9.678015149461577e-05,
	"loss": 0.5429,
	"step": 16540
	},
	{
	"epoch": 0.5260649957114266,
	"grad_norm": 0.9794461727142334,
	"learning_rate": 9.65744884679627e-05,
	"loss": 0.5106,
	"step": 16560
	},
	{
	"epoch": 0.5267003399091458,
	"grad_norm": 0.8107161521911621,
	"learning_rate": 9.636883994630567e-05,
	"loss": 0.5124,
	"step": 16580
	},
	{
	"epoch": 0.5273356841068649,
	"grad_norm": 0.8728024959564209,
	"learning_rate": 9.61632068004434e-05,
	"loss": 0.5483,
	"step": 16600
	},
	{
	"epoch": 0.527971028304584,
	"grad_norm": 1.0132850408554077,
	"learning_rate": 9.595758990110948e-05,
	"loss": 0.55,
	"step": 16620
	},
	{
	"epoch": 0.5286063725023031,
	"grad_norm": 1.0854065418243408,
	"learning_rate": 9.575199011896869e-05,
	"loss": 0.5022,
	"step": 16640
	},
	{
	"epoch": 0.5292417167000223,
	"grad_norm": 1.06479012966156,
	"learning_rate": 9.555668697368233e-05,
	"loss": 0.4932,
	"step": 16660
	},
	{
	"epoch": 0.5298770608977413,
	"grad_norm": 1.1619220972061157,
	"learning_rate": 9.535112307403999e-05,
	"loss": 0.5377,
	"step": 16680
	},
	{
	"epoch": 0.5305124050954605,
	"grad_norm": 1.1277661323547363,
	"learning_rate": 9.514557885961573e-05,
	"loss": 0.5267,
	"step": 16700
	},
	{
	"epoch": 0.5311477492931795,
	"grad_norm": 1.0196537971496582,
	"learning_rate": 9.494005520076655e-05,
	"loss": 0.5203,
	"step": 16720
	},
	{
	"epoch": 0.5317830934908987,
	"grad_norm": 0.9534218907356262,
	"learning_rate": 9.473455296776239e-05,
	"loss": 0.5177,
	"step": 16740
	},
	{
	"epoch": 0.5324184376886179,
	"grad_norm": 0.9330717325210571,
	"learning_rate": 9.45290730307826e-05,
	"loss": 0.55,
	"step": 16760
	},
	{
	"epoch": 0.5330537818863369,
	"grad_norm": 0.9290218949317932,
	"learning_rate": 9.43236162599119e-05,
	"loss": 0.5301,
	"step": 16780
	},
	{
	"epoch": 0.533689126084056,
	"grad_norm": 0.9842971563339233,
	"learning_rate": 9.411818352513715e-05,
	"loss": 0.4928,
	"step": 16800
	},
	{
	"epoch": 0.5343244702817751,
	"grad_norm": 0.9267326593399048,
	"learning_rate": 9.391277569634329e-05,
	"loss": 0.5443,
	"step": 16820
	},
	{
	"epoch": 0.5349598144794943,
	"grad_norm": 0.9270855784416199,
	"learning_rate": 9.370739364330982e-05,
	"loss": 0.5132,
	"step": 16840
	},
	{
	"epoch": 0.5355951586772134,
	"grad_norm": 0.9786942601203918,
	"learning_rate": 9.35020382357071e-05,
	"loss": 0.5229,
	"step": 16860
	},
	{
	"epoch": 0.5362305028749325,
	"grad_norm": 0.8397322297096252,
	"learning_rate": 9.329671034309269e-05,
	"loss": 0.5248,
	"step": 16880
	},
	{
	"epoch": 0.5368658470726516,
	"grad_norm": 0.9696868062019348,
	"learning_rate": 9.30914108349076e-05,
	"loss": 0.5635,
	"step": 16900
	},
	{
	"epoch": 0.5375011912703708,
	"grad_norm": 1.1376127004623413,
	"learning_rate": 9.28861405804727e-05,
	"loss": 0.548,
	"step": 16920
	},
	{
	"epoch": 0.5381365354680898,
	"grad_norm": 0.9028751254081726,
	"learning_rate": 9.268090044898489e-05,
	"loss": 0.5253,
	"step": 16940
	},
	{
	"epoch": 0.538771879665809,
	"grad_norm": 0.7549586296081543,
	"learning_rate": 9.247569130951365e-05,
	"loss": 0.5119,
	"step": 16960
	},
	{
	"epoch": 0.539407223863528,
	"grad_norm": 1.002920150756836,
	"learning_rate": 9.227051403099715e-05,
	"loss": 0.5383,
	"step": 16980
	},
	{
	"epoch": 0.5400425680612472,
	"grad_norm": 0.7857794761657715,
	"learning_rate": 9.206536948223862e-05,
	"loss": 0.4943,
	"step": 17000
	},
	{
	"epoch": 0.5400425680612472,
	"eval_loss": 0.47516322135925293,
	"eval_runtime": 44.9681,
	"eval_samples_per_second": 60.109,
	"eval_steps_per_second": 30.066,
	"step": 17000
	},
	{
	"epoch": 0.5406779122589663,
	"grad_norm": 0.8384699821472168,
	"learning_rate": 9.186025853190276e-05,
	"loss": 0.5005,
	"step": 17020
	},
	{
	"epoch": 0.5413132564566854,
	"grad_norm": 0.859467089176178,
	"learning_rate": 9.1655182048512e-05,
	"loss": 0.486,
	"step": 17040
	},
	{
	"epoch": 0.5419486006544045,
	"grad_norm": 0.9178836345672607,
	"learning_rate": 9.145014090044276e-05,
	"loss": 0.4866,
	"step": 17060
	},
	{
	"epoch": 0.5425839448521237,
	"grad_norm": 1.5116227865219116,
	"learning_rate": 9.12451359559219e-05,
	"loss": 0.5103,
	"step": 17080
	},
	{
	"epoch": 0.5432192890498427,
	"grad_norm": 0.8251123428344727,
	"learning_rate": 9.104016808302297e-05,
	"loss": 0.5403,
	"step": 17100
	},
	{
	"epoch": 0.5438546332475619,
	"grad_norm": 0.8845348358154297,
	"learning_rate": 9.08352381496625e-05,
	"loss": 0.5295,
	"step": 17120
	},
	{
	"epoch": 0.5444899774452809,
	"grad_norm": 0.8761606812477112,
	"learning_rate": 9.063034702359643e-05,
	"loss": 0.5175,
	"step": 17140
	},
	{
	"epoch": 0.5451253216430001,
	"grad_norm": 0.8992062211036682,
	"learning_rate": 9.042549557241629e-05,
	"loss": 0.5211,
	"step": 17160
	},
	{
	"epoch": 0.5457606658407193,
	"grad_norm": 1.0609464645385742,
	"learning_rate": 9.022068466354573e-05,
	"loss": 0.5231,
	"step": 17180
	},
	{
	"epoch": 0.5463960100384383,
	"grad_norm": 1.1660939455032349,
	"learning_rate": 9.001591516423664e-05,
	"loss": 0.5097,
	"step": 17200
	},
	{
	"epoch": 0.5470313542361575,
	"grad_norm": 0.8982824683189392,
	"learning_rate": 8.981118794156556e-05,
	"loss": 0.499,
	"step": 17220
	},
	{
	"epoch": 0.5476666984338765,
	"grad_norm": 0.9423658847808838,
	"learning_rate": 8.960650386243009e-05,
	"loss": 0.5023,
	"step": 17240
	},
	{
	"epoch": 0.5483020426315957,
	"grad_norm": 0.781741738319397,
	"learning_rate": 8.940186379354505e-05,
	"loss": 0.5098,
	"step": 17260
	},
	{
	"epoch": 0.5489373868293148,
	"grad_norm": 0.9678505063056946,
	"learning_rate": 8.919726860143895e-05,
	"loss": 0.5005,
	"step": 17280
	},
	{
	"epoch": 0.5495727310270339,
	"grad_norm": 0.9400302171707153,
	"learning_rate": 8.899271915245028e-05,
	"loss": 0.537,
	"step": 17300
	},
	{
	"epoch": 0.550208075224753,
	"grad_norm": 0.8072425127029419,
	"learning_rate": 8.878821631272384e-05,
	"loss": 0.5073,
	"step": 17320
	},
	{
	"epoch": 0.5508434194224722,
	"grad_norm": 0.9000498652458191,
	"learning_rate": 8.858376094820701e-05,
	"loss": 0.5014,
	"step": 17340
	},
	{
	"epoch": 0.5514787636201912,
	"grad_norm": 0.9222893118858337,
	"learning_rate": 8.837935392464621e-05,
	"loss": 0.5216,
	"step": 17360
	},
	{
	"epoch": 0.5521141078179104,
	"grad_norm": 0.8468360304832458,
	"learning_rate": 8.817499610758316e-05,
	"loss": 0.5282,
	"step": 17380
	},
	{
	"epoch": 0.5527494520156294,
	"grad_norm": 0.7120311260223389,
	"learning_rate": 8.797068836235116e-05,
	"loss": 0.5277,
	"step": 17400
	},
	{
	"epoch": 0.5533847962133486,
	"grad_norm": 0.880155622959137,
	"learning_rate": 8.776643155407154e-05,
	"loss": 0.523,
	"step": 17420
	},
	{
	"epoch": 0.5540201404110677,
	"grad_norm": 1.023587703704834,
	"learning_rate": 8.756222654764996e-05,
	"loss": 0.508,
	"step": 17440
	},
	{
	"epoch": 0.5546554846087868,
	"grad_norm": 0.8903362154960632,
	"learning_rate": 8.735807420777262e-05,
	"loss": 0.5165,
	"step": 17460
	},
	{
	"epoch": 0.5552908288065059,
	"grad_norm": 0.7317694425582886,
	"learning_rate": 8.715397539890287e-05,
	"loss": 0.4672,
	"step": 17480
	},
	{
	"epoch": 0.5559261730042251,
	"grad_norm": 1.0228464603424072,
	"learning_rate": 8.694993098527723e-05,
	"loss": 0.5112,
	"step": 17500
	},
	{
	"epoch": 0.5565615172019441,
	"grad_norm": 0.7797629237174988,
	"learning_rate": 8.674594183090199e-05,
	"loss": 0.477,
	"step": 17520
	},
	{
	"epoch": 0.5571968613996633,
	"grad_norm": 0.8488342761993408,
	"learning_rate": 8.654200879954945e-05,
	"loss": 0.4993,
	"step": 17540
	},
	{
	"epoch": 0.5578322055973823,
	"grad_norm": 0.8529194593429565,
	"learning_rate": 8.63381327547542e-05,
	"loss": 0.5293,
	"step": 17560
	},
	{
	"epoch": 0.5584675497951015,
	"grad_norm": 0.9537157416343689,
	"learning_rate": 8.613431455980955e-05,
	"loss": 0.5047,
	"step": 17580
	},
	{
	"epoch": 0.5591028939928206,
	"grad_norm": 0.8697558045387268,
	"learning_rate": 8.593055507776393e-05,
	"loss": 0.5293,
	"step": 17600
	},
	{
	"epoch": 0.5597382381905397,
	"grad_norm": 0.8306463360786438,
	"learning_rate": 8.5726855171417e-05,
	"loss": 0.5075,
	"step": 17620
	},
	{
	"epoch": 0.5603735823882589,
	"grad_norm": 0.8880159258842468,
	"learning_rate": 8.55232157033163e-05,
	"loss": 0.5149,
	"step": 17640
	},
	{
	"epoch": 0.561008926585978,
	"grad_norm": 0.9390746355056763,
	"learning_rate": 8.531963753575334e-05,
	"loss": 0.5196,
	"step": 17660
	},
	{
	"epoch": 0.561644270783697,
	"grad_norm": 0.968285322189331,
	"learning_rate": 8.511612153076015e-05,
	"loss": 0.5229,
	"step": 17680
	},
	{
	"epoch": 0.5622796149814162,
	"grad_norm": 0.9114767909049988,
	"learning_rate": 8.491266855010548e-05,
	"loss": 0.5008,
	"step": 17700
	},
	{
	"epoch": 0.5629149591791353,
	"grad_norm": 0.9089644551277161,
	"learning_rate": 8.470927945529123e-05,
	"loss": 0.4848,
	"step": 17720
	},
	{
	"epoch": 0.5635503033768544,
	"grad_norm": 0.7264979481697083,
	"learning_rate": 8.450595510754877e-05,
	"loss": 0.5155,
	"step": 17740
	},
	{
	"epoch": 0.5641856475745736,
	"grad_norm": 0.9070448875427246,
	"learning_rate": 8.430269636783534e-05,
	"loss": 0.524,
	"step": 17760
	},
	{
	"epoch": 0.5648209917722926,
	"grad_norm": 0.9725968241691589,
	"learning_rate": 8.40995040968303e-05,
	"loss": 0.4925,
	"step": 17780
	},
	{
	"epoch": 0.5654563359700118,
	"grad_norm": 0.8976007103919983,
	"learning_rate": 8.389637915493162e-05,
	"loss": 0.4937,
	"step": 17800
	},
	{
	"epoch": 0.5660916801677308,
	"grad_norm": 0.9926420450210571,
	"learning_rate": 8.369332240225214e-05,
	"loss": 0.5181,
	"step": 17820
	},
	{
	"epoch": 0.56672702436545,
	"grad_norm": 0.852676272392273,
	"learning_rate": 8.349033469861598e-05,
	"loss": 0.5175,
	"step": 17840
	},
	{
	"epoch": 0.5673623685631691,
	"grad_norm": 0.8739320635795593,
	"learning_rate": 8.328741690355487e-05,
	"loss": 0.4805,
	"step": 17860
	},
	{
	"epoch": 0.5679977127608882,
	"grad_norm": 0.9660511016845703,
	"learning_rate": 8.308456987630449e-05,
	"loss": 0.5063,
	"step": 17880
	},
	{
	"epoch": 0.5686330569586073,
	"grad_norm": 0.9321526288986206,
	"learning_rate": 8.288179447580088e-05,
	"loss": 0.4994,
	"step": 17900
	},
	{
	"epoch": 0.5692684011563265,
	"grad_norm": 1.0359587669372559,
	"learning_rate": 8.267909156067685e-05,
	"loss": 0.5279,
	"step": 17920
	},
	{
	"epoch": 0.5699037453540455,
	"grad_norm": 0.9722701907157898,
	"learning_rate": 8.247646198925813e-05,
	"loss": 0.5061,
	"step": 17940
	},
	{
	"epoch": 0.5705390895517647,
	"grad_norm": 0.854860782623291,
	"learning_rate": 8.227390661956006e-05,
	"loss": 0.4827,
	"step": 17960
	},
	{
	"epoch": 0.5711744337494837,
	"grad_norm": 0.8997724652290344,
	"learning_rate": 8.207142630928362e-05,
	"loss": 0.4978,
	"step": 17980
	},
	{
	"epoch": 0.5718097779472029,
	"grad_norm": 0.9234896898269653,
	"learning_rate": 8.186902191581205e-05,
	"loss": 0.4982,
	"step": 18000
	},
	{
	"epoch": 0.5718097779472029,
	"eval_loss": 0.469827800989151,
	"eval_runtime": 44.8258,
	"eval_samples_per_second": 60.3,
	"eval_steps_per_second": 30.161,
	"step": 18000
	},
	{
	"epoch": 0.572445122144922,
	"grad_norm": 0.8457797169685364,
	"learning_rate": 8.166669429620712e-05,
	"loss": 0.5263,
	"step": 18020
	},
	{
	"epoch": 0.5730804663426411,
	"grad_norm": 0.8909218907356262,
	"learning_rate": 8.146444430720545e-05,
	"loss": 0.5045,
	"step": 18040
	},
	{
	"epoch": 0.5737158105403602,
	"grad_norm": 0.950072705745697,
	"learning_rate": 8.126227280521503e-05,
	"loss": 0.5247,
	"step": 18060
	},
	{
	"epoch": 0.5743511547380794,
	"grad_norm": 0.9507225751876831,
	"learning_rate": 8.106018064631148e-05,
	"loss": 0.4851,
	"step": 18080
	},
	{
	"epoch": 0.5749864989357985,
	"grad_norm": 1.0232789516448975,
	"learning_rate": 8.085816868623436e-05,
	"loss": 0.5457,
	"step": 18100
	},
	{
	"epoch": 0.5756218431335176,
	"grad_norm": 1.0967813730239868,
	"learning_rate": 8.065623778038377e-05,
	"loss": 0.52,
	"step": 18120
	},
	{
	"epoch": 0.5762571873312367,
	"grad_norm": 0.7866876125335693,
	"learning_rate": 8.045438878381649e-05,
	"loss": 0.5117,
	"step": 18140
	},
	{
	"epoch": 0.5768925315289558,
	"grad_norm": 0.9325518012046814,
	"learning_rate": 8.025262255124248e-05,
	"loss": 0.5415,
	"step": 18160
	},
	{
	"epoch": 0.577527875726675,
	"grad_norm": 0.8899424076080322,
	"learning_rate": 8.005093993702133e-05,
	"loss": 0.4947,
	"step": 18180
	},
	{
	"epoch": 0.578163219924394,
	"grad_norm": 1.0050842761993408,
	"learning_rate": 7.984934179515843e-05,
	"loss": 0.4863,
	"step": 18200
	},
	{
	"epoch": 0.5787985641221132,
	"grad_norm": 0.836564302444458,
	"learning_rate": 7.964782897930158e-05,
	"loss": 0.5055,
	"step": 18220
	},
	{
	"epoch": 0.5794339083198322,
	"grad_norm": 1.032029628753662,
	"learning_rate": 7.944640234273724e-05,
	"loss": 0.4919,
	"step": 18240
	},
	{
	"epoch": 0.5800692525175514,
	"grad_norm": 0.854015588760376,
	"learning_rate": 7.92450627383869e-05,
	"loss": 0.5108,
	"step": 18260
	},
	{
	"epoch": 0.5807045967152705,
	"grad_norm": 1.0629216432571411,
	"learning_rate": 7.904381101880364e-05,
	"loss": 0.5312,
	"step": 18280
	},
	{
	"epoch": 0.5813399409129896,
	"grad_norm": 0.8146398067474365,
	"learning_rate": 7.884264803616827e-05,
	"loss": 0.5203,
	"step": 18300
	},
	{
	"epoch": 0.5819752851107087,
	"grad_norm": 1.1307437419891357,
	"learning_rate": 7.864157464228593e-05,
	"loss": 0.5325,
	"step": 18320
	},
	{
	"epoch": 0.5826106293084279,
	"grad_norm": 0.9609930515289307,
	"learning_rate": 7.844059168858241e-05,
	"loss": 0.5034,
	"step": 18340
	},
	{
	"epoch": 0.5832459735061469,
	"grad_norm": 0.8615232110023499,
	"learning_rate": 7.823970002610048e-05,
	"loss": 0.522,
	"step": 18360
	},
	{
	"epoch": 0.5838813177038661,
	"grad_norm": 1.014160394668579,
	"learning_rate": 7.803890050549641e-05,
	"loss": 0.5104,
	"step": 18380
	},
	{
	"epoch": 0.5845166619015851,
	"grad_norm": 1.015424370765686,
	"learning_rate": 7.78381939770363e-05,
	"loss": 0.4887,
	"step": 18400
	},
	{
	"epoch": 0.5851520060993043,
	"grad_norm": 1.0072382688522339,
	"learning_rate": 7.763758129059243e-05,
	"loss": 0.5242,
	"step": 18420
	},
	{
	"epoch": 0.5857873502970234,
	"grad_norm": 1.122096300125122,
	"learning_rate": 7.743706329563971e-05,
	"loss": 0.5408,
	"step": 18440
	},
	{
	"epoch": 0.5864226944947425,
	"grad_norm": 0.8347269296646118,
	"learning_rate": 7.723664084125218e-05,
	"loss": 0.5112,
	"step": 18460
	},
	{
	"epoch": 0.5870580386924616,
	"grad_norm": 0.9214980006217957,
	"learning_rate": 7.703631477609926e-05,
	"loss": 0.5111,
	"step": 18480
	},
	{
	"epoch": 0.5876933828901808,
	"grad_norm": 0.8427157402038574,
	"learning_rate": 7.683608594844218e-05,
	"loss": 0.5199,
	"step": 18500
	},
	{
	"epoch": 0.5883287270878998,
	"grad_norm": 0.8485844731330872,
	"learning_rate": 7.663595520613054e-05,
	"loss": 0.5193,
	"step": 18520
	},
	{
	"epoch": 0.588964071285619,
	"grad_norm": 0.8761444687843323,
	"learning_rate": 7.643592339659848e-05,
	"loss": 0.5044,
	"step": 18540
	},
	{
	"epoch": 0.589599415483338,
	"grad_norm": 0.9373889565467834,
	"learning_rate": 7.623599136686133e-05,
	"loss": 0.493,
	"step": 18560
	},
	{
	"epoch": 0.5902347596810572,
	"grad_norm": 0.9052358269691467,
	"learning_rate": 7.603615996351184e-05,
	"loss": 0.516,
	"step": 18580
	},
	{
	"epoch": 0.5908701038787764,
	"grad_norm": 0.7757846117019653,
	"learning_rate": 7.583643003271668e-05,
	"loss": 0.5043,
	"step": 18600
	},
	{
	"epoch": 0.5915054480764954,
	"grad_norm": 0.7769386172294617,
	"learning_rate": 7.563680242021285e-05,
	"loss": 0.5005,
	"step": 18620
	},
	{
	"epoch": 0.5921407922742146,
	"grad_norm": 0.7892422080039978,
	"learning_rate": 7.543727797130413e-05,
	"loss": 0.4982,
	"step": 18640
	},
	{
	"epoch": 0.5927761364719337,
	"grad_norm": 1.0471646785736084,
	"learning_rate": 7.524782606964114e-05,
	"loss": 0.5139,
	"step": 18660
	},
	{
	"epoch": 0.5934114806696528,
	"grad_norm": 0.7995429039001465,
	"learning_rate": 7.504850521939017e-05,
	"loss": 0.4736,
	"step": 18680
	},
	{
	"epoch": 0.5940468248673719,
	"grad_norm": 0.9799679517745972,
	"learning_rate": 7.484929002382169e-05,
	"loss": 0.5033,
	"step": 18700
	},
	{
	"epoch": 0.594682169065091,
	"grad_norm": 0.8607106804847717,
	"learning_rate": 7.465018132649311e-05,
	"loss": 0.498,
	"step": 18720
	},
	{
	"epoch": 0.5953175132628101,
	"grad_norm": 0.9690695405006409,
	"learning_rate": 7.445117997051085e-05,
	"loss": 0.4898,
	"step": 18740
	},
	{
	"epoch": 0.5959528574605293,
	"grad_norm": 1.331871747970581,
	"learning_rate": 7.425228679852684e-05,
	"loss": 0.5044,
	"step": 18760
	},
	{
	"epoch": 0.5965882016582483,
	"grad_norm": 0.9347879886627197,
	"learning_rate": 7.405350265273492e-05,
	"loss": 0.5088,
	"step": 18780
	},
	{
	"epoch": 0.5972235458559675,
	"grad_norm": 0.8495462536811829,
	"learning_rate": 7.385482837486725e-05,
	"loss": 0.5078,
	"step": 18800
	},
	{
	"epoch": 0.5978588900536865,
	"grad_norm": 1.318202257156372,
	"learning_rate": 7.365626480619081e-05,
	"loss": 0.5014,
	"step": 18820
	},
	{
	"epoch": 0.5984942342514057,
	"grad_norm": 1.0349724292755127,
	"learning_rate": 7.345781278750368e-05,
	"loss": 0.531,
	"step": 18840
	},
	{
	"epoch": 0.5991295784491248,
	"grad_norm": 1.047760248184204,
	"learning_rate": 7.326938745831322e-05,
	"loss": 0.4925,
	"step": 18860
	},
	{
	"epoch": 0.5997649226468439,
	"grad_norm": 0.874220073223114,
	"learning_rate": 7.307115537865903e-05,
	"loss": 0.5056,
	"step": 18880
	},
	{
	"epoch": 0.600400266844563,
	"grad_norm": 0.738158106803894,
	"learning_rate": 7.287303732658328e-05,
	"loss": 0.4938,
	"step": 18900
	},
	{
	"epoch": 0.6010356110422822,
	"grad_norm": 0.8721213936805725,
	"learning_rate": 7.267503414099758e-05,
	"loss": 0.5074,
	"step": 18920
	},
	{
	"epoch": 0.6016709552400012,
	"grad_norm": 0.7241856455802917,
	"learning_rate": 7.247714666032724e-05,
	"loss": 0.5045,
	"step": 18940
	},
	{
	"epoch": 0.6023062994377204,
	"grad_norm": 1.0385938882827759,
	"learning_rate": 7.227937572250761e-05,
	"loss": 0.5313,
	"step": 18960
	},
	{
	"epoch": 0.6029416436354395,
	"grad_norm": 1.8555858135223389,
	"learning_rate": 7.208172216498046e-05,
	"loss": 0.4989,
	"step": 18980
	},
	{
	"epoch": 0.6035769878331586,
	"grad_norm": 0.9453182816505432,
	"learning_rate": 7.188418682469064e-05,
	"loss": 0.5146,
	"step": 19000
	},
	{
	"epoch": 0.6035769878331586,
	"eval_loss": 0.46334323287010193,
	"eval_runtime": 44.8428,
	"eval_samples_per_second": 60.277,
	"eval_steps_per_second": 30.15,
	"step": 19000
	},
	{
	"epoch": 0.6042123320308778,
	"grad_norm": 0.9362254738807678,
	"learning_rate": 7.168677053808237e-05,
	"loss": 0.5148,
	"step": 19020
	},
	{
	"epoch": 0.6048476762285968,
	"grad_norm": 1.19162917137146,
	"learning_rate": 7.148947414109572e-05,
	"loss": 0.4954,
	"step": 19040
	},
	{
	"epoch": 0.605483020426316,
	"grad_norm": 0.9854863286018372,
	"learning_rate": 7.129229846916318e-05,
	"loss": 0.5173,
	"step": 19060
	},
	{
	"epoch": 0.6061183646240351,
	"grad_norm": 0.8435449600219727,
	"learning_rate": 7.109524435720597e-05,
	"loss": 0.5154,
	"step": 19080
	},
	{
	"epoch": 0.6067537088217542,
	"grad_norm": 0.920364260673523,
	"learning_rate": 7.08983126396306e-05,
	"loss": 0.5092,
	"step": 19100
	},
	{
	"epoch": 0.6073890530194733,
	"grad_norm": 1.2439565658569336,
	"learning_rate": 7.070150415032527e-05,
	"loss": 0.511,
	"step": 19120
	},
	{
	"epoch": 0.6080243972171924,
	"grad_norm": 0.7429732084274292,
	"learning_rate": 7.050481972265648e-05,
	"loss": 0.4787,
	"step": 19140
	},
	{
	"epoch": 0.6086597414149115,
	"grad_norm": 0.6966003179550171,
	"learning_rate": 7.03082601894653e-05,
	"loss": 0.5237,
	"step": 19160
	},
	{
	"epoch": 0.6092950856126307,
	"grad_norm": 0.8211964964866638,
	"learning_rate": 7.011182638306402e-05,
	"loss": 0.5349,
	"step": 19180
	},
	{
	"epoch": 0.6099304298103497,
	"grad_norm": 0.9803711771965027,
	"learning_rate": 6.991551913523253e-05,
	"loss": 0.5369,
	"step": 19200
	},
	{
	"epoch": 0.6105657740080689,
	"grad_norm": 0.9161061644554138,
	"learning_rate": 6.971933927721479e-05,
	"loss": 0.4993,
	"step": 19220
	},
	{
	"epoch": 0.611201118205788,
	"grad_norm": 0.9608227014541626,
	"learning_rate": 6.952328763971537e-05,
	"loss": 0.4837,
	"step": 19240
	},
	{
	"epoch": 0.6118364624035071,
	"grad_norm": 0.9438381195068359,
	"learning_rate": 6.932736505289592e-05,
	"loss": 0.479,
	"step": 19260
	},
	{
	"epoch": 0.6124718066012262,
	"grad_norm": 1.571315884590149,
	"learning_rate": 6.91315723463716e-05,
	"loss": 0.5417,
	"step": 19280
	},
	{
	"epoch": 0.6131071507989453,
	"grad_norm": 0.8187804818153381,
	"learning_rate": 6.893591034920763e-05,
	"loss": 0.5189,
	"step": 19300
	},
	{
	"epoch": 0.6137424949966644,
	"grad_norm": 0.7617794871330261,
	"learning_rate": 6.87403798899157e-05,
	"loss": 0.468,
	"step": 19320
	},
	{
	"epoch": 0.6143778391943836,
	"grad_norm": 0.8723959922790527,
	"learning_rate": 6.85449817964506e-05,
	"loss": 0.5044,
	"step": 19340
	},
	{
	"epoch": 0.6150131833921026,
	"grad_norm": 0.7760429382324219,
	"learning_rate": 6.834971689620659e-05,
	"loss": 0.4922,
	"step": 19360
	},
	{
	"epoch": 0.6156485275898218,
	"grad_norm": 0.925581693649292,
	"learning_rate": 6.815458601601392e-05,
	"loss": 0.5079,
	"step": 19380
	},
	{
	"epoch": 0.6162838717875408,
	"grad_norm": 0.8069369792938232,
	"learning_rate": 6.795958998213535e-05,
	"loss": 0.4995,
	"step": 19400
	},
	{
	"epoch": 0.61691921598526,
	"grad_norm": 1.3501884937286377,
	"learning_rate": 6.77647296202627e-05,
	"loss": 0.4906,
	"step": 19420
	},
	{
	"epoch": 0.6175545601829792,
	"grad_norm": 0.9078099131584167,
	"learning_rate": 6.75700057555132e-05,
	"loss": 0.4983,
	"step": 19440
	},
	{
	"epoch": 0.6181899043806982,
	"grad_norm": 0.7792625427246094,
	"learning_rate": 6.737541921242619e-05,
	"loss": 0.4869,
	"step": 19460
	},
	{
	"epoch": 0.6188252485784174,
	"grad_norm": 0.8952593803405762,
	"learning_rate": 6.718097081495947e-05,
	"loss": 0.4975,
	"step": 19480
	},
	{
	"epoch": 0.6194605927761365,
	"grad_norm": 0.9192362427711487,
	"learning_rate": 6.698666138648593e-05,
	"loss": 0.5059,
	"step": 19500
	},
	{
	"epoch": 0.6200959369738556,
	"grad_norm": 0.8911659121513367,
	"learning_rate": 6.679249174978997e-05,
	"loss": 0.5014,
	"step": 19520
	},
	{
	"epoch": 0.6207312811715747,
	"grad_norm": 0.9853730201721191,
	"learning_rate": 6.659846272706406e-05,
	"loss": 0.4935,
	"step": 19540
	},
	{
	"epoch": 0.6213666253692938,
	"grad_norm": 1.3485686779022217,
	"learning_rate": 6.640457513990527e-05,
	"loss": 0.5061,
	"step": 19560
	},
	{
	"epoch": 0.6220019695670129,
	"grad_norm": 0.8757696747779846,
	"learning_rate": 6.621082980931179e-05,
	"loss": 0.4869,
	"step": 19580
	},
	{
	"epoch": 0.6226373137647321,
	"grad_norm": 1.0088223218917847,
	"learning_rate": 6.601722755567937e-05,
	"loss": 0.5138,
	"step": 19600
	},
	{
	"epoch": 0.6232726579624511,
	"grad_norm": 0.94034343957901,
	"learning_rate": 6.582376919879798e-05,
	"loss": 0.5159,
	"step": 19620
	},
	{
	"epoch": 0.6239080021601703,
	"grad_norm": 0.834994375705719,
	"learning_rate": 6.563045555784826e-05,
	"loss": 0.4862,
	"step": 19640
	},
	{
	"epoch": 0.6245433463578894,
	"grad_norm": 1.2617956399917603,
	"learning_rate": 6.543728745139802e-05,
	"loss": 0.5112,
	"step": 19660
	},
	{
	"epoch": 0.6251786905556085,
	"grad_norm": 0.8542491793632507,
	"learning_rate": 6.524426569739892e-05,
	"loss": 0.5234,
	"step": 19680
	},
	{
	"epoch": 0.6258140347533276,
	"grad_norm": 1.162606120109558,
	"learning_rate": 6.505139111318277e-05,
	"loss": 0.4772,
	"step": 19700
	},
	{
	"epoch": 0.6264493789510467,
	"grad_norm": 1.0025289058685303,
	"learning_rate": 6.48586645154583e-05,
	"loss": 0.5212,
	"step": 19720
	},
	{
	"epoch": 0.6270847231487658,
	"grad_norm": 1.0566537380218506,
	"learning_rate": 6.466608672030763e-05,
	"loss": 0.5556,
	"step": 19740
	},
	{
	"epoch": 0.627720067346485,
	"grad_norm": 1.0380536317825317,
	"learning_rate": 6.447365854318266e-05,
	"loss": 0.4827,
	"step": 19760
	},
	{
	"epoch": 0.628355411544204,
	"grad_norm": 1.0499038696289062,
	"learning_rate": 6.42813807989019e-05,
	"loss": 0.5316,
	"step": 19780
	},
	{
	"epoch": 0.6289907557419232,
	"grad_norm": 0.7457720637321472,
	"learning_rate": 6.408925430164669e-05,
	"loss": 0.5055,
	"step": 19800
	},
	{
	"epoch": 0.6296260999396422,
	"grad_norm": 1.2990676164627075,
	"learning_rate": 6.389727986495813e-05,
	"loss": 0.5068,
	"step": 19820
	},
	{
	"epoch": 0.6302614441373614,
	"grad_norm": 0.9500844478607178,
	"learning_rate": 6.370545830173332e-05,
	"loss": 0.4889,
	"step": 19840
	},
	{
	"epoch": 0.6308967883350806,
	"grad_norm": 0.7668824195861816,
	"learning_rate": 6.351379042422199e-05,
	"loss": 0.5314,
	"step": 19860
	},
	{
	"epoch": 0.6315321325327996,
	"grad_norm": 0.9457335472106934,
	"learning_rate": 6.332227704402321e-05,
	"loss": 0.4898,
	"step": 19880
	},
	{
	"epoch": 0.6321674767305188,
	"grad_norm": 0.8252271413803101,
	"learning_rate": 6.31309189720818e-05,
	"loss": 0.5045,
	"step": 19900
	},
	{
	"epoch": 0.6328028209282379,
	"grad_norm": 0.9943385720252991,
	"learning_rate": 6.29397170186849e-05,
	"loss": 0.5243,
	"step": 19920
	},
	{
	"epoch": 0.633438165125957,
	"grad_norm": 1.1582151651382446,
	"learning_rate": 6.27582205051849e-05,
	"loss": 0.5331,
	"step": 19940
	},
	{
	"epoch": 0.6340735093236761,
	"grad_norm": 0.9436770677566528,
	"learning_rate": 6.256732531103176e-05,
	"loss": 0.4903,
	"step": 19960
	},
	{
	"epoch": 0.6347088535213952,
	"grad_norm": 0.8253883123397827,
	"learning_rate": 6.237658862190583e-05,
	"loss": 0.4934,
	"step": 19980
	},
	{
	"epoch": 0.6353441977191143,
	"grad_norm": 0.8770557641983032,
	"learning_rate": 6.21860112454631e-05,
	"loss": 0.5202,
	"step": 20000
	},
	{
	"epoch": 0.6353441977191143,
	"eval_loss": 0.45828375220298767,
	"eval_runtime": 44.5614,
	"eval_samples_per_second": 60.658,
	"eval_steps_per_second": 30.34,
	"step": 20000
	},
	{
	"epoch": 0.6359795419168335,
	"grad_norm": 1.2218546867370605,
	"learning_rate": 6.19955939886849e-05,
	"loss": 0.5171,
	"step": 20020
	},
	{
	"epoch": 0.6366148861145525,
	"grad_norm": 0.8330618143081665,
	"learning_rate": 6.180533765787468e-05,
	"loss": 0.4863,
	"step": 20040
	},
	{
	"epoch": 0.6372502303122717,
	"grad_norm": 1.0419652462005615,
	"learning_rate": 6.162474393506114e-05,
	"loss": 0.5427,
	"step": 20060
	},
	{
	"epoch": 0.6378855745099908,
	"grad_norm": 0.9472757577896118,
	"learning_rate": 6.143480372643493e-05,
	"loss": 0.5245,
	"step": 20080
	},
	{
	"epoch": 0.6385209187077099,
	"grad_norm": 0.7603405117988586,
	"learning_rate": 6.12450268183886e-05,
	"loss": 0.4964,
	"step": 20100
	},
	{
	"epoch": 0.639156262905429,
	"grad_norm": 0.8776742219924927,
	"learning_rate": 6.105541401451404e-05,
	"loss": 0.4966,
	"step": 20120
	},
	{
	"epoch": 0.6397916071031481,
	"grad_norm": 0.8271143436431885,
	"learning_rate": 6.086596611770831e-05,
	"loss": 0.5119,
	"step": 20140
	},
	{
	"epoch": 0.6404269513008672,
	"grad_norm": 1.1509547233581543,
	"learning_rate": 6.067668393017007e-05,
	"loss": 0.5031,
	"step": 20160
	},
	{
	"epoch": 0.6410622954985864,
	"grad_norm": 0.8693366050720215,
	"learning_rate": 6.048756825339643e-05,
	"loss": 0.4986,
	"step": 20180
	},
	{
	"epoch": 0.6416976396963054,
	"grad_norm": 0.949834942817688,
	"learning_rate": 6.029861988817935e-05,
	"loss": 0.4921,
	"step": 20200
	},
	{
	"epoch": 0.6423329838940246,
	"grad_norm": 0.9004225730895996,
	"learning_rate": 6.010983963460233e-05,
	"loss": 0.5023,
	"step": 20220
	},
	{
	"epoch": 0.6429683280917438,
	"grad_norm": 0.7829142808914185,
	"learning_rate": 5.9921228292037026e-05,
	"loss": 0.507,
	"step": 20240
	},
	{
	"epoch": 0.6436036722894628,
	"grad_norm": 1.1816707849502563,
	"learning_rate": 5.973278665913985e-05,
	"loss": 0.4926,
	"step": 20260
	},
	{
	"epoch": 0.644239016487182,
	"grad_norm": 0.881648063659668,
	"learning_rate": 5.9544515533848614e-05,
	"loss": 0.4885,
	"step": 20280
	},
	{
	"epoch": 0.644874360684901,
	"grad_norm": 0.9568135738372803,
	"learning_rate": 5.9356415713379145e-05,
	"loss": 0.515,
	"step": 20300
	},
	{
	"epoch": 0.6455097048826202,
	"grad_norm": 0.9377472400665283,
	"learning_rate": 5.9168487994221834e-05,
	"loss": 0.4886,
	"step": 20320
	},
	{
	"epoch": 0.6461450490803393,
	"grad_norm": 0.9032811522483826,
	"learning_rate": 5.898073317213837e-05,
	"loss": 0.5064,
	"step": 20340
	},
	{
	"epoch": 0.6467803932780584,
	"grad_norm": 0.9788734316825867,
	"learning_rate": 5.879315204215836e-05,
	"loss": 0.4698,
	"step": 20360
	},
	{
	"epoch": 0.6474157374757775,
	"grad_norm": 1.0353432893753052,
	"learning_rate": 5.860574539857584e-05,
	"loss": 0.5227,
	"step": 20380
	},
	{
	"epoch": 0.6480510816734966,
	"grad_norm": 0.8998845815658569,
	"learning_rate": 5.84185140349461e-05,
	"loss": 0.5132,
	"step": 20400
	},
	{
	"epoch": 0.6486864258712157,
	"grad_norm": 0.8317026495933533,
	"learning_rate": 5.82314587440821e-05,
	"loss": 0.468,
	"step": 20420
	},
	{
	"epoch": 0.6493217700689349,
	"grad_norm": 0.7740748524665833,
	"learning_rate": 5.80445803180514e-05,
	"loss": 0.5119,
	"step": 20440
	},
	{
	"epoch": 0.6499571142666539,
	"grad_norm": 1.0922515392303467,
	"learning_rate": 5.78578795481725e-05,
	"loss": 0.5284,
	"step": 20460
	},
	{
	"epoch": 0.6505924584643731,
	"grad_norm": 0.8265649676322937,
	"learning_rate": 5.76713572250117e-05,
	"loss": 0.5095,
	"step": 20480
	},
	{
	"epoch": 0.6512278026620922,
	"grad_norm": 1.0644861459732056,
	"learning_rate": 5.748501413837963e-05,
	"loss": 0.5028,
	"step": 20500
	},
	{
	"epoch": 0.6518631468598113,
	"grad_norm": 0.9139828681945801,
	"learning_rate": 5.729885107732808e-05,
	"loss": 0.4814,
	"step": 20520
	},
	{
	"epoch": 0.6524984910575304,
	"grad_norm": 0.7917624115943909,
	"learning_rate": 5.7112868830146416e-05,
	"loss": 0.4772,
	"step": 20540
	},
	{
	"epoch": 0.6531338352552495,
	"grad_norm": 0.7677121162414551,
	"learning_rate": 5.692706818435836e-05,
	"loss": 0.519,
	"step": 20560
	},
	{
	"epoch": 0.6537691794529686,
	"grad_norm": 0.8412395715713501,
	"learning_rate": 5.674144992671882e-05,
	"loss": 0.501,
	"step": 20580
	},
	{
	"epoch": 0.6544045236506878,
	"grad_norm": 1.014061689376831,
	"learning_rate": 5.655601484321022e-05,
	"loss": 0.5122,
	"step": 20600
	},
	{
	"epoch": 0.6550398678484068,
	"grad_norm": 1.0746990442276,
	"learning_rate": 5.6370763719039375e-05,
	"loss": 0.4969,
	"step": 20620
	},
	{
	"epoch": 0.655675212046126,
	"grad_norm": 0.9021841883659363,
	"learning_rate": 5.6185697338634304e-05,
	"loss": 0.4771,
	"step": 20640
	},
	{
	"epoch": 0.6563105562438452,
	"grad_norm": 0.8193987607955933,
	"learning_rate": 5.600081648564056e-05,
	"loss": 0.5143,
	"step": 20660
	},
	{
	"epoch": 0.6569459004415642,
	"grad_norm": 1.152421474456787,
	"learning_rate": 5.581612194291814e-05,
	"loss": 0.4873,
	"step": 20680
	},
	{
	"epoch": 0.6575812446392834,
	"grad_norm": 0.8709347248077393,
	"learning_rate": 5.5631614492538217e-05,
	"loss": 0.5199,
	"step": 20700
	},
	{
	"epoch": 0.6582165888370024,
	"grad_norm": 0.827723503112793,
	"learning_rate": 5.544729491577967e-05,
	"loss": 0.4917,
	"step": 20720
	},
	{
	"epoch": 0.6588519330347216,
	"grad_norm": 1.5408345460891724,
	"learning_rate": 5.526316399312579e-05,
	"loss": 0.5562,
	"step": 20740
	},
	{
	"epoch": 0.6594872772324407,
	"grad_norm": 0.731490433216095,
	"learning_rate": 5.507922250426118e-05,
	"loss": 0.4927,
	"step": 20760
	},
	{
	"epoch": 0.6601226214301598,
	"grad_norm": 0.950702428817749,
	"learning_rate": 5.4895471228068185e-05,
	"loss": 0.5115,
	"step": 20780
	},
	{
	"epoch": 0.6607579656278789,
	"grad_norm": 0.8342424631118774,
	"learning_rate": 5.471191094262369e-05,
	"loss": 0.4856,
	"step": 20800
	},
	{
	"epoch": 0.661393309825598,
	"grad_norm": 0.9297844767570496,
	"learning_rate": 5.4528542425196004e-05,
	"loss": 0.4896,
	"step": 20820
	},
	{
	"epoch": 0.6620286540233171,
	"grad_norm": 0.7558259963989258,
	"learning_rate": 5.434536645224126e-05,
	"loss": 0.4895,
	"step": 20840
	},
	{
	"epoch": 0.6626639982210363,
	"grad_norm": 1.2116395235061646,
	"learning_rate": 5.416238379940035e-05,
	"loss": 0.507,
	"step": 20860
	},
	{
	"epoch": 0.6632993424187553,
	"grad_norm": 0.913467526435852,
	"learning_rate": 5.39795952414955e-05,
	"loss": 0.5137,
	"step": 20880
	},
	{
	"epoch": 0.6639346866164745,
	"grad_norm": 0.868238627910614,
	"learning_rate": 5.3797001552527184e-05,
	"loss": 0.5185,
	"step": 20900
	},
	{
	"epoch": 0.6645700308141936,
	"grad_norm": 1.0668286085128784,
	"learning_rate": 5.361460350567062e-05,
	"loss": 0.5158,
	"step": 20920
	},
	{
	"epoch": 0.6652053750119127,
	"grad_norm": 0.795097291469574,
	"learning_rate": 5.3432401873272655e-05,
	"loss": 0.4985,
	"step": 20940
	},
	{
	"epoch": 0.6658407192096318,
	"grad_norm": 0.6949301958084106,
	"learning_rate": 5.325039742684839e-05,
	"loss": 0.4722,
	"step": 20960
	},
	{
	"epoch": 0.6664760634073509,
	"grad_norm": 0.7859952449798584,
	"learning_rate": 5.3068590937077945e-05,
	"loss": 0.4933,
	"step": 20980
	},
	{
	"epoch": 0.66711140760507,
	"grad_norm": 0.8529000282287598,
	"learning_rate": 5.288698317380334e-05,
	"loss": 0.5098,
	"step": 21000
	},
	{
	"epoch": 0.66711140760507,
	"eval_loss": 0.45643100142478943,
	"eval_runtime": 44.6378,
	"eval_samples_per_second": 60.554,
	"eval_steps_per_second": 30.288,
	"step": 21000
	},
	{
	"epoch": 0.6677467518027892,
	"grad_norm": 0.9853639602661133,
	"learning_rate": 5.270557490602499e-05,
	"loss": 0.4715,
	"step": 21020
	},
	{
	"epoch": 0.6683820960005082,
	"grad_norm": 0.8387131690979004,
	"learning_rate": 5.2524366901898566e-05,
	"loss": 0.5128,
	"step": 21040
	},
	{
	"epoch": 0.6690174401982274,
	"grad_norm": 0.8610044717788696,
	"learning_rate": 5.234335992873176e-05,
	"loss": 0.5424,
	"step": 21060
	},
	{
	"epoch": 0.6696527843959466,
	"grad_norm": 0.8878015279769897,
	"learning_rate": 5.216255475298109e-05,
	"loss": 0.4734,
	"step": 21080
	},
	{
	"epoch": 0.6702881285936656,
	"grad_norm": 1.0038951635360718,
	"learning_rate": 5.198195214024848e-05,
	"loss": 0.4879,
	"step": 21100
	},
	{
	"epoch": 0.6709234727913848,
	"grad_norm": 0.9256641864776611,
	"learning_rate": 5.1801552855278126e-05,
	"loss": 0.527,
	"step": 21120
	},
	{
	"epoch": 0.6715588169891038,
	"grad_norm": 0.7668296098709106,
	"learning_rate": 5.162135766195337e-05,
	"loss": 0.5161,
	"step": 21140
	},
	{
	"epoch": 0.672194161186823,
	"grad_norm": 0.7756738066673279,
	"learning_rate": 5.144136732329323e-05,
	"loss": 0.5265,
	"step": 21160
	},
	{
	"epoch": 0.6728295053845421,
	"grad_norm": 0.9279829859733582,
	"learning_rate": 5.1261582601449285e-05,
	"loss": 0.4814,
	"step": 21180
	},
	{
	"epoch": 0.6734648495822612,
	"grad_norm": 1.1274375915527344,
	"learning_rate": 5.108200425770255e-05,
	"loss": 0.5061,
	"step": 21200
	},
	{
	"epoch": 0.6741001937799803,
	"grad_norm": 1.082535982131958,
	"learning_rate": 5.090263305246006e-05,
	"loss": 0.5081,
	"step": 21220
	},
	{
	"epoch": 0.6747355379776995,
	"grad_norm": 1.0355536937713623,
	"learning_rate": 5.0723469745251725e-05,
	"loss": 0.5044,
	"step": 21240
	},
	{
	"epoch": 0.6753708821754185,
	"grad_norm": 0.9309506416320801,
	"learning_rate": 5.054451509472728e-05,
	"loss": 0.5241,
	"step": 21260
	},
	{
	"epoch": 0.6760062263731377,
	"grad_norm": 0.818247377872467,
	"learning_rate": 5.0365769858652735e-05,
	"loss": 0.5034,
	"step": 21280
	},
	{
	"epoch": 0.6766415705708567,
	"grad_norm": 0.8921930193901062,
	"learning_rate": 5.0187234793907447e-05,
	"loss": 0.5089,
	"step": 21300
	},
	{
	"epoch": 0.6772769147685759,
	"grad_norm": 0.9915839433670044,
	"learning_rate": 5.000891065648087e-05,
	"loss": 0.5049,
	"step": 21320
	},
	{
	"epoch": 0.677912258966295,
	"grad_norm": 0.8783996105194092,
	"learning_rate": 4.983079820146922e-05,
	"loss": 0.5314,
	"step": 21340
	},
	{
	"epoch": 0.6785476031640141,
	"grad_norm": 0.8735405802726746,
	"learning_rate": 4.96528981830724e-05,
	"loss": 0.5036,
	"step": 21360
	},
	{
	"epoch": 0.6791829473617332,
	"grad_norm": 0.9674988389015198,
	"learning_rate": 4.947521135459072e-05,
	"loss": 0.5269,
	"step": 21380
	},
	{
	"epoch": 0.6798182915594523,
	"grad_norm": 0.9271227717399597,
	"learning_rate": 4.9297738468421896e-05,
	"loss": 0.5061,
	"step": 21400
	},
	{
	"epoch": 0.6804536357571714,
	"grad_norm": 0.7828012704849243,
	"learning_rate": 4.912048027605759e-05,
	"loss": 0.4978,
	"step": 21420
	},
	{
	"epoch": 0.6810889799548906,
	"grad_norm": 1.3417547941207886,
	"learning_rate": 4.8943437528080385e-05,
	"loss": 0.5326,
	"step": 21440
	},
	{
	"epoch": 0.6817243241526096,
	"grad_norm": 0.8963372707366943,
	"learning_rate": 4.876661097416066e-05,
	"loss": 0.4989,
	"step": 21460
	},
	{
	"epoch": 0.6823596683503288,
	"grad_norm": 0.893553614616394,
	"learning_rate": 4.859000136305329e-05,
	"loss": 0.4859,
	"step": 21480
	},
	{
	"epoch": 0.682995012548048,
	"grad_norm": 1.2325243949890137,
	"learning_rate": 4.8413609442594445e-05,
	"loss": 0.5037,
	"step": 21500
	},
	{
	"epoch": 0.683630356745767,
	"grad_norm": 0.8049502372741699,
	"learning_rate": 4.8237435959698706e-05,
	"loss": 0.509,
	"step": 21520
	},
	{
	"epoch": 0.6842657009434862,
	"grad_norm": 1.2289927005767822,
	"learning_rate": 4.8061481660355534e-05,
	"loss": 0.5128,
	"step": 21540
	},
	{
	"epoch": 0.6849010451412052,
	"grad_norm": 0.8123481869697571,
	"learning_rate": 4.7885747289626284e-05,
	"loss": 0.5031,
	"step": 21560
	},
	{
	"epoch": 0.6855363893389244,
	"grad_norm": 0.8852875232696533,
	"learning_rate": 4.771023359164116e-05,
	"loss": 0.4875,
	"step": 21580
	},
	{
	"epoch": 0.6861717335366435,
	"grad_norm": 0.8462742567062378,
	"learning_rate": 4.753494130959586e-05,
	"loss": 0.4787,
	"step": 21600
	},
	{
	"epoch": 0.6868070777343626,
	"grad_norm": 0.99876868724823,
	"learning_rate": 4.7359871185748485e-05,
	"loss": 0.5116,
	"step": 21620
	},
	{
	"epoch": 0.6874424219320817,
	"grad_norm": 0.9393181204795837,
	"learning_rate": 4.718502396141656e-05,
	"loss": 0.4878,
	"step": 21640
	},
	{
	"epoch": 0.6880777661298009,
	"grad_norm": 0.8426542282104492,
	"learning_rate": 4.701040037697364e-05,
	"loss": 0.4897,
	"step": 21660
	},
	{
	"epoch": 0.6887131103275199,
	"grad_norm": 0.938210666179657,
	"learning_rate": 4.683600117184631e-05,
	"loss": 0.492,
	"step": 21680
	},
	{
	"epoch": 0.6893484545252391,
	"grad_norm": 0.8325148820877075,
	"learning_rate": 4.666182708451114e-05,
	"loss": 0.4842,
	"step": 21700
	},
	{
	"epoch": 0.6899837987229581,
	"grad_norm": 0.8813055753707886,
	"learning_rate": 4.648787885249136e-05,
	"loss": 0.491,
	"step": 21720
	},
	{
	"epoch": 0.6906191429206773,
	"grad_norm": 1.0838825702667236,
	"learning_rate": 4.631415721235389e-05,
	"loss": 0.4732,
	"step": 21740
	},
	{
	"epoch": 0.6912544871183964,
	"grad_norm": 0.7203667163848877,
	"learning_rate": 4.614066289970609e-05,
	"loss": 0.4692,
	"step": 21760
	},
	{
	"epoch": 0.6918898313161155,
	"grad_norm": 1.181038737297058,
	"learning_rate": 4.596739664919287e-05,
	"loss": 0.5177,
	"step": 21780
	},
	{
	"epoch": 0.6925251755138346,
	"grad_norm": 0.9107904434204102,
	"learning_rate": 4.579435919449332e-05,
	"loss": 0.5186,
	"step": 21800
	},
	{
	"epoch": 0.6931605197115537,
	"grad_norm": 0.8281117081642151,
	"learning_rate": 4.5621551268317686e-05,
	"loss": 0.4848,
	"step": 21820
	},
	{
	"epoch": 0.6937958639092728,
	"grad_norm": 0.9180241227149963,
	"learning_rate": 4.545759700573378e-05,
	"loss": 0.4979,
	"step": 21840
	},
	{
	"epoch": 0.694431208106992,
	"grad_norm": 0.912675678730011,
	"learning_rate": 4.5285238763954426e-05,
	"loss": 0.5124,
	"step": 21860
	},
	{
	"epoch": 0.695066552304711,
	"grad_norm": 0.8163600564002991,
	"learning_rate": 4.5113112206520056e-05,
	"loss": 0.5205,
	"step": 21880
	},
	{
	"epoch": 0.6957018965024302,
	"grad_norm": 0.7308365702629089,
	"learning_rate": 4.494121806228392e-05,
	"loss": 0.5208,
	"step": 21900
	},
	{
	"epoch": 0.6963372407001494,
	"grad_norm": 0.7426006197929382,
	"learning_rate": 4.476955705911504e-05,
	"loss": 0.48,
	"step": 21920
	},
	{
	"epoch": 0.6969725848978684,
	"grad_norm": 0.9886866807937622,
	"learning_rate": 4.459812992389526e-05,
	"loss": 0.5483,
	"step": 21940
	},
	{
	"epoch": 0.6976079290955876,
	"grad_norm": 0.9653937816619873,
	"learning_rate": 4.44269373825162e-05,
	"loss": 0.4613,
	"step": 21960
	},
	{
	"epoch": 0.6982432732933066,
	"grad_norm": 0.8184491991996765,
	"learning_rate": 4.425598015987602e-05,
	"loss": 0.5212,
	"step": 21980
	},
	{
	"epoch": 0.6988786174910258,
	"grad_norm": 0.9365077614784241,
	"learning_rate": 4.408525897987645e-05,
	"loss": 0.4868,
	"step": 22000
	},
	{
	"epoch": 0.6988786174910258,
	"eval_loss": 0.45187339186668396,
	"eval_runtime": 44.7631,
	"eval_samples_per_second": 60.385,
	"eval_steps_per_second": 30.203,
	"step": 22000
	},
	{
	"epoch": 0.6995139616887449,
	"grad_norm": 0.9188706874847412,
	"learning_rate": 4.391477456541983e-05,
	"loss": 0.4991,
	"step": 22020
	},
	{
	"epoch": 0.700149305886464,
	"grad_norm": 0.8599129319190979,
	"learning_rate": 4.374452763840584e-05,
	"loss": 0.5184,
	"step": 22040
	},
	{
	"epoch": 0.7007846500841831,
	"grad_norm": 0.8643587827682495,
	"learning_rate": 4.357451891972854e-05,
	"loss": 0.4966,
	"step": 22060
	},
	{
	"epoch": 0.7014199942819023,
	"grad_norm": 0.9123074412345886,
	"learning_rate": 4.340474912927332e-05,
	"loss": 0.5068,
	"step": 22080
	},
	{
	"epoch": 0.7020553384796213,
	"grad_norm": 0.8422294855117798,
	"learning_rate": 4.323521898591394e-05,
	"loss": 0.4753,
	"step": 22100
	},
	{
	"epoch": 0.7026906826773405,
	"grad_norm": 0.8830937743186951,
	"learning_rate": 4.306592920750931e-05,
	"loss": 0.4837,
	"step": 22120
	},
	{
	"epoch": 0.7033260268750595,
	"grad_norm": 0.8540763854980469,
	"learning_rate": 4.289688051090054e-05,
	"loss": 0.4733,
	"step": 22140
	},
	{
	"epoch": 0.7039613710727787,
	"grad_norm": 0.8622573614120483,
	"learning_rate": 4.272807361190797e-05,
	"loss": 0.5003,
	"step": 22160
	},
	{
	"epoch": 0.7045967152704978,
	"grad_norm": 0.9827342629432678,
	"learning_rate": 4.2559509225328e-05,
	"loss": 0.5333,
	"step": 22180
	},
	{
	"epoch": 0.7052320594682169,
	"grad_norm": 0.8439646363258362,
	"learning_rate": 4.239118806493013e-05,
	"loss": 0.4778,
	"step": 22200
	},
	{
	"epoch": 0.705867403665936,
	"grad_norm": 0.9348493814468384,
	"learning_rate": 4.222311084345405e-05,
	"loss": 0.4806,
	"step": 22220
	},
	{
	"epoch": 0.7065027478636552,
	"grad_norm": 1.0671905279159546,
	"learning_rate": 4.2055278272606404e-05,
	"loss": 0.4978,
	"step": 22240
	},
	{
	"epoch": 0.7071380920613742,
	"grad_norm": 1.2363934516906738,
	"learning_rate": 4.188769106305787e-05,
	"loss": 0.5089,
	"step": 22260
	},
	{
	"epoch": 0.7077734362590934,
	"grad_norm": 0.9339464902877808,
	"learning_rate": 4.1720349924440295e-05,
	"loss": 0.4796,
	"step": 22280
	},
	{
	"epoch": 0.7084087804568124,
	"grad_norm": 0.873092770576477,
	"learning_rate": 4.155325556534345e-05,
	"loss": 0.4931,
	"step": 22300
	},
	{
	"epoch": 0.7090441246545316,
	"grad_norm": 0.7866622805595398,
	"learning_rate": 4.138640869331215e-05,
	"loss": 0.501,
	"step": 22320
	},
	{
	"epoch": 0.7096794688522507,
	"grad_norm": 1.0133357048034668,
	"learning_rate": 4.121981001484334e-05,
	"loss": 0.481,
	"step": 22340
	},
	{
	"epoch": 0.7103148130499698,
	"grad_norm": 0.9386391043663025,
	"learning_rate": 4.105346023538292e-05,
	"loss": 0.5303,
	"step": 22360
	},
	{
	"epoch": 0.710950157247689,
	"grad_norm": 0.7917353510856628,
	"learning_rate": 4.088736005932289e-05,
	"loss": 0.4993,
	"step": 22380
	},
	{
	"epoch": 0.711585501445408,
	"grad_norm": 0.9757121801376343,
	"learning_rate": 4.0721510189998266e-05,
	"loss": 0.5102,
	"step": 22400
	},
	{
	"epoch": 0.7122208456431272,
	"grad_norm": 1.2196959257125854,
	"learning_rate": 4.055591132968432e-05,
	"loss": 0.5045,
	"step": 22420
	},
	{
	"epoch": 0.7128561898408463,
	"grad_norm": 1.0833863019943237,
	"learning_rate": 4.039056417959328e-05,
	"loss": 0.5136,
	"step": 22440
	},
	{
	"epoch": 0.7134915340385654,
	"grad_norm": 0.7548487186431885,
	"learning_rate": 4.02254694398716e-05,
	"loss": 0.4864,
	"step": 22460
	},
	{
	"epoch": 0.7141268782362845,
	"grad_norm": 1.0435632467269897,
	"learning_rate": 4.006062780959697e-05,
	"loss": 0.4866,
	"step": 22480
	},
	{
	"epoch": 0.7147622224340037,
	"grad_norm": 0.7469571828842163,
	"learning_rate": 3.9896039986775256e-05,
	"loss": 0.4825,
	"step": 22500
	},
	{
	"epoch": 0.7153975666317227,
	"grad_norm": 0.8732174634933472,
	"learning_rate": 3.9731706668337585e-05,
	"loss": 0.4905,
	"step": 22520
	},
	{
	"epoch": 0.7160329108294419,
	"grad_norm": 0.8761599063873291,
	"learning_rate": 3.956762855013749e-05,
	"loss": 0.4831,
	"step": 22540
	},
	{
	"epoch": 0.7166682550271609,
	"grad_norm": 0.9746137261390686,
	"learning_rate": 3.940380632694781e-05,
	"loss": 0.5111,
	"step": 22560
	},
	{
	"epoch": 0.7173035992248801,
	"grad_norm": 0.9219092726707458,
	"learning_rate": 3.924024069245782e-05,
	"loss": 0.4908,
	"step": 22580
	},
	{
	"epoch": 0.7179389434225992,
	"grad_norm": 1.0305086374282837,
	"learning_rate": 3.907693233927038e-05,
	"loss": 0.5215,
	"step": 22600
	},
	{
	"epoch": 0.7185742876203183,
	"grad_norm": 0.7786363363265991,
	"learning_rate": 3.891388195889882e-05,
	"loss": 0.4792,
	"step": 22620
	},
	{
	"epoch": 0.7192096318180374,
	"grad_norm": 0.8930706977844238,
	"learning_rate": 3.875109024176413e-05,
	"loss": 0.4908,
	"step": 22640
	},
	{
	"epoch": 0.7198449760157566,
	"grad_norm": 1.0214048624038696,
	"learning_rate": 3.858855787719209e-05,
	"loss": 0.5102,
	"step": 22660
	},
	{
	"epoch": 0.7204803202134756,
	"grad_norm": 0.9279896020889282,
	"learning_rate": 3.842628555341018e-05,
	"loss": 0.4772,
	"step": 22680
	},
	{
	"epoch": 0.7211156644111948,
	"grad_norm": 1.6357091665267944,
	"learning_rate": 3.826427395754482e-05,
	"loss": 0.5041,
	"step": 22700
	},
	{
	"epoch": 0.7217510086089138,
	"grad_norm": 0.8421345949172974,
	"learning_rate": 3.8102523775618325e-05,
	"loss": 0.5082,
	"step": 22720
	},
	{
	"epoch": 0.722386352806633,
	"grad_norm": 0.9193027019500732,
	"learning_rate": 3.794103569254624e-05,
	"loss": 0.485,
	"step": 22740
	},
	{
	"epoch": 0.7230216970043521,
	"grad_norm": 0.8045080304145813,
	"learning_rate": 3.777981039213411e-05,
	"loss": 0.5182,
	"step": 22760
	},
	{
	"epoch": 0.7236570412020712,
	"grad_norm": 0.8535903692245483,
	"learning_rate": 3.7618848557074804e-05,
	"loss": 0.4796,
	"step": 22780
	},
	{
	"epoch": 0.7242923853997904,
	"grad_norm": 0.8225564360618591,
	"learning_rate": 3.745815086894565e-05,
	"loss": 0.4812,
	"step": 22800
	},
	{
	"epoch": 0.7249277295975094,
	"grad_norm": 0.8030312657356262,
	"learning_rate": 3.729771800820539e-05,
	"loss": 0.481,
	"step": 22820
	},
	{
	"epoch": 0.7255630737952286,
	"grad_norm": 0.992080569267273,
	"learning_rate": 3.713755065419133e-05,
	"loss": 0.4768,
	"step": 22840
	},
	{
	"epoch": 0.7261984179929477,
	"grad_norm": 0.9184660911560059,
	"learning_rate": 3.698563821122103e-05,
	"loss": 0.5044,
	"step": 22860
	},
	{
	"epoch": 0.7268337621906668,
	"grad_norm": 0.8250758647918701,
	"learning_rate": 3.6825990545007096e-05,
	"loss": 0.5095,
	"step": 22880
	},
	{
	"epoch": 0.7274691063883859,
	"grad_norm": 1.0519983768463135,
	"learning_rate": 3.666661038300353e-05,
	"loss": 0.4944,
	"step": 22900
	},
	{
	"epoch": 0.7281044505861051,
	"grad_norm": 0.789730966091156,
	"learning_rate": 3.650749840009022e-05,
	"loss": 0.4574,
	"step": 22920
	},
	{
	"epoch": 0.7287397947838241,
	"grad_norm": 0.8896093368530273,
	"learning_rate": 3.6356591030872534e-05,
	"loss": 0.5,
	"step": 22940
	},
	{
	"epoch": 0.7293751389815433,
	"grad_norm": 0.7810101509094238,
	"learning_rate": 3.6198003934005195e-05,
	"loss": 0.5053,
	"step": 22960
	},
	{
	"epoch": 0.7300104831792623,
	"grad_norm": 0.883144199848175,
	"learning_rate": 3.603968700049657e-05,
	"loss": 0.514,
	"step": 22980
	},
	{
	"epoch": 0.7306458273769815,
	"grad_norm": 0.7069016695022583,
	"learning_rate": 3.588164090072441e-05,
	"loss": 0.522,
	"step": 23000
	},
	{
	"epoch": 0.7306458273769815,
	"eval_loss": 0.4499790668487549,
	"eval_runtime": 45.0673,
	"eval_samples_per_second": 59.977,
	"eval_steps_per_second": 30.0,
	"step": 23000
	},
	{
	"epoch": 0.7312811715747006,
	"grad_norm": 1.0385907888412476,
	"learning_rate": 3.5723866303919554e-05,
	"loss": 0.489,
	"step": 23020
	},
	{
	"epoch": 0.7319165157724197,
	"grad_norm": 0.8796695470809937,
	"learning_rate": 3.556636387816317e-05,
	"loss": 0.4963,
	"step": 23040
	},
	{
	"epoch": 0.7325518599701388,
	"grad_norm": 0.9427993893623352,
	"learning_rate": 3.540913429038407e-05,
	"loss": 0.4601,
	"step": 23060
	},
	{
	"epoch": 0.733187204167858,
	"grad_norm": 0.8525741100311279,
	"learning_rate": 3.525217820635564e-05,
	"loss": 0.5034,
	"step": 23080
	},
	{
	"epoch": 0.733822548365577,
	"grad_norm": 0.8755898475646973,
	"learning_rate": 3.5095496290693155e-05,
	"loss": 0.509,
	"step": 23100
	},
	{
	"epoch": 0.7344578925632962,
	"grad_norm": 1.0328361988067627,
	"learning_rate": 3.4939089206851025e-05,
	"loss": 0.4994,
	"step": 23120
	},
	{
	"epoch": 0.7350932367610152,
	"grad_norm": 1.130226969718933,
	"learning_rate": 3.478295761711986e-05,
	"loss": 0.4848,
	"step": 23140
	},
	{
	"epoch": 0.7357285809587344,
	"grad_norm": 0.733567476272583,
	"learning_rate": 3.4627102182623696e-05,
	"loss": 0.5123,
	"step": 23160
	},
	{
	"epoch": 0.7363639251564535,
	"grad_norm": 1.1062750816345215,
	"learning_rate": 3.447152356331721e-05,
	"loss": 0.4767,
	"step": 23180
	},
	{
	"epoch": 0.7369992693541726,
	"grad_norm": 0.9558404684066772,
	"learning_rate": 3.431622241798305e-05,
	"loss": 0.4832,
	"step": 23200
	},
	{
	"epoch": 0.7376346135518917,
	"grad_norm": 0.8974496722221375,
	"learning_rate": 3.416119940422877e-05,
	"loss": 0.4818,
	"step": 23220
	},
	{
	"epoch": 0.7382699577496109,
	"grad_norm": 1.2721449136734009,
	"learning_rate": 3.400645517848427e-05,
	"loss": 0.5102,
	"step": 23240
	},
	{
	"epoch": 0.73890530194733,
	"grad_norm": 1.0408607721328735,
	"learning_rate": 3.385199039599902e-05,
	"loss": 0.4784,
	"step": 23260
	},
	{
	"epoch": 0.7395406461450491,
	"grad_norm": 0.9826887845993042,
	"learning_rate": 3.369780571083909e-05,
	"loss": 0.5039,
	"step": 23280
	},
	{
	"epoch": 0.7401759903427682,
	"grad_norm": 0.8110315799713135,
	"learning_rate": 3.354390177588454e-05,
	"loss": 0.5034,
	"step": 23300
	},
	{
	"epoch": 0.7408113345404873,
	"grad_norm": 0.8513306975364685,
	"learning_rate": 3.339027924282673e-05,
	"loss": 0.509,
	"step": 23320
	},
	{
	"epoch": 0.7414466787382065,
	"grad_norm": 0.8255580067634583,
	"learning_rate": 3.323693876216529e-05,
	"loss": 0.4678,
	"step": 23340
	},
	{
	"epoch": 0.7420820229359255,
	"grad_norm": 1.1336640119552612,
	"learning_rate": 3.30838809832056e-05,
	"loss": 0.4848,
	"step": 23360
	},
	{
	"epoch": 0.7427173671336447,
	"grad_norm": 0.8720375895500183,
	"learning_rate": 3.2931106554056005e-05,
	"loss": 0.4929,
	"step": 23380
	},
	{
	"epoch": 0.7433527113313637,
	"grad_norm": 1.0169090032577515,
	"learning_rate": 3.277861612162498e-05,
	"loss": 0.5066,
	"step": 23400
	},
	{
	"epoch": 0.7439880555290829,
	"grad_norm": 1.2800534963607788,
	"learning_rate": 3.262641033161843e-05,
	"loss": 0.4964,
	"step": 23420
	},
	{
	"epoch": 0.744623399726802,
	"grad_norm": 0.819925844669342,
	"learning_rate": 3.2474489828537046e-05,
	"loss": 0.509,
	"step": 23440
	},
	{
	"epoch": 0.7452587439245211,
	"grad_norm": 0.8024299144744873,
	"learning_rate": 3.232285525567343e-05,
	"loss": 0.4922,
	"step": 23460
	},
	{
	"epoch": 0.7458940881222402,
	"grad_norm": 1.1049789190292358,
	"learning_rate": 3.217150725510946e-05,
	"loss": 0.4907,
	"step": 23480
	},
	{
	"epoch": 0.7465294323199594,
	"grad_norm": 1.0818272829055786,
	"learning_rate": 3.2020446467713516e-05,
	"loss": 0.4806,
	"step": 23500
	},
	{
	"epoch": 0.7471647765176784,
	"grad_norm": 0.6681995391845703,
	"learning_rate": 3.18696735331379e-05,
	"loss": 0.4504,
	"step": 23520
	},
	{
	"epoch": 0.7478001207153976,
	"grad_norm": 0.8827902674674988,
	"learning_rate": 3.171918908981595e-05,
	"loss": 0.5081,
	"step": 23540
	},
	{
	"epoch": 0.7484354649131166,
	"grad_norm": 1.0249037742614746,
	"learning_rate": 3.156899377495938e-05,
	"loss": 0.5297,
	"step": 23560
	},
	{
	"epoch": 0.7490708091108358,
	"grad_norm": 1.0797147750854492,
	"learning_rate": 3.141908822455574e-05,
	"loss": 0.4701,
	"step": 23580
	},
	{
	"epoch": 0.749706153308555,
	"grad_norm": 0.724281907081604,
	"learning_rate": 3.126947307336551e-05,
	"loss": 0.4608,
	"step": 23600
	},
	{
	"epoch": 0.750341497506274,
	"grad_norm": 0.7410632967948914,
	"learning_rate": 3.1120148954919485e-05,
	"loss": 0.4747,
	"step": 23620
	},
	{
	"epoch": 0.7509768417039931,
	"grad_norm": 1.0309559106826782,
	"learning_rate": 3.09711165015162e-05,
	"loss": 0.534,
	"step": 23640
	},
	{
	"epoch": 0.7516121859017123,
	"grad_norm": 0.9060602784156799,
	"learning_rate": 3.0822376344219105e-05,
	"loss": 0.4709,
	"step": 23660
	},
	{
	"epoch": 0.7522475300994313,
	"grad_norm": 0.9018211364746094,
	"learning_rate": 3.067392911285395e-05,
	"loss": 0.5084,
	"step": 23680
	},
	{
	"epoch": 0.7528828742971505,
	"grad_norm": 1.1375420093536377,
	"learning_rate": 3.0525775436006107e-05,
	"loss": 0.5023,
	"step": 23700
	},
	{
	"epoch": 0.7535182184948696,
	"grad_norm": 0.8034165501594543,
	"learning_rate": 3.0377915941017955e-05,
	"loss": 0.4947,
	"step": 23720
	},
	{
	"epoch": 0.7541535626925887,
	"grad_norm": 1.0958040952682495,
	"learning_rate": 3.0230351253986143e-05,
	"loss": 0.5009,
	"step": 23740
	},
	{
	"epoch": 0.7547889068903079,
	"grad_norm": 0.8740959763526917,
	"learning_rate": 3.0083081999759067e-05,
	"loss": 0.4942,
	"step": 23760
	},
	{
	"epoch": 0.7554242510880269,
	"grad_norm": 0.8798695206642151,
	"learning_rate": 2.993610880193406e-05,
	"loss": 0.4676,
	"step": 23780
	},
	{
	"epoch": 0.7560595952857461,
	"grad_norm": 0.9538172483444214,
	"learning_rate": 2.9789432282854822e-05,
	"loss": 0.4441,
	"step": 23800
	},
	{
	"epoch": 0.7566949394834651,
	"grad_norm": 0.9560829401016235,
	"learning_rate": 2.9643053063608917e-05,
	"loss": 0.4995,
	"step": 23820
	},
	{
	"epoch": 0.7573302836811843,
	"grad_norm": 1.0306763648986816,
	"learning_rate": 2.9496971764024884e-05,
	"loss": 0.5042,
	"step": 23840
	},
	{
	"epoch": 0.7579656278789034,
	"grad_norm": 0.9823128581047058,
	"learning_rate": 2.9351189002669788e-05,
	"loss": 0.5274,
	"step": 23860
	},
	{
	"epoch": 0.7586009720766225,
	"grad_norm": 0.8448672890663147,
	"learning_rate": 2.920570539684665e-05,
	"loss": 0.4713,
	"step": 23880
	},
	{
	"epoch": 0.7592363162743416,
	"grad_norm": 0.8830504417419434,
	"learning_rate": 2.9060521562591624e-05,
	"loss": 0.5069,
	"step": 23900
	},
	{
	"epoch": 0.7598716604720608,
	"grad_norm": 0.9051734805107117,
	"learning_rate": 2.891563811467154e-05,
	"loss": 0.48,
	"step": 23920
	},
	{
	"epoch": 0.7605070046697798,
	"grad_norm": 0.8309674859046936,
	"learning_rate": 2.877105566658136e-05,
	"loss": 0.5141,
	"step": 23940
	},
	{
	"epoch": 0.761142348867499,
	"grad_norm": 0.8684896230697632,
	"learning_rate": 2.863398169962057e-05,
	"loss": 0.4518,
	"step": 23960
	},
	{
	"epoch": 0.761777693065218,
	"grad_norm": 0.959536075592041,
	"learning_rate": 2.8489987960934184e-05,
	"loss": 0.483,
	"step": 23980
	},
	{
	"epoch": 0.7624130372629372,
	"grad_norm": 1.3519070148468018,
	"learning_rate": 2.8353474370325594e-05,
	"loss": 0.5062,
	"step": 24000
	},
	{
	"epoch": 0.7624130372629372,
	"eval_loss": 0.4479082524776459,
	"eval_runtime": 44.6533,
	"eval_samples_per_second": 60.533,
	"eval_steps_per_second": 30.278,
	"step": 24000
	},
	{
	"epoch": 0.7630483814606563,
	"grad_norm": 0.8832095861434937,
	"learning_rate": 2.8210071659529526e-05,
	"loss": 0.5204,
	"step": 24020
	},
	{
	"epoch": 0.7636837256583754,
	"grad_norm": 0.793205738067627,
	"learning_rate": 2.8066972936216017e-05,
	"loss": 0.5037,
	"step": 24040
	},
	{
	"epoch": 0.7643190698560945,
	"grad_norm": 0.8483644127845764,
	"learning_rate": 2.79241788063227e-05,
	"loss": 0.4812,
	"step": 24060
	},
	{
	"epoch": 0.7649544140538137,
	"grad_norm": 1.50220787525177,
	"learning_rate": 2.7781689874497406e-05,
	"loss": 0.501,
	"step": 24080
	},
	{
	"epoch": 0.7655897582515327,
	"grad_norm": 0.8091638684272766,
	"learning_rate": 2.7639506744095766e-05,
	"loss": 0.4932,
	"step": 24100
	},
	{
	"epoch": 0.7662251024492519,
	"grad_norm": 0.9171321392059326,
	"learning_rate": 2.74976300171784e-05,
	"loss": 0.5,
	"step": 24120
	},
	{
	"epoch": 0.766860446646971,
	"grad_norm": 0.9392116069793701,
	"learning_rate": 2.7356060294508502e-05,
	"loss": 0.5075,
	"step": 24140
	},
	{
	"epoch": 0.7674957908446901,
	"grad_norm": 0.9384047389030457,
	"learning_rate": 2.7214798175549395e-05,
	"loss": 0.4893,
	"step": 24160
	},
	{
	"epoch": 0.7681311350424093,
	"grad_norm": 0.7760775685310364,
	"learning_rate": 2.707384425846178e-05,
	"loss": 0.5267,
	"step": 24180
	},
	{
	"epoch": 0.7687664792401283,
	"grad_norm": 0.8666489720344543,
	"learning_rate": 2.6933199140101285e-05,
	"loss": 0.5201,
	"step": 24200
	},
	{
	"epoch": 0.7694018234378475,
	"grad_norm": 0.9711599946022034,
	"learning_rate": 2.679286341601609e-05,
	"loss": 0.4923,
	"step": 24220
	},
	{
	"epoch": 0.7700371676355666,
	"grad_norm": 0.9399335980415344,
	"learning_rate": 2.6652837680444153e-05,
	"loss": 0.5281,
	"step": 24240
	},
	{
	"epoch": 0.7706725118332857,
	"grad_norm": 0.8116670250892639,
	"learning_rate": 2.651312252631083e-05,
	"loss": 0.5111,
	"step": 24260
	},
	{
	"epoch": 0.7713078560310048,
	"grad_norm": 0.873943030834198,
	"learning_rate": 2.6373718545226445e-05,
	"loss": 0.471,
	"step": 24280
	},
	{
	"epoch": 0.7719432002287239,
	"grad_norm": 0.9560205340385437,
	"learning_rate": 2.623462632748359e-05,
	"loss": 0.5101,
	"step": 24300
	},
	{
	"epoch": 0.772578544426443,
	"grad_norm": 1.011898159980774,
	"learning_rate": 2.6095846462054763e-05,
	"loss": 0.4906,
	"step": 24320
	},
	{
	"epoch": 0.7732138886241622,
	"grad_norm": 1.0334892272949219,
	"learning_rate": 2.595737953658982e-05,
	"loss": 0.4905,
	"step": 24340
	},
	{
	"epoch": 0.7738492328218812,
	"grad_norm": 0.6994766592979431,
	"learning_rate": 2.581922613741352e-05,
	"loss": 0.4794,
	"step": 24360
	},
	{
	"epoch": 0.7744845770196004,
	"grad_norm": 0.9781257510185242,
	"learning_rate": 2.5681386849523003e-05,
	"loss": 0.4871,
	"step": 24380
	},
	{
	"epoch": 0.7751199212173194,
	"grad_norm": 1.0443729162216187,
	"learning_rate": 2.5543862256585393e-05,
	"loss": 0.5133,
	"step": 24400
	},
	{
	"epoch": 0.7757552654150386,
	"grad_norm": 0.8841618299484253,
	"learning_rate": 2.5406652940935217e-05,
	"loss": 0.4865,
	"step": 24420
	},
	{
	"epoch": 0.7763906096127577,
	"grad_norm": 0.8439558148384094,
	"learning_rate": 2.5269759483571954e-05,
	"loss": 0.4908,
	"step": 24440
	},
	{
	"epoch": 0.7770259538104768,
	"grad_norm": 0.9146759510040283,
	"learning_rate": 2.5133182464157734e-05,
	"loss": 0.4934,
	"step": 24460
	},
	{
	"epoch": 0.777661298008196,
	"grad_norm": 0.7785593867301941,
	"learning_rate": 2.499692246101466e-05,
	"loss": 0.4857,
	"step": 24480
	},
	{
	"epoch": 0.7782966422059151,
	"grad_norm": 0.9240188002586365,
	"learning_rate": 2.4860980051122474e-05,
	"loss": 0.4958,
	"step": 24500
	},
	{
	"epoch": 0.7789319864036341,
	"grad_norm": 1.0593191385269165,
	"learning_rate": 2.4725355810116103e-05,
	"loss": 0.5077,
	"step": 24520
	},
	{
	"epoch": 0.7795673306013533,
	"grad_norm": 0.8705240488052368,
	"learning_rate": 2.4590050312283263e-05,
	"loss": 0.4792,
	"step": 24540
	},
	{
	"epoch": 0.7802026747990723,
	"grad_norm": 0.8610863089561462,
	"learning_rate": 2.4455064130561944e-05,
	"loss": 0.4949,
	"step": 24560
	},
	{
	"epoch": 0.7808380189967915,
	"grad_norm": 1.152521014213562,
	"learning_rate": 2.432039783653799e-05,
	"loss": 0.5076,
	"step": 24580
	},
	{
	"epoch": 0.7814733631945107,
	"grad_norm": 0.8608033657073975,
	"learning_rate": 2.4186052000442806e-05,
	"loss": 0.4759,
	"step": 24600
	},
	{
	"epoch": 0.7821087073922297,
	"grad_norm": 1.1664726734161377,
	"learning_rate": 2.4052027191150762e-05,
	"loss": 0.4941,
	"step": 24620
	},
	{
	"epoch": 0.7827440515899489,
	"grad_norm": 0.8805221915245056,
	"learning_rate": 2.3918323976176883e-05,
	"loss": 0.4797,
	"step": 24640
	},
	{
	"epoch": 0.783379395787668,
	"grad_norm": 0.7699743509292603,
	"learning_rate": 2.3784942921674512e-05,
	"loss": 0.4903,
	"step": 24660
	},
	{
	"epoch": 0.7840147399853871,
	"grad_norm": 0.9498074650764465,
	"learning_rate": 2.365188459243274e-05,
	"loss": 0.4679,
	"step": 24680
	},
	{
	"epoch": 0.7846500841831062,
	"grad_norm": 0.815447986125946,
	"learning_rate": 2.351914955187412e-05,
	"loss": 0.5114,
	"step": 24700
	},
	{
	"epoch": 0.7852854283808253,
	"grad_norm": 0.984866738319397,
	"learning_rate": 2.3386738362052353e-05,
	"loss": 0.4725,
	"step": 24720
	},
	{
	"epoch": 0.7859207725785444,
	"grad_norm": 1.0802818536758423,
	"learning_rate": 2.3254651583649735e-05,
	"loss": 0.4684,
	"step": 24740
	},
	{
	"epoch": 0.7865561167762636,
	"grad_norm": 0.8058573007583618,
	"learning_rate": 2.3122889775974887e-05,
	"loss": 0.4847,
	"step": 24760
	},
	{
	"epoch": 0.7871914609739826,
	"grad_norm": 0.8836669921875,
	"learning_rate": 2.2991453496960447e-05,
	"loss": 0.4859,
	"step": 24780
	},
	{
	"epoch": 0.7878268051717018,
	"grad_norm": 0.7214009165763855,
	"learning_rate": 2.2860343303160535e-05,
	"loss": 0.4816,
	"step": 24800
	},
	{
	"epoch": 0.7884621493694208,
	"grad_norm": 0.8268193006515503,
	"learning_rate": 2.2729559749748575e-05,
	"loss": 0.4674,
	"step": 24820
	},
	{
	"epoch": 0.78909749356714,
	"grad_norm": 0.7158612608909607,
	"learning_rate": 2.2599103390514766e-05,
	"loss": 0.465,
	"step": 24840
	},
	{
	"epoch": 0.7897328377648591,
	"grad_norm": 0.8904339671134949,
	"learning_rate": 2.246897477786396e-05,
	"loss": 0.5024,
	"step": 24860
	},
	{
	"epoch": 0.7903681819625782,
	"grad_norm": 0.8315703272819519,
	"learning_rate": 2.2339174462813127e-05,
	"loss": 0.4609,
	"step": 24880
	},
	{
	"epoch": 0.7910035261602973,
	"grad_norm": 0.8962224721908569,
	"learning_rate": 2.2209702994989045e-05,
	"loss": 0.4906,
	"step": 24900
	},
	{
	"epoch": 0.7916388703580165,
	"grad_norm": 0.9301977753639221,
	"learning_rate": 2.208056092262616e-05,
	"loss": 0.5216,
	"step": 24920
	},
	{
	"epoch": 0.7922742145557355,
	"grad_norm": 0.8634437918663025,
	"learning_rate": 2.1951748792563985e-05,
	"loss": 0.5031,
	"step": 24940
	},
	{
	"epoch": 0.7929095587534547,
	"grad_norm": 0.8985020518302917,
	"learning_rate": 2.1823267150244964e-05,
	"loss": 0.4709,
	"step": 24960
	},
	{
	"epoch": 0.7935449029511737,
	"grad_norm": 1.1470792293548584,
	"learning_rate": 2.16951165397122e-05,
	"loss": 0.5224,
	"step": 24980
	},
	{
	"epoch": 0.7941802471488929,
	"grad_norm": 0.919326663017273,
	"learning_rate": 2.1567297503606987e-05,
	"loss": 0.5004,
	"step": 25000
	},
	{
	"epoch": 0.7941802471488929,
	"eval_loss": 0.44602036476135254,
	"eval_runtime": 44.8391,
	"eval_samples_per_second": 60.282,
	"eval_steps_per_second": 30.152,
	"step": 25000
	},
	{
	"epoch": 0.7948155913466121,
	"grad_norm": 1.1010879278182983,
	"learning_rate": 2.1439810583166587e-05,
	"loss": 0.5077,
	"step": 25020
	},
	{
	"epoch": 0.7954509355443311,
	"grad_norm": 0.8573036789894104,
	"learning_rate": 2.131900612258364e-05,
	"loss": 0.4973,
	"step": 25040
	},
	{
	"epoch": 0.7960862797420503,
	"grad_norm": 0.8931069374084473,
	"learning_rate": 2.1198502345256165e-05,
	"loss": 0.4972,
	"step": 25060
	},
	{
	"epoch": 0.7967216239397694,
	"grad_norm": 1.239161491394043,
	"learning_rate": 2.107198160794136e-05,
	"loss": 0.4981,
	"step": 25080
	},
	{
	"epoch": 0.7973569681374885,
	"grad_norm": 0.9950107336044312,
	"learning_rate": 2.0945795083658447e-05,
	"loss": 0.506,
	"step": 25100
	},
	{
	"epoch": 0.7979923123352076,
	"grad_norm": 0.7783673405647278,
	"learning_rate": 2.0819943306732082e-05,
	"loss": 0.4763,
	"step": 25120
	},
	{
	"epoch": 0.7986276565329267,
	"grad_norm": 0.912331223487854,
	"learning_rate": 2.0694426810069345e-05,
	"loss": 0.4622,
	"step": 25140
	},
	{
	"epoch": 0.7992630007306458,
	"grad_norm": 0.8284201622009277,
	"learning_rate": 2.0569246125157658e-05,
	"loss": 0.513,
	"step": 25160
	},
	{
	"epoch": 0.799898344928365,
	"grad_norm": 1.1468638181686401,
	"learning_rate": 2.0444401782062518e-05,
	"loss": 0.4719,
	"step": 25180
	},
	{
	"epoch": 0.800533689126084,
	"grad_norm": 1.0985773801803589,
	"learning_rate": 2.0319894309425146e-05,
	"loss": 0.4871,
	"step": 25200
	},
	{
	"epoch": 0.8011690333238032,
	"grad_norm": 1.1010768413543701,
	"learning_rate": 2.0195724234460322e-05,
	"loss": 0.5459,
	"step": 25220
	},
	{
	"epoch": 0.8018043775215223,
	"grad_norm": 0.9938257336616516,
	"learning_rate": 2.0071892082954248e-05,
	"loss": 0.5127,
	"step": 25240
	},
	{
	"epoch": 0.8024397217192414,
	"grad_norm": 1.1338539123535156,
	"learning_rate": 1.9954565018232684e-05,
	"loss": 0.4838,
	"step": 25260
	},
	{
	"epoch": 0.8030750659169605,
	"grad_norm": 0.7955858111381531,
	"learning_rate": 1.9831393324342518e-05,
	"loss": 0.4865,
	"step": 25280
	},
	{
	"epoch": 0.8037104101146796,
	"grad_norm": 1.0443702936172485,
	"learning_rate": 1.9708561096634902e-05,
	"loss": 0.4749,
	"step": 25300
	},
	{
	"epoch": 0.8043457543123987,
	"grad_norm": 1.0816038846969604,
	"learning_rate": 1.958606885523103e-05,
	"loss": 0.5142,
	"step": 25320
	},
	{
	"epoch": 0.8049810985101179,
	"grad_norm": 1.2127019166946411,
	"learning_rate": 1.946391711881239e-05,
	"loss": 0.4831,
	"step": 25340
	},
	{
	"epoch": 0.8056164427078369,
	"grad_norm": 0.8780348300933838,
	"learning_rate": 1.9342106404618632e-05,
	"loss": 0.5113,
	"step": 25360
	},
	{
	"epoch": 0.8062517869055561,
	"grad_norm": 0.7795581221580505,
	"learning_rate": 1.9220637228445438e-05,
	"loss": 0.4721,
	"step": 25380
	},
	{
	"epoch": 0.8068871311032751,
	"grad_norm": 0.9518604874610901,
	"learning_rate": 1.9099510104642216e-05,
	"loss": 0.4754,
	"step": 25400
	},
	{
	"epoch": 0.8075224753009943,
	"grad_norm": 1.0051589012145996,
	"learning_rate": 1.8978725546110022e-05,
	"loss": 0.4936,
	"step": 25420
	},
	{
	"epoch": 0.8081578194987135,
	"grad_norm": 0.8047780394554138,
	"learning_rate": 1.8858284064299326e-05,
	"loss": 0.4901,
	"step": 25440
	},
	{
	"epoch": 0.8087931636964325,
	"grad_norm": 1.1246352195739746,
	"learning_rate": 1.8738186169207917e-05,
	"loss": 0.5117,
	"step": 25460
	},
	{
	"epoch": 0.8094285078941517,
	"grad_norm": 0.8150719404220581,
	"learning_rate": 1.861843236937867e-05,
	"loss": 0.4685,
	"step": 25480
	},
	{
	"epoch": 0.8100638520918708,
	"grad_norm": 2.195882558822632,
	"learning_rate": 1.8499023171897388e-05,
	"loss": 0.471,
	"step": 25500
	},
	{
	"epoch": 0.8106991962895899,
	"grad_norm": 0.8962704539299011,
	"learning_rate": 1.8379959082390798e-05,
	"loss": 0.481,
	"step": 25520
	},
	{
	"epoch": 0.811334540487309,
	"grad_norm": 0.8531712889671326,
	"learning_rate": 1.8261240605024165e-05,
	"loss": 0.4881,
	"step": 25540
	},
	{
	"epoch": 0.8119698846850281,
	"grad_norm": 0.9354826807975769,
	"learning_rate": 1.8142868242499368e-05,
	"loss": 0.4761,
	"step": 25560
	},
	{
	"epoch": 0.8126052288827472,
	"grad_norm": 1.0048118829727173,
	"learning_rate": 1.8024842496052708e-05,
	"loss": 0.4968,
	"step": 25580
	},
	{
	"epoch": 0.8132405730804664,
	"grad_norm": 0.8254916071891785,
	"learning_rate": 1.790716386545275e-05,
	"loss": 0.5076,
	"step": 25600
	},
	{
	"epoch": 0.8138759172781854,
	"grad_norm": 0.9708372950553894,
	"learning_rate": 1.778983284899819e-05,
	"loss": 0.5197,
	"step": 25620
	},
	{
	"epoch": 0.8145112614759046,
	"grad_norm": 0.9034101366996765,
	"learning_rate": 1.767284994351588e-05,
	"loss": 0.4954,
	"step": 25640
	},
	{
	"epoch": 0.8151466056736237,
	"grad_norm": 1.3567668199539185,
	"learning_rate": 1.7556215644358564e-05,
	"loss": 0.5133,
	"step": 25660
	},
	{
	"epoch": 0.8157819498713428,
	"grad_norm": 0.9000421166419983,
	"learning_rate": 1.743993044540282e-05,
	"loss": 0.524,
	"step": 25680
	},
	{
	"epoch": 0.8164172940690619,
	"grad_norm": 0.7230278849601746,
	"learning_rate": 1.7323994839047086e-05,
	"loss": 0.4831,
	"step": 25700
	},
	{
	"epoch": 0.817052638266781,
	"grad_norm": 0.8648797273635864,
	"learning_rate": 1.7208409316209407e-05,
	"loss": 0.4932,
	"step": 25720
	},
	{
	"epoch": 0.8176879824645001,
	"grad_norm": 0.9017996788024902,
	"learning_rate": 1.709317436632547e-05,
	"loss": 0.4787,
	"step": 25740
	},
	{
	"epoch": 0.8183233266622193,
	"grad_norm": 0.9122520685195923,
	"learning_rate": 1.697829047734646e-05,
	"loss": 0.4721,
	"step": 25760
	},
	{
	"epoch": 0.8189586708599383,
	"grad_norm": 0.9448441863059998,
	"learning_rate": 1.6863758135737085e-05,
	"loss": 0.4772,
	"step": 25780
	},
	{
	"epoch": 0.8195940150576575,
	"grad_norm": 1.052437424659729,
	"learning_rate": 1.6749577826473405e-05,
	"loss": 0.5252,
	"step": 25800
	},
	{
	"epoch": 0.8202293592553767,
	"grad_norm": 0.9826536774635315,
	"learning_rate": 1.6635750033040842e-05,
	"loss": 0.5187,
	"step": 25820
	},
	{
	"epoch": 0.8208647034530957,
	"grad_norm": 0.8498765826225281,
	"learning_rate": 1.6522275237432193e-05,
	"loss": 0.4792,
	"step": 25840
	},
	{
	"epoch": 0.8215000476508149,
	"grad_norm": 0.9139013886451721,
	"learning_rate": 1.6409153920145416e-05,
	"loss": 0.5006,
	"step": 25860
	},
	{
	"epoch": 0.8221353918485339,
	"grad_norm": 0.9082590937614441,
	"learning_rate": 1.6296386560181744e-05,
	"loss": 0.4801,
	"step": 25880
	},
	{
	"epoch": 0.8227707360462531,
	"grad_norm": 0.8360690474510193,
	"learning_rate": 1.618397363504366e-05,
	"loss": 0.491,
	"step": 25900
	},
	{
	"epoch": 0.8234060802439722,
	"grad_norm": 0.8585413098335266,
	"learning_rate": 1.6071915620732746e-05,
	"loss": 0.4952,
	"step": 25920
	},
	{
	"epoch": 0.8240414244416913,
	"grad_norm": 0.9051182866096497,
	"learning_rate": 1.5960212991747804e-05,
	"loss": 0.5021,
	"step": 25940
	},
	{
	"epoch": 0.8246767686394104,
	"grad_norm": 1.1850552558898926,
	"learning_rate": 1.584886622108276e-05,
	"loss": 0.5194,
	"step": 25960
	},
	{
	"epoch": 0.8253121128371295,
	"grad_norm": 0.8449670672416687,
	"learning_rate": 1.57378757802247e-05,
	"loss": 0.4988,
	"step": 25980
	},
	{
	"epoch": 0.8259474570348486,
	"grad_norm": 0.9663527607917786,
	"learning_rate": 1.5627242139151867e-05,
	"loss": 0.4782,
	"step": 26000
	},
	{
	"epoch": 0.8259474570348486,
	"eval_loss": 0.44560423493385315,
	"eval_runtime": 45.0247,
	"eval_samples_per_second": 60.034,
	"eval_steps_per_second": 30.028,
	"step": 26000
	},
	{
	"epoch": 0.8265828012325678,
	"grad_norm": 1.0954176187515259,
	"learning_rate": 1.5516965766331715e-05,
	"loss": 0.4992,
	"step": 26020
	},
	{
	"epoch": 0.8272181454302868,
	"grad_norm": 0.9752370119094849,
	"learning_rate": 1.540704712871881e-05,
	"loss": 0.5109,
	"step": 26040
	},
	{
	"epoch": 0.827853489628006,
	"grad_norm": 0.7089188098907471,
	"learning_rate": 1.5297486691752928e-05,
	"loss": 0.4669,
	"step": 26060
	},
	{
	"epoch": 0.8284888338257251,
	"grad_norm": 0.8641648292541504,
	"learning_rate": 1.5188284919357155e-05,
	"loss": 0.4905,
	"step": 26080
	},
	{
	"epoch": 0.8291241780234442,
	"grad_norm": 0.8167259097099304,
	"learning_rate": 1.5079442273935773e-05,
	"loss": 0.4776,
	"step": 26100
	},
	{
	"epoch": 0.8297595222211633,
	"grad_norm": 0.9287614226341248,
	"learning_rate": 1.4970959216372372e-05,
	"loss": 0.4803,
	"step": 26120
	},
	{
	"epoch": 0.8303948664188824,
	"grad_norm": 0.8652564883232117,
	"learning_rate": 1.4862836206027975e-05,
	"loss": 0.4623,
	"step": 26140
	},
	{
	"epoch": 0.8310302106166015,
	"grad_norm": 0.9141151309013367,
	"learning_rate": 1.4755073700738953e-05,
	"loss": 0.507,
	"step": 26160
	},
	{
	"epoch": 0.8316655548143207,
	"grad_norm": 0.9454159736633301,
	"learning_rate": 1.464767215681515e-05,
	"loss": 0.5218,
	"step": 26180
	},
	{
	"epoch": 0.8323008990120397,
	"grad_norm": 0.7766212821006775,
	"learning_rate": 1.4540632029038026e-05,
	"loss": 0.5294,
	"step": 26200
	},
	{
	"epoch": 0.8329362432097589,
	"grad_norm": 0.8662501573562622,
	"learning_rate": 1.443395377065858e-05,
	"loss": 0.4931,
	"step": 26220
	},
	{
	"epoch": 0.833571587407478,
	"grad_norm": 1.0195443630218506,
	"learning_rate": 1.4327637833395525e-05,
	"loss": 0.5165,
	"step": 26240
	},
	{
	"epoch": 0.8342069316051971,
	"grad_norm": 0.9022318124771118,
	"learning_rate": 1.422168466743341e-05,
	"loss": 0.4732,
	"step": 26260
	},
	{
	"epoch": 0.8348422758029163,
	"grad_norm": 0.9162563681602478,
	"learning_rate": 1.4116094721420625e-05,
	"loss": 0.496,
	"step": 26280
	},
	{
	"epoch": 0.8354776200006353,
	"grad_norm": 1.129158854484558,
	"learning_rate": 1.401086844246755e-05,
	"loss": 0.4764,
	"step": 26300
	},
	{
	"epoch": 0.8361129641983545,
	"grad_norm": 0.8695496320724487,
	"learning_rate": 1.3906006276144601e-05,
	"loss": 0.4852,
	"step": 26320
	},
	{
	"epoch": 0.8367483083960736,
	"grad_norm": 1.7362381219863892,
	"learning_rate": 1.3801508666480512e-05,
	"loss": 0.4642,
	"step": 26340
	},
	{
	"epoch": 0.8373836525937927,
	"grad_norm": 0.7645226716995239,
	"learning_rate": 1.369737605596022e-05,
	"loss": 0.503,
	"step": 26360
	},
	{
	"epoch": 0.8380189967915118,
	"grad_norm": 0.8403562903404236,
	"learning_rate": 1.3593608885523158e-05,
	"loss": 0.4766,
	"step": 26380
	},
	{
	"epoch": 0.8386543409892309,
	"grad_norm": 0.7841979265213013,
	"learning_rate": 1.3490207594561366e-05,
	"loss": 0.4917,
	"step": 26400
	},
	{
	"epoch": 0.83928968518695,
	"grad_norm": 0.8631531000137329,
	"learning_rate": 1.3392315662821897e-05,
	"loss": 0.4972,
	"step": 26420
	},
	{
	"epoch": 0.8399250293846692,
	"grad_norm": 1.0436699390411377,
	"learning_rate": 1.3289629094769217e-05,
	"loss": 0.4847,
	"step": 26440
	},
	{
	"epoch": 0.8405603735823882,
	"grad_norm": 0.9521028399467468,
	"learning_rate": 1.318730969336468e-05,
	"loss": 0.4972,
	"step": 26460
	},
	{
	"epoch": 0.8411957177801074,
	"grad_norm": 0.9861098527908325,
	"learning_rate": 1.3085357891869909e-05,
	"loss": 0.5114,
	"step": 26480
	},
	{
	"epoch": 0.8418310619778265,
	"grad_norm": 1.3008265495300293,
	"learning_rate": 1.2983774121989888e-05,
	"loss": 0.5071,
	"step": 26500
	},
	{
	"epoch": 0.8424664061755456,
	"grad_norm": 0.7970487475395203,
	"learning_rate": 1.2882558813871204e-05,
	"loss": 0.4945,
	"step": 26520
	},
	{
	"epoch": 0.8431017503732647,
	"grad_norm": 0.7304345369338989,
	"learning_rate": 1.2781712396100287e-05,
	"loss": 0.4902,
	"step": 26540
	},
	{
	"epoch": 0.8437370945709838,
	"grad_norm": 0.9716693162918091,
	"learning_rate": 1.2681235295701488e-05,
	"loss": 0.4857,
	"step": 26560
	},
	{
	"epoch": 0.8443724387687029,
	"grad_norm": 0.9461120963096619,
	"learning_rate": 1.2581127938135328e-05,
	"loss": 0.5139,
	"step": 26580
	},
	{
	"epoch": 0.8450077829664221,
	"grad_norm": 0.8130011558532715,
	"learning_rate": 1.2481390747296717e-05,
	"loss": 0.4788,
	"step": 26600
	},
	{
	"epoch": 0.8456431271641411,
	"grad_norm": 0.959818959236145,
	"learning_rate": 1.2382024145513094e-05,
	"loss": 0.4808,
	"step": 26620
	},
	{
	"epoch": 0.8462784713618603,
	"grad_norm": 1.2069573402404785,
	"learning_rate": 1.2283028553542674e-05,
	"loss": 0.4692,
	"step": 26640
	},
	{
	"epoch": 0.8469138155595795,
	"grad_norm": 1.0251085758209229,
	"learning_rate": 1.2184404390572712e-05,
	"loss": 0.5106,
	"step": 26660
	},
	{
	"epoch": 0.8475491597572985,
	"grad_norm": 0.9423872828483582,
	"learning_rate": 1.2086152074217638e-05,
	"loss": 0.4881,
	"step": 26680
	},
	{
	"epoch": 0.8481845039550177,
	"grad_norm": 0.8245638608932495,
	"learning_rate": 1.1988272020517322e-05,
	"loss": 0.4606,
	"step": 26700
	},
	{
	"epoch": 0.8488198481527367,
	"grad_norm": 1.0099587440490723,
	"learning_rate": 1.1890764643935393e-05,
	"loss": 0.4976,
	"step": 26720
	},
	{
	"epoch": 0.8494551923504559,
	"grad_norm": 0.8285634517669678,
	"learning_rate": 1.1793630357357355e-05,
	"loss": 0.5057,
	"step": 26740
	},
	{
	"epoch": 0.850090536548175,
	"grad_norm": 0.9125322699546814,
	"learning_rate": 1.169686957208892e-05,
	"loss": 0.4856,
	"step": 26760
	},
	{
	"epoch": 0.8507258807458941,
	"grad_norm": 1.1413007974624634,
	"learning_rate": 1.1600482697854198e-05,
	"loss": 0.4916,
	"step": 26780
	},
	{
	"epoch": 0.8513612249436132,
	"grad_norm": 0.9246459603309631,
	"learning_rate": 1.1504470142794121e-05,
	"loss": 0.4807,
	"step": 26800
	},
	{
	"epoch": 0.8519965691413324,
	"grad_norm": 0.9050401449203491,
	"learning_rate": 1.140883231346449e-05,
	"loss": 0.4844,
	"step": 26820
	},
	{
	"epoch": 0.8526319133390514,
	"grad_norm": 0.8217797875404358,
	"learning_rate": 1.1313569614834408e-05,
	"loss": 0.4751,
	"step": 26840
	},
	{
	"epoch": 0.8532672575367706,
	"grad_norm": 1.0189076662063599,
	"learning_rate": 1.1218682450284545e-05,
	"loss": 0.4949,
	"step": 26860
	},
	{
	"epoch": 0.8539026017344896,
	"grad_norm": 0.7574889659881592,
	"learning_rate": 1.112417122160535e-05,
	"loss": 0.4738,
	"step": 26880
	},
	{
	"epoch": 0.8545379459322088,
	"grad_norm": 0.6649676561355591,
	"learning_rate": 1.1030036328995497e-05,
	"loss": 0.4859,
	"step": 26900
	},
	{
	"epoch": 0.8551732901299279,
	"grad_norm": 0.7144981622695923,
	"learning_rate": 1.0936278171060032e-05,
	"loss": 0.4799,
	"step": 26920
	},
	{
	"epoch": 0.855808634327647,
	"grad_norm": 0.9074038863182068,
	"learning_rate": 1.0842897144808762e-05,
	"loss": 0.4951,
	"step": 26940
	},
	{
	"epoch": 0.8564439785253661,
	"grad_norm": 0.9271389842033386,
	"learning_rate": 1.0749893645654551e-05,
	"loss": 0.4692,
	"step": 26960
	},
	{
	"epoch": 0.8570793227230852,
	"grad_norm": 0.9277658462524414,
	"learning_rate": 1.0657268067411752e-05,
	"loss": 0.4711,
	"step": 26980
	},
	{
	"epoch": 0.8577146669208043,
	"grad_norm": 1.5766148567199707,
	"learning_rate": 1.0565020802294357e-05,
	"loss": 0.5081,
	"step": 27000
	},
	{
	"epoch": 0.8577146669208043,
	"eval_loss": 0.4444785416126251,
	"eval_runtime": 45.2678,
	"eval_samples_per_second": 59.711,
	"eval_steps_per_second": 29.867,
	"step": 27000
	},
	{
	"epoch": 0.8583500111185235,
	"grad_norm": 0.7567349076271057,
	"learning_rate": 1.0473152240914419e-05,
	"loss": 0.4671,
	"step": 27020
	},
	{
	"epoch": 0.8589853553162425,
	"grad_norm": 1.0230178833007812,
	"learning_rate": 1.0381662772280498e-05,
	"loss": 0.4874,
	"step": 27040
	},
	{
	"epoch": 0.8596206995139617,
	"grad_norm": 0.7454288005828857,
	"learning_rate": 1.0290552783795849e-05,
	"loss": 0.4825,
	"step": 27060
	},
	{
	"epoch": 0.8602560437116809,
	"grad_norm": 0.9813241958618164,
	"learning_rate": 1.0199822661256852e-05,
	"loss": 0.4785,
	"step": 27080
	},
	{
	"epoch": 0.8608913879093999,
	"grad_norm": 0.8269158005714417,
	"learning_rate": 1.0109472788851427e-05,
	"loss": 0.4797,
	"step": 27100
	},
	{
	"epoch": 0.861526732107119,
	"grad_norm": 0.8101191520690918,
	"learning_rate": 1.001950354915734e-05,
	"loss": 0.4735,
	"step": 27120
	},
	{
	"epoch": 0.8621620763048381,
	"grad_norm": 0.903421938419342,
	"learning_rate": 9.929915323140571e-06,
	"loss": 0.5,
	"step": 27140
	},
	{
	"epoch": 0.8627974205025573,
	"grad_norm": 0.7358487248420715,
	"learning_rate": 9.840708490153817e-06,
	"loss": 0.4799,
	"step": 27160
	},
	{
	"epoch": 0.8634327647002764,
	"grad_norm": 0.9838561415672302,
	"learning_rate": 9.751883427934717e-06,
	"loss": 0.506,
	"step": 27180
	},
	{
	"epoch": 0.8640681088979955,
	"grad_norm": 0.9448813796043396,
	"learning_rate": 9.66344051260436e-06,
	"loss": 0.4966,
	"step": 27200
	},
	{
	"epoch": 0.8647034530957146,
	"grad_norm": 1.111055612564087,
	"learning_rate": 9.575380118665733e-06,
	"loss": 0.5118,
	"step": 27220
	},
	{
	"epoch": 0.8653387972934338,
	"grad_norm": 0.968305230140686,
	"learning_rate": 9.487702619001992e-06,
	"loss": 0.5002,
	"step": 27240
	},
	{
	"epoch": 0.8659741414911528,
	"grad_norm": 0.8771995902061462,
	"learning_rate": 9.400408384874992e-06,
	"loss": 0.497,
	"step": 27260
	},
	{
	"epoch": 0.866609485688872,
	"grad_norm": 1.0422018766403198,
	"learning_rate": 9.31349778592373e-06,
	"loss": 0.5081,
	"step": 27280
	},
	{
	"epoch": 0.867244829886591,
	"grad_norm": 0.8950514197349548,
	"learning_rate": 9.22697119016267e-06,
	"loss": 0.4957,
	"step": 27300
	},
	{
	"epoch": 0.8678801740843102,
	"grad_norm": 0.8093190789222717,
	"learning_rate": 9.140828963980297e-06,
	"loss": 0.4667,
	"step": 27320
	},
	{
	"epoch": 0.8685155182820293,
	"grad_norm": 0.8465502262115479,
	"learning_rate": 9.055071472137466e-06,
	"loss": 0.4913,
	"step": 27340
	},
	{
	"epoch": 0.8691508624797484,
	"grad_norm": 0.8349893093109131,
	"learning_rate": 8.969699077766014e-06,
	"loss": 0.4738,
	"step": 27360
	},
	{
	"epoch": 0.8697862066774675,
	"grad_norm": 0.831910252571106,
	"learning_rate": 8.884712142367024e-06,
	"loss": 0.4923,
	"step": 27380
	},
	{
	"epoch": 0.8704215508751866,
	"grad_norm": 0.9581566452980042,
	"learning_rate": 8.80011102580941e-06,
	"loss": 0.4856,
	"step": 27400
	},
	{
	"epoch": 0.8710568950729057,
	"grad_norm": 0.823250412940979,
	"learning_rate": 8.720097656085246e-06,
	"loss": 0.4886,
	"step": 27420
	},
	{
	"epoch": 0.8716922392706249,
	"grad_norm": 0.988389253616333,
	"learning_rate": 8.636249915153039e-06,
	"loss": 0.4946,
	"step": 27440
	},
	{
	"epoch": 0.8723275834683439,
	"grad_norm": 0.85055011510849,
	"learning_rate": 8.55695289500451e-06,
	"loss": 0.4885,
	"step": 27460
	},
	{
	"epoch": 0.8729629276660631,
	"grad_norm": 0.9092792272567749,
	"learning_rate": 8.473859879755397e-06,
	"loss": 0.4631,
	"step": 27480
	},
	{
	"epoch": 0.8735982718637822,
	"grad_norm": 0.930949330329895,
	"learning_rate": 8.39115442306171e-06,
	"loss": 0.4955,
	"step": 27500
	},
	{
	"epoch": 0.8742336160615013,
	"grad_norm": 0.7822802066802979,
	"learning_rate": 8.308836875131665e-06,
	"loss": 0.4842,
	"step": 27520
	},
	{
	"epoch": 0.8748689602592205,
	"grad_norm": 0.7877179384231567,
	"learning_rate": 8.22690758453094e-06,
	"loss": 0.5006,
	"step": 27540
	},
	{
	"epoch": 0.8755043044569395,
	"grad_norm": 0.9965065717697144,
	"learning_rate": 8.145366898181139e-06,
	"loss": 0.4866,
	"step": 27560
	},
	{
	"epoch": 0.8761396486546587,
	"grad_norm": 1.1015229225158691,
	"learning_rate": 8.064215161358402e-06,
	"loss": 0.5203,
	"step": 27580
	},
	{
	"epoch": 0.8767749928523778,
	"grad_norm": 0.7929244637489319,
	"learning_rate": 7.983452717691852e-06,
	"loss": 0.477,
	"step": 27600
	},
	{
	"epoch": 0.8774103370500969,
	"grad_norm": 1.0685256719589233,
	"learning_rate": 7.903079909162258e-06,
	"loss": 0.5385,
	"step": 27620
	},
	{
	"epoch": 0.878045681247816,
	"grad_norm": 1.0020925998687744,
	"learning_rate": 7.82309707610046e-06,
	"loss": 0.5061,
	"step": 27640
	},
	{
	"epoch": 0.8786810254455352,
	"grad_norm": 0.8348806500434875,
	"learning_rate": 7.743504557185976e-06,
	"loss": 0.505,
	"step": 27660
	},
	{
	"epoch": 0.8793163696432542,
	"grad_norm": 0.8327703475952148,
	"learning_rate": 7.664302689445635e-06,
	"loss": 0.4633,
	"step": 27680
	},
	{
	"epoch": 0.8799517138409734,
	"grad_norm": 0.9524950385093689,
	"learning_rate": 7.5854918082520435e-06,
	"loss": 0.4859,
	"step": 27700
	},
	{
	"epoch": 0.8805870580386924,
	"grad_norm": 0.8677568435668945,
	"learning_rate": 7.507072247322211e-06,
	"loss": 0.4832,
	"step": 27720
	},
	{
	"epoch": 0.8812224022364116,
	"grad_norm": 0.9326565265655518,
	"learning_rate": 7.429044338716196e-06,
	"loss": 0.493,
	"step": 27740
	},
	{
	"epoch": 0.8818577464341307,
	"grad_norm": 0.7510032057762146,
	"learning_rate": 7.35140841283557e-06,
	"loss": 0.489,
	"step": 27760
	},
	{
	"epoch": 0.8824930906318498,
	"grad_norm": 0.7510486841201782,
	"learning_rate": 7.274164798422134e-06,
	"loss": 0.4741,
	"step": 27780
	},
	{
	"epoch": 0.8831284348295689,
	"grad_norm": 0.8744218945503235,
	"learning_rate": 7.197313822556462e-06,
	"loss": 0.4698,
	"step": 27800
	},
	{
	"epoch": 0.8837637790272881,
	"grad_norm": 0.7554096579551697,
	"learning_rate": 7.12085581065658e-06,
	"loss": 0.4561,
	"step": 27820
	},
	{
	"epoch": 0.8843991232250071,
	"grad_norm": 1.0702250003814697,
	"learning_rate": 7.044791086476499e-06,
	"loss": 0.5074,
	"step": 27840
	},
	{
	"epoch": 0.8850344674227263,
	"grad_norm": 1.2190712690353394,
	"learning_rate": 6.969119972104898e-06,
	"loss": 0.4873,
	"step": 27860
	},
	{
	"epoch": 0.8856698116204453,
	"grad_norm": 0.8235007524490356,
	"learning_rate": 6.893842787963789e-06,
	"loss": 0.4884,
	"step": 27880
	},
	{
	"epoch": 0.8863051558181645,
	"grad_norm": 0.8809916973114014,
	"learning_rate": 6.818959852807083e-06,
	"loss": 0.4746,
	"step": 27900
	},
	{
	"epoch": 0.8869405000158836,
	"grad_norm": 0.8362717628479004,
	"learning_rate": 6.744471483719306e-06,
	"loss": 0.5139,
	"step": 27920
	},
	{
	"epoch": 0.8875758442136027,
	"grad_norm": 0.9398446083068848,
	"learning_rate": 6.67037799611423e-06,
	"loss": 0.5002,
	"step": 27940
	},
	{
	"epoch": 0.8882111884113219,
	"grad_norm": 0.750577449798584,
	"learning_rate": 6.596679703733544e-06,
	"loss": 0.4965,
	"step": 27960
	},
	{
	"epoch": 0.8888465326090409,
	"grad_norm": 1.0199640989303589,
	"learning_rate": 6.523376918645474e-06,
	"loss": 0.5101,
	"step": 27980
	},
	{
	"epoch": 0.88948187680676,
	"grad_norm": 0.8302307724952698,
	"learning_rate": 6.4504699512435985e-06,
	"loss": 0.4608,
	"step": 28000
	},
	{
	"epoch": 0.88948187680676,
	"eval_loss": 0.4442509412765503,
	"eval_runtime": 44.8835,
	"eval_samples_per_second": 60.223,
	"eval_steps_per_second": 30.122,
	"step": 28000
	},
	{
	"epoch": 0.8901172210044792,
	"grad_norm": 0.7648799419403076,
	"learning_rate": 6.377959110245357e-06,
	"loss": 0.4704,
	"step": 28020
	},
	{
	"epoch": 0.8907525652021983,
	"grad_norm": 0.8950293064117432,
	"learning_rate": 6.305844702690878e-06,
	"loss": 0.4906,
	"step": 28040
	},
	{
	"epoch": 0.8913879093999174,
	"grad_norm": 0.9124616384506226,
	"learning_rate": 6.234127033941628e-06,
	"loss": 0.4939,
	"step": 28060
	},
	{
	"epoch": 0.8920232535976366,
	"grad_norm": 0.8970253467559814,
	"learning_rate": 6.1628064076791e-06,
	"loss": 0.5088,
	"step": 28080
	},
	{
	"epoch": 0.8926585977953556,
	"grad_norm": 0.9791019558906555,
	"learning_rate": 6.091883125903575e-06,
	"loss": 0.4613,
	"step": 28100
	},
	{
	"epoch": 0.8932939419930748,
	"grad_norm": 1.3384908437728882,
	"learning_rate": 6.021357488932789e-06,
	"loss": 0.4737,
	"step": 28120
	},
	{
	"epoch": 0.8939292861907938,
	"grad_norm": 1.076692819595337,
	"learning_rate": 5.951229795400726e-06,
	"loss": 0.5094,
	"step": 28140
	},
	{
	"epoch": 0.894564630388513,
	"grad_norm": 0.9772495031356812,
	"learning_rate": 5.881500342256285e-06,
	"loss": 0.4791,
	"step": 28160
	},
	{
	"epoch": 0.8951999745862321,
	"grad_norm": 0.946626603603363,
	"learning_rate": 5.8121694247620485e-06,
	"loss": 0.4843,
	"step": 28180
	},
	{
	"epoch": 0.8958353187839512,
	"grad_norm": 0.9328265190124512,
	"learning_rate": 5.74323733649309e-06,
	"loss": 0.4822,
	"step": 28200
	},
	{
	"epoch": 0.8964706629816703,
	"grad_norm": 0.7450932264328003,
	"learning_rate": 5.674704369335637e-06,
	"loss": 0.4746,
	"step": 28220
	},
	{
	"epoch": 0.8971060071793895,
	"grad_norm": 1.0023432970046997,
	"learning_rate": 5.606570813485856e-06,
	"loss": 0.4941,
	"step": 28240
	},
	{
	"epoch": 0.8977413513771085,
	"grad_norm": 0.8717949986457825,
	"learning_rate": 5.538836957448712e-06,
	"loss": 0.4801,
	"step": 28260
	},
	{
	"epoch": 0.8983766955748277,
	"grad_norm": 0.8665459156036377,
	"learning_rate": 5.474860277416504e-06,
	"loss": 0.4782,
	"step": 28280
	},
	{
	"epoch": 0.8990120397725467,
	"grad_norm": 0.8660995364189148,
	"learning_rate": 5.407906659415618e-06,
	"loss": 0.4788,
	"step": 28300
	},
	{
	"epoch": 0.8996473839702659,
	"grad_norm": 0.9390355944633484,
	"learning_rate": 5.341353582451425e-06,
	"loss": 0.478,
	"step": 28320
	},
	{
	"epoch": 0.900282728167985,
	"grad_norm": 0.8287180662155151,
	"learning_rate": 5.275201328336477e-06,
	"loss": 0.4846,
	"step": 28340
	},
	{
	"epoch": 0.9009180723657041,
	"grad_norm": 0.8496334552764893,
	"learning_rate": 5.209450177186081e-06,
	"loss": 0.4838,
	"step": 28360
	},
	{
	"epoch": 0.9015534165634232,
	"grad_norm": 0.9892422556877136,
	"learning_rate": 5.144100407417063e-06,
	"loss": 0.4854,
	"step": 28380
	},
	{
	"epoch": 0.9021887607611423,
	"grad_norm": 0.9813452363014221,
	"learning_rate": 5.0791522957467365e-06,
	"loss": 0.4916,
	"step": 28400
	},
	{
	"epoch": 0.9028241049588615,
	"grad_norm": 0.9126195907592773,
	"learning_rate": 5.014606117191545e-06,
	"loss": 0.4949,
	"step": 28420
	},
	{
	"epoch": 0.9034594491565806,
	"grad_norm": 0.8669445514678955,
	"learning_rate": 4.950462145066015e-06,
	"loss": 0.482,
	"step": 28440
	},
	{
	"epoch": 0.9040947933542997,
	"grad_norm": 0.9803065657615662,
	"learning_rate": 4.886720650981569e-06,
	"loss": 0.5025,
	"step": 28460
	},
	{
	"epoch": 0.9047301375520188,
	"grad_norm": 0.9414586424827576,
	"learning_rate": 4.823381904845392e-06,
	"loss": 0.4856,
	"step": 28480
	},
	{
	"epoch": 0.905365481749738,
	"grad_norm": 0.9295367002487183,
	"learning_rate": 4.760446174859224e-06,
	"loss": 0.4876,
	"step": 28500
	},
	{
	"epoch": 0.906000825947457,
	"grad_norm": 0.8859279751777649,
	"learning_rate": 4.697913727518332e-06,
	"loss": 0.5152,
	"step": 28520
	},
	{
	"epoch": 0.9066361701451762,
	"grad_norm": 0.7441398501396179,
	"learning_rate": 4.63578482761029e-06,
	"loss": 0.4787,
	"step": 28540
	},
	{
	"epoch": 0.9072715143428952,
	"grad_norm": 1.459954023361206,
	"learning_rate": 4.574059738213876e-06,
	"loss": 0.4813,
	"step": 28560
	},
	{
	"epoch": 0.9079068585406144,
	"grad_norm": 0.9451243281364441,
	"learning_rate": 4.512738720698018e-06,
	"loss": 0.4835,
	"step": 28580
	},
	{
	"epoch": 0.9085422027383335,
	"grad_norm": 0.8990492820739746,
	"learning_rate": 4.451822034720587e-06,
	"loss": 0.4811,
	"step": 28600
	},
	{
	"epoch": 0.9091775469360526,
	"grad_norm": 0.7530508637428284,
	"learning_rate": 4.3913099382273835e-06,
	"loss": 0.5,
	"step": 28620
	},
	{
	"epoch": 0.9098128911337717,
	"grad_norm": 0.8113830089569092,
	"learning_rate": 4.331202687451019e-06,
	"loss": 0.5075,
	"step": 28640
	},
	{
	"epoch": 0.9104482353314909,
	"grad_norm": 0.8615418672561646,
	"learning_rate": 4.2715005369097895e-06,
	"loss": 0.5152,
	"step": 28660
	},
	{
	"epoch": 0.9110835795292099,
	"grad_norm": 0.8459773659706116,
	"learning_rate": 4.212203739406673e-06,
	"loss": 0.4804,
	"step": 28680
	},
	{
	"epoch": 0.9117189237269291,
	"grad_norm": 0.8821284770965576,
	"learning_rate": 4.153312546028199e-06,
	"loss": 0.5311,
	"step": 28700
	},
	{
	"epoch": 0.9123542679246481,
	"grad_norm": 1.0187216997146606,
	"learning_rate": 4.0948272061434035e-06,
	"loss": 0.4632,
	"step": 28720
	},
	{
	"epoch": 0.9129896121223673,
	"grad_norm": 0.9274182915687561,
	"learning_rate": 4.036747967402788e-06,
	"loss": 0.4832,
	"step": 28740
	},
	{
	"epoch": 0.9136249563200864,
	"grad_norm": 0.7573745846748352,
	"learning_rate": 3.979075075737226e-06,
	"loss": 0.4905,
	"step": 28760
	},
	{
	"epoch": 0.9142603005178055,
	"grad_norm": 0.9005789160728455,
	"learning_rate": 3.921808775357027e-06,
	"loss": 0.5114,
	"step": 28780
	},
	{
	"epoch": 0.9148956447155246,
	"grad_norm": 0.9073104858398438,
	"learning_rate": 3.864949308750743e-06,
	"loss": 0.5018,
	"step": 28800
	},
	{
	"epoch": 0.9155309889132438,
	"grad_norm": 0.7230907678604126,
	"learning_rate": 3.808496916684268e-06,
	"loss": 0.4954,
	"step": 28820
	},
	{
	"epoch": 0.9161663331109628,
	"grad_norm": 0.7139384746551514,
	"learning_rate": 3.7524518381997885e-06,
	"loss": 0.464,
	"step": 28840
	},
	{
	"epoch": 0.916801677308682,
	"grad_norm": 0.8710399866104126,
	"learning_rate": 3.696814310614749e-06,
	"loss": 0.5048,
	"step": 28860
	},
	{
	"epoch": 0.917437021506401,
	"grad_norm": 0.87566739320755,
	"learning_rate": 3.6415845695208505e-06,
	"loss": 0.484,
	"step": 28880
	},
	{
	"epoch": 0.9180723657041202,
	"grad_norm": 0.9447526335716248,
	"learning_rate": 3.586762848783076e-06,
	"loss": 0.5032,
	"step": 28900
	},
	{
	"epoch": 0.9187077099018394,
	"grad_norm": 0.7784162759780884,
	"learning_rate": 3.53234938053868e-06,
	"loss": 0.4451,
	"step": 28920
	},
	{
	"epoch": 0.9193430540995584,
	"grad_norm": 0.9225743412971497,
	"learning_rate": 3.478344395196198e-06,
	"loss": 0.4745,
	"step": 28940
	},
	{
	"epoch": 0.9199783982972776,
	"grad_norm": 0.9712013602256775,
	"learning_rate": 3.4247481214345177e-06,
	"loss": 0.4956,
	"step": 28960
	},
	{
	"epoch": 0.9206137424949966,
	"grad_norm": 1.2805237770080566,
	"learning_rate": 3.371560786201855e-06,
	"loss": 0.4971,
	"step": 28980
	},
	{
	"epoch": 0.9212490866927158,
	"grad_norm": 0.7866525053977966,
	"learning_rate": 3.3187826147147994e-06,
	"loss": 0.497,
	"step": 29000
	},
	{
	"epoch": 0.9212490866927158,
	"eval_loss": 0.44399821758270264,
	"eval_runtime": 45.0357,
	"eval_samples_per_second": 60.019,
	"eval_steps_per_second": 30.021,
	"step": 29000
	},
	{
	"epoch": 0.9218844308904349,
	"grad_norm": 0.7901077270507812,
	"learning_rate": 3.2664138304574153e-06,
	"loss": 0.514,
	"step": 29020
	},
	{
	"epoch": 0.922519775088154,
	"grad_norm": 1.0464386940002441,
	"learning_rate": 3.2144546551802323e-06,
	"loss": 0.5042,
	"step": 29040
	},
	{
	"epoch": 0.9231551192858731,
	"grad_norm": 0.8520443439483643,
	"learning_rate": 3.162905308899322e-06,
	"loss": 0.4858,
	"step": 29060
	},
	{
	"epoch": 0.9237904634835923,
	"grad_norm": 0.92030268907547,
	"learning_rate": 3.1117660098953895e-06,
	"loss": 0.4766,
	"step": 29080
	},
	{
	"epoch": 0.9244258076813113,
	"grad_norm": 0.7019485235214233,
	"learning_rate": 3.06103697471285e-06,
	"loss": 0.4903,
	"step": 29100
	},
	{
	"epoch": 0.9250611518790305,
	"grad_norm": 1.3560097217559814,
	"learning_rate": 3.0107184181588643e-06,
	"loss": 0.5125,
	"step": 29120
	},
	{
	"epoch": 0.9256964960767495,
	"grad_norm": 0.9616526365280151,
	"learning_rate": 2.960810553302462e-06,
	"loss": 0.512,
	"step": 29140
	},
	{
	"epoch": 0.9263318402744687,
	"grad_norm": 1.1742409467697144,
	"learning_rate": 2.9113135914736856e-06,
	"loss": 0.5007,
	"step": 29160
	},
	{
	"epoch": 0.9269671844721878,
	"grad_norm": 0.8712571263313293,
	"learning_rate": 2.8622277422625907e-06,
	"loss": 0.4717,
	"step": 29180
	},
	{
	"epoch": 0.9276025286699069,
	"grad_norm": 0.8578605055809021,
	"learning_rate": 2.8135532135184384e-06,
	"loss": 0.4989,
	"step": 29200
	},
	{
	"epoch": 0.928237872867626,
	"grad_norm": 0.8551231026649475,
	"learning_rate": 2.7652902113488143e-06,
	"loss": 0.4825,
	"step": 29220
	},
	{
	"epoch": 0.9288732170653452,
	"grad_norm": 0.82204669713974,
	"learning_rate": 2.7174389401186996e-06,
	"loss": 0.4702,
	"step": 29240
	},
	{
	"epoch": 0.9295085612630642,
	"grad_norm": 0.9263904690742493,
	"learning_rate": 2.6699996024496575e-06,
	"loss": 0.4996,
	"step": 29260
	},
	{
	"epoch": 0.9301439054607834,
	"grad_norm": 1.037817120552063,
	"learning_rate": 2.6229723992189704e-06,
	"loss": 0.4986,
	"step": 29280
	},
	{
	"epoch": 0.9307792496585024,
	"grad_norm": 1.0528874397277832,
	"learning_rate": 2.5763575295587593e-06,
	"loss": 0.4794,
	"step": 29300
	},
	{
	"epoch": 0.9314145938562216,
	"grad_norm": 0.8765133619308472,
	"learning_rate": 2.5301551908551545e-06,
	"loss": 0.4878,
	"step": 29320
	},
	{
	"epoch": 0.9320499380539408,
	"grad_norm": 0.8322685956954956,
	"learning_rate": 2.484365578747494e-06,
	"loss": 0.4945,
	"step": 29340
	},
	{
	"epoch": 0.9326852822516598,
	"grad_norm": 0.8344667553901672,
	"learning_rate": 2.438988887127436e-06,
	"loss": 0.4981,
	"step": 29360
	},
	{
	"epoch": 0.933320626449379,
	"grad_norm": 0.8750690817832947,
	"learning_rate": 2.3940253081381703e-06,
	"loss": 0.4969,
	"step": 29380
	},
	{
	"epoch": 0.933955970647098,
	"grad_norm": 0.808814287185669,
	"learning_rate": 2.3494750321736093e-06,
	"loss": 0.4623,
	"step": 29400
	},
	{
	"epoch": 0.9345913148448172,
	"grad_norm": 0.9626306891441345,
	"learning_rate": 2.3053382478775754e-06,
	"loss": 0.5028,
	"step": 29420
	},
	{
	"epoch": 0.9352266590425363,
	"grad_norm": 0.9727978706359863,
	"learning_rate": 2.261615142143003e-06,
	"loss": 0.5059,
	"step": 29440
	},
	{
	"epoch": 0.9358620032402554,
	"grad_norm": 0.8926533460617065,
	"learning_rate": 2.2183059001111174e-06,
	"loss": 0.4764,
	"step": 29460
	},
	{
	"epoch": 0.9364973474379745,
	"grad_norm": 1.0506230592727661,
	"learning_rate": 2.1754107051707218e-06,
	"loss": 0.5069,
	"step": 29480
	},
	{
	"epoch": 0.9371326916356937,
	"grad_norm": 0.7190736532211304,
	"learning_rate": 2.1329297389573565e-06,
	"loss": 0.49,
	"step": 29500
	},
	{
	"epoch": 0.9377680358334127,
	"grad_norm": 0.7786980867385864,
	"learning_rate": 2.09086318135252e-06,
	"loss": 0.4766,
	"step": 29520
	},
	{
	"epoch": 0.9384033800311319,
	"grad_norm": 0.8696832060813904,
	"learning_rate": 2.049211210483004e-06,
	"loss": 0.4959,
	"step": 29540
	},
	{
	"epoch": 0.9390387242288509,
	"grad_norm": 0.7167271375656128,
	"learning_rate": 2.0079740027200144e-06,
	"loss": 0.4927,
	"step": 29560
	},
	{
	"epoch": 0.9396740684265701,
	"grad_norm": 0.868259072303772,
	"learning_rate": 1.967151732678518e-06,
	"loss": 0.4788,
	"step": 29580
	},
	{
	"epoch": 0.9403094126242892,
	"grad_norm": 0.8658266663551331,
	"learning_rate": 1.9267445732164325e-06,
	"loss": 0.4919,
	"step": 29600
	},
	{
	"epoch": 0.9409447568220083,
	"grad_norm": 1.010276436805725,
	"learning_rate": 1.8867526954339688e-06,
	"loss": 0.4811,
	"step": 29620
	},
	{
	"epoch": 0.9415801010197274,
	"grad_norm": 0.9376817941665649,
	"learning_rate": 1.8471762686728344e-06,
	"loss": 0.4723,
	"step": 29640
	},
	{
	"epoch": 0.9422154452174466,
	"grad_norm": 1.520297646522522,
	"learning_rate": 1.8080154605155996e-06,
	"loss": 0.5146,
	"step": 29660
	},
	{
	"epoch": 0.9428507894151656,
	"grad_norm": 0.8532717227935791,
	"learning_rate": 1.7692704367848756e-06,
	"loss": 0.4556,
	"step": 29680
	},
	{
	"epoch": 0.9434861336128848,
	"grad_norm": 1.069378137588501,
	"learning_rate": 1.730941361542704e-06,
	"loss": 0.4789,
	"step": 29700
	},
	{
	"epoch": 0.9441214778106038,
	"grad_norm": 0.8771205544471741,
	"learning_rate": 1.6930283970898574e-06,
	"loss": 0.4819,
	"step": 29720
	},
	{
	"epoch": 0.944756822008323,
	"grad_norm": 0.8729512095451355,
	"learning_rate": 1.6555317039650852e-06,
	"loss": 0.4792,
	"step": 29740
	},
	{
	"epoch": 0.9453921662060422,
	"grad_norm": 0.8724381923675537,
	"learning_rate": 1.6184514409444795e-06,
	"loss": 0.4726,
	"step": 29760
	},
	{
	"epoch": 0.9460275104037612,
	"grad_norm": 0.9022035598754883,
	"learning_rate": 1.5817877650408541e-06,
	"loss": 0.4891,
	"step": 29780
	},
	{
	"epoch": 0.9466628546014804,
	"grad_norm": 1.003596544265747,
	"learning_rate": 1.5455408315029562e-06,
	"loss": 0.4974,
	"step": 29800
	},
	{
	"epoch": 0.9472981987991995,
	"grad_norm": 0.8569382429122925,
	"learning_rate": 1.5097107938149113e-06,
	"loss": 0.4781,
	"step": 29820
	},
	{
	"epoch": 0.9479335429969186,
	"grad_norm": 0.9094131588935852,
	"learning_rate": 1.4742978036955457e-06,
	"loss": 0.5155,
	"step": 29840
	},
	{
	"epoch": 0.9485688871946377,
	"grad_norm": 1.0451712608337402,
	"learning_rate": 1.4393020110977206e-06,
	"loss": 0.4895,
	"step": 29860
	},
	{
	"epoch": 0.9492042313923568,
	"grad_norm": 1.2386709451675415,
	"learning_rate": 1.4047235642077217e-06,
	"loss": 0.4702,
	"step": 29880
	},
	{
	"epoch": 0.9498395755900759,
	"grad_norm": 0.966143786907196,
	"learning_rate": 1.3705626094446256e-06,
	"loss": 0.4962,
	"step": 29900
	},
	{
	"epoch": 0.9504749197877951,
	"grad_norm": 0.9544230103492737,
	"learning_rate": 1.33681929145969e-06,
	"loss": 0.4788,
	"step": 29920
	},
	{
	"epoch": 0.9511102639855141,
	"grad_norm": 0.8583151698112488,
	"learning_rate": 1.3034937531357095e-06,
	"loss": 0.477,
	"step": 29940
	},
	{
	"epoch": 0.9517456081832333,
	"grad_norm": 0.8361521363258362,
	"learning_rate": 1.270586135586427e-06,
	"loss": 0.5162,
	"step": 29960
	},
	{
	"epoch": 0.9523809523809523,
	"grad_norm": 1.0520914793014526,
	"learning_rate": 1.2380965781559783e-06,
	"loss": 0.4762,
	"step": 29980
	},
	{
	"epoch": 0.9530162965786715,
	"grad_norm": 0.8727782964706421,
	"learning_rate": 1.2060252184182386e-06,
	"loss": 0.4929,
	"step": 30000
	},
	{
	"epoch": 0.9530162965786715,
	"eval_loss": 0.443807452917099,
	"eval_runtime": 44.5933,
	"eval_samples_per_second": 60.614,
	"eval_steps_per_second": 30.318,
	"step": 30000
	},
	{
	"epoch": 0.9536516407763906,
	"grad_norm": 0.7989442944526672,
	"learning_rate": 1.174372192176254e-06,
	"loss": 0.4932,
	"step": 30020
	},
	{
	"epoch": 0.9542869849741097,
	"grad_norm": 0.7544863224029541,
	"learning_rate": 1.1431376334616994e-06,
	"loss": 0.482,
	"step": 30040
	},
	{
	"epoch": 0.9549223291718288,
	"grad_norm": 0.8897516131401062,
	"learning_rate": 1.1123216745342779e-06,
	"loss": 0.4898,
	"step": 30060
	},
	{
	"epoch": 0.955557673369548,
	"grad_norm": 0.8291769027709961,
	"learning_rate": 1.0819244458811773e-06,
	"loss": 0.5021,
	"step": 30080
	},
	{
	"epoch": 0.956193017567267,
	"grad_norm": 0.8413028717041016,
	"learning_rate": 1.0519460762165144e-06,
	"loss": 0.4762,
	"step": 30100
	},
	{
	"epoch": 0.9568283617649862,
	"grad_norm": 0.9216207265853882,
	"learning_rate": 1.0223866924807924e-06,
	"loss": 0.4869,
	"step": 30120
	},
	{
	"epoch": 0.9574637059627052,
	"grad_norm": 0.8935249447822571,
	"learning_rate": 9.932464198403325e-07,
	"loss": 0.4928,
	"step": 30140
	},
	{
	"epoch": 0.9580990501604244,
	"grad_norm": 0.7496423721313477,
	"learning_rate": 9.645253816867983e-07,
	"loss": 0.5266,
	"step": 30160
	},
	{
	"epoch": 0.9587343943581436,
	"grad_norm": 0.9738262295722961,
	"learning_rate": 9.362236996366514e-07,
	"loss": 0.4735,
	"step": 30180
	},
	{
	"epoch": 0.9593697385558626,
	"grad_norm": 0.9249958395957947,
	"learning_rate": 9.083414935305956e-07,
	"loss": 0.4706,
	"step": 30200
	},
	{
	"epoch": 0.9600050827535818,
	"grad_norm": 1.0667359828948975,
	"learning_rate": 8.808788814331448e-07,
	"loss": 0.4721,
	"step": 30220
	},
	{
	"epoch": 0.9606404269513009,
	"grad_norm": 0.8088135123252869,
	"learning_rate": 8.53835979632056e-07,
	"loss": 0.4884,
	"step": 30240
	},
	{
	"epoch": 0.96127577114902,
	"grad_norm": 0.9164936542510986,
	"learning_rate": 8.272129026378639e-07,
	"loss": 0.5022,
	"step": 30260
	},
	{
	"epoch": 0.9619111153467391,
	"grad_norm": 0.7835588455200195,
	"learning_rate": 8.010097631834245e-07,
	"loss": 0.4707,
	"step": 30280
	},
	{
	"epoch": 0.9625464595444582,
	"grad_norm": 1.2730233669281006,
	"learning_rate": 7.752266722233614e-07,
	"loss": 0.4795,
	"step": 30300
	},
	{
	"epoch": 0.9631818037421773,
	"grad_norm": 0.9977156519889832,
	"learning_rate": 7.511219051883567e-07,
	"loss": 0.5209,
	"step": 30320
	},
	{
	"epoch": 0.9638171479398965,
	"grad_norm": 0.941656231880188,
	"learning_rate": 7.26158221189377e-07,
	"loss": 0.4747,
	"step": 30340
	},
	{
	"epoch": 0.9644524921376155,
	"grad_norm": 0.7258419990539551,
	"learning_rate": 7.028320832731084e-07,
	"loss": 0.4961,
	"step": 30360
	},
	{
	"epoch": 0.9650878363353347,
	"grad_norm": 0.974557638168335,
	"learning_rate": 6.786882081830093e-07,
	"loss": 0.4559,
	"step": 30380
	},
	{
	"epoch": 0.9657231805330537,
	"grad_norm": 0.973461925983429,
	"learning_rate": 6.549648995460511e-07,
	"loss": 0.4931,
	"step": 30400
	},
	{
	"epoch": 0.9663585247307729,
	"grad_norm": 1.0066043138504028,
	"learning_rate": 6.31662257816279e-07,
	"loss": 0.4901,
	"step": 30420
	},
	{
	"epoch": 0.966993868928492,
	"grad_norm": 0.9339585900306702,
	"learning_rate": 6.087803816664628e-07,
	"loss": 0.4697,
	"step": 30440
	},
	{
	"epoch": 0.9676292131262111,
	"grad_norm": 0.8802968859672546,
	"learning_rate": 5.863193679877088e-07,
	"loss": 0.4943,
	"step": 30460
	},
	{
	"epoch": 0.9682645573239302,
	"grad_norm": 0.7557999491691589,
	"learning_rate": 5.6427931188896e-07,
	"loss": 0.4761,
	"step": 30480
	},
	{
	"epoch": 0.9688999015216494,
	"grad_norm": 0.9139352440834045,
	"learning_rate": 5.426603066967295e-07,
	"loss": 0.476,
	"step": 30500
	},
	{
	"epoch": 0.9695352457193684,
	"grad_norm": 0.9125082492828369,
	"learning_rate": 5.21462443954579e-07,
	"loss": 0.4792,
	"step": 30520
	},
	{
	"epoch": 0.9701705899170876,
	"grad_norm": 0.9351817965507507,
	"learning_rate": 5.006858134228076e-07,
	"loss": 0.4976,
	"step": 30540
	},
	{
	"epoch": 0.9708059341148066,
	"grad_norm": 0.743870735168457,
	"learning_rate": 4.803305030780302e-07,
	"loss": 0.4695,
	"step": 30560
	},
	{
	"epoch": 0.9714412783125258,
	"grad_norm": 0.9468183517456055,
	"learning_rate": 4.603965991128445e-07,
	"loss": 0.5027,
	"step": 30580
	},
	{
	"epoch": 0.972076622510245,
	"grad_norm": 1.1194064617156982,
	"learning_rate": 4.408841859354307e-07,
	"loss": 0.5146,
	"step": 30600
	},
	{
	"epoch": 0.972711966707964,
	"grad_norm": 0.7916650176048279,
	"learning_rate": 4.21793346169197e-07,
	"loss": 0.4689,
	"step": 30620
	},
	{
	"epoch": 0.9733473109056832,
	"grad_norm": 0.9158383011817932,
	"learning_rate": 4.0312416065245717e-07,
	"loss": 0.5272,
	"step": 30640
	},
	{
	"epoch": 0.9739826551034023,
	"grad_norm": 0.8861019015312195,
	"learning_rate": 3.8487670843807555e-07,
	"loss": 0.4981,
	"step": 30660
	},
	{
	"epoch": 0.9746179993011214,
	"grad_norm": 1.01827871799469,
	"learning_rate": 3.670510667931004e-07,
	"loss": 0.5386,
	"step": 30680
	},
	{
	"epoch": 0.9752533434988405,
	"grad_norm": 0.9622276425361633,
	"learning_rate": 3.496473111984866e-07,
	"loss": 0.5135,
	"step": 30700
	},
	{
	"epoch": 0.9758886876965596,
	"grad_norm": 1.0768787860870361,
	"learning_rate": 3.326655153487512e-07,
	"loss": 0.4943,
	"step": 30720
	},
	{
	"epoch": 0.9765240318942787,
	"grad_norm": 1.2705291509628296,
	"learning_rate": 3.16105751151663e-07,
	"loss": 0.4924,
	"step": 30740
	},
	{
	"epoch": 0.9771593760919979,
	"grad_norm": 0.9354774951934814,
	"learning_rate": 2.99968088727931e-07,
	"loss": 0.4811,
	"step": 30760
	},
	{
	"epoch": 0.9777947202897169,
	"grad_norm": 0.8442774415016174,
	"learning_rate": 2.842525964109166e-07,
	"loss": 0.4652,
	"step": 30780
	},
	{
	"epoch": 0.9784300644874361,
	"grad_norm": 0.9658933281898499,
	"learning_rate": 2.6895934074635533e-07,
	"loss": 0.4767,
	"step": 30800
	},
	{
	"epoch": 0.9790654086851552,
	"grad_norm": 0.9930063486099243,
	"learning_rate": 2.5408838649204625e-07,
	"loss": 0.4791,
	"step": 30820
	},
	{
	"epoch": 0.9797007528828743,
	"grad_norm": 0.9439179301261902,
	"learning_rate": 2.396397966176078e-07,
	"loss": 0.4833,
	"step": 30840
	},
	{
	"epoch": 0.9803360970805934,
	"grad_norm": 0.8499469757080078,
	"learning_rate": 2.25613632304178e-07,
	"loss": 0.4969,
	"step": 30860
	},
	{
	"epoch": 0.9809714412783125,
	"grad_norm": 1.0228259563446045,
	"learning_rate": 2.1200995294420323e-07,
	"loss": 0.4709,
	"step": 30880
	},
	{
	"epoch": 0.9816067854760316,
	"grad_norm": 1.1045747995376587,
	"learning_rate": 1.988288161411389e-07,
	"loss": 0.4964,
	"step": 30900
	},
	{
	"epoch": 0.9822421296737508,
	"grad_norm": 0.8404049277305603,
	"learning_rate": 1.8607027770921602e-07,
	"loss": 0.5289,
	"step": 30920
	},
	{
	"epoch": 0.9828774738714698,
	"grad_norm": 0.8583685755729675,
	"learning_rate": 1.7373439167325257e-07,
	"loss": 0.4824,
	"step": 30940
	},
	{
	"epoch": 0.983512818069189,
	"grad_norm": 0.8340322375297546,
	"learning_rate": 1.6240682931759622e-07,
	"loss": 0.5276,
	"step": 30960
	},
	{
	"epoch": 0.984148162266908,
	"grad_norm": 0.717254638671875,
	"learning_rate": 1.508952640646988e-07,
	"loss": 0.4837,
	"step": 30980
	},
	{
	"epoch": 0.9847835064646272,
	"grad_norm": 0.7109520435333252,
	"learning_rate": 1.3980650015292806e-07,
	"loss": 0.4805,
	"step": 31000
	},
	{
	"epoch": 0.9847835064646272,
	"eval_loss": 0.4438159465789795,
	"eval_runtime": 44.826,
	"eval_samples_per_second": 60.3,
	"eval_steps_per_second": 30.161,
	"step": 31000
	},
	{
	"epoch": 0.9854188506623464,
	"grad_norm": 0.8632842302322388,
	"learning_rate": 1.2914058453658008e-07,
	"loss": 0.4787,
	"step": 31020
	},
	{
	"epoch": 0.9860541948600654,
	"grad_norm": 0.9302808046340942,
	"learning_rate": 1.1889756237943861e-07,
	"loss": 0.4733,
	"step": 31040
	},
	{
	"epoch": 0.9866895390577846,
	"grad_norm": 1.0309478044509888,
	"learning_rate": 1.090774770545755e-07,
	"loss": 0.498,
	"step": 31060
	},
	{
	"epoch": 0.9873248832555037,
	"grad_norm": 0.7432119250297546,
	"learning_rate": 9.968037014420616e-08,
	"loss": 0.4909,
	"step": 31080
	},
	{
	"epoch": 0.9879602274532228,
	"grad_norm": 1.0406357049942017,
	"learning_rate": 9.070628143946768e-08,
	"loss": 0.4913,
	"step": 31100
	},
	{
	"epoch": 0.9885955716509419,
	"grad_norm": 0.8807629346847534,
	"learning_rate": 8.215524894024107e-08,
	"loss": 0.4843,
	"step": 31120
	},
	{
	"epoch": 0.989230915848661,
	"grad_norm": 0.815077006816864,
	"learning_rate": 7.402730885507359e-08,
	"loss": 0.4877,
	"step": 31140
	},
	{
	"epoch": 0.9898662600463801,
	"grad_norm": 0.8051480054855347,
	"learning_rate": 6.632249560092341e-08,
	"loss": 0.489,
	"step": 31160
	},
	{
	"epoch": 0.9905016042440993,
	"grad_norm": 0.8251180648803711,
	"learning_rate": 5.9040841803081895e-08,
	"loss": 0.4763,
	"step": 31180
	},
	{
	"epoch": 0.9911369484418183,
	"grad_norm": 0.8782890439033508,
	"learning_rate": 5.218237829499595e-08,
	"loss": 0.5012,
	"step": 31200
	},
	{
	"epoch": 0.9917722926395375,
	"grad_norm": 0.9451269507408142,
	"learning_rate": 4.574713411816811e-08,
	"loss": 0.4765,
	"step": 31220
	},
	{
	"epoch": 0.9924076368372566,
	"grad_norm": 1.2340540885925293,
	"learning_rate": 3.973513652202332e-08,
	"loss": 0.4999,
	"step": 31240
	},
	{
	"epoch": 0.9930429810349757,
	"grad_norm": 1.0101948976516724,
	"learning_rate": 3.414641096376459e-08,
	"loss": 0.5118,
	"step": 31260
	},
	{
	"epoch": 0.9936783252326948,
	"grad_norm": 0.7806993722915649,
	"learning_rate": 2.8980981108317485e-08,
	"loss": 0.5068,
	"step": 31280
	},
	{
	"epoch": 0.9943136694304139,
	"grad_norm": 1.1223636865615845,
	"learning_rate": 2.4238868828196927e-08,
	"loss": 0.5182,
	"step": 31300
	},
	{
	"epoch": 0.994949013628133,
	"grad_norm": 0.8514977693557739,
	"learning_rate": 1.9920094203418336e-08,
	"loss": 0.5072,
	"step": 31320
	},
	{
	"epoch": 0.9955843578258522,
	"grad_norm": 1.1318073272705078,
	"learning_rate": 1.6024675521397747e-08,
	"loss": 0.4819,
	"step": 31340
	},
	{
	"epoch": 0.9962197020235712,
	"grad_norm": 0.9314286708831787,
	"learning_rate": 1.2552629276929573e-08,
	"loss": 0.4957,
	"step": 31360
	},
	{
	"epoch": 0.9968550462212904,
	"grad_norm": 0.7769533395767212,
	"learning_rate": 9.503970172031196e-09,
	"loss": 0.5149,
	"step": 31380
	},
	{
	"epoch": 0.9974903904190096,
	"grad_norm": 0.7601432800292969,
	"learning_rate": 6.878711115976266e-09,
	"loss": 0.4933,
	"step": 31400
	},
	{
	"epoch": 0.9981257346167286,
	"grad_norm": 0.987147331237793,
	"learning_rate": 4.6768632251614765e-09,
	"loss": 0.4693,
	"step": 31420
	},
	{
	"epoch": 0.9987610788144478,
	"grad_norm": 0.8807405829429626,
	"learning_rate": 2.8984358230954577e-09,
	"loss": 0.474,
	"step": 31440
	},
	{
	"epoch": 0.9993964230121668,
	"grad_norm": 0.7518433332443237,
	"learning_rate": 1.5434364403543733e-09,
	"loss": 0.5076,
	"step": 31460
	},
	{
	"epoch": 1.0,
	"step": 31479,
	"total_flos": 0.0,
	"train_loss": 0.3508217529017671,
	"train_runtime": 14676.7422,
	"train_samples_per_second": 68.633,
	"train_steps_per_second": 2.145
	}
	],
	"logging_steps": 20,
	"max_steps": 31479,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 2000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 16,
	"trial_name": null,
	"trial_params": null
	}