NLBSE2026-java / checkpoint-48015 /trainer_state.json
ThomBors's picture
Upload folder using huggingface_hub
aa09656 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 48015,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"embedding_loss": 0.2883,
"epoch": 0.00010413412475268145,
"grad_norm": 1.762959361076355,
"learning_rate": 0.0,
"step": 1
},
{
"embedding_loss": 0.2973,
"epoch": 0.005206706237634072,
"grad_norm": 2.416536569595337,
"learning_rate": 2.0408163265306121e-07,
"step": 50
},
{
"embedding_loss": 0.2757,
"epoch": 0.010413412475268145,
"grad_norm": 1.1051920652389526,
"learning_rate": 4.1232819658475635e-07,
"step": 100
},
{
"embedding_loss": 0.2678,
"epoch": 0.015620118712902219,
"grad_norm": 1.2399098873138428,
"learning_rate": 6.205747605164515e-07,
"step": 150
},
{
"embedding_loss": 0.2554,
"epoch": 0.02082682495053629,
"grad_norm": 0.9736790657043457,
"learning_rate": 8.288213244481466e-07,
"step": 200
},
{
"embedding_loss": 0.2485,
"epoch": 0.026033531188170363,
"grad_norm": 0.9768863916397095,
"learning_rate": 1.037067888379842e-06,
"step": 250
},
{
"embedding_loss": 0.2472,
"epoch": 0.031240237425804437,
"grad_norm": 1.2124693393707275,
"learning_rate": 1.2453144523115369e-06,
"step": 300
},
{
"embedding_loss": 0.2309,
"epoch": 0.03644694366343851,
"grad_norm": 1.0609492063522339,
"learning_rate": 1.453561016243232e-06,
"step": 350
},
{
"embedding_loss": 0.225,
"epoch": 0.04165364990107258,
"grad_norm": 0.9569733142852783,
"learning_rate": 1.6618075801749272e-06,
"step": 400
},
{
"embedding_loss": 0.2123,
"epoch": 0.046860356138706656,
"grad_norm": 1.1237151622772217,
"learning_rate": 1.8700541441066226e-06,
"step": 450
},
{
"embedding_loss": 0.2045,
"epoch": 0.05206706237634073,
"grad_norm": 1.0526118278503418,
"learning_rate": 2.0783007080383173e-06,
"step": 500
},
{
"embedding_loss": 0.1934,
"epoch": 0.0572737686139748,
"grad_norm": 1.1583402156829834,
"learning_rate": 2.2865472719700125e-06,
"step": 550
},
{
"embedding_loss": 0.184,
"epoch": 0.062480474851608875,
"grad_norm": 1.313284158706665,
"learning_rate": 2.494793835901708e-06,
"step": 600
},
{
"embedding_loss": 0.183,
"epoch": 0.06768718108924295,
"grad_norm": 1.2073508501052856,
"learning_rate": 2.7030403998334032e-06,
"step": 650
},
{
"embedding_loss": 0.171,
"epoch": 0.07289388732687702,
"grad_norm": 0.9187403321266174,
"learning_rate": 2.911286963765098e-06,
"step": 700
},
{
"embedding_loss": 0.1752,
"epoch": 0.07810059356451109,
"grad_norm": 1.049507737159729,
"learning_rate": 3.119533527696793e-06,
"step": 750
},
{
"embedding_loss": 0.1627,
"epoch": 0.08330729980214516,
"grad_norm": 1.696567416191101,
"learning_rate": 3.3277800916284887e-06,
"step": 800
},
{
"embedding_loss": 0.1623,
"epoch": 0.08851400603977924,
"grad_norm": 1.0722424983978271,
"learning_rate": 3.5360266555601835e-06,
"step": 850
},
{
"embedding_loss": 0.1557,
"epoch": 0.09372071227741331,
"grad_norm": 0.8424978256225586,
"learning_rate": 3.7442732194918786e-06,
"step": 900
},
{
"embedding_loss": 0.151,
"epoch": 0.09892741851504738,
"grad_norm": 1.0043538808822632,
"learning_rate": 3.952519783423574e-06,
"step": 950
},
{
"embedding_loss": 0.1424,
"epoch": 0.10413412475268145,
"grad_norm": 0.910914957523346,
"learning_rate": 4.160766347355269e-06,
"step": 1000
},
{
"embedding_loss": 0.1347,
"epoch": 0.10934083099031552,
"grad_norm": 1.1411229372024536,
"learning_rate": 4.369012911286964e-06,
"step": 1050
},
{
"embedding_loss": 0.1342,
"epoch": 0.1145475372279496,
"grad_norm": 1.1177036762237549,
"learning_rate": 4.57725947521866e-06,
"step": 1100
},
{
"embedding_loss": 0.1282,
"epoch": 0.11975424346558367,
"grad_norm": 1.1329469680786133,
"learning_rate": 4.785506039150354e-06,
"step": 1150
},
{
"embedding_loss": 0.1309,
"epoch": 0.12496094970321775,
"grad_norm": 1.26534104347229,
"learning_rate": 4.993752603082049e-06,
"step": 1200
},
{
"embedding_loss": 0.1198,
"epoch": 0.1301676559408518,
"grad_norm": 0.8742411136627197,
"learning_rate": 5.201999167013745e-06,
"step": 1250
},
{
"embedding_loss": 0.1182,
"epoch": 0.1353743621784859,
"grad_norm": 1.0818109512329102,
"learning_rate": 5.41024573094544e-06,
"step": 1300
},
{
"embedding_loss": 0.1099,
"epoch": 0.14058106841611998,
"grad_norm": 0.9232504963874817,
"learning_rate": 5.618492294877135e-06,
"step": 1350
},
{
"embedding_loss": 0.1019,
"epoch": 0.14578777465375403,
"grad_norm": 1.013424277305603,
"learning_rate": 5.826738858808831e-06,
"step": 1400
},
{
"embedding_loss": 0.0982,
"epoch": 0.15099448089138812,
"grad_norm": 1.4098010063171387,
"learning_rate": 6.034985422740526e-06,
"step": 1450
},
{
"embedding_loss": 0.1038,
"epoch": 0.15620118712902217,
"grad_norm": 1.015489935874939,
"learning_rate": 6.24323198667222e-06,
"step": 1500
},
{
"embedding_loss": 0.1064,
"epoch": 0.16140789336665626,
"grad_norm": 1.4240305423736572,
"learning_rate": 6.451478550603915e-06,
"step": 1550
},
{
"embedding_loss": 0.1007,
"epoch": 0.16661459960429031,
"grad_norm": 1.1620804071426392,
"learning_rate": 6.659725114535611e-06,
"step": 1600
},
{
"embedding_loss": 0.0899,
"epoch": 0.1718213058419244,
"grad_norm": 2.043187379837036,
"learning_rate": 6.867971678467306e-06,
"step": 1650
},
{
"embedding_loss": 0.1019,
"epoch": 0.17702801207955848,
"grad_norm": 1.0461031198501587,
"learning_rate": 7.076218242399001e-06,
"step": 1700
},
{
"embedding_loss": 0.0954,
"epoch": 0.18223471831719254,
"grad_norm": 1.0107996463775635,
"learning_rate": 7.284464806330697e-06,
"step": 1750
},
{
"embedding_loss": 0.0799,
"epoch": 0.18744142455482662,
"grad_norm": 1.0582354068756104,
"learning_rate": 7.492711370262391e-06,
"step": 1800
},
{
"embedding_loss": 0.0864,
"epoch": 0.19264813079246068,
"grad_norm": 1.3842886686325073,
"learning_rate": 7.700957934194086e-06,
"step": 1850
},
{
"embedding_loss": 0.0863,
"epoch": 0.19785483703009477,
"grad_norm": 1.2879295349121094,
"learning_rate": 7.909204498125781e-06,
"step": 1900
},
{
"embedding_loss": 0.0823,
"epoch": 0.20306154326772882,
"grad_norm": 1.2666908502578735,
"learning_rate": 8.117451062057477e-06,
"step": 1950
},
{
"embedding_loss": 0.083,
"epoch": 0.2082682495053629,
"grad_norm": 1.3312140703201294,
"learning_rate": 8.325697625989172e-06,
"step": 2000
},
{
"embedding_loss": 0.0887,
"epoch": 0.213474955742997,
"grad_norm": 0.9761432409286499,
"learning_rate": 8.533944189920867e-06,
"step": 2050
},
{
"embedding_loss": 0.0796,
"epoch": 0.21868166198063105,
"grad_norm": 1.1199424266815186,
"learning_rate": 8.742190753852562e-06,
"step": 2100
},
{
"embedding_loss": 0.0827,
"epoch": 0.22388836821826513,
"grad_norm": 1.4127182960510254,
"learning_rate": 8.950437317784257e-06,
"step": 2150
},
{
"embedding_loss": 0.0745,
"epoch": 0.2290950744558992,
"grad_norm": 2.0330944061279297,
"learning_rate": 9.158683881715952e-06,
"step": 2200
},
{
"embedding_loss": 0.0752,
"epoch": 0.23430178069353327,
"grad_norm": 1.4823620319366455,
"learning_rate": 9.366930445647648e-06,
"step": 2250
},
{
"embedding_loss": 0.0676,
"epoch": 0.23950848693116733,
"grad_norm": 1.2921267747879028,
"learning_rate": 9.575177009579343e-06,
"step": 2300
},
{
"embedding_loss": 0.0616,
"epoch": 0.24471519316880141,
"grad_norm": 1.8864023685455322,
"learning_rate": 9.783423573511038e-06,
"step": 2350
},
{
"embedding_loss": 0.0661,
"epoch": 0.2499218994064355,
"grad_norm": 1.3003889322280884,
"learning_rate": 9.991670137442733e-06,
"step": 2400
},
{
"embedding_loss": 0.0587,
"epoch": 0.2551286056440696,
"grad_norm": 1.0105185508728027,
"learning_rate": 1.0199916701374428e-05,
"step": 2450
},
{
"embedding_loss": 0.0576,
"epoch": 0.2603353118817036,
"grad_norm": 1.3157322406768799,
"learning_rate": 1.0408163265306123e-05,
"step": 2500
},
{
"embedding_loss": 0.0548,
"epoch": 0.2655420181193377,
"grad_norm": 1.5181084871292114,
"learning_rate": 1.0616409829237819e-05,
"step": 2550
},
{
"embedding_loss": 0.0549,
"epoch": 0.2707487243569718,
"grad_norm": 0.9998575448989868,
"learning_rate": 1.0824656393169512e-05,
"step": 2600
},
{
"embedding_loss": 0.0542,
"epoch": 0.27595543059460587,
"grad_norm": 1.8933945894241333,
"learning_rate": 1.1032902957101209e-05,
"step": 2650
},
{
"embedding_loss": 0.0551,
"epoch": 0.28116213683223995,
"grad_norm": 0.8569897413253784,
"learning_rate": 1.1241149521032904e-05,
"step": 2700
},
{
"embedding_loss": 0.0535,
"epoch": 0.286368843069874,
"grad_norm": 2.692810297012329,
"learning_rate": 1.14493960849646e-05,
"step": 2750
},
{
"embedding_loss": 0.0512,
"epoch": 0.29157554930750806,
"grad_norm": 1.0588935613632202,
"learning_rate": 1.1657642648896294e-05,
"step": 2800
},
{
"embedding_loss": 0.0471,
"epoch": 0.29678225554514215,
"grad_norm": 1.4624029397964478,
"learning_rate": 1.186588921282799e-05,
"step": 2850
},
{
"embedding_loss": 0.0418,
"epoch": 0.30198896178277623,
"grad_norm": 1.4698013067245483,
"learning_rate": 1.2074135776759683e-05,
"step": 2900
},
{
"embedding_loss": 0.0456,
"epoch": 0.30719566802041026,
"grad_norm": 2.7604193687438965,
"learning_rate": 1.2282382340691378e-05,
"step": 2950
},
{
"embedding_loss": 0.0426,
"epoch": 0.31240237425804435,
"grad_norm": 1.8741382360458374,
"learning_rate": 1.2490628904623075e-05,
"step": 3000
},
{
"embedding_loss": 0.0381,
"epoch": 0.31760908049567843,
"grad_norm": 1.6704306602478027,
"learning_rate": 1.269887546855477e-05,
"step": 3050
},
{
"embedding_loss": 0.0476,
"epoch": 0.3228157867333125,
"grad_norm": 1.6957885026931763,
"learning_rate": 1.2907122032486465e-05,
"step": 3100
},
{
"embedding_loss": 0.0384,
"epoch": 0.3280224929709466,
"grad_norm": 1.3916475772857666,
"learning_rate": 1.311536859641816e-05,
"step": 3150
},
{
"embedding_loss": 0.0398,
"epoch": 0.33322919920858063,
"grad_norm": 1.3985787630081177,
"learning_rate": 1.3323615160349854e-05,
"step": 3200
},
{
"embedding_loss": 0.0383,
"epoch": 0.3384359054462147,
"grad_norm": 1.0203100442886353,
"learning_rate": 1.3531861724281549e-05,
"step": 3250
},
{
"embedding_loss": 0.0355,
"epoch": 0.3436426116838488,
"grad_norm": 1.1589559316635132,
"learning_rate": 1.3740108288213246e-05,
"step": 3300
},
{
"embedding_loss": 0.0391,
"epoch": 0.3488493179214829,
"grad_norm": 0.5909947752952576,
"learning_rate": 1.3948354852144941e-05,
"step": 3350
},
{
"embedding_loss": 0.0376,
"epoch": 0.35405602415911697,
"grad_norm": 1.096784234046936,
"learning_rate": 1.4156601416076636e-05,
"step": 3400
},
{
"embedding_loss": 0.0372,
"epoch": 0.359262730396751,
"grad_norm": 2.7773685455322266,
"learning_rate": 1.4364847980008331e-05,
"step": 3450
},
{
"embedding_loss": 0.0354,
"epoch": 0.3644694366343851,
"grad_norm": 1.3911575078964233,
"learning_rate": 1.4573094543940025e-05,
"step": 3500
},
{
"embedding_loss": 0.0292,
"epoch": 0.36967614287201916,
"grad_norm": 1.4253603219985962,
"learning_rate": 1.478134110787172e-05,
"step": 3550
},
{
"embedding_loss": 0.0341,
"epoch": 0.37488284910965325,
"grad_norm": 0.5666287541389465,
"learning_rate": 1.4989587671803415e-05,
"step": 3600
},
{
"embedding_loss": 0.032,
"epoch": 0.38008955534728733,
"grad_norm": 0.7250155210494995,
"learning_rate": 1.5197834235735112e-05,
"step": 3650
},
{
"embedding_loss": 0.0312,
"epoch": 0.38529626158492136,
"grad_norm": 1.2066556215286255,
"learning_rate": 1.5406080799666807e-05,
"step": 3700
},
{
"embedding_loss": 0.0286,
"epoch": 0.39050296782255545,
"grad_norm": 1.999817967414856,
"learning_rate": 1.5614327363598502e-05,
"step": 3750
},
{
"embedding_loss": 0.0303,
"epoch": 0.39570967406018953,
"grad_norm": 2.4242656230926514,
"learning_rate": 1.5822573927530198e-05,
"step": 3800
},
{
"embedding_loss": 0.0321,
"epoch": 0.4009163802978236,
"grad_norm": 1.0087485313415527,
"learning_rate": 1.6030820491461893e-05,
"step": 3850
},
{
"embedding_loss": 0.0291,
"epoch": 0.40612308653545764,
"grad_norm": 0.9711636900901794,
"learning_rate": 1.6239067055393588e-05,
"step": 3900
},
{
"embedding_loss": 0.0267,
"epoch": 0.41132979277309173,
"grad_norm": 2.3323183059692383,
"learning_rate": 1.6447313619325283e-05,
"step": 3950
},
{
"embedding_loss": 0.0315,
"epoch": 0.4165364990107258,
"grad_norm": 0.25967147946357727,
"learning_rate": 1.6655560183256978e-05,
"step": 4000
},
{
"embedding_loss": 0.026,
"epoch": 0.4217432052483599,
"grad_norm": 0.5975779294967651,
"learning_rate": 1.6863806747188673e-05,
"step": 4050
},
{
"embedding_loss": 0.0222,
"epoch": 0.426949911485994,
"grad_norm": 2.1180572509765625,
"learning_rate": 1.707205331112037e-05,
"step": 4100
},
{
"embedding_loss": 0.0226,
"epoch": 0.432156617723628,
"grad_norm": 2.647836923599243,
"learning_rate": 1.7280299875052064e-05,
"step": 4150
},
{
"embedding_loss": 0.025,
"epoch": 0.4373633239612621,
"grad_norm": 1.7642154693603516,
"learning_rate": 1.748854643898376e-05,
"step": 4200
},
{
"embedding_loss": 0.0258,
"epoch": 0.4425700301988962,
"grad_norm": 0.6594904065132141,
"learning_rate": 1.7696793002915454e-05,
"step": 4250
},
{
"embedding_loss": 0.0227,
"epoch": 0.44777673643653026,
"grad_norm": 1.1913025379180908,
"learning_rate": 1.790503956684715e-05,
"step": 4300
},
{
"embedding_loss": 0.025,
"epoch": 0.45298344267416435,
"grad_norm": 2.221813201904297,
"learning_rate": 1.8113286130778844e-05,
"step": 4350
},
{
"embedding_loss": 0.0192,
"epoch": 0.4581901489117984,
"grad_norm": 0.7045194506645203,
"learning_rate": 1.832153269471054e-05,
"step": 4400
},
{
"embedding_loss": 0.0214,
"epoch": 0.46339685514943246,
"grad_norm": 1.9471172094345093,
"learning_rate": 1.8529779258642235e-05,
"step": 4450
},
{
"embedding_loss": 0.0232,
"epoch": 0.46860356138706655,
"grad_norm": 0.8062028288841248,
"learning_rate": 1.873802582257393e-05,
"step": 4500
},
{
"embedding_loss": 0.0201,
"epoch": 0.47381026762470063,
"grad_norm": 0.2602122724056244,
"learning_rate": 1.8946272386505625e-05,
"step": 4550
},
{
"embedding_loss": 0.0234,
"epoch": 0.47901697386233466,
"grad_norm": 0.2223815619945526,
"learning_rate": 1.915451895043732e-05,
"step": 4600
},
{
"embedding_loss": 0.0206,
"epoch": 0.48422368009996875,
"grad_norm": 1.995078682899475,
"learning_rate": 1.9362765514369015e-05,
"step": 4650
},
{
"embedding_loss": 0.0228,
"epoch": 0.48943038633760283,
"grad_norm": 1.3003852367401123,
"learning_rate": 1.957101207830071e-05,
"step": 4700
},
{
"embedding_loss": 0.0194,
"epoch": 0.4946370925752369,
"grad_norm": 0.10561434924602509,
"learning_rate": 1.9779258642232402e-05,
"step": 4750
},
{
"embedding_loss": 0.0197,
"epoch": 0.499843798812871,
"grad_norm": 0.3040192425251007,
"learning_rate": 1.99875052061641e-05,
"step": 4800
},
{
"embedding_loss": 0.0167,
"epoch": 0.5050505050505051,
"grad_norm": 1.1733126640319824,
"learning_rate": 1.9978247286696136e-05,
"step": 4850
},
{
"embedding_loss": 0.0244,
"epoch": 0.5102572112881392,
"grad_norm": 1.6774014234542847,
"learning_rate": 1.995510610233032e-05,
"step": 4900
},
{
"embedding_loss": 0.0152,
"epoch": 0.5154639175257731,
"grad_norm": 0.4171350300312042,
"learning_rate": 1.9931964917964502e-05,
"step": 4950
},
{
"embedding_loss": 0.0191,
"epoch": 0.5206706237634072,
"grad_norm": 0.1614975482225418,
"learning_rate": 1.990882373359869e-05,
"step": 5000
},
{
"embedding_loss": 0.0174,
"epoch": 0.5258773300010413,
"grad_norm": 1.8693324327468872,
"learning_rate": 1.9885682549232872e-05,
"step": 5050
},
{
"embedding_loss": 0.0242,
"epoch": 0.5310840362386754,
"grad_norm": 0.42837658524513245,
"learning_rate": 1.9862541364867056e-05,
"step": 5100
},
{
"embedding_loss": 0.0202,
"epoch": 0.5362907424763095,
"grad_norm": 1.6628985404968262,
"learning_rate": 1.983940018050124e-05,
"step": 5150
},
{
"embedding_loss": 0.0186,
"epoch": 0.5414974487139436,
"grad_norm": 0.25852930545806885,
"learning_rate": 1.9816258996135426e-05,
"step": 5200
},
{
"embedding_loss": 0.0172,
"epoch": 0.5467041549515776,
"grad_norm": 1.1750682592391968,
"learning_rate": 1.979311781176961e-05,
"step": 5250
},
{
"embedding_loss": 0.0208,
"epoch": 0.5519108611892117,
"grad_norm": 3.196448564529419,
"learning_rate": 1.9769976627403792e-05,
"step": 5300
},
{
"embedding_loss": 0.0185,
"epoch": 0.5571175674268458,
"grad_norm": 0.057756174355745316,
"learning_rate": 1.974683544303798e-05,
"step": 5350
},
{
"embedding_loss": 0.0177,
"epoch": 0.5623242736644799,
"grad_norm": 0.08262369781732559,
"learning_rate": 1.9723694258672162e-05,
"step": 5400
},
{
"embedding_loss": 0.0169,
"epoch": 0.5675309799021139,
"grad_norm": 1.7123504877090454,
"learning_rate": 1.9700553074306345e-05,
"step": 5450
},
{
"embedding_loss": 0.0186,
"epoch": 0.572737686139748,
"grad_norm": 0.9821128249168396,
"learning_rate": 1.967741188994053e-05,
"step": 5500
},
{
"embedding_loss": 0.014,
"epoch": 0.577944392377382,
"grad_norm": 0.0532955676317215,
"learning_rate": 1.9654270705574715e-05,
"step": 5550
},
{
"embedding_loss": 0.0166,
"epoch": 0.5831510986150161,
"grad_norm": 0.13396751880645752,
"learning_rate": 1.96311295212089e-05,
"step": 5600
},
{
"embedding_loss": 0.0129,
"epoch": 0.5883578048526502,
"grad_norm": 0.14156009256839752,
"learning_rate": 1.9607988336843082e-05,
"step": 5650
},
{
"embedding_loss": 0.0114,
"epoch": 0.5935645110902843,
"grad_norm": 0.9108484387397766,
"learning_rate": 1.9584847152477265e-05,
"step": 5700
},
{
"embedding_loss": 0.0152,
"epoch": 0.5987712173279184,
"grad_norm": 0.18236614763736725,
"learning_rate": 1.9561705968111448e-05,
"step": 5750
},
{
"embedding_loss": 0.0134,
"epoch": 0.6039779235655525,
"grad_norm": 1.664371371269226,
"learning_rate": 1.953856478374563e-05,
"step": 5800
},
{
"embedding_loss": 0.0156,
"epoch": 0.6091846298031866,
"grad_norm": 0.9308391213417053,
"learning_rate": 1.9515423599379818e-05,
"step": 5850
},
{
"embedding_loss": 0.0144,
"epoch": 0.6143913360408205,
"grad_norm": 0.10233943164348602,
"learning_rate": 1.9492282415014e-05,
"step": 5900
},
{
"embedding_loss": 0.0128,
"epoch": 0.6195980422784546,
"grad_norm": 0.1212044283747673,
"learning_rate": 1.9469141230648185e-05,
"step": 5950
},
{
"embedding_loss": 0.0141,
"epoch": 0.6248047485160887,
"grad_norm": 0.03808877244591713,
"learning_rate": 1.9446000046282368e-05,
"step": 6000
},
{
"embedding_loss": 0.0129,
"epoch": 0.6300114547537228,
"grad_norm": 1.263411521911621,
"learning_rate": 1.9422858861916555e-05,
"step": 6050
},
{
"embedding_loss": 0.0128,
"epoch": 0.6352181609913569,
"grad_norm": 1.1934914588928223,
"learning_rate": 1.9399717677550738e-05,
"step": 6100
},
{
"embedding_loss": 0.0131,
"epoch": 0.640424867228991,
"grad_norm": 0.5088186264038086,
"learning_rate": 1.937657649318492e-05,
"step": 6150
},
{
"embedding_loss": 0.0133,
"epoch": 0.645631573466625,
"grad_norm": 0.2881380319595337,
"learning_rate": 1.9353435308819108e-05,
"step": 6200
},
{
"embedding_loss": 0.012,
"epoch": 0.6508382797042591,
"grad_norm": 0.1978471279144287,
"learning_rate": 1.933029412445329e-05,
"step": 6250
},
{
"embedding_loss": 0.0131,
"epoch": 0.6560449859418932,
"grad_norm": 1.7363338470458984,
"learning_rate": 1.9307152940087474e-05,
"step": 6300
},
{
"embedding_loss": 0.0124,
"epoch": 0.6612516921795273,
"grad_norm": 0.1304263323545456,
"learning_rate": 1.9284011755721658e-05,
"step": 6350
},
{
"embedding_loss": 0.0138,
"epoch": 0.6664583984171613,
"grad_norm": 0.15851274132728577,
"learning_rate": 1.9260870571355844e-05,
"step": 6400
},
{
"embedding_loss": 0.0141,
"epoch": 0.6716651046547953,
"grad_norm": 1.2692539691925049,
"learning_rate": 1.9237729386990027e-05,
"step": 6450
},
{
"embedding_loss": 0.0106,
"epoch": 0.6768718108924294,
"grad_norm": 0.30289334058761597,
"learning_rate": 1.921458820262421e-05,
"step": 6500
},
{
"embedding_loss": 0.0125,
"epoch": 0.6820785171300635,
"grad_norm": 0.02164456807076931,
"learning_rate": 1.9191447018258394e-05,
"step": 6550
},
{
"embedding_loss": 0.0132,
"epoch": 0.6872852233676976,
"grad_norm": 0.34430477023124695,
"learning_rate": 1.916830583389258e-05,
"step": 6600
},
{
"embedding_loss": 0.0143,
"epoch": 0.6924919296053317,
"grad_norm": 0.2521458864212036,
"learning_rate": 1.9145164649526764e-05,
"step": 6650
},
{
"embedding_loss": 0.0127,
"epoch": 0.6976986358429658,
"grad_norm": 0.6990224719047546,
"learning_rate": 1.9122023465160947e-05,
"step": 6700
},
{
"embedding_loss": 0.014,
"epoch": 0.7029053420805998,
"grad_norm": 0.07170717418193817,
"learning_rate": 1.9098882280795134e-05,
"step": 6750
},
{
"embedding_loss": 0.0118,
"epoch": 0.7081120483182339,
"grad_norm": 0.18331408500671387,
"learning_rate": 1.9075741096429317e-05,
"step": 6800
},
{
"embedding_loss": 0.0116,
"epoch": 0.7133187545558679,
"grad_norm": 1.0223900079727173,
"learning_rate": 1.90525999120635e-05,
"step": 6850
},
{
"embedding_loss": 0.0101,
"epoch": 0.718525460793502,
"grad_norm": 0.08013039082288742,
"learning_rate": 1.9029458727697684e-05,
"step": 6900
},
{
"embedding_loss": 0.0119,
"epoch": 0.7237321670311361,
"grad_norm": 1.7682616710662842,
"learning_rate": 1.900631754333187e-05,
"step": 6950
},
{
"embedding_loss": 0.0095,
"epoch": 0.7289388732687702,
"grad_norm": 0.10093237459659576,
"learning_rate": 1.8983176358966054e-05,
"step": 7000
},
{
"embedding_loss": 0.009,
"epoch": 0.7341455795064042,
"grad_norm": 2.3766512870788574,
"learning_rate": 1.8960035174600237e-05,
"step": 7050
},
{
"embedding_loss": 0.0086,
"epoch": 0.7393522857440383,
"grad_norm": 0.019304808229207993,
"learning_rate": 1.893689399023442e-05,
"step": 7100
},
{
"embedding_loss": 0.0083,
"epoch": 0.7445589919816724,
"grad_norm": 0.08419201523065567,
"learning_rate": 1.8913752805868607e-05,
"step": 7150
},
{
"embedding_loss": 0.0144,
"epoch": 0.7497656982193065,
"grad_norm": 1.224936842918396,
"learning_rate": 1.889061162150279e-05,
"step": 7200
},
{
"embedding_loss": 0.0107,
"epoch": 0.7549724044569406,
"grad_norm": 0.3462272584438324,
"learning_rate": 1.8867470437136973e-05,
"step": 7250
},
{
"embedding_loss": 0.0088,
"epoch": 0.7601791106945747,
"grad_norm": 0.03107067011296749,
"learning_rate": 1.884432925277116e-05,
"step": 7300
},
{
"embedding_loss": 0.0096,
"epoch": 0.7653858169322086,
"grad_norm": 0.12990835309028625,
"learning_rate": 1.8821188068405343e-05,
"step": 7350
},
{
"embedding_loss": 0.0073,
"epoch": 0.7705925231698427,
"grad_norm": 0.10413219034671783,
"learning_rate": 1.8798046884039526e-05,
"step": 7400
},
{
"embedding_loss": 0.0063,
"epoch": 0.7757992294074768,
"grad_norm": 0.39868220686912537,
"learning_rate": 1.877490569967371e-05,
"step": 7450
},
{
"embedding_loss": 0.0096,
"epoch": 0.7810059356451109,
"grad_norm": 0.4435900151729584,
"learning_rate": 1.8751764515307896e-05,
"step": 7500
},
{
"embedding_loss": 0.0091,
"epoch": 0.786212641882745,
"grad_norm": 1.100035309791565,
"learning_rate": 1.872862333094208e-05,
"step": 7550
},
{
"embedding_loss": 0.01,
"epoch": 0.7914193481203791,
"grad_norm": 0.12100836634635925,
"learning_rate": 1.8705482146576263e-05,
"step": 7600
},
{
"embedding_loss": 0.0093,
"epoch": 0.7966260543580131,
"grad_norm": 0.38435640931129456,
"learning_rate": 1.868234096221045e-05,
"step": 7650
},
{
"embedding_loss": 0.0121,
"epoch": 0.8018327605956472,
"grad_norm": 0.8386930823326111,
"learning_rate": 1.8659199777844633e-05,
"step": 7700
},
{
"embedding_loss": 0.014,
"epoch": 0.8070394668332813,
"grad_norm": 0.4830886423587799,
"learning_rate": 1.8636058593478816e-05,
"step": 7750
},
{
"embedding_loss": 0.0078,
"epoch": 0.8122461730709153,
"grad_norm": 0.026604199782013893,
"learning_rate": 1.8612917409113e-05,
"step": 7800
},
{
"embedding_loss": 0.0082,
"epoch": 0.8174528793085494,
"grad_norm": 0.5969211459159851,
"learning_rate": 1.8589776224747186e-05,
"step": 7850
},
{
"embedding_loss": 0.0086,
"epoch": 0.8226595855461835,
"grad_norm": 0.06108603999018669,
"learning_rate": 1.856663504038137e-05,
"step": 7900
},
{
"embedding_loss": 0.0066,
"epoch": 0.8278662917838175,
"grad_norm": 0.3239186406135559,
"learning_rate": 1.8543493856015553e-05,
"step": 7950
},
{
"embedding_loss": 0.0112,
"epoch": 0.8330729980214516,
"grad_norm": 0.2972595989704132,
"learning_rate": 1.8520352671649736e-05,
"step": 8000
},
{
"embedding_loss": 0.0073,
"epoch": 0.8382797042590857,
"grad_norm": 0.533140242099762,
"learning_rate": 1.8497211487283922e-05,
"step": 8050
},
{
"embedding_loss": 0.0078,
"epoch": 0.8434864104967198,
"grad_norm": 1.5684537887573242,
"learning_rate": 1.8474070302918106e-05,
"step": 8100
},
{
"embedding_loss": 0.0087,
"epoch": 0.8486931167343539,
"grad_norm": 0.4422908425331116,
"learning_rate": 1.845092911855229e-05,
"step": 8150
},
{
"embedding_loss": 0.012,
"epoch": 0.853899822971988,
"grad_norm": 0.5563941597938538,
"learning_rate": 1.8427787934186476e-05,
"step": 8200
},
{
"embedding_loss": 0.0088,
"epoch": 0.8591065292096219,
"grad_norm": 0.3089462220668793,
"learning_rate": 1.840464674982066e-05,
"step": 8250
},
{
"embedding_loss": 0.0122,
"epoch": 0.864313235447256,
"grad_norm": 4.295806884765625,
"learning_rate": 1.8381505565454842e-05,
"step": 8300
},
{
"embedding_loss": 0.0091,
"epoch": 0.8695199416848901,
"grad_norm": 0.8506584763526917,
"learning_rate": 1.8358364381089025e-05,
"step": 8350
},
{
"embedding_loss": 0.01,
"epoch": 0.8747266479225242,
"grad_norm": 0.9140012264251709,
"learning_rate": 1.8335223196723212e-05,
"step": 8400
},
{
"embedding_loss": 0.0095,
"epoch": 0.8799333541601583,
"grad_norm": 0.9452886581420898,
"learning_rate": 1.8312082012357395e-05,
"step": 8450
},
{
"embedding_loss": 0.0051,
"epoch": 0.8851400603977924,
"grad_norm": 0.34865090250968933,
"learning_rate": 1.828894082799158e-05,
"step": 8500
},
{
"embedding_loss": 0.0109,
"epoch": 0.8903467666354264,
"grad_norm": 0.027646692469716072,
"learning_rate": 1.8265799643625762e-05,
"step": 8550
},
{
"embedding_loss": 0.0074,
"epoch": 0.8955534728730605,
"grad_norm": 0.28435996174812317,
"learning_rate": 1.8242658459259945e-05,
"step": 8600
},
{
"embedding_loss": 0.0048,
"epoch": 0.9007601791106946,
"grad_norm": 0.1109330877661705,
"learning_rate": 1.821951727489413e-05,
"step": 8650
},
{
"embedding_loss": 0.0089,
"epoch": 0.9059668853483287,
"grad_norm": 0.46810364723205566,
"learning_rate": 1.8196376090528315e-05,
"step": 8700
},
{
"embedding_loss": 0.0087,
"epoch": 0.9111735915859627,
"grad_norm": 0.2674962878227234,
"learning_rate": 1.81732349061625e-05,
"step": 8750
},
{
"embedding_loss": 0.0096,
"epoch": 0.9163802978235968,
"grad_norm": 3.0557987689971924,
"learning_rate": 1.815009372179668e-05,
"step": 8800
},
{
"embedding_loss": 0.0085,
"epoch": 0.9215870040612308,
"grad_norm": 0.6088097095489502,
"learning_rate": 1.8126952537430865e-05,
"step": 8850
},
{
"embedding_loss": 0.0083,
"epoch": 0.9267937102988649,
"grad_norm": 0.12588393688201904,
"learning_rate": 1.810381135306505e-05,
"step": 8900
},
{
"embedding_loss": 0.009,
"epoch": 0.932000416536499,
"grad_norm": 0.46597975492477417,
"learning_rate": 1.8080670168699235e-05,
"step": 8950
},
{
"embedding_loss": 0.0078,
"epoch": 0.9372071227741331,
"grad_norm": 0.03179040551185608,
"learning_rate": 1.8057528984333418e-05,
"step": 9000
},
{
"embedding_loss": 0.0096,
"epoch": 0.9424138290117672,
"grad_norm": 0.476052463054657,
"learning_rate": 1.8034387799967605e-05,
"step": 9050
},
{
"embedding_loss": 0.0084,
"epoch": 0.9476205352494013,
"grad_norm": 0.6995823979377747,
"learning_rate": 1.8011246615601788e-05,
"step": 9100
},
{
"embedding_loss": 0.0073,
"epoch": 0.9528272414870353,
"grad_norm": 0.042539093643426895,
"learning_rate": 1.798810543123597e-05,
"step": 9150
},
{
"embedding_loss": 0.0055,
"epoch": 0.9580339477246693,
"grad_norm": 0.024517321959137917,
"learning_rate": 1.7964964246870154e-05,
"step": 9200
},
{
"embedding_loss": 0.0059,
"epoch": 0.9632406539623034,
"grad_norm": 0.020516090095043182,
"learning_rate": 1.794182306250434e-05,
"step": 9250
},
{
"embedding_loss": 0.008,
"epoch": 0.9684473601999375,
"grad_norm": 0.07251976430416107,
"learning_rate": 1.7918681878138524e-05,
"step": 9300
},
{
"embedding_loss": 0.0112,
"epoch": 0.9736540664375716,
"grad_norm": 0.12063586711883545,
"learning_rate": 1.7895540693772708e-05,
"step": 9350
},
{
"embedding_loss": 0.0045,
"epoch": 0.9788607726752057,
"grad_norm": 0.36446037888526917,
"learning_rate": 1.787239950940689e-05,
"step": 9400
},
{
"embedding_loss": 0.0086,
"epoch": 0.9840674789128397,
"grad_norm": 0.08372894674539566,
"learning_rate": 1.7849258325041078e-05,
"step": 9450
},
{
"embedding_loss": 0.0049,
"epoch": 0.9892741851504738,
"grad_norm": 0.04579677805304527,
"learning_rate": 1.782611714067526e-05,
"step": 9500
},
{
"embedding_loss": 0.0056,
"epoch": 0.9944808913881079,
"grad_norm": 0.1182708889245987,
"learning_rate": 1.7802975956309444e-05,
"step": 9550
},
{
"embedding_loss": 0.0067,
"epoch": 0.999687597625742,
"grad_norm": 0.01671171560883522,
"learning_rate": 1.777983477194363e-05,
"step": 9600
},
{
"embedding_loss": 0.0058,
"epoch": 1.004894303863376,
"grad_norm": 0.9829951524734497,
"learning_rate": 1.7756693587577814e-05,
"step": 9650
},
{
"embedding_loss": 0.0132,
"epoch": 1.0101010101010102,
"grad_norm": 0.0442439503967762,
"learning_rate": 1.7733552403211997e-05,
"step": 9700
},
{
"embedding_loss": 0.0076,
"epoch": 1.0153077163386441,
"grad_norm": 0.031697243452072144,
"learning_rate": 1.771041121884618e-05,
"step": 9750
},
{
"embedding_loss": 0.0069,
"epoch": 1.0205144225762783,
"grad_norm": 0.019380003213882446,
"learning_rate": 1.7687270034480367e-05,
"step": 9800
},
{
"embedding_loss": 0.0072,
"epoch": 1.0257211288139123,
"grad_norm": 0.03249906376004219,
"learning_rate": 1.766412885011455e-05,
"step": 9850
},
{
"embedding_loss": 0.0092,
"epoch": 1.0309278350515463,
"grad_norm": 0.3388296663761139,
"learning_rate": 1.7640987665748734e-05,
"step": 9900
},
{
"embedding_loss": 0.0077,
"epoch": 1.0361345412891805,
"grad_norm": 0.1678103804588318,
"learning_rate": 1.7617846481382917e-05,
"step": 9950
},
{
"embedding_loss": 0.0073,
"epoch": 1.0413412475268145,
"grad_norm": 0.015974771231412888,
"learning_rate": 1.7594705297017104e-05,
"step": 10000
},
{
"embedding_loss": 0.0071,
"epoch": 1.0465479537644486,
"grad_norm": 0.041760511696338654,
"learning_rate": 1.7571564112651287e-05,
"step": 10050
},
{
"embedding_loss": 0.0082,
"epoch": 1.0517546600020826,
"grad_norm": 1.2133060693740845,
"learning_rate": 1.754842292828547e-05,
"step": 10100
},
{
"embedding_loss": 0.0046,
"epoch": 1.0569613662397168,
"grad_norm": 0.04206147417426109,
"learning_rate": 1.7525281743919657e-05,
"step": 10150
},
{
"embedding_loss": 0.0053,
"epoch": 1.0621680724773508,
"grad_norm": 0.18272073566913605,
"learning_rate": 1.750214055955384e-05,
"step": 10200
},
{
"embedding_loss": 0.0044,
"epoch": 1.067374778714985,
"grad_norm": 0.03547310084104538,
"learning_rate": 1.7478999375188023e-05,
"step": 10250
},
{
"embedding_loss": 0.0043,
"epoch": 1.072581484952619,
"grad_norm": 0.0350540354847908,
"learning_rate": 1.7455858190822207e-05,
"step": 10300
},
{
"embedding_loss": 0.0052,
"epoch": 1.077788191190253,
"grad_norm": 1.6414012908935547,
"learning_rate": 1.7432717006456393e-05,
"step": 10350
},
{
"embedding_loss": 0.004,
"epoch": 1.0829948974278871,
"grad_norm": 3.3341734409332275,
"learning_rate": 1.7409575822090577e-05,
"step": 10400
},
{
"embedding_loss": 0.0082,
"epoch": 1.088201603665521,
"grad_norm": 0.01878177374601364,
"learning_rate": 1.738643463772476e-05,
"step": 10450
},
{
"embedding_loss": 0.006,
"epoch": 1.0934083099031553,
"grad_norm": 0.5989029407501221,
"learning_rate": 1.7363293453358946e-05,
"step": 10500
},
{
"embedding_loss": 0.0063,
"epoch": 1.0986150161407893,
"grad_norm": 0.23778136074543,
"learning_rate": 1.734015226899313e-05,
"step": 10550
},
{
"embedding_loss": 0.0038,
"epoch": 1.1038217223784235,
"grad_norm": 0.012218566611409187,
"learning_rate": 1.7317011084627313e-05,
"step": 10600
},
{
"embedding_loss": 0.0049,
"epoch": 1.1090284286160574,
"grad_norm": 0.05297623947262764,
"learning_rate": 1.7293869900261496e-05,
"step": 10650
},
{
"embedding_loss": 0.0062,
"epoch": 1.1142351348536916,
"grad_norm": 0.5116108655929565,
"learning_rate": 1.7270728715895683e-05,
"step": 10700
},
{
"embedding_loss": 0.0084,
"epoch": 1.1194418410913256,
"grad_norm": 0.4478176236152649,
"learning_rate": 1.7247587531529866e-05,
"step": 10750
},
{
"embedding_loss": 0.0057,
"epoch": 1.1246485473289598,
"grad_norm": 0.4622497856616974,
"learning_rate": 1.722444634716405e-05,
"step": 10800
},
{
"embedding_loss": 0.0074,
"epoch": 1.1298552535665938,
"grad_norm": 0.017630133777856827,
"learning_rate": 1.7201305162798233e-05,
"step": 10850
},
{
"embedding_loss": 0.0056,
"epoch": 1.1350619598042277,
"grad_norm": 0.6077584624290466,
"learning_rate": 1.717816397843242e-05,
"step": 10900
},
{
"embedding_loss": 0.0078,
"epoch": 1.140268666041862,
"grad_norm": 0.18036994338035583,
"learning_rate": 1.7155022794066603e-05,
"step": 10950
},
{
"embedding_loss": 0.0059,
"epoch": 1.145475372279496,
"grad_norm": 0.009565812535583973,
"learning_rate": 1.7131881609700786e-05,
"step": 11000
},
{
"embedding_loss": 0.0062,
"epoch": 1.1506820785171301,
"grad_norm": 2.7242627143859863,
"learning_rate": 1.7108740425334973e-05,
"step": 11050
},
{
"embedding_loss": 0.0054,
"epoch": 1.155888784754764,
"grad_norm": 0.017238834872841835,
"learning_rate": 1.7085599240969156e-05,
"step": 11100
},
{
"embedding_loss": 0.006,
"epoch": 1.1610954909923983,
"grad_norm": 1.461991548538208,
"learning_rate": 1.706245805660334e-05,
"step": 11150
},
{
"embedding_loss": 0.0077,
"epoch": 1.1663021972300323,
"grad_norm": 0.11797866970300674,
"learning_rate": 1.7039316872237522e-05,
"step": 11200
},
{
"embedding_loss": 0.005,
"epoch": 1.1715089034676662,
"grad_norm": 0.040576279163360596,
"learning_rate": 1.701617568787171e-05,
"step": 11250
},
{
"embedding_loss": 0.0061,
"epoch": 1.1767156097053004,
"grad_norm": 0.013650750741362572,
"learning_rate": 1.6993034503505892e-05,
"step": 11300
},
{
"embedding_loss": 0.0043,
"epoch": 1.1819223159429346,
"grad_norm": 0.013326168991625309,
"learning_rate": 1.6969893319140075e-05,
"step": 11350
},
{
"embedding_loss": 0.0061,
"epoch": 1.1871290221805686,
"grad_norm": 0.07993318140506744,
"learning_rate": 1.694675213477426e-05,
"step": 11400
},
{
"embedding_loss": 0.0054,
"epoch": 1.1923357284182026,
"grad_norm": 0.38105425238609314,
"learning_rate": 1.6923610950408442e-05,
"step": 11450
},
{
"embedding_loss": 0.0046,
"epoch": 1.1975424346558368,
"grad_norm": 0.13614040613174438,
"learning_rate": 1.6900469766042625e-05,
"step": 11500
},
{
"embedding_loss": 0.0054,
"epoch": 1.2027491408934707,
"grad_norm": 1.016570806503296,
"learning_rate": 1.6877328581676812e-05,
"step": 11550
},
{
"embedding_loss": 0.0039,
"epoch": 1.207955847131105,
"grad_norm": 0.4211491644382477,
"learning_rate": 1.6854187397310995e-05,
"step": 11600
},
{
"embedding_loss": 0.0076,
"epoch": 1.213162553368739,
"grad_norm": 0.020438892766833305,
"learning_rate": 1.683104621294518e-05,
"step": 11650
},
{
"embedding_loss": 0.0064,
"epoch": 1.218369259606373,
"grad_norm": 0.043074000626802444,
"learning_rate": 1.6807905028579362e-05,
"step": 11700
},
{
"embedding_loss": 0.0068,
"epoch": 1.223575965844007,
"grad_norm": 0.1304844170808792,
"learning_rate": 1.678476384421355e-05,
"step": 11750
},
{
"embedding_loss": 0.0038,
"epoch": 1.2287826720816413,
"grad_norm": 0.10536648333072662,
"learning_rate": 1.676162265984773e-05,
"step": 11800
},
{
"embedding_loss": 0.0053,
"epoch": 1.2339893783192752,
"grad_norm": 0.2895510792732239,
"learning_rate": 1.6738481475481915e-05,
"step": 11850
},
{
"embedding_loss": 0.0074,
"epoch": 1.2391960845569092,
"grad_norm": 0.14891253411769867,
"learning_rate": 1.67153402911161e-05,
"step": 11900
},
{
"embedding_loss": 0.0083,
"epoch": 1.2444027907945434,
"grad_norm": 0.03617144003510475,
"learning_rate": 1.6692199106750285e-05,
"step": 11950
},
{
"embedding_loss": 0.0053,
"epoch": 1.2496094970321774,
"grad_norm": 0.031169302761554718,
"learning_rate": 1.6669057922384468e-05,
"step": 12000
},
{
"embedding_loss": 0.004,
"epoch": 1.2548162032698116,
"grad_norm": 0.3214148283004761,
"learning_rate": 1.664591673801865e-05,
"step": 12050
},
{
"embedding_loss": 0.0052,
"epoch": 1.2600229095074456,
"grad_norm": 1.033286213874817,
"learning_rate": 1.6622775553652838e-05,
"step": 12100
},
{
"embedding_loss": 0.0079,
"epoch": 1.2652296157450795,
"grad_norm": 0.014789101667702198,
"learning_rate": 1.659963436928702e-05,
"step": 12150
},
{
"embedding_loss": 0.004,
"epoch": 1.2704363219827137,
"grad_norm": 0.5162740349769592,
"learning_rate": 1.6576493184921205e-05,
"step": 12200
},
{
"embedding_loss": 0.004,
"epoch": 1.275643028220348,
"grad_norm": 0.01165369339287281,
"learning_rate": 1.6553352000555388e-05,
"step": 12250
},
{
"embedding_loss": 0.0063,
"epoch": 1.280849734457982,
"grad_norm": 0.031679488718509674,
"learning_rate": 1.6530210816189574e-05,
"step": 12300
},
{
"embedding_loss": 0.0024,
"epoch": 1.2860564406956159,
"grad_norm": 0.008600637316703796,
"learning_rate": 1.6507069631823758e-05,
"step": 12350
},
{
"embedding_loss": 0.0065,
"epoch": 1.29126314693325,
"grad_norm": 0.014314206317067146,
"learning_rate": 1.648392844745794e-05,
"step": 12400
},
{
"embedding_loss": 0.0068,
"epoch": 1.296469853170884,
"grad_norm": 0.3240402936935425,
"learning_rate": 1.6460787263092128e-05,
"step": 12450
},
{
"embedding_loss": 0.0046,
"epoch": 1.3016765594085182,
"grad_norm": 0.030164631083607674,
"learning_rate": 1.643764607872631e-05,
"step": 12500
},
{
"embedding_loss": 0.0026,
"epoch": 1.3068832656461522,
"grad_norm": 0.0063670300878584385,
"learning_rate": 1.6414504894360494e-05,
"step": 12550
},
{
"embedding_loss": 0.0036,
"epoch": 1.3120899718837864,
"grad_norm": 0.021254096180200577,
"learning_rate": 1.6391363709994677e-05,
"step": 12600
},
{
"embedding_loss": 0.0058,
"epoch": 1.3172966781214204,
"grad_norm": 0.25233790278434753,
"learning_rate": 1.6368222525628864e-05,
"step": 12650
},
{
"embedding_loss": 0.0029,
"epoch": 1.3225033843590546,
"grad_norm": 1.2394205331802368,
"learning_rate": 1.6345081341263047e-05,
"step": 12700
},
{
"embedding_loss": 0.006,
"epoch": 1.3277100905966885,
"grad_norm": 1.9639242887496948,
"learning_rate": 1.632194015689723e-05,
"step": 12750
},
{
"embedding_loss": 0.0052,
"epoch": 1.3329167968343225,
"grad_norm": 0.0384540930390358,
"learning_rate": 1.6298798972531414e-05,
"step": 12800
},
{
"embedding_loss": 0.004,
"epoch": 1.3381235030719567,
"grad_norm": 0.16365939378738403,
"learning_rate": 1.62756577881656e-05,
"step": 12850
},
{
"embedding_loss": 0.0031,
"epoch": 1.3433302093095907,
"grad_norm": 0.03347177803516388,
"learning_rate": 1.6252516603799784e-05,
"step": 12900
},
{
"embedding_loss": 0.005,
"epoch": 1.3485369155472249,
"grad_norm": 0.014545961283147335,
"learning_rate": 1.6229375419433967e-05,
"step": 12950
},
{
"embedding_loss": 0.0079,
"epoch": 1.3537436217848589,
"grad_norm": 0.7268438935279846,
"learning_rate": 1.6206234235068154e-05,
"step": 13000
},
{
"embedding_loss": 0.0058,
"epoch": 1.358950328022493,
"grad_norm": 0.040684785693883896,
"learning_rate": 1.6183093050702337e-05,
"step": 13050
},
{
"embedding_loss": 0.0037,
"epoch": 1.364157034260127,
"grad_norm": 3.591543197631836,
"learning_rate": 1.615995186633652e-05,
"step": 13100
},
{
"embedding_loss": 0.005,
"epoch": 1.3693637404977612,
"grad_norm": 0.037789322435855865,
"learning_rate": 1.6136810681970703e-05,
"step": 13150
},
{
"embedding_loss": 0.0018,
"epoch": 1.3745704467353952,
"grad_norm": 0.549343466758728,
"learning_rate": 1.611366949760489e-05,
"step": 13200
},
{
"embedding_loss": 0.0027,
"epoch": 1.3797771529730292,
"grad_norm": 0.015304960310459137,
"learning_rate": 1.6090528313239073e-05,
"step": 13250
},
{
"embedding_loss": 0.004,
"epoch": 1.3849838592106634,
"grad_norm": 0.010115724988281727,
"learning_rate": 1.6067387128873257e-05,
"step": 13300
},
{
"embedding_loss": 0.0042,
"epoch": 1.3901905654482973,
"grad_norm": 0.004204587545245886,
"learning_rate": 1.6044245944507443e-05,
"step": 13350
},
{
"embedding_loss": 0.0046,
"epoch": 1.3953972716859315,
"grad_norm": 0.04513470083475113,
"learning_rate": 1.6021104760141627e-05,
"step": 13400
},
{
"embedding_loss": 0.0047,
"epoch": 1.4006039779235655,
"grad_norm": 0.21044224500656128,
"learning_rate": 1.599796357577581e-05,
"step": 13450
},
{
"embedding_loss": 0.005,
"epoch": 1.4058106841611997,
"grad_norm": 0.4665778577327728,
"learning_rate": 1.5974822391409993e-05,
"step": 13500
},
{
"embedding_loss": 0.0047,
"epoch": 1.4110173903988337,
"grad_norm": 0.03980934992432594,
"learning_rate": 1.595168120704418e-05,
"step": 13550
},
{
"embedding_loss": 0.0052,
"epoch": 1.4162240966364679,
"grad_norm": 0.08631590753793716,
"learning_rate": 1.5928540022678363e-05,
"step": 13600
},
{
"embedding_loss": 0.0044,
"epoch": 1.4214308028741018,
"grad_norm": 0.008251226507127285,
"learning_rate": 1.5905398838312546e-05,
"step": 13650
},
{
"embedding_loss": 0.0043,
"epoch": 1.4266375091117358,
"grad_norm": 0.10959483683109283,
"learning_rate": 1.588225765394673e-05,
"step": 13700
},
{
"embedding_loss": 0.0065,
"epoch": 1.43184421534937,
"grad_norm": 0.03955509141087532,
"learning_rate": 1.5859116469580916e-05,
"step": 13750
},
{
"embedding_loss": 0.0031,
"epoch": 1.437050921587004,
"grad_norm": 0.15788401663303375,
"learning_rate": 1.58359752852151e-05,
"step": 13800
},
{
"embedding_loss": 0.0036,
"epoch": 1.4422576278246382,
"grad_norm": 1.064596176147461,
"learning_rate": 1.5812834100849283e-05,
"step": 13850
},
{
"embedding_loss": 0.003,
"epoch": 1.4474643340622722,
"grad_norm": 2.6524391174316406,
"learning_rate": 1.578969291648347e-05,
"step": 13900
},
{
"embedding_loss": 0.006,
"epoch": 1.4526710402999063,
"grad_norm": 0.2990039885044098,
"learning_rate": 1.5766551732117653e-05,
"step": 13950
},
{
"embedding_loss": 0.0023,
"epoch": 1.4578777465375403,
"grad_norm": 0.12428417056798935,
"learning_rate": 1.5743410547751836e-05,
"step": 14000
},
{
"embedding_loss": 0.0032,
"epoch": 1.4630844527751745,
"grad_norm": 0.01266538817435503,
"learning_rate": 1.572026936338602e-05,
"step": 14050
},
{
"embedding_loss": 0.003,
"epoch": 1.4682911590128085,
"grad_norm": 0.07004108279943466,
"learning_rate": 1.5697128179020206e-05,
"step": 14100
},
{
"embedding_loss": 0.0032,
"epoch": 1.4734978652504425,
"grad_norm": 0.0223364420235157,
"learning_rate": 1.567398699465439e-05,
"step": 14150
},
{
"embedding_loss": 0.0046,
"epoch": 1.4787045714880767,
"grad_norm": 0.13812583684921265,
"learning_rate": 1.5650845810288572e-05,
"step": 14200
},
{
"embedding_loss": 0.0054,
"epoch": 1.4839112777257109,
"grad_norm": 0.11324401199817657,
"learning_rate": 1.5627704625922756e-05,
"step": 14250
},
{
"embedding_loss": 0.0054,
"epoch": 1.4891179839633448,
"grad_norm": 0.3810628354549408,
"learning_rate": 1.560456344155694e-05,
"step": 14300
},
{
"embedding_loss": 0.0029,
"epoch": 1.4943246902009788,
"grad_norm": 0.014939317479729652,
"learning_rate": 1.5581422257191122e-05,
"step": 14350
},
{
"embedding_loss": 0.0031,
"epoch": 1.499531396438613,
"grad_norm": 0.054862458258867264,
"learning_rate": 1.555828107282531e-05,
"step": 14400
},
{
"embedding_loss": 0.0034,
"epoch": 1.504738102676247,
"grad_norm": 0.11869315803050995,
"learning_rate": 1.5535139888459492e-05,
"step": 14450
},
{
"embedding_loss": 0.0025,
"epoch": 1.5099448089138812,
"grad_norm": 0.040105488151311874,
"learning_rate": 1.5511998704093675e-05,
"step": 14500
},
{
"embedding_loss": 0.0058,
"epoch": 1.5151515151515151,
"grad_norm": 0.6557055711746216,
"learning_rate": 1.548885751972786e-05,
"step": 14550
},
{
"embedding_loss": 0.0043,
"epoch": 1.5203582213891491,
"grad_norm": 0.7020523548126221,
"learning_rate": 1.5465716335362045e-05,
"step": 14600
},
{
"embedding_loss": 0.0018,
"epoch": 1.5255649276267833,
"grad_norm": 0.2461288869380951,
"learning_rate": 1.544257515099623e-05,
"step": 14650
},
{
"embedding_loss": 0.0061,
"epoch": 1.5307716338644175,
"grad_norm": 0.033834848552942276,
"learning_rate": 1.5419433966630412e-05,
"step": 14700
},
{
"embedding_loss": 0.0028,
"epoch": 1.5359783401020515,
"grad_norm": 0.0170294102281332,
"learning_rate": 1.53962927822646e-05,
"step": 14750
},
{
"embedding_loss": 0.0059,
"epoch": 1.5411850463396854,
"grad_norm": 0.038527410477399826,
"learning_rate": 1.537315159789878e-05,
"step": 14800
},
{
"embedding_loss": 0.0026,
"epoch": 1.5463917525773194,
"grad_norm": 0.020393826067447662,
"learning_rate": 1.5350010413532965e-05,
"step": 14850
},
{
"embedding_loss": 0.0041,
"epoch": 1.5515984588149536,
"grad_norm": 0.08289851248264313,
"learning_rate": 1.5326869229167148e-05,
"step": 14900
},
{
"embedding_loss": 0.0044,
"epoch": 1.5568051650525878,
"grad_norm": 0.010838224552571774,
"learning_rate": 1.5303728044801335e-05,
"step": 14950
},
{
"embedding_loss": 0.003,
"epoch": 1.5620118712902218,
"grad_norm": 0.021554453298449516,
"learning_rate": 1.5280586860435518e-05,
"step": 15000
},
{
"embedding_loss": 0.0014,
"epoch": 1.5672185775278558,
"grad_norm": 0.21896220743656158,
"learning_rate": 1.5257445676069701e-05,
"step": 15050
},
{
"embedding_loss": 0.0059,
"epoch": 1.57242528376549,
"grad_norm": 3.4779744148254395,
"learning_rate": 1.5234304491703886e-05,
"step": 15100
},
{
"embedding_loss": 0.0024,
"epoch": 1.5776319900031242,
"grad_norm": 0.010911405086517334,
"learning_rate": 1.521116330733807e-05,
"step": 15150
},
{
"embedding_loss": 0.0055,
"epoch": 1.5828386962407581,
"grad_norm": 1.0184364318847656,
"learning_rate": 1.5188022122972255e-05,
"step": 15200
},
{
"embedding_loss": 0.0052,
"epoch": 1.588045402478392,
"grad_norm": 0.01177753321826458,
"learning_rate": 1.516488093860644e-05,
"step": 15250
},
{
"embedding_loss": 0.002,
"epoch": 1.5932521087160263,
"grad_norm": 0.024036038666963577,
"learning_rate": 1.5141739754240623e-05,
"step": 15300
},
{
"embedding_loss": 0.004,
"epoch": 1.5984588149536603,
"grad_norm": 0.015944767743349075,
"learning_rate": 1.5118598569874808e-05,
"step": 15350
},
{
"embedding_loss": 0.0023,
"epoch": 1.6036655211912945,
"grad_norm": 0.0119936503469944,
"learning_rate": 1.5095457385508991e-05,
"step": 15400
},
{
"embedding_loss": 0.0023,
"epoch": 1.6088722274289284,
"grad_norm": 0.1267576962709427,
"learning_rate": 1.5072316201143176e-05,
"step": 15450
},
{
"embedding_loss": 0.003,
"epoch": 1.6140789336665624,
"grad_norm": 0.004355051554739475,
"learning_rate": 1.504917501677736e-05,
"step": 15500
},
{
"embedding_loss": 0.0027,
"epoch": 1.6192856399041966,
"grad_norm": 0.0077704135328531265,
"learning_rate": 1.5026033832411544e-05,
"step": 15550
},
{
"embedding_loss": 0.0023,
"epoch": 1.6244923461418308,
"grad_norm": 0.06213510408997536,
"learning_rate": 1.5002892648045727e-05,
"step": 15600
},
{
"embedding_loss": 0.0044,
"epoch": 1.6296990523794648,
"grad_norm": 0.10908373445272446,
"learning_rate": 1.4979751463679912e-05,
"step": 15650
},
{
"embedding_loss": 0.0074,
"epoch": 1.6349057586170987,
"grad_norm": 0.008925637230277061,
"learning_rate": 1.4956610279314097e-05,
"step": 15700
},
{
"embedding_loss": 0.0029,
"epoch": 1.640112464854733,
"grad_norm": 0.023670511320233345,
"learning_rate": 1.493346909494828e-05,
"step": 15750
},
{
"embedding_loss": 0.0014,
"epoch": 1.645319171092367,
"grad_norm": 0.006442319136112928,
"learning_rate": 1.4910327910582466e-05,
"step": 15800
},
{
"embedding_loss": 0.0018,
"epoch": 1.6505258773300011,
"grad_norm": 0.013194055296480656,
"learning_rate": 1.4887186726216649e-05,
"step": 15850
},
{
"embedding_loss": 0.004,
"epoch": 1.655732583567635,
"grad_norm": 0.011845475062727928,
"learning_rate": 1.4864045541850834e-05,
"step": 15900
},
{
"embedding_loss": 0.0044,
"epoch": 1.660939289805269,
"grad_norm": 0.007666606921702623,
"learning_rate": 1.4840904357485017e-05,
"step": 15950
},
{
"embedding_loss": 0.0024,
"epoch": 1.6661459960429033,
"grad_norm": 0.016819607466459274,
"learning_rate": 1.4817763173119202e-05,
"step": 16000
},
{
"embedding_loss": 0.0031,
"epoch": 1.6713527022805375,
"grad_norm": 0.07455668598413467,
"learning_rate": 1.4794621988753385e-05,
"step": 16050
},
{
"embedding_loss": 0.0018,
"epoch": 1.6765594085181714,
"grad_norm": 0.04744337126612663,
"learning_rate": 1.477148080438757e-05,
"step": 16100
},
{
"embedding_loss": 0.0029,
"epoch": 1.6817661147558054,
"grad_norm": 0.008270618505775928,
"learning_rate": 1.4748339620021754e-05,
"step": 16150
},
{
"embedding_loss": 0.004,
"epoch": 1.6869728209934396,
"grad_norm": 0.007761300075799227,
"learning_rate": 1.4725198435655939e-05,
"step": 16200
},
{
"embedding_loss": 0.0028,
"epoch": 1.6921795272310738,
"grad_norm": 0.06050006300210953,
"learning_rate": 1.4702057251290123e-05,
"step": 16250
},
{
"embedding_loss": 0.0019,
"epoch": 1.6973862334687078,
"grad_norm": 0.019928568974137306,
"learning_rate": 1.4678916066924307e-05,
"step": 16300
},
{
"embedding_loss": 0.0021,
"epoch": 1.7025929397063417,
"grad_norm": 0.027616068720817566,
"learning_rate": 1.4655774882558492e-05,
"step": 16350
},
{
"embedding_loss": 0.0029,
"epoch": 1.7077996459439757,
"grad_norm": 0.37783312797546387,
"learning_rate": 1.4632633698192675e-05,
"step": 16400
},
{
"embedding_loss": 0.0014,
"epoch": 1.71300635218161,
"grad_norm": 0.8646184802055359,
"learning_rate": 1.460949251382686e-05,
"step": 16450
},
{
"embedding_loss": 0.0025,
"epoch": 1.718213058419244,
"grad_norm": 0.009249920025467873,
"learning_rate": 1.4586351329461043e-05,
"step": 16500
},
{
"embedding_loss": 0.0034,
"epoch": 1.723419764656878,
"grad_norm": 0.010544302873313427,
"learning_rate": 1.4563210145095228e-05,
"step": 16550
},
{
"embedding_loss": 0.0028,
"epoch": 1.728626470894512,
"grad_norm": 0.038693223148584366,
"learning_rate": 1.4540068960729411e-05,
"step": 16600
},
{
"embedding_loss": 0.0017,
"epoch": 1.7338331771321462,
"grad_norm": 0.018318980932235718,
"learning_rate": 1.4516927776363596e-05,
"step": 16650
},
{
"embedding_loss": 0.0045,
"epoch": 1.7390398833697804,
"grad_norm": 0.18338936567306519,
"learning_rate": 1.4493786591997781e-05,
"step": 16700
},
{
"embedding_loss": 0.0025,
"epoch": 1.7442465896074144,
"grad_norm": 0.029749080538749695,
"learning_rate": 1.4470645407631965e-05,
"step": 16750
},
{
"embedding_loss": 0.003,
"epoch": 1.7494532958450484,
"grad_norm": 0.09010512381792068,
"learning_rate": 1.444750422326615e-05,
"step": 16800
},
{
"embedding_loss": 0.003,
"epoch": 1.7546600020826824,
"grad_norm": 0.017163589596748352,
"learning_rate": 1.4424363038900333e-05,
"step": 16850
},
{
"embedding_loss": 0.0025,
"epoch": 1.7598667083203166,
"grad_norm": 0.028121547773480415,
"learning_rate": 1.4401221854534518e-05,
"step": 16900
},
{
"embedding_loss": 0.0016,
"epoch": 1.7650734145579507,
"grad_norm": 0.21652670204639435,
"learning_rate": 1.4378080670168701e-05,
"step": 16950
},
{
"embedding_loss": 0.0015,
"epoch": 1.7702801207955847,
"grad_norm": 0.03087479993700981,
"learning_rate": 1.4354939485802886e-05,
"step": 17000
},
{
"embedding_loss": 0.0035,
"epoch": 1.7754868270332187,
"grad_norm": 0.0054185641929507256,
"learning_rate": 1.4331798301437068e-05,
"step": 17050
},
{
"embedding_loss": 0.0014,
"epoch": 1.780693533270853,
"grad_norm": 0.0028866103384643793,
"learning_rate": 1.4308657117071253e-05,
"step": 17100
},
{
"embedding_loss": 0.003,
"epoch": 1.785900239508487,
"grad_norm": 0.058498039841651917,
"learning_rate": 1.4285515932705436e-05,
"step": 17150
},
{
"embedding_loss": 0.0065,
"epoch": 1.791106945746121,
"grad_norm": 0.28154024481773376,
"learning_rate": 1.426237474833962e-05,
"step": 17200
},
{
"embedding_loss": 0.003,
"epoch": 1.796313651983755,
"grad_norm": 0.01061001792550087,
"learning_rate": 1.4239233563973804e-05,
"step": 17250
},
{
"embedding_loss": 0.003,
"epoch": 1.801520358221389,
"grad_norm": 0.004344331566244364,
"learning_rate": 1.4216092379607989e-05,
"step": 17300
},
{
"embedding_loss": 0.0062,
"epoch": 1.8067270644590232,
"grad_norm": 1.790716528892517,
"learning_rate": 1.4192951195242172e-05,
"step": 17350
},
{
"embedding_loss": 0.0037,
"epoch": 1.8119337706966574,
"grad_norm": 0.042736802250146866,
"learning_rate": 1.4169810010876357e-05,
"step": 17400
},
{
"embedding_loss": 0.0047,
"epoch": 1.8171404769342914,
"grad_norm": 0.003962809685617685,
"learning_rate": 1.414666882651054e-05,
"step": 17450
},
{
"embedding_loss": 0.0023,
"epoch": 1.8223471831719253,
"grad_norm": 0.012670880183577538,
"learning_rate": 1.4123527642144725e-05,
"step": 17500
},
{
"embedding_loss": 0.0037,
"epoch": 1.8275538894095595,
"grad_norm": 0.005040524061769247,
"learning_rate": 1.410038645777891e-05,
"step": 17550
},
{
"embedding_loss": 0.0027,
"epoch": 1.8327605956471937,
"grad_norm": 0.36730483174324036,
"learning_rate": 1.4077245273413094e-05,
"step": 17600
},
{
"embedding_loss": 0.0034,
"epoch": 1.8379673018848277,
"grad_norm": 0.02946503274142742,
"learning_rate": 1.4054104089047279e-05,
"step": 17650
},
{
"embedding_loss": 0.0015,
"epoch": 1.8431740081224617,
"grad_norm": 0.013080528937280178,
"learning_rate": 1.4030962904681462e-05,
"step": 17700
},
{
"embedding_loss": 0.0012,
"epoch": 1.8483807143600957,
"grad_norm": 0.02603771910071373,
"learning_rate": 1.4007821720315647e-05,
"step": 17750
},
{
"embedding_loss": 0.0024,
"epoch": 1.8535874205977299,
"grad_norm": 0.2753530740737915,
"learning_rate": 1.398468053594983e-05,
"step": 17800
},
{
"embedding_loss": 0.0019,
"epoch": 1.858794126835364,
"grad_norm": 0.09671527147293091,
"learning_rate": 1.3961539351584015e-05,
"step": 17850
},
{
"embedding_loss": 0.004,
"epoch": 1.864000833072998,
"grad_norm": 0.00563651230186224,
"learning_rate": 1.3938398167218198e-05,
"step": 17900
},
{
"embedding_loss": 0.0047,
"epoch": 1.869207539310632,
"grad_norm": 0.013191591948270798,
"learning_rate": 1.3915256982852383e-05,
"step": 17950
},
{
"embedding_loss": 0.0031,
"epoch": 1.8744142455482662,
"grad_norm": 0.0058168028481304646,
"learning_rate": 1.3892115798486567e-05,
"step": 18000
},
{
"embedding_loss": 0.0028,
"epoch": 1.8796209517859004,
"grad_norm": 0.41721343994140625,
"learning_rate": 1.3868974614120751e-05,
"step": 18050
},
{
"embedding_loss": 0.0027,
"epoch": 1.8848276580235344,
"grad_norm": 0.19165031611919403,
"learning_rate": 1.3845833429754936e-05,
"step": 18100
},
{
"embedding_loss": 0.0038,
"epoch": 1.8900343642611683,
"grad_norm": 0.006406415719538927,
"learning_rate": 1.382269224538912e-05,
"step": 18150
},
{
"embedding_loss": 0.0034,
"epoch": 1.8952410704988023,
"grad_norm": 0.01080580148845911,
"learning_rate": 1.3799551061023305e-05,
"step": 18200
},
{
"embedding_loss": 0.0024,
"epoch": 1.9004477767364365,
"grad_norm": 0.006921404041349888,
"learning_rate": 1.3776409876657488e-05,
"step": 18250
},
{
"embedding_loss": 0.0033,
"epoch": 1.9056544829740707,
"grad_norm": 0.0030105006881058216,
"learning_rate": 1.3753268692291673e-05,
"step": 18300
},
{
"embedding_loss": 0.0022,
"epoch": 1.9108611892117047,
"grad_norm": 0.025791391730308533,
"learning_rate": 1.3730127507925856e-05,
"step": 18350
},
{
"embedding_loss": 0.0015,
"epoch": 1.9160678954493386,
"grad_norm": 0.0030609758105129004,
"learning_rate": 1.3706986323560041e-05,
"step": 18400
},
{
"embedding_loss": 0.0008,
"epoch": 1.9212746016869728,
"grad_norm": 0.5819743871688843,
"learning_rate": 1.3683845139194224e-05,
"step": 18450
},
{
"embedding_loss": 0.0011,
"epoch": 1.926481307924607,
"grad_norm": 0.015468656085431576,
"learning_rate": 1.366070395482841e-05,
"step": 18500
},
{
"embedding_loss": 0.0011,
"epoch": 1.931688014162241,
"grad_norm": 0.004252830985933542,
"learning_rate": 1.3637562770462594e-05,
"step": 18550
},
{
"embedding_loss": 0.0035,
"epoch": 1.936894720399875,
"grad_norm": 0.008880583569407463,
"learning_rate": 1.3614421586096778e-05,
"step": 18600
},
{
"embedding_loss": 0.0018,
"epoch": 1.942101426637509,
"grad_norm": 0.007954990491271019,
"learning_rate": 1.3591280401730962e-05,
"step": 18650
},
{
"embedding_loss": 0.0029,
"epoch": 1.9473081328751431,
"grad_norm": 0.014845364727079868,
"learning_rate": 1.3568139217365146e-05,
"step": 18700
},
{
"embedding_loss": 0.0033,
"epoch": 1.9525148391127773,
"grad_norm": 0.004382742568850517,
"learning_rate": 1.354499803299933e-05,
"step": 18750
},
{
"embedding_loss": 0.0007,
"epoch": 1.9577215453504113,
"grad_norm": 0.2789106070995331,
"learning_rate": 1.3521856848633514e-05,
"step": 18800
},
{
"embedding_loss": 0.0019,
"epoch": 1.9629282515880453,
"grad_norm": 0.00724539440125227,
"learning_rate": 1.3498715664267699e-05,
"step": 18850
},
{
"embedding_loss": 0.0044,
"epoch": 1.9681349578256795,
"grad_norm": 0.05976763367652893,
"learning_rate": 1.3475574479901882e-05,
"step": 18900
},
{
"embedding_loss": 0.0011,
"epoch": 1.9733416640633137,
"grad_norm": 0.018617313355207443,
"learning_rate": 1.3452433295536067e-05,
"step": 18950
},
{
"embedding_loss": 0.0037,
"epoch": 1.9785483703009477,
"grad_norm": 0.07279914617538452,
"learning_rate": 1.342929211117025e-05,
"step": 19000
},
{
"embedding_loss": 0.0003,
"epoch": 1.9837550765385816,
"grad_norm": 0.005604149773716927,
"learning_rate": 1.3406150926804435e-05,
"step": 19050
},
{
"embedding_loss": 0.002,
"epoch": 1.9889617827762156,
"grad_norm": 0.4676770865917206,
"learning_rate": 1.338300974243862e-05,
"step": 19100
},
{
"embedding_loss": 0.0024,
"epoch": 1.9941684890138498,
"grad_norm": 0.006381129380315542,
"learning_rate": 1.3359868558072804e-05,
"step": 19150
},
{
"embedding_loss": 0.0022,
"epoch": 1.999375195251484,
"grad_norm": 0.44813236594200134,
"learning_rate": 1.3336727373706989e-05,
"step": 19200
},
{
"embedding_loss": 0.0017,
"epoch": 2.004581901489118,
"grad_norm": 0.01616285741329193,
"learning_rate": 1.3313586189341172e-05,
"step": 19250
},
{
"embedding_loss": 0.0013,
"epoch": 2.009788607726752,
"grad_norm": 0.006148567423224449,
"learning_rate": 1.3290445004975357e-05,
"step": 19300
},
{
"embedding_loss": 0.0028,
"epoch": 2.014995313964386,
"grad_norm": 0.009615874849259853,
"learning_rate": 1.326730382060954e-05,
"step": 19350
},
{
"embedding_loss": 0.001,
"epoch": 2.0202020202020203,
"grad_norm": 0.004251678008586168,
"learning_rate": 1.3244162636243725e-05,
"step": 19400
},
{
"embedding_loss": 0.0024,
"epoch": 2.0254087264396543,
"grad_norm": 0.008113077841699123,
"learning_rate": 1.3221021451877908e-05,
"step": 19450
},
{
"embedding_loss": 0.0012,
"epoch": 2.0306154326772883,
"grad_norm": 0.02726900391280651,
"learning_rate": 1.3197880267512093e-05,
"step": 19500
},
{
"embedding_loss": 0.0023,
"epoch": 2.0358221389149223,
"grad_norm": 0.00499620521441102,
"learning_rate": 1.3174739083146278e-05,
"step": 19550
},
{
"embedding_loss": 0.002,
"epoch": 2.0410288451525567,
"grad_norm": 1.2157723903656006,
"learning_rate": 1.3151597898780461e-05,
"step": 19600
},
{
"embedding_loss": 0.0031,
"epoch": 2.0462355513901906,
"grad_norm": 0.08977110683917999,
"learning_rate": 1.3128456714414646e-05,
"step": 19650
},
{
"embedding_loss": 0.0017,
"epoch": 2.0514422576278246,
"grad_norm": 0.05430648848414421,
"learning_rate": 1.310531553004883e-05,
"step": 19700
},
{
"embedding_loss": 0.0014,
"epoch": 2.0566489638654586,
"grad_norm": 0.01022451464086771,
"learning_rate": 1.3082174345683015e-05,
"step": 19750
},
{
"embedding_loss": 0.0015,
"epoch": 2.0618556701030926,
"grad_norm": 0.00965672917664051,
"learning_rate": 1.3059033161317198e-05,
"step": 19800
},
{
"embedding_loss": 0.0021,
"epoch": 2.067062376340727,
"grad_norm": 0.005539502017199993,
"learning_rate": 1.3035891976951383e-05,
"step": 19850
},
{
"embedding_loss": 0.0021,
"epoch": 2.072269082578361,
"grad_norm": 0.006059055682271719,
"learning_rate": 1.3012750792585564e-05,
"step": 19900
},
{
"embedding_loss": 0.0032,
"epoch": 2.077475788815995,
"grad_norm": 0.14464695751667023,
"learning_rate": 1.298960960821975e-05,
"step": 19950
},
{
"embedding_loss": 0.0017,
"epoch": 2.082682495053629,
"grad_norm": 0.011897514574229717,
"learning_rate": 1.2966468423853933e-05,
"step": 20000
},
{
"embedding_loss": 0.0009,
"epoch": 2.0878892012912633,
"grad_norm": 0.0040528737008571625,
"learning_rate": 1.2943327239488118e-05,
"step": 20050
},
{
"embedding_loss": 0.0016,
"epoch": 2.0930959075288973,
"grad_norm": 0.007819181308150291,
"learning_rate": 1.2920186055122301e-05,
"step": 20100
},
{
"embedding_loss": 0.0018,
"epoch": 2.0983026137665313,
"grad_norm": 0.013666506856679916,
"learning_rate": 1.2897044870756486e-05,
"step": 20150
},
{
"embedding_loss": 0.0057,
"epoch": 2.1035093200041652,
"grad_norm": 0.010328873060643673,
"learning_rate": 1.2873903686390669e-05,
"step": 20200
},
{
"embedding_loss": 0.0014,
"epoch": 2.108716026241799,
"grad_norm": 0.019933296367526054,
"learning_rate": 1.2850762502024854e-05,
"step": 20250
},
{
"embedding_loss": 0.0022,
"epoch": 2.1139227324794336,
"grad_norm": 0.007374211680144072,
"learning_rate": 1.2827621317659037e-05,
"step": 20300
},
{
"embedding_loss": 0.0037,
"epoch": 2.1191294387170676,
"grad_norm": 0.012251504696905613,
"learning_rate": 1.2804480133293222e-05,
"step": 20350
},
{
"embedding_loss": 0.0011,
"epoch": 2.1243361449547016,
"grad_norm": 0.005697314627468586,
"learning_rate": 1.2781338948927407e-05,
"step": 20400
},
{
"embedding_loss": 0.0016,
"epoch": 2.1295428511923356,
"grad_norm": 0.002244447823613882,
"learning_rate": 1.275819776456159e-05,
"step": 20450
},
{
"embedding_loss": 0.0013,
"epoch": 2.13474955742997,
"grad_norm": 0.015698591247200966,
"learning_rate": 1.2735056580195775e-05,
"step": 20500
},
{
"embedding_loss": 0.0013,
"epoch": 2.139956263667604,
"grad_norm": 0.01356748677790165,
"learning_rate": 1.2711915395829959e-05,
"step": 20550
},
{
"embedding_loss": 0.0013,
"epoch": 2.145162969905238,
"grad_norm": 0.008849513716995716,
"learning_rate": 1.2688774211464144e-05,
"step": 20600
},
{
"embedding_loss": 0.0037,
"epoch": 2.150369676142872,
"grad_norm": 0.6928774118423462,
"learning_rate": 1.2665633027098327e-05,
"step": 20650
},
{
"embedding_loss": 0.0039,
"epoch": 2.155576382380506,
"grad_norm": 0.03237714618444443,
"learning_rate": 1.2642491842732512e-05,
"step": 20700
},
{
"embedding_loss": 0.0037,
"epoch": 2.1607830886181403,
"grad_norm": 0.0030646566301584244,
"learning_rate": 1.2619350658366695e-05,
"step": 20750
},
{
"embedding_loss": 0.0018,
"epoch": 2.1659897948557743,
"grad_norm": 0.011956333182752132,
"learning_rate": 1.259620947400088e-05,
"step": 20800
},
{
"embedding_loss": 0.0013,
"epoch": 2.1711965010934082,
"grad_norm": 0.007671385072171688,
"learning_rate": 1.2573068289635063e-05,
"step": 20850
},
{
"embedding_loss": 0.0017,
"epoch": 2.176403207331042,
"grad_norm": 0.010090204887092113,
"learning_rate": 1.2549927105269248e-05,
"step": 20900
},
{
"embedding_loss": 0.0012,
"epoch": 2.1816099135686766,
"grad_norm": 0.010057215578854084,
"learning_rate": 1.2526785920903433e-05,
"step": 20950
},
{
"embedding_loss": 0.0018,
"epoch": 2.1868166198063106,
"grad_norm": 0.009716392494738102,
"learning_rate": 1.2503644736537617e-05,
"step": 21000
},
{
"embedding_loss": 0.0019,
"epoch": 2.1920233260439446,
"grad_norm": 0.003773706266656518,
"learning_rate": 1.2480503552171802e-05,
"step": 21050
},
{
"embedding_loss": 0.002,
"epoch": 2.1972300322815785,
"grad_norm": 0.004189903382211924,
"learning_rate": 1.2457362367805985e-05,
"step": 21100
},
{
"embedding_loss": 0.0009,
"epoch": 2.2024367385192125,
"grad_norm": 0.005080494098365307,
"learning_rate": 1.243422118344017e-05,
"step": 21150
},
{
"embedding_loss": 0.0008,
"epoch": 2.207643444756847,
"grad_norm": 0.0064069656655192375,
"learning_rate": 1.2411079999074353e-05,
"step": 21200
},
{
"embedding_loss": 0.0011,
"epoch": 2.212850150994481,
"grad_norm": 0.0846613198518753,
"learning_rate": 1.2387938814708538e-05,
"step": 21250
},
{
"embedding_loss": 0.0015,
"epoch": 2.218056857232115,
"grad_norm": 0.004274032544344664,
"learning_rate": 1.2364797630342721e-05,
"step": 21300
},
{
"embedding_loss": 0.0007,
"epoch": 2.223263563469749,
"grad_norm": 0.006647061090916395,
"learning_rate": 1.2341656445976906e-05,
"step": 21350
},
{
"embedding_loss": 0.0012,
"epoch": 2.2284702697073833,
"grad_norm": 0.1389550268650055,
"learning_rate": 1.2318515261611091e-05,
"step": 21400
},
{
"embedding_loss": 0.0015,
"epoch": 2.2336769759450172,
"grad_norm": 0.00890056136995554,
"learning_rate": 1.2295374077245274e-05,
"step": 21450
},
{
"embedding_loss": 0.0017,
"epoch": 2.238883682182651,
"grad_norm": 0.022632068023085594,
"learning_rate": 1.227223289287946e-05,
"step": 21500
},
{
"embedding_loss": 0.0008,
"epoch": 2.244090388420285,
"grad_norm": 0.007279681041836739,
"learning_rate": 1.2249091708513643e-05,
"step": 21550
},
{
"embedding_loss": 0.0008,
"epoch": 2.2492970946579196,
"grad_norm": 0.010247277095913887,
"learning_rate": 1.2225950524147828e-05,
"step": 21600
},
{
"embedding_loss": 0.0025,
"epoch": 2.2545038008955536,
"grad_norm": 0.004706698004156351,
"learning_rate": 1.2202809339782011e-05,
"step": 21650
},
{
"embedding_loss": 0.0022,
"epoch": 2.2597105071331876,
"grad_norm": 0.031804159283638,
"learning_rate": 1.2179668155416196e-05,
"step": 21700
},
{
"embedding_loss": 0.001,
"epoch": 2.2649172133708215,
"grad_norm": 0.021003112196922302,
"learning_rate": 1.2156526971050379e-05,
"step": 21750
},
{
"embedding_loss": 0.0017,
"epoch": 2.2701239196084555,
"grad_norm": 0.003928271122276783,
"learning_rate": 1.2133385786684564e-05,
"step": 21800
},
{
"embedding_loss": 0.0038,
"epoch": 2.27533062584609,
"grad_norm": 0.9323834180831909,
"learning_rate": 1.2110244602318747e-05,
"step": 21850
},
{
"embedding_loss": 0.0011,
"epoch": 2.280537332083724,
"grad_norm": 0.010309775359928608,
"learning_rate": 1.2087103417952932e-05,
"step": 21900
},
{
"embedding_loss": 0.0003,
"epoch": 2.285744038321358,
"grad_norm": 0.008217355236411095,
"learning_rate": 1.2063962233587117e-05,
"step": 21950
},
{
"embedding_loss": 0.0016,
"epoch": 2.290950744558992,
"grad_norm": 0.013672198168933392,
"learning_rate": 1.20408210492213e-05,
"step": 22000
},
{
"embedding_loss": 0.0022,
"epoch": 2.296157450796626,
"grad_norm": 0.1977008581161499,
"learning_rate": 1.2017679864855485e-05,
"step": 22050
},
{
"embedding_loss": 0.0011,
"epoch": 2.3013641570342602,
"grad_norm": 0.006241293158382177,
"learning_rate": 1.1994538680489669e-05,
"step": 22100
},
{
"embedding_loss": 0.0011,
"epoch": 2.306570863271894,
"grad_norm": 0.0057389759458601475,
"learning_rate": 1.1971397496123854e-05,
"step": 22150
},
{
"embedding_loss": 0.0011,
"epoch": 2.311777569509528,
"grad_norm": 0.006034619640558958,
"learning_rate": 1.1948256311758037e-05,
"step": 22200
},
{
"embedding_loss": 0.0021,
"epoch": 2.3169842757471626,
"grad_norm": 0.3582187592983246,
"learning_rate": 1.1925115127392222e-05,
"step": 22250
},
{
"embedding_loss": 0.0018,
"epoch": 2.3221909819847966,
"grad_norm": 0.003342969575896859,
"learning_rate": 1.1901973943026405e-05,
"step": 22300
},
{
"embedding_loss": 0.0021,
"epoch": 2.3273976882224305,
"grad_norm": 0.017463702708482742,
"learning_rate": 1.187883275866059e-05,
"step": 22350
},
{
"embedding_loss": 0.0006,
"epoch": 2.3326043944600645,
"grad_norm": 0.005371089559048414,
"learning_rate": 1.1855691574294775e-05,
"step": 22400
},
{
"embedding_loss": 0.004,
"epoch": 2.3378111006976985,
"grad_norm": 0.005444334354251623,
"learning_rate": 1.1832550389928958e-05,
"step": 22450
},
{
"embedding_loss": 0.001,
"epoch": 2.3430178069353325,
"grad_norm": 0.00549267278984189,
"learning_rate": 1.1809409205563143e-05,
"step": 22500
},
{
"embedding_loss": 0.002,
"epoch": 2.348224513172967,
"grad_norm": 0.009904368780553341,
"learning_rate": 1.1786268021197327e-05,
"step": 22550
},
{
"embedding_loss": 0.0021,
"epoch": 2.353431219410601,
"grad_norm": 0.004460447933524847,
"learning_rate": 1.1763126836831512e-05,
"step": 22600
},
{
"embedding_loss": 0.0017,
"epoch": 2.358637925648235,
"grad_norm": 0.012372348457574844,
"learning_rate": 1.1739985652465695e-05,
"step": 22650
},
{
"embedding_loss": 0.0015,
"epoch": 2.3638446318858692,
"grad_norm": 0.007495572324842215,
"learning_rate": 1.171684446809988e-05,
"step": 22700
},
{
"embedding_loss": 0.0011,
"epoch": 2.369051338123503,
"grad_norm": 0.014190604910254478,
"learning_rate": 1.1693703283734061e-05,
"step": 22750
},
{
"embedding_loss": 0.0008,
"epoch": 2.374258044361137,
"grad_norm": 0.4924188256263733,
"learning_rate": 1.1670562099368246e-05,
"step": 22800
},
{
"embedding_loss": 0.001,
"epoch": 2.379464750598771,
"grad_norm": 0.013879277743399143,
"learning_rate": 1.164742091500243e-05,
"step": 22850
},
{
"embedding_loss": 0.0004,
"epoch": 2.384671456836405,
"grad_norm": 0.02071734145283699,
"learning_rate": 1.1624279730636614e-05,
"step": 22900
},
{
"embedding_loss": 0.0005,
"epoch": 2.3898781630740396,
"grad_norm": 0.004272214137017727,
"learning_rate": 1.1601138546270798e-05,
"step": 22950
},
{
"embedding_loss": 0.0018,
"epoch": 2.3950848693116735,
"grad_norm": 0.002411644207313657,
"learning_rate": 1.1577997361904983e-05,
"step": 23000
},
{
"embedding_loss": 0.001,
"epoch": 2.4002915755493075,
"grad_norm": 0.019449541345238686,
"learning_rate": 1.1554856177539166e-05,
"step": 23050
},
{
"embedding_loss": 0.0009,
"epoch": 2.4054982817869415,
"grad_norm": 0.02527959644794464,
"learning_rate": 1.1531714993173351e-05,
"step": 23100
},
{
"embedding_loss": 0.0024,
"epoch": 2.410704988024576,
"grad_norm": 0.16354507207870483,
"learning_rate": 1.1508573808807534e-05,
"step": 23150
},
{
"embedding_loss": 0.0014,
"epoch": 2.41591169426221,
"grad_norm": 0.017682882025837898,
"learning_rate": 1.148543262444172e-05,
"step": 23200
},
{
"embedding_loss": 0.0013,
"epoch": 2.421118400499844,
"grad_norm": 0.0050527737475931644,
"learning_rate": 1.1462291440075904e-05,
"step": 23250
},
{
"embedding_loss": 0.0013,
"epoch": 2.426325106737478,
"grad_norm": 0.0023584417067468166,
"learning_rate": 1.1439150255710087e-05,
"step": 23300
},
{
"embedding_loss": 0.0003,
"epoch": 2.431531812975112,
"grad_norm": 0.3315781354904175,
"learning_rate": 1.1416009071344272e-05,
"step": 23350
},
{
"embedding_loss": 0.0003,
"epoch": 2.436738519212746,
"grad_norm": 0.003767622634768486,
"learning_rate": 1.1392867886978456e-05,
"step": 23400
},
{
"embedding_loss": 0.0013,
"epoch": 2.44194522545038,
"grad_norm": 0.06164936348795891,
"learning_rate": 1.136972670261264e-05,
"step": 23450
},
{
"embedding_loss": 0.0021,
"epoch": 2.447151931688014,
"grad_norm": 0.014987274073064327,
"learning_rate": 1.1346585518246824e-05,
"step": 23500
},
{
"embedding_loss": 0.0025,
"epoch": 2.452358637925648,
"grad_norm": 0.014723850414156914,
"learning_rate": 1.1323444333881009e-05,
"step": 23550
},
{
"embedding_loss": 0.0018,
"epoch": 2.4575653441632825,
"grad_norm": 0.007753140293061733,
"learning_rate": 1.1300303149515192e-05,
"step": 23600
},
{
"embedding_loss": 0.0021,
"epoch": 2.4627720504009165,
"grad_norm": 0.11420779675245285,
"learning_rate": 1.1277161965149377e-05,
"step": 23650
},
{
"embedding_loss": 0.0009,
"epoch": 2.4679787566385505,
"grad_norm": 0.0015545577043667436,
"learning_rate": 1.125402078078356e-05,
"step": 23700
},
{
"embedding_loss": 0.0015,
"epoch": 2.4731854628761845,
"grad_norm": 0.002739744959399104,
"learning_rate": 1.1230879596417745e-05,
"step": 23750
},
{
"embedding_loss": 0.0012,
"epoch": 2.4783921691138184,
"grad_norm": 0.14792239665985107,
"learning_rate": 1.120773841205193e-05,
"step": 23800
},
{
"embedding_loss": 0.0009,
"epoch": 2.483598875351453,
"grad_norm": 0.016194604337215424,
"learning_rate": 1.1184597227686113e-05,
"step": 23850
},
{
"embedding_loss": 0.0011,
"epoch": 2.488805581589087,
"grad_norm": 0.0649636909365654,
"learning_rate": 1.1161456043320298e-05,
"step": 23900
},
{
"embedding_loss": 0.0006,
"epoch": 2.494012287826721,
"grad_norm": 0.005290072411298752,
"learning_rate": 1.1138314858954482e-05,
"step": 23950
},
{
"embedding_loss": 0.0005,
"epoch": 2.4992189940643548,
"grad_norm": 0.010143323801457882,
"learning_rate": 1.1115173674588667e-05,
"step": 24000
},
{
"embedding_loss": 0.0016,
"epoch": 2.504425700301989,
"grad_norm": 0.00270524388179183,
"learning_rate": 1.109203249022285e-05,
"step": 24050
},
{
"embedding_loss": 0.0021,
"epoch": 2.509632406539623,
"grad_norm": 0.0045821997337043285,
"learning_rate": 1.1068891305857035e-05,
"step": 24100
},
{
"embedding_loss": 0.0022,
"epoch": 2.514839112777257,
"grad_norm": 0.003760270308703184,
"learning_rate": 1.1045750121491218e-05,
"step": 24150
},
{
"embedding_loss": 0.0037,
"epoch": 2.520045819014891,
"grad_norm": 0.08812420815229416,
"learning_rate": 1.1022608937125403e-05,
"step": 24200
},
{
"embedding_loss": 0.0018,
"epoch": 2.525252525252525,
"grad_norm": 0.27958598732948303,
"learning_rate": 1.0999467752759588e-05,
"step": 24250
},
{
"embedding_loss": 0.0014,
"epoch": 2.530459231490159,
"grad_norm": 0.004292377736419439,
"learning_rate": 1.0976326568393771e-05,
"step": 24300
},
{
"embedding_loss": 0.001,
"epoch": 2.5356659377277935,
"grad_norm": 0.21659308671951294,
"learning_rate": 1.0953185384027956e-05,
"step": 24350
},
{
"embedding_loss": 0.0009,
"epoch": 2.5408726439654274,
"grad_norm": 0.005312615539878607,
"learning_rate": 1.093004419966214e-05,
"step": 24400
},
{
"embedding_loss": 0.0003,
"epoch": 2.5460793502030614,
"grad_norm": 0.003285923507064581,
"learning_rate": 1.0906903015296324e-05,
"step": 24450
},
{
"embedding_loss": 0.0008,
"epoch": 2.551286056440696,
"grad_norm": 0.00929224118590355,
"learning_rate": 1.0883761830930508e-05,
"step": 24500
},
{
"embedding_loss": 0.0008,
"epoch": 2.55649276267833,
"grad_norm": 0.003572756890207529,
"learning_rate": 1.0860620646564693e-05,
"step": 24550
},
{
"embedding_loss": 0.0019,
"epoch": 2.561699468915964,
"grad_norm": 0.002889364492148161,
"learning_rate": 1.0837479462198876e-05,
"step": 24600
},
{
"embedding_loss": 0.002,
"epoch": 2.5669061751535978,
"grad_norm": 0.02578425034880638,
"learning_rate": 1.0814338277833061e-05,
"step": 24650
},
{
"embedding_loss": 0.0011,
"epoch": 2.5721128813912317,
"grad_norm": 0.010893690399825573,
"learning_rate": 1.0791197093467244e-05,
"step": 24700
},
{
"embedding_loss": 0.0006,
"epoch": 2.5773195876288657,
"grad_norm": 0.005054382607340813,
"learning_rate": 1.0768055909101429e-05,
"step": 24750
},
{
"embedding_loss": 0.0017,
"epoch": 2.5825262938665,
"grad_norm": 0.09210974723100662,
"learning_rate": 1.0744914724735614e-05,
"step": 24800
},
{
"embedding_loss": 0.0013,
"epoch": 2.587733000104134,
"grad_norm": 0.007207928225398064,
"learning_rate": 1.0721773540369797e-05,
"step": 24850
},
{
"embedding_loss": 0.0018,
"epoch": 2.592939706341768,
"grad_norm": 0.005150509066879749,
"learning_rate": 1.0698632356003982e-05,
"step": 24900
},
{
"embedding_loss": 0.0021,
"epoch": 2.5981464125794025,
"grad_norm": 0.007632564753293991,
"learning_rate": 1.0675491171638166e-05,
"step": 24950
},
{
"embedding_loss": 0.0018,
"epoch": 2.6033531188170365,
"grad_norm": 0.004092794377356768,
"learning_rate": 1.065234998727235e-05,
"step": 25000
},
{
"embedding_loss": 0.0027,
"epoch": 2.6085598250546704,
"grad_norm": 0.013759996742010117,
"learning_rate": 1.0629208802906534e-05,
"step": 25050
},
{
"embedding_loss": 0.0003,
"epoch": 2.6137665312923044,
"grad_norm": 0.004917910788208246,
"learning_rate": 1.0606067618540719e-05,
"step": 25100
},
{
"embedding_loss": 0.002,
"epoch": 2.6189732375299384,
"grad_norm": 0.01083595585078001,
"learning_rate": 1.0582926434174902e-05,
"step": 25150
},
{
"embedding_loss": 0.0017,
"epoch": 2.624179943767573,
"grad_norm": 0.3743145167827606,
"learning_rate": 1.0559785249809087e-05,
"step": 25200
},
{
"embedding_loss": 0.0008,
"epoch": 2.6293866500052068,
"grad_norm": 0.003947914578020573,
"learning_rate": 1.0536644065443272e-05,
"step": 25250
},
{
"embedding_loss": 0.0005,
"epoch": 2.6345933562428407,
"grad_norm": 0.0035067517310380936,
"learning_rate": 1.0513502881077455e-05,
"step": 25300
},
{
"embedding_loss": 0.0022,
"epoch": 2.6398000624804747,
"grad_norm": 0.021643230691552162,
"learning_rate": 1.049036169671164e-05,
"step": 25350
},
{
"embedding_loss": 0.0006,
"epoch": 2.645006768718109,
"grad_norm": 0.016041336581110954,
"learning_rate": 1.0467220512345823e-05,
"step": 25400
},
{
"embedding_loss": 0.0017,
"epoch": 2.650213474955743,
"grad_norm": 0.0026006808038800955,
"learning_rate": 1.0444079327980008e-05,
"step": 25450
},
{
"embedding_loss": 0.0011,
"epoch": 2.655420181193377,
"grad_norm": 0.006043120287358761,
"learning_rate": 1.0420938143614192e-05,
"step": 25500
},
{
"embedding_loss": 0.0009,
"epoch": 2.660626887431011,
"grad_norm": 0.007799636106938124,
"learning_rate": 1.0397796959248377e-05,
"step": 25550
},
{
"embedding_loss": 0.001,
"epoch": 2.665833593668645,
"grad_norm": 0.0032333596609532833,
"learning_rate": 1.0374655774882558e-05,
"step": 25600
},
{
"embedding_loss": 0.0022,
"epoch": 2.6710402999062794,
"grad_norm": 0.006502605974674225,
"learning_rate": 1.0351514590516743e-05,
"step": 25650
},
{
"embedding_loss": 0.001,
"epoch": 2.6762470061439134,
"grad_norm": 0.002495839726179838,
"learning_rate": 1.0328373406150926e-05,
"step": 25700
},
{
"embedding_loss": 0.0009,
"epoch": 2.6814537123815474,
"grad_norm": 0.011339404620230198,
"learning_rate": 1.0305232221785111e-05,
"step": 25750
},
{
"embedding_loss": 0.0026,
"epoch": 2.6866604186191814,
"grad_norm": 0.0049376110546290874,
"learning_rate": 1.0282091037419295e-05,
"step": 25800
},
{
"embedding_loss": 0.0021,
"epoch": 2.691867124856816,
"grad_norm": 0.003491663606837392,
"learning_rate": 1.025894985305348e-05,
"step": 25850
},
{
"embedding_loss": 0.001,
"epoch": 2.6970738310944498,
"grad_norm": 0.06203962489962578,
"learning_rate": 1.0235808668687663e-05,
"step": 25900
},
{
"embedding_loss": 0.0014,
"epoch": 2.7022805373320837,
"grad_norm": 0.0038036692421883345,
"learning_rate": 1.0212667484321848e-05,
"step": 25950
},
{
"embedding_loss": 0.0017,
"epoch": 2.7074872435697177,
"grad_norm": 0.013710272498428822,
"learning_rate": 1.0189526299956031e-05,
"step": 26000
},
{
"embedding_loss": 0.0016,
"epoch": 2.7126939498073517,
"grad_norm": 0.003352423897013068,
"learning_rate": 1.0166385115590216e-05,
"step": 26050
},
{
"embedding_loss": 0.0003,
"epoch": 2.717900656044986,
"grad_norm": 0.004212658852338791,
"learning_rate": 1.0143243931224401e-05,
"step": 26100
},
{
"embedding_loss": 0.0014,
"epoch": 2.72310736228262,
"grad_norm": 0.05683301389217377,
"learning_rate": 1.0120102746858584e-05,
"step": 26150
},
{
"embedding_loss": 0.0004,
"epoch": 2.728314068520254,
"grad_norm": 0.008711031638085842,
"learning_rate": 1.009696156249277e-05,
"step": 26200
},
{
"embedding_loss": 0.0019,
"epoch": 2.733520774757888,
"grad_norm": 0.007152698002755642,
"learning_rate": 1.0073820378126952e-05,
"step": 26250
},
{
"embedding_loss": 0.0011,
"epoch": 2.7387274809955224,
"grad_norm": 0.003356023458763957,
"learning_rate": 1.0050679193761137e-05,
"step": 26300
},
{
"embedding_loss": 0.0012,
"epoch": 2.7439341872331564,
"grad_norm": 0.016632454469799995,
"learning_rate": 1.002753800939532e-05,
"step": 26350
},
{
"embedding_loss": 0.0015,
"epoch": 2.7491408934707904,
"grad_norm": 0.5349009037017822,
"learning_rate": 1.0004396825029506e-05,
"step": 26400
},
{
"embedding_loss": 0.0009,
"epoch": 2.7543475997084244,
"grad_norm": 0.004687939304858446,
"learning_rate": 9.981255640663689e-06,
"step": 26450
},
{
"embedding_loss": 0.0002,
"epoch": 2.7595543059460583,
"grad_norm": 0.0024527718778699636,
"learning_rate": 9.958114456297874e-06,
"step": 26500
},
{
"embedding_loss": 0.0018,
"epoch": 2.7647610121836927,
"grad_norm": 0.08971556276082993,
"learning_rate": 9.934973271932057e-06,
"step": 26550
},
{
"embedding_loss": 0.001,
"epoch": 2.7699677184213267,
"grad_norm": 0.00650964817032218,
"learning_rate": 9.911832087566242e-06,
"step": 26600
},
{
"embedding_loss": 0.0019,
"epoch": 2.7751744246589607,
"grad_norm": 0.0036748195998370647,
"learning_rate": 9.888690903200427e-06,
"step": 26650
},
{
"embedding_loss": 0.0007,
"epoch": 2.7803811308965947,
"grad_norm": 0.003135056234896183,
"learning_rate": 9.86554971883461e-06,
"step": 26700
},
{
"embedding_loss": 0.0008,
"epoch": 2.785587837134229,
"grad_norm": 0.0061689745634794235,
"learning_rate": 9.842408534468795e-06,
"step": 26750
},
{
"embedding_loss": 0.0015,
"epoch": 2.790794543371863,
"grad_norm": 0.0035342394839972258,
"learning_rate": 9.819267350102979e-06,
"step": 26800
},
{
"embedding_loss": 0.0023,
"epoch": 2.796001249609497,
"grad_norm": 0.08970965445041656,
"learning_rate": 9.796126165737164e-06,
"step": 26850
},
{
"embedding_loss": 0.0005,
"epoch": 2.801207955847131,
"grad_norm": 0.002801700960844755,
"learning_rate": 9.772984981371347e-06,
"step": 26900
},
{
"embedding_loss": 0.0014,
"epoch": 2.806414662084765,
"grad_norm": 0.06024768948554993,
"learning_rate": 9.749843797005532e-06,
"step": 26950
},
{
"embedding_loss": 0.0014,
"epoch": 2.8116213683223994,
"grad_norm": 0.003583586309105158,
"learning_rate": 9.726702612639715e-06,
"step": 27000
},
{
"embedding_loss": 0.0008,
"epoch": 2.8168280745600334,
"grad_norm": 0.022347550839185715,
"learning_rate": 9.7035614282739e-06,
"step": 27050
},
{
"embedding_loss": 0.0016,
"epoch": 2.8220347807976673,
"grad_norm": 0.0020712248515337706,
"learning_rate": 9.680420243908085e-06,
"step": 27100
},
{
"embedding_loss": 0.0024,
"epoch": 2.8272414870353013,
"grad_norm": 0.00296349311247468,
"learning_rate": 9.657279059542268e-06,
"step": 27150
},
{
"embedding_loss": 0.001,
"epoch": 2.8324481932729357,
"grad_norm": 0.004647277761250734,
"learning_rate": 9.634137875176453e-06,
"step": 27200
},
{
"embedding_loss": 0.0024,
"epoch": 2.8376548995105697,
"grad_norm": 0.09574100375175476,
"learning_rate": 9.610996690810636e-06,
"step": 27250
},
{
"embedding_loss": 0.0005,
"epoch": 2.8428616057482037,
"grad_norm": 0.002023301785811782,
"learning_rate": 9.587855506444821e-06,
"step": 27300
},
{
"embedding_loss": 0.0001,
"epoch": 2.8480683119858377,
"grad_norm": 0.0016001787735149264,
"learning_rate": 9.564714322079005e-06,
"step": 27350
},
{
"embedding_loss": 0.0021,
"epoch": 2.8532750182234716,
"grad_norm": 0.02564910613000393,
"learning_rate": 9.54157313771319e-06,
"step": 27400
},
{
"embedding_loss": 0.0016,
"epoch": 2.858481724461106,
"grad_norm": 0.004294661805033684,
"learning_rate": 9.518431953347373e-06,
"step": 27450
},
{
"embedding_loss": 0.0011,
"epoch": 2.86368843069874,
"grad_norm": 0.02338520810008049,
"learning_rate": 9.495290768981558e-06,
"step": 27500
},
{
"embedding_loss": 0.0018,
"epoch": 2.868895136936374,
"grad_norm": 0.040758974850177765,
"learning_rate": 9.472149584615741e-06,
"step": 27550
},
{
"embedding_loss": 0.0008,
"epoch": 2.874101843174008,
"grad_norm": 0.008010084740817547,
"learning_rate": 9.449008400249926e-06,
"step": 27600
},
{
"embedding_loss": 0.0023,
"epoch": 2.8793085494116424,
"grad_norm": 0.007018416654318571,
"learning_rate": 9.425867215884111e-06,
"step": 27650
},
{
"embedding_loss": 0.0009,
"epoch": 2.8845152556492764,
"grad_norm": 0.010360274463891983,
"learning_rate": 9.402726031518293e-06,
"step": 27700
},
{
"embedding_loss": 0.0002,
"epoch": 2.8897219618869103,
"grad_norm": 0.007200753781944513,
"learning_rate": 9.379584847152478e-06,
"step": 27750
},
{
"embedding_loss": 0.0013,
"epoch": 2.8949286681245443,
"grad_norm": 0.0022850781679153442,
"learning_rate": 9.356443662786662e-06,
"step": 27800
},
{
"embedding_loss": 0.0021,
"epoch": 2.9001353743621783,
"grad_norm": 0.006049764808267355,
"learning_rate": 9.333302478420846e-06,
"step": 27850
},
{
"embedding_loss": 0.0002,
"epoch": 2.9053420805998127,
"grad_norm": 0.005760509520769119,
"learning_rate": 9.31016129405503e-06,
"step": 27900
},
{
"embedding_loss": 0.0015,
"epoch": 2.9105487868374467,
"grad_norm": 0.23274853825569153,
"learning_rate": 9.287020109689214e-06,
"step": 27950
},
{
"embedding_loss": 0.0015,
"epoch": 2.9157554930750806,
"grad_norm": 0.0031046303920447826,
"learning_rate": 9.263878925323399e-06,
"step": 28000
},
{
"embedding_loss": 0.0003,
"epoch": 2.9209621993127146,
"grad_norm": 0.007741307374089956,
"learning_rate": 9.240737740957582e-06,
"step": 28050
},
{
"embedding_loss": 0.0011,
"epoch": 2.926168905550349,
"grad_norm": 0.01169963926076889,
"learning_rate": 9.217596556591767e-06,
"step": 28100
},
{
"embedding_loss": 0.0002,
"epoch": 2.931375611787983,
"grad_norm": 0.01061971951276064,
"learning_rate": 9.19445537222595e-06,
"step": 28150
},
{
"embedding_loss": 0.002,
"epoch": 2.936582318025617,
"grad_norm": 0.0040078721940517426,
"learning_rate": 9.171314187860135e-06,
"step": 28200
},
{
"embedding_loss": 0.001,
"epoch": 2.941789024263251,
"grad_norm": 0.00869227759540081,
"learning_rate": 9.14817300349432e-06,
"step": 28250
},
{
"embedding_loss": 0.0026,
"epoch": 2.946995730500885,
"grad_norm": 0.03226366266608238,
"learning_rate": 9.125031819128504e-06,
"step": 28300
},
{
"embedding_loss": 0.0015,
"epoch": 2.9522024367385193,
"grad_norm": 0.007837221957743168,
"learning_rate": 9.101890634762689e-06,
"step": 28350
},
{
"embedding_loss": 0.0004,
"epoch": 2.9574091429761533,
"grad_norm": 0.016281556338071823,
"learning_rate": 9.078749450396872e-06,
"step": 28400
},
{
"embedding_loss": 0.001,
"epoch": 2.9626158492137873,
"grad_norm": 0.006102351471781731,
"learning_rate": 9.055608266031057e-06,
"step": 28450
},
{
"embedding_loss": 0.0023,
"epoch": 2.9678225554514217,
"grad_norm": 0.004030589014291763,
"learning_rate": 9.03246708166524e-06,
"step": 28500
},
{
"embedding_loss": 0.0013,
"epoch": 2.9730292616890557,
"grad_norm": 0.08026989549398422,
"learning_rate": 9.009325897299425e-06,
"step": 28550
},
{
"embedding_loss": 0.0002,
"epoch": 2.9782359679266897,
"grad_norm": 0.004586766008287668,
"learning_rate": 8.986184712933608e-06,
"step": 28600
},
{
"embedding_loss": 0.0005,
"epoch": 2.9834426741643236,
"grad_norm": 0.04759465157985687,
"learning_rate": 8.963043528567793e-06,
"step": 28650
},
{
"embedding_loss": 0.0006,
"epoch": 2.9886493804019576,
"grad_norm": 0.002582300454378128,
"learning_rate": 8.939902344201976e-06,
"step": 28700
},
{
"embedding_loss": 0.0028,
"epoch": 2.9938560866395916,
"grad_norm": 0.008660154417157173,
"learning_rate": 8.916761159836161e-06,
"step": 28750
},
{
"embedding_loss": 0.0016,
"epoch": 2.999062792877226,
"grad_norm": 0.003941241651773453,
"learning_rate": 8.893619975470346e-06,
"step": 28800
},
{
"embedding_loss": 0.0017,
"epoch": 3.00426949911486,
"grad_norm": 0.00856933556497097,
"learning_rate": 8.87047879110453e-06,
"step": 28850
},
{
"embedding_loss": 0.0003,
"epoch": 3.009476205352494,
"grad_norm": 0.009460404515266418,
"learning_rate": 8.847337606738715e-06,
"step": 28900
},
{
"embedding_loss": 0.0014,
"epoch": 3.014682911590128,
"grad_norm": 0.006439635530114174,
"learning_rate": 8.824196422372898e-06,
"step": 28950
},
{
"embedding_loss": 0.0014,
"epoch": 3.0198896178277623,
"grad_norm": 0.020535370334982872,
"learning_rate": 8.801055238007083e-06,
"step": 29000
},
{
"embedding_loss": 0.0021,
"epoch": 3.0250963240653963,
"grad_norm": 0.027857592329382896,
"learning_rate": 8.777914053641266e-06,
"step": 29050
},
{
"embedding_loss": 0.0007,
"epoch": 3.0303030303030303,
"grad_norm": 0.009412121027708054,
"learning_rate": 8.754772869275451e-06,
"step": 29100
},
{
"embedding_loss": 0.0007,
"epoch": 3.0355097365406642,
"grad_norm": 0.05827178806066513,
"learning_rate": 8.731631684909634e-06,
"step": 29150
},
{
"embedding_loss": 0.0003,
"epoch": 3.0407164427782982,
"grad_norm": 0.002373203868046403,
"learning_rate": 8.708490500543818e-06,
"step": 29200
},
{
"embedding_loss": 0.0013,
"epoch": 3.0459231490159326,
"grad_norm": 0.006206809543073177,
"learning_rate": 8.685349316178003e-06,
"step": 29250
},
{
"embedding_loss": 0.0015,
"epoch": 3.0511298552535666,
"grad_norm": 0.00912196934223175,
"learning_rate": 8.662208131812186e-06,
"step": 29300
},
{
"embedding_loss": 0.0005,
"epoch": 3.0563365614912006,
"grad_norm": 0.00465911440551281,
"learning_rate": 8.63906694744637e-06,
"step": 29350
},
{
"embedding_loss": 0.002,
"epoch": 3.0615432677288346,
"grad_norm": 0.18286481499671936,
"learning_rate": 8.615925763080554e-06,
"step": 29400
},
{
"embedding_loss": 0.0006,
"epoch": 3.066749973966469,
"grad_norm": 0.004343831911683083,
"learning_rate": 8.592784578714739e-06,
"step": 29450
},
{
"embedding_loss": 0.0013,
"epoch": 3.071956680204103,
"grad_norm": 0.05209621787071228,
"learning_rate": 8.569643394348924e-06,
"step": 29500
},
{
"embedding_loss": 0.0018,
"epoch": 3.077163386441737,
"grad_norm": 0.05028437450528145,
"learning_rate": 8.546502209983107e-06,
"step": 29550
},
{
"embedding_loss": 0.0002,
"epoch": 3.082370092679371,
"grad_norm": 0.0026865217369049788,
"learning_rate": 8.523361025617292e-06,
"step": 29600
},
{
"embedding_loss": 0.0012,
"epoch": 3.0875767989170053,
"grad_norm": 0.4508988559246063,
"learning_rate": 8.500219841251475e-06,
"step": 29650
},
{
"embedding_loss": 0.0007,
"epoch": 3.0927835051546393,
"grad_norm": 0.02336781658232212,
"learning_rate": 8.47707865688566e-06,
"step": 29700
},
{
"embedding_loss": 0.0015,
"epoch": 3.0979902113922733,
"grad_norm": 0.0036797483917325735,
"learning_rate": 8.453937472519844e-06,
"step": 29750
},
{
"embedding_loss": 0.0008,
"epoch": 3.1031969176299072,
"grad_norm": 0.007331520318984985,
"learning_rate": 8.430796288154029e-06,
"step": 29800
},
{
"embedding_loss": 0.0004,
"epoch": 3.108403623867541,
"grad_norm": 0.006399332545697689,
"learning_rate": 8.407655103788212e-06,
"step": 29850
},
{
"embedding_loss": 0.0032,
"epoch": 3.1136103301051756,
"grad_norm": 0.09251190721988678,
"learning_rate": 8.384513919422397e-06,
"step": 29900
},
{
"embedding_loss": 0.0015,
"epoch": 3.1188170363428096,
"grad_norm": 0.005595037247985601,
"learning_rate": 8.361372735056582e-06,
"step": 29950
},
{
"embedding_loss": 0.0014,
"epoch": 3.1240237425804436,
"grad_norm": 0.0045051900669932365,
"learning_rate": 8.338231550690765e-06,
"step": 30000
},
{
"embedding_loss": 0.0013,
"epoch": 3.1292304488180775,
"grad_norm": 0.00950851384550333,
"learning_rate": 8.31509036632495e-06,
"step": 30050
},
{
"embedding_loss": 0.0008,
"epoch": 3.1344371550557115,
"grad_norm": 0.0038670655339956284,
"learning_rate": 8.291949181959133e-06,
"step": 30100
},
{
"embedding_loss": 0.0014,
"epoch": 3.139643861293346,
"grad_norm": 0.0030759319197386503,
"learning_rate": 8.268807997593318e-06,
"step": 30150
},
{
"embedding_loss": 0.0019,
"epoch": 3.14485056753098,
"grad_norm": 0.0031105412635952234,
"learning_rate": 8.245666813227501e-06,
"step": 30200
},
{
"embedding_loss": 0.0007,
"epoch": 3.150057273768614,
"grad_norm": 0.007572552189230919,
"learning_rate": 8.222525628861686e-06,
"step": 30250
},
{
"embedding_loss": 0.0022,
"epoch": 3.155263980006248,
"grad_norm": 0.0044418093748390675,
"learning_rate": 8.19938444449587e-06,
"step": 30300
},
{
"embedding_loss": 0.0008,
"epoch": 3.1604706862438823,
"grad_norm": 0.005121790803968906,
"learning_rate": 8.176243260130055e-06,
"step": 30350
},
{
"embedding_loss": 0.0007,
"epoch": 3.1656773924815163,
"grad_norm": 0.005301581230014563,
"learning_rate": 8.153102075764238e-06,
"step": 30400
},
{
"embedding_loss": 0.0006,
"epoch": 3.1708840987191502,
"grad_norm": 0.012782045640051365,
"learning_rate": 8.129960891398423e-06,
"step": 30450
},
{
"embedding_loss": 0.0014,
"epoch": 3.176090804956784,
"grad_norm": 0.003524052444845438,
"learning_rate": 8.106819707032608e-06,
"step": 30500
},
{
"embedding_loss": 0.0006,
"epoch": 3.1812975111944186,
"grad_norm": 0.04818173870444298,
"learning_rate": 8.08367852266679e-06,
"step": 30550
},
{
"embedding_loss": 0.0008,
"epoch": 3.1865042174320526,
"grad_norm": 0.014735080301761627,
"learning_rate": 8.060537338300974e-06,
"step": 30600
},
{
"embedding_loss": 0.0014,
"epoch": 3.1917109236696866,
"grad_norm": 0.004299947526305914,
"learning_rate": 8.03739615393516e-06,
"step": 30650
},
{
"embedding_loss": 0.0007,
"epoch": 3.1969176299073205,
"grad_norm": 0.00827470701187849,
"learning_rate": 8.014254969569343e-06,
"step": 30700
},
{
"embedding_loss": 0.0017,
"epoch": 3.2021243361449545,
"grad_norm": 0.005655727814882994,
"learning_rate": 7.991113785203528e-06,
"step": 30750
},
{
"embedding_loss": 0.0016,
"epoch": 3.207331042382589,
"grad_norm": 0.0024495867546647787,
"learning_rate": 7.96797260083771e-06,
"step": 30800
},
{
"embedding_loss": 0.0017,
"epoch": 3.212537748620223,
"grad_norm": 0.019548872485756874,
"learning_rate": 7.944831416471896e-06,
"step": 30850
},
{
"embedding_loss": 0.0007,
"epoch": 3.217744454857857,
"grad_norm": 0.0044856201857328415,
"learning_rate": 7.921690232106079e-06,
"step": 30900
},
{
"embedding_loss": 0.0014,
"epoch": 3.222951161095491,
"grad_norm": 0.004103007726371288,
"learning_rate": 7.898549047740264e-06,
"step": 30950
},
{
"embedding_loss": 0.0017,
"epoch": 3.2281578673331253,
"grad_norm": 0.1315273493528366,
"learning_rate": 7.875407863374447e-06,
"step": 31000
},
{
"embedding_loss": 0.0013,
"epoch": 3.2333645735707592,
"grad_norm": 0.005692615173757076,
"learning_rate": 7.852266679008632e-06,
"step": 31050
},
{
"embedding_loss": 0.0021,
"epoch": 3.238571279808393,
"grad_norm": 0.002233312465250492,
"learning_rate": 7.829125494642816e-06,
"step": 31100
},
{
"embedding_loss": 0.0012,
"epoch": 3.243777986046027,
"grad_norm": 0.03157159686088562,
"learning_rate": 7.805984310277e-06,
"step": 31150
},
{
"embedding_loss": 0.002,
"epoch": 3.248984692283661,
"grad_norm": 0.008771988563239574,
"learning_rate": 7.782843125911185e-06,
"step": 31200
},
{
"embedding_loss": 0.0007,
"epoch": 3.2541913985212956,
"grad_norm": 0.003804140957072377,
"learning_rate": 7.759701941545369e-06,
"step": 31250
},
{
"embedding_loss": 0.0008,
"epoch": 3.2593981047589295,
"grad_norm": 0.0689612403512001,
"learning_rate": 7.736560757179554e-06,
"step": 31300
},
{
"embedding_loss": 0.0009,
"epoch": 3.2646048109965635,
"grad_norm": 0.07443096488714218,
"learning_rate": 7.713419572813737e-06,
"step": 31350
},
{
"embedding_loss": 0.0008,
"epoch": 3.2698115172341975,
"grad_norm": 0.021511824801564217,
"learning_rate": 7.690278388447922e-06,
"step": 31400
},
{
"embedding_loss": 0.0007,
"epoch": 3.275018223471832,
"grad_norm": 0.004147614352405071,
"learning_rate": 7.667137204082105e-06,
"step": 31450
},
{
"embedding_loss": 0.0014,
"epoch": 3.280224929709466,
"grad_norm": 0.027495531365275383,
"learning_rate": 7.64399601971629e-06,
"step": 31500
},
{
"embedding_loss": 0.0006,
"epoch": 3.2854316359471,
"grad_norm": 0.0021503553725779057,
"learning_rate": 7.620854835350474e-06,
"step": 31550
},
{
"embedding_loss": 0.0002,
"epoch": 3.290638342184734,
"grad_norm": 0.006075483746826649,
"learning_rate": 7.597713650984658e-06,
"step": 31600
},
{
"embedding_loss": 0.0007,
"epoch": 3.2958450484223683,
"grad_norm": 0.005264146253466606,
"learning_rate": 7.574572466618842e-06,
"step": 31650
},
{
"embedding_loss": 0.0024,
"epoch": 3.3010517546600022,
"grad_norm": 0.003356904024258256,
"learning_rate": 7.5514312822530265e-06,
"step": 31700
},
{
"embedding_loss": 0.0003,
"epoch": 3.306258460897636,
"grad_norm": 0.09297136962413788,
"learning_rate": 7.528290097887211e-06,
"step": 31750
},
{
"embedding_loss": 0.0014,
"epoch": 3.31146516713527,
"grad_norm": 0.016923336312174797,
"learning_rate": 7.505148913521395e-06,
"step": 31800
},
{
"embedding_loss": 0.0011,
"epoch": 3.316671873372904,
"grad_norm": 0.09876677393913269,
"learning_rate": 7.482007729155579e-06,
"step": 31850
},
{
"embedding_loss": 0.0024,
"epoch": 3.3218785796105386,
"grad_norm": 0.01769149675965309,
"learning_rate": 7.458866544789764e-06,
"step": 31900
},
{
"embedding_loss": 0.0007,
"epoch": 3.3270852858481725,
"grad_norm": 0.0034628412686288357,
"learning_rate": 7.435725360423948e-06,
"step": 31950
},
{
"embedding_loss": 0.0014,
"epoch": 3.3322919920858065,
"grad_norm": 0.003771421266719699,
"learning_rate": 7.41258417605813e-06,
"step": 32000
},
{
"embedding_loss": 0.0007,
"epoch": 3.3374986983234405,
"grad_norm": 0.003236924996599555,
"learning_rate": 7.389442991692315e-06,
"step": 32050
},
{
"embedding_loss": 0.0011,
"epoch": 3.342705404561075,
"grad_norm": 0.0025784943718463182,
"learning_rate": 7.366301807326499e-06,
"step": 32100
},
{
"embedding_loss": 0.0013,
"epoch": 3.347912110798709,
"grad_norm": 0.00431936327368021,
"learning_rate": 7.3431606229606835e-06,
"step": 32150
},
{
"embedding_loss": 0.0012,
"epoch": 3.353118817036343,
"grad_norm": 0.006928473711013794,
"learning_rate": 7.320019438594868e-06,
"step": 32200
},
{
"embedding_loss": 0.0009,
"epoch": 3.358325523273977,
"grad_norm": 0.002246728865429759,
"learning_rate": 7.296878254229052e-06,
"step": 32250
},
{
"embedding_loss": 0.0009,
"epoch": 3.363532229511611,
"grad_norm": 0.0024311786983162165,
"learning_rate": 7.273737069863236e-06,
"step": 32300
},
{
"embedding_loss": 0.0002,
"epoch": 3.368738935749245,
"grad_norm": 0.33023688197135925,
"learning_rate": 7.25059588549742e-06,
"step": 32350
},
{
"embedding_loss": 0.0009,
"epoch": 3.373945641986879,
"grad_norm": 0.0026327494997531176,
"learning_rate": 7.227454701131604e-06,
"step": 32400
},
{
"embedding_loss": 0.0001,
"epoch": 3.379152348224513,
"grad_norm": 0.0038416869938373566,
"learning_rate": 7.204313516765788e-06,
"step": 32450
},
{
"embedding_loss": 0.0008,
"epoch": 3.384359054462147,
"grad_norm": 0.004634499549865723,
"learning_rate": 7.181172332399972e-06,
"step": 32500
},
{
"embedding_loss": 0.0009,
"epoch": 3.3895657606997815,
"grad_norm": 0.04748839512467384,
"learning_rate": 7.158031148034157e-06,
"step": 32550
},
{
"embedding_loss": 0.0007,
"epoch": 3.3947724669374155,
"grad_norm": 0.00369762210175395,
"learning_rate": 7.134889963668341e-06,
"step": 32600
},
{
"embedding_loss": 0.0033,
"epoch": 3.3999791731750495,
"grad_norm": 0.003863842226564884,
"learning_rate": 7.1117487793025255e-06,
"step": 32650
},
{
"embedding_loss": 0.0001,
"epoch": 3.4051858794126835,
"grad_norm": 0.004467652644962072,
"learning_rate": 7.08860759493671e-06,
"step": 32700
},
{
"embedding_loss": 0.0008,
"epoch": 3.4103925856503174,
"grad_norm": 0.0017069701571017504,
"learning_rate": 7.065466410570894e-06,
"step": 32750
},
{
"embedding_loss": 0.0018,
"epoch": 3.415599291887952,
"grad_norm": 0.0025950015988200903,
"learning_rate": 7.042325226205078e-06,
"step": 32800
},
{
"embedding_loss": 0.0006,
"epoch": 3.420805998125586,
"grad_norm": 0.0029007880948483944,
"learning_rate": 7.019184041839262e-06,
"step": 32850
},
{
"embedding_loss": 0.001,
"epoch": 3.42601270436322,
"grad_norm": 0.003898217109963298,
"learning_rate": 6.996042857473446e-06,
"step": 32900
},
{
"embedding_loss": 0.0008,
"epoch": 3.431219410600854,
"grad_norm": 0.004065630491822958,
"learning_rate": 6.97290167310763e-06,
"step": 32950
},
{
"embedding_loss": 0.0014,
"epoch": 3.436426116838488,
"grad_norm": 0.09971676766872406,
"learning_rate": 6.949760488741814e-06,
"step": 33000
},
{
"embedding_loss": 0.0015,
"epoch": 3.441632823076122,
"grad_norm": 0.004332449287176132,
"learning_rate": 6.926619304375999e-06,
"step": 33050
},
{
"embedding_loss": 0.0002,
"epoch": 3.446839529313756,
"grad_norm": 0.002061097417026758,
"learning_rate": 6.903478120010183e-06,
"step": 33100
},
{
"embedding_loss": 0.0004,
"epoch": 3.45204623555139,
"grad_norm": 0.005368870683014393,
"learning_rate": 6.8803369356443674e-06,
"step": 33150
},
{
"embedding_loss": 0.0011,
"epoch": 3.457252941789024,
"grad_norm": 0.0153218824416399,
"learning_rate": 6.8571957512785516e-06,
"step": 33200
},
{
"embedding_loss": 0.0008,
"epoch": 3.4624596480266585,
"grad_norm": 0.003146272385492921,
"learning_rate": 6.834054566912736e-06,
"step": 33250
},
{
"embedding_loss": 0.0007,
"epoch": 3.4676663542642925,
"grad_norm": 0.005474725738167763,
"learning_rate": 6.81091338254692e-06,
"step": 33300
},
{
"embedding_loss": 0.0023,
"epoch": 3.4728730605019265,
"grad_norm": 0.002225042786449194,
"learning_rate": 6.787772198181104e-06,
"step": 33350
},
{
"embedding_loss": 0.0009,
"epoch": 3.4780797667395604,
"grad_norm": 0.004484266974031925,
"learning_rate": 6.764631013815287e-06,
"step": 33400
},
{
"embedding_loss": 0.0005,
"epoch": 3.483286472977195,
"grad_norm": 0.001994067570194602,
"learning_rate": 6.741489829449471e-06,
"step": 33450
},
{
"embedding_loss": 0.0008,
"epoch": 3.488493179214829,
"grad_norm": 0.002722726669162512,
"learning_rate": 6.718348645083655e-06,
"step": 33500
},
{
"embedding_loss": 0.0003,
"epoch": 3.493699885452463,
"grad_norm": 0.003505149856209755,
"learning_rate": 6.6952074607178395e-06,
"step": 33550
},
{
"embedding_loss": 0.0001,
"epoch": 3.4989065916900968,
"grad_norm": 0.002958771074190736,
"learning_rate": 6.672066276352024e-06,
"step": 33600
},
{
"embedding_loss": 0.0008,
"epoch": 3.5041132979277307,
"grad_norm": 0.007241967599838972,
"learning_rate": 6.648925091986208e-06,
"step": 33650
},
{
"embedding_loss": 0.0009,
"epoch": 3.5093200041653647,
"grad_norm": 0.0048427823930978775,
"learning_rate": 6.625783907620393e-06,
"step": 33700
},
{
"embedding_loss": 0.0011,
"epoch": 3.514526710402999,
"grad_norm": 0.004183737561106682,
"learning_rate": 6.602642723254577e-06,
"step": 33750
},
{
"embedding_loss": 0.0002,
"epoch": 3.519733416640633,
"grad_norm": 0.005011474248021841,
"learning_rate": 6.579501538888761e-06,
"step": 33800
},
{
"embedding_loss": 0.0011,
"epoch": 3.524940122878267,
"grad_norm": 0.04754041135311127,
"learning_rate": 6.556360354522945e-06,
"step": 33850
},
{
"embedding_loss": 0.0011,
"epoch": 3.5301468291159015,
"grad_norm": 0.0030108760111033916,
"learning_rate": 6.533219170157129e-06,
"step": 33900
},
{
"embedding_loss": 0.0003,
"epoch": 3.5353535353535355,
"grad_norm": 0.002894002478569746,
"learning_rate": 6.510077985791313e-06,
"step": 33950
},
{
"embedding_loss": 0.001,
"epoch": 3.5405602415911694,
"grad_norm": 0.01952524110674858,
"learning_rate": 6.486936801425497e-06,
"step": 34000
},
{
"embedding_loss": 0.0003,
"epoch": 3.5457669478288034,
"grad_norm": 0.0029994072392582893,
"learning_rate": 6.4637956170596815e-06,
"step": 34050
},
{
"embedding_loss": 0.0007,
"epoch": 3.5509736540664374,
"grad_norm": 0.006770998239517212,
"learning_rate": 6.4406544326938656e-06,
"step": 34100
},
{
"embedding_loss": 0.0001,
"epoch": 3.556180360304072,
"grad_norm": 0.0302437637001276,
"learning_rate": 6.41751324832805e-06,
"step": 34150
},
{
"embedding_loss": 0.0001,
"epoch": 3.561387066541706,
"grad_norm": 0.002121832687407732,
"learning_rate": 6.394372063962234e-06,
"step": 34200
},
{
"embedding_loss": 0.0002,
"epoch": 3.5665937727793398,
"grad_norm": 0.00341336359269917,
"learning_rate": 6.371230879596419e-06,
"step": 34250
},
{
"embedding_loss": 0.0013,
"epoch": 3.5718004790169737,
"grad_norm": 0.0027454060036689043,
"learning_rate": 6.348089695230603e-06,
"step": 34300
},
{
"embedding_loss": 0.0008,
"epoch": 3.577007185254608,
"grad_norm": 0.0077779293060302734,
"learning_rate": 6.324948510864787e-06,
"step": 34350
},
{
"embedding_loss": 0.0028,
"epoch": 3.582213891492242,
"grad_norm": 0.003297444898635149,
"learning_rate": 6.301807326498971e-06,
"step": 34400
},
{
"embedding_loss": 0.0008,
"epoch": 3.587420597729876,
"grad_norm": 0.004151192959398031,
"learning_rate": 6.278666142133155e-06,
"step": 34450
},
{
"embedding_loss": 0.0002,
"epoch": 3.59262730396751,
"grad_norm": 0.011945868842303753,
"learning_rate": 6.255524957767339e-06,
"step": 34500
},
{
"embedding_loss": 0.0012,
"epoch": 3.597834010205144,
"grad_norm": 0.002266357187181711,
"learning_rate": 6.232383773401523e-06,
"step": 34550
},
{
"embedding_loss": 0.0011,
"epoch": 3.6030407164427785,
"grad_norm": 0.031817760318517685,
"learning_rate": 6.2092425890357075e-06,
"step": 34600
},
{
"embedding_loss": 0.0027,
"epoch": 3.6082474226804124,
"grad_norm": 0.114555723965168,
"learning_rate": 6.186101404669892e-06,
"step": 34650
},
{
"embedding_loss": 0.0002,
"epoch": 3.6134541289180464,
"grad_norm": 0.0052092778496444225,
"learning_rate": 6.162960220304076e-06,
"step": 34700
},
{
"embedding_loss": 0.0011,
"epoch": 3.6186608351556804,
"grad_norm": 0.013743920251727104,
"learning_rate": 6.139819035938261e-06,
"step": 34750
},
{
"embedding_loss": 0.0015,
"epoch": 3.623867541393315,
"grad_norm": 0.04450186714529991,
"learning_rate": 6.116677851572443e-06,
"step": 34800
},
{
"embedding_loss": 0.0014,
"epoch": 3.6290742476309488,
"grad_norm": 0.011497569270431995,
"learning_rate": 6.093536667206627e-06,
"step": 34850
},
{
"embedding_loss": 0.0003,
"epoch": 3.6342809538685827,
"grad_norm": 0.001604217104613781,
"learning_rate": 6.070395482840812e-06,
"step": 34900
},
{
"embedding_loss": 0.0008,
"epoch": 3.6394876601062167,
"grad_norm": 0.014813857153058052,
"learning_rate": 6.047254298474996e-06,
"step": 34950
},
{
"embedding_loss": 0.0007,
"epoch": 3.6446943663438507,
"grad_norm": 0.002726171864196658,
"learning_rate": 6.02411311410918e-06,
"step": 35000
},
{
"embedding_loss": 0.0009,
"epoch": 3.649901072581485,
"grad_norm": 0.0028911526314914227,
"learning_rate": 6.0009719297433645e-06,
"step": 35050
},
{
"embedding_loss": 0.0008,
"epoch": 3.655107778819119,
"grad_norm": 0.0009616083116270602,
"learning_rate": 5.977830745377549e-06,
"step": 35100
},
{
"embedding_loss": 0.0007,
"epoch": 3.660314485056753,
"grad_norm": 0.0013377583818510175,
"learning_rate": 5.954689561011733e-06,
"step": 35150
},
{
"embedding_loss": 0.0005,
"epoch": 3.665521191294387,
"grad_norm": 0.01589621789753437,
"learning_rate": 5.931548376645917e-06,
"step": 35200
},
{
"embedding_loss": 0.0002,
"epoch": 3.6707278975320214,
"grad_norm": 0.05081808194518089,
"learning_rate": 5.908407192280101e-06,
"step": 35250
},
{
"embedding_loss": 0.0005,
"epoch": 3.6759346037696554,
"grad_norm": 0.04854687675833702,
"learning_rate": 5.885266007914285e-06,
"step": 35300
},
{
"embedding_loss": 0.0001,
"epoch": 3.6811413100072894,
"grad_norm": 0.0028674921486526728,
"learning_rate": 5.862124823548469e-06,
"step": 35350
},
{
"embedding_loss": 0.0004,
"epoch": 3.6863480162449234,
"grad_norm": 0.006336590740829706,
"learning_rate": 5.838983639182654e-06,
"step": 35400
},
{
"embedding_loss": 0.0008,
"epoch": 3.6915547224825573,
"grad_norm": 0.002654125215485692,
"learning_rate": 5.815842454816838e-06,
"step": 35450
},
{
"embedding_loss": 0.0008,
"epoch": 3.6967614287201918,
"grad_norm": 0.001202322542667389,
"learning_rate": 5.792701270451022e-06,
"step": 35500
},
{
"embedding_loss": 0.0017,
"epoch": 3.7019681349578257,
"grad_norm": 0.0029784284997731447,
"learning_rate": 5.7695600860852065e-06,
"step": 35550
},
{
"embedding_loss": 0.0004,
"epoch": 3.7071748411954597,
"grad_norm": 0.017423637211322784,
"learning_rate": 5.746418901719391e-06,
"step": 35600
},
{
"embedding_loss": 0.0011,
"epoch": 3.7123815474330937,
"grad_norm": 0.001634717918932438,
"learning_rate": 5.723277717353575e-06,
"step": 35650
},
{
"embedding_loss": 0.0009,
"epoch": 3.717588253670728,
"grad_norm": 0.006617635954171419,
"learning_rate": 5.700136532987759e-06,
"step": 35700
},
{
"embedding_loss": 0.0002,
"epoch": 3.722794959908362,
"grad_norm": 0.0018128909869119525,
"learning_rate": 5.676995348621943e-06,
"step": 35750
},
{
"embedding_loss": 0.0017,
"epoch": 3.728001666145996,
"grad_norm": 0.0016381378518417478,
"learning_rate": 5.653854164256127e-06,
"step": 35800
},
{
"embedding_loss": 0.0001,
"epoch": 3.73320837238363,
"grad_norm": 0.005606998223811388,
"learning_rate": 5.630712979890311e-06,
"step": 35850
},
{
"embedding_loss": 0.0009,
"epoch": 3.738415078621264,
"grad_norm": 0.0032535437494516373,
"learning_rate": 5.607571795524496e-06,
"step": 35900
},
{
"embedding_loss": 0.0002,
"epoch": 3.7436217848588984,
"grad_norm": 0.008722545579075813,
"learning_rate": 5.58443061115868e-06,
"step": 35950
},
{
"embedding_loss": 0.0001,
"epoch": 3.7488284910965324,
"grad_norm": 0.023524988442659378,
"learning_rate": 5.561289426792864e-06,
"step": 36000
},
{
"embedding_loss": 0.0007,
"epoch": 3.7540351973341664,
"grad_norm": 0.006606587208807468,
"learning_rate": 5.5381482424270484e-06,
"step": 36050
},
{
"embedding_loss": 0.0014,
"epoch": 3.7592419035718003,
"grad_norm": 0.0010090703144669533,
"learning_rate": 5.5150070580612326e-06,
"step": 36100
},
{
"embedding_loss": 0.0005,
"epoch": 3.7644486098094347,
"grad_norm": 0.00230466783978045,
"learning_rate": 5.491865873695417e-06,
"step": 36150
},
{
"embedding_loss": 0.0001,
"epoch": 3.7696553160470687,
"grad_norm": 0.004099918529391289,
"learning_rate": 5.468724689329601e-06,
"step": 36200
},
{
"embedding_loss": 0.0002,
"epoch": 3.7748620222847027,
"grad_norm": 0.007035956718027592,
"learning_rate": 5.445583504963784e-06,
"step": 36250
},
{
"embedding_loss": 0.0012,
"epoch": 3.7800687285223367,
"grad_norm": 0.010237271897494793,
"learning_rate": 5.422442320597968e-06,
"step": 36300
},
{
"embedding_loss": 0.0011,
"epoch": 3.7852754347599706,
"grad_norm": 0.0014003911055624485,
"learning_rate": 5.399301136232152e-06,
"step": 36350
},
{
"embedding_loss": 0.0014,
"epoch": 3.790482140997605,
"grad_norm": 0.0034218619111925364,
"learning_rate": 5.376159951866336e-06,
"step": 36400
},
{
"embedding_loss": 0.0004,
"epoch": 3.795688847235239,
"grad_norm": 0.0065358299762010574,
"learning_rate": 5.3530187675005205e-06,
"step": 36450
},
{
"embedding_loss": 0.0001,
"epoch": 3.800895553472873,
"grad_norm": 0.002235516905784607,
"learning_rate": 5.329877583134705e-06,
"step": 36500
},
{
"embedding_loss": 0.0013,
"epoch": 3.806102259710507,
"grad_norm": 0.0020229006186127663,
"learning_rate": 5.3067363987688896e-06,
"step": 36550
},
{
"embedding_loss": 0.0013,
"epoch": 3.8113089659481414,
"grad_norm": 0.0053365700878202915,
"learning_rate": 5.283595214403074e-06,
"step": 36600
},
{
"embedding_loss": 0.0001,
"epoch": 3.8165156721857754,
"grad_norm": 0.011895844712853432,
"learning_rate": 5.260454030037258e-06,
"step": 36650
},
{
"embedding_loss": 0.0008,
"epoch": 3.8217223784234093,
"grad_norm": 0.0022297389805316925,
"learning_rate": 5.237312845671442e-06,
"step": 36700
},
{
"embedding_loss": 0.0008,
"epoch": 3.8269290846610433,
"grad_norm": 0.0022312577348202467,
"learning_rate": 5.214171661305626e-06,
"step": 36750
},
{
"embedding_loss": 0.0008,
"epoch": 3.8321357908986773,
"grad_norm": 0.004529103171080351,
"learning_rate": 5.19103047693981e-06,
"step": 36800
},
{
"embedding_loss": 0.0011,
"epoch": 3.8373424971363117,
"grad_norm": 0.0026438578497618437,
"learning_rate": 5.167889292573994e-06,
"step": 36850
},
{
"embedding_loss": 0.0012,
"epoch": 3.8425492033739457,
"grad_norm": 0.005113155115395784,
"learning_rate": 5.144748108208178e-06,
"step": 36900
},
{
"embedding_loss": 0.001,
"epoch": 3.8477559096115796,
"grad_norm": 0.017756449058651924,
"learning_rate": 5.1216069238423624e-06,
"step": 36950
},
{
"embedding_loss": 0.0007,
"epoch": 3.8529626158492136,
"grad_norm": 0.02352430485188961,
"learning_rate": 5.0984657394765466e-06,
"step": 37000
},
{
"embedding_loss": 0.0002,
"epoch": 3.858169322086848,
"grad_norm": 0.003178997430950403,
"learning_rate": 5.075324555110731e-06,
"step": 37050
},
{
"embedding_loss": 0.0018,
"epoch": 3.863376028324482,
"grad_norm": 0.014370561577379704,
"learning_rate": 5.052183370744916e-06,
"step": 37100
},
{
"embedding_loss": 0.0007,
"epoch": 3.868582734562116,
"grad_norm": 0.0058501786552369595,
"learning_rate": 5.0290421863791e-06,
"step": 37150
},
{
"embedding_loss": 0.0008,
"epoch": 3.87378944079975,
"grad_norm": 0.0018966099014505744,
"learning_rate": 5.005901002013284e-06,
"step": 37200
},
{
"embedding_loss": 0.0002,
"epoch": 3.878996147037384,
"grad_norm": 0.002752570202574134,
"learning_rate": 4.982759817647468e-06,
"step": 37250
},
{
"embedding_loss": 0.0001,
"epoch": 3.8842028532750184,
"grad_norm": 0.003022131510078907,
"learning_rate": 4.959618633281651e-06,
"step": 37300
},
{
"embedding_loss": 0.0016,
"epoch": 3.8894095595126523,
"grad_norm": 0.006553678773343563,
"learning_rate": 4.936477448915835e-06,
"step": 37350
},
{
"embedding_loss": 0.0001,
"epoch": 3.8946162657502863,
"grad_norm": 0.0018002489814534783,
"learning_rate": 4.91333626455002e-06,
"step": 37400
},
{
"embedding_loss": 0.0001,
"epoch": 3.8998229719879207,
"grad_norm": 0.001831809408031404,
"learning_rate": 4.890195080184204e-06,
"step": 37450
},
{
"embedding_loss": 0.0001,
"epoch": 3.9050296782255547,
"grad_norm": 0.0025375783443450928,
"learning_rate": 4.8670538958183885e-06,
"step": 37500
},
{
"embedding_loss": 0.0009,
"epoch": 3.9102363844631887,
"grad_norm": 0.004411675967276096,
"learning_rate": 4.843912711452573e-06,
"step": 37550
},
{
"embedding_loss": 0.0013,
"epoch": 3.9154430907008226,
"grad_norm": 0.009407658129930496,
"learning_rate": 4.820771527086757e-06,
"step": 37600
},
{
"embedding_loss": 0.0001,
"epoch": 3.9206497969384566,
"grad_norm": 0.002038530306890607,
"learning_rate": 4.797630342720941e-06,
"step": 37650
},
{
"embedding_loss": 0.0018,
"epoch": 3.9258565031760906,
"grad_norm": 0.017908206209540367,
"learning_rate": 4.774489158355125e-06,
"step": 37700
},
{
"embedding_loss": 0.0009,
"epoch": 3.931063209413725,
"grad_norm": 0.0015109736705198884,
"learning_rate": 4.751347973989309e-06,
"step": 37750
},
{
"embedding_loss": 0.0008,
"epoch": 3.936269915651359,
"grad_norm": 0.0064455061219632626,
"learning_rate": 4.728206789623493e-06,
"step": 37800
},
{
"embedding_loss": 0.0004,
"epoch": 3.941476621888993,
"grad_norm": 0.0861746221780777,
"learning_rate": 4.705065605257677e-06,
"step": 37850
},
{
"embedding_loss": 0.0006,
"epoch": 3.9466833281266274,
"grad_norm": 0.0038613975048065186,
"learning_rate": 4.681924420891862e-06,
"step": 37900
},
{
"embedding_loss": 0.0003,
"epoch": 3.9518900343642613,
"grad_norm": 0.03049100562930107,
"learning_rate": 4.658783236526046e-06,
"step": 37950
},
{
"embedding_loss": 0.0015,
"epoch": 3.9570967406018953,
"grad_norm": 0.0011294811265543103,
"learning_rate": 4.63564205216023e-06,
"step": 38000
},
{
"embedding_loss": 0.0008,
"epoch": 3.9623034468395293,
"grad_norm": 0.003215038450434804,
"learning_rate": 4.612500867794414e-06,
"step": 38050
},
{
"embedding_loss": 0.0001,
"epoch": 3.9675101530771633,
"grad_norm": 0.002345999237149954,
"learning_rate": 4.589359683428598e-06,
"step": 38100
},
{
"embedding_loss": 0.0001,
"epoch": 3.9727168593147972,
"grad_norm": 0.0017487540608271956,
"learning_rate": 4.566218499062782e-06,
"step": 38150
},
{
"embedding_loss": 0.0001,
"epoch": 3.9779235655524317,
"grad_norm": 0.002043676795437932,
"learning_rate": 4.543077314696966e-06,
"step": 38200
},
{
"embedding_loss": 0.002,
"epoch": 3.9831302717900656,
"grad_norm": 0.0033409446477890015,
"learning_rate": 4.519936130331151e-06,
"step": 38250
},
{
"embedding_loss": 0.0002,
"epoch": 3.9883369780276996,
"grad_norm": 0.1487550139427185,
"learning_rate": 4.496794945965335e-06,
"step": 38300
},
{
"embedding_loss": 0.0001,
"epoch": 3.993543684265334,
"grad_norm": 0.003084618365392089,
"learning_rate": 4.473653761599519e-06,
"step": 38350
},
{
"embedding_loss": 0.0011,
"epoch": 3.998750390502968,
"grad_norm": 0.004644028376787901,
"learning_rate": 4.450512577233703e-06,
"step": 38400
},
{
"embedding_loss": 0.0001,
"epoch": 4.003957096740602,
"grad_norm": 0.004961004480719566,
"learning_rate": 4.4273713928678875e-06,
"step": 38450
},
{
"embedding_loss": 0.0002,
"epoch": 4.009163802978236,
"grad_norm": 0.0015071636298671365,
"learning_rate": 4.404230208502072e-06,
"step": 38500
},
{
"embedding_loss": 0.0016,
"epoch": 4.01437050921587,
"grad_norm": 0.0037345190066844225,
"learning_rate": 4.381089024136256e-06,
"step": 38550
},
{
"embedding_loss": 0.0013,
"epoch": 4.019577215453504,
"grad_norm": 0.0011363272788003087,
"learning_rate": 4.35794783977044e-06,
"step": 38600
},
{
"embedding_loss": 0.0015,
"epoch": 4.024783921691138,
"grad_norm": 0.00206565810367465,
"learning_rate": 4.334806655404624e-06,
"step": 38650
},
{
"embedding_loss": 0.0003,
"epoch": 4.029990627928772,
"grad_norm": 0.0060499319806694984,
"learning_rate": 4.311665471038808e-06,
"step": 38700
},
{
"embedding_loss": 0.0002,
"epoch": 4.035197334166407,
"grad_norm": 0.0032932923641055822,
"learning_rate": 4.288524286672992e-06,
"step": 38750
},
{
"embedding_loss": 0.0001,
"epoch": 4.040404040404041,
"grad_norm": 0.002779960399493575,
"learning_rate": 4.265383102307176e-06,
"step": 38800
},
{
"embedding_loss": 0.0014,
"epoch": 4.045610746641675,
"grad_norm": 0.037588316947221756,
"learning_rate": 4.24224191794136e-06,
"step": 38850
},
{
"embedding_loss": 0.0014,
"epoch": 4.050817452879309,
"grad_norm": 0.0021385361906141043,
"learning_rate": 4.2191007335755445e-06,
"step": 38900
},
{
"embedding_loss": 0.0011,
"epoch": 4.056024159116943,
"grad_norm": 0.002653073286637664,
"learning_rate": 4.195959549209729e-06,
"step": 38950
},
{
"embedding_loss": 0.0002,
"epoch": 4.061230865354577,
"grad_norm": 0.003462142078205943,
"learning_rate": 4.172818364843913e-06,
"step": 39000
},
{
"embedding_loss": 0.0014,
"epoch": 4.0664375715922105,
"grad_norm": 0.0043731373734772205,
"learning_rate": 4.149677180478097e-06,
"step": 39050
},
{
"embedding_loss": 0.002,
"epoch": 4.0716442778298445,
"grad_norm": 0.0031473205890506506,
"learning_rate": 4.126535996112282e-06,
"step": 39100
},
{
"embedding_loss": 0.0007,
"epoch": 4.0768509840674785,
"grad_norm": 0.0068083652295172215,
"learning_rate": 4.103394811746466e-06,
"step": 39150
},
{
"embedding_loss": 0.0014,
"epoch": 4.082057690305113,
"grad_norm": 0.0017057887744158506,
"learning_rate": 4.08025362738065e-06,
"step": 39200
},
{
"embedding_loss": 0.0008,
"epoch": 4.087264396542747,
"grad_norm": 0.0034488628152757883,
"learning_rate": 4.057112443014834e-06,
"step": 39250
},
{
"embedding_loss": 0.0001,
"epoch": 4.092471102780381,
"grad_norm": 0.004338666331022978,
"learning_rate": 4.033971258649018e-06,
"step": 39300
},
{
"embedding_loss": 0.0008,
"epoch": 4.097677809018015,
"grad_norm": 0.001688474789261818,
"learning_rate": 4.010830074283202e-06,
"step": 39350
},
{
"embedding_loss": 0.0001,
"epoch": 4.102884515255649,
"grad_norm": 0.005720613989979029,
"learning_rate": 3.9876888899173864e-06,
"step": 39400
},
{
"embedding_loss": 0.0001,
"epoch": 4.108091221493283,
"grad_norm": 0.010131466202437878,
"learning_rate": 3.9645477055515705e-06,
"step": 39450
},
{
"embedding_loss": 0.0008,
"epoch": 4.113297927730917,
"grad_norm": 0.00117829954251647,
"learning_rate": 3.941406521185755e-06,
"step": 39500
},
{
"embedding_loss": 0.0001,
"epoch": 4.118504633968551,
"grad_norm": 0.013248096220195293,
"learning_rate": 3.918265336819939e-06,
"step": 39550
},
{
"embedding_loss": 0.0002,
"epoch": 4.123711340206185,
"grad_norm": 0.005844116676598787,
"learning_rate": 3.895124152454123e-06,
"step": 39600
},
{
"embedding_loss": 0.0005,
"epoch": 4.12891804644382,
"grad_norm": 0.002914564684033394,
"learning_rate": 3.871982968088307e-06,
"step": 39650
},
{
"embedding_loss": 0.0004,
"epoch": 4.134124752681454,
"grad_norm": 0.003980652429163456,
"learning_rate": 3.848841783722491e-06,
"step": 39700
},
{
"embedding_loss": 0.0001,
"epoch": 4.139331458919088,
"grad_norm": 0.004351139068603516,
"learning_rate": 3.825700599356675e-06,
"step": 39750
},
{
"embedding_loss": 0.0009,
"epoch": 4.144538165156722,
"grad_norm": 0.00460411561653018,
"learning_rate": 3.8025594149908597e-06,
"step": 39800
},
{
"embedding_loss": 0.0001,
"epoch": 4.149744871394356,
"grad_norm": 0.002258758759126067,
"learning_rate": 3.779418230625044e-06,
"step": 39850
},
{
"embedding_loss": 0.0007,
"epoch": 4.15495157763199,
"grad_norm": 0.00147035694681108,
"learning_rate": 3.756277046259228e-06,
"step": 39900
},
{
"embedding_loss": 0.0013,
"epoch": 4.160158283869624,
"grad_norm": 0.008124323561787605,
"learning_rate": 3.733135861893412e-06,
"step": 39950
},
{
"embedding_loss": 0.0004,
"epoch": 4.165364990107258,
"grad_norm": 0.006330924108624458,
"learning_rate": 3.709994677527596e-06,
"step": 40000
},
{
"embedding_loss": 0.0003,
"epoch": 4.170571696344892,
"grad_norm": 0.0018023628508672118,
"learning_rate": 3.6868534931617807e-06,
"step": 40050
},
{
"embedding_loss": 0.0008,
"epoch": 4.175778402582527,
"grad_norm": 0.06845732778310776,
"learning_rate": 3.663712308795965e-06,
"step": 40100
},
{
"embedding_loss": 0.0007,
"epoch": 4.180985108820161,
"grad_norm": 0.05048598721623421,
"learning_rate": 3.6405711244301485e-06,
"step": 40150
},
{
"embedding_loss": 0.0004,
"epoch": 4.186191815057795,
"grad_norm": 0.008092716336250305,
"learning_rate": 3.6174299400643326e-06,
"step": 40200
},
{
"embedding_loss": 0.0007,
"epoch": 4.191398521295429,
"grad_norm": 0.0009377990500070155,
"learning_rate": 3.5942887556985167e-06,
"step": 40250
},
{
"embedding_loss": 0.0013,
"epoch": 4.1966052275330625,
"grad_norm": 0.0021307109855115414,
"learning_rate": 3.571147571332701e-06,
"step": 40300
},
{
"embedding_loss": 0.0003,
"epoch": 4.2018119337706965,
"grad_norm": 0.007595045492053032,
"learning_rate": 3.548006386966885e-06,
"step": 40350
},
{
"embedding_loss": 0.0007,
"epoch": 4.2070186400083305,
"grad_norm": 0.0017604045569896698,
"learning_rate": 3.5248652026010695e-06,
"step": 40400
},
{
"embedding_loss": 0.001,
"epoch": 4.2122253462459645,
"grad_norm": 0.0040459102019667625,
"learning_rate": 3.5017240182352536e-06,
"step": 40450
},
{
"embedding_loss": 0.0001,
"epoch": 4.217432052483598,
"grad_norm": 0.0473860502243042,
"learning_rate": 3.4785828338694377e-06,
"step": 40500
},
{
"embedding_loss": 0.0007,
"epoch": 4.222638758721233,
"grad_norm": 0.002496903296560049,
"learning_rate": 3.455441649503622e-06,
"step": 40550
},
{
"embedding_loss": 0.0001,
"epoch": 4.227845464958867,
"grad_norm": 0.007270964793860912,
"learning_rate": 3.432300465137806e-06,
"step": 40600
},
{
"embedding_loss": 0.0007,
"epoch": 4.233052171196501,
"grad_norm": 0.013144693337380886,
"learning_rate": 3.4091592807719905e-06,
"step": 40650
},
{
"embedding_loss": 0.0008,
"epoch": 4.238258877434135,
"grad_norm": 0.003847824176773429,
"learning_rate": 3.3860180964061746e-06,
"step": 40700
},
{
"embedding_loss": 0.0008,
"epoch": 4.243465583671769,
"grad_norm": 0.0023708331864327192,
"learning_rate": 3.3628769120403587e-06,
"step": 40750
},
{
"embedding_loss": 0.0007,
"epoch": 4.248672289909403,
"grad_norm": 0.018748441711068153,
"learning_rate": 3.339735727674543e-06,
"step": 40800
},
{
"embedding_loss": 0.0015,
"epoch": 4.253878996147037,
"grad_norm": 0.0020937493536621332,
"learning_rate": 3.3165945433087265e-06,
"step": 40850
},
{
"embedding_loss": 0.0001,
"epoch": 4.259085702384671,
"grad_norm": 0.042171407490968704,
"learning_rate": 3.2934533589429106e-06,
"step": 40900
},
{
"embedding_loss": 0.0001,
"epoch": 4.264292408622305,
"grad_norm": 0.00801966805011034,
"learning_rate": 3.2703121745770947e-06,
"step": 40950
},
{
"embedding_loss": 0.0002,
"epoch": 4.26949911485994,
"grad_norm": 0.008358903229236603,
"learning_rate": 3.2471709902112793e-06,
"step": 41000
},
{
"embedding_loss": 0.0001,
"epoch": 4.274705821097574,
"grad_norm": 0.001534903421998024,
"learning_rate": 3.2240298058454634e-06,
"step": 41050
},
{
"embedding_loss": 0.0001,
"epoch": 4.279912527335208,
"grad_norm": 0.0019458031747490168,
"learning_rate": 3.2008886214796475e-06,
"step": 41100
},
{
"embedding_loss": 0.0001,
"epoch": 4.285119233572842,
"grad_norm": 0.004779054783284664,
"learning_rate": 3.1777474371138316e-06,
"step": 41150
},
{
"embedding_loss": 0.0021,
"epoch": 4.290325939810476,
"grad_norm": 0.0033252162393182516,
"learning_rate": 3.1546062527480157e-06,
"step": 41200
},
{
"embedding_loss": 0.0013,
"epoch": 4.29553264604811,
"grad_norm": 0.0036777497734874487,
"learning_rate": 3.1314650683822002e-06,
"step": 41250
},
{
"embedding_loss": 0.001,
"epoch": 4.300739352285744,
"grad_norm": 0.0024451168719679117,
"learning_rate": 3.1083238840163844e-06,
"step": 41300
},
{
"embedding_loss": 0.0009,
"epoch": 4.305946058523378,
"grad_norm": 0.0027761063538491726,
"learning_rate": 3.0851826996505685e-06,
"step": 41350
},
{
"embedding_loss": 0.0001,
"epoch": 4.311152764761012,
"grad_norm": 0.002929074689745903,
"learning_rate": 3.0620415152847526e-06,
"step": 41400
},
{
"embedding_loss": 0.0001,
"epoch": 4.316359470998647,
"grad_norm": 0.26386216282844543,
"learning_rate": 3.0389003309189367e-06,
"step": 41450
},
{
"embedding_loss": 0.0007,
"epoch": 4.321566177236281,
"grad_norm": 0.004391273949295282,
"learning_rate": 3.0157591465531212e-06,
"step": 41500
},
{
"embedding_loss": 0.0007,
"epoch": 4.3267728834739145,
"grad_norm": 0.0026139123365283012,
"learning_rate": 2.992617962187305e-06,
"step": 41550
},
{
"embedding_loss": 0.0003,
"epoch": 4.3319795897115485,
"grad_norm": 0.03610287979245186,
"learning_rate": 2.969476777821489e-06,
"step": 41600
},
{
"embedding_loss": 0.0007,
"epoch": 4.3371862959491825,
"grad_norm": 0.0036759632639586926,
"learning_rate": 2.946335593455673e-06,
"step": 41650
},
{
"embedding_loss": 0.0001,
"epoch": 4.3423930021868165,
"grad_norm": 0.004986033774912357,
"learning_rate": 2.9231944090898572e-06,
"step": 41700
},
{
"embedding_loss": 0.0009,
"epoch": 4.34759970842445,
"grad_norm": 0.0021286620758473873,
"learning_rate": 2.9000532247240414e-06,
"step": 41750
},
{
"embedding_loss": 0.0013,
"epoch": 4.352806414662084,
"grad_norm": 0.0035934702027589083,
"learning_rate": 2.876912040358226e-06,
"step": 41800
},
{
"embedding_loss": 0.0001,
"epoch": 4.358013120899718,
"grad_norm": 0.0023505541030317545,
"learning_rate": 2.85377085599241e-06,
"step": 41850
},
{
"embedding_loss": 0.0019,
"epoch": 4.363219827137353,
"grad_norm": 0.002859236905351281,
"learning_rate": 2.830629671626594e-06,
"step": 41900
},
{
"embedding_loss": 0.0002,
"epoch": 4.368426533374987,
"grad_norm": 0.019494347274303436,
"learning_rate": 2.8074884872607782e-06,
"step": 41950
},
{
"embedding_loss": 0.0002,
"epoch": 4.373633239612621,
"grad_norm": 0.0020093335770070553,
"learning_rate": 2.7843473028949623e-06,
"step": 42000
},
{
"embedding_loss": 0.0006,
"epoch": 4.378839945850255,
"grad_norm": 0.003382645780220628,
"learning_rate": 2.761206118529147e-06,
"step": 42050
},
{
"embedding_loss": 0.0007,
"epoch": 4.384046652087889,
"grad_norm": 0.0038147750310599804,
"learning_rate": 2.738064934163331e-06,
"step": 42100
},
{
"embedding_loss": 0.0001,
"epoch": 4.389253358325523,
"grad_norm": 0.014739004895091057,
"learning_rate": 2.714923749797515e-06,
"step": 42150
},
{
"embedding_loss": 0.0006,
"epoch": 4.394460064563157,
"grad_norm": 0.003537252312526107,
"learning_rate": 2.691782565431699e-06,
"step": 42200
},
{
"embedding_loss": 0.0024,
"epoch": 4.399666770800791,
"grad_norm": 0.005448461975902319,
"learning_rate": 2.6686413810658833e-06,
"step": 42250
},
{
"embedding_loss": 0.0007,
"epoch": 4.404873477038425,
"grad_norm": 0.004431063774973154,
"learning_rate": 2.645500196700067e-06,
"step": 42300
},
{
"embedding_loss": 0.0008,
"epoch": 4.41008018327606,
"grad_norm": 0.08171793073415756,
"learning_rate": 2.622359012334251e-06,
"step": 42350
},
{
"embedding_loss": 0.0014,
"epoch": 4.415286889513694,
"grad_norm": 3.030642509460449,
"learning_rate": 2.5992178279684356e-06,
"step": 42400
},
{
"embedding_loss": 0.0001,
"epoch": 4.420493595751328,
"grad_norm": 0.00229825172573328,
"learning_rate": 2.5760766436026198e-06,
"step": 42450
},
{
"embedding_loss": 0.0007,
"epoch": 4.425700301988962,
"grad_norm": 0.0022334696259349585,
"learning_rate": 2.552935459236804e-06,
"step": 42500
},
{
"embedding_loss": 0.0001,
"epoch": 4.430907008226596,
"grad_norm": 0.006273935548961163,
"learning_rate": 2.529794274870988e-06,
"step": 42550
},
{
"embedding_loss": 0.0011,
"epoch": 4.43611371446423,
"grad_norm": 0.002443622797727585,
"learning_rate": 2.506653090505172e-06,
"step": 42600
},
{
"embedding_loss": 0.0015,
"epoch": 4.441320420701864,
"grad_norm": 0.007955342531204224,
"learning_rate": 2.4835119061393566e-06,
"step": 42650
},
{
"embedding_loss": 0.0008,
"epoch": 4.446527126939498,
"grad_norm": 0.005112164653837681,
"learning_rate": 2.4603707217735407e-06,
"step": 42700
},
{
"embedding_loss": 0.0001,
"epoch": 4.451733833177133,
"grad_norm": 0.007230122108012438,
"learning_rate": 2.437229537407725e-06,
"step": 42750
},
{
"embedding_loss": 0.0006,
"epoch": 4.4569405394147665,
"grad_norm": 0.0016728178597986698,
"learning_rate": 2.414088353041909e-06,
"step": 42800
},
{
"embedding_loss": 0.0004,
"epoch": 4.4621472456524005,
"grad_norm": 0.003500057151541114,
"learning_rate": 2.390947168676093e-06,
"step": 42850
},
{
"embedding_loss": 0.0008,
"epoch": 4.4673539518900345,
"grad_norm": 0.0011205794289708138,
"learning_rate": 2.367805984310277e-06,
"step": 42900
},
{
"embedding_loss": 0.0017,
"epoch": 4.4725606581276685,
"grad_norm": 0.0030274472665041685,
"learning_rate": 2.3446647999444613e-06,
"step": 42950
},
{
"embedding_loss": 0.0001,
"epoch": 4.477767364365302,
"grad_norm": 0.0006662325467914343,
"learning_rate": 2.3215236155786454e-06,
"step": 43000
},
{
"embedding_loss": 0.0005,
"epoch": 4.482974070602936,
"grad_norm": 0.003703465685248375,
"learning_rate": 2.2983824312128295e-06,
"step": 43050
},
{
"embedding_loss": 0.0012,
"epoch": 4.48818077684057,
"grad_norm": 0.0016818788135424256,
"learning_rate": 2.275241246847014e-06,
"step": 43100
},
{
"embedding_loss": 0.0002,
"epoch": 4.493387483078204,
"grad_norm": 0.020755505189299583,
"learning_rate": 2.2521000624811977e-06,
"step": 43150
},
{
"embedding_loss": 0.0014,
"epoch": 4.498594189315839,
"grad_norm": 0.0026131754275411367,
"learning_rate": 2.228958878115382e-06,
"step": 43200
},
{
"embedding_loss": 0.0007,
"epoch": 4.503800895553473,
"grad_norm": 0.010769600979983807,
"learning_rate": 2.2058176937495664e-06,
"step": 43250
},
{
"embedding_loss": 0.0003,
"epoch": 4.509007601791107,
"grad_norm": 0.0036240960471332073,
"learning_rate": 2.1826765093837505e-06,
"step": 43300
},
{
"embedding_loss": 0.0026,
"epoch": 4.514214308028741,
"grad_norm": 0.0036824876442551613,
"learning_rate": 2.1595353250179346e-06,
"step": 43350
},
{
"embedding_loss": 0.0001,
"epoch": 4.519421014266375,
"grad_norm": 0.005997397005558014,
"learning_rate": 2.1363941406521187e-06,
"step": 43400
},
{
"embedding_loss": 0.0024,
"epoch": 4.524627720504009,
"grad_norm": 0.0047904313541948795,
"learning_rate": 2.113252956286303e-06,
"step": 43450
},
{
"embedding_loss": 0.0001,
"epoch": 4.529834426741643,
"grad_norm": 0.01296373549848795,
"learning_rate": 2.0901117719204874e-06,
"step": 43500
},
{
"embedding_loss": 0.0001,
"epoch": 4.535041132979277,
"grad_norm": 0.0019739430863410234,
"learning_rate": 2.066970587554671e-06,
"step": 43550
},
{
"embedding_loss": 0.0001,
"epoch": 4.540247839216911,
"grad_norm": 0.003413254162296653,
"learning_rate": 2.043829403188855e-06,
"step": 43600
},
{
"embedding_loss": 0.0015,
"epoch": 4.545454545454545,
"grad_norm": 0.005623187869787216,
"learning_rate": 2.0206882188230397e-06,
"step": 43650
},
{
"embedding_loss": 0.0007,
"epoch": 4.55066125169218,
"grad_norm": 0.002288981107994914,
"learning_rate": 1.997547034457224e-06,
"step": 43700
},
{
"embedding_loss": 0.0013,
"epoch": 4.555867957929814,
"grad_norm": 0.0009039235883392394,
"learning_rate": 1.974405850091408e-06,
"step": 43750
},
{
"embedding_loss": 0.0018,
"epoch": 4.561074664167448,
"grad_norm": 0.004320364445447922,
"learning_rate": 1.951264665725592e-06,
"step": 43800
},
{
"embedding_loss": 0.0001,
"epoch": 4.566281370405082,
"grad_norm": 0.0032452233135700226,
"learning_rate": 1.928123481359776e-06,
"step": 43850
},
{
"embedding_loss": 0.0014,
"epoch": 4.571488076642716,
"grad_norm": 0.0020324711222201586,
"learning_rate": 1.9049822969939603e-06,
"step": 43900
},
{
"embedding_loss": 0.001,
"epoch": 4.57669478288035,
"grad_norm": 0.0033108368515968323,
"learning_rate": 1.8818411126281444e-06,
"step": 43950
},
{
"embedding_loss": 0.0005,
"epoch": 4.581901489117984,
"grad_norm": 0.002229843521490693,
"learning_rate": 1.8586999282623287e-06,
"step": 44000
},
{
"embedding_loss": 0.0001,
"epoch": 4.5871081953556185,
"grad_norm": 0.0032805639784783125,
"learning_rate": 1.8355587438965128e-06,
"step": 44050
},
{
"embedding_loss": 0.0008,
"epoch": 4.592314901593252,
"grad_norm": 0.001333653461188078,
"learning_rate": 1.812417559530697e-06,
"step": 44100
},
{
"embedding_loss": 0.0007,
"epoch": 4.5975216078308865,
"grad_norm": 0.3373894691467285,
"learning_rate": 1.7892763751648812e-06,
"step": 44150
},
{
"embedding_loss": 0.0016,
"epoch": 4.6027283140685205,
"grad_norm": 0.0035874065943062305,
"learning_rate": 1.7661351907990653e-06,
"step": 44200
},
{
"embedding_loss": 0.0004,
"epoch": 4.607935020306154,
"grad_norm": 0.001685873605310917,
"learning_rate": 1.7429940064332492e-06,
"step": 44250
},
{
"embedding_loss": 0.0001,
"epoch": 4.613141726543788,
"grad_norm": 0.0019293057266622782,
"learning_rate": 1.7198528220674336e-06,
"step": 44300
},
{
"embedding_loss": 0.0001,
"epoch": 4.618348432781422,
"grad_norm": 0.017738085240125656,
"learning_rate": 1.6967116377016177e-06,
"step": 44350
},
{
"embedding_loss": 0.0001,
"epoch": 4.623555139019056,
"grad_norm": 0.00891903880983591,
"learning_rate": 1.6735704533358018e-06,
"step": 44400
},
{
"embedding_loss": 0.0013,
"epoch": 4.62876184525669,
"grad_norm": 0.001540567958727479,
"learning_rate": 1.6504292689699861e-06,
"step": 44450
},
{
"embedding_loss": 0.0002,
"epoch": 4.633968551494325,
"grad_norm": 0.03680149465799332,
"learning_rate": 1.6272880846041702e-06,
"step": 44500
},
{
"embedding_loss": 0.0001,
"epoch": 4.639175257731958,
"grad_norm": 0.0019971744623035192,
"learning_rate": 1.6041469002383545e-06,
"step": 44550
},
{
"embedding_loss": 0.0001,
"epoch": 4.644381963969593,
"grad_norm": 0.004468118771910667,
"learning_rate": 1.5810057158725384e-06,
"step": 44600
},
{
"embedding_loss": 0.0008,
"epoch": 4.649588670207227,
"grad_norm": 0.028531698510050774,
"learning_rate": 1.5578645315067226e-06,
"step": 44650
},
{
"embedding_loss": 0.0001,
"epoch": 4.654795376444861,
"grad_norm": 0.003770474810153246,
"learning_rate": 1.5347233471409067e-06,
"step": 44700
},
{
"embedding_loss": 0.0013,
"epoch": 4.660002082682495,
"grad_norm": 0.0017020407831296325,
"learning_rate": 1.511582162775091e-06,
"step": 44750
},
{
"embedding_loss": 0.001,
"epoch": 4.665208788920129,
"grad_norm": 0.0020539036486297846,
"learning_rate": 1.488440978409275e-06,
"step": 44800
},
{
"embedding_loss": 0.0009,
"epoch": 4.670415495157763,
"grad_norm": 0.0035052604507654905,
"learning_rate": 1.4652997940434594e-06,
"step": 44850
},
{
"embedding_loss": 0.0001,
"epoch": 4.675622201395397,
"grad_norm": 0.003664996474981308,
"learning_rate": 1.4421586096776435e-06,
"step": 44900
},
{
"embedding_loss": 0.0,
"epoch": 4.680828907633032,
"grad_norm": 0.002150058513507247,
"learning_rate": 1.4190174253118274e-06,
"step": 44950
},
{
"embedding_loss": 0.0007,
"epoch": 4.686035613870665,
"grad_norm": 0.0027224977966398,
"learning_rate": 1.3958762409460115e-06,
"step": 45000
},
{
"embedding_loss": 0.0009,
"epoch": 4.6912423201083,
"grad_norm": 0.002212725579738617,
"learning_rate": 1.3727350565801959e-06,
"step": 45050
},
{
"embedding_loss": 0.0007,
"epoch": 4.696449026345934,
"grad_norm": 0.0020792309660464525,
"learning_rate": 1.34959387221438e-06,
"step": 45100
},
{
"embedding_loss": 0.0001,
"epoch": 4.701655732583568,
"grad_norm": 0.0052679735235869884,
"learning_rate": 1.3264526878485643e-06,
"step": 45150
},
{
"embedding_loss": 0.001,
"epoch": 4.706862438821202,
"grad_norm": 0.0041116694919764996,
"learning_rate": 1.3033115034827484e-06,
"step": 45200
},
{
"embedding_loss": 0.0003,
"epoch": 4.712069145058836,
"grad_norm": 0.003463329281657934,
"learning_rate": 1.2801703191169325e-06,
"step": 45250
},
{
"embedding_loss": 0.0007,
"epoch": 4.71727585129647,
"grad_norm": 0.0025421089958399534,
"learning_rate": 1.2570291347511166e-06,
"step": 45300
},
{
"embedding_loss": 0.001,
"epoch": 4.722482557534104,
"grad_norm": 0.0016696372767910361,
"learning_rate": 1.233887950385301e-06,
"step": 45350
},
{
"embedding_loss": 0.0013,
"epoch": 4.7276892637717385,
"grad_norm": 0.005751196760684252,
"learning_rate": 1.2107467660194849e-06,
"step": 45400
},
{
"embedding_loss": 0.0019,
"epoch": 4.7328959700093725,
"grad_norm": 0.013020163401961327,
"learning_rate": 1.1876055816536692e-06,
"step": 45450
},
{
"embedding_loss": 0.0009,
"epoch": 4.738102676247006,
"grad_norm": 0.004354926757514477,
"learning_rate": 1.1644643972878533e-06,
"step": 45500
},
{
"embedding_loss": 0.0009,
"epoch": 4.74330938248464,
"grad_norm": 0.003605367848649621,
"learning_rate": 1.1413232129220374e-06,
"step": 45550
},
{
"embedding_loss": 0.001,
"epoch": 4.748516088722274,
"grad_norm": 0.0030561047606170177,
"learning_rate": 1.1181820285562215e-06,
"step": 45600
},
{
"embedding_loss": 0.0,
"epoch": 4.753722794959908,
"grad_norm": 0.007909784093499184,
"learning_rate": 1.0950408441904058e-06,
"step": 45650
},
{
"embedding_loss": 0.0007,
"epoch": 4.758929501197542,
"grad_norm": 0.002514626132324338,
"learning_rate": 1.07189965982459e-06,
"step": 45700
},
{
"embedding_loss": 0.0008,
"epoch": 4.764136207435176,
"grad_norm": 0.0016800053417682648,
"learning_rate": 1.048758475458774e-06,
"step": 45750
},
{
"embedding_loss": 0.0,
"epoch": 4.76934291367281,
"grad_norm": 0.004000342451035976,
"learning_rate": 1.0256172910929582e-06,
"step": 45800
},
{
"embedding_loss": 0.0002,
"epoch": 4.774549619910445,
"grad_norm": 0.001277065253816545,
"learning_rate": 1.0024761067271425e-06,
"step": 45850
},
{
"embedding_loss": 0.0,
"epoch": 4.779756326148079,
"grad_norm": 0.004461308475583792,
"learning_rate": 9.793349223613266e-07,
"step": 45900
},
{
"embedding_loss": 0.0004,
"epoch": 4.784963032385713,
"grad_norm": 0.0651107132434845,
"learning_rate": 9.561937379955107e-07,
"step": 45950
},
{
"embedding_loss": 0.0007,
"epoch": 4.790169738623347,
"grad_norm": 0.0018568108789622784,
"learning_rate": 9.330525536296948e-07,
"step": 46000
},
{
"embedding_loss": 0.0007,
"epoch": 4.795376444860981,
"grad_norm": 0.004890389274805784,
"learning_rate": 9.09911369263879e-07,
"step": 46050
},
{
"embedding_loss": 0.0007,
"epoch": 4.800583151098615,
"grad_norm": 0.002689856104552746,
"learning_rate": 8.867701848980631e-07,
"step": 46100
},
{
"embedding_loss": 0.0,
"epoch": 4.805789857336249,
"grad_norm": 0.003079883521422744,
"learning_rate": 8.636290005322473e-07,
"step": 46150
},
{
"embedding_loss": 0.0005,
"epoch": 4.810996563573883,
"grad_norm": 0.0018577250884845853,
"learning_rate": 8.404878161664315e-07,
"step": 46200
},
{
"embedding_loss": 0.0001,
"epoch": 4.816203269811517,
"grad_norm": 0.004618423525243998,
"learning_rate": 8.173466318006157e-07,
"step": 46250
},
{
"embedding_loss": 0.0008,
"epoch": 4.821409976049152,
"grad_norm": 0.004892790224403143,
"learning_rate": 7.942054474347997e-07,
"step": 46300
},
{
"embedding_loss": 0.001,
"epoch": 4.826616682286786,
"grad_norm": 0.003912623040378094,
"learning_rate": 7.710642630689839e-07,
"step": 46350
},
{
"embedding_loss": 0.0007,
"epoch": 4.83182338852442,
"grad_norm": 0.007159634493291378,
"learning_rate": 7.479230787031681e-07,
"step": 46400
},
{
"embedding_loss": 0.0001,
"epoch": 4.837030094762054,
"grad_norm": 0.0023596896789968014,
"learning_rate": 7.247818943373522e-07,
"step": 46450
},
{
"embedding_loss": 0.0008,
"epoch": 4.842236800999688,
"grad_norm": 0.010279769077897072,
"learning_rate": 7.016407099715364e-07,
"step": 46500
},
{
"embedding_loss": 0.0002,
"epoch": 4.847443507237322,
"grad_norm": 0.008691814728081226,
"learning_rate": 6.784995256057206e-07,
"step": 46550
},
{
"embedding_loss": 0.0001,
"epoch": 4.852650213474956,
"grad_norm": 0.003329548519104719,
"learning_rate": 6.553583412399048e-07,
"step": 46600
},
{
"embedding_loss": 0.0009,
"epoch": 4.85785691971259,
"grad_norm": 0.004583888687193394,
"learning_rate": 6.322171568740888e-07,
"step": 46650
},
{
"embedding_loss": 0.0001,
"epoch": 4.863063625950224,
"grad_norm": 0.02980988658964634,
"learning_rate": 6.09075972508273e-07,
"step": 46700
},
{
"embedding_loss": 0.0014,
"epoch": 4.868270332187858,
"grad_norm": 0.007974829524755478,
"learning_rate": 5.859347881424571e-07,
"step": 46750
},
{
"embedding_loss": 0.0005,
"epoch": 4.873477038425492,
"grad_norm": 0.0035474197939038277,
"learning_rate": 5.627936037766414e-07,
"step": 46800
},
{
"embedding_loss": 0.0001,
"epoch": 4.878683744663126,
"grad_norm": 0.010695052333176136,
"learning_rate": 5.396524194108255e-07,
"step": 46850
},
{
"embedding_loss": 0.0001,
"epoch": 4.88389045090076,
"grad_norm": 0.002433580346405506,
"learning_rate": 5.165112350450097e-07,
"step": 46900
},
{
"embedding_loss": 0.0,
"epoch": 4.889097157138394,
"grad_norm": 0.003585429862141609,
"learning_rate": 4.933700506791938e-07,
"step": 46950
},
{
"embedding_loss": 0.001,
"epoch": 4.894303863376028,
"grad_norm": 0.31530049443244934,
"learning_rate": 4.70228866313378e-07,
"step": 47000
},
{
"embedding_loss": 0.001,
"epoch": 4.899510569613662,
"grad_norm": 0.0049338992685079575,
"learning_rate": 4.470876819475621e-07,
"step": 47050
},
{
"embedding_loss": 0.0013,
"epoch": 4.904717275851296,
"grad_norm": 0.00397633807733655,
"learning_rate": 4.239464975817463e-07,
"step": 47100
},
{
"embedding_loss": 0.0002,
"epoch": 4.90992398208893,
"grad_norm": 0.0035032695159316063,
"learning_rate": 4.0080531321593045e-07,
"step": 47150
},
{
"embedding_loss": 0.0001,
"epoch": 4.915130688326565,
"grad_norm": 0.0034136222675442696,
"learning_rate": 3.7766412885011456e-07,
"step": 47200
},
{
"embedding_loss": 0.0002,
"epoch": 4.920337394564199,
"grad_norm": 0.20558778941631317,
"learning_rate": 3.545229444842987e-07,
"step": 47250
},
{
"embedding_loss": 0.0018,
"epoch": 4.925544100801833,
"grad_norm": 0.010641155764460564,
"learning_rate": 3.313817601184829e-07,
"step": 47300
},
{
"embedding_loss": 0.0001,
"epoch": 4.930750807039467,
"grad_norm": 0.0028619503136724234,
"learning_rate": 3.0824057575266705e-07,
"step": 47350
},
{
"embedding_loss": 0.0001,
"epoch": 4.935957513277101,
"grad_norm": 0.001628124387934804,
"learning_rate": 2.850993913868512e-07,
"step": 47400
},
{
"embedding_loss": 0.0005,
"epoch": 4.941164219514735,
"grad_norm": 0.0017882351530715823,
"learning_rate": 2.6195820702103533e-07,
"step": 47450
},
{
"embedding_loss": 0.0008,
"epoch": 4.946370925752369,
"grad_norm": 0.00220202817581594,
"learning_rate": 2.388170226552195e-07,
"step": 47500
},
{
"embedding_loss": 0.0013,
"epoch": 4.951577631990003,
"grad_norm": 0.001214580493979156,
"learning_rate": 2.1567583828940368e-07,
"step": 47550
},
{
"embedding_loss": 0.0007,
"epoch": 4.956784338227637,
"grad_norm": 0.0018583645578473806,
"learning_rate": 1.9253465392358785e-07,
"step": 47600
},
{
"embedding_loss": 0.0004,
"epoch": 4.961991044465272,
"grad_norm": 0.005768468137830496,
"learning_rate": 1.6939346955777198e-07,
"step": 47650
},
{
"embedding_loss": 0.0003,
"epoch": 4.967197750702906,
"grad_norm": 0.0034024049527943134,
"learning_rate": 1.4625228519195615e-07,
"step": 47700
},
{
"embedding_loss": 0.0022,
"epoch": 4.97240445694054,
"grad_norm": 0.001480701263062656,
"learning_rate": 1.2311110082614029e-07,
"step": 47750
},
{
"embedding_loss": 0.0008,
"epoch": 4.977611163178174,
"grad_norm": 0.0032351568806916475,
"learning_rate": 9.996991646032445e-08,
"step": 47800
},
{
"embedding_loss": 0.0006,
"epoch": 4.982817869415808,
"grad_norm": 0.03766478970646858,
"learning_rate": 7.68287320945086e-08,
"step": 47850
},
{
"embedding_loss": 0.0,
"epoch": 4.988024575653442,
"grad_norm": 0.0029770000837743282,
"learning_rate": 5.368754772869276e-08,
"step": 47900
},
{
"embedding_loss": 0.0007,
"epoch": 4.993231281891076,
"grad_norm": 0.001190900569781661,
"learning_rate": 3.0546363362876916e-08,
"step": 47950
},
{
"embedding_loss": 0.0001,
"epoch": 4.9984379881287095,
"grad_norm": 0.019358443096280098,
"learning_rate": 7.40517899706107e-09,
"step": 48000
}
],
"logging_steps": 50,
"max_steps": 48015,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}