{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 48015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "embedding_loss": 0.2883, "epoch": 0.00010413412475268145, "grad_norm": 1.762959361076355, "learning_rate": 0.0, "step": 1 }, { "embedding_loss": 0.2973, "epoch": 0.005206706237634072, "grad_norm": 2.416536569595337, "learning_rate": 2.0408163265306121e-07, "step": 50 }, { "embedding_loss": 0.2757, "epoch": 0.010413412475268145, "grad_norm": 1.1051920652389526, "learning_rate": 4.1232819658475635e-07, "step": 100 }, { "embedding_loss": 0.2678, "epoch": 0.015620118712902219, "grad_norm": 1.2399098873138428, "learning_rate": 6.205747605164515e-07, "step": 150 }, { "embedding_loss": 0.2554, "epoch": 0.02082682495053629, "grad_norm": 0.9736790657043457, "learning_rate": 8.288213244481466e-07, "step": 200 }, { "embedding_loss": 0.2485, "epoch": 0.026033531188170363, "grad_norm": 0.9768863916397095, "learning_rate": 1.037067888379842e-06, "step": 250 }, { "embedding_loss": 0.2472, "epoch": 0.031240237425804437, "grad_norm": 1.2124693393707275, "learning_rate": 1.2453144523115369e-06, "step": 300 }, { "embedding_loss": 0.2309, "epoch": 0.03644694366343851, "grad_norm": 1.0609492063522339, "learning_rate": 1.453561016243232e-06, "step": 350 }, { "embedding_loss": 0.225, "epoch": 0.04165364990107258, "grad_norm": 0.9569733142852783, "learning_rate": 1.6618075801749272e-06, "step": 400 }, { "embedding_loss": 0.2123, "epoch": 0.046860356138706656, "grad_norm": 1.1237151622772217, "learning_rate": 1.8700541441066226e-06, "step": 450 }, { "embedding_loss": 0.2045, "epoch": 0.05206706237634073, "grad_norm": 1.0526118278503418, "learning_rate": 2.0783007080383173e-06, "step": 500 }, { "embedding_loss": 0.1934, "epoch": 0.0572737686139748, "grad_norm": 1.1583402156829834, "learning_rate": 2.2865472719700125e-06, "step": 550 }, { "embedding_loss": 0.184, "epoch": 0.062480474851608875, "grad_norm": 1.313284158706665, "learning_rate": 2.494793835901708e-06, "step": 600 }, { "embedding_loss": 0.183, "epoch": 0.06768718108924295, "grad_norm": 1.2073508501052856, "learning_rate": 2.7030403998334032e-06, "step": 650 }, { "embedding_loss": 0.171, "epoch": 0.07289388732687702, "grad_norm": 0.9187403321266174, "learning_rate": 2.911286963765098e-06, "step": 700 }, { "embedding_loss": 0.1752, "epoch": 0.07810059356451109, "grad_norm": 1.049507737159729, "learning_rate": 3.119533527696793e-06, "step": 750 }, { "embedding_loss": 0.1627, "epoch": 0.08330729980214516, "grad_norm": 1.696567416191101, "learning_rate": 3.3277800916284887e-06, "step": 800 }, { "embedding_loss": 0.1623, "epoch": 0.08851400603977924, "grad_norm": 1.0722424983978271, "learning_rate": 3.5360266555601835e-06, "step": 850 }, { "embedding_loss": 0.1557, "epoch": 0.09372071227741331, "grad_norm": 0.8424978256225586, "learning_rate": 3.7442732194918786e-06, "step": 900 }, { "embedding_loss": 0.151, "epoch": 0.09892741851504738, "grad_norm": 1.0043538808822632, "learning_rate": 3.952519783423574e-06, "step": 950 }, { "embedding_loss": 0.1424, "epoch": 0.10413412475268145, "grad_norm": 0.910914957523346, "learning_rate": 4.160766347355269e-06, "step": 1000 }, { "embedding_loss": 0.1347, "epoch": 0.10934083099031552, "grad_norm": 1.1411229372024536, "learning_rate": 4.369012911286964e-06, "step": 1050 }, { "embedding_loss": 0.1342, "epoch": 0.1145475372279496, "grad_norm": 1.1177036762237549, "learning_rate": 4.57725947521866e-06, "step": 1100 }, { "embedding_loss": 0.1282, "epoch": 0.11975424346558367, "grad_norm": 1.1329469680786133, "learning_rate": 4.785506039150354e-06, "step": 1150 }, { "embedding_loss": 0.1309, "epoch": 0.12496094970321775, "grad_norm": 1.26534104347229, "learning_rate": 4.993752603082049e-06, "step": 1200 }, { "embedding_loss": 0.1198, "epoch": 0.1301676559408518, "grad_norm": 0.8742411136627197, "learning_rate": 5.201999167013745e-06, "step": 1250 }, { "embedding_loss": 0.1182, "epoch": 0.1353743621784859, "grad_norm": 1.0818109512329102, "learning_rate": 5.41024573094544e-06, "step": 1300 }, { "embedding_loss": 0.1099, "epoch": 0.14058106841611998, "grad_norm": 0.9232504963874817, "learning_rate": 5.618492294877135e-06, "step": 1350 }, { "embedding_loss": 0.1019, "epoch": 0.14578777465375403, "grad_norm": 1.013424277305603, "learning_rate": 5.826738858808831e-06, "step": 1400 }, { "embedding_loss": 0.0982, "epoch": 0.15099448089138812, "grad_norm": 1.4098010063171387, "learning_rate": 6.034985422740526e-06, "step": 1450 }, { "embedding_loss": 0.1038, "epoch": 0.15620118712902217, "grad_norm": 1.015489935874939, "learning_rate": 6.24323198667222e-06, "step": 1500 }, { "embedding_loss": 0.1064, "epoch": 0.16140789336665626, "grad_norm": 1.4240305423736572, "learning_rate": 6.451478550603915e-06, "step": 1550 }, { "embedding_loss": 0.1007, "epoch": 0.16661459960429031, "grad_norm": 1.1620804071426392, "learning_rate": 6.659725114535611e-06, "step": 1600 }, { "embedding_loss": 0.0899, "epoch": 0.1718213058419244, "grad_norm": 2.043187379837036, "learning_rate": 6.867971678467306e-06, "step": 1650 }, { "embedding_loss": 0.1019, "epoch": 0.17702801207955848, "grad_norm": 1.0461031198501587, "learning_rate": 7.076218242399001e-06, "step": 1700 }, { "embedding_loss": 0.0954, "epoch": 0.18223471831719254, "grad_norm": 1.0107996463775635, "learning_rate": 7.284464806330697e-06, "step": 1750 }, { "embedding_loss": 0.0799, "epoch": 0.18744142455482662, "grad_norm": 1.0582354068756104, "learning_rate": 7.492711370262391e-06, "step": 1800 }, { "embedding_loss": 0.0864, "epoch": 0.19264813079246068, "grad_norm": 1.3842886686325073, "learning_rate": 7.700957934194086e-06, "step": 1850 }, { "embedding_loss": 0.0863, "epoch": 0.19785483703009477, "grad_norm": 1.2879295349121094, "learning_rate": 7.909204498125781e-06, "step": 1900 }, { "embedding_loss": 0.0823, "epoch": 0.20306154326772882, "grad_norm": 1.2666908502578735, "learning_rate": 8.117451062057477e-06, "step": 1950 }, { "embedding_loss": 0.083, "epoch": 0.2082682495053629, "grad_norm": 1.3312140703201294, "learning_rate": 8.325697625989172e-06, "step": 2000 }, { "embedding_loss": 0.0887, "epoch": 0.213474955742997, "grad_norm": 0.9761432409286499, "learning_rate": 8.533944189920867e-06, "step": 2050 }, { "embedding_loss": 0.0796, "epoch": 0.21868166198063105, "grad_norm": 1.1199424266815186, "learning_rate": 8.742190753852562e-06, "step": 2100 }, { "embedding_loss": 0.0827, "epoch": 0.22388836821826513, "grad_norm": 1.4127182960510254, "learning_rate": 8.950437317784257e-06, "step": 2150 }, { "embedding_loss": 0.0745, "epoch": 0.2290950744558992, "grad_norm": 2.0330944061279297, "learning_rate": 9.158683881715952e-06, "step": 2200 }, { "embedding_loss": 0.0752, "epoch": 0.23430178069353327, "grad_norm": 1.4823620319366455, "learning_rate": 9.366930445647648e-06, "step": 2250 }, { "embedding_loss": 0.0676, "epoch": 0.23950848693116733, "grad_norm": 1.2921267747879028, "learning_rate": 9.575177009579343e-06, "step": 2300 }, { "embedding_loss": 0.0616, "epoch": 0.24471519316880141, "grad_norm": 1.8864023685455322, "learning_rate": 9.783423573511038e-06, "step": 2350 }, { "embedding_loss": 0.0661, "epoch": 0.2499218994064355, "grad_norm": 1.3003889322280884, "learning_rate": 9.991670137442733e-06, "step": 2400 }, { "embedding_loss": 0.0587, "epoch": 0.2551286056440696, "grad_norm": 1.0105185508728027, "learning_rate": 1.0199916701374428e-05, "step": 2450 }, { "embedding_loss": 0.0576, "epoch": 0.2603353118817036, "grad_norm": 1.3157322406768799, "learning_rate": 1.0408163265306123e-05, "step": 2500 }, { "embedding_loss": 0.0548, "epoch": 0.2655420181193377, "grad_norm": 1.5181084871292114, "learning_rate": 1.0616409829237819e-05, "step": 2550 }, { "embedding_loss": 0.0549, "epoch": 0.2707487243569718, "grad_norm": 0.9998575448989868, "learning_rate": 1.0824656393169512e-05, "step": 2600 }, { "embedding_loss": 0.0542, "epoch": 0.27595543059460587, "grad_norm": 1.8933945894241333, "learning_rate": 1.1032902957101209e-05, "step": 2650 }, { "embedding_loss": 0.0551, "epoch": 0.28116213683223995, "grad_norm": 0.8569897413253784, "learning_rate": 1.1241149521032904e-05, "step": 2700 }, { "embedding_loss": 0.0535, "epoch": 0.286368843069874, "grad_norm": 2.692810297012329, "learning_rate": 1.14493960849646e-05, "step": 2750 }, { "embedding_loss": 0.0512, "epoch": 0.29157554930750806, "grad_norm": 1.0588935613632202, "learning_rate": 1.1657642648896294e-05, "step": 2800 }, { "embedding_loss": 0.0471, "epoch": 0.29678225554514215, "grad_norm": 1.4624029397964478, "learning_rate": 1.186588921282799e-05, "step": 2850 }, { "embedding_loss": 0.0418, "epoch": 0.30198896178277623, "grad_norm": 1.4698013067245483, "learning_rate": 1.2074135776759683e-05, "step": 2900 }, { "embedding_loss": 0.0456, "epoch": 0.30719566802041026, "grad_norm": 2.7604193687438965, "learning_rate": 1.2282382340691378e-05, "step": 2950 }, { "embedding_loss": 0.0426, "epoch": 0.31240237425804435, "grad_norm": 1.8741382360458374, "learning_rate": 1.2490628904623075e-05, "step": 3000 }, { "embedding_loss": 0.0381, "epoch": 0.31760908049567843, "grad_norm": 1.6704306602478027, "learning_rate": 1.269887546855477e-05, "step": 3050 }, { "embedding_loss": 0.0476, "epoch": 0.3228157867333125, "grad_norm": 1.6957885026931763, "learning_rate": 1.2907122032486465e-05, "step": 3100 }, { "embedding_loss": 0.0384, "epoch": 0.3280224929709466, "grad_norm": 1.3916475772857666, "learning_rate": 1.311536859641816e-05, "step": 3150 }, { "embedding_loss": 0.0398, "epoch": 0.33322919920858063, "grad_norm": 1.3985787630081177, "learning_rate": 1.3323615160349854e-05, "step": 3200 }, { "embedding_loss": 0.0383, "epoch": 0.3384359054462147, "grad_norm": 1.0203100442886353, "learning_rate": 1.3531861724281549e-05, "step": 3250 }, { "embedding_loss": 0.0355, "epoch": 0.3436426116838488, "grad_norm": 1.1589559316635132, "learning_rate": 1.3740108288213246e-05, "step": 3300 }, { "embedding_loss": 0.0391, "epoch": 0.3488493179214829, "grad_norm": 0.5909947752952576, "learning_rate": 1.3948354852144941e-05, "step": 3350 }, { "embedding_loss": 0.0376, "epoch": 0.35405602415911697, "grad_norm": 1.096784234046936, "learning_rate": 1.4156601416076636e-05, "step": 3400 }, { "embedding_loss": 0.0372, "epoch": 0.359262730396751, "grad_norm": 2.7773685455322266, "learning_rate": 1.4364847980008331e-05, "step": 3450 }, { "embedding_loss": 0.0354, "epoch": 0.3644694366343851, "grad_norm": 1.3911575078964233, "learning_rate": 1.4573094543940025e-05, "step": 3500 }, { "embedding_loss": 0.0292, "epoch": 0.36967614287201916, "grad_norm": 1.4253603219985962, "learning_rate": 1.478134110787172e-05, "step": 3550 }, { "embedding_loss": 0.0341, "epoch": 0.37488284910965325, "grad_norm": 0.5666287541389465, "learning_rate": 1.4989587671803415e-05, "step": 3600 }, { "embedding_loss": 0.032, "epoch": 0.38008955534728733, "grad_norm": 0.7250155210494995, "learning_rate": 1.5197834235735112e-05, "step": 3650 }, { "embedding_loss": 0.0312, "epoch": 0.38529626158492136, "grad_norm": 1.2066556215286255, "learning_rate": 1.5406080799666807e-05, "step": 3700 }, { "embedding_loss": 0.0286, "epoch": 0.39050296782255545, "grad_norm": 1.999817967414856, "learning_rate": 1.5614327363598502e-05, "step": 3750 }, { "embedding_loss": 0.0303, "epoch": 0.39570967406018953, "grad_norm": 2.4242656230926514, "learning_rate": 1.5822573927530198e-05, "step": 3800 }, { "embedding_loss": 0.0321, "epoch": 0.4009163802978236, "grad_norm": 1.0087485313415527, "learning_rate": 1.6030820491461893e-05, "step": 3850 }, { "embedding_loss": 0.0291, "epoch": 0.40612308653545764, "grad_norm": 0.9711636900901794, "learning_rate": 1.6239067055393588e-05, "step": 3900 }, { "embedding_loss": 0.0267, "epoch": 0.41132979277309173, "grad_norm": 2.3323183059692383, "learning_rate": 1.6447313619325283e-05, "step": 3950 }, { "embedding_loss": 0.0315, "epoch": 0.4165364990107258, "grad_norm": 0.25967147946357727, "learning_rate": 1.6655560183256978e-05, "step": 4000 }, { "embedding_loss": 0.026, "epoch": 0.4217432052483599, "grad_norm": 0.5975779294967651, "learning_rate": 1.6863806747188673e-05, "step": 4050 }, { "embedding_loss": 0.0222, "epoch": 0.426949911485994, "grad_norm": 2.1180572509765625, "learning_rate": 1.707205331112037e-05, "step": 4100 }, { "embedding_loss": 0.0226, "epoch": 0.432156617723628, "grad_norm": 2.647836923599243, "learning_rate": 1.7280299875052064e-05, "step": 4150 }, { "embedding_loss": 0.025, "epoch": 0.4373633239612621, "grad_norm": 1.7642154693603516, "learning_rate": 1.748854643898376e-05, "step": 4200 }, { "embedding_loss": 0.0258, "epoch": 0.4425700301988962, "grad_norm": 0.6594904065132141, "learning_rate": 1.7696793002915454e-05, "step": 4250 }, { "embedding_loss": 0.0227, "epoch": 0.44777673643653026, "grad_norm": 1.1913025379180908, "learning_rate": 1.790503956684715e-05, "step": 4300 }, { "embedding_loss": 0.025, "epoch": 0.45298344267416435, "grad_norm": 2.221813201904297, "learning_rate": 1.8113286130778844e-05, "step": 4350 }, { "embedding_loss": 0.0192, "epoch": 0.4581901489117984, "grad_norm": 0.7045194506645203, "learning_rate": 1.832153269471054e-05, "step": 4400 }, { "embedding_loss": 0.0214, "epoch": 0.46339685514943246, "grad_norm": 1.9471172094345093, "learning_rate": 1.8529779258642235e-05, "step": 4450 }, { "embedding_loss": 0.0232, "epoch": 0.46860356138706655, "grad_norm": 0.8062028288841248, "learning_rate": 1.873802582257393e-05, "step": 4500 }, { "embedding_loss": 0.0201, "epoch": 0.47381026762470063, "grad_norm": 0.2602122724056244, "learning_rate": 1.8946272386505625e-05, "step": 4550 }, { "embedding_loss": 0.0234, "epoch": 0.47901697386233466, "grad_norm": 0.2223815619945526, "learning_rate": 1.915451895043732e-05, "step": 4600 }, { "embedding_loss": 0.0206, "epoch": 0.48422368009996875, "grad_norm": 1.995078682899475, "learning_rate": 1.9362765514369015e-05, "step": 4650 }, { "embedding_loss": 0.0228, "epoch": 0.48943038633760283, "grad_norm": 1.3003852367401123, "learning_rate": 1.957101207830071e-05, "step": 4700 }, { "embedding_loss": 0.0194, "epoch": 0.4946370925752369, "grad_norm": 0.10561434924602509, "learning_rate": 1.9779258642232402e-05, "step": 4750 }, { "embedding_loss": 0.0197, "epoch": 0.499843798812871, "grad_norm": 0.3040192425251007, "learning_rate": 1.99875052061641e-05, "step": 4800 }, { "embedding_loss": 0.0167, "epoch": 0.5050505050505051, "grad_norm": 1.1733126640319824, "learning_rate": 1.9978247286696136e-05, "step": 4850 }, { "embedding_loss": 0.0244, "epoch": 0.5102572112881392, "grad_norm": 1.6774014234542847, "learning_rate": 1.995510610233032e-05, "step": 4900 }, { "embedding_loss": 0.0152, "epoch": 0.5154639175257731, "grad_norm": 0.4171350300312042, "learning_rate": 1.9931964917964502e-05, "step": 4950 }, { "embedding_loss": 0.0191, "epoch": 0.5206706237634072, "grad_norm": 0.1614975482225418, "learning_rate": 1.990882373359869e-05, "step": 5000 }, { "embedding_loss": 0.0174, "epoch": 0.5258773300010413, "grad_norm": 1.8693324327468872, "learning_rate": 1.9885682549232872e-05, "step": 5050 }, { "embedding_loss": 0.0242, "epoch": 0.5310840362386754, "grad_norm": 0.42837658524513245, "learning_rate": 1.9862541364867056e-05, "step": 5100 }, { "embedding_loss": 0.0202, "epoch": 0.5362907424763095, "grad_norm": 1.6628985404968262, "learning_rate": 1.983940018050124e-05, "step": 5150 }, { "embedding_loss": 0.0186, "epoch": 0.5414974487139436, "grad_norm": 0.25852930545806885, "learning_rate": 1.9816258996135426e-05, "step": 5200 }, { "embedding_loss": 0.0172, "epoch": 0.5467041549515776, "grad_norm": 1.1750682592391968, "learning_rate": 1.979311781176961e-05, "step": 5250 }, { "embedding_loss": 0.0208, "epoch": 0.5519108611892117, "grad_norm": 3.196448564529419, "learning_rate": 1.9769976627403792e-05, "step": 5300 }, { "embedding_loss": 0.0185, "epoch": 0.5571175674268458, "grad_norm": 0.057756174355745316, "learning_rate": 1.974683544303798e-05, "step": 5350 }, { "embedding_loss": 0.0177, "epoch": 0.5623242736644799, "grad_norm": 0.08262369781732559, "learning_rate": 1.9723694258672162e-05, "step": 5400 }, { "embedding_loss": 0.0169, "epoch": 0.5675309799021139, "grad_norm": 1.7123504877090454, "learning_rate": 1.9700553074306345e-05, "step": 5450 }, { "embedding_loss": 0.0186, "epoch": 0.572737686139748, "grad_norm": 0.9821128249168396, "learning_rate": 1.967741188994053e-05, "step": 5500 }, { "embedding_loss": 0.014, "epoch": 0.577944392377382, "grad_norm": 0.0532955676317215, "learning_rate": 1.9654270705574715e-05, "step": 5550 }, { "embedding_loss": 0.0166, "epoch": 0.5831510986150161, "grad_norm": 0.13396751880645752, "learning_rate": 1.96311295212089e-05, "step": 5600 }, { "embedding_loss": 0.0129, "epoch": 0.5883578048526502, "grad_norm": 0.14156009256839752, "learning_rate": 1.9607988336843082e-05, "step": 5650 }, { "embedding_loss": 0.0114, "epoch": 0.5935645110902843, "grad_norm": 0.9108484387397766, "learning_rate": 1.9584847152477265e-05, "step": 5700 }, { "embedding_loss": 0.0152, "epoch": 0.5987712173279184, "grad_norm": 0.18236614763736725, "learning_rate": 1.9561705968111448e-05, "step": 5750 }, { "embedding_loss": 0.0134, "epoch": 0.6039779235655525, "grad_norm": 1.664371371269226, "learning_rate": 1.953856478374563e-05, "step": 5800 }, { "embedding_loss": 0.0156, "epoch": 0.6091846298031866, "grad_norm": 0.9308391213417053, "learning_rate": 1.9515423599379818e-05, "step": 5850 }, { "embedding_loss": 0.0144, "epoch": 0.6143913360408205, "grad_norm": 0.10233943164348602, "learning_rate": 1.9492282415014e-05, "step": 5900 }, { "embedding_loss": 0.0128, "epoch": 0.6195980422784546, "grad_norm": 0.1212044283747673, "learning_rate": 1.9469141230648185e-05, "step": 5950 }, { "embedding_loss": 0.0141, "epoch": 0.6248047485160887, "grad_norm": 0.03808877244591713, "learning_rate": 1.9446000046282368e-05, "step": 6000 }, { "embedding_loss": 0.0129, "epoch": 0.6300114547537228, "grad_norm": 1.263411521911621, "learning_rate": 1.9422858861916555e-05, "step": 6050 }, { "embedding_loss": 0.0128, "epoch": 0.6352181609913569, "grad_norm": 1.1934914588928223, "learning_rate": 1.9399717677550738e-05, "step": 6100 }, { "embedding_loss": 0.0131, "epoch": 0.640424867228991, "grad_norm": 0.5088186264038086, "learning_rate": 1.937657649318492e-05, "step": 6150 }, { "embedding_loss": 0.0133, "epoch": 0.645631573466625, "grad_norm": 0.2881380319595337, "learning_rate": 1.9353435308819108e-05, "step": 6200 }, { "embedding_loss": 0.012, "epoch": 0.6508382797042591, "grad_norm": 0.1978471279144287, "learning_rate": 1.933029412445329e-05, "step": 6250 }, { "embedding_loss": 0.0131, "epoch": 0.6560449859418932, "grad_norm": 1.7363338470458984, "learning_rate": 1.9307152940087474e-05, "step": 6300 }, { "embedding_loss": 0.0124, "epoch": 0.6612516921795273, "grad_norm": 0.1304263323545456, "learning_rate": 1.9284011755721658e-05, "step": 6350 }, { "embedding_loss": 0.0138, "epoch": 0.6664583984171613, "grad_norm": 0.15851274132728577, "learning_rate": 1.9260870571355844e-05, "step": 6400 }, { "embedding_loss": 0.0141, "epoch": 0.6716651046547953, "grad_norm": 1.2692539691925049, "learning_rate": 1.9237729386990027e-05, "step": 6450 }, { "embedding_loss": 0.0106, "epoch": 0.6768718108924294, "grad_norm": 0.30289334058761597, "learning_rate": 1.921458820262421e-05, "step": 6500 }, { "embedding_loss": 0.0125, "epoch": 0.6820785171300635, "grad_norm": 0.02164456807076931, "learning_rate": 1.9191447018258394e-05, "step": 6550 }, { "embedding_loss": 0.0132, "epoch": 0.6872852233676976, "grad_norm": 0.34430477023124695, "learning_rate": 1.916830583389258e-05, "step": 6600 }, { "embedding_loss": 0.0143, "epoch": 0.6924919296053317, "grad_norm": 0.2521458864212036, "learning_rate": 1.9145164649526764e-05, "step": 6650 }, { "embedding_loss": 0.0127, "epoch": 0.6976986358429658, "grad_norm": 0.6990224719047546, "learning_rate": 1.9122023465160947e-05, "step": 6700 }, { "embedding_loss": 0.014, "epoch": 0.7029053420805998, "grad_norm": 0.07170717418193817, "learning_rate": 1.9098882280795134e-05, "step": 6750 }, { "embedding_loss": 0.0118, "epoch": 0.7081120483182339, "grad_norm": 0.18331408500671387, "learning_rate": 1.9075741096429317e-05, "step": 6800 }, { "embedding_loss": 0.0116, "epoch": 0.7133187545558679, "grad_norm": 1.0223900079727173, "learning_rate": 1.90525999120635e-05, "step": 6850 }, { "embedding_loss": 0.0101, "epoch": 0.718525460793502, "grad_norm": 0.08013039082288742, "learning_rate": 1.9029458727697684e-05, "step": 6900 }, { "embedding_loss": 0.0119, "epoch": 0.7237321670311361, "grad_norm": 1.7682616710662842, "learning_rate": 1.900631754333187e-05, "step": 6950 }, { "embedding_loss": 0.0095, "epoch": 0.7289388732687702, "grad_norm": 0.10093237459659576, "learning_rate": 1.8983176358966054e-05, "step": 7000 }, { "embedding_loss": 0.009, "epoch": 0.7341455795064042, "grad_norm": 2.3766512870788574, "learning_rate": 1.8960035174600237e-05, "step": 7050 }, { "embedding_loss": 0.0086, "epoch": 0.7393522857440383, "grad_norm": 0.019304808229207993, "learning_rate": 1.893689399023442e-05, "step": 7100 }, { "embedding_loss": 0.0083, "epoch": 0.7445589919816724, "grad_norm": 0.08419201523065567, "learning_rate": 1.8913752805868607e-05, "step": 7150 }, { "embedding_loss": 0.0144, "epoch": 0.7497656982193065, "grad_norm": 1.224936842918396, "learning_rate": 1.889061162150279e-05, "step": 7200 }, { "embedding_loss": 0.0107, "epoch": 0.7549724044569406, "grad_norm": 0.3462272584438324, "learning_rate": 1.8867470437136973e-05, "step": 7250 }, { "embedding_loss": 0.0088, "epoch": 0.7601791106945747, "grad_norm": 0.03107067011296749, "learning_rate": 1.884432925277116e-05, "step": 7300 }, { "embedding_loss": 0.0096, "epoch": 0.7653858169322086, "grad_norm": 0.12990835309028625, "learning_rate": 1.8821188068405343e-05, "step": 7350 }, { "embedding_loss": 0.0073, "epoch": 0.7705925231698427, "grad_norm": 0.10413219034671783, "learning_rate": 1.8798046884039526e-05, "step": 7400 }, { "embedding_loss": 0.0063, "epoch": 0.7757992294074768, "grad_norm": 0.39868220686912537, "learning_rate": 1.877490569967371e-05, "step": 7450 }, { "embedding_loss": 0.0096, "epoch": 0.7810059356451109, "grad_norm": 0.4435900151729584, "learning_rate": 1.8751764515307896e-05, "step": 7500 }, { "embedding_loss": 0.0091, "epoch": 0.786212641882745, "grad_norm": 1.100035309791565, "learning_rate": 1.872862333094208e-05, "step": 7550 }, { "embedding_loss": 0.01, "epoch": 0.7914193481203791, "grad_norm": 0.12100836634635925, "learning_rate": 1.8705482146576263e-05, "step": 7600 }, { "embedding_loss": 0.0093, "epoch": 0.7966260543580131, "grad_norm": 0.38435640931129456, "learning_rate": 1.868234096221045e-05, "step": 7650 }, { "embedding_loss": 0.0121, "epoch": 0.8018327605956472, "grad_norm": 0.8386930823326111, "learning_rate": 1.8659199777844633e-05, "step": 7700 }, { "embedding_loss": 0.014, "epoch": 0.8070394668332813, "grad_norm": 0.4830886423587799, "learning_rate": 1.8636058593478816e-05, "step": 7750 }, { "embedding_loss": 0.0078, "epoch": 0.8122461730709153, "grad_norm": 0.026604199782013893, "learning_rate": 1.8612917409113e-05, "step": 7800 }, { "embedding_loss": 0.0082, "epoch": 0.8174528793085494, "grad_norm": 0.5969211459159851, "learning_rate": 1.8589776224747186e-05, "step": 7850 }, { "embedding_loss": 0.0086, "epoch": 0.8226595855461835, "grad_norm": 0.06108603999018669, "learning_rate": 1.856663504038137e-05, "step": 7900 }, { "embedding_loss": 0.0066, "epoch": 0.8278662917838175, "grad_norm": 0.3239186406135559, "learning_rate": 1.8543493856015553e-05, "step": 7950 }, { "embedding_loss": 0.0112, "epoch": 0.8330729980214516, "grad_norm": 0.2972595989704132, "learning_rate": 1.8520352671649736e-05, "step": 8000 }, { "embedding_loss": 0.0073, "epoch": 0.8382797042590857, "grad_norm": 0.533140242099762, "learning_rate": 1.8497211487283922e-05, "step": 8050 }, { "embedding_loss": 0.0078, "epoch": 0.8434864104967198, "grad_norm": 1.5684537887573242, "learning_rate": 1.8474070302918106e-05, "step": 8100 }, { "embedding_loss": 0.0087, "epoch": 0.8486931167343539, "grad_norm": 0.4422908425331116, "learning_rate": 1.845092911855229e-05, "step": 8150 }, { "embedding_loss": 0.012, "epoch": 0.853899822971988, "grad_norm": 0.5563941597938538, "learning_rate": 1.8427787934186476e-05, "step": 8200 }, { "embedding_loss": 0.0088, "epoch": 0.8591065292096219, "grad_norm": 0.3089462220668793, "learning_rate": 1.840464674982066e-05, "step": 8250 }, { "embedding_loss": 0.0122, "epoch": 0.864313235447256, "grad_norm": 4.295806884765625, "learning_rate": 1.8381505565454842e-05, "step": 8300 }, { "embedding_loss": 0.0091, "epoch": 0.8695199416848901, "grad_norm": 0.8506584763526917, "learning_rate": 1.8358364381089025e-05, "step": 8350 }, { "embedding_loss": 0.01, "epoch": 0.8747266479225242, "grad_norm": 0.9140012264251709, "learning_rate": 1.8335223196723212e-05, "step": 8400 }, { "embedding_loss": 0.0095, "epoch": 0.8799333541601583, "grad_norm": 0.9452886581420898, "learning_rate": 1.8312082012357395e-05, "step": 8450 }, { "embedding_loss": 0.0051, "epoch": 0.8851400603977924, "grad_norm": 0.34865090250968933, "learning_rate": 1.828894082799158e-05, "step": 8500 }, { "embedding_loss": 0.0109, "epoch": 0.8903467666354264, "grad_norm": 0.027646692469716072, "learning_rate": 1.8265799643625762e-05, "step": 8550 }, { "embedding_loss": 0.0074, "epoch": 0.8955534728730605, "grad_norm": 0.28435996174812317, "learning_rate": 1.8242658459259945e-05, "step": 8600 }, { "embedding_loss": 0.0048, "epoch": 0.9007601791106946, "grad_norm": 0.1109330877661705, "learning_rate": 1.821951727489413e-05, "step": 8650 }, { "embedding_loss": 0.0089, "epoch": 0.9059668853483287, "grad_norm": 0.46810364723205566, "learning_rate": 1.8196376090528315e-05, "step": 8700 }, { "embedding_loss": 0.0087, "epoch": 0.9111735915859627, "grad_norm": 0.2674962878227234, "learning_rate": 1.81732349061625e-05, "step": 8750 }, { "embedding_loss": 0.0096, "epoch": 0.9163802978235968, "grad_norm": 3.0557987689971924, "learning_rate": 1.815009372179668e-05, "step": 8800 }, { "embedding_loss": 0.0085, "epoch": 0.9215870040612308, "grad_norm": 0.6088097095489502, "learning_rate": 1.8126952537430865e-05, "step": 8850 }, { "embedding_loss": 0.0083, "epoch": 0.9267937102988649, "grad_norm": 0.12588393688201904, "learning_rate": 1.810381135306505e-05, "step": 8900 }, { "embedding_loss": 0.009, "epoch": 0.932000416536499, "grad_norm": 0.46597975492477417, "learning_rate": 1.8080670168699235e-05, "step": 8950 }, { "embedding_loss": 0.0078, "epoch": 0.9372071227741331, "grad_norm": 0.03179040551185608, "learning_rate": 1.8057528984333418e-05, "step": 9000 }, { "embedding_loss": 0.0096, "epoch": 0.9424138290117672, "grad_norm": 0.476052463054657, "learning_rate": 1.8034387799967605e-05, "step": 9050 }, { "embedding_loss": 0.0084, "epoch": 0.9476205352494013, "grad_norm": 0.6995823979377747, "learning_rate": 1.8011246615601788e-05, "step": 9100 }, { "embedding_loss": 0.0073, "epoch": 0.9528272414870353, "grad_norm": 0.042539093643426895, "learning_rate": 1.798810543123597e-05, "step": 9150 }, { "embedding_loss": 0.0055, "epoch": 0.9580339477246693, "grad_norm": 0.024517321959137917, "learning_rate": 1.7964964246870154e-05, "step": 9200 }, { "embedding_loss": 0.0059, "epoch": 0.9632406539623034, "grad_norm": 0.020516090095043182, "learning_rate": 1.794182306250434e-05, "step": 9250 }, { "embedding_loss": 0.008, "epoch": 0.9684473601999375, "grad_norm": 0.07251976430416107, "learning_rate": 1.7918681878138524e-05, "step": 9300 }, { "embedding_loss": 0.0112, "epoch": 0.9736540664375716, "grad_norm": 0.12063586711883545, "learning_rate": 1.7895540693772708e-05, "step": 9350 }, { "embedding_loss": 0.0045, "epoch": 0.9788607726752057, "grad_norm": 0.36446037888526917, "learning_rate": 1.787239950940689e-05, "step": 9400 }, { "embedding_loss": 0.0086, "epoch": 0.9840674789128397, "grad_norm": 0.08372894674539566, "learning_rate": 1.7849258325041078e-05, "step": 9450 }, { "embedding_loss": 0.0049, "epoch": 0.9892741851504738, "grad_norm": 0.04579677805304527, "learning_rate": 1.782611714067526e-05, "step": 9500 }, { "embedding_loss": 0.0056, "epoch": 0.9944808913881079, "grad_norm": 0.1182708889245987, "learning_rate": 1.7802975956309444e-05, "step": 9550 }, { "embedding_loss": 0.0067, "epoch": 0.999687597625742, "grad_norm": 0.01671171560883522, "learning_rate": 1.777983477194363e-05, "step": 9600 }, { "embedding_loss": 0.0058, "epoch": 1.004894303863376, "grad_norm": 0.9829951524734497, "learning_rate": 1.7756693587577814e-05, "step": 9650 }, { "embedding_loss": 0.0132, "epoch": 1.0101010101010102, "grad_norm": 0.0442439503967762, "learning_rate": 1.7733552403211997e-05, "step": 9700 }, { "embedding_loss": 0.0076, "epoch": 1.0153077163386441, "grad_norm": 0.031697243452072144, "learning_rate": 1.771041121884618e-05, "step": 9750 }, { "embedding_loss": 0.0069, "epoch": 1.0205144225762783, "grad_norm": 0.019380003213882446, "learning_rate": 1.7687270034480367e-05, "step": 9800 }, { "embedding_loss": 0.0072, "epoch": 1.0257211288139123, "grad_norm": 0.03249906376004219, "learning_rate": 1.766412885011455e-05, "step": 9850 }, { "embedding_loss": 0.0092, "epoch": 1.0309278350515463, "grad_norm": 0.3388296663761139, "learning_rate": 1.7640987665748734e-05, "step": 9900 }, { "embedding_loss": 0.0077, "epoch": 1.0361345412891805, "grad_norm": 0.1678103804588318, "learning_rate": 1.7617846481382917e-05, "step": 9950 }, { "embedding_loss": 0.0073, "epoch": 1.0413412475268145, "grad_norm": 0.015974771231412888, "learning_rate": 1.7594705297017104e-05, "step": 10000 }, { "embedding_loss": 0.0071, "epoch": 1.0465479537644486, "grad_norm": 0.041760511696338654, "learning_rate": 1.7571564112651287e-05, "step": 10050 }, { "embedding_loss": 0.0082, "epoch": 1.0517546600020826, "grad_norm": 1.2133060693740845, "learning_rate": 1.754842292828547e-05, "step": 10100 }, { "embedding_loss": 0.0046, "epoch": 1.0569613662397168, "grad_norm": 0.04206147417426109, "learning_rate": 1.7525281743919657e-05, "step": 10150 }, { "embedding_loss": 0.0053, "epoch": 1.0621680724773508, "grad_norm": 0.18272073566913605, "learning_rate": 1.750214055955384e-05, "step": 10200 }, { "embedding_loss": 0.0044, "epoch": 1.067374778714985, "grad_norm": 0.03547310084104538, "learning_rate": 1.7478999375188023e-05, "step": 10250 }, { "embedding_loss": 0.0043, "epoch": 1.072581484952619, "grad_norm": 0.0350540354847908, "learning_rate": 1.7455858190822207e-05, "step": 10300 }, { "embedding_loss": 0.0052, "epoch": 1.077788191190253, "grad_norm": 1.6414012908935547, "learning_rate": 1.7432717006456393e-05, "step": 10350 }, { "embedding_loss": 0.004, "epoch": 1.0829948974278871, "grad_norm": 3.3341734409332275, "learning_rate": 1.7409575822090577e-05, "step": 10400 }, { "embedding_loss": 0.0082, "epoch": 1.088201603665521, "grad_norm": 0.01878177374601364, "learning_rate": 1.738643463772476e-05, "step": 10450 }, { "embedding_loss": 0.006, "epoch": 1.0934083099031553, "grad_norm": 0.5989029407501221, "learning_rate": 1.7363293453358946e-05, "step": 10500 }, { "embedding_loss": 0.0063, "epoch": 1.0986150161407893, "grad_norm": 0.23778136074543, "learning_rate": 1.734015226899313e-05, "step": 10550 }, { "embedding_loss": 0.0038, "epoch": 1.1038217223784235, "grad_norm": 0.012218566611409187, "learning_rate": 1.7317011084627313e-05, "step": 10600 }, { "embedding_loss": 0.0049, "epoch": 1.1090284286160574, "grad_norm": 0.05297623947262764, "learning_rate": 1.7293869900261496e-05, "step": 10650 }, { "embedding_loss": 0.0062, "epoch": 1.1142351348536916, "grad_norm": 0.5116108655929565, "learning_rate": 1.7270728715895683e-05, "step": 10700 }, { "embedding_loss": 0.0084, "epoch": 1.1194418410913256, "grad_norm": 0.4478176236152649, "learning_rate": 1.7247587531529866e-05, "step": 10750 }, { "embedding_loss": 0.0057, "epoch": 1.1246485473289598, "grad_norm": 0.4622497856616974, "learning_rate": 1.722444634716405e-05, "step": 10800 }, { "embedding_loss": 0.0074, "epoch": 1.1298552535665938, "grad_norm": 0.017630133777856827, "learning_rate": 1.7201305162798233e-05, "step": 10850 }, { "embedding_loss": 0.0056, "epoch": 1.1350619598042277, "grad_norm": 0.6077584624290466, "learning_rate": 1.717816397843242e-05, "step": 10900 }, { "embedding_loss": 0.0078, "epoch": 1.140268666041862, "grad_norm": 0.18036994338035583, "learning_rate": 1.7155022794066603e-05, "step": 10950 }, { "embedding_loss": 0.0059, "epoch": 1.145475372279496, "grad_norm": 0.009565812535583973, "learning_rate": 1.7131881609700786e-05, "step": 11000 }, { "embedding_loss": 0.0062, "epoch": 1.1506820785171301, "grad_norm": 2.7242627143859863, "learning_rate": 1.7108740425334973e-05, "step": 11050 }, { "embedding_loss": 0.0054, "epoch": 1.155888784754764, "grad_norm": 0.017238834872841835, "learning_rate": 1.7085599240969156e-05, "step": 11100 }, { "embedding_loss": 0.006, "epoch": 1.1610954909923983, "grad_norm": 1.461991548538208, "learning_rate": 1.706245805660334e-05, "step": 11150 }, { "embedding_loss": 0.0077, "epoch": 1.1663021972300323, "grad_norm": 0.11797866970300674, "learning_rate": 1.7039316872237522e-05, "step": 11200 }, { "embedding_loss": 0.005, "epoch": 1.1715089034676662, "grad_norm": 0.040576279163360596, "learning_rate": 1.701617568787171e-05, "step": 11250 }, { "embedding_loss": 0.0061, "epoch": 1.1767156097053004, "grad_norm": 0.013650750741362572, "learning_rate": 1.6993034503505892e-05, "step": 11300 }, { "embedding_loss": 0.0043, "epoch": 1.1819223159429346, "grad_norm": 0.013326168991625309, "learning_rate": 1.6969893319140075e-05, "step": 11350 }, { "embedding_loss": 0.0061, "epoch": 1.1871290221805686, "grad_norm": 0.07993318140506744, "learning_rate": 1.694675213477426e-05, "step": 11400 }, { "embedding_loss": 0.0054, "epoch": 1.1923357284182026, "grad_norm": 0.38105425238609314, "learning_rate": 1.6923610950408442e-05, "step": 11450 }, { "embedding_loss": 0.0046, "epoch": 1.1975424346558368, "grad_norm": 0.13614040613174438, "learning_rate": 1.6900469766042625e-05, "step": 11500 }, { "embedding_loss": 0.0054, "epoch": 1.2027491408934707, "grad_norm": 1.016570806503296, "learning_rate": 1.6877328581676812e-05, "step": 11550 }, { "embedding_loss": 0.0039, "epoch": 1.207955847131105, "grad_norm": 0.4211491644382477, "learning_rate": 1.6854187397310995e-05, "step": 11600 }, { "embedding_loss": 0.0076, "epoch": 1.213162553368739, "grad_norm": 0.020438892766833305, "learning_rate": 1.683104621294518e-05, "step": 11650 }, { "embedding_loss": 0.0064, "epoch": 1.218369259606373, "grad_norm": 0.043074000626802444, "learning_rate": 1.6807905028579362e-05, "step": 11700 }, { "embedding_loss": 0.0068, "epoch": 1.223575965844007, "grad_norm": 0.1304844170808792, "learning_rate": 1.678476384421355e-05, "step": 11750 }, { "embedding_loss": 0.0038, "epoch": 1.2287826720816413, "grad_norm": 0.10536648333072662, "learning_rate": 1.676162265984773e-05, "step": 11800 }, { "embedding_loss": 0.0053, "epoch": 1.2339893783192752, "grad_norm": 0.2895510792732239, "learning_rate": 1.6738481475481915e-05, "step": 11850 }, { "embedding_loss": 0.0074, "epoch": 1.2391960845569092, "grad_norm": 0.14891253411769867, "learning_rate": 1.67153402911161e-05, "step": 11900 }, { "embedding_loss": 0.0083, "epoch": 1.2444027907945434, "grad_norm": 0.03617144003510475, "learning_rate": 1.6692199106750285e-05, "step": 11950 }, { "embedding_loss": 0.0053, "epoch": 1.2496094970321774, "grad_norm": 0.031169302761554718, "learning_rate": 1.6669057922384468e-05, "step": 12000 }, { "embedding_loss": 0.004, "epoch": 1.2548162032698116, "grad_norm": 0.3214148283004761, "learning_rate": 1.664591673801865e-05, "step": 12050 }, { "embedding_loss": 0.0052, "epoch": 1.2600229095074456, "grad_norm": 1.033286213874817, "learning_rate": 1.6622775553652838e-05, "step": 12100 }, { "embedding_loss": 0.0079, "epoch": 1.2652296157450795, "grad_norm": 0.014789101667702198, "learning_rate": 1.659963436928702e-05, "step": 12150 }, { "embedding_loss": 0.004, "epoch": 1.2704363219827137, "grad_norm": 0.5162740349769592, "learning_rate": 1.6576493184921205e-05, "step": 12200 }, { "embedding_loss": 0.004, "epoch": 1.275643028220348, "grad_norm": 0.01165369339287281, "learning_rate": 1.6553352000555388e-05, "step": 12250 }, { "embedding_loss": 0.0063, "epoch": 1.280849734457982, "grad_norm": 0.031679488718509674, "learning_rate": 1.6530210816189574e-05, "step": 12300 }, { "embedding_loss": 0.0024, "epoch": 1.2860564406956159, "grad_norm": 0.008600637316703796, "learning_rate": 1.6507069631823758e-05, "step": 12350 }, { "embedding_loss": 0.0065, "epoch": 1.29126314693325, "grad_norm": 0.014314206317067146, "learning_rate": 1.648392844745794e-05, "step": 12400 }, { "embedding_loss": 0.0068, "epoch": 1.296469853170884, "grad_norm": 0.3240402936935425, "learning_rate": 1.6460787263092128e-05, "step": 12450 }, { "embedding_loss": 0.0046, "epoch": 1.3016765594085182, "grad_norm": 0.030164631083607674, "learning_rate": 1.643764607872631e-05, "step": 12500 }, { "embedding_loss": 0.0026, "epoch": 1.3068832656461522, "grad_norm": 0.0063670300878584385, "learning_rate": 1.6414504894360494e-05, "step": 12550 }, { "embedding_loss": 0.0036, "epoch": 1.3120899718837864, "grad_norm": 0.021254096180200577, "learning_rate": 1.6391363709994677e-05, "step": 12600 }, { "embedding_loss": 0.0058, "epoch": 1.3172966781214204, "grad_norm": 0.25233790278434753, "learning_rate": 1.6368222525628864e-05, "step": 12650 }, { "embedding_loss": 0.0029, "epoch": 1.3225033843590546, "grad_norm": 1.2394205331802368, "learning_rate": 1.6345081341263047e-05, "step": 12700 }, { "embedding_loss": 0.006, "epoch": 1.3277100905966885, "grad_norm": 1.9639242887496948, "learning_rate": 1.632194015689723e-05, "step": 12750 }, { "embedding_loss": 0.0052, "epoch": 1.3329167968343225, "grad_norm": 0.0384540930390358, "learning_rate": 1.6298798972531414e-05, "step": 12800 }, { "embedding_loss": 0.004, "epoch": 1.3381235030719567, "grad_norm": 0.16365939378738403, "learning_rate": 1.62756577881656e-05, "step": 12850 }, { "embedding_loss": 0.0031, "epoch": 1.3433302093095907, "grad_norm": 0.03347177803516388, "learning_rate": 1.6252516603799784e-05, "step": 12900 }, { "embedding_loss": 0.005, "epoch": 1.3485369155472249, "grad_norm": 0.014545961283147335, "learning_rate": 1.6229375419433967e-05, "step": 12950 }, { "embedding_loss": 0.0079, "epoch": 1.3537436217848589, "grad_norm": 0.7268438935279846, "learning_rate": 1.6206234235068154e-05, "step": 13000 }, { "embedding_loss": 0.0058, "epoch": 1.358950328022493, "grad_norm": 0.040684785693883896, "learning_rate": 1.6183093050702337e-05, "step": 13050 }, { "embedding_loss": 0.0037, "epoch": 1.364157034260127, "grad_norm": 3.591543197631836, "learning_rate": 1.615995186633652e-05, "step": 13100 }, { "embedding_loss": 0.005, "epoch": 1.3693637404977612, "grad_norm": 0.037789322435855865, "learning_rate": 1.6136810681970703e-05, "step": 13150 }, { "embedding_loss": 0.0018, "epoch": 1.3745704467353952, "grad_norm": 0.549343466758728, "learning_rate": 1.611366949760489e-05, "step": 13200 }, { "embedding_loss": 0.0027, "epoch": 1.3797771529730292, "grad_norm": 0.015304960310459137, "learning_rate": 1.6090528313239073e-05, "step": 13250 }, { "embedding_loss": 0.004, "epoch": 1.3849838592106634, "grad_norm": 0.010115724988281727, "learning_rate": 1.6067387128873257e-05, "step": 13300 }, { "embedding_loss": 0.0042, "epoch": 1.3901905654482973, "grad_norm": 0.004204587545245886, "learning_rate": 1.6044245944507443e-05, "step": 13350 }, { "embedding_loss": 0.0046, "epoch": 1.3953972716859315, "grad_norm": 0.04513470083475113, "learning_rate": 1.6021104760141627e-05, "step": 13400 }, { "embedding_loss": 0.0047, "epoch": 1.4006039779235655, "grad_norm": 0.21044224500656128, "learning_rate": 1.599796357577581e-05, "step": 13450 }, { "embedding_loss": 0.005, "epoch": 1.4058106841611997, "grad_norm": 0.4665778577327728, "learning_rate": 1.5974822391409993e-05, "step": 13500 }, { "embedding_loss": 0.0047, "epoch": 1.4110173903988337, "grad_norm": 0.03980934992432594, "learning_rate": 1.595168120704418e-05, "step": 13550 }, { "embedding_loss": 0.0052, "epoch": 1.4162240966364679, "grad_norm": 0.08631590753793716, "learning_rate": 1.5928540022678363e-05, "step": 13600 }, { "embedding_loss": 0.0044, "epoch": 1.4214308028741018, "grad_norm": 0.008251226507127285, "learning_rate": 1.5905398838312546e-05, "step": 13650 }, { "embedding_loss": 0.0043, "epoch": 1.4266375091117358, "grad_norm": 0.10959483683109283, "learning_rate": 1.588225765394673e-05, "step": 13700 }, { "embedding_loss": 0.0065, "epoch": 1.43184421534937, "grad_norm": 0.03955509141087532, "learning_rate": 1.5859116469580916e-05, "step": 13750 }, { "embedding_loss": 0.0031, "epoch": 1.437050921587004, "grad_norm": 0.15788401663303375, "learning_rate": 1.58359752852151e-05, "step": 13800 }, { "embedding_loss": 0.0036, "epoch": 1.4422576278246382, "grad_norm": 1.064596176147461, "learning_rate": 1.5812834100849283e-05, "step": 13850 }, { "embedding_loss": 0.003, "epoch": 1.4474643340622722, "grad_norm": 2.6524391174316406, "learning_rate": 1.578969291648347e-05, "step": 13900 }, { "embedding_loss": 0.006, "epoch": 1.4526710402999063, "grad_norm": 0.2990039885044098, "learning_rate": 1.5766551732117653e-05, "step": 13950 }, { "embedding_loss": 0.0023, "epoch": 1.4578777465375403, "grad_norm": 0.12428417056798935, "learning_rate": 1.5743410547751836e-05, "step": 14000 }, { "embedding_loss": 0.0032, "epoch": 1.4630844527751745, "grad_norm": 0.01266538817435503, "learning_rate": 1.572026936338602e-05, "step": 14050 }, { "embedding_loss": 0.003, "epoch": 1.4682911590128085, "grad_norm": 0.07004108279943466, "learning_rate": 1.5697128179020206e-05, "step": 14100 }, { "embedding_loss": 0.0032, "epoch": 1.4734978652504425, "grad_norm": 0.0223364420235157, "learning_rate": 1.567398699465439e-05, "step": 14150 }, { "embedding_loss": 0.0046, "epoch": 1.4787045714880767, "grad_norm": 0.13812583684921265, "learning_rate": 1.5650845810288572e-05, "step": 14200 }, { "embedding_loss": 0.0054, "epoch": 1.4839112777257109, "grad_norm": 0.11324401199817657, "learning_rate": 1.5627704625922756e-05, "step": 14250 }, { "embedding_loss": 0.0054, "epoch": 1.4891179839633448, "grad_norm": 0.3810628354549408, "learning_rate": 1.560456344155694e-05, "step": 14300 }, { "embedding_loss": 0.0029, "epoch": 1.4943246902009788, "grad_norm": 0.014939317479729652, "learning_rate": 1.5581422257191122e-05, "step": 14350 }, { "embedding_loss": 0.0031, "epoch": 1.499531396438613, "grad_norm": 0.054862458258867264, "learning_rate": 1.555828107282531e-05, "step": 14400 }, { "embedding_loss": 0.0034, "epoch": 1.504738102676247, "grad_norm": 0.11869315803050995, "learning_rate": 1.5535139888459492e-05, "step": 14450 }, { "embedding_loss": 0.0025, "epoch": 1.5099448089138812, "grad_norm": 0.040105488151311874, "learning_rate": 1.5511998704093675e-05, "step": 14500 }, { "embedding_loss": 0.0058, "epoch": 1.5151515151515151, "grad_norm": 0.6557055711746216, "learning_rate": 1.548885751972786e-05, "step": 14550 }, { "embedding_loss": 0.0043, "epoch": 1.5203582213891491, "grad_norm": 0.7020523548126221, "learning_rate": 1.5465716335362045e-05, "step": 14600 }, { "embedding_loss": 0.0018, "epoch": 1.5255649276267833, "grad_norm": 0.2461288869380951, "learning_rate": 1.544257515099623e-05, "step": 14650 }, { "embedding_loss": 0.0061, "epoch": 1.5307716338644175, "grad_norm": 0.033834848552942276, "learning_rate": 1.5419433966630412e-05, "step": 14700 }, { "embedding_loss": 0.0028, "epoch": 1.5359783401020515, "grad_norm": 0.0170294102281332, "learning_rate": 1.53962927822646e-05, "step": 14750 }, { "embedding_loss": 0.0059, "epoch": 1.5411850463396854, "grad_norm": 0.038527410477399826, "learning_rate": 1.537315159789878e-05, "step": 14800 }, { "embedding_loss": 0.0026, "epoch": 1.5463917525773194, "grad_norm": 0.020393826067447662, "learning_rate": 1.5350010413532965e-05, "step": 14850 }, { "embedding_loss": 0.0041, "epoch": 1.5515984588149536, "grad_norm": 0.08289851248264313, "learning_rate": 1.5326869229167148e-05, "step": 14900 }, { "embedding_loss": 0.0044, "epoch": 1.5568051650525878, "grad_norm": 0.010838224552571774, "learning_rate": 1.5303728044801335e-05, "step": 14950 }, { "embedding_loss": 0.003, "epoch": 1.5620118712902218, "grad_norm": 0.021554453298449516, "learning_rate": 1.5280586860435518e-05, "step": 15000 }, { "embedding_loss": 0.0014, "epoch": 1.5672185775278558, "grad_norm": 0.21896220743656158, "learning_rate": 1.5257445676069701e-05, "step": 15050 }, { "embedding_loss": 0.0059, "epoch": 1.57242528376549, "grad_norm": 3.4779744148254395, "learning_rate": 1.5234304491703886e-05, "step": 15100 }, { "embedding_loss": 0.0024, "epoch": 1.5776319900031242, "grad_norm": 0.010911405086517334, "learning_rate": 1.521116330733807e-05, "step": 15150 }, { "embedding_loss": 0.0055, "epoch": 1.5828386962407581, "grad_norm": 1.0184364318847656, "learning_rate": 1.5188022122972255e-05, "step": 15200 }, { "embedding_loss": 0.0052, "epoch": 1.588045402478392, "grad_norm": 0.01177753321826458, "learning_rate": 1.516488093860644e-05, "step": 15250 }, { "embedding_loss": 0.002, "epoch": 1.5932521087160263, "grad_norm": 0.024036038666963577, "learning_rate": 1.5141739754240623e-05, "step": 15300 }, { "embedding_loss": 0.004, "epoch": 1.5984588149536603, "grad_norm": 0.015944767743349075, "learning_rate": 1.5118598569874808e-05, "step": 15350 }, { "embedding_loss": 0.0023, "epoch": 1.6036655211912945, "grad_norm": 0.0119936503469944, "learning_rate": 1.5095457385508991e-05, "step": 15400 }, { "embedding_loss": 0.0023, "epoch": 1.6088722274289284, "grad_norm": 0.1267576962709427, "learning_rate": 1.5072316201143176e-05, "step": 15450 }, { "embedding_loss": 0.003, "epoch": 1.6140789336665624, "grad_norm": 0.004355051554739475, "learning_rate": 1.504917501677736e-05, "step": 15500 }, { "embedding_loss": 0.0027, "epoch": 1.6192856399041966, "grad_norm": 0.0077704135328531265, "learning_rate": 1.5026033832411544e-05, "step": 15550 }, { "embedding_loss": 0.0023, "epoch": 1.6244923461418308, "grad_norm": 0.06213510408997536, "learning_rate": 1.5002892648045727e-05, "step": 15600 }, { "embedding_loss": 0.0044, "epoch": 1.6296990523794648, "grad_norm": 0.10908373445272446, "learning_rate": 1.4979751463679912e-05, "step": 15650 }, { "embedding_loss": 0.0074, "epoch": 1.6349057586170987, "grad_norm": 0.008925637230277061, "learning_rate": 1.4956610279314097e-05, "step": 15700 }, { "embedding_loss": 0.0029, "epoch": 1.640112464854733, "grad_norm": 0.023670511320233345, "learning_rate": 1.493346909494828e-05, "step": 15750 }, { "embedding_loss": 0.0014, "epoch": 1.645319171092367, "grad_norm": 0.006442319136112928, "learning_rate": 1.4910327910582466e-05, "step": 15800 }, { "embedding_loss": 0.0018, "epoch": 1.6505258773300011, "grad_norm": 0.013194055296480656, "learning_rate": 1.4887186726216649e-05, "step": 15850 }, { "embedding_loss": 0.004, "epoch": 1.655732583567635, "grad_norm": 0.011845475062727928, "learning_rate": 1.4864045541850834e-05, "step": 15900 }, { "embedding_loss": 0.0044, "epoch": 1.660939289805269, "grad_norm": 0.007666606921702623, "learning_rate": 1.4840904357485017e-05, "step": 15950 }, { "embedding_loss": 0.0024, "epoch": 1.6661459960429033, "grad_norm": 0.016819607466459274, "learning_rate": 1.4817763173119202e-05, "step": 16000 }, { "embedding_loss": 0.0031, "epoch": 1.6713527022805375, "grad_norm": 0.07455668598413467, "learning_rate": 1.4794621988753385e-05, "step": 16050 }, { "embedding_loss": 0.0018, "epoch": 1.6765594085181714, "grad_norm": 0.04744337126612663, "learning_rate": 1.477148080438757e-05, "step": 16100 }, { "embedding_loss": 0.0029, "epoch": 1.6817661147558054, "grad_norm": 0.008270618505775928, "learning_rate": 1.4748339620021754e-05, "step": 16150 }, { "embedding_loss": 0.004, "epoch": 1.6869728209934396, "grad_norm": 0.007761300075799227, "learning_rate": 1.4725198435655939e-05, "step": 16200 }, { "embedding_loss": 0.0028, "epoch": 1.6921795272310738, "grad_norm": 0.06050006300210953, "learning_rate": 1.4702057251290123e-05, "step": 16250 }, { "embedding_loss": 0.0019, "epoch": 1.6973862334687078, "grad_norm": 0.019928568974137306, "learning_rate": 1.4678916066924307e-05, "step": 16300 }, { "embedding_loss": 0.0021, "epoch": 1.7025929397063417, "grad_norm": 0.027616068720817566, "learning_rate": 1.4655774882558492e-05, "step": 16350 }, { "embedding_loss": 0.0029, "epoch": 1.7077996459439757, "grad_norm": 0.37783312797546387, "learning_rate": 1.4632633698192675e-05, "step": 16400 }, { "embedding_loss": 0.0014, "epoch": 1.71300635218161, "grad_norm": 0.8646184802055359, "learning_rate": 1.460949251382686e-05, "step": 16450 }, { "embedding_loss": 0.0025, "epoch": 1.718213058419244, "grad_norm": 0.009249920025467873, "learning_rate": 1.4586351329461043e-05, "step": 16500 }, { "embedding_loss": 0.0034, "epoch": 1.723419764656878, "grad_norm": 0.010544302873313427, "learning_rate": 1.4563210145095228e-05, "step": 16550 }, { "embedding_loss": 0.0028, "epoch": 1.728626470894512, "grad_norm": 0.038693223148584366, "learning_rate": 1.4540068960729411e-05, "step": 16600 }, { "embedding_loss": 0.0017, "epoch": 1.7338331771321462, "grad_norm": 0.018318980932235718, "learning_rate": 1.4516927776363596e-05, "step": 16650 }, { "embedding_loss": 0.0045, "epoch": 1.7390398833697804, "grad_norm": 0.18338936567306519, "learning_rate": 1.4493786591997781e-05, "step": 16700 }, { "embedding_loss": 0.0025, "epoch": 1.7442465896074144, "grad_norm": 0.029749080538749695, "learning_rate": 1.4470645407631965e-05, "step": 16750 }, { "embedding_loss": 0.003, "epoch": 1.7494532958450484, "grad_norm": 0.09010512381792068, "learning_rate": 1.444750422326615e-05, "step": 16800 }, { "embedding_loss": 0.003, "epoch": 1.7546600020826824, "grad_norm": 0.017163589596748352, "learning_rate": 1.4424363038900333e-05, "step": 16850 }, { "embedding_loss": 0.0025, "epoch": 1.7598667083203166, "grad_norm": 0.028121547773480415, "learning_rate": 1.4401221854534518e-05, "step": 16900 }, { "embedding_loss": 0.0016, "epoch": 1.7650734145579507, "grad_norm": 0.21652670204639435, "learning_rate": 1.4378080670168701e-05, "step": 16950 }, { "embedding_loss": 0.0015, "epoch": 1.7702801207955847, "grad_norm": 0.03087479993700981, "learning_rate": 1.4354939485802886e-05, "step": 17000 }, { "embedding_loss": 0.0035, "epoch": 1.7754868270332187, "grad_norm": 0.0054185641929507256, "learning_rate": 1.4331798301437068e-05, "step": 17050 }, { "embedding_loss": 0.0014, "epoch": 1.780693533270853, "grad_norm": 0.0028866103384643793, "learning_rate": 1.4308657117071253e-05, "step": 17100 }, { "embedding_loss": 0.003, "epoch": 1.785900239508487, "grad_norm": 0.058498039841651917, "learning_rate": 1.4285515932705436e-05, "step": 17150 }, { "embedding_loss": 0.0065, "epoch": 1.791106945746121, "grad_norm": 0.28154024481773376, "learning_rate": 1.426237474833962e-05, "step": 17200 }, { "embedding_loss": 0.003, "epoch": 1.796313651983755, "grad_norm": 0.01061001792550087, "learning_rate": 1.4239233563973804e-05, "step": 17250 }, { "embedding_loss": 0.003, "epoch": 1.801520358221389, "grad_norm": 0.004344331566244364, "learning_rate": 1.4216092379607989e-05, "step": 17300 }, { "embedding_loss": 0.0062, "epoch": 1.8067270644590232, "grad_norm": 1.790716528892517, "learning_rate": 1.4192951195242172e-05, "step": 17350 }, { "embedding_loss": 0.0037, "epoch": 1.8119337706966574, "grad_norm": 0.042736802250146866, "learning_rate": 1.4169810010876357e-05, "step": 17400 }, { "embedding_loss": 0.0047, "epoch": 1.8171404769342914, "grad_norm": 0.003962809685617685, "learning_rate": 1.414666882651054e-05, "step": 17450 }, { "embedding_loss": 0.0023, "epoch": 1.8223471831719253, "grad_norm": 0.012670880183577538, "learning_rate": 1.4123527642144725e-05, "step": 17500 }, { "embedding_loss": 0.0037, "epoch": 1.8275538894095595, "grad_norm": 0.005040524061769247, "learning_rate": 1.410038645777891e-05, "step": 17550 }, { "embedding_loss": 0.0027, "epoch": 1.8327605956471937, "grad_norm": 0.36730483174324036, "learning_rate": 1.4077245273413094e-05, "step": 17600 }, { "embedding_loss": 0.0034, "epoch": 1.8379673018848277, "grad_norm": 0.02946503274142742, "learning_rate": 1.4054104089047279e-05, "step": 17650 }, { "embedding_loss": 0.0015, "epoch": 1.8431740081224617, "grad_norm": 0.013080528937280178, "learning_rate": 1.4030962904681462e-05, "step": 17700 }, { "embedding_loss": 0.0012, "epoch": 1.8483807143600957, "grad_norm": 0.02603771910071373, "learning_rate": 1.4007821720315647e-05, "step": 17750 }, { "embedding_loss": 0.0024, "epoch": 1.8535874205977299, "grad_norm": 0.2753530740737915, "learning_rate": 1.398468053594983e-05, "step": 17800 }, { "embedding_loss": 0.0019, "epoch": 1.858794126835364, "grad_norm": 0.09671527147293091, "learning_rate": 1.3961539351584015e-05, "step": 17850 }, { "embedding_loss": 0.004, "epoch": 1.864000833072998, "grad_norm": 0.00563651230186224, "learning_rate": 1.3938398167218198e-05, "step": 17900 }, { "embedding_loss": 0.0047, "epoch": 1.869207539310632, "grad_norm": 0.013191591948270798, "learning_rate": 1.3915256982852383e-05, "step": 17950 }, { "embedding_loss": 0.0031, "epoch": 1.8744142455482662, "grad_norm": 0.0058168028481304646, "learning_rate": 1.3892115798486567e-05, "step": 18000 }, { "embedding_loss": 0.0028, "epoch": 1.8796209517859004, "grad_norm": 0.41721343994140625, "learning_rate": 1.3868974614120751e-05, "step": 18050 }, { "embedding_loss": 0.0027, "epoch": 1.8848276580235344, "grad_norm": 0.19165031611919403, "learning_rate": 1.3845833429754936e-05, "step": 18100 }, { "embedding_loss": 0.0038, "epoch": 1.8900343642611683, "grad_norm": 0.006406415719538927, "learning_rate": 1.382269224538912e-05, "step": 18150 }, { "embedding_loss": 0.0034, "epoch": 1.8952410704988023, "grad_norm": 0.01080580148845911, "learning_rate": 1.3799551061023305e-05, "step": 18200 }, { "embedding_loss": 0.0024, "epoch": 1.9004477767364365, "grad_norm": 0.006921404041349888, "learning_rate": 1.3776409876657488e-05, "step": 18250 }, { "embedding_loss": 0.0033, "epoch": 1.9056544829740707, "grad_norm": 0.0030105006881058216, "learning_rate": 1.3753268692291673e-05, "step": 18300 }, { "embedding_loss": 0.0022, "epoch": 1.9108611892117047, "grad_norm": 0.025791391730308533, "learning_rate": 1.3730127507925856e-05, "step": 18350 }, { "embedding_loss": 0.0015, "epoch": 1.9160678954493386, "grad_norm": 0.0030609758105129004, "learning_rate": 1.3706986323560041e-05, "step": 18400 }, { "embedding_loss": 0.0008, "epoch": 1.9212746016869728, "grad_norm": 0.5819743871688843, "learning_rate": 1.3683845139194224e-05, "step": 18450 }, { "embedding_loss": 0.0011, "epoch": 1.926481307924607, "grad_norm": 0.015468656085431576, "learning_rate": 1.366070395482841e-05, "step": 18500 }, { "embedding_loss": 0.0011, "epoch": 1.931688014162241, "grad_norm": 0.004252830985933542, "learning_rate": 1.3637562770462594e-05, "step": 18550 }, { "embedding_loss": 0.0035, "epoch": 1.936894720399875, "grad_norm": 0.008880583569407463, "learning_rate": 1.3614421586096778e-05, "step": 18600 }, { "embedding_loss": 0.0018, "epoch": 1.942101426637509, "grad_norm": 0.007954990491271019, "learning_rate": 1.3591280401730962e-05, "step": 18650 }, { "embedding_loss": 0.0029, "epoch": 1.9473081328751431, "grad_norm": 0.014845364727079868, "learning_rate": 1.3568139217365146e-05, "step": 18700 }, { "embedding_loss": 0.0033, "epoch": 1.9525148391127773, "grad_norm": 0.004382742568850517, "learning_rate": 1.354499803299933e-05, "step": 18750 }, { "embedding_loss": 0.0007, "epoch": 1.9577215453504113, "grad_norm": 0.2789106070995331, "learning_rate": 1.3521856848633514e-05, "step": 18800 }, { "embedding_loss": 0.0019, "epoch": 1.9629282515880453, "grad_norm": 0.00724539440125227, "learning_rate": 1.3498715664267699e-05, "step": 18850 }, { "embedding_loss": 0.0044, "epoch": 1.9681349578256795, "grad_norm": 0.05976763367652893, "learning_rate": 1.3475574479901882e-05, "step": 18900 }, { "embedding_loss": 0.0011, "epoch": 1.9733416640633137, "grad_norm": 0.018617313355207443, "learning_rate": 1.3452433295536067e-05, "step": 18950 }, { "embedding_loss": 0.0037, "epoch": 1.9785483703009477, "grad_norm": 0.07279914617538452, "learning_rate": 1.342929211117025e-05, "step": 19000 }, { "embedding_loss": 0.0003, "epoch": 1.9837550765385816, "grad_norm": 0.005604149773716927, "learning_rate": 1.3406150926804435e-05, "step": 19050 }, { "embedding_loss": 0.002, "epoch": 1.9889617827762156, "grad_norm": 0.4676770865917206, "learning_rate": 1.338300974243862e-05, "step": 19100 }, { "embedding_loss": 0.0024, "epoch": 1.9941684890138498, "grad_norm": 0.006381129380315542, "learning_rate": 1.3359868558072804e-05, "step": 19150 }, { "embedding_loss": 0.0022, "epoch": 1.999375195251484, "grad_norm": 0.44813236594200134, "learning_rate": 1.3336727373706989e-05, "step": 19200 }, { "embedding_loss": 0.0017, "epoch": 2.004581901489118, "grad_norm": 0.01616285741329193, "learning_rate": 1.3313586189341172e-05, "step": 19250 }, { "embedding_loss": 0.0013, "epoch": 2.009788607726752, "grad_norm": 0.006148567423224449, "learning_rate": 1.3290445004975357e-05, "step": 19300 }, { "embedding_loss": 0.0028, "epoch": 2.014995313964386, "grad_norm": 0.009615874849259853, "learning_rate": 1.326730382060954e-05, "step": 19350 }, { "embedding_loss": 0.001, "epoch": 2.0202020202020203, "grad_norm": 0.004251678008586168, "learning_rate": 1.3244162636243725e-05, "step": 19400 }, { "embedding_loss": 0.0024, "epoch": 2.0254087264396543, "grad_norm": 0.008113077841699123, "learning_rate": 1.3221021451877908e-05, "step": 19450 }, { "embedding_loss": 0.0012, "epoch": 2.0306154326772883, "grad_norm": 0.02726900391280651, "learning_rate": 1.3197880267512093e-05, "step": 19500 }, { "embedding_loss": 0.0023, "epoch": 2.0358221389149223, "grad_norm": 0.00499620521441102, "learning_rate": 1.3174739083146278e-05, "step": 19550 }, { "embedding_loss": 0.002, "epoch": 2.0410288451525567, "grad_norm": 1.2157723903656006, "learning_rate": 1.3151597898780461e-05, "step": 19600 }, { "embedding_loss": 0.0031, "epoch": 2.0462355513901906, "grad_norm": 0.08977110683917999, "learning_rate": 1.3128456714414646e-05, "step": 19650 }, { "embedding_loss": 0.0017, "epoch": 2.0514422576278246, "grad_norm": 0.05430648848414421, "learning_rate": 1.310531553004883e-05, "step": 19700 }, { "embedding_loss": 0.0014, "epoch": 2.0566489638654586, "grad_norm": 0.01022451464086771, "learning_rate": 1.3082174345683015e-05, "step": 19750 }, { "embedding_loss": 0.0015, "epoch": 2.0618556701030926, "grad_norm": 0.00965672917664051, "learning_rate": 1.3059033161317198e-05, "step": 19800 }, { "embedding_loss": 0.0021, "epoch": 2.067062376340727, "grad_norm": 0.005539502017199993, "learning_rate": 1.3035891976951383e-05, "step": 19850 }, { "embedding_loss": 0.0021, "epoch": 2.072269082578361, "grad_norm": 0.006059055682271719, "learning_rate": 1.3012750792585564e-05, "step": 19900 }, { "embedding_loss": 0.0032, "epoch": 2.077475788815995, "grad_norm": 0.14464695751667023, "learning_rate": 1.298960960821975e-05, "step": 19950 }, { "embedding_loss": 0.0017, "epoch": 2.082682495053629, "grad_norm": 0.011897514574229717, "learning_rate": 1.2966468423853933e-05, "step": 20000 }, { "embedding_loss": 0.0009, "epoch": 2.0878892012912633, "grad_norm": 0.0040528737008571625, "learning_rate": 1.2943327239488118e-05, "step": 20050 }, { "embedding_loss": 0.0016, "epoch": 2.0930959075288973, "grad_norm": 0.007819181308150291, "learning_rate": 1.2920186055122301e-05, "step": 20100 }, { "embedding_loss": 0.0018, "epoch": 2.0983026137665313, "grad_norm": 0.013666506856679916, "learning_rate": 1.2897044870756486e-05, "step": 20150 }, { "embedding_loss": 0.0057, "epoch": 2.1035093200041652, "grad_norm": 0.010328873060643673, "learning_rate": 1.2873903686390669e-05, "step": 20200 }, { "embedding_loss": 0.0014, "epoch": 2.108716026241799, "grad_norm": 0.019933296367526054, "learning_rate": 1.2850762502024854e-05, "step": 20250 }, { "embedding_loss": 0.0022, "epoch": 2.1139227324794336, "grad_norm": 0.007374211680144072, "learning_rate": 1.2827621317659037e-05, "step": 20300 }, { "embedding_loss": 0.0037, "epoch": 2.1191294387170676, "grad_norm": 0.012251504696905613, "learning_rate": 1.2804480133293222e-05, "step": 20350 }, { "embedding_loss": 0.0011, "epoch": 2.1243361449547016, "grad_norm": 0.005697314627468586, "learning_rate": 1.2781338948927407e-05, "step": 20400 }, { "embedding_loss": 0.0016, "epoch": 2.1295428511923356, "grad_norm": 0.002244447823613882, "learning_rate": 1.275819776456159e-05, "step": 20450 }, { "embedding_loss": 0.0013, "epoch": 2.13474955742997, "grad_norm": 0.015698591247200966, "learning_rate": 1.2735056580195775e-05, "step": 20500 }, { "embedding_loss": 0.0013, "epoch": 2.139956263667604, "grad_norm": 0.01356748677790165, "learning_rate": 1.2711915395829959e-05, "step": 20550 }, { "embedding_loss": 0.0013, "epoch": 2.145162969905238, "grad_norm": 0.008849513716995716, "learning_rate": 1.2688774211464144e-05, "step": 20600 }, { "embedding_loss": 0.0037, "epoch": 2.150369676142872, "grad_norm": 0.6928774118423462, "learning_rate": 1.2665633027098327e-05, "step": 20650 }, { "embedding_loss": 0.0039, "epoch": 2.155576382380506, "grad_norm": 0.03237714618444443, "learning_rate": 1.2642491842732512e-05, "step": 20700 }, { "embedding_loss": 0.0037, "epoch": 2.1607830886181403, "grad_norm": 0.0030646566301584244, "learning_rate": 1.2619350658366695e-05, "step": 20750 }, { "embedding_loss": 0.0018, "epoch": 2.1659897948557743, "grad_norm": 0.011956333182752132, "learning_rate": 1.259620947400088e-05, "step": 20800 }, { "embedding_loss": 0.0013, "epoch": 2.1711965010934082, "grad_norm": 0.007671385072171688, "learning_rate": 1.2573068289635063e-05, "step": 20850 }, { "embedding_loss": 0.0017, "epoch": 2.176403207331042, "grad_norm": 0.010090204887092113, "learning_rate": 1.2549927105269248e-05, "step": 20900 }, { "embedding_loss": 0.0012, "epoch": 2.1816099135686766, "grad_norm": 0.010057215578854084, "learning_rate": 1.2526785920903433e-05, "step": 20950 }, { "embedding_loss": 0.0018, "epoch": 2.1868166198063106, "grad_norm": 0.009716392494738102, "learning_rate": 1.2503644736537617e-05, "step": 21000 }, { "embedding_loss": 0.0019, "epoch": 2.1920233260439446, "grad_norm": 0.003773706266656518, "learning_rate": 1.2480503552171802e-05, "step": 21050 }, { "embedding_loss": 0.002, "epoch": 2.1972300322815785, "grad_norm": 0.004189903382211924, "learning_rate": 1.2457362367805985e-05, "step": 21100 }, { "embedding_loss": 0.0009, "epoch": 2.2024367385192125, "grad_norm": 0.005080494098365307, "learning_rate": 1.243422118344017e-05, "step": 21150 }, { "embedding_loss": 0.0008, "epoch": 2.207643444756847, "grad_norm": 0.0064069656655192375, "learning_rate": 1.2411079999074353e-05, "step": 21200 }, { "embedding_loss": 0.0011, "epoch": 2.212850150994481, "grad_norm": 0.0846613198518753, "learning_rate": 1.2387938814708538e-05, "step": 21250 }, { "embedding_loss": 0.0015, "epoch": 2.218056857232115, "grad_norm": 0.004274032544344664, "learning_rate": 1.2364797630342721e-05, "step": 21300 }, { "embedding_loss": 0.0007, "epoch": 2.223263563469749, "grad_norm": 0.006647061090916395, "learning_rate": 1.2341656445976906e-05, "step": 21350 }, { "embedding_loss": 0.0012, "epoch": 2.2284702697073833, "grad_norm": 0.1389550268650055, "learning_rate": 1.2318515261611091e-05, "step": 21400 }, { "embedding_loss": 0.0015, "epoch": 2.2336769759450172, "grad_norm": 0.00890056136995554, "learning_rate": 1.2295374077245274e-05, "step": 21450 }, { "embedding_loss": 0.0017, "epoch": 2.238883682182651, "grad_norm": 0.022632068023085594, "learning_rate": 1.227223289287946e-05, "step": 21500 }, { "embedding_loss": 0.0008, "epoch": 2.244090388420285, "grad_norm": 0.007279681041836739, "learning_rate": 1.2249091708513643e-05, "step": 21550 }, { "embedding_loss": 0.0008, "epoch": 2.2492970946579196, "grad_norm": 0.010247277095913887, "learning_rate": 1.2225950524147828e-05, "step": 21600 }, { "embedding_loss": 0.0025, "epoch": 2.2545038008955536, "grad_norm": 0.004706698004156351, "learning_rate": 1.2202809339782011e-05, "step": 21650 }, { "embedding_loss": 0.0022, "epoch": 2.2597105071331876, "grad_norm": 0.031804159283638, "learning_rate": 1.2179668155416196e-05, "step": 21700 }, { "embedding_loss": 0.001, "epoch": 2.2649172133708215, "grad_norm": 0.021003112196922302, "learning_rate": 1.2156526971050379e-05, "step": 21750 }, { "embedding_loss": 0.0017, "epoch": 2.2701239196084555, "grad_norm": 0.003928271122276783, "learning_rate": 1.2133385786684564e-05, "step": 21800 }, { "embedding_loss": 0.0038, "epoch": 2.27533062584609, "grad_norm": 0.9323834180831909, "learning_rate": 1.2110244602318747e-05, "step": 21850 }, { "embedding_loss": 0.0011, "epoch": 2.280537332083724, "grad_norm": 0.010309775359928608, "learning_rate": 1.2087103417952932e-05, "step": 21900 }, { "embedding_loss": 0.0003, "epoch": 2.285744038321358, "grad_norm": 0.008217355236411095, "learning_rate": 1.2063962233587117e-05, "step": 21950 }, { "embedding_loss": 0.0016, "epoch": 2.290950744558992, "grad_norm": 0.013672198168933392, "learning_rate": 1.20408210492213e-05, "step": 22000 }, { "embedding_loss": 0.0022, "epoch": 2.296157450796626, "grad_norm": 0.1977008581161499, "learning_rate": 1.2017679864855485e-05, "step": 22050 }, { "embedding_loss": 0.0011, "epoch": 2.3013641570342602, "grad_norm": 0.006241293158382177, "learning_rate": 1.1994538680489669e-05, "step": 22100 }, { "embedding_loss": 0.0011, "epoch": 2.306570863271894, "grad_norm": 0.0057389759458601475, "learning_rate": 1.1971397496123854e-05, "step": 22150 }, { "embedding_loss": 0.0011, "epoch": 2.311777569509528, "grad_norm": 0.006034619640558958, "learning_rate": 1.1948256311758037e-05, "step": 22200 }, { "embedding_loss": 0.0021, "epoch": 2.3169842757471626, "grad_norm": 0.3582187592983246, "learning_rate": 1.1925115127392222e-05, "step": 22250 }, { "embedding_loss": 0.0018, "epoch": 2.3221909819847966, "grad_norm": 0.003342969575896859, "learning_rate": 1.1901973943026405e-05, "step": 22300 }, { "embedding_loss": 0.0021, "epoch": 2.3273976882224305, "grad_norm": 0.017463702708482742, "learning_rate": 1.187883275866059e-05, "step": 22350 }, { "embedding_loss": 0.0006, "epoch": 2.3326043944600645, "grad_norm": 0.005371089559048414, "learning_rate": 1.1855691574294775e-05, "step": 22400 }, { "embedding_loss": 0.004, "epoch": 2.3378111006976985, "grad_norm": 0.005444334354251623, "learning_rate": 1.1832550389928958e-05, "step": 22450 }, { "embedding_loss": 0.001, "epoch": 2.3430178069353325, "grad_norm": 0.00549267278984189, "learning_rate": 1.1809409205563143e-05, "step": 22500 }, { "embedding_loss": 0.002, "epoch": 2.348224513172967, "grad_norm": 0.009904368780553341, "learning_rate": 1.1786268021197327e-05, "step": 22550 }, { "embedding_loss": 0.0021, "epoch": 2.353431219410601, "grad_norm": 0.004460447933524847, "learning_rate": 1.1763126836831512e-05, "step": 22600 }, { "embedding_loss": 0.0017, "epoch": 2.358637925648235, "grad_norm": 0.012372348457574844, "learning_rate": 1.1739985652465695e-05, "step": 22650 }, { "embedding_loss": 0.0015, "epoch": 2.3638446318858692, "grad_norm": 0.007495572324842215, "learning_rate": 1.171684446809988e-05, "step": 22700 }, { "embedding_loss": 0.0011, "epoch": 2.369051338123503, "grad_norm": 0.014190604910254478, "learning_rate": 1.1693703283734061e-05, "step": 22750 }, { "embedding_loss": 0.0008, "epoch": 2.374258044361137, "grad_norm": 0.4924188256263733, "learning_rate": 1.1670562099368246e-05, "step": 22800 }, { "embedding_loss": 0.001, "epoch": 2.379464750598771, "grad_norm": 0.013879277743399143, "learning_rate": 1.164742091500243e-05, "step": 22850 }, { "embedding_loss": 0.0004, "epoch": 2.384671456836405, "grad_norm": 0.02071734145283699, "learning_rate": 1.1624279730636614e-05, "step": 22900 }, { "embedding_loss": 0.0005, "epoch": 2.3898781630740396, "grad_norm": 0.004272214137017727, "learning_rate": 1.1601138546270798e-05, "step": 22950 }, { "embedding_loss": 0.0018, "epoch": 2.3950848693116735, "grad_norm": 0.002411644207313657, "learning_rate": 1.1577997361904983e-05, "step": 23000 }, { "embedding_loss": 0.001, "epoch": 2.4002915755493075, "grad_norm": 0.019449541345238686, "learning_rate": 1.1554856177539166e-05, "step": 23050 }, { "embedding_loss": 0.0009, "epoch": 2.4054982817869415, "grad_norm": 0.02527959644794464, "learning_rate": 1.1531714993173351e-05, "step": 23100 }, { "embedding_loss": 0.0024, "epoch": 2.410704988024576, "grad_norm": 0.16354507207870483, "learning_rate": 1.1508573808807534e-05, "step": 23150 }, { "embedding_loss": 0.0014, "epoch": 2.41591169426221, "grad_norm": 0.017682882025837898, "learning_rate": 1.148543262444172e-05, "step": 23200 }, { "embedding_loss": 0.0013, "epoch": 2.421118400499844, "grad_norm": 0.0050527737475931644, "learning_rate": 1.1462291440075904e-05, "step": 23250 }, { "embedding_loss": 0.0013, "epoch": 2.426325106737478, "grad_norm": 0.0023584417067468166, "learning_rate": 1.1439150255710087e-05, "step": 23300 }, { "embedding_loss": 0.0003, "epoch": 2.431531812975112, "grad_norm": 0.3315781354904175, "learning_rate": 1.1416009071344272e-05, "step": 23350 }, { "embedding_loss": 0.0003, "epoch": 2.436738519212746, "grad_norm": 0.003767622634768486, "learning_rate": 1.1392867886978456e-05, "step": 23400 }, { "embedding_loss": 0.0013, "epoch": 2.44194522545038, "grad_norm": 0.06164936348795891, "learning_rate": 1.136972670261264e-05, "step": 23450 }, { "embedding_loss": 0.0021, "epoch": 2.447151931688014, "grad_norm": 0.014987274073064327, "learning_rate": 1.1346585518246824e-05, "step": 23500 }, { "embedding_loss": 0.0025, "epoch": 2.452358637925648, "grad_norm": 0.014723850414156914, "learning_rate": 1.1323444333881009e-05, "step": 23550 }, { "embedding_loss": 0.0018, "epoch": 2.4575653441632825, "grad_norm": 0.007753140293061733, "learning_rate": 1.1300303149515192e-05, "step": 23600 }, { "embedding_loss": 0.0021, "epoch": 2.4627720504009165, "grad_norm": 0.11420779675245285, "learning_rate": 1.1277161965149377e-05, "step": 23650 }, { "embedding_loss": 0.0009, "epoch": 2.4679787566385505, "grad_norm": 0.0015545577043667436, "learning_rate": 1.125402078078356e-05, "step": 23700 }, { "embedding_loss": 0.0015, "epoch": 2.4731854628761845, "grad_norm": 0.002739744959399104, "learning_rate": 1.1230879596417745e-05, "step": 23750 }, { "embedding_loss": 0.0012, "epoch": 2.4783921691138184, "grad_norm": 0.14792239665985107, "learning_rate": 1.120773841205193e-05, "step": 23800 }, { "embedding_loss": 0.0009, "epoch": 2.483598875351453, "grad_norm": 0.016194604337215424, "learning_rate": 1.1184597227686113e-05, "step": 23850 }, { "embedding_loss": 0.0011, "epoch": 2.488805581589087, "grad_norm": 0.0649636909365654, "learning_rate": 1.1161456043320298e-05, "step": 23900 }, { "embedding_loss": 0.0006, "epoch": 2.494012287826721, "grad_norm": 0.005290072411298752, "learning_rate": 1.1138314858954482e-05, "step": 23950 }, { "embedding_loss": 0.0005, "epoch": 2.4992189940643548, "grad_norm": 0.010143323801457882, "learning_rate": 1.1115173674588667e-05, "step": 24000 }, { "embedding_loss": 0.0016, "epoch": 2.504425700301989, "grad_norm": 0.00270524388179183, "learning_rate": 1.109203249022285e-05, "step": 24050 }, { "embedding_loss": 0.0021, "epoch": 2.509632406539623, "grad_norm": 0.0045821997337043285, "learning_rate": 1.1068891305857035e-05, "step": 24100 }, { "embedding_loss": 0.0022, "epoch": 2.514839112777257, "grad_norm": 0.003760270308703184, "learning_rate": 1.1045750121491218e-05, "step": 24150 }, { "embedding_loss": 0.0037, "epoch": 2.520045819014891, "grad_norm": 0.08812420815229416, "learning_rate": 1.1022608937125403e-05, "step": 24200 }, { "embedding_loss": 0.0018, "epoch": 2.525252525252525, "grad_norm": 0.27958598732948303, "learning_rate": 1.0999467752759588e-05, "step": 24250 }, { "embedding_loss": 0.0014, "epoch": 2.530459231490159, "grad_norm": 0.004292377736419439, "learning_rate": 1.0976326568393771e-05, "step": 24300 }, { "embedding_loss": 0.001, "epoch": 2.5356659377277935, "grad_norm": 0.21659308671951294, "learning_rate": 1.0953185384027956e-05, "step": 24350 }, { "embedding_loss": 0.0009, "epoch": 2.5408726439654274, "grad_norm": 0.005312615539878607, "learning_rate": 1.093004419966214e-05, "step": 24400 }, { "embedding_loss": 0.0003, "epoch": 2.5460793502030614, "grad_norm": 0.003285923507064581, "learning_rate": 1.0906903015296324e-05, "step": 24450 }, { "embedding_loss": 0.0008, "epoch": 2.551286056440696, "grad_norm": 0.00929224118590355, "learning_rate": 1.0883761830930508e-05, "step": 24500 }, { "embedding_loss": 0.0008, "epoch": 2.55649276267833, "grad_norm": 0.003572756890207529, "learning_rate": 1.0860620646564693e-05, "step": 24550 }, { "embedding_loss": 0.0019, "epoch": 2.561699468915964, "grad_norm": 0.002889364492148161, "learning_rate": 1.0837479462198876e-05, "step": 24600 }, { "embedding_loss": 0.002, "epoch": 2.5669061751535978, "grad_norm": 0.02578425034880638, "learning_rate": 1.0814338277833061e-05, "step": 24650 }, { "embedding_loss": 0.0011, "epoch": 2.5721128813912317, "grad_norm": 0.010893690399825573, "learning_rate": 1.0791197093467244e-05, "step": 24700 }, { "embedding_loss": 0.0006, "epoch": 2.5773195876288657, "grad_norm": 0.005054382607340813, "learning_rate": 1.0768055909101429e-05, "step": 24750 }, { "embedding_loss": 0.0017, "epoch": 2.5825262938665, "grad_norm": 0.09210974723100662, "learning_rate": 1.0744914724735614e-05, "step": 24800 }, { "embedding_loss": 0.0013, "epoch": 2.587733000104134, "grad_norm": 0.007207928225398064, "learning_rate": 1.0721773540369797e-05, "step": 24850 }, { "embedding_loss": 0.0018, "epoch": 2.592939706341768, "grad_norm": 0.005150509066879749, "learning_rate": 1.0698632356003982e-05, "step": 24900 }, { "embedding_loss": 0.0021, "epoch": 2.5981464125794025, "grad_norm": 0.007632564753293991, "learning_rate": 1.0675491171638166e-05, "step": 24950 }, { "embedding_loss": 0.0018, "epoch": 2.6033531188170365, "grad_norm": 0.004092794377356768, "learning_rate": 1.065234998727235e-05, "step": 25000 }, { "embedding_loss": 0.0027, "epoch": 2.6085598250546704, "grad_norm": 0.013759996742010117, "learning_rate": 1.0629208802906534e-05, "step": 25050 }, { "embedding_loss": 0.0003, "epoch": 2.6137665312923044, "grad_norm": 0.004917910788208246, "learning_rate": 1.0606067618540719e-05, "step": 25100 }, { "embedding_loss": 0.002, "epoch": 2.6189732375299384, "grad_norm": 0.01083595585078001, "learning_rate": 1.0582926434174902e-05, "step": 25150 }, { "embedding_loss": 0.0017, "epoch": 2.624179943767573, "grad_norm": 0.3743145167827606, "learning_rate": 1.0559785249809087e-05, "step": 25200 }, { "embedding_loss": 0.0008, "epoch": 2.6293866500052068, "grad_norm": 0.003947914578020573, "learning_rate": 1.0536644065443272e-05, "step": 25250 }, { "embedding_loss": 0.0005, "epoch": 2.6345933562428407, "grad_norm": 0.0035067517310380936, "learning_rate": 1.0513502881077455e-05, "step": 25300 }, { "embedding_loss": 0.0022, "epoch": 2.6398000624804747, "grad_norm": 0.021643230691552162, "learning_rate": 1.049036169671164e-05, "step": 25350 }, { "embedding_loss": 0.0006, "epoch": 2.645006768718109, "grad_norm": 0.016041336581110954, "learning_rate": 1.0467220512345823e-05, "step": 25400 }, { "embedding_loss": 0.0017, "epoch": 2.650213474955743, "grad_norm": 0.0026006808038800955, "learning_rate": 1.0444079327980008e-05, "step": 25450 }, { "embedding_loss": 0.0011, "epoch": 2.655420181193377, "grad_norm": 0.006043120287358761, "learning_rate": 1.0420938143614192e-05, "step": 25500 }, { "embedding_loss": 0.0009, "epoch": 2.660626887431011, "grad_norm": 0.007799636106938124, "learning_rate": 1.0397796959248377e-05, "step": 25550 }, { "embedding_loss": 0.001, "epoch": 2.665833593668645, "grad_norm": 0.0032333596609532833, "learning_rate": 1.0374655774882558e-05, "step": 25600 }, { "embedding_loss": 0.0022, "epoch": 2.6710402999062794, "grad_norm": 0.006502605974674225, "learning_rate": 1.0351514590516743e-05, "step": 25650 }, { "embedding_loss": 0.001, "epoch": 2.6762470061439134, "grad_norm": 0.002495839726179838, "learning_rate": 1.0328373406150926e-05, "step": 25700 }, { "embedding_loss": 0.0009, "epoch": 2.6814537123815474, "grad_norm": 0.011339404620230198, "learning_rate": 1.0305232221785111e-05, "step": 25750 }, { "embedding_loss": 0.0026, "epoch": 2.6866604186191814, "grad_norm": 0.0049376110546290874, "learning_rate": 1.0282091037419295e-05, "step": 25800 }, { "embedding_loss": 0.0021, "epoch": 2.691867124856816, "grad_norm": 0.003491663606837392, "learning_rate": 1.025894985305348e-05, "step": 25850 }, { "embedding_loss": 0.001, "epoch": 2.6970738310944498, "grad_norm": 0.06203962489962578, "learning_rate": 1.0235808668687663e-05, "step": 25900 }, { "embedding_loss": 0.0014, "epoch": 2.7022805373320837, "grad_norm": 0.0038036692421883345, "learning_rate": 1.0212667484321848e-05, "step": 25950 }, { "embedding_loss": 0.0017, "epoch": 2.7074872435697177, "grad_norm": 0.013710272498428822, "learning_rate": 1.0189526299956031e-05, "step": 26000 }, { "embedding_loss": 0.0016, "epoch": 2.7126939498073517, "grad_norm": 0.003352423897013068, "learning_rate": 1.0166385115590216e-05, "step": 26050 }, { "embedding_loss": 0.0003, "epoch": 2.717900656044986, "grad_norm": 0.004212658852338791, "learning_rate": 1.0143243931224401e-05, "step": 26100 }, { "embedding_loss": 0.0014, "epoch": 2.72310736228262, "grad_norm": 0.05683301389217377, "learning_rate": 1.0120102746858584e-05, "step": 26150 }, { "embedding_loss": 0.0004, "epoch": 2.728314068520254, "grad_norm": 0.008711031638085842, "learning_rate": 1.009696156249277e-05, "step": 26200 }, { "embedding_loss": 0.0019, "epoch": 2.733520774757888, "grad_norm": 0.007152698002755642, "learning_rate": 1.0073820378126952e-05, "step": 26250 }, { "embedding_loss": 0.0011, "epoch": 2.7387274809955224, "grad_norm": 0.003356023458763957, "learning_rate": 1.0050679193761137e-05, "step": 26300 }, { "embedding_loss": 0.0012, "epoch": 2.7439341872331564, "grad_norm": 0.016632454469799995, "learning_rate": 1.002753800939532e-05, "step": 26350 }, { "embedding_loss": 0.0015, "epoch": 2.7491408934707904, "grad_norm": 0.5349009037017822, "learning_rate": 1.0004396825029506e-05, "step": 26400 }, { "embedding_loss": 0.0009, "epoch": 2.7543475997084244, "grad_norm": 0.004687939304858446, "learning_rate": 9.981255640663689e-06, "step": 26450 }, { "embedding_loss": 0.0002, "epoch": 2.7595543059460583, "grad_norm": 0.0024527718778699636, "learning_rate": 9.958114456297874e-06, "step": 26500 }, { "embedding_loss": 0.0018, "epoch": 2.7647610121836927, "grad_norm": 0.08971556276082993, "learning_rate": 9.934973271932057e-06, "step": 26550 }, { "embedding_loss": 0.001, "epoch": 2.7699677184213267, "grad_norm": 0.00650964817032218, "learning_rate": 9.911832087566242e-06, "step": 26600 }, { "embedding_loss": 0.0019, "epoch": 2.7751744246589607, "grad_norm": 0.0036748195998370647, "learning_rate": 9.888690903200427e-06, "step": 26650 }, { "embedding_loss": 0.0007, "epoch": 2.7803811308965947, "grad_norm": 0.003135056234896183, "learning_rate": 9.86554971883461e-06, "step": 26700 }, { "embedding_loss": 0.0008, "epoch": 2.785587837134229, "grad_norm": 0.0061689745634794235, "learning_rate": 9.842408534468795e-06, "step": 26750 }, { "embedding_loss": 0.0015, "epoch": 2.790794543371863, "grad_norm": 0.0035342394839972258, "learning_rate": 9.819267350102979e-06, "step": 26800 }, { "embedding_loss": 0.0023, "epoch": 2.796001249609497, "grad_norm": 0.08970965445041656, "learning_rate": 9.796126165737164e-06, "step": 26850 }, { "embedding_loss": 0.0005, "epoch": 2.801207955847131, "grad_norm": 0.002801700960844755, "learning_rate": 9.772984981371347e-06, "step": 26900 }, { "embedding_loss": 0.0014, "epoch": 2.806414662084765, "grad_norm": 0.06024768948554993, "learning_rate": 9.749843797005532e-06, "step": 26950 }, { "embedding_loss": 0.0014, "epoch": 2.8116213683223994, "grad_norm": 0.003583586309105158, "learning_rate": 9.726702612639715e-06, "step": 27000 }, { "embedding_loss": 0.0008, "epoch": 2.8168280745600334, "grad_norm": 0.022347550839185715, "learning_rate": 9.7035614282739e-06, "step": 27050 }, { "embedding_loss": 0.0016, "epoch": 2.8220347807976673, "grad_norm": 0.0020712248515337706, "learning_rate": 9.680420243908085e-06, "step": 27100 }, { "embedding_loss": 0.0024, "epoch": 2.8272414870353013, "grad_norm": 0.00296349311247468, "learning_rate": 9.657279059542268e-06, "step": 27150 }, { "embedding_loss": 0.001, "epoch": 2.8324481932729357, "grad_norm": 0.004647277761250734, "learning_rate": 9.634137875176453e-06, "step": 27200 }, { "embedding_loss": 0.0024, "epoch": 2.8376548995105697, "grad_norm": 0.09574100375175476, "learning_rate": 9.610996690810636e-06, "step": 27250 }, { "embedding_loss": 0.0005, "epoch": 2.8428616057482037, "grad_norm": 0.002023301785811782, "learning_rate": 9.587855506444821e-06, "step": 27300 }, { "embedding_loss": 0.0001, "epoch": 2.8480683119858377, "grad_norm": 0.0016001787735149264, "learning_rate": 9.564714322079005e-06, "step": 27350 }, { "embedding_loss": 0.0021, "epoch": 2.8532750182234716, "grad_norm": 0.02564910613000393, "learning_rate": 9.54157313771319e-06, "step": 27400 }, { "embedding_loss": 0.0016, "epoch": 2.858481724461106, "grad_norm": 0.004294661805033684, "learning_rate": 9.518431953347373e-06, "step": 27450 }, { "embedding_loss": 0.0011, "epoch": 2.86368843069874, "grad_norm": 0.02338520810008049, "learning_rate": 9.495290768981558e-06, "step": 27500 }, { "embedding_loss": 0.0018, "epoch": 2.868895136936374, "grad_norm": 0.040758974850177765, "learning_rate": 9.472149584615741e-06, "step": 27550 }, { "embedding_loss": 0.0008, "epoch": 2.874101843174008, "grad_norm": 0.008010084740817547, "learning_rate": 9.449008400249926e-06, "step": 27600 }, { "embedding_loss": 0.0023, "epoch": 2.8793085494116424, "grad_norm": 0.007018416654318571, "learning_rate": 9.425867215884111e-06, "step": 27650 }, { "embedding_loss": 0.0009, "epoch": 2.8845152556492764, "grad_norm": 0.010360274463891983, "learning_rate": 9.402726031518293e-06, "step": 27700 }, { "embedding_loss": 0.0002, "epoch": 2.8897219618869103, "grad_norm": 0.007200753781944513, "learning_rate": 9.379584847152478e-06, "step": 27750 }, { "embedding_loss": 0.0013, "epoch": 2.8949286681245443, "grad_norm": 0.0022850781679153442, "learning_rate": 9.356443662786662e-06, "step": 27800 }, { "embedding_loss": 0.0021, "epoch": 2.9001353743621783, "grad_norm": 0.006049764808267355, "learning_rate": 9.333302478420846e-06, "step": 27850 }, { "embedding_loss": 0.0002, "epoch": 2.9053420805998127, "grad_norm": 0.005760509520769119, "learning_rate": 9.31016129405503e-06, "step": 27900 }, { "embedding_loss": 0.0015, "epoch": 2.9105487868374467, "grad_norm": 0.23274853825569153, "learning_rate": 9.287020109689214e-06, "step": 27950 }, { "embedding_loss": 0.0015, "epoch": 2.9157554930750806, "grad_norm": 0.0031046303920447826, "learning_rate": 9.263878925323399e-06, "step": 28000 }, { "embedding_loss": 0.0003, "epoch": 2.9209621993127146, "grad_norm": 0.007741307374089956, "learning_rate": 9.240737740957582e-06, "step": 28050 }, { "embedding_loss": 0.0011, "epoch": 2.926168905550349, "grad_norm": 0.01169963926076889, "learning_rate": 9.217596556591767e-06, "step": 28100 }, { "embedding_loss": 0.0002, "epoch": 2.931375611787983, "grad_norm": 0.01061971951276064, "learning_rate": 9.19445537222595e-06, "step": 28150 }, { "embedding_loss": 0.002, "epoch": 2.936582318025617, "grad_norm": 0.0040078721940517426, "learning_rate": 9.171314187860135e-06, "step": 28200 }, { "embedding_loss": 0.001, "epoch": 2.941789024263251, "grad_norm": 0.00869227759540081, "learning_rate": 9.14817300349432e-06, "step": 28250 }, { "embedding_loss": 0.0026, "epoch": 2.946995730500885, "grad_norm": 0.03226366266608238, "learning_rate": 9.125031819128504e-06, "step": 28300 }, { "embedding_loss": 0.0015, "epoch": 2.9522024367385193, "grad_norm": 0.007837221957743168, "learning_rate": 9.101890634762689e-06, "step": 28350 }, { "embedding_loss": 0.0004, "epoch": 2.9574091429761533, "grad_norm": 0.016281556338071823, "learning_rate": 9.078749450396872e-06, "step": 28400 }, { "embedding_loss": 0.001, "epoch": 2.9626158492137873, "grad_norm": 0.006102351471781731, "learning_rate": 9.055608266031057e-06, "step": 28450 }, { "embedding_loss": 0.0023, "epoch": 2.9678225554514217, "grad_norm": 0.004030589014291763, "learning_rate": 9.03246708166524e-06, "step": 28500 }, { "embedding_loss": 0.0013, "epoch": 2.9730292616890557, "grad_norm": 0.08026989549398422, "learning_rate": 9.009325897299425e-06, "step": 28550 }, { "embedding_loss": 0.0002, "epoch": 2.9782359679266897, "grad_norm": 0.004586766008287668, "learning_rate": 8.986184712933608e-06, "step": 28600 }, { "embedding_loss": 0.0005, "epoch": 2.9834426741643236, "grad_norm": 0.04759465157985687, "learning_rate": 8.963043528567793e-06, "step": 28650 }, { "embedding_loss": 0.0006, "epoch": 2.9886493804019576, "grad_norm": 0.002582300454378128, "learning_rate": 8.939902344201976e-06, "step": 28700 }, { "embedding_loss": 0.0028, "epoch": 2.9938560866395916, "grad_norm": 0.008660154417157173, "learning_rate": 8.916761159836161e-06, "step": 28750 }, { "embedding_loss": 0.0016, "epoch": 2.999062792877226, "grad_norm": 0.003941241651773453, "learning_rate": 8.893619975470346e-06, "step": 28800 }, { "embedding_loss": 0.0017, "epoch": 3.00426949911486, "grad_norm": 0.00856933556497097, "learning_rate": 8.87047879110453e-06, "step": 28850 }, { "embedding_loss": 0.0003, "epoch": 3.009476205352494, "grad_norm": 0.009460404515266418, "learning_rate": 8.847337606738715e-06, "step": 28900 }, { "embedding_loss": 0.0014, "epoch": 3.014682911590128, "grad_norm": 0.006439635530114174, "learning_rate": 8.824196422372898e-06, "step": 28950 }, { "embedding_loss": 0.0014, "epoch": 3.0198896178277623, "grad_norm": 0.020535370334982872, "learning_rate": 8.801055238007083e-06, "step": 29000 }, { "embedding_loss": 0.0021, "epoch": 3.0250963240653963, "grad_norm": 0.027857592329382896, "learning_rate": 8.777914053641266e-06, "step": 29050 }, { "embedding_loss": 0.0007, "epoch": 3.0303030303030303, "grad_norm": 0.009412121027708054, "learning_rate": 8.754772869275451e-06, "step": 29100 }, { "embedding_loss": 0.0007, "epoch": 3.0355097365406642, "grad_norm": 0.05827178806066513, "learning_rate": 8.731631684909634e-06, "step": 29150 }, { "embedding_loss": 0.0003, "epoch": 3.0407164427782982, "grad_norm": 0.002373203868046403, "learning_rate": 8.708490500543818e-06, "step": 29200 }, { "embedding_loss": 0.0013, "epoch": 3.0459231490159326, "grad_norm": 0.006206809543073177, "learning_rate": 8.685349316178003e-06, "step": 29250 }, { "embedding_loss": 0.0015, "epoch": 3.0511298552535666, "grad_norm": 0.00912196934223175, "learning_rate": 8.662208131812186e-06, "step": 29300 }, { "embedding_loss": 0.0005, "epoch": 3.0563365614912006, "grad_norm": 0.00465911440551281, "learning_rate": 8.63906694744637e-06, "step": 29350 }, { "embedding_loss": 0.002, "epoch": 3.0615432677288346, "grad_norm": 0.18286481499671936, "learning_rate": 8.615925763080554e-06, "step": 29400 }, { "embedding_loss": 0.0006, "epoch": 3.066749973966469, "grad_norm": 0.004343831911683083, "learning_rate": 8.592784578714739e-06, "step": 29450 }, { "embedding_loss": 0.0013, "epoch": 3.071956680204103, "grad_norm": 0.05209621787071228, "learning_rate": 8.569643394348924e-06, "step": 29500 }, { "embedding_loss": 0.0018, "epoch": 3.077163386441737, "grad_norm": 0.05028437450528145, "learning_rate": 8.546502209983107e-06, "step": 29550 }, { "embedding_loss": 0.0002, "epoch": 3.082370092679371, "grad_norm": 0.0026865217369049788, "learning_rate": 8.523361025617292e-06, "step": 29600 }, { "embedding_loss": 0.0012, "epoch": 3.0875767989170053, "grad_norm": 0.4508988559246063, "learning_rate": 8.500219841251475e-06, "step": 29650 }, { "embedding_loss": 0.0007, "epoch": 3.0927835051546393, "grad_norm": 0.02336781658232212, "learning_rate": 8.47707865688566e-06, "step": 29700 }, { "embedding_loss": 0.0015, "epoch": 3.0979902113922733, "grad_norm": 0.0036797483917325735, "learning_rate": 8.453937472519844e-06, "step": 29750 }, { "embedding_loss": 0.0008, "epoch": 3.1031969176299072, "grad_norm": 0.007331520318984985, "learning_rate": 8.430796288154029e-06, "step": 29800 }, { "embedding_loss": 0.0004, "epoch": 3.108403623867541, "grad_norm": 0.006399332545697689, "learning_rate": 8.407655103788212e-06, "step": 29850 }, { "embedding_loss": 0.0032, "epoch": 3.1136103301051756, "grad_norm": 0.09251190721988678, "learning_rate": 8.384513919422397e-06, "step": 29900 }, { "embedding_loss": 0.0015, "epoch": 3.1188170363428096, "grad_norm": 0.005595037247985601, "learning_rate": 8.361372735056582e-06, "step": 29950 }, { "embedding_loss": 0.0014, "epoch": 3.1240237425804436, "grad_norm": 0.0045051900669932365, "learning_rate": 8.338231550690765e-06, "step": 30000 }, { "embedding_loss": 0.0013, "epoch": 3.1292304488180775, "grad_norm": 0.00950851384550333, "learning_rate": 8.31509036632495e-06, "step": 30050 }, { "embedding_loss": 0.0008, "epoch": 3.1344371550557115, "grad_norm": 0.0038670655339956284, "learning_rate": 8.291949181959133e-06, "step": 30100 }, { "embedding_loss": 0.0014, "epoch": 3.139643861293346, "grad_norm": 0.0030759319197386503, "learning_rate": 8.268807997593318e-06, "step": 30150 }, { "embedding_loss": 0.0019, "epoch": 3.14485056753098, "grad_norm": 0.0031105412635952234, "learning_rate": 8.245666813227501e-06, "step": 30200 }, { "embedding_loss": 0.0007, "epoch": 3.150057273768614, "grad_norm": 0.007572552189230919, "learning_rate": 8.222525628861686e-06, "step": 30250 }, { "embedding_loss": 0.0022, "epoch": 3.155263980006248, "grad_norm": 0.0044418093748390675, "learning_rate": 8.19938444449587e-06, "step": 30300 }, { "embedding_loss": 0.0008, "epoch": 3.1604706862438823, "grad_norm": 0.005121790803968906, "learning_rate": 8.176243260130055e-06, "step": 30350 }, { "embedding_loss": 0.0007, "epoch": 3.1656773924815163, "grad_norm": 0.005301581230014563, "learning_rate": 8.153102075764238e-06, "step": 30400 }, { "embedding_loss": 0.0006, "epoch": 3.1708840987191502, "grad_norm": 0.012782045640051365, "learning_rate": 8.129960891398423e-06, "step": 30450 }, { "embedding_loss": 0.0014, "epoch": 3.176090804956784, "grad_norm": 0.003524052444845438, "learning_rate": 8.106819707032608e-06, "step": 30500 }, { "embedding_loss": 0.0006, "epoch": 3.1812975111944186, "grad_norm": 0.04818173870444298, "learning_rate": 8.08367852266679e-06, "step": 30550 }, { "embedding_loss": 0.0008, "epoch": 3.1865042174320526, "grad_norm": 0.014735080301761627, "learning_rate": 8.060537338300974e-06, "step": 30600 }, { "embedding_loss": 0.0014, "epoch": 3.1917109236696866, "grad_norm": 0.004299947526305914, "learning_rate": 8.03739615393516e-06, "step": 30650 }, { "embedding_loss": 0.0007, "epoch": 3.1969176299073205, "grad_norm": 0.00827470701187849, "learning_rate": 8.014254969569343e-06, "step": 30700 }, { "embedding_loss": 0.0017, "epoch": 3.2021243361449545, "grad_norm": 0.005655727814882994, "learning_rate": 7.991113785203528e-06, "step": 30750 }, { "embedding_loss": 0.0016, "epoch": 3.207331042382589, "grad_norm": 0.0024495867546647787, "learning_rate": 7.96797260083771e-06, "step": 30800 }, { "embedding_loss": 0.0017, "epoch": 3.212537748620223, "grad_norm": 0.019548872485756874, "learning_rate": 7.944831416471896e-06, "step": 30850 }, { "embedding_loss": 0.0007, "epoch": 3.217744454857857, "grad_norm": 0.0044856201857328415, "learning_rate": 7.921690232106079e-06, "step": 30900 }, { "embedding_loss": 0.0014, "epoch": 3.222951161095491, "grad_norm": 0.004103007726371288, "learning_rate": 7.898549047740264e-06, "step": 30950 }, { "embedding_loss": 0.0017, "epoch": 3.2281578673331253, "grad_norm": 0.1315273493528366, "learning_rate": 7.875407863374447e-06, "step": 31000 }, { "embedding_loss": 0.0013, "epoch": 3.2333645735707592, "grad_norm": 0.005692615173757076, "learning_rate": 7.852266679008632e-06, "step": 31050 }, { "embedding_loss": 0.0021, "epoch": 3.238571279808393, "grad_norm": 0.002233312465250492, "learning_rate": 7.829125494642816e-06, "step": 31100 }, { "embedding_loss": 0.0012, "epoch": 3.243777986046027, "grad_norm": 0.03157159686088562, "learning_rate": 7.805984310277e-06, "step": 31150 }, { "embedding_loss": 0.002, "epoch": 3.248984692283661, "grad_norm": 0.008771988563239574, "learning_rate": 7.782843125911185e-06, "step": 31200 }, { "embedding_loss": 0.0007, "epoch": 3.2541913985212956, "grad_norm": 0.003804140957072377, "learning_rate": 7.759701941545369e-06, "step": 31250 }, { "embedding_loss": 0.0008, "epoch": 3.2593981047589295, "grad_norm": 0.0689612403512001, "learning_rate": 7.736560757179554e-06, "step": 31300 }, { "embedding_loss": 0.0009, "epoch": 3.2646048109965635, "grad_norm": 0.07443096488714218, "learning_rate": 7.713419572813737e-06, "step": 31350 }, { "embedding_loss": 0.0008, "epoch": 3.2698115172341975, "grad_norm": 0.021511824801564217, "learning_rate": 7.690278388447922e-06, "step": 31400 }, { "embedding_loss": 0.0007, "epoch": 3.275018223471832, "grad_norm": 0.004147614352405071, "learning_rate": 7.667137204082105e-06, "step": 31450 }, { "embedding_loss": 0.0014, "epoch": 3.280224929709466, "grad_norm": 0.027495531365275383, "learning_rate": 7.64399601971629e-06, "step": 31500 }, { "embedding_loss": 0.0006, "epoch": 3.2854316359471, "grad_norm": 0.0021503553725779057, "learning_rate": 7.620854835350474e-06, "step": 31550 }, { "embedding_loss": 0.0002, "epoch": 3.290638342184734, "grad_norm": 0.006075483746826649, "learning_rate": 7.597713650984658e-06, "step": 31600 }, { "embedding_loss": 0.0007, "epoch": 3.2958450484223683, "grad_norm": 0.005264146253466606, "learning_rate": 7.574572466618842e-06, "step": 31650 }, { "embedding_loss": 0.0024, "epoch": 3.3010517546600022, "grad_norm": 0.003356904024258256, "learning_rate": 7.5514312822530265e-06, "step": 31700 }, { "embedding_loss": 0.0003, "epoch": 3.306258460897636, "grad_norm": 0.09297136962413788, "learning_rate": 7.528290097887211e-06, "step": 31750 }, { "embedding_loss": 0.0014, "epoch": 3.31146516713527, "grad_norm": 0.016923336312174797, "learning_rate": 7.505148913521395e-06, "step": 31800 }, { "embedding_loss": 0.0011, "epoch": 3.316671873372904, "grad_norm": 0.09876677393913269, "learning_rate": 7.482007729155579e-06, "step": 31850 }, { "embedding_loss": 0.0024, "epoch": 3.3218785796105386, "grad_norm": 0.01769149675965309, "learning_rate": 7.458866544789764e-06, "step": 31900 }, { "embedding_loss": 0.0007, "epoch": 3.3270852858481725, "grad_norm": 0.0034628412686288357, "learning_rate": 7.435725360423948e-06, "step": 31950 }, { "embedding_loss": 0.0014, "epoch": 3.3322919920858065, "grad_norm": 0.003771421266719699, "learning_rate": 7.41258417605813e-06, "step": 32000 }, { "embedding_loss": 0.0007, "epoch": 3.3374986983234405, "grad_norm": 0.003236924996599555, "learning_rate": 7.389442991692315e-06, "step": 32050 }, { "embedding_loss": 0.0011, "epoch": 3.342705404561075, "grad_norm": 0.0025784943718463182, "learning_rate": 7.366301807326499e-06, "step": 32100 }, { "embedding_loss": 0.0013, "epoch": 3.347912110798709, "grad_norm": 0.00431936327368021, "learning_rate": 7.3431606229606835e-06, "step": 32150 }, { "embedding_loss": 0.0012, "epoch": 3.353118817036343, "grad_norm": 0.006928473711013794, "learning_rate": 7.320019438594868e-06, "step": 32200 }, { "embedding_loss": 0.0009, "epoch": 3.358325523273977, "grad_norm": 0.002246728865429759, "learning_rate": 7.296878254229052e-06, "step": 32250 }, { "embedding_loss": 0.0009, "epoch": 3.363532229511611, "grad_norm": 0.0024311786983162165, "learning_rate": 7.273737069863236e-06, "step": 32300 }, { "embedding_loss": 0.0002, "epoch": 3.368738935749245, "grad_norm": 0.33023688197135925, "learning_rate": 7.25059588549742e-06, "step": 32350 }, { "embedding_loss": 0.0009, "epoch": 3.373945641986879, "grad_norm": 0.0026327494997531176, "learning_rate": 7.227454701131604e-06, "step": 32400 }, { "embedding_loss": 0.0001, "epoch": 3.379152348224513, "grad_norm": 0.0038416869938373566, "learning_rate": 7.204313516765788e-06, "step": 32450 }, { "embedding_loss": 0.0008, "epoch": 3.384359054462147, "grad_norm": 0.004634499549865723, "learning_rate": 7.181172332399972e-06, "step": 32500 }, { "embedding_loss": 0.0009, "epoch": 3.3895657606997815, "grad_norm": 0.04748839512467384, "learning_rate": 7.158031148034157e-06, "step": 32550 }, { "embedding_loss": 0.0007, "epoch": 3.3947724669374155, "grad_norm": 0.00369762210175395, "learning_rate": 7.134889963668341e-06, "step": 32600 }, { "embedding_loss": 0.0033, "epoch": 3.3999791731750495, "grad_norm": 0.003863842226564884, "learning_rate": 7.1117487793025255e-06, "step": 32650 }, { "embedding_loss": 0.0001, "epoch": 3.4051858794126835, "grad_norm": 0.004467652644962072, "learning_rate": 7.08860759493671e-06, "step": 32700 }, { "embedding_loss": 0.0008, "epoch": 3.4103925856503174, "grad_norm": 0.0017069701571017504, "learning_rate": 7.065466410570894e-06, "step": 32750 }, { "embedding_loss": 0.0018, "epoch": 3.415599291887952, "grad_norm": 0.0025950015988200903, "learning_rate": 7.042325226205078e-06, "step": 32800 }, { "embedding_loss": 0.0006, "epoch": 3.420805998125586, "grad_norm": 0.0029007880948483944, "learning_rate": 7.019184041839262e-06, "step": 32850 }, { "embedding_loss": 0.001, "epoch": 3.42601270436322, "grad_norm": 0.003898217109963298, "learning_rate": 6.996042857473446e-06, "step": 32900 }, { "embedding_loss": 0.0008, "epoch": 3.431219410600854, "grad_norm": 0.004065630491822958, "learning_rate": 6.97290167310763e-06, "step": 32950 }, { "embedding_loss": 0.0014, "epoch": 3.436426116838488, "grad_norm": 0.09971676766872406, "learning_rate": 6.949760488741814e-06, "step": 33000 }, { "embedding_loss": 0.0015, "epoch": 3.441632823076122, "grad_norm": 0.004332449287176132, "learning_rate": 6.926619304375999e-06, "step": 33050 }, { "embedding_loss": 0.0002, "epoch": 3.446839529313756, "grad_norm": 0.002061097417026758, "learning_rate": 6.903478120010183e-06, "step": 33100 }, { "embedding_loss": 0.0004, "epoch": 3.45204623555139, "grad_norm": 0.005368870683014393, "learning_rate": 6.8803369356443674e-06, "step": 33150 }, { "embedding_loss": 0.0011, "epoch": 3.457252941789024, "grad_norm": 0.0153218824416399, "learning_rate": 6.8571957512785516e-06, "step": 33200 }, { "embedding_loss": 0.0008, "epoch": 3.4624596480266585, "grad_norm": 0.003146272385492921, "learning_rate": 6.834054566912736e-06, "step": 33250 }, { "embedding_loss": 0.0007, "epoch": 3.4676663542642925, "grad_norm": 0.005474725738167763, "learning_rate": 6.81091338254692e-06, "step": 33300 }, { "embedding_loss": 0.0023, "epoch": 3.4728730605019265, "grad_norm": 0.002225042786449194, "learning_rate": 6.787772198181104e-06, "step": 33350 }, { "embedding_loss": 0.0009, "epoch": 3.4780797667395604, "grad_norm": 0.004484266974031925, "learning_rate": 6.764631013815287e-06, "step": 33400 }, { "embedding_loss": 0.0005, "epoch": 3.483286472977195, "grad_norm": 0.001994067570194602, "learning_rate": 6.741489829449471e-06, "step": 33450 }, { "embedding_loss": 0.0008, "epoch": 3.488493179214829, "grad_norm": 0.002722726669162512, "learning_rate": 6.718348645083655e-06, "step": 33500 }, { "embedding_loss": 0.0003, "epoch": 3.493699885452463, "grad_norm": 0.003505149856209755, "learning_rate": 6.6952074607178395e-06, "step": 33550 }, { "embedding_loss": 0.0001, "epoch": 3.4989065916900968, "grad_norm": 0.002958771074190736, "learning_rate": 6.672066276352024e-06, "step": 33600 }, { "embedding_loss": 0.0008, "epoch": 3.5041132979277307, "grad_norm": 0.007241967599838972, "learning_rate": 6.648925091986208e-06, "step": 33650 }, { "embedding_loss": 0.0009, "epoch": 3.5093200041653647, "grad_norm": 0.0048427823930978775, "learning_rate": 6.625783907620393e-06, "step": 33700 }, { "embedding_loss": 0.0011, "epoch": 3.514526710402999, "grad_norm": 0.004183737561106682, "learning_rate": 6.602642723254577e-06, "step": 33750 }, { "embedding_loss": 0.0002, "epoch": 3.519733416640633, "grad_norm": 0.005011474248021841, "learning_rate": 6.579501538888761e-06, "step": 33800 }, { "embedding_loss": 0.0011, "epoch": 3.524940122878267, "grad_norm": 0.04754041135311127, "learning_rate": 6.556360354522945e-06, "step": 33850 }, { "embedding_loss": 0.0011, "epoch": 3.5301468291159015, "grad_norm": 0.0030108760111033916, "learning_rate": 6.533219170157129e-06, "step": 33900 }, { "embedding_loss": 0.0003, "epoch": 3.5353535353535355, "grad_norm": 0.002894002478569746, "learning_rate": 6.510077985791313e-06, "step": 33950 }, { "embedding_loss": 0.001, "epoch": 3.5405602415911694, "grad_norm": 0.01952524110674858, "learning_rate": 6.486936801425497e-06, "step": 34000 }, { "embedding_loss": 0.0003, "epoch": 3.5457669478288034, "grad_norm": 0.0029994072392582893, "learning_rate": 6.4637956170596815e-06, "step": 34050 }, { "embedding_loss": 0.0007, "epoch": 3.5509736540664374, "grad_norm": 0.006770998239517212, "learning_rate": 6.4406544326938656e-06, "step": 34100 }, { "embedding_loss": 0.0001, "epoch": 3.556180360304072, "grad_norm": 0.0302437637001276, "learning_rate": 6.41751324832805e-06, "step": 34150 }, { "embedding_loss": 0.0001, "epoch": 3.561387066541706, "grad_norm": 0.002121832687407732, "learning_rate": 6.394372063962234e-06, "step": 34200 }, { "embedding_loss": 0.0002, "epoch": 3.5665937727793398, "grad_norm": 0.00341336359269917, "learning_rate": 6.371230879596419e-06, "step": 34250 }, { "embedding_loss": 0.0013, "epoch": 3.5718004790169737, "grad_norm": 0.0027454060036689043, "learning_rate": 6.348089695230603e-06, "step": 34300 }, { "embedding_loss": 0.0008, "epoch": 3.577007185254608, "grad_norm": 0.0077779293060302734, "learning_rate": 6.324948510864787e-06, "step": 34350 }, { "embedding_loss": 0.0028, "epoch": 3.582213891492242, "grad_norm": 0.003297444898635149, "learning_rate": 6.301807326498971e-06, "step": 34400 }, { "embedding_loss": 0.0008, "epoch": 3.587420597729876, "grad_norm": 0.004151192959398031, "learning_rate": 6.278666142133155e-06, "step": 34450 }, { "embedding_loss": 0.0002, "epoch": 3.59262730396751, "grad_norm": 0.011945868842303753, "learning_rate": 6.255524957767339e-06, "step": 34500 }, { "embedding_loss": 0.0012, "epoch": 3.597834010205144, "grad_norm": 0.002266357187181711, "learning_rate": 6.232383773401523e-06, "step": 34550 }, { "embedding_loss": 0.0011, "epoch": 3.6030407164427785, "grad_norm": 0.031817760318517685, "learning_rate": 6.2092425890357075e-06, "step": 34600 }, { "embedding_loss": 0.0027, "epoch": 3.6082474226804124, "grad_norm": 0.114555723965168, "learning_rate": 6.186101404669892e-06, "step": 34650 }, { "embedding_loss": 0.0002, "epoch": 3.6134541289180464, "grad_norm": 0.0052092778496444225, "learning_rate": 6.162960220304076e-06, "step": 34700 }, { "embedding_loss": 0.0011, "epoch": 3.6186608351556804, "grad_norm": 0.013743920251727104, "learning_rate": 6.139819035938261e-06, "step": 34750 }, { "embedding_loss": 0.0015, "epoch": 3.623867541393315, "grad_norm": 0.04450186714529991, "learning_rate": 6.116677851572443e-06, "step": 34800 }, { "embedding_loss": 0.0014, "epoch": 3.6290742476309488, "grad_norm": 0.011497569270431995, "learning_rate": 6.093536667206627e-06, "step": 34850 }, { "embedding_loss": 0.0003, "epoch": 3.6342809538685827, "grad_norm": 0.001604217104613781, "learning_rate": 6.070395482840812e-06, "step": 34900 }, { "embedding_loss": 0.0008, "epoch": 3.6394876601062167, "grad_norm": 0.014813857153058052, "learning_rate": 6.047254298474996e-06, "step": 34950 }, { "embedding_loss": 0.0007, "epoch": 3.6446943663438507, "grad_norm": 0.002726171864196658, "learning_rate": 6.02411311410918e-06, "step": 35000 }, { "embedding_loss": 0.0009, "epoch": 3.649901072581485, "grad_norm": 0.0028911526314914227, "learning_rate": 6.0009719297433645e-06, "step": 35050 }, { "embedding_loss": 0.0008, "epoch": 3.655107778819119, "grad_norm": 0.0009616083116270602, "learning_rate": 5.977830745377549e-06, "step": 35100 }, { "embedding_loss": 0.0007, "epoch": 3.660314485056753, "grad_norm": 0.0013377583818510175, "learning_rate": 5.954689561011733e-06, "step": 35150 }, { "embedding_loss": 0.0005, "epoch": 3.665521191294387, "grad_norm": 0.01589621789753437, "learning_rate": 5.931548376645917e-06, "step": 35200 }, { "embedding_loss": 0.0002, "epoch": 3.6707278975320214, "grad_norm": 0.05081808194518089, "learning_rate": 5.908407192280101e-06, "step": 35250 }, { "embedding_loss": 0.0005, "epoch": 3.6759346037696554, "grad_norm": 0.04854687675833702, "learning_rate": 5.885266007914285e-06, "step": 35300 }, { "embedding_loss": 0.0001, "epoch": 3.6811413100072894, "grad_norm": 0.0028674921486526728, "learning_rate": 5.862124823548469e-06, "step": 35350 }, { "embedding_loss": 0.0004, "epoch": 3.6863480162449234, "grad_norm": 0.006336590740829706, "learning_rate": 5.838983639182654e-06, "step": 35400 }, { "embedding_loss": 0.0008, "epoch": 3.6915547224825573, "grad_norm": 0.002654125215485692, "learning_rate": 5.815842454816838e-06, "step": 35450 }, { "embedding_loss": 0.0008, "epoch": 3.6967614287201918, "grad_norm": 0.001202322542667389, "learning_rate": 5.792701270451022e-06, "step": 35500 }, { "embedding_loss": 0.0017, "epoch": 3.7019681349578257, "grad_norm": 0.0029784284997731447, "learning_rate": 5.7695600860852065e-06, "step": 35550 }, { "embedding_loss": 0.0004, "epoch": 3.7071748411954597, "grad_norm": 0.017423637211322784, "learning_rate": 5.746418901719391e-06, "step": 35600 }, { "embedding_loss": 0.0011, "epoch": 3.7123815474330937, "grad_norm": 0.001634717918932438, "learning_rate": 5.723277717353575e-06, "step": 35650 }, { "embedding_loss": 0.0009, "epoch": 3.717588253670728, "grad_norm": 0.006617635954171419, "learning_rate": 5.700136532987759e-06, "step": 35700 }, { "embedding_loss": 0.0002, "epoch": 3.722794959908362, "grad_norm": 0.0018128909869119525, "learning_rate": 5.676995348621943e-06, "step": 35750 }, { "embedding_loss": 0.0017, "epoch": 3.728001666145996, "grad_norm": 0.0016381378518417478, "learning_rate": 5.653854164256127e-06, "step": 35800 }, { "embedding_loss": 0.0001, "epoch": 3.73320837238363, "grad_norm": 0.005606998223811388, "learning_rate": 5.630712979890311e-06, "step": 35850 }, { "embedding_loss": 0.0009, "epoch": 3.738415078621264, "grad_norm": 0.0032535437494516373, "learning_rate": 5.607571795524496e-06, "step": 35900 }, { "embedding_loss": 0.0002, "epoch": 3.7436217848588984, "grad_norm": 0.008722545579075813, "learning_rate": 5.58443061115868e-06, "step": 35950 }, { "embedding_loss": 0.0001, "epoch": 3.7488284910965324, "grad_norm": 0.023524988442659378, "learning_rate": 5.561289426792864e-06, "step": 36000 }, { "embedding_loss": 0.0007, "epoch": 3.7540351973341664, "grad_norm": 0.006606587208807468, "learning_rate": 5.5381482424270484e-06, "step": 36050 }, { "embedding_loss": 0.0014, "epoch": 3.7592419035718003, "grad_norm": 0.0010090703144669533, "learning_rate": 5.5150070580612326e-06, "step": 36100 }, { "embedding_loss": 0.0005, "epoch": 3.7644486098094347, "grad_norm": 0.00230466783978045, "learning_rate": 5.491865873695417e-06, "step": 36150 }, { "embedding_loss": 0.0001, "epoch": 3.7696553160470687, "grad_norm": 0.004099918529391289, "learning_rate": 5.468724689329601e-06, "step": 36200 }, { "embedding_loss": 0.0002, "epoch": 3.7748620222847027, "grad_norm": 0.007035956718027592, "learning_rate": 5.445583504963784e-06, "step": 36250 }, { "embedding_loss": 0.0012, "epoch": 3.7800687285223367, "grad_norm": 0.010237271897494793, "learning_rate": 5.422442320597968e-06, "step": 36300 }, { "embedding_loss": 0.0011, "epoch": 3.7852754347599706, "grad_norm": 0.0014003911055624485, "learning_rate": 5.399301136232152e-06, "step": 36350 }, { "embedding_loss": 0.0014, "epoch": 3.790482140997605, "grad_norm": 0.0034218619111925364, "learning_rate": 5.376159951866336e-06, "step": 36400 }, { "embedding_loss": 0.0004, "epoch": 3.795688847235239, "grad_norm": 0.0065358299762010574, "learning_rate": 5.3530187675005205e-06, "step": 36450 }, { "embedding_loss": 0.0001, "epoch": 3.800895553472873, "grad_norm": 0.002235516905784607, "learning_rate": 5.329877583134705e-06, "step": 36500 }, { "embedding_loss": 0.0013, "epoch": 3.806102259710507, "grad_norm": 0.0020229006186127663, "learning_rate": 5.3067363987688896e-06, "step": 36550 }, { "embedding_loss": 0.0013, "epoch": 3.8113089659481414, "grad_norm": 0.0053365700878202915, "learning_rate": 5.283595214403074e-06, "step": 36600 }, { "embedding_loss": 0.0001, "epoch": 3.8165156721857754, "grad_norm": 0.011895844712853432, "learning_rate": 5.260454030037258e-06, "step": 36650 }, { "embedding_loss": 0.0008, "epoch": 3.8217223784234093, "grad_norm": 0.0022297389805316925, "learning_rate": 5.237312845671442e-06, "step": 36700 }, { "embedding_loss": 0.0008, "epoch": 3.8269290846610433, "grad_norm": 0.0022312577348202467, "learning_rate": 5.214171661305626e-06, "step": 36750 }, { "embedding_loss": 0.0008, "epoch": 3.8321357908986773, "grad_norm": 0.004529103171080351, "learning_rate": 5.19103047693981e-06, "step": 36800 }, { "embedding_loss": 0.0011, "epoch": 3.8373424971363117, "grad_norm": 0.0026438578497618437, "learning_rate": 5.167889292573994e-06, "step": 36850 }, { "embedding_loss": 0.0012, "epoch": 3.8425492033739457, "grad_norm": 0.005113155115395784, "learning_rate": 5.144748108208178e-06, "step": 36900 }, { "embedding_loss": 0.001, "epoch": 3.8477559096115796, "grad_norm": 0.017756449058651924, "learning_rate": 5.1216069238423624e-06, "step": 36950 }, { "embedding_loss": 0.0007, "epoch": 3.8529626158492136, "grad_norm": 0.02352430485188961, "learning_rate": 5.0984657394765466e-06, "step": 37000 }, { "embedding_loss": 0.0002, "epoch": 3.858169322086848, "grad_norm": 0.003178997430950403, "learning_rate": 5.075324555110731e-06, "step": 37050 }, { "embedding_loss": 0.0018, "epoch": 3.863376028324482, "grad_norm": 0.014370561577379704, "learning_rate": 5.052183370744916e-06, "step": 37100 }, { "embedding_loss": 0.0007, "epoch": 3.868582734562116, "grad_norm": 0.0058501786552369595, "learning_rate": 5.0290421863791e-06, "step": 37150 }, { "embedding_loss": 0.0008, "epoch": 3.87378944079975, "grad_norm": 0.0018966099014505744, "learning_rate": 5.005901002013284e-06, "step": 37200 }, { "embedding_loss": 0.0002, "epoch": 3.878996147037384, "grad_norm": 0.002752570202574134, "learning_rate": 4.982759817647468e-06, "step": 37250 }, { "embedding_loss": 0.0001, "epoch": 3.8842028532750184, "grad_norm": 0.003022131510078907, "learning_rate": 4.959618633281651e-06, "step": 37300 }, { "embedding_loss": 0.0016, "epoch": 3.8894095595126523, "grad_norm": 0.006553678773343563, "learning_rate": 4.936477448915835e-06, "step": 37350 }, { "embedding_loss": 0.0001, "epoch": 3.8946162657502863, "grad_norm": 0.0018002489814534783, "learning_rate": 4.91333626455002e-06, "step": 37400 }, { "embedding_loss": 0.0001, "epoch": 3.8998229719879207, "grad_norm": 0.001831809408031404, "learning_rate": 4.890195080184204e-06, "step": 37450 }, { "embedding_loss": 0.0001, "epoch": 3.9050296782255547, "grad_norm": 0.0025375783443450928, "learning_rate": 4.8670538958183885e-06, "step": 37500 }, { "embedding_loss": 0.0009, "epoch": 3.9102363844631887, "grad_norm": 0.004411675967276096, "learning_rate": 4.843912711452573e-06, "step": 37550 }, { "embedding_loss": 0.0013, "epoch": 3.9154430907008226, "grad_norm": 0.009407658129930496, "learning_rate": 4.820771527086757e-06, "step": 37600 }, { "embedding_loss": 0.0001, "epoch": 3.9206497969384566, "grad_norm": 0.002038530306890607, "learning_rate": 4.797630342720941e-06, "step": 37650 }, { "embedding_loss": 0.0018, "epoch": 3.9258565031760906, "grad_norm": 0.017908206209540367, "learning_rate": 4.774489158355125e-06, "step": 37700 }, { "embedding_loss": 0.0009, "epoch": 3.931063209413725, "grad_norm": 0.0015109736705198884, "learning_rate": 4.751347973989309e-06, "step": 37750 }, { "embedding_loss": 0.0008, "epoch": 3.936269915651359, "grad_norm": 0.0064455061219632626, "learning_rate": 4.728206789623493e-06, "step": 37800 }, { "embedding_loss": 0.0004, "epoch": 3.941476621888993, "grad_norm": 0.0861746221780777, "learning_rate": 4.705065605257677e-06, "step": 37850 }, { "embedding_loss": 0.0006, "epoch": 3.9466833281266274, "grad_norm": 0.0038613975048065186, "learning_rate": 4.681924420891862e-06, "step": 37900 }, { "embedding_loss": 0.0003, "epoch": 3.9518900343642613, "grad_norm": 0.03049100562930107, "learning_rate": 4.658783236526046e-06, "step": 37950 }, { "embedding_loss": 0.0015, "epoch": 3.9570967406018953, "grad_norm": 0.0011294811265543103, "learning_rate": 4.63564205216023e-06, "step": 38000 }, { "embedding_loss": 0.0008, "epoch": 3.9623034468395293, "grad_norm": 0.003215038450434804, "learning_rate": 4.612500867794414e-06, "step": 38050 }, { "embedding_loss": 0.0001, "epoch": 3.9675101530771633, "grad_norm": 0.002345999237149954, "learning_rate": 4.589359683428598e-06, "step": 38100 }, { "embedding_loss": 0.0001, "epoch": 3.9727168593147972, "grad_norm": 0.0017487540608271956, "learning_rate": 4.566218499062782e-06, "step": 38150 }, { "embedding_loss": 0.0001, "epoch": 3.9779235655524317, "grad_norm": 0.002043676795437932, "learning_rate": 4.543077314696966e-06, "step": 38200 }, { "embedding_loss": 0.002, "epoch": 3.9831302717900656, "grad_norm": 0.0033409446477890015, "learning_rate": 4.519936130331151e-06, "step": 38250 }, { "embedding_loss": 0.0002, "epoch": 3.9883369780276996, "grad_norm": 0.1487550139427185, "learning_rate": 4.496794945965335e-06, "step": 38300 }, { "embedding_loss": 0.0001, "epoch": 3.993543684265334, "grad_norm": 0.003084618365392089, "learning_rate": 4.473653761599519e-06, "step": 38350 }, { "embedding_loss": 0.0011, "epoch": 3.998750390502968, "grad_norm": 0.004644028376787901, "learning_rate": 4.450512577233703e-06, "step": 38400 }, { "embedding_loss": 0.0001, "epoch": 4.003957096740602, "grad_norm": 0.004961004480719566, "learning_rate": 4.4273713928678875e-06, "step": 38450 }, { "embedding_loss": 0.0002, "epoch": 4.009163802978236, "grad_norm": 0.0015071636298671365, "learning_rate": 4.404230208502072e-06, "step": 38500 }, { "embedding_loss": 0.0016, "epoch": 4.01437050921587, "grad_norm": 0.0037345190066844225, "learning_rate": 4.381089024136256e-06, "step": 38550 }, { "embedding_loss": 0.0013, "epoch": 4.019577215453504, "grad_norm": 0.0011363272788003087, "learning_rate": 4.35794783977044e-06, "step": 38600 }, { "embedding_loss": 0.0015, "epoch": 4.024783921691138, "grad_norm": 0.00206565810367465, "learning_rate": 4.334806655404624e-06, "step": 38650 }, { "embedding_loss": 0.0003, "epoch": 4.029990627928772, "grad_norm": 0.0060499319806694984, "learning_rate": 4.311665471038808e-06, "step": 38700 }, { "embedding_loss": 0.0002, "epoch": 4.035197334166407, "grad_norm": 0.0032932923641055822, "learning_rate": 4.288524286672992e-06, "step": 38750 }, { "embedding_loss": 0.0001, "epoch": 4.040404040404041, "grad_norm": 0.002779960399493575, "learning_rate": 4.265383102307176e-06, "step": 38800 }, { "embedding_loss": 0.0014, "epoch": 4.045610746641675, "grad_norm": 0.037588316947221756, "learning_rate": 4.24224191794136e-06, "step": 38850 }, { "embedding_loss": 0.0014, "epoch": 4.050817452879309, "grad_norm": 0.0021385361906141043, "learning_rate": 4.2191007335755445e-06, "step": 38900 }, { "embedding_loss": 0.0011, "epoch": 4.056024159116943, "grad_norm": 0.002653073286637664, "learning_rate": 4.195959549209729e-06, "step": 38950 }, { "embedding_loss": 0.0002, "epoch": 4.061230865354577, "grad_norm": 0.003462142078205943, "learning_rate": 4.172818364843913e-06, "step": 39000 }, { "embedding_loss": 0.0014, "epoch": 4.0664375715922105, "grad_norm": 0.0043731373734772205, "learning_rate": 4.149677180478097e-06, "step": 39050 }, { "embedding_loss": 0.002, "epoch": 4.0716442778298445, "grad_norm": 0.0031473205890506506, "learning_rate": 4.126535996112282e-06, "step": 39100 }, { "embedding_loss": 0.0007, "epoch": 4.0768509840674785, "grad_norm": 0.0068083652295172215, "learning_rate": 4.103394811746466e-06, "step": 39150 }, { "embedding_loss": 0.0014, "epoch": 4.082057690305113, "grad_norm": 0.0017057887744158506, "learning_rate": 4.08025362738065e-06, "step": 39200 }, { "embedding_loss": 0.0008, "epoch": 4.087264396542747, "grad_norm": 0.0034488628152757883, "learning_rate": 4.057112443014834e-06, "step": 39250 }, { "embedding_loss": 0.0001, "epoch": 4.092471102780381, "grad_norm": 0.004338666331022978, "learning_rate": 4.033971258649018e-06, "step": 39300 }, { "embedding_loss": 0.0008, "epoch": 4.097677809018015, "grad_norm": 0.001688474789261818, "learning_rate": 4.010830074283202e-06, "step": 39350 }, { "embedding_loss": 0.0001, "epoch": 4.102884515255649, "grad_norm": 0.005720613989979029, "learning_rate": 3.9876888899173864e-06, "step": 39400 }, { "embedding_loss": 0.0001, "epoch": 4.108091221493283, "grad_norm": 0.010131466202437878, "learning_rate": 3.9645477055515705e-06, "step": 39450 }, { "embedding_loss": 0.0008, "epoch": 4.113297927730917, "grad_norm": 0.00117829954251647, "learning_rate": 3.941406521185755e-06, "step": 39500 }, { "embedding_loss": 0.0001, "epoch": 4.118504633968551, "grad_norm": 0.013248096220195293, "learning_rate": 3.918265336819939e-06, "step": 39550 }, { "embedding_loss": 0.0002, "epoch": 4.123711340206185, "grad_norm": 0.005844116676598787, "learning_rate": 3.895124152454123e-06, "step": 39600 }, { "embedding_loss": 0.0005, "epoch": 4.12891804644382, "grad_norm": 0.002914564684033394, "learning_rate": 3.871982968088307e-06, "step": 39650 }, { "embedding_loss": 0.0004, "epoch": 4.134124752681454, "grad_norm": 0.003980652429163456, "learning_rate": 3.848841783722491e-06, "step": 39700 }, { "embedding_loss": 0.0001, "epoch": 4.139331458919088, "grad_norm": 0.004351139068603516, "learning_rate": 3.825700599356675e-06, "step": 39750 }, { "embedding_loss": 0.0009, "epoch": 4.144538165156722, "grad_norm": 0.00460411561653018, "learning_rate": 3.8025594149908597e-06, "step": 39800 }, { "embedding_loss": 0.0001, "epoch": 4.149744871394356, "grad_norm": 0.002258758759126067, "learning_rate": 3.779418230625044e-06, "step": 39850 }, { "embedding_loss": 0.0007, "epoch": 4.15495157763199, "grad_norm": 0.00147035694681108, "learning_rate": 3.756277046259228e-06, "step": 39900 }, { "embedding_loss": 0.0013, "epoch": 4.160158283869624, "grad_norm": 0.008124323561787605, "learning_rate": 3.733135861893412e-06, "step": 39950 }, { "embedding_loss": 0.0004, "epoch": 4.165364990107258, "grad_norm": 0.006330924108624458, "learning_rate": 3.709994677527596e-06, "step": 40000 }, { "embedding_loss": 0.0003, "epoch": 4.170571696344892, "grad_norm": 0.0018023628508672118, "learning_rate": 3.6868534931617807e-06, "step": 40050 }, { "embedding_loss": 0.0008, "epoch": 4.175778402582527, "grad_norm": 0.06845732778310776, "learning_rate": 3.663712308795965e-06, "step": 40100 }, { "embedding_loss": 0.0007, "epoch": 4.180985108820161, "grad_norm": 0.05048598721623421, "learning_rate": 3.6405711244301485e-06, "step": 40150 }, { "embedding_loss": 0.0004, "epoch": 4.186191815057795, "grad_norm": 0.008092716336250305, "learning_rate": 3.6174299400643326e-06, "step": 40200 }, { "embedding_loss": 0.0007, "epoch": 4.191398521295429, "grad_norm": 0.0009377990500070155, "learning_rate": 3.5942887556985167e-06, "step": 40250 }, { "embedding_loss": 0.0013, "epoch": 4.1966052275330625, "grad_norm": 0.0021307109855115414, "learning_rate": 3.571147571332701e-06, "step": 40300 }, { "embedding_loss": 0.0003, "epoch": 4.2018119337706965, "grad_norm": 0.007595045492053032, "learning_rate": 3.548006386966885e-06, "step": 40350 }, { "embedding_loss": 0.0007, "epoch": 4.2070186400083305, "grad_norm": 0.0017604045569896698, "learning_rate": 3.5248652026010695e-06, "step": 40400 }, { "embedding_loss": 0.001, "epoch": 4.2122253462459645, "grad_norm": 0.0040459102019667625, "learning_rate": 3.5017240182352536e-06, "step": 40450 }, { "embedding_loss": 0.0001, "epoch": 4.217432052483598, "grad_norm": 0.0473860502243042, "learning_rate": 3.4785828338694377e-06, "step": 40500 }, { "embedding_loss": 0.0007, "epoch": 4.222638758721233, "grad_norm": 0.002496903296560049, "learning_rate": 3.455441649503622e-06, "step": 40550 }, { "embedding_loss": 0.0001, "epoch": 4.227845464958867, "grad_norm": 0.007270964793860912, "learning_rate": 3.432300465137806e-06, "step": 40600 }, { "embedding_loss": 0.0007, "epoch": 4.233052171196501, "grad_norm": 0.013144693337380886, "learning_rate": 3.4091592807719905e-06, "step": 40650 }, { "embedding_loss": 0.0008, "epoch": 4.238258877434135, "grad_norm": 0.003847824176773429, "learning_rate": 3.3860180964061746e-06, "step": 40700 }, { "embedding_loss": 0.0008, "epoch": 4.243465583671769, "grad_norm": 0.0023708331864327192, "learning_rate": 3.3628769120403587e-06, "step": 40750 }, { "embedding_loss": 0.0007, "epoch": 4.248672289909403, "grad_norm": 0.018748441711068153, "learning_rate": 3.339735727674543e-06, "step": 40800 }, { "embedding_loss": 0.0015, "epoch": 4.253878996147037, "grad_norm": 0.0020937493536621332, "learning_rate": 3.3165945433087265e-06, "step": 40850 }, { "embedding_loss": 0.0001, "epoch": 4.259085702384671, "grad_norm": 0.042171407490968704, "learning_rate": 3.2934533589429106e-06, "step": 40900 }, { "embedding_loss": 0.0001, "epoch": 4.264292408622305, "grad_norm": 0.00801966805011034, "learning_rate": 3.2703121745770947e-06, "step": 40950 }, { "embedding_loss": 0.0002, "epoch": 4.26949911485994, "grad_norm": 0.008358903229236603, "learning_rate": 3.2471709902112793e-06, "step": 41000 }, { "embedding_loss": 0.0001, "epoch": 4.274705821097574, "grad_norm": 0.001534903421998024, "learning_rate": 3.2240298058454634e-06, "step": 41050 }, { "embedding_loss": 0.0001, "epoch": 4.279912527335208, "grad_norm": 0.0019458031747490168, "learning_rate": 3.2008886214796475e-06, "step": 41100 }, { "embedding_loss": 0.0001, "epoch": 4.285119233572842, "grad_norm": 0.004779054783284664, "learning_rate": 3.1777474371138316e-06, "step": 41150 }, { "embedding_loss": 0.0021, "epoch": 4.290325939810476, "grad_norm": 0.0033252162393182516, "learning_rate": 3.1546062527480157e-06, "step": 41200 }, { "embedding_loss": 0.0013, "epoch": 4.29553264604811, "grad_norm": 0.0036777497734874487, "learning_rate": 3.1314650683822002e-06, "step": 41250 }, { "embedding_loss": 0.001, "epoch": 4.300739352285744, "grad_norm": 0.0024451168719679117, "learning_rate": 3.1083238840163844e-06, "step": 41300 }, { "embedding_loss": 0.0009, "epoch": 4.305946058523378, "grad_norm": 0.0027761063538491726, "learning_rate": 3.0851826996505685e-06, "step": 41350 }, { "embedding_loss": 0.0001, "epoch": 4.311152764761012, "grad_norm": 0.002929074689745903, "learning_rate": 3.0620415152847526e-06, "step": 41400 }, { "embedding_loss": 0.0001, "epoch": 4.316359470998647, "grad_norm": 0.26386216282844543, "learning_rate": 3.0389003309189367e-06, "step": 41450 }, { "embedding_loss": 0.0007, "epoch": 4.321566177236281, "grad_norm": 0.004391273949295282, "learning_rate": 3.0157591465531212e-06, "step": 41500 }, { "embedding_loss": 0.0007, "epoch": 4.3267728834739145, "grad_norm": 0.0026139123365283012, "learning_rate": 2.992617962187305e-06, "step": 41550 }, { "embedding_loss": 0.0003, "epoch": 4.3319795897115485, "grad_norm": 0.03610287979245186, "learning_rate": 2.969476777821489e-06, "step": 41600 }, { "embedding_loss": 0.0007, "epoch": 4.3371862959491825, "grad_norm": 0.0036759632639586926, "learning_rate": 2.946335593455673e-06, "step": 41650 }, { "embedding_loss": 0.0001, "epoch": 4.3423930021868165, "grad_norm": 0.004986033774912357, "learning_rate": 2.9231944090898572e-06, "step": 41700 }, { "embedding_loss": 0.0009, "epoch": 4.34759970842445, "grad_norm": 0.0021286620758473873, "learning_rate": 2.9000532247240414e-06, "step": 41750 }, { "embedding_loss": 0.0013, "epoch": 4.352806414662084, "grad_norm": 0.0035934702027589083, "learning_rate": 2.876912040358226e-06, "step": 41800 }, { "embedding_loss": 0.0001, "epoch": 4.358013120899718, "grad_norm": 0.0023505541030317545, "learning_rate": 2.85377085599241e-06, "step": 41850 }, { "embedding_loss": 0.0019, "epoch": 4.363219827137353, "grad_norm": 0.002859236905351281, "learning_rate": 2.830629671626594e-06, "step": 41900 }, { "embedding_loss": 0.0002, "epoch": 4.368426533374987, "grad_norm": 0.019494347274303436, "learning_rate": 2.8074884872607782e-06, "step": 41950 }, { "embedding_loss": 0.0002, "epoch": 4.373633239612621, "grad_norm": 0.0020093335770070553, "learning_rate": 2.7843473028949623e-06, "step": 42000 }, { "embedding_loss": 0.0006, "epoch": 4.378839945850255, "grad_norm": 0.003382645780220628, "learning_rate": 2.761206118529147e-06, "step": 42050 }, { "embedding_loss": 0.0007, "epoch": 4.384046652087889, "grad_norm": 0.0038147750310599804, "learning_rate": 2.738064934163331e-06, "step": 42100 }, { "embedding_loss": 0.0001, "epoch": 4.389253358325523, "grad_norm": 0.014739004895091057, "learning_rate": 2.714923749797515e-06, "step": 42150 }, { "embedding_loss": 0.0006, "epoch": 4.394460064563157, "grad_norm": 0.003537252312526107, "learning_rate": 2.691782565431699e-06, "step": 42200 }, { "embedding_loss": 0.0024, "epoch": 4.399666770800791, "grad_norm": 0.005448461975902319, "learning_rate": 2.6686413810658833e-06, "step": 42250 }, { "embedding_loss": 0.0007, "epoch": 4.404873477038425, "grad_norm": 0.004431063774973154, "learning_rate": 2.645500196700067e-06, "step": 42300 }, { "embedding_loss": 0.0008, "epoch": 4.41008018327606, "grad_norm": 0.08171793073415756, "learning_rate": 2.622359012334251e-06, "step": 42350 }, { "embedding_loss": 0.0014, "epoch": 4.415286889513694, "grad_norm": 3.030642509460449, "learning_rate": 2.5992178279684356e-06, "step": 42400 }, { "embedding_loss": 0.0001, "epoch": 4.420493595751328, "grad_norm": 0.00229825172573328, "learning_rate": 2.5760766436026198e-06, "step": 42450 }, { "embedding_loss": 0.0007, "epoch": 4.425700301988962, "grad_norm": 0.0022334696259349585, "learning_rate": 2.552935459236804e-06, "step": 42500 }, { "embedding_loss": 0.0001, "epoch": 4.430907008226596, "grad_norm": 0.006273935548961163, "learning_rate": 2.529794274870988e-06, "step": 42550 }, { "embedding_loss": 0.0011, "epoch": 4.43611371446423, "grad_norm": 0.002443622797727585, "learning_rate": 2.506653090505172e-06, "step": 42600 }, { "embedding_loss": 0.0015, "epoch": 4.441320420701864, "grad_norm": 0.007955342531204224, "learning_rate": 2.4835119061393566e-06, "step": 42650 }, { "embedding_loss": 0.0008, "epoch": 4.446527126939498, "grad_norm": 0.005112164653837681, "learning_rate": 2.4603707217735407e-06, "step": 42700 }, { "embedding_loss": 0.0001, "epoch": 4.451733833177133, "grad_norm": 0.007230122108012438, "learning_rate": 2.437229537407725e-06, "step": 42750 }, { "embedding_loss": 0.0006, "epoch": 4.4569405394147665, "grad_norm": 0.0016728178597986698, "learning_rate": 2.414088353041909e-06, "step": 42800 }, { "embedding_loss": 0.0004, "epoch": 4.4621472456524005, "grad_norm": 0.003500057151541114, "learning_rate": 2.390947168676093e-06, "step": 42850 }, { "embedding_loss": 0.0008, "epoch": 4.4673539518900345, "grad_norm": 0.0011205794289708138, "learning_rate": 2.367805984310277e-06, "step": 42900 }, { "embedding_loss": 0.0017, "epoch": 4.4725606581276685, "grad_norm": 0.0030274472665041685, "learning_rate": 2.3446647999444613e-06, "step": 42950 }, { "embedding_loss": 0.0001, "epoch": 4.477767364365302, "grad_norm": 0.0006662325467914343, "learning_rate": 2.3215236155786454e-06, "step": 43000 }, { "embedding_loss": 0.0005, "epoch": 4.482974070602936, "grad_norm": 0.003703465685248375, "learning_rate": 2.2983824312128295e-06, "step": 43050 }, { "embedding_loss": 0.0012, "epoch": 4.48818077684057, "grad_norm": 0.0016818788135424256, "learning_rate": 2.275241246847014e-06, "step": 43100 }, { "embedding_loss": 0.0002, "epoch": 4.493387483078204, "grad_norm": 0.020755505189299583, "learning_rate": 2.2521000624811977e-06, "step": 43150 }, { "embedding_loss": 0.0014, "epoch": 4.498594189315839, "grad_norm": 0.0026131754275411367, "learning_rate": 2.228958878115382e-06, "step": 43200 }, { "embedding_loss": 0.0007, "epoch": 4.503800895553473, "grad_norm": 0.010769600979983807, "learning_rate": 2.2058176937495664e-06, "step": 43250 }, { "embedding_loss": 0.0003, "epoch": 4.509007601791107, "grad_norm": 0.0036240960471332073, "learning_rate": 2.1826765093837505e-06, "step": 43300 }, { "embedding_loss": 0.0026, "epoch": 4.514214308028741, "grad_norm": 0.0036824876442551613, "learning_rate": 2.1595353250179346e-06, "step": 43350 }, { "embedding_loss": 0.0001, "epoch": 4.519421014266375, "grad_norm": 0.005997397005558014, "learning_rate": 2.1363941406521187e-06, "step": 43400 }, { "embedding_loss": 0.0024, "epoch": 4.524627720504009, "grad_norm": 0.0047904313541948795, "learning_rate": 2.113252956286303e-06, "step": 43450 }, { "embedding_loss": 0.0001, "epoch": 4.529834426741643, "grad_norm": 0.01296373549848795, "learning_rate": 2.0901117719204874e-06, "step": 43500 }, { "embedding_loss": 0.0001, "epoch": 4.535041132979277, "grad_norm": 0.0019739430863410234, "learning_rate": 2.066970587554671e-06, "step": 43550 }, { "embedding_loss": 0.0001, "epoch": 4.540247839216911, "grad_norm": 0.003413254162296653, "learning_rate": 2.043829403188855e-06, "step": 43600 }, { "embedding_loss": 0.0015, "epoch": 4.545454545454545, "grad_norm": 0.005623187869787216, "learning_rate": 2.0206882188230397e-06, "step": 43650 }, { "embedding_loss": 0.0007, "epoch": 4.55066125169218, "grad_norm": 0.002288981107994914, "learning_rate": 1.997547034457224e-06, "step": 43700 }, { "embedding_loss": 0.0013, "epoch": 4.555867957929814, "grad_norm": 0.0009039235883392394, "learning_rate": 1.974405850091408e-06, "step": 43750 }, { "embedding_loss": 0.0018, "epoch": 4.561074664167448, "grad_norm": 0.004320364445447922, "learning_rate": 1.951264665725592e-06, "step": 43800 }, { "embedding_loss": 0.0001, "epoch": 4.566281370405082, "grad_norm": 0.0032452233135700226, "learning_rate": 1.928123481359776e-06, "step": 43850 }, { "embedding_loss": 0.0014, "epoch": 4.571488076642716, "grad_norm": 0.0020324711222201586, "learning_rate": 1.9049822969939603e-06, "step": 43900 }, { "embedding_loss": 0.001, "epoch": 4.57669478288035, "grad_norm": 0.0033108368515968323, "learning_rate": 1.8818411126281444e-06, "step": 43950 }, { "embedding_loss": 0.0005, "epoch": 4.581901489117984, "grad_norm": 0.002229843521490693, "learning_rate": 1.8586999282623287e-06, "step": 44000 }, { "embedding_loss": 0.0001, "epoch": 4.5871081953556185, "grad_norm": 0.0032805639784783125, "learning_rate": 1.8355587438965128e-06, "step": 44050 }, { "embedding_loss": 0.0008, "epoch": 4.592314901593252, "grad_norm": 0.001333653461188078, "learning_rate": 1.812417559530697e-06, "step": 44100 }, { "embedding_loss": 0.0007, "epoch": 4.5975216078308865, "grad_norm": 0.3373894691467285, "learning_rate": 1.7892763751648812e-06, "step": 44150 }, { "embedding_loss": 0.0016, "epoch": 4.6027283140685205, "grad_norm": 0.0035874065943062305, "learning_rate": 1.7661351907990653e-06, "step": 44200 }, { "embedding_loss": 0.0004, "epoch": 4.607935020306154, "grad_norm": 0.001685873605310917, "learning_rate": 1.7429940064332492e-06, "step": 44250 }, { "embedding_loss": 0.0001, "epoch": 4.613141726543788, "grad_norm": 0.0019293057266622782, "learning_rate": 1.7198528220674336e-06, "step": 44300 }, { "embedding_loss": 0.0001, "epoch": 4.618348432781422, "grad_norm": 0.017738085240125656, "learning_rate": 1.6967116377016177e-06, "step": 44350 }, { "embedding_loss": 0.0001, "epoch": 4.623555139019056, "grad_norm": 0.00891903880983591, "learning_rate": 1.6735704533358018e-06, "step": 44400 }, { "embedding_loss": 0.0013, "epoch": 4.62876184525669, "grad_norm": 0.001540567958727479, "learning_rate": 1.6504292689699861e-06, "step": 44450 }, { "embedding_loss": 0.0002, "epoch": 4.633968551494325, "grad_norm": 0.03680149465799332, "learning_rate": 1.6272880846041702e-06, "step": 44500 }, { "embedding_loss": 0.0001, "epoch": 4.639175257731958, "grad_norm": 0.0019971744623035192, "learning_rate": 1.6041469002383545e-06, "step": 44550 }, { "embedding_loss": 0.0001, "epoch": 4.644381963969593, "grad_norm": 0.004468118771910667, "learning_rate": 1.5810057158725384e-06, "step": 44600 }, { "embedding_loss": 0.0008, "epoch": 4.649588670207227, "grad_norm": 0.028531698510050774, "learning_rate": 1.5578645315067226e-06, "step": 44650 }, { "embedding_loss": 0.0001, "epoch": 4.654795376444861, "grad_norm": 0.003770474810153246, "learning_rate": 1.5347233471409067e-06, "step": 44700 }, { "embedding_loss": 0.0013, "epoch": 4.660002082682495, "grad_norm": 0.0017020407831296325, "learning_rate": 1.511582162775091e-06, "step": 44750 }, { "embedding_loss": 0.001, "epoch": 4.665208788920129, "grad_norm": 0.0020539036486297846, "learning_rate": 1.488440978409275e-06, "step": 44800 }, { "embedding_loss": 0.0009, "epoch": 4.670415495157763, "grad_norm": 0.0035052604507654905, "learning_rate": 1.4652997940434594e-06, "step": 44850 }, { "embedding_loss": 0.0001, "epoch": 4.675622201395397, "grad_norm": 0.003664996474981308, "learning_rate": 1.4421586096776435e-06, "step": 44900 }, { "embedding_loss": 0.0, "epoch": 4.680828907633032, "grad_norm": 0.002150058513507247, "learning_rate": 1.4190174253118274e-06, "step": 44950 }, { "embedding_loss": 0.0007, "epoch": 4.686035613870665, "grad_norm": 0.0027224977966398, "learning_rate": 1.3958762409460115e-06, "step": 45000 }, { "embedding_loss": 0.0009, "epoch": 4.6912423201083, "grad_norm": 0.002212725579738617, "learning_rate": 1.3727350565801959e-06, "step": 45050 }, { "embedding_loss": 0.0007, "epoch": 4.696449026345934, "grad_norm": 0.0020792309660464525, "learning_rate": 1.34959387221438e-06, "step": 45100 }, { "embedding_loss": 0.0001, "epoch": 4.701655732583568, "grad_norm": 0.0052679735235869884, "learning_rate": 1.3264526878485643e-06, "step": 45150 }, { "embedding_loss": 0.001, "epoch": 4.706862438821202, "grad_norm": 0.0041116694919764996, "learning_rate": 1.3033115034827484e-06, "step": 45200 }, { "embedding_loss": 0.0003, "epoch": 4.712069145058836, "grad_norm": 0.003463329281657934, "learning_rate": 1.2801703191169325e-06, "step": 45250 }, { "embedding_loss": 0.0007, "epoch": 4.71727585129647, "grad_norm": 0.0025421089958399534, "learning_rate": 1.2570291347511166e-06, "step": 45300 }, { "embedding_loss": 0.001, "epoch": 4.722482557534104, "grad_norm": 0.0016696372767910361, "learning_rate": 1.233887950385301e-06, "step": 45350 }, { "embedding_loss": 0.0013, "epoch": 4.7276892637717385, "grad_norm": 0.005751196760684252, "learning_rate": 1.2107467660194849e-06, "step": 45400 }, { "embedding_loss": 0.0019, "epoch": 4.7328959700093725, "grad_norm": 0.013020163401961327, "learning_rate": 1.1876055816536692e-06, "step": 45450 }, { "embedding_loss": 0.0009, "epoch": 4.738102676247006, "grad_norm": 0.004354926757514477, "learning_rate": 1.1644643972878533e-06, "step": 45500 }, { "embedding_loss": 0.0009, "epoch": 4.74330938248464, "grad_norm": 0.003605367848649621, "learning_rate": 1.1413232129220374e-06, "step": 45550 }, { "embedding_loss": 0.001, "epoch": 4.748516088722274, "grad_norm": 0.0030561047606170177, "learning_rate": 1.1181820285562215e-06, "step": 45600 }, { "embedding_loss": 0.0, "epoch": 4.753722794959908, "grad_norm": 0.007909784093499184, "learning_rate": 1.0950408441904058e-06, "step": 45650 }, { "embedding_loss": 0.0007, "epoch": 4.758929501197542, "grad_norm": 0.002514626132324338, "learning_rate": 1.07189965982459e-06, "step": 45700 }, { "embedding_loss": 0.0008, "epoch": 4.764136207435176, "grad_norm": 0.0016800053417682648, "learning_rate": 1.048758475458774e-06, "step": 45750 }, { "embedding_loss": 0.0, "epoch": 4.76934291367281, "grad_norm": 0.004000342451035976, "learning_rate": 1.0256172910929582e-06, "step": 45800 }, { "embedding_loss": 0.0002, "epoch": 4.774549619910445, "grad_norm": 0.001277065253816545, "learning_rate": 1.0024761067271425e-06, "step": 45850 }, { "embedding_loss": 0.0, "epoch": 4.779756326148079, "grad_norm": 0.004461308475583792, "learning_rate": 9.793349223613266e-07, "step": 45900 }, { "embedding_loss": 0.0004, "epoch": 4.784963032385713, "grad_norm": 0.0651107132434845, "learning_rate": 9.561937379955107e-07, "step": 45950 }, { "embedding_loss": 0.0007, "epoch": 4.790169738623347, "grad_norm": 0.0018568108789622784, "learning_rate": 9.330525536296948e-07, "step": 46000 }, { "embedding_loss": 0.0007, "epoch": 4.795376444860981, "grad_norm": 0.004890389274805784, "learning_rate": 9.09911369263879e-07, "step": 46050 }, { "embedding_loss": 0.0007, "epoch": 4.800583151098615, "grad_norm": 0.002689856104552746, "learning_rate": 8.867701848980631e-07, "step": 46100 }, { "embedding_loss": 0.0, "epoch": 4.805789857336249, "grad_norm": 0.003079883521422744, "learning_rate": 8.636290005322473e-07, "step": 46150 }, { "embedding_loss": 0.0005, "epoch": 4.810996563573883, "grad_norm": 0.0018577250884845853, "learning_rate": 8.404878161664315e-07, "step": 46200 }, { "embedding_loss": 0.0001, "epoch": 4.816203269811517, "grad_norm": 0.004618423525243998, "learning_rate": 8.173466318006157e-07, "step": 46250 }, { "embedding_loss": 0.0008, "epoch": 4.821409976049152, "grad_norm": 0.004892790224403143, "learning_rate": 7.942054474347997e-07, "step": 46300 }, { "embedding_loss": 0.001, "epoch": 4.826616682286786, "grad_norm": 0.003912623040378094, "learning_rate": 7.710642630689839e-07, "step": 46350 }, { "embedding_loss": 0.0007, "epoch": 4.83182338852442, "grad_norm": 0.007159634493291378, "learning_rate": 7.479230787031681e-07, "step": 46400 }, { "embedding_loss": 0.0001, "epoch": 4.837030094762054, "grad_norm": 0.0023596896789968014, "learning_rate": 7.247818943373522e-07, "step": 46450 }, { "embedding_loss": 0.0008, "epoch": 4.842236800999688, "grad_norm": 0.010279769077897072, "learning_rate": 7.016407099715364e-07, "step": 46500 }, { "embedding_loss": 0.0002, "epoch": 4.847443507237322, "grad_norm": 0.008691814728081226, "learning_rate": 6.784995256057206e-07, "step": 46550 }, { "embedding_loss": 0.0001, "epoch": 4.852650213474956, "grad_norm": 0.003329548519104719, "learning_rate": 6.553583412399048e-07, "step": 46600 }, { "embedding_loss": 0.0009, "epoch": 4.85785691971259, "grad_norm": 0.004583888687193394, "learning_rate": 6.322171568740888e-07, "step": 46650 }, { "embedding_loss": 0.0001, "epoch": 4.863063625950224, "grad_norm": 0.02980988658964634, "learning_rate": 6.09075972508273e-07, "step": 46700 }, { "embedding_loss": 0.0014, "epoch": 4.868270332187858, "grad_norm": 0.007974829524755478, "learning_rate": 5.859347881424571e-07, "step": 46750 }, { "embedding_loss": 0.0005, "epoch": 4.873477038425492, "grad_norm": 0.0035474197939038277, "learning_rate": 5.627936037766414e-07, "step": 46800 }, { "embedding_loss": 0.0001, "epoch": 4.878683744663126, "grad_norm": 0.010695052333176136, "learning_rate": 5.396524194108255e-07, "step": 46850 }, { "embedding_loss": 0.0001, "epoch": 4.88389045090076, "grad_norm": 0.002433580346405506, "learning_rate": 5.165112350450097e-07, "step": 46900 }, { "embedding_loss": 0.0, "epoch": 4.889097157138394, "grad_norm": 0.003585429862141609, "learning_rate": 4.933700506791938e-07, "step": 46950 }, { "embedding_loss": 0.001, "epoch": 4.894303863376028, "grad_norm": 0.31530049443244934, "learning_rate": 4.70228866313378e-07, "step": 47000 }, { "embedding_loss": 0.001, "epoch": 4.899510569613662, "grad_norm": 0.0049338992685079575, "learning_rate": 4.470876819475621e-07, "step": 47050 }, { "embedding_loss": 0.0013, "epoch": 4.904717275851296, "grad_norm": 0.00397633807733655, "learning_rate": 4.239464975817463e-07, "step": 47100 }, { "embedding_loss": 0.0002, "epoch": 4.90992398208893, "grad_norm": 0.0035032695159316063, "learning_rate": 4.0080531321593045e-07, "step": 47150 }, { "embedding_loss": 0.0001, "epoch": 4.915130688326565, "grad_norm": 0.0034136222675442696, "learning_rate": 3.7766412885011456e-07, "step": 47200 }, { "embedding_loss": 0.0002, "epoch": 4.920337394564199, "grad_norm": 0.20558778941631317, "learning_rate": 3.545229444842987e-07, "step": 47250 }, { "embedding_loss": 0.0018, "epoch": 4.925544100801833, "grad_norm": 0.010641155764460564, "learning_rate": 3.313817601184829e-07, "step": 47300 }, { "embedding_loss": 0.0001, "epoch": 4.930750807039467, "grad_norm": 0.0028619503136724234, "learning_rate": 3.0824057575266705e-07, "step": 47350 }, { "embedding_loss": 0.0001, "epoch": 4.935957513277101, "grad_norm": 0.001628124387934804, "learning_rate": 2.850993913868512e-07, "step": 47400 }, { "embedding_loss": 0.0005, "epoch": 4.941164219514735, "grad_norm": 0.0017882351530715823, "learning_rate": 2.6195820702103533e-07, "step": 47450 }, { "embedding_loss": 0.0008, "epoch": 4.946370925752369, "grad_norm": 0.00220202817581594, "learning_rate": 2.388170226552195e-07, "step": 47500 }, { "embedding_loss": 0.0013, "epoch": 4.951577631990003, "grad_norm": 0.001214580493979156, "learning_rate": 2.1567583828940368e-07, "step": 47550 }, { "embedding_loss": 0.0007, "epoch": 4.956784338227637, "grad_norm": 0.0018583645578473806, "learning_rate": 1.9253465392358785e-07, "step": 47600 }, { "embedding_loss": 0.0004, "epoch": 4.961991044465272, "grad_norm": 0.005768468137830496, "learning_rate": 1.6939346955777198e-07, "step": 47650 }, { "embedding_loss": 0.0003, "epoch": 4.967197750702906, "grad_norm": 0.0034024049527943134, "learning_rate": 1.4625228519195615e-07, "step": 47700 }, { "embedding_loss": 0.0022, "epoch": 4.97240445694054, "grad_norm": 0.001480701263062656, "learning_rate": 1.2311110082614029e-07, "step": 47750 }, { "embedding_loss": 0.0008, "epoch": 4.977611163178174, "grad_norm": 0.0032351568806916475, "learning_rate": 9.996991646032445e-08, "step": 47800 }, { "embedding_loss": 0.0006, "epoch": 4.982817869415808, "grad_norm": 0.03766478970646858, "learning_rate": 7.68287320945086e-08, "step": 47850 }, { "embedding_loss": 0.0, "epoch": 4.988024575653442, "grad_norm": 0.0029770000837743282, "learning_rate": 5.368754772869276e-08, "step": 47900 }, { "embedding_loss": 0.0007, "epoch": 4.993231281891076, "grad_norm": 0.001190900569781661, "learning_rate": 3.0546363362876916e-08, "step": 47950 }, { "embedding_loss": 0.0001, "epoch": 4.9984379881287095, "grad_norm": 0.019358443096280098, "learning_rate": 7.40517899706107e-09, "step": 48000 } ], "logging_steps": 50, "max_steps": 48015, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }