{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 178970, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "embedding_loss": 0.3082, "epoch": 5.58752863608426e-05, "grad_norm": 1.7655975818634033, "learning_rate": 0.0, "step": 1 }, { "embedding_loss": 0.3776, "epoch": 0.0027937643180421298, "grad_norm": 1.8435853719711304, "learning_rate": 5.475778063362575e-08, "step": 50 }, { "embedding_loss": 0.3684, "epoch": 0.0055875286360842595, "grad_norm": 2.121671438217163, "learning_rate": 1.1063306699446835e-07, "step": 100 }, { "embedding_loss": 0.3702, "epoch": 0.00838129295412639, "grad_norm": 1.5905697345733643, "learning_rate": 1.6650835335531095e-07, "step": 150 }, { "embedding_loss": 0.3521, "epoch": 0.011175057272168519, "grad_norm": 2.7827088832855225, "learning_rate": 2.2238363971615357e-07, "step": 200 }, { "embedding_loss": 0.3383, "epoch": 0.01396882159021065, "grad_norm": 1.8167310953140259, "learning_rate": 2.7825892607699617e-07, "step": 250 }, { "embedding_loss": 0.3227, "epoch": 0.01676258590825278, "grad_norm": 1.735047459602356, "learning_rate": 3.3413421243783876e-07, "step": 300 }, { "embedding_loss": 0.3141, "epoch": 0.01955635022629491, "grad_norm": 2.565088987350464, "learning_rate": 3.900094987986814e-07, "step": 350 }, { "embedding_loss": 0.2816, "epoch": 0.022350114544337038, "grad_norm": 1.6851118803024292, "learning_rate": 4.4588478515952395e-07, "step": 400 }, { "embedding_loss": 0.2717, "epoch": 0.02514387886237917, "grad_norm": 1.3322851657867432, "learning_rate": 5.017600715203665e-07, "step": 450 }, { "embedding_loss": 0.2663, "epoch": 0.0279376431804213, "grad_norm": 1.1651397943496704, "learning_rate": 5.576353578812091e-07, "step": 500 }, { "embedding_loss": 0.262, "epoch": 0.03073140749846343, "grad_norm": 1.1011055707931519, "learning_rate": 6.135106442420518e-07, "step": 550 }, { "embedding_loss": 0.2619, "epoch": 0.03352517181650556, "grad_norm": 1.0365666151046753, "learning_rate": 6.693859306028943e-07, "step": 600 }, { "embedding_loss": 0.2575, "epoch": 0.03631893613454769, "grad_norm": 1.3185657262802124, "learning_rate": 7.25261216963737e-07, "step": 650 }, { "embedding_loss": 0.253, "epoch": 0.03911270045258982, "grad_norm": 0.9703909158706665, "learning_rate": 7.811365033245796e-07, "step": 700 }, { "embedding_loss": 0.2586, "epoch": 0.04190646477063195, "grad_norm": 0.9690493941307068, "learning_rate": 8.370117896854222e-07, "step": 750 }, { "embedding_loss": 0.2535, "epoch": 0.044700229088674076, "grad_norm": 0.9179584980010986, "learning_rate": 8.928870760462648e-07, "step": 800 }, { "embedding_loss": 0.2557, "epoch": 0.04749399340671621, "grad_norm": 1.2386568784713745, "learning_rate": 9.487623624071074e-07, "step": 850 }, { "embedding_loss": 0.2555, "epoch": 0.05028775772475834, "grad_norm": 1.1495050191879272, "learning_rate": 1.00463764876795e-06, "step": 900 }, { "embedding_loss": 0.2563, "epoch": 0.05308152204280047, "grad_norm": 0.8405428528785706, "learning_rate": 1.0605129351287927e-06, "step": 950 }, { "embedding_loss": 0.2513, "epoch": 0.0558752863608426, "grad_norm": 0.8800510764122009, "learning_rate": 1.1163882214896352e-06, "step": 1000 }, { "embedding_loss": 0.2535, "epoch": 0.05866905067888473, "grad_norm": 0.9410228729248047, "learning_rate": 1.1722635078504779e-06, "step": 1050 }, { "embedding_loss": 0.2494, "epoch": 0.06146281499692686, "grad_norm": 0.9483185410499573, "learning_rate": 1.2281387942113204e-06, "step": 1100 }, { "embedding_loss": 0.251, "epoch": 0.06425657931496899, "grad_norm": 0.9613559246063232, "learning_rate": 1.2840140805721629e-06, "step": 1150 }, { "embedding_loss": 0.253, "epoch": 0.06705034363301111, "grad_norm": 1.0872608423233032, "learning_rate": 1.3398893669330058e-06, "step": 1200 }, { "embedding_loss": 0.2496, "epoch": 0.06984410795105325, "grad_norm": 0.8642127513885498, "learning_rate": 1.3957646532938483e-06, "step": 1250 }, { "embedding_loss": 0.2484, "epoch": 0.07263787226909538, "grad_norm": 0.9812522530555725, "learning_rate": 1.451639939654691e-06, "step": 1300 }, { "embedding_loss": 0.2512, "epoch": 0.07543163658713752, "grad_norm": 0.8589313626289368, "learning_rate": 1.5075152260155335e-06, "step": 1350 }, { "embedding_loss": 0.2485, "epoch": 0.07822540090517964, "grad_norm": 0.9000875353813171, "learning_rate": 1.563390512376376e-06, "step": 1400 }, { "embedding_loss": 0.2513, "epoch": 0.08101916522322176, "grad_norm": 0.984093189239502, "learning_rate": 1.6192657987372186e-06, "step": 1450 }, { "embedding_loss": 0.248, "epoch": 0.0838129295412639, "grad_norm": 1.0690335035324097, "learning_rate": 1.6751410850980611e-06, "step": 1500 }, { "embedding_loss": 0.2513, "epoch": 0.08660669385930603, "grad_norm": 0.9656662940979004, "learning_rate": 1.731016371458904e-06, "step": 1550 }, { "embedding_loss": 0.2479, "epoch": 0.08940045817734815, "grad_norm": 1.3788082599639893, "learning_rate": 1.7868916578197465e-06, "step": 1600 }, { "embedding_loss": 0.2498, "epoch": 0.09219422249539029, "grad_norm": 0.8924678564071655, "learning_rate": 1.842766944180589e-06, "step": 1650 }, { "embedding_loss": 0.2486, "epoch": 0.09498798681343242, "grad_norm": 1.2600059509277344, "learning_rate": 1.8986422305414317e-06, "step": 1700 }, { "embedding_loss": 0.2513, "epoch": 0.09778175113147455, "grad_norm": 1.2819123268127441, "learning_rate": 1.9545175169022744e-06, "step": 1750 }, { "embedding_loss": 0.2485, "epoch": 0.10057551544951668, "grad_norm": 1.1405218839645386, "learning_rate": 2.010392803263117e-06, "step": 1800 }, { "embedding_loss": 0.25, "epoch": 0.1033692797675588, "grad_norm": 0.9599745273590088, "learning_rate": 2.0662680896239594e-06, "step": 1850 }, { "embedding_loss": 0.2465, "epoch": 0.10616304408560094, "grad_norm": 1.2563879489898682, "learning_rate": 2.122143375984802e-06, "step": 1900 }, { "embedding_loss": 0.2453, "epoch": 0.10895680840364307, "grad_norm": 0.8158384561538696, "learning_rate": 2.178018662345645e-06, "step": 1950 }, { "embedding_loss": 0.2436, "epoch": 0.1117505727216852, "grad_norm": 0.8498464822769165, "learning_rate": 2.2338939487064873e-06, "step": 2000 }, { "embedding_loss": 0.2455, "epoch": 0.11454433703972733, "grad_norm": 0.8598819375038147, "learning_rate": 2.2897692350673298e-06, "step": 2050 }, { "embedding_loss": 0.2433, "epoch": 0.11733810135776945, "grad_norm": 0.846580445766449, "learning_rate": 2.3456445214281727e-06, "step": 2100 }, { "embedding_loss": 0.2469, "epoch": 0.12013186567581159, "grad_norm": 0.8778742551803589, "learning_rate": 2.401519807789015e-06, "step": 2150 }, { "embedding_loss": 0.2455, "epoch": 0.12292562999385372, "grad_norm": 0.8865863680839539, "learning_rate": 2.4573950941498577e-06, "step": 2200 }, { "embedding_loss": 0.2485, "epoch": 0.12571939431189585, "grad_norm": 0.793403685092926, "learning_rate": 2.5132703805107e-06, "step": 2250 }, { "embedding_loss": 0.2442, "epoch": 0.12851315862993798, "grad_norm": 0.9684894680976868, "learning_rate": 2.569145666871543e-06, "step": 2300 }, { "embedding_loss": 0.2459, "epoch": 0.1313069229479801, "grad_norm": 1.1254236698150635, "learning_rate": 2.6250209532323856e-06, "step": 2350 }, { "embedding_loss": 0.2466, "epoch": 0.13410068726602223, "grad_norm": 0.8372580409049988, "learning_rate": 2.680896239593228e-06, "step": 2400 }, { "embedding_loss": 0.2477, "epoch": 0.13689445158406438, "grad_norm": 1.1147046089172363, "learning_rate": 2.7367715259540705e-06, "step": 2450 }, { "embedding_loss": 0.2483, "epoch": 0.1396882159021065, "grad_norm": 0.8442330956459045, "learning_rate": 2.792646812314913e-06, "step": 2500 }, { "embedding_loss": 0.243, "epoch": 0.14248198022014863, "grad_norm": 0.8284971117973328, "learning_rate": 2.848522098675756e-06, "step": 2550 }, { "embedding_loss": 0.2429, "epoch": 0.14527574453819075, "grad_norm": 0.8427169322967529, "learning_rate": 2.9043973850365984e-06, "step": 2600 }, { "embedding_loss": 0.2434, "epoch": 0.14806950885623288, "grad_norm": 1.1996192932128906, "learning_rate": 2.9602726713974413e-06, "step": 2650 }, { "embedding_loss": 0.2396, "epoch": 0.15086327317427503, "grad_norm": 0.9847694635391235, "learning_rate": 3.016147957758284e-06, "step": 2700 }, { "embedding_loss": 0.2417, "epoch": 0.15365703749231716, "grad_norm": 0.8877982497215271, "learning_rate": 3.0720232441191263e-06, "step": 2750 }, { "embedding_loss": 0.2402, "epoch": 0.15645080181035928, "grad_norm": 0.8937920331954956, "learning_rate": 3.1278985304799692e-06, "step": 2800 }, { "embedding_loss": 0.2404, "epoch": 0.1592445661284014, "grad_norm": 0.9423437118530273, "learning_rate": 3.1837738168408117e-06, "step": 2850 }, { "embedding_loss": 0.2406, "epoch": 0.16203833044644353, "grad_norm": 0.9366545677185059, "learning_rate": 3.239649103201654e-06, "step": 2900 }, { "embedding_loss": 0.2422, "epoch": 0.16483209476448568, "grad_norm": 0.9900547862052917, "learning_rate": 3.2955243895624967e-06, "step": 2950 }, { "embedding_loss": 0.2338, "epoch": 0.1676258590825278, "grad_norm": 1.474246621131897, "learning_rate": 3.351399675923339e-06, "step": 3000 }, { "embedding_loss": 0.2369, "epoch": 0.17041962340056993, "grad_norm": 0.912102460861206, "learning_rate": 3.407274962284182e-06, "step": 3050 }, { "embedding_loss": 0.2359, "epoch": 0.17321338771861206, "grad_norm": 0.8943923115730286, "learning_rate": 3.4631502486450246e-06, "step": 3100 }, { "embedding_loss": 0.241, "epoch": 0.17600715203665418, "grad_norm": 1.1463110446929932, "learning_rate": 3.519025535005867e-06, "step": 3150 }, { "embedding_loss": 0.2349, "epoch": 0.1788009163546963, "grad_norm": 0.9385159611701965, "learning_rate": 3.5749008213667096e-06, "step": 3200 }, { "embedding_loss": 0.2365, "epoch": 0.18159468067273846, "grad_norm": 0.8809450268745422, "learning_rate": 3.630776107727552e-06, "step": 3250 }, { "embedding_loss": 0.2319, "epoch": 0.18438844499078058, "grad_norm": 1.3859418630599976, "learning_rate": 3.6866513940883954e-06, "step": 3300 }, { "embedding_loss": 0.2309, "epoch": 0.1871822093088227, "grad_norm": 0.9465262293815613, "learning_rate": 3.742526680449238e-06, "step": 3350 }, { "embedding_loss": 0.2311, "epoch": 0.18997597362686483, "grad_norm": 0.9963146448135376, "learning_rate": 3.7984019668100804e-06, "step": 3400 }, { "embedding_loss": 0.2256, "epoch": 0.19276973794490695, "grad_norm": 1.204712986946106, "learning_rate": 3.854277253170923e-06, "step": 3450 }, { "embedding_loss": 0.2367, "epoch": 0.1955635022629491, "grad_norm": 1.0632985830307007, "learning_rate": 3.910152539531765e-06, "step": 3500 }, { "embedding_loss": 0.2276, "epoch": 0.19835726658099123, "grad_norm": 0.8851169943809509, "learning_rate": 3.966027825892609e-06, "step": 3550 }, { "embedding_loss": 0.2293, "epoch": 0.20115103089903336, "grad_norm": 1.0940569639205933, "learning_rate": 4.021903112253451e-06, "step": 3600 }, { "embedding_loss": 0.2219, "epoch": 0.20394479521707548, "grad_norm": 1.0483940839767456, "learning_rate": 4.077778398614294e-06, "step": 3650 }, { "embedding_loss": 0.2255, "epoch": 0.2067385595351176, "grad_norm": 1.1843154430389404, "learning_rate": 4.133653684975136e-06, "step": 3700 }, { "embedding_loss": 0.2184, "epoch": 0.20953232385315976, "grad_norm": 1.114966869354248, "learning_rate": 4.189528971335979e-06, "step": 3750 }, { "embedding_loss": 0.2281, "epoch": 0.21232608817120188, "grad_norm": 0.9981719851493835, "learning_rate": 4.2454042576968215e-06, "step": 3800 }, { "embedding_loss": 0.2185, "epoch": 0.215119852489244, "grad_norm": 1.46576988697052, "learning_rate": 4.301279544057664e-06, "step": 3850 }, { "embedding_loss": 0.2214, "epoch": 0.21791361680728613, "grad_norm": 1.1666396856307983, "learning_rate": 4.3571548304185065e-06, "step": 3900 }, { "embedding_loss": 0.2216, "epoch": 0.22070738112532826, "grad_norm": 1.2655556201934814, "learning_rate": 4.4130301167793486e-06, "step": 3950 }, { "embedding_loss": 0.215, "epoch": 0.2235011454433704, "grad_norm": 1.0865727663040161, "learning_rate": 4.4689054031401915e-06, "step": 4000 }, { "embedding_loss": 0.2134, "epoch": 0.22629490976141253, "grad_norm": 1.3409370183944702, "learning_rate": 4.524780689501034e-06, "step": 4050 }, { "embedding_loss": 0.215, "epoch": 0.22908867407945466, "grad_norm": 1.4521331787109375, "learning_rate": 4.5806559758618765e-06, "step": 4100 }, { "embedding_loss": 0.2008, "epoch": 0.23188243839749678, "grad_norm": 1.2916017770767212, "learning_rate": 4.636531262222719e-06, "step": 4150 }, { "embedding_loss": 0.2091, "epoch": 0.2346762027155389, "grad_norm": 1.5119096040725708, "learning_rate": 4.6924065485835614e-06, "step": 4200 }, { "embedding_loss": 0.2071, "epoch": 0.23746996703358106, "grad_norm": 1.5443958044052124, "learning_rate": 4.748281834944404e-06, "step": 4250 }, { "embedding_loss": 0.2102, "epoch": 0.24026373135162318, "grad_norm": 1.45564866065979, "learning_rate": 4.804157121305246e-06, "step": 4300 }, { "embedding_loss": 0.2012, "epoch": 0.2430574956696653, "grad_norm": 1.1831332445144653, "learning_rate": 4.86003240766609e-06, "step": 4350 }, { "embedding_loss": 0.2009, "epoch": 0.24585125998770743, "grad_norm": 1.4838157892227173, "learning_rate": 4.915907694026932e-06, "step": 4400 }, { "embedding_loss": 0.1955, "epoch": 0.24864502430574956, "grad_norm": 1.557892918586731, "learning_rate": 4.971782980387775e-06, "step": 4450 }, { "embedding_loss": 0.204, "epoch": 0.2514387886237917, "grad_norm": 1.4734735488891602, "learning_rate": 5.027658266748617e-06, "step": 4500 }, { "embedding_loss": 0.1998, "epoch": 0.25423255294183383, "grad_norm": 1.5204105377197266, "learning_rate": 5.08353355310946e-06, "step": 4550 }, { "embedding_loss": 0.1888, "epoch": 0.25702631725987596, "grad_norm": 2.072387218475342, "learning_rate": 5.139408839470302e-06, "step": 4600 }, { "embedding_loss": 0.1904, "epoch": 0.2598200815779181, "grad_norm": 1.7038322687149048, "learning_rate": 5.195284125831145e-06, "step": 4650 }, { "embedding_loss": 0.1881, "epoch": 0.2626138458959602, "grad_norm": 1.8202340602874756, "learning_rate": 5.251159412191988e-06, "step": 4700 }, { "embedding_loss": 0.1882, "epoch": 0.26540761021400233, "grad_norm": 1.7814663648605347, "learning_rate": 5.307034698552831e-06, "step": 4750 }, { "embedding_loss": 0.1844, "epoch": 0.26820137453204446, "grad_norm": 1.4907888174057007, "learning_rate": 5.362909984913674e-06, "step": 4800 }, { "embedding_loss": 0.1868, "epoch": 0.2709951388500866, "grad_norm": 1.999426007270813, "learning_rate": 5.418785271274516e-06, "step": 4850 }, { "embedding_loss": 0.1812, "epoch": 0.27378890316812876, "grad_norm": 2.030625820159912, "learning_rate": 5.474660557635359e-06, "step": 4900 }, { "embedding_loss": 0.1799, "epoch": 0.2765826674861709, "grad_norm": 2.1096341609954834, "learning_rate": 5.530535843996201e-06, "step": 4950 }, { "embedding_loss": 0.1812, "epoch": 0.279376431804213, "grad_norm": 1.9995920658111572, "learning_rate": 5.586411130357044e-06, "step": 5000 }, { "embedding_loss": 0.176, "epoch": 0.28217019612225513, "grad_norm": 1.6903103590011597, "learning_rate": 5.642286416717886e-06, "step": 5050 }, { "embedding_loss": 0.1835, "epoch": 0.28496396044029726, "grad_norm": 1.7932636737823486, "learning_rate": 5.698161703078729e-06, "step": 5100 }, { "embedding_loss": 0.175, "epoch": 0.2877577247583394, "grad_norm": 1.9809973239898682, "learning_rate": 5.754036989439571e-06, "step": 5150 }, { "embedding_loss": 0.1702, "epoch": 0.2905514890763815, "grad_norm": 2.928118944168091, "learning_rate": 5.809912275800415e-06, "step": 5200 }, { "embedding_loss": 0.1738, "epoch": 0.29334525339442363, "grad_norm": 2.3507680892944336, "learning_rate": 5.865787562161257e-06, "step": 5250 }, { "embedding_loss": 0.1668, "epoch": 0.29613901771246576, "grad_norm": 2.326292037963867, "learning_rate": 5.9216628485220996e-06, "step": 5300 }, { "embedding_loss": 0.1712, "epoch": 0.2989327820305079, "grad_norm": 2.322329044342041, "learning_rate": 5.977538134882942e-06, "step": 5350 }, { "embedding_loss": 0.1728, "epoch": 0.30172654634855006, "grad_norm": 2.015127182006836, "learning_rate": 6.0334134212437845e-06, "step": 5400 }, { "embedding_loss": 0.1643, "epoch": 0.3045203106665922, "grad_norm": 1.9471898078918457, "learning_rate": 6.089288707604627e-06, "step": 5450 }, { "embedding_loss": 0.158, "epoch": 0.3073140749846343, "grad_norm": 1.9518429040908813, "learning_rate": 6.1451639939654695e-06, "step": 5500 }, { "embedding_loss": 0.167, "epoch": 0.31010783930267644, "grad_norm": 1.7494865655899048, "learning_rate": 6.201039280326312e-06, "step": 5550 }, { "embedding_loss": 0.1653, "epoch": 0.31290160362071856, "grad_norm": 1.6631413698196411, "learning_rate": 6.2569145666871545e-06, "step": 5600 }, { "embedding_loss": 0.1617, "epoch": 0.3156953679387607, "grad_norm": 2.5670852661132812, "learning_rate": 6.3127898530479966e-06, "step": 5650 }, { "embedding_loss": 0.1618, "epoch": 0.3184891322568028, "grad_norm": 1.8451708555221558, "learning_rate": 6.3686651394088395e-06, "step": 5700 }, { "embedding_loss": 0.1633, "epoch": 0.32128289657484493, "grad_norm": 1.7920405864715576, "learning_rate": 6.424540425769683e-06, "step": 5750 }, { "embedding_loss": 0.1599, "epoch": 0.32407666089288706, "grad_norm": 2.2425777912139893, "learning_rate": 6.480415712130525e-06, "step": 5800 }, { "embedding_loss": 0.1598, "epoch": 0.3268704252109292, "grad_norm": 1.9734022617340088, "learning_rate": 6.536290998491368e-06, "step": 5850 }, { "embedding_loss": 0.1532, "epoch": 0.32966418952897136, "grad_norm": 2.051893711090088, "learning_rate": 6.59216628485221e-06, "step": 5900 }, { "embedding_loss": 0.1481, "epoch": 0.3324579538470135, "grad_norm": 1.9423421621322632, "learning_rate": 6.648041571213053e-06, "step": 5950 }, { "embedding_loss": 0.163, "epoch": 0.3352517181650556, "grad_norm": 1.56923508644104, "learning_rate": 6.703916857573895e-06, "step": 6000 }, { "embedding_loss": 0.1555, "epoch": 0.33804548248309774, "grad_norm": 2.244295120239258, "learning_rate": 6.759792143934738e-06, "step": 6050 }, { "embedding_loss": 0.157, "epoch": 0.34083924680113986, "grad_norm": 2.4641387462615967, "learning_rate": 6.81566743029558e-06, "step": 6100 }, { "embedding_loss": 0.1511, "epoch": 0.343633011119182, "grad_norm": 1.9510258436203003, "learning_rate": 6.871542716656423e-06, "step": 6150 }, { "embedding_loss": 0.1438, "epoch": 0.3464267754372241, "grad_norm": 2.5124270915985107, "learning_rate": 6.927418003017265e-06, "step": 6200 }, { "embedding_loss": 0.1556, "epoch": 0.34922053975526623, "grad_norm": 2.118985414505005, "learning_rate": 6.983293289378109e-06, "step": 6250 }, { "embedding_loss": 0.1507, "epoch": 0.35201430407330836, "grad_norm": 2.6212799549102783, "learning_rate": 7.039168575738952e-06, "step": 6300 }, { "embedding_loss": 0.1445, "epoch": 0.3548080683913505, "grad_norm": 2.5810179710388184, "learning_rate": 7.095043862099794e-06, "step": 6350 }, { "embedding_loss": 0.1396, "epoch": 0.3576018327093926, "grad_norm": 2.313830852508545, "learning_rate": 7.150919148460637e-06, "step": 6400 }, { "embedding_loss": 0.1406, "epoch": 0.3603955970274348, "grad_norm": 1.972595453262329, "learning_rate": 7.206794434821479e-06, "step": 6450 }, { "embedding_loss": 0.1412, "epoch": 0.3631893613454769, "grad_norm": 2.0627424716949463, "learning_rate": 7.262669721182322e-06, "step": 6500 }, { "embedding_loss": 0.142, "epoch": 0.36598312566351904, "grad_norm": 2.2925047874450684, "learning_rate": 7.318545007543164e-06, "step": 6550 }, { "embedding_loss": 0.1372, "epoch": 0.36877688998156116, "grad_norm": 3.033403158187866, "learning_rate": 7.374420293904007e-06, "step": 6600 }, { "embedding_loss": 0.1365, "epoch": 0.3715706542996033, "grad_norm": 2.539698839187622, "learning_rate": 7.430295580264849e-06, "step": 6650 }, { "embedding_loss": 0.1396, "epoch": 0.3743644186176454, "grad_norm": 2.383155345916748, "learning_rate": 7.486170866625692e-06, "step": 6700 }, { "embedding_loss": 0.1297, "epoch": 0.37715818293568754, "grad_norm": 2.1498725414276123, "learning_rate": 7.5420461529865355e-06, "step": 6750 }, { "embedding_loss": 0.1309, "epoch": 0.37995194725372966, "grad_norm": 1.5893750190734863, "learning_rate": 7.597921439347378e-06, "step": 6800 }, { "embedding_loss": 0.1422, "epoch": 0.3827457115717718, "grad_norm": 2.724839687347412, "learning_rate": 7.65379672570822e-06, "step": 6850 }, { "embedding_loss": 0.1335, "epoch": 0.3855394758898139, "grad_norm": 3.277081251144409, "learning_rate": 7.709672012069063e-06, "step": 6900 }, { "embedding_loss": 0.1315, "epoch": 0.3883332402078561, "grad_norm": 2.503157615661621, "learning_rate": 7.765547298429905e-06, "step": 6950 }, { "embedding_loss": 0.1288, "epoch": 0.3911270045258982, "grad_norm": 2.5580062866210938, "learning_rate": 7.821422584790748e-06, "step": 7000 }, { "embedding_loss": 0.1311, "epoch": 0.39392076884394034, "grad_norm": 2.5645792484283447, "learning_rate": 7.87729787115159e-06, "step": 7050 }, { "embedding_loss": 0.1245, "epoch": 0.39671453316198246, "grad_norm": 4.76837158203125, "learning_rate": 7.933173157512433e-06, "step": 7100 }, { "embedding_loss": 0.1311, "epoch": 0.3995082974800246, "grad_norm": 2.610658645629883, "learning_rate": 7.989048443873275e-06, "step": 7150 }, { "embedding_loss": 0.1251, "epoch": 0.4023020617980667, "grad_norm": 2.752791404724121, "learning_rate": 8.044923730234118e-06, "step": 7200 }, { "embedding_loss": 0.1289, "epoch": 0.40509582611610884, "grad_norm": 2.5137243270874023, "learning_rate": 8.10079901659496e-06, "step": 7250 }, { "embedding_loss": 0.1292, "epoch": 0.40788959043415096, "grad_norm": 2.4258999824523926, "learning_rate": 8.156674302955804e-06, "step": 7300 }, { "embedding_loss": 0.125, "epoch": 0.4106833547521931, "grad_norm": 2.871145009994507, "learning_rate": 8.212549589316646e-06, "step": 7350 }, { "embedding_loss": 0.1161, "epoch": 0.4134771190702352, "grad_norm": 2.675945997238159, "learning_rate": 8.268424875677488e-06, "step": 7400 }, { "embedding_loss": 0.1215, "epoch": 0.4162708833882774, "grad_norm": 2.3135857582092285, "learning_rate": 8.324300162038332e-06, "step": 7450 }, { "embedding_loss": 0.1171, "epoch": 0.4190646477063195, "grad_norm": 3.1383519172668457, "learning_rate": 8.380175448399174e-06, "step": 7500 }, { "embedding_loss": 0.1218, "epoch": 0.42185841202436164, "grad_norm": 2.1841654777526855, "learning_rate": 8.436050734760016e-06, "step": 7550 }, { "embedding_loss": 0.1256, "epoch": 0.42465217634240376, "grad_norm": 3.220388889312744, "learning_rate": 8.491926021120858e-06, "step": 7600 }, { "embedding_loss": 0.114, "epoch": 0.4274459406604459, "grad_norm": 3.0895724296569824, "learning_rate": 8.547801307481702e-06, "step": 7650 }, { "embedding_loss": 0.1061, "epoch": 0.430239704978488, "grad_norm": 2.226816415786743, "learning_rate": 8.603676593842544e-06, "step": 7700 }, { "embedding_loss": 0.1174, "epoch": 0.43303346929653014, "grad_norm": 2.781545400619507, "learning_rate": 8.659551880203386e-06, "step": 7750 }, { "embedding_loss": 0.1157, "epoch": 0.43582723361457226, "grad_norm": 2.942214012145996, "learning_rate": 8.71542716656423e-06, "step": 7800 }, { "embedding_loss": 0.1124, "epoch": 0.4386209979326144, "grad_norm": 2.7010889053344727, "learning_rate": 8.771302452925072e-06, "step": 7850 }, { "embedding_loss": 0.1119, "epoch": 0.4414147622506565, "grad_norm": 1.8079591989517212, "learning_rate": 8.827177739285914e-06, "step": 7900 }, { "embedding_loss": 0.1106, "epoch": 0.4442085265686987, "grad_norm": 2.0186820030212402, "learning_rate": 8.883053025646758e-06, "step": 7950 }, { "embedding_loss": 0.1056, "epoch": 0.4470022908867408, "grad_norm": 3.0559425354003906, "learning_rate": 8.9389283120076e-06, "step": 8000 }, { "embedding_loss": 0.1145, "epoch": 0.44979605520478294, "grad_norm": 2.0441322326660156, "learning_rate": 8.994803598368442e-06, "step": 8050 }, { "embedding_loss": 0.1039, "epoch": 0.45258981952282507, "grad_norm": 3.024850368499756, "learning_rate": 9.050678884729284e-06, "step": 8100 }, { "embedding_loss": 0.1026, "epoch": 0.4553835838408672, "grad_norm": 2.4957871437072754, "learning_rate": 9.106554171090128e-06, "step": 8150 }, { "embedding_loss": 0.1042, "epoch": 0.4581773481589093, "grad_norm": 2.7648422718048096, "learning_rate": 9.16242945745097e-06, "step": 8200 }, { "embedding_loss": 0.1021, "epoch": 0.46097111247695144, "grad_norm": 2.7525506019592285, "learning_rate": 9.218304743811812e-06, "step": 8250 }, { "embedding_loss": 0.1006, "epoch": 0.46376487679499356, "grad_norm": 3.67626690864563, "learning_rate": 9.274180030172656e-06, "step": 8300 }, { "embedding_loss": 0.1149, "epoch": 0.4665586411130357, "grad_norm": 2.743030309677124, "learning_rate": 9.330055316533498e-06, "step": 8350 }, { "embedding_loss": 0.1024, "epoch": 0.4693524054310778, "grad_norm": 2.2772388458251953, "learning_rate": 9.385930602894341e-06, "step": 8400 }, { "embedding_loss": 0.1005, "epoch": 0.47214616974911994, "grad_norm": 1.7938578128814697, "learning_rate": 9.441805889255184e-06, "step": 8450 }, { "embedding_loss": 0.0984, "epoch": 0.4749399340671621, "grad_norm": 3.1121528148651123, "learning_rate": 9.497681175616026e-06, "step": 8500 }, { "embedding_loss": 0.1041, "epoch": 0.47773369838520424, "grad_norm": 3.0409791469573975, "learning_rate": 9.553556461976868e-06, "step": 8550 }, { "embedding_loss": 0.0983, "epoch": 0.48052746270324637, "grad_norm": 4.182548522949219, "learning_rate": 9.609431748337711e-06, "step": 8600 }, { "embedding_loss": 0.0999, "epoch": 0.4833212270212885, "grad_norm": 2.2490689754486084, "learning_rate": 9.665307034698553e-06, "step": 8650 }, { "embedding_loss": 0.101, "epoch": 0.4861149913393306, "grad_norm": 2.1894311904907227, "learning_rate": 9.721182321059396e-06, "step": 8700 }, { "embedding_loss": 0.0999, "epoch": 0.48890875565737274, "grad_norm": 1.7990760803222656, "learning_rate": 9.777057607420238e-06, "step": 8750 }, { "embedding_loss": 0.0909, "epoch": 0.49170251997541486, "grad_norm": 2.2888717651367188, "learning_rate": 9.832932893781081e-06, "step": 8800 }, { "embedding_loss": 0.0971, "epoch": 0.494496284293457, "grad_norm": 2.491044759750366, "learning_rate": 9.888808180141925e-06, "step": 8850 }, { "embedding_loss": 0.0907, "epoch": 0.4972900486114991, "grad_norm": 1.82623291015625, "learning_rate": 9.944683466502767e-06, "step": 8900 }, { "embedding_loss": 0.0989, "epoch": 0.5000838129295413, "grad_norm": 1.828441858291626, "learning_rate": 1.000055875286361e-05, "step": 8950 }, { "embedding_loss": 0.0888, "epoch": 0.5028775772475834, "grad_norm": 3.0848538875579834, "learning_rate": 1.0056434039224451e-05, "step": 9000 }, { "embedding_loss": 0.0957, "epoch": 0.5056713415656255, "grad_norm": 1.617369532585144, "learning_rate": 1.0112309325585295e-05, "step": 9050 }, { "embedding_loss": 0.101, "epoch": 0.5084651058836677, "grad_norm": 1.7082856893539429, "learning_rate": 1.0168184611946137e-05, "step": 9100 }, { "embedding_loss": 0.0974, "epoch": 0.5112588702017098, "grad_norm": 2.1935248374938965, "learning_rate": 1.022405989830698e-05, "step": 9150 }, { "embedding_loss": 0.0916, "epoch": 0.5140526345197519, "grad_norm": 1.6460356712341309, "learning_rate": 1.0279935184667821e-05, "step": 9200 }, { "embedding_loss": 0.0894, "epoch": 0.516846398837794, "grad_norm": 2.298616647720337, "learning_rate": 1.0335810471028665e-05, "step": 9250 }, { "embedding_loss": 0.0875, "epoch": 0.5196401631558362, "grad_norm": 2.7410390377044678, "learning_rate": 1.0391685757389507e-05, "step": 9300 }, { "embedding_loss": 0.0832, "epoch": 0.5224339274738783, "grad_norm": 1.8729740381240845, "learning_rate": 1.044756104375035e-05, "step": 9350 }, { "embedding_loss": 0.0895, "epoch": 0.5252276917919204, "grad_norm": 3.122060537338257, "learning_rate": 1.0503436330111191e-05, "step": 9400 }, { "embedding_loss": 0.0859, "epoch": 0.5280214561099625, "grad_norm": 3.0515353679656982, "learning_rate": 1.0559311616472035e-05, "step": 9450 }, { "embedding_loss": 0.085, "epoch": 0.5308152204280047, "grad_norm": 1.7150226831436157, "learning_rate": 1.0615186902832877e-05, "step": 9500 }, { "embedding_loss": 0.0912, "epoch": 0.5336089847460468, "grad_norm": 2.409125328063965, "learning_rate": 1.0671062189193719e-05, "step": 9550 }, { "embedding_loss": 0.0905, "epoch": 0.5364027490640889, "grad_norm": 2.443410634994507, "learning_rate": 1.0726937475554565e-05, "step": 9600 }, { "embedding_loss": 0.0853, "epoch": 0.539196513382131, "grad_norm": 2.8326950073242188, "learning_rate": 1.0782812761915407e-05, "step": 9650 }, { "embedding_loss": 0.0826, "epoch": 0.5419902777001732, "grad_norm": 2.6055142879486084, "learning_rate": 1.0838688048276249e-05, "step": 9700 }, { "embedding_loss": 0.0839, "epoch": 0.5447840420182154, "grad_norm": 1.477574348449707, "learning_rate": 1.089456333463709e-05, "step": 9750 }, { "embedding_loss": 0.0802, "epoch": 0.5475778063362575, "grad_norm": 1.9878363609313965, "learning_rate": 1.0950438620997935e-05, "step": 9800 }, { "embedding_loss": 0.0801, "epoch": 0.5503715706542996, "grad_norm": 2.654935359954834, "learning_rate": 1.1006313907358777e-05, "step": 9850 }, { "embedding_loss": 0.084, "epoch": 0.5531653349723418, "grad_norm": 1.7312135696411133, "learning_rate": 1.1062189193719619e-05, "step": 9900 }, { "embedding_loss": 0.0867, "epoch": 0.5559590992903839, "grad_norm": 2.5775086879730225, "learning_rate": 1.111806448008046e-05, "step": 9950 }, { "embedding_loss": 0.0815, "epoch": 0.558752863608426, "grad_norm": 4.105582237243652, "learning_rate": 1.1173939766441304e-05, "step": 10000 }, { "embedding_loss": 0.0804, "epoch": 0.5615466279264681, "grad_norm": 2.239165782928467, "learning_rate": 1.1229815052802147e-05, "step": 10050 }, { "embedding_loss": 0.0803, "epoch": 0.5643403922445103, "grad_norm": 1.8061590194702148, "learning_rate": 1.1285690339162989e-05, "step": 10100 }, { "embedding_loss": 0.0767, "epoch": 0.5671341565625524, "grad_norm": 1.7275691032409668, "learning_rate": 1.1341565625523832e-05, "step": 10150 }, { "embedding_loss": 0.0772, "epoch": 0.5699279208805945, "grad_norm": 2.335702419281006, "learning_rate": 1.1397440911884674e-05, "step": 10200 }, { "embedding_loss": 0.0783, "epoch": 0.5727216851986366, "grad_norm": 2.477023124694824, "learning_rate": 1.1453316198245517e-05, "step": 10250 }, { "embedding_loss": 0.0742, "epoch": 0.5755154495166788, "grad_norm": 1.2103650569915771, "learning_rate": 1.1509191484606359e-05, "step": 10300 }, { "embedding_loss": 0.0726, "epoch": 0.5783092138347209, "grad_norm": 2.4182522296905518, "learning_rate": 1.1565066770967202e-05, "step": 10350 }, { "embedding_loss": 0.0753, "epoch": 0.581102978152763, "grad_norm": 2.1317906379699707, "learning_rate": 1.1620942057328044e-05, "step": 10400 }, { "embedding_loss": 0.0744, "epoch": 0.5838967424708051, "grad_norm": 1.6081151962280273, "learning_rate": 1.1676817343688886e-05, "step": 10450 }, { "embedding_loss": 0.0767, "epoch": 0.5866905067888473, "grad_norm": 1.9290062189102173, "learning_rate": 1.1732692630049729e-05, "step": 10500 }, { "embedding_loss": 0.0717, "epoch": 0.5894842711068894, "grad_norm": 3.1308586597442627, "learning_rate": 1.1788567916410572e-05, "step": 10550 }, { "embedding_loss": 0.074, "epoch": 0.5922780354249315, "grad_norm": 3.7071943283081055, "learning_rate": 1.1844443202771416e-05, "step": 10600 }, { "embedding_loss": 0.0709, "epoch": 0.5950717997429736, "grad_norm": 1.5015625953674316, "learning_rate": 1.1900318489132258e-05, "step": 10650 }, { "embedding_loss": 0.0757, "epoch": 0.5978655640610158, "grad_norm": 2.069378137588501, "learning_rate": 1.1956193775493102e-05, "step": 10700 }, { "embedding_loss": 0.0735, "epoch": 0.6006593283790579, "grad_norm": 1.9521065950393677, "learning_rate": 1.2012069061853944e-05, "step": 10750 }, { "embedding_loss": 0.0653, "epoch": 0.6034530926971001, "grad_norm": 1.6058807373046875, "learning_rate": 1.2067944348214786e-05, "step": 10800 }, { "embedding_loss": 0.0672, "epoch": 0.6062468570151422, "grad_norm": 2.327543020248413, "learning_rate": 1.2123819634575628e-05, "step": 10850 }, { "embedding_loss": 0.0638, "epoch": 0.6090406213331844, "grad_norm": 3.087148666381836, "learning_rate": 1.2179694920936472e-05, "step": 10900 }, { "embedding_loss": 0.0697, "epoch": 0.6118343856512265, "grad_norm": 1.500040054321289, "learning_rate": 1.2235570207297314e-05, "step": 10950 }, { "embedding_loss": 0.0716, "epoch": 0.6146281499692686, "grad_norm": 1.9431390762329102, "learning_rate": 1.2291445493658156e-05, "step": 11000 }, { "embedding_loss": 0.0671, "epoch": 0.6174219142873107, "grad_norm": 1.4346779584884644, "learning_rate": 1.2347320780018998e-05, "step": 11050 }, { "embedding_loss": 0.0713, "epoch": 0.6202156786053529, "grad_norm": 2.420010566711426, "learning_rate": 1.2403196066379842e-05, "step": 11100 }, { "embedding_loss": 0.0655, "epoch": 0.623009442923395, "grad_norm": 2.475550889968872, "learning_rate": 1.2459071352740684e-05, "step": 11150 }, { "embedding_loss": 0.0667, "epoch": 0.6258032072414371, "grad_norm": 1.8394588232040405, "learning_rate": 1.2514946639101526e-05, "step": 11200 }, { "embedding_loss": 0.0649, "epoch": 0.6285969715594792, "grad_norm": 2.441422462463379, "learning_rate": 1.2570821925462368e-05, "step": 11250 }, { "embedding_loss": 0.0717, "epoch": 0.6313907358775214, "grad_norm": 2.7160696983337402, "learning_rate": 1.2626697211823212e-05, "step": 11300 }, { "embedding_loss": 0.0645, "epoch": 0.6341845001955635, "grad_norm": 2.1766295433044434, "learning_rate": 1.2682572498184054e-05, "step": 11350 }, { "embedding_loss": 0.0606, "epoch": 0.6369782645136056, "grad_norm": 1.1626685857772827, "learning_rate": 1.2738447784544896e-05, "step": 11400 }, { "embedding_loss": 0.0623, "epoch": 0.6397720288316477, "grad_norm": 2.690507411956787, "learning_rate": 1.2794323070905738e-05, "step": 11450 }, { "embedding_loss": 0.0583, "epoch": 0.6425657931496899, "grad_norm": 2.9739575386047363, "learning_rate": 1.2850198357266582e-05, "step": 11500 }, { "embedding_loss": 0.0578, "epoch": 0.645359557467732, "grad_norm": 1.1394680738449097, "learning_rate": 1.2906073643627424e-05, "step": 11550 }, { "embedding_loss": 0.0607, "epoch": 0.6481533217857741, "grad_norm": 2.838078022003174, "learning_rate": 1.2961948929988266e-05, "step": 11600 }, { "embedding_loss": 0.0573, "epoch": 0.6509470861038162, "grad_norm": 1.8478329181671143, "learning_rate": 1.3017824216349111e-05, "step": 11650 }, { "embedding_loss": 0.0581, "epoch": 0.6537408504218584, "grad_norm": 1.2568801641464233, "learning_rate": 1.3073699502709953e-05, "step": 11700 }, { "embedding_loss": 0.0544, "epoch": 0.6565346147399005, "grad_norm": 2.5175678730010986, "learning_rate": 1.3129574789070795e-05, "step": 11750 }, { "embedding_loss": 0.0576, "epoch": 0.6593283790579427, "grad_norm": 2.2409534454345703, "learning_rate": 1.318545007543164e-05, "step": 11800 }, { "embedding_loss": 0.0589, "epoch": 0.6621221433759849, "grad_norm": 1.9460983276367188, "learning_rate": 1.3241325361792481e-05, "step": 11850 }, { "embedding_loss": 0.0611, "epoch": 0.664915907694027, "grad_norm": 2.435262680053711, "learning_rate": 1.3297200648153323e-05, "step": 11900 }, { "embedding_loss": 0.0628, "epoch": 0.6677096720120691, "grad_norm": 2.100428581237793, "learning_rate": 1.3353075934514165e-05, "step": 11950 }, { "embedding_loss": 0.0603, "epoch": 0.6705034363301112, "grad_norm": 3.1242198944091797, "learning_rate": 1.3408951220875009e-05, "step": 12000 }, { "embedding_loss": 0.0572, "epoch": 0.6732972006481533, "grad_norm": 1.8112308979034424, "learning_rate": 1.3464826507235851e-05, "step": 12050 }, { "embedding_loss": 0.0601, "epoch": 0.6760909649661955, "grad_norm": 1.150494933128357, "learning_rate": 1.3520701793596693e-05, "step": 12100 }, { "embedding_loss": 0.0578, "epoch": 0.6788847292842376, "grad_norm": 2.3957972526550293, "learning_rate": 1.3576577079957535e-05, "step": 12150 }, { "embedding_loss": 0.0604, "epoch": 0.6816784936022797, "grad_norm": 2.2135627269744873, "learning_rate": 1.3632452366318379e-05, "step": 12200 }, { "embedding_loss": 0.0541, "epoch": 0.6844722579203218, "grad_norm": 2.529266119003296, "learning_rate": 1.3688327652679221e-05, "step": 12250 }, { "embedding_loss": 0.0602, "epoch": 0.687266022238364, "grad_norm": 2.736262798309326, "learning_rate": 1.3744202939040063e-05, "step": 12300 }, { "embedding_loss": 0.0564, "epoch": 0.6900597865564061, "grad_norm": 2.6058030128479004, "learning_rate": 1.3800078225400905e-05, "step": 12350 }, { "embedding_loss": 0.0535, "epoch": 0.6928535508744482, "grad_norm": 1.2330048084259033, "learning_rate": 1.3855953511761749e-05, "step": 12400 }, { "embedding_loss": 0.0495, "epoch": 0.6956473151924903, "grad_norm": 2.318178653717041, "learning_rate": 1.3911828798122591e-05, "step": 12450 }, { "embedding_loss": 0.0514, "epoch": 0.6984410795105325, "grad_norm": 1.177850365638733, "learning_rate": 1.3967704084483433e-05, "step": 12500 }, { "embedding_loss": 0.0508, "epoch": 0.7012348438285746, "grad_norm": 2.0019075870513916, "learning_rate": 1.4023579370844275e-05, "step": 12550 }, { "embedding_loss": 0.0509, "epoch": 0.7040286081466167, "grad_norm": 1.7644133567810059, "learning_rate": 1.4079454657205119e-05, "step": 12600 }, { "embedding_loss": 0.0539, "epoch": 0.7068223724646588, "grad_norm": 2.3139400482177734, "learning_rate": 1.4135329943565963e-05, "step": 12650 }, { "embedding_loss": 0.052, "epoch": 0.709616136782701, "grad_norm": 1.8325903415679932, "learning_rate": 1.4191205229926805e-05, "step": 12700 }, { "embedding_loss": 0.0525, "epoch": 0.7124099011007431, "grad_norm": 1.4533387422561646, "learning_rate": 1.4247080516287649e-05, "step": 12750 }, { "embedding_loss": 0.0501, "epoch": 0.7152036654187852, "grad_norm": 2.9430830478668213, "learning_rate": 1.430295580264849e-05, "step": 12800 }, { "embedding_loss": 0.0459, "epoch": 0.7179974297368275, "grad_norm": 2.1723198890686035, "learning_rate": 1.4358831089009333e-05, "step": 12850 }, { "embedding_loss": 0.0529, "epoch": 0.7207911940548696, "grad_norm": 1.941612958908081, "learning_rate": 1.4414706375370175e-05, "step": 12900 }, { "embedding_loss": 0.0466, "epoch": 0.7235849583729117, "grad_norm": 1.0569517612457275, "learning_rate": 1.4470581661731019e-05, "step": 12950 }, { "embedding_loss": 0.0496, "epoch": 0.7263787226909538, "grad_norm": 2.395667314529419, "learning_rate": 1.452645694809186e-05, "step": 13000 }, { "embedding_loss": 0.0494, "epoch": 0.729172487008996, "grad_norm": 2.4174811840057373, "learning_rate": 1.4582332234452703e-05, "step": 13050 }, { "embedding_loss": 0.0448, "epoch": 0.7319662513270381, "grad_norm": 1.5978491306304932, "learning_rate": 1.4638207520813545e-05, "step": 13100 }, { "embedding_loss": 0.049, "epoch": 0.7347600156450802, "grad_norm": 1.4864240884780884, "learning_rate": 1.4694082807174388e-05, "step": 13150 }, { "embedding_loss": 0.0437, "epoch": 0.7375537799631223, "grad_norm": 2.6743288040161133, "learning_rate": 1.474995809353523e-05, "step": 13200 }, { "embedding_loss": 0.0453, "epoch": 0.7403475442811644, "grad_norm": 0.6119893789291382, "learning_rate": 1.4805833379896073e-05, "step": 13250 }, { "embedding_loss": 0.0494, "epoch": 0.7431413085992066, "grad_norm": 2.415886163711548, "learning_rate": 1.4861708666256915e-05, "step": 13300 }, { "embedding_loss": 0.0464, "epoch": 0.7459350729172487, "grad_norm": 3.297279119491577, "learning_rate": 1.4917583952617758e-05, "step": 13350 }, { "embedding_loss": 0.0423, "epoch": 0.7487288372352908, "grad_norm": 1.2342664003372192, "learning_rate": 1.49734592389786e-05, "step": 13400 }, { "embedding_loss": 0.0441, "epoch": 0.751522601553333, "grad_norm": 3.0270392894744873, "learning_rate": 1.5029334525339443e-05, "step": 13450 }, { "embedding_loss": 0.0483, "epoch": 0.7543163658713751, "grad_norm": 2.746429920196533, "learning_rate": 1.5085209811700285e-05, "step": 13500 }, { "embedding_loss": 0.0412, "epoch": 0.7571101301894172, "grad_norm": 2.1152052879333496, "learning_rate": 1.5141085098061128e-05, "step": 13550 }, { "embedding_loss": 0.0482, "epoch": 0.7599038945074593, "grad_norm": 1.0390915870666504, "learning_rate": 1.519696038442197e-05, "step": 13600 }, { "embedding_loss": 0.0356, "epoch": 0.7626976588255014, "grad_norm": 1.4893919229507446, "learning_rate": 1.5252835670782813e-05, "step": 13650 }, { "embedding_loss": 0.0438, "epoch": 0.7654914231435436, "grad_norm": 1.8682059049606323, "learning_rate": 1.5308710957143658e-05, "step": 13700 }, { "embedding_loss": 0.0441, "epoch": 0.7682851874615857, "grad_norm": 1.1859805583953857, "learning_rate": 1.53645862435045e-05, "step": 13750 }, { "embedding_loss": 0.0407, "epoch": 0.7710789517796278, "grad_norm": 1.1188807487487793, "learning_rate": 1.5420461529865342e-05, "step": 13800 }, { "embedding_loss": 0.0421, "epoch": 0.77387271609767, "grad_norm": 1.1425139904022217, "learning_rate": 1.5476336816226184e-05, "step": 13850 }, { "embedding_loss": 0.0417, "epoch": 0.7766664804157122, "grad_norm": 0.7896090745925903, "learning_rate": 1.5532212102587026e-05, "step": 13900 }, { "embedding_loss": 0.0389, "epoch": 0.7794602447337543, "grad_norm": 1.684008002281189, "learning_rate": 1.558808738894787e-05, "step": 13950 }, { "embedding_loss": 0.0353, "epoch": 0.7822540090517964, "grad_norm": 2.8100898265838623, "learning_rate": 1.5643962675308714e-05, "step": 14000 }, { "embedding_loss": 0.0416, "epoch": 0.7850477733698386, "grad_norm": 1.28396737575531, "learning_rate": 1.5699837961669556e-05, "step": 14050 }, { "embedding_loss": 0.0436, "epoch": 0.7878415376878807, "grad_norm": 1.562385082244873, "learning_rate": 1.5755713248030398e-05, "step": 14100 }, { "embedding_loss": 0.041, "epoch": 0.7906353020059228, "grad_norm": 1.1698726415634155, "learning_rate": 1.581158853439124e-05, "step": 14150 }, { "embedding_loss": 0.0387, "epoch": 0.7934290663239649, "grad_norm": 1.1048386096954346, "learning_rate": 1.5867463820752082e-05, "step": 14200 }, { "embedding_loss": 0.0374, "epoch": 0.796222830642007, "grad_norm": 2.514537811279297, "learning_rate": 1.5923339107112924e-05, "step": 14250 }, { "embedding_loss": 0.0344, "epoch": 0.7990165949600492, "grad_norm": 0.5873745679855347, "learning_rate": 1.5979214393473766e-05, "step": 14300 }, { "embedding_loss": 0.0387, "epoch": 0.8018103592780913, "grad_norm": 0.7706195116043091, "learning_rate": 1.603508967983461e-05, "step": 14350 }, { "embedding_loss": 0.0387, "epoch": 0.8046041235961334, "grad_norm": 1.5083991289138794, "learning_rate": 1.6090964966195454e-05, "step": 14400 }, { "embedding_loss": 0.0394, "epoch": 0.8073978879141755, "grad_norm": 2.4971909523010254, "learning_rate": 1.6146840252556296e-05, "step": 14450 }, { "embedding_loss": 0.04, "epoch": 0.8101916522322177, "grad_norm": 0.8164982199668884, "learning_rate": 1.6202715538917138e-05, "step": 14500 }, { "embedding_loss": 0.0402, "epoch": 0.8129854165502598, "grad_norm": 1.4551202058792114, "learning_rate": 1.625859082527798e-05, "step": 14550 }, { "embedding_loss": 0.0392, "epoch": 0.8157791808683019, "grad_norm": 1.16090989112854, "learning_rate": 1.6314466111638822e-05, "step": 14600 }, { "embedding_loss": 0.0327, "epoch": 0.818572945186344, "grad_norm": 0.9194680452346802, "learning_rate": 1.6370341397999664e-05, "step": 14650 }, { "embedding_loss": 0.0322, "epoch": 0.8213667095043862, "grad_norm": 0.9461217522621155, "learning_rate": 1.642621668436051e-05, "step": 14700 }, { "embedding_loss": 0.0358, "epoch": 0.8241604738224283, "grad_norm": 1.725447177886963, "learning_rate": 1.648209197072135e-05, "step": 14750 }, { "embedding_loss": 0.0359, "epoch": 0.8269542381404704, "grad_norm": 0.6715666055679321, "learning_rate": 1.6537967257082194e-05, "step": 14800 }, { "embedding_loss": 0.0392, "epoch": 0.8297480024585125, "grad_norm": 1.6615924835205078, "learning_rate": 1.6593842543443036e-05, "step": 14850 }, { "embedding_loss": 0.0349, "epoch": 0.8325417667765548, "grad_norm": 2.5824193954467773, "learning_rate": 1.664971782980388e-05, "step": 14900 }, { "embedding_loss": 0.0364, "epoch": 0.8353355310945969, "grad_norm": 1.5869414806365967, "learning_rate": 1.6705593116164723e-05, "step": 14950 }, { "embedding_loss": 0.0352, "epoch": 0.838129295412639, "grad_norm": 0.9549688696861267, "learning_rate": 1.6761468402525565e-05, "step": 15000 }, { "embedding_loss": 0.0325, "epoch": 0.8409230597306812, "grad_norm": 0.9183236956596375, "learning_rate": 1.6817343688886407e-05, "step": 15050 }, { "embedding_loss": 0.0343, "epoch": 0.8437168240487233, "grad_norm": 2.171360731124878, "learning_rate": 1.687321897524725e-05, "step": 15100 }, { "embedding_loss": 0.0367, "epoch": 0.8465105883667654, "grad_norm": 1.571432113647461, "learning_rate": 1.692909426160809e-05, "step": 15150 }, { "embedding_loss": 0.0364, "epoch": 0.8493043526848075, "grad_norm": 1.1856991052627563, "learning_rate": 1.6984969547968933e-05, "step": 15200 }, { "embedding_loss": 0.0335, "epoch": 0.8520981170028497, "grad_norm": 1.1461403369903564, "learning_rate": 1.7040844834329776e-05, "step": 15250 }, { "embedding_loss": 0.0345, "epoch": 0.8548918813208918, "grad_norm": 1.4782109260559082, "learning_rate": 1.709672012069062e-05, "step": 15300 }, { "embedding_loss": 0.0354, "epoch": 0.8576856456389339, "grad_norm": 1.5905303955078125, "learning_rate": 1.7152595407051463e-05, "step": 15350 }, { "embedding_loss": 0.0401, "epoch": 0.860479409956976, "grad_norm": 3.7443838119506836, "learning_rate": 1.7208470693412305e-05, "step": 15400 }, { "embedding_loss": 0.0329, "epoch": 0.8632731742750182, "grad_norm": 0.46851423382759094, "learning_rate": 1.7264345979773147e-05, "step": 15450 }, { "embedding_loss": 0.0334, "epoch": 0.8660669385930603, "grad_norm": 1.8323862552642822, "learning_rate": 1.732022126613399e-05, "step": 15500 }, { "embedding_loss": 0.0315, "epoch": 0.8688607029111024, "grad_norm": 4.0182061195373535, "learning_rate": 1.737609655249483e-05, "step": 15550 }, { "embedding_loss": 0.0345, "epoch": 0.8716544672291445, "grad_norm": 1.1877408027648926, "learning_rate": 1.7431971838855673e-05, "step": 15600 }, { "embedding_loss": 0.034, "epoch": 0.8744482315471866, "grad_norm": 1.3754762411117554, "learning_rate": 1.748784712521652e-05, "step": 15650 }, { "embedding_loss": 0.0341, "epoch": 0.8772419958652288, "grad_norm": 1.7504146099090576, "learning_rate": 1.754372241157736e-05, "step": 15700 }, { "embedding_loss": 0.0295, "epoch": 0.8800357601832709, "grad_norm": 0.7237100005149841, "learning_rate": 1.7599597697938203e-05, "step": 15750 }, { "embedding_loss": 0.0303, "epoch": 0.882829524501313, "grad_norm": 1.2992289066314697, "learning_rate": 1.765547298429905e-05, "step": 15800 }, { "embedding_loss": 0.034, "epoch": 0.8856232888193551, "grad_norm": 1.0172449350357056, "learning_rate": 1.771134827065989e-05, "step": 15850 }, { "embedding_loss": 0.0314, "epoch": 0.8884170531373974, "grad_norm": 3.193023681640625, "learning_rate": 1.7767223557020733e-05, "step": 15900 }, { "embedding_loss": 0.0308, "epoch": 0.8912108174554395, "grad_norm": 1.9702247381210327, "learning_rate": 1.7823098843381575e-05, "step": 15950 }, { "embedding_loss": 0.0325, "epoch": 0.8940045817734816, "grad_norm": 1.255988359451294, "learning_rate": 1.7878974129742417e-05, "step": 16000 }, { "embedding_loss": 0.0299, "epoch": 0.8967983460915238, "grad_norm": 1.6700023412704468, "learning_rate": 1.793484941610326e-05, "step": 16050 }, { "embedding_loss": 0.0323, "epoch": 0.8995921104095659, "grad_norm": 0.9366209506988525, "learning_rate": 1.79907247024641e-05, "step": 16100 }, { "embedding_loss": 0.0306, "epoch": 0.902385874727608, "grad_norm": 1.2843196392059326, "learning_rate": 1.8046599988824943e-05, "step": 16150 }, { "embedding_loss": 0.0338, "epoch": 0.9051796390456501, "grad_norm": 0.6659603118896484, "learning_rate": 1.810247527518579e-05, "step": 16200 }, { "embedding_loss": 0.0255, "epoch": 0.9079734033636923, "grad_norm": 1.831362247467041, "learning_rate": 1.815835056154663e-05, "step": 16250 }, { "embedding_loss": 0.0318, "epoch": 0.9107671676817344, "grad_norm": 1.2318592071533203, "learning_rate": 1.8214225847907472e-05, "step": 16300 }, { "embedding_loss": 0.0293, "epoch": 0.9135609319997765, "grad_norm": 0.467202365398407, "learning_rate": 1.8270101134268315e-05, "step": 16350 }, { "embedding_loss": 0.0294, "epoch": 0.9163546963178186, "grad_norm": 3.461735248565674, "learning_rate": 1.8325976420629157e-05, "step": 16400 }, { "embedding_loss": 0.0325, "epoch": 0.9191484606358608, "grad_norm": 0.7302302122116089, "learning_rate": 1.838185170699e-05, "step": 16450 }, { "embedding_loss": 0.0333, "epoch": 0.9219422249539029, "grad_norm": 0.8994712233543396, "learning_rate": 1.843772699335084e-05, "step": 16500 }, { "embedding_loss": 0.0304, "epoch": 0.924735989271945, "grad_norm": 0.4808826744556427, "learning_rate": 1.8493602279711683e-05, "step": 16550 }, { "embedding_loss": 0.0276, "epoch": 0.9275297535899871, "grad_norm": 1.1688770055770874, "learning_rate": 1.8549477566072528e-05, "step": 16600 }, { "embedding_loss": 0.0315, "epoch": 0.9303235179080293, "grad_norm": 1.286929726600647, "learning_rate": 1.860535285243337e-05, "step": 16650 }, { "embedding_loss": 0.0287, "epoch": 0.9331172822260714, "grad_norm": 0.8156574964523315, "learning_rate": 1.8661228138794212e-05, "step": 16700 }, { "embedding_loss": 0.0282, "epoch": 0.9359110465441135, "grad_norm": 1.3065948486328125, "learning_rate": 1.8717103425155058e-05, "step": 16750 }, { "embedding_loss": 0.0275, "epoch": 0.9387048108621556, "grad_norm": 0.2685019373893738, "learning_rate": 1.87729787115159e-05, "step": 16800 }, { "embedding_loss": 0.0251, "epoch": 0.9414985751801977, "grad_norm": 0.2626272141933441, "learning_rate": 1.8828853997876742e-05, "step": 16850 }, { "embedding_loss": 0.0282, "epoch": 0.9442923394982399, "grad_norm": 0.588791012763977, "learning_rate": 1.8884729284237584e-05, "step": 16900 }, { "embedding_loss": 0.0264, "epoch": 0.9470861038162821, "grad_norm": 0.5156967639923096, "learning_rate": 1.8940604570598426e-05, "step": 16950 }, { "embedding_loss": 0.0315, "epoch": 0.9498798681343242, "grad_norm": 0.4191617965698242, "learning_rate": 1.8996479856959268e-05, "step": 17000 }, { "embedding_loss": 0.0301, "epoch": 0.9526736324523664, "grad_norm": 0.7628358006477356, "learning_rate": 1.905235514332011e-05, "step": 17050 }, { "embedding_loss": 0.0304, "epoch": 0.9554673967704085, "grad_norm": 0.6042181849479675, "learning_rate": 1.9108230429680956e-05, "step": 17100 }, { "embedding_loss": 0.0306, "epoch": 0.9582611610884506, "grad_norm": 1.8044644594192505, "learning_rate": 1.9164105716041798e-05, "step": 17150 }, { "embedding_loss": 0.027, "epoch": 0.9610549254064927, "grad_norm": 0.6014218330383301, "learning_rate": 1.921998100240264e-05, "step": 17200 }, { "embedding_loss": 0.0286, "epoch": 0.9638486897245349, "grad_norm": 0.6232359409332275, "learning_rate": 1.9275856288763482e-05, "step": 17250 }, { "embedding_loss": 0.0271, "epoch": 0.966642454042577, "grad_norm": 1.2665234804153442, "learning_rate": 1.9331731575124324e-05, "step": 17300 }, { "embedding_loss": 0.0276, "epoch": 0.9694362183606191, "grad_norm": 0.572498619556427, "learning_rate": 1.9387606861485166e-05, "step": 17350 }, { "embedding_loss": 0.0284, "epoch": 0.9722299826786612, "grad_norm": 1.042931318283081, "learning_rate": 1.9443482147846008e-05, "step": 17400 }, { "embedding_loss": 0.0261, "epoch": 0.9750237469967034, "grad_norm": 0.5786505341529846, "learning_rate": 1.949935743420685e-05, "step": 17450 }, { "embedding_loss": 0.025, "epoch": 0.9778175113147455, "grad_norm": 0.16984255611896515, "learning_rate": 1.9555232720567696e-05, "step": 17500 }, { "embedding_loss": 0.0276, "epoch": 0.9806112756327876, "grad_norm": 2.1644561290740967, "learning_rate": 1.9611108006928538e-05, "step": 17550 }, { "embedding_loss": 0.0278, "epoch": 0.9834050399508297, "grad_norm": 1.0523499250411987, "learning_rate": 1.966698329328938e-05, "step": 17600 }, { "embedding_loss": 0.0271, "epoch": 0.9861988042688719, "grad_norm": 1.6781634092330933, "learning_rate": 1.9722858579650222e-05, "step": 17650 }, { "embedding_loss": 0.0253, "epoch": 0.988992568586914, "grad_norm": 0.7380790114402771, "learning_rate": 1.9778733866011064e-05, "step": 17700 }, { "embedding_loss": 0.0249, "epoch": 0.9917863329049561, "grad_norm": 0.7555108070373535, "learning_rate": 1.9834609152371906e-05, "step": 17750 }, { "embedding_loss": 0.029, "epoch": 0.9945800972229982, "grad_norm": 0.46742114424705505, "learning_rate": 1.989048443873275e-05, "step": 17800 }, { "embedding_loss": 0.0294, "epoch": 0.9973738615410404, "grad_norm": 0.5393733382225037, "learning_rate": 1.9946359725093593e-05, "step": 17850 }, { "embedding_loss": 0.0266, "epoch": 1.0001676258590826, "grad_norm": 0.8678149580955505, "learning_rate": 1.9999751665393952e-05, "step": 17900 }, { "embedding_loss": 0.0234, "epoch": 1.0029613901771246, "grad_norm": 1.2158678770065308, "learning_rate": 1.999354330024275e-05, "step": 17950 }, { "embedding_loss": 0.0275, "epoch": 1.0057551544951668, "grad_norm": 0.622416079044342, "learning_rate": 1.9987334935091543e-05, "step": 18000 }, { "embedding_loss": 0.0251, "epoch": 1.0085489188132089, "grad_norm": 0.5201563835144043, "learning_rate": 1.998112656994034e-05, "step": 18050 }, { "embedding_loss": 0.0278, "epoch": 1.011342683131251, "grad_norm": 0.27821871638298035, "learning_rate": 1.9974918204789134e-05, "step": 18100 }, { "embedding_loss": 0.0243, "epoch": 1.014136447449293, "grad_norm": 1.3895442485809326, "learning_rate": 1.9968709839637928e-05, "step": 18150 }, { "embedding_loss": 0.024, "epoch": 1.0169302117673353, "grad_norm": 0.9142733216285706, "learning_rate": 1.9962501474486725e-05, "step": 18200 }, { "embedding_loss": 0.0266, "epoch": 1.0197239760853773, "grad_norm": 2.1112637519836426, "learning_rate": 1.995629310933552e-05, "step": 18250 }, { "embedding_loss": 0.0267, "epoch": 1.0225177404034196, "grad_norm": 0.7773581743240356, "learning_rate": 1.9950084744184316e-05, "step": 18300 }, { "embedding_loss": 0.0259, "epoch": 1.0253115047214616, "grad_norm": 0.3259076774120331, "learning_rate": 1.994387637903311e-05, "step": 18350 }, { "embedding_loss": 0.0281, "epoch": 1.0281052690395038, "grad_norm": 0.6886093616485596, "learning_rate": 1.9937668013881904e-05, "step": 18400 }, { "embedding_loss": 0.0266, "epoch": 1.0308990333575458, "grad_norm": 3.6889612674713135, "learning_rate": 1.99314596487307e-05, "step": 18450 }, { "embedding_loss": 0.0246, "epoch": 1.033692797675588, "grad_norm": 1.3187514543533325, "learning_rate": 1.9925251283579495e-05, "step": 18500 }, { "embedding_loss": 0.0253, "epoch": 1.0364865619936303, "grad_norm": 0.8006548285484314, "learning_rate": 1.9919042918428292e-05, "step": 18550 }, { "embedding_loss": 0.0223, "epoch": 1.0392803263116723, "grad_norm": 1.5592374801635742, "learning_rate": 1.9912834553277086e-05, "step": 18600 }, { "embedding_loss": 0.0247, "epoch": 1.0420740906297146, "grad_norm": 0.8292970061302185, "learning_rate": 1.990662618812588e-05, "step": 18650 }, { "embedding_loss": 0.0277, "epoch": 1.0448678549477566, "grad_norm": 0.9238064289093018, "learning_rate": 1.9900417822974677e-05, "step": 18700 }, { "embedding_loss": 0.0212, "epoch": 1.0476616192657988, "grad_norm": 0.5460510849952698, "learning_rate": 1.989420945782347e-05, "step": 18750 }, { "embedding_loss": 0.0286, "epoch": 1.0504553835838408, "grad_norm": 0.8624370694160461, "learning_rate": 1.9888001092672268e-05, "step": 18800 }, { "embedding_loss": 0.0253, "epoch": 1.053249147901883, "grad_norm": 1.2462369203567505, "learning_rate": 1.9881792727521062e-05, "step": 18850 }, { "embedding_loss": 0.023, "epoch": 1.056042912219925, "grad_norm": 1.463140845298767, "learning_rate": 1.987558436236986e-05, "step": 18900 }, { "embedding_loss": 0.0254, "epoch": 1.0588366765379673, "grad_norm": 0.7137489318847656, "learning_rate": 1.9869375997218653e-05, "step": 18950 }, { "embedding_loss": 0.0236, "epoch": 1.0616304408560093, "grad_norm": 0.876620352268219, "learning_rate": 1.9863167632067447e-05, "step": 19000 }, { "embedding_loss": 0.0255, "epoch": 1.0644242051740516, "grad_norm": 0.9772096872329712, "learning_rate": 1.9856959266916244e-05, "step": 19050 }, { "embedding_loss": 0.0231, "epoch": 1.0672179694920936, "grad_norm": 0.5689325928688049, "learning_rate": 1.9850750901765038e-05, "step": 19100 }, { "embedding_loss": 0.0242, "epoch": 1.0700117338101358, "grad_norm": 1.0084155797958374, "learning_rate": 1.9844542536613835e-05, "step": 19150 }, { "embedding_loss": 0.0249, "epoch": 1.0728054981281778, "grad_norm": 1.1132110357284546, "learning_rate": 1.983833417146263e-05, "step": 19200 }, { "embedding_loss": 0.0244, "epoch": 1.07559926244622, "grad_norm": 0.6957171559333801, "learning_rate": 1.9832125806311426e-05, "step": 19250 }, { "embedding_loss": 0.026, "epoch": 1.078393026764262, "grad_norm": 0.14215616881847382, "learning_rate": 1.982591744116022e-05, "step": 19300 }, { "embedding_loss": 0.0247, "epoch": 1.0811867910823043, "grad_norm": 0.5730525255203247, "learning_rate": 1.9819709076009017e-05, "step": 19350 }, { "embedding_loss": 0.0231, "epoch": 1.0839805554003463, "grad_norm": 0.4326806664466858, "learning_rate": 1.981350071085781e-05, "step": 19400 }, { "embedding_loss": 0.0228, "epoch": 1.0867743197183886, "grad_norm": 1.0763707160949707, "learning_rate": 1.9807292345706605e-05, "step": 19450 }, { "embedding_loss": 0.0226, "epoch": 1.0895680840364306, "grad_norm": 0.9282318949699402, "learning_rate": 1.9801083980555402e-05, "step": 19500 }, { "embedding_loss": 0.0217, "epoch": 1.0923618483544728, "grad_norm": 0.7202469706535339, "learning_rate": 1.9794875615404196e-05, "step": 19550 }, { "embedding_loss": 0.0243, "epoch": 1.095155612672515, "grad_norm": 1.0603458881378174, "learning_rate": 1.9788667250252993e-05, "step": 19600 }, { "embedding_loss": 0.0221, "epoch": 1.097949376990557, "grad_norm": 0.8984811305999756, "learning_rate": 1.9782458885101787e-05, "step": 19650 }, { "embedding_loss": 0.0275, "epoch": 1.1007431413085993, "grad_norm": 0.9731761813163757, "learning_rate": 1.9776250519950584e-05, "step": 19700 }, { "embedding_loss": 0.0234, "epoch": 1.1035369056266413, "grad_norm": 0.6321505904197693, "learning_rate": 1.9770042154799378e-05, "step": 19750 }, { "embedding_loss": 0.0249, "epoch": 1.1063306699446835, "grad_norm": 0.4798072576522827, "learning_rate": 1.9763833789648175e-05, "step": 19800 }, { "embedding_loss": 0.0219, "epoch": 1.1091244342627256, "grad_norm": 0.7189303636550903, "learning_rate": 1.975762542449697e-05, "step": 19850 }, { "embedding_loss": 0.0187, "epoch": 1.1119181985807678, "grad_norm": 0.7905238270759583, "learning_rate": 1.9751417059345763e-05, "step": 19900 }, { "embedding_loss": 0.023, "epoch": 1.1147119628988098, "grad_norm": 1.4621291160583496, "learning_rate": 1.974520869419456e-05, "step": 19950 }, { "embedding_loss": 0.0224, "epoch": 1.117505727216852, "grad_norm": 0.619705319404602, "learning_rate": 1.9739000329043354e-05, "step": 20000 }, { "embedding_loss": 0.0219, "epoch": 1.120299491534894, "grad_norm": 0.18817108869552612, "learning_rate": 1.973279196389215e-05, "step": 20050 }, { "embedding_loss": 0.0254, "epoch": 1.1230932558529363, "grad_norm": 0.7136713862419128, "learning_rate": 1.9726583598740945e-05, "step": 20100 }, { "embedding_loss": 0.0262, "epoch": 1.1258870201709783, "grad_norm": 0.4407173693180084, "learning_rate": 1.9720375233589742e-05, "step": 20150 }, { "embedding_loss": 0.0211, "epoch": 1.1286807844890205, "grad_norm": 0.5381254553794861, "learning_rate": 1.9714166868438536e-05, "step": 20200 }, { "embedding_loss": 0.0221, "epoch": 1.1314745488070626, "grad_norm": 0.9527179002761841, "learning_rate": 1.970795850328733e-05, "step": 20250 }, { "embedding_loss": 0.0224, "epoch": 1.1342683131251048, "grad_norm": 0.8440644145011902, "learning_rate": 1.9701750138136127e-05, "step": 20300 }, { "embedding_loss": 0.0256, "epoch": 1.1370620774431468, "grad_norm": 1.530969500541687, "learning_rate": 1.969554177298492e-05, "step": 20350 }, { "embedding_loss": 0.0214, "epoch": 1.139855841761189, "grad_norm": 0.39913856983184814, "learning_rate": 1.9689333407833718e-05, "step": 20400 }, { "embedding_loss": 0.0224, "epoch": 1.1426496060792313, "grad_norm": 0.461458683013916, "learning_rate": 1.968312504268251e-05, "step": 20450 }, { "embedding_loss": 0.0235, "epoch": 1.1454433703972733, "grad_norm": 0.7948645949363708, "learning_rate": 1.967691667753131e-05, "step": 20500 }, { "embedding_loss": 0.0214, "epoch": 1.1482371347153153, "grad_norm": 0.8324529528617859, "learning_rate": 1.9670708312380103e-05, "step": 20550 }, { "embedding_loss": 0.0219, "epoch": 1.1510308990333575, "grad_norm": 0.4235970675945282, "learning_rate": 1.96644999472289e-05, "step": 20600 }, { "embedding_loss": 0.0185, "epoch": 1.1538246633513998, "grad_norm": 0.5435129404067993, "learning_rate": 1.9658291582077694e-05, "step": 20650 }, { "embedding_loss": 0.0222, "epoch": 1.1566184276694418, "grad_norm": 0.6186907887458801, "learning_rate": 1.9652083216926488e-05, "step": 20700 }, { "embedding_loss": 0.0225, "epoch": 1.159412191987484, "grad_norm": 0.7346696257591248, "learning_rate": 1.9645874851775285e-05, "step": 20750 }, { "embedding_loss": 0.0251, "epoch": 1.162205956305526, "grad_norm": 0.9008952975273132, "learning_rate": 1.963966648662408e-05, "step": 20800 }, { "embedding_loss": 0.0231, "epoch": 1.1649997206235683, "grad_norm": 0.1596774011850357, "learning_rate": 1.9633458121472876e-05, "step": 20850 }, { "embedding_loss": 0.0228, "epoch": 1.1677934849416103, "grad_norm": 0.550773024559021, "learning_rate": 1.962724975632167e-05, "step": 20900 }, { "embedding_loss": 0.0208, "epoch": 1.1705872492596525, "grad_norm": 0.7559389472007751, "learning_rate": 1.9621041391170467e-05, "step": 20950 }, { "embedding_loss": 0.0242, "epoch": 1.1733810135776945, "grad_norm": 0.5092435479164124, "learning_rate": 1.961483302601926e-05, "step": 21000 }, { "embedding_loss": 0.0213, "epoch": 1.1761747778957368, "grad_norm": 0.656133234500885, "learning_rate": 1.9608624660868054e-05, "step": 21050 }, { "embedding_loss": 0.0217, "epoch": 1.1789685422137788, "grad_norm": 0.43566805124282837, "learning_rate": 1.960241629571685e-05, "step": 21100 }, { "embedding_loss": 0.0218, "epoch": 1.181762306531821, "grad_norm": 0.6337034702301025, "learning_rate": 1.9596207930565645e-05, "step": 21150 }, { "embedding_loss": 0.021, "epoch": 1.184556070849863, "grad_norm": 0.37844836711883545, "learning_rate": 1.9589999565414443e-05, "step": 21200 }, { "embedding_loss": 0.0187, "epoch": 1.1873498351679053, "grad_norm": 0.46688345074653625, "learning_rate": 1.9583791200263237e-05, "step": 21250 }, { "embedding_loss": 0.0212, "epoch": 1.1901435994859473, "grad_norm": 0.8208021521568298, "learning_rate": 1.9577582835112034e-05, "step": 21300 }, { "embedding_loss": 0.0219, "epoch": 1.1929373638039895, "grad_norm": 0.49757200479507446, "learning_rate": 1.9571374469960828e-05, "step": 21350 }, { "embedding_loss": 0.0229, "epoch": 1.1957311281220315, "grad_norm": 1.394100546836853, "learning_rate": 1.9565166104809625e-05, "step": 21400 }, { "embedding_loss": 0.0225, "epoch": 1.1985248924400738, "grad_norm": 1.7984087467193604, "learning_rate": 1.955895773965842e-05, "step": 21450 }, { "embedding_loss": 0.022, "epoch": 1.201318656758116, "grad_norm": 5.821490287780762, "learning_rate": 1.9552749374507212e-05, "step": 21500 }, { "embedding_loss": 0.0216, "epoch": 1.204112421076158, "grad_norm": 0.7248533964157104, "learning_rate": 1.954654100935601e-05, "step": 21550 }, { "embedding_loss": 0.021, "epoch": 1.2069061853942, "grad_norm": 0.35106468200683594, "learning_rate": 1.9540332644204803e-05, "step": 21600 }, { "embedding_loss": 0.0219, "epoch": 1.2096999497122423, "grad_norm": 0.24790988862514496, "learning_rate": 1.95341242790536e-05, "step": 21650 }, { "embedding_loss": 0.0203, "epoch": 1.2124937140302845, "grad_norm": 0.7155394554138184, "learning_rate": 1.9527915913902394e-05, "step": 21700 }, { "embedding_loss": 0.0202, "epoch": 1.2152874783483265, "grad_norm": 0.4455231726169586, "learning_rate": 1.9521707548751188e-05, "step": 21750 }, { "embedding_loss": 0.0206, "epoch": 1.2180812426663687, "grad_norm": 0.0884174183011055, "learning_rate": 1.9515499183599985e-05, "step": 21800 }, { "embedding_loss": 0.0203, "epoch": 1.2208750069844108, "grad_norm": 0.6101768612861633, "learning_rate": 1.950929081844878e-05, "step": 21850 }, { "embedding_loss": 0.0203, "epoch": 1.223668771302453, "grad_norm": 0.4541184604167938, "learning_rate": 1.9503082453297577e-05, "step": 21900 }, { "embedding_loss": 0.0196, "epoch": 1.226462535620495, "grad_norm": 0.5021658539772034, "learning_rate": 1.949687408814637e-05, "step": 21950 }, { "embedding_loss": 0.0189, "epoch": 1.2292562999385372, "grad_norm": 0.21004731953144073, "learning_rate": 1.9490665722995164e-05, "step": 22000 }, { "embedding_loss": 0.0217, "epoch": 1.2320500642565793, "grad_norm": 0.19675633311271667, "learning_rate": 1.948445735784396e-05, "step": 22050 }, { "embedding_loss": 0.0213, "epoch": 1.2348438285746215, "grad_norm": 0.2407734990119934, "learning_rate": 1.9478248992692755e-05, "step": 22100 }, { "embedding_loss": 0.0201, "epoch": 1.2376375928926635, "grad_norm": 0.5084452033042908, "learning_rate": 1.9472040627541552e-05, "step": 22150 }, { "embedding_loss": 0.0201, "epoch": 1.2404313572107057, "grad_norm": 0.49780312180519104, "learning_rate": 1.9465832262390346e-05, "step": 22200 }, { "embedding_loss": 0.0206, "epoch": 1.2432251215287478, "grad_norm": 0.61985182762146, "learning_rate": 1.945962389723914e-05, "step": 22250 }, { "embedding_loss": 0.023, "epoch": 1.24601888584679, "grad_norm": 0.6102951765060425, "learning_rate": 1.9453415532087937e-05, "step": 22300 }, { "embedding_loss": 0.0201, "epoch": 1.248812650164832, "grad_norm": 0.328079491853714, "learning_rate": 1.944720716693673e-05, "step": 22350 }, { "embedding_loss": 0.0202, "epoch": 1.2516064144828742, "grad_norm": 0.2907216250896454, "learning_rate": 1.9440998801785528e-05, "step": 22400 }, { "embedding_loss": 0.0214, "epoch": 1.2544001788009163, "grad_norm": 0.2377871870994568, "learning_rate": 1.9434790436634322e-05, "step": 22450 }, { "embedding_loss": 0.0183, "epoch": 1.2571939431189585, "grad_norm": 1.405790090560913, "learning_rate": 1.9428582071483116e-05, "step": 22500 }, { "embedding_loss": 0.0187, "epoch": 1.2599877074370007, "grad_norm": 0.49854809045791626, "learning_rate": 1.9422373706331913e-05, "step": 22550 }, { "embedding_loss": 0.0214, "epoch": 1.2627814717550427, "grad_norm": 0.5629355907440186, "learning_rate": 1.9416165341180707e-05, "step": 22600 }, { "embedding_loss": 0.0198, "epoch": 1.2655752360730848, "grad_norm": 0.4312259554862976, "learning_rate": 1.9409956976029504e-05, "step": 22650 }, { "embedding_loss": 0.0221, "epoch": 1.268369000391127, "grad_norm": 1.120496392250061, "learning_rate": 1.9403748610878298e-05, "step": 22700 }, { "embedding_loss": 0.0189, "epoch": 1.2711627647091692, "grad_norm": 0.4540638327598572, "learning_rate": 1.9397540245727092e-05, "step": 22750 }, { "embedding_loss": 0.0206, "epoch": 1.2739565290272112, "grad_norm": 0.2852421998977661, "learning_rate": 1.939133188057589e-05, "step": 22800 }, { "embedding_loss": 0.0205, "epoch": 1.2767502933452535, "grad_norm": 0.40443259477615356, "learning_rate": 1.9385123515424683e-05, "step": 22850 }, { "embedding_loss": 0.0221, "epoch": 1.2795440576632955, "grad_norm": 0.8682653903961182, "learning_rate": 1.937891515027348e-05, "step": 22900 }, { "embedding_loss": 0.0196, "epoch": 1.2823378219813377, "grad_norm": 0.2873919606208801, "learning_rate": 1.9372706785122274e-05, "step": 22950 }, { "embedding_loss": 0.0211, "epoch": 1.2851315862993797, "grad_norm": 0.5685202479362488, "learning_rate": 1.936649841997107e-05, "step": 23000 }, { "embedding_loss": 0.0208, "epoch": 1.287925350617422, "grad_norm": 0.8115965723991394, "learning_rate": 1.9360290054819865e-05, "step": 23050 }, { "embedding_loss": 0.0228, "epoch": 1.290719114935464, "grad_norm": 0.37130454182624817, "learning_rate": 1.935408168966866e-05, "step": 23100 }, { "embedding_loss": 0.0191, "epoch": 1.2935128792535062, "grad_norm": 0.3241022527217865, "learning_rate": 1.9347873324517456e-05, "step": 23150 }, { "embedding_loss": 0.0197, "epoch": 1.2963066435715482, "grad_norm": 0.19676360487937927, "learning_rate": 1.934166495936625e-05, "step": 23200 }, { "embedding_loss": 0.0213, "epoch": 1.2991004078895905, "grad_norm": 3.4450273513793945, "learning_rate": 1.9335456594215047e-05, "step": 23250 }, { "embedding_loss": 0.0212, "epoch": 1.3018941722076325, "grad_norm": 0.13377247750759125, "learning_rate": 1.932924822906384e-05, "step": 23300 }, { "embedding_loss": 0.0199, "epoch": 1.3046879365256747, "grad_norm": 0.5057538747787476, "learning_rate": 1.9323039863912638e-05, "step": 23350 }, { "embedding_loss": 0.0221, "epoch": 1.307481700843717, "grad_norm": 0.6099614500999451, "learning_rate": 1.9316831498761432e-05, "step": 23400 }, { "embedding_loss": 0.0178, "epoch": 1.310275465161759, "grad_norm": 0.40226316452026367, "learning_rate": 1.931062313361023e-05, "step": 23450 }, { "embedding_loss": 0.0185, "epoch": 1.313069229479801, "grad_norm": 0.44140177965164185, "learning_rate": 1.9304414768459023e-05, "step": 23500 }, { "embedding_loss": 0.021, "epoch": 1.3158629937978432, "grad_norm": 0.32889246940612793, "learning_rate": 1.9298206403307817e-05, "step": 23550 }, { "embedding_loss": 0.019, "epoch": 1.3186567581158855, "grad_norm": 0.2669200599193573, "learning_rate": 1.9291998038156614e-05, "step": 23600 }, { "embedding_loss": 0.0219, "epoch": 1.3214505224339275, "grad_norm": 0.3184114396572113, "learning_rate": 1.9285789673005408e-05, "step": 23650 }, { "embedding_loss": 0.0188, "epoch": 1.3242442867519695, "grad_norm": 0.7942718863487244, "learning_rate": 1.9279581307854205e-05, "step": 23700 }, { "embedding_loss": 0.0213, "epoch": 1.3270380510700117, "grad_norm": 0.13775759935379028, "learning_rate": 1.9273372942703e-05, "step": 23750 }, { "embedding_loss": 0.0214, "epoch": 1.329831815388054, "grad_norm": 1.1302516460418701, "learning_rate": 1.9267164577551796e-05, "step": 23800 }, { "embedding_loss": 0.0205, "epoch": 1.332625579706096, "grad_norm": 0.8058004379272461, "learning_rate": 1.926095621240059e-05, "step": 23850 }, { "embedding_loss": 0.0215, "epoch": 1.3354193440241382, "grad_norm": 0.09378646314144135, "learning_rate": 1.9254747847249387e-05, "step": 23900 }, { "embedding_loss": 0.0215, "epoch": 1.3382131083421802, "grad_norm": 0.6421366333961487, "learning_rate": 1.924853948209818e-05, "step": 23950 }, { "embedding_loss": 0.0221, "epoch": 1.3410068726602224, "grad_norm": 0.38051411509513855, "learning_rate": 1.9242331116946975e-05, "step": 24000 }, { "embedding_loss": 0.0194, "epoch": 1.3438006369782645, "grad_norm": 0.7721781134605408, "learning_rate": 1.9236122751795772e-05, "step": 24050 }, { "embedding_loss": 0.0185, "epoch": 1.3465944012963067, "grad_norm": 0.4514118432998657, "learning_rate": 1.9229914386644566e-05, "step": 24100 }, { "embedding_loss": 0.0207, "epoch": 1.3493881656143487, "grad_norm": 0.5625707507133484, "learning_rate": 1.9223706021493363e-05, "step": 24150 }, { "embedding_loss": 0.0209, "epoch": 1.352181929932391, "grad_norm": 0.30542677640914917, "learning_rate": 1.9217497656342157e-05, "step": 24200 }, { "embedding_loss": 0.02, "epoch": 1.354975694250433, "grad_norm": 0.9647425413131714, "learning_rate": 1.9211289291190954e-05, "step": 24250 }, { "embedding_loss": 0.0182, "epoch": 1.3577694585684752, "grad_norm": 0.6090138554573059, "learning_rate": 1.9205080926039748e-05, "step": 24300 }, { "embedding_loss": 0.0226, "epoch": 1.3605632228865172, "grad_norm": 0.7832806706428528, "learning_rate": 1.919887256088854e-05, "step": 24350 }, { "embedding_loss": 0.0186, "epoch": 1.3633569872045594, "grad_norm": 0.7517365217208862, "learning_rate": 1.919266419573734e-05, "step": 24400 }, { "embedding_loss": 0.0185, "epoch": 1.3661507515226017, "grad_norm": 0.1515021175146103, "learning_rate": 1.9186455830586132e-05, "step": 24450 }, { "embedding_loss": 0.021, "epoch": 1.3689445158406437, "grad_norm": 0.5233121514320374, "learning_rate": 1.918024746543493e-05, "step": 24500 }, { "embedding_loss": 0.0203, "epoch": 1.3717382801586857, "grad_norm": 0.44394803047180176, "learning_rate": 1.9174039100283724e-05, "step": 24550 }, { "embedding_loss": 0.0181, "epoch": 1.374532044476728, "grad_norm": 0.41645872592926025, "learning_rate": 1.916783073513252e-05, "step": 24600 }, { "embedding_loss": 0.0174, "epoch": 1.3773258087947702, "grad_norm": 0.46079280972480774, "learning_rate": 1.9161622369981315e-05, "step": 24650 }, { "embedding_loss": 0.0174, "epoch": 1.3801195731128122, "grad_norm": 0.6378666758537292, "learning_rate": 1.9155414004830112e-05, "step": 24700 }, { "embedding_loss": 0.0191, "epoch": 1.3829133374308542, "grad_norm": 1.0175073146820068, "learning_rate": 1.9149205639678906e-05, "step": 24750 }, { "embedding_loss": 0.0201, "epoch": 1.3857071017488964, "grad_norm": 0.6906529664993286, "learning_rate": 1.91429972745277e-05, "step": 24800 }, { "embedding_loss": 0.0198, "epoch": 1.3885008660669387, "grad_norm": 0.27532076835632324, "learning_rate": 1.9136788909376497e-05, "step": 24850 }, { "embedding_loss": 0.0184, "epoch": 1.3912946303849807, "grad_norm": 0.16508086025714874, "learning_rate": 1.913058054422529e-05, "step": 24900 }, { "embedding_loss": 0.0168, "epoch": 1.394088394703023, "grad_norm": 0.7144390940666199, "learning_rate": 1.9124372179074088e-05, "step": 24950 }, { "embedding_loss": 0.0192, "epoch": 1.396882159021065, "grad_norm": 0.2899884879589081, "learning_rate": 1.911816381392288e-05, "step": 25000 }, { "embedding_loss": 0.0222, "epoch": 1.3996759233391072, "grad_norm": 3.6277880668640137, "learning_rate": 1.911195544877168e-05, "step": 25050 }, { "embedding_loss": 0.0213, "epoch": 1.4024696876571492, "grad_norm": 0.9636306166648865, "learning_rate": 1.9105747083620472e-05, "step": 25100 }, { "embedding_loss": 0.0209, "epoch": 1.4052634519751914, "grad_norm": 1.4603201150894165, "learning_rate": 1.9099538718469266e-05, "step": 25150 }, { "embedding_loss": 0.0177, "epoch": 1.4080572162932334, "grad_norm": 0.623721182346344, "learning_rate": 1.9093330353318064e-05, "step": 25200 }, { "embedding_loss": 0.0206, "epoch": 1.4108509806112757, "grad_norm": 0.7524457573890686, "learning_rate": 1.9087121988166857e-05, "step": 25250 }, { "embedding_loss": 0.0183, "epoch": 1.4136447449293177, "grad_norm": 0.24997088313102722, "learning_rate": 1.9080913623015655e-05, "step": 25300 }, { "embedding_loss": 0.0192, "epoch": 1.41643850924736, "grad_norm": 0.291226863861084, "learning_rate": 1.907470525786445e-05, "step": 25350 }, { "embedding_loss": 0.0188, "epoch": 1.419232273565402, "grad_norm": 0.18925362825393677, "learning_rate": 1.9068496892713246e-05, "step": 25400 }, { "embedding_loss": 0.0167, "epoch": 1.4220260378834442, "grad_norm": 0.8301992416381836, "learning_rate": 1.906228852756204e-05, "step": 25450 }, { "embedding_loss": 0.0195, "epoch": 1.4248198022014864, "grad_norm": 0.2633180022239685, "learning_rate": 1.9056080162410837e-05, "step": 25500 }, { "embedding_loss": 0.0195, "epoch": 1.4276135665195284, "grad_norm": 0.5987516641616821, "learning_rate": 1.904987179725963e-05, "step": 25550 }, { "embedding_loss": 0.0184, "epoch": 1.4304073308375704, "grad_norm": 0.2670483887195587, "learning_rate": 1.9043663432108424e-05, "step": 25600 }, { "embedding_loss": 0.0189, "epoch": 1.4332010951556127, "grad_norm": 0.3217717111110687, "learning_rate": 1.903745506695722e-05, "step": 25650 }, { "embedding_loss": 0.0172, "epoch": 1.435994859473655, "grad_norm": 0.2741433084011078, "learning_rate": 1.9031246701806015e-05, "step": 25700 }, { "embedding_loss": 0.0175, "epoch": 1.438788623791697, "grad_norm": 1.8575490713119507, "learning_rate": 1.9025038336654812e-05, "step": 25750 }, { "embedding_loss": 0.0184, "epoch": 1.441582388109739, "grad_norm": 0.16026869416236877, "learning_rate": 1.9018829971503606e-05, "step": 25800 }, { "embedding_loss": 0.0168, "epoch": 1.4443761524277812, "grad_norm": 0.21690744161605835, "learning_rate": 1.90126216063524e-05, "step": 25850 }, { "embedding_loss": 0.0166, "epoch": 1.4471699167458234, "grad_norm": 0.38410404324531555, "learning_rate": 1.9006413241201197e-05, "step": 25900 }, { "embedding_loss": 0.0182, "epoch": 1.4499636810638654, "grad_norm": 0.3757111430168152, "learning_rate": 1.900020487604999e-05, "step": 25950 }, { "embedding_loss": 0.0176, "epoch": 1.4527574453819077, "grad_norm": 0.9384256601333618, "learning_rate": 1.899399651089879e-05, "step": 26000 }, { "embedding_loss": 0.0186, "epoch": 1.4555512096999497, "grad_norm": 0.48053303360939026, "learning_rate": 1.8987788145747582e-05, "step": 26050 }, { "embedding_loss": 0.0177, "epoch": 1.458344974017992, "grad_norm": 0.9136824011802673, "learning_rate": 1.8981579780596376e-05, "step": 26100 }, { "embedding_loss": 0.0179, "epoch": 1.461138738336034, "grad_norm": 0.122981496155262, "learning_rate": 1.8975371415445173e-05, "step": 26150 }, { "embedding_loss": 0.0193, "epoch": 1.4639325026540762, "grad_norm": 0.3835006058216095, "learning_rate": 1.8969163050293967e-05, "step": 26200 }, { "embedding_loss": 0.02, "epoch": 1.4667262669721182, "grad_norm": 2.1942758560180664, "learning_rate": 1.8962954685142764e-05, "step": 26250 }, { "embedding_loss": 0.0184, "epoch": 1.4695200312901604, "grad_norm": 0.13636654615402222, "learning_rate": 1.8956746319991558e-05, "step": 26300 }, { "embedding_loss": 0.0159, "epoch": 1.4723137956082024, "grad_norm": 1.1346547603607178, "learning_rate": 1.8950537954840352e-05, "step": 26350 }, { "embedding_loss": 0.0179, "epoch": 1.4751075599262446, "grad_norm": 5.024942874908447, "learning_rate": 1.894432958968915e-05, "step": 26400 }, { "embedding_loss": 0.0181, "epoch": 1.4779013242442867, "grad_norm": 0.34391865134239197, "learning_rate": 1.8938121224537943e-05, "step": 26450 }, { "embedding_loss": 0.0164, "epoch": 1.480695088562329, "grad_norm": 0.2943739593029022, "learning_rate": 1.893191285938674e-05, "step": 26500 }, { "embedding_loss": 0.0174, "epoch": 1.4834888528803711, "grad_norm": 0.2632891833782196, "learning_rate": 1.8925704494235534e-05, "step": 26550 }, { "embedding_loss": 0.0191, "epoch": 1.4862826171984131, "grad_norm": 0.22468765079975128, "learning_rate": 1.8919496129084328e-05, "step": 26600 }, { "embedding_loss": 0.0203, "epoch": 1.4890763815164552, "grad_norm": 0.3441232144832611, "learning_rate": 1.8913287763933125e-05, "step": 26650 }, { "embedding_loss": 0.0209, "epoch": 1.4918701458344974, "grad_norm": 0.20216508209705353, "learning_rate": 1.890707939878192e-05, "step": 26700 }, { "embedding_loss": 0.019, "epoch": 1.4946639101525396, "grad_norm": 0.36148911714553833, "learning_rate": 1.8900871033630716e-05, "step": 26750 }, { "embedding_loss": 0.0192, "epoch": 1.4974576744705816, "grad_norm": 0.10498618334531784, "learning_rate": 1.889466266847951e-05, "step": 26800 }, { "embedding_loss": 0.0194, "epoch": 1.5002514387886237, "grad_norm": 0.5073844790458679, "learning_rate": 1.8888454303328304e-05, "step": 26850 }, { "embedding_loss": 0.0197, "epoch": 1.503045203106666, "grad_norm": 0.3024880290031433, "learning_rate": 1.88822459381771e-05, "step": 26900 }, { "embedding_loss": 0.0184, "epoch": 1.5058389674247081, "grad_norm": 0.29277968406677246, "learning_rate": 1.8876037573025895e-05, "step": 26950 }, { "embedding_loss": 0.0167, "epoch": 1.5086327317427501, "grad_norm": 0.25764644145965576, "learning_rate": 1.8869829207874692e-05, "step": 27000 }, { "embedding_loss": 0.0189, "epoch": 1.5114264960607922, "grad_norm": 0.2962287664413452, "learning_rate": 1.8863620842723486e-05, "step": 27050 }, { "embedding_loss": 0.0199, "epoch": 1.5142202603788344, "grad_norm": 0.3723820447921753, "learning_rate": 1.8857412477572283e-05, "step": 27100 }, { "embedding_loss": 0.02, "epoch": 1.5170140246968766, "grad_norm": 1.4173369407653809, "learning_rate": 1.8851204112421077e-05, "step": 27150 }, { "embedding_loss": 0.0188, "epoch": 1.5198077890149189, "grad_norm": 0.20938609540462494, "learning_rate": 1.884499574726987e-05, "step": 27200 }, { "embedding_loss": 0.0212, "epoch": 1.5226015533329609, "grad_norm": 0.38081687688827515, "learning_rate": 1.8838787382118668e-05, "step": 27250 }, { "embedding_loss": 0.0196, "epoch": 1.525395317651003, "grad_norm": 0.09580808877944946, "learning_rate": 1.883257901696746e-05, "step": 27300 }, { "embedding_loss": 0.0204, "epoch": 1.5281890819690451, "grad_norm": 0.7693958282470703, "learning_rate": 1.882637065181626e-05, "step": 27350 }, { "embedding_loss": 0.018, "epoch": 1.5309828462870874, "grad_norm": 0.16201548278331757, "learning_rate": 1.8820162286665053e-05, "step": 27400 }, { "embedding_loss": 0.0196, "epoch": 1.5337766106051294, "grad_norm": 0.34473276138305664, "learning_rate": 1.881395392151385e-05, "step": 27450 }, { "embedding_loss": 0.0179, "epoch": 1.5365703749231714, "grad_norm": 0.13582195341587067, "learning_rate": 1.8807745556362644e-05, "step": 27500 }, { "embedding_loss": 0.0187, "epoch": 1.5393641392412136, "grad_norm": 0.28073862195014954, "learning_rate": 1.880153719121144e-05, "step": 27550 }, { "embedding_loss": 0.0189, "epoch": 1.5421579035592559, "grad_norm": 0.1607777625322342, "learning_rate": 1.8795328826060235e-05, "step": 27600 }, { "embedding_loss": 0.0168, "epoch": 1.5449516678772979, "grad_norm": 1.4966450929641724, "learning_rate": 1.878912046090903e-05, "step": 27650 }, { "embedding_loss": 0.018, "epoch": 1.5477454321953399, "grad_norm": 0.20089447498321533, "learning_rate": 1.8782912095757826e-05, "step": 27700 }, { "embedding_loss": 0.0178, "epoch": 1.5505391965133821, "grad_norm": 0.34955987334251404, "learning_rate": 1.877670373060662e-05, "step": 27750 }, { "embedding_loss": 0.0181, "epoch": 1.5533329608314244, "grad_norm": 0.22585314512252808, "learning_rate": 1.8770495365455417e-05, "step": 27800 }, { "embedding_loss": 0.0186, "epoch": 1.5561267251494664, "grad_norm": 0.1817016750574112, "learning_rate": 1.876428700030421e-05, "step": 27850 }, { "embedding_loss": 0.019, "epoch": 1.5589204894675084, "grad_norm": 0.3051886558532715, "learning_rate": 1.8758078635153008e-05, "step": 27900 }, { "embedding_loss": 0.0202, "epoch": 1.5617142537855506, "grad_norm": 0.1089596077799797, "learning_rate": 1.87518702700018e-05, "step": 27950 }, { "embedding_loss": 0.0159, "epoch": 1.5645080181035929, "grad_norm": 0.136824831366539, "learning_rate": 1.87456619048506e-05, "step": 28000 }, { "embedding_loss": 0.0204, "epoch": 1.5673017824216349, "grad_norm": 0.44312921166419983, "learning_rate": 1.8739453539699393e-05, "step": 28050 }, { "embedding_loss": 0.0182, "epoch": 1.5700955467396769, "grad_norm": 0.2167445868253708, "learning_rate": 1.8733245174548186e-05, "step": 28100 }, { "embedding_loss": 0.021, "epoch": 1.5728893110577191, "grad_norm": 0.3728499114513397, "learning_rate": 1.8727036809396984e-05, "step": 28150 }, { "embedding_loss": 0.0168, "epoch": 1.5756830753757614, "grad_norm": 0.20096856355667114, "learning_rate": 1.8720828444245777e-05, "step": 28200 }, { "embedding_loss": 0.0196, "epoch": 1.5784768396938036, "grad_norm": 0.175032839179039, "learning_rate": 1.8714620079094575e-05, "step": 28250 }, { "embedding_loss": 0.0179, "epoch": 1.5812706040118456, "grad_norm": 0.33304959535598755, "learning_rate": 1.870841171394337e-05, "step": 28300 }, { "embedding_loss": 0.019, "epoch": 1.5840643683298876, "grad_norm": 0.27643686532974243, "learning_rate": 1.8702203348792166e-05, "step": 28350 }, { "embedding_loss": 0.0193, "epoch": 1.5868581326479299, "grad_norm": 0.6751495599746704, "learning_rate": 1.869599498364096e-05, "step": 28400 }, { "embedding_loss": 0.0196, "epoch": 1.589651896965972, "grad_norm": 0.6884339451789856, "learning_rate": 1.8689786618489753e-05, "step": 28450 }, { "embedding_loss": 0.0213, "epoch": 1.592445661284014, "grad_norm": 0.135458841919899, "learning_rate": 1.868357825333855e-05, "step": 28500 }, { "embedding_loss": 0.0191, "epoch": 1.5952394256020561, "grad_norm": 0.10850473493337631, "learning_rate": 1.8677369888187344e-05, "step": 28550 }, { "embedding_loss": 0.0164, "epoch": 1.5980331899200984, "grad_norm": 0.21155652403831482, "learning_rate": 1.867116152303614e-05, "step": 28600 }, { "embedding_loss": 0.0173, "epoch": 1.6008269542381406, "grad_norm": 0.25629594922065735, "learning_rate": 1.8664953157884935e-05, "step": 28650 }, { "embedding_loss": 0.0194, "epoch": 1.6036207185561826, "grad_norm": 0.2642464339733124, "learning_rate": 1.8658744792733733e-05, "step": 28700 }, { "embedding_loss": 0.0196, "epoch": 1.6064144828742246, "grad_norm": 0.21719704568386078, "learning_rate": 1.8652536427582526e-05, "step": 28750 }, { "embedding_loss": 0.0196, "epoch": 1.6092082471922668, "grad_norm": 0.41360512375831604, "learning_rate": 1.8646328062431324e-05, "step": 28800 }, { "embedding_loss": 0.0176, "epoch": 1.612002011510309, "grad_norm": 0.3370055556297302, "learning_rate": 1.8640119697280117e-05, "step": 28850 }, { "embedding_loss": 0.0184, "epoch": 1.614795775828351, "grad_norm": 2.0176546573638916, "learning_rate": 1.863391133212891e-05, "step": 28900 }, { "embedding_loss": 0.0179, "epoch": 1.6175895401463931, "grad_norm": 0.05891651287674904, "learning_rate": 1.862770296697771e-05, "step": 28950 }, { "embedding_loss": 0.0161, "epoch": 1.6203833044644353, "grad_norm": 0.10475711524486542, "learning_rate": 1.8621494601826502e-05, "step": 29000 }, { "embedding_loss": 0.0192, "epoch": 1.6231770687824776, "grad_norm": 0.22280821204185486, "learning_rate": 1.86152862366753e-05, "step": 29050 }, { "embedding_loss": 0.0159, "epoch": 1.6259708331005196, "grad_norm": 0.10911466926336288, "learning_rate": 1.8609077871524093e-05, "step": 29100 }, { "embedding_loss": 0.02, "epoch": 1.6287645974185616, "grad_norm": 0.06282296776771545, "learning_rate": 1.860286950637289e-05, "step": 29150 }, { "embedding_loss": 0.0193, "epoch": 1.6315583617366038, "grad_norm": 0.2183886468410492, "learning_rate": 1.8596661141221684e-05, "step": 29200 }, { "embedding_loss": 0.0172, "epoch": 1.634352126054646, "grad_norm": 0.6863654851913452, "learning_rate": 1.8590452776070478e-05, "step": 29250 }, { "embedding_loss": 0.0163, "epoch": 1.6371458903726883, "grad_norm": 0.2134913206100464, "learning_rate": 1.8584244410919275e-05, "step": 29300 }, { "embedding_loss": 0.0176, "epoch": 1.6399396546907303, "grad_norm": 0.28159552812576294, "learning_rate": 1.857803604576807e-05, "step": 29350 }, { "embedding_loss": 0.0184, "epoch": 1.6427334190087723, "grad_norm": 0.1960321068763733, "learning_rate": 1.8571827680616866e-05, "step": 29400 }, { "embedding_loss": 0.0195, "epoch": 1.6455271833268146, "grad_norm": 0.2049027532339096, "learning_rate": 1.856561931546566e-05, "step": 29450 }, { "embedding_loss": 0.0162, "epoch": 1.6483209476448568, "grad_norm": 0.3159572184085846, "learning_rate": 1.8559410950314457e-05, "step": 29500 }, { "embedding_loss": 0.0175, "epoch": 1.6511147119628988, "grad_norm": 0.1894741803407669, "learning_rate": 1.855320258516325e-05, "step": 29550 }, { "embedding_loss": 0.0184, "epoch": 1.6539084762809408, "grad_norm": 0.3497653305530548, "learning_rate": 1.8546994220012045e-05, "step": 29600 }, { "embedding_loss": 0.0179, "epoch": 1.656702240598983, "grad_norm": 0.11140868067741394, "learning_rate": 1.8540785854860842e-05, "step": 29650 }, { "embedding_loss": 0.0199, "epoch": 1.6594960049170253, "grad_norm": 0.4422706961631775, "learning_rate": 1.8534577489709636e-05, "step": 29700 }, { "embedding_loss": 0.0182, "epoch": 1.6622897692350673, "grad_norm": 0.17647583782672882, "learning_rate": 1.8528369124558433e-05, "step": 29750 }, { "embedding_loss": 0.0178, "epoch": 1.6650835335531093, "grad_norm": 0.14210085570812225, "learning_rate": 1.8522160759407227e-05, "step": 29800 }, { "embedding_loss": 0.0165, "epoch": 1.6678772978711516, "grad_norm": 0.21363265812397003, "learning_rate": 1.8515952394256024e-05, "step": 29850 }, { "embedding_loss": 0.0174, "epoch": 1.6706710621891938, "grad_norm": 0.2702767252922058, "learning_rate": 1.8509744029104818e-05, "step": 29900 }, { "embedding_loss": 0.0189, "epoch": 1.6734648265072358, "grad_norm": 0.25645509362220764, "learning_rate": 1.8503535663953612e-05, "step": 29950 }, { "embedding_loss": 0.0185, "epoch": 1.6762585908252778, "grad_norm": 1.7668488025665283, "learning_rate": 1.849732729880241e-05, "step": 30000 }, { "embedding_loss": 0.0189, "epoch": 1.67905235514332, "grad_norm": 1.8552756309509277, "learning_rate": 1.8491118933651203e-05, "step": 30050 }, { "embedding_loss": 0.0161, "epoch": 1.6818461194613623, "grad_norm": 0.5067685842514038, "learning_rate": 1.84849105685e-05, "step": 30100 }, { "embedding_loss": 0.0182, "epoch": 1.6846398837794043, "grad_norm": 1.0411609411239624, "learning_rate": 1.8478702203348794e-05, "step": 30150 }, { "embedding_loss": 0.0187, "epoch": 1.6874336480974463, "grad_norm": 0.23365527391433716, "learning_rate": 1.8472493838197588e-05, "step": 30200 }, { "embedding_loss": 0.0158, "epoch": 1.6902274124154886, "grad_norm": 0.11145565658807755, "learning_rate": 1.8466285473046385e-05, "step": 30250 }, { "embedding_loss": 0.0199, "epoch": 1.6930211767335308, "grad_norm": 0.4872812032699585, "learning_rate": 1.846007710789518e-05, "step": 30300 }, { "embedding_loss": 0.0178, "epoch": 1.695814941051573, "grad_norm": 0.7491967082023621, "learning_rate": 1.8453868742743976e-05, "step": 30350 }, { "embedding_loss": 0.02, "epoch": 1.698608705369615, "grad_norm": 0.14150531589984894, "learning_rate": 1.844766037759277e-05, "step": 30400 }, { "embedding_loss": 0.021, "epoch": 1.701402469687657, "grad_norm": 0.2353522926568985, "learning_rate": 1.8441452012441564e-05, "step": 30450 }, { "embedding_loss": 0.0194, "epoch": 1.7041962340056993, "grad_norm": 0.21127188205718994, "learning_rate": 1.843524364729036e-05, "step": 30500 }, { "embedding_loss": 0.018, "epoch": 1.7069899983237415, "grad_norm": 0.7626825571060181, "learning_rate": 1.8429035282139155e-05, "step": 30550 }, { "embedding_loss": 0.0163, "epoch": 1.7097837626417836, "grad_norm": 0.13744695484638214, "learning_rate": 1.8422826916987952e-05, "step": 30600 }, { "embedding_loss": 0.0166, "epoch": 1.7125775269598256, "grad_norm": 0.22572438418865204, "learning_rate": 1.8416618551836746e-05, "step": 30650 }, { "embedding_loss": 0.0165, "epoch": 1.7153712912778678, "grad_norm": 0.6152960658073425, "learning_rate": 1.841041018668554e-05, "step": 30700 }, { "embedding_loss": 0.019, "epoch": 1.71816505559591, "grad_norm": 0.28139734268188477, "learning_rate": 1.8404201821534337e-05, "step": 30750 }, { "embedding_loss": 0.018, "epoch": 1.720958819913952, "grad_norm": 0.16665449738502502, "learning_rate": 1.839799345638313e-05, "step": 30800 }, { "embedding_loss": 0.0184, "epoch": 1.723752584231994, "grad_norm": 0.2068546712398529, "learning_rate": 1.8391785091231928e-05, "step": 30850 }, { "embedding_loss": 0.0174, "epoch": 1.7265463485500363, "grad_norm": 0.2481202334165573, "learning_rate": 1.838557672608072e-05, "step": 30900 }, { "embedding_loss": 0.0174, "epoch": 1.7293401128680785, "grad_norm": 0.2598095238208771, "learning_rate": 1.8379368360929515e-05, "step": 30950 }, { "embedding_loss": 0.0181, "epoch": 1.7321338771861206, "grad_norm": 0.3397521376609802, "learning_rate": 1.8373159995778313e-05, "step": 31000 }, { "embedding_loss": 0.0216, "epoch": 1.7349276415041626, "grad_norm": 0.38798826932907104, "learning_rate": 1.8366951630627107e-05, "step": 31050 }, { "embedding_loss": 0.0188, "epoch": 1.7377214058222048, "grad_norm": 0.26325905323028564, "learning_rate": 1.8360743265475904e-05, "step": 31100 }, { "embedding_loss": 0.0155, "epoch": 1.740515170140247, "grad_norm": 0.411531537771225, "learning_rate": 1.8354534900324698e-05, "step": 31150 }, { "embedding_loss": 0.0178, "epoch": 1.743308934458289, "grad_norm": 0.33340004086494446, "learning_rate": 1.8348326535173495e-05, "step": 31200 }, { "embedding_loss": 0.0176, "epoch": 1.746102698776331, "grad_norm": 0.20678170025348663, "learning_rate": 1.834211817002229e-05, "step": 31250 }, { "embedding_loss": 0.0154, "epoch": 1.7488964630943733, "grad_norm": 0.41241928935050964, "learning_rate": 1.8335909804871082e-05, "step": 31300 }, { "embedding_loss": 0.0198, "epoch": 1.7516902274124155, "grad_norm": 0.20526094734668732, "learning_rate": 1.832970143971988e-05, "step": 31350 }, { "embedding_loss": 0.0195, "epoch": 1.7544839917304578, "grad_norm": 0.19149324297904968, "learning_rate": 1.8323493074568673e-05, "step": 31400 }, { "embedding_loss": 0.0165, "epoch": 1.7572777560484998, "grad_norm": 0.17235389351844788, "learning_rate": 1.831728470941747e-05, "step": 31450 }, { "embedding_loss": 0.0186, "epoch": 1.7600715203665418, "grad_norm": 0.30065709352493286, "learning_rate": 1.8311076344266264e-05, "step": 31500 }, { "embedding_loss": 0.0161, "epoch": 1.762865284684584, "grad_norm": 0.27837637066841125, "learning_rate": 1.830486797911506e-05, "step": 31550 }, { "embedding_loss": 0.0186, "epoch": 1.7656590490026263, "grad_norm": 0.24076522886753082, "learning_rate": 1.8298659613963855e-05, "step": 31600 }, { "embedding_loss": 0.0179, "epoch": 1.7684528133206683, "grad_norm": 0.09771089255809784, "learning_rate": 1.8292451248812653e-05, "step": 31650 }, { "embedding_loss": 0.0173, "epoch": 1.7712465776387103, "grad_norm": 3.116872787475586, "learning_rate": 1.8286242883661447e-05, "step": 31700 }, { "embedding_loss": 0.0178, "epoch": 1.7740403419567525, "grad_norm": 0.29965507984161377, "learning_rate": 1.828003451851024e-05, "step": 31750 }, { "embedding_loss": 0.0194, "epoch": 1.7768341062747948, "grad_norm": 0.18285933136940002, "learning_rate": 1.8273826153359038e-05, "step": 31800 }, { "embedding_loss": 0.0167, "epoch": 1.7796278705928368, "grad_norm": 0.1719927340745926, "learning_rate": 1.826761778820783e-05, "step": 31850 }, { "embedding_loss": 0.015, "epoch": 1.7824216349108788, "grad_norm": 0.15590132772922516, "learning_rate": 1.826140942305663e-05, "step": 31900 }, { "embedding_loss": 0.0175, "epoch": 1.785215399228921, "grad_norm": 0.15089333057403564, "learning_rate": 1.8255201057905422e-05, "step": 31950 }, { "embedding_loss": 0.0184, "epoch": 1.7880091635469633, "grad_norm": 0.31235167384147644, "learning_rate": 1.824899269275422e-05, "step": 32000 }, { "embedding_loss": 0.019, "epoch": 1.7908029278650053, "grad_norm": 0.3370100259780884, "learning_rate": 1.8242784327603013e-05, "step": 32050 }, { "embedding_loss": 0.0183, "epoch": 1.7935966921830473, "grad_norm": 0.6238244771957397, "learning_rate": 1.823657596245181e-05, "step": 32100 }, { "embedding_loss": 0.0169, "epoch": 1.7963904565010895, "grad_norm": 1.3537018299102783, "learning_rate": 1.8230367597300604e-05, "step": 32150 }, { "embedding_loss": 0.0194, "epoch": 1.7991842208191318, "grad_norm": 0.1032712385058403, "learning_rate": 1.8224159232149398e-05, "step": 32200 }, { "embedding_loss": 0.0185, "epoch": 1.8019779851371738, "grad_norm": 0.43321025371551514, "learning_rate": 1.8217950866998195e-05, "step": 32250 }, { "embedding_loss": 0.0164, "epoch": 1.804771749455216, "grad_norm": 0.14839814603328705, "learning_rate": 1.821174250184699e-05, "step": 32300 }, { "embedding_loss": 0.0162, "epoch": 1.807565513773258, "grad_norm": 0.10876394808292389, "learning_rate": 1.8205534136695786e-05, "step": 32350 }, { "embedding_loss": 0.0165, "epoch": 1.8103592780913003, "grad_norm": 0.06760668009519577, "learning_rate": 1.819932577154458e-05, "step": 32400 }, { "embedding_loss": 0.0212, "epoch": 1.8131530424093425, "grad_norm": 3.596940040588379, "learning_rate": 1.8193117406393378e-05, "step": 32450 }, { "embedding_loss": 0.017, "epoch": 1.8159468067273845, "grad_norm": 0.35411378741264343, "learning_rate": 1.818690904124217e-05, "step": 32500 }, { "embedding_loss": 0.0181, "epoch": 1.8187405710454265, "grad_norm": 0.5256936550140381, "learning_rate": 1.8180700676090965e-05, "step": 32550 }, { "embedding_loss": 0.0168, "epoch": 1.8215343353634688, "grad_norm": 1.6288228034973145, "learning_rate": 1.8174492310939762e-05, "step": 32600 }, { "embedding_loss": 0.0168, "epoch": 1.824328099681511, "grad_norm": 0.28392860293388367, "learning_rate": 1.8168283945788556e-05, "step": 32650 }, { "embedding_loss": 0.0192, "epoch": 1.827121863999553, "grad_norm": 0.8438057899475098, "learning_rate": 1.8162075580637353e-05, "step": 32700 }, { "embedding_loss": 0.0167, "epoch": 1.829915628317595, "grad_norm": 0.3050737679004669, "learning_rate": 1.8155867215486147e-05, "step": 32750 }, { "embedding_loss": 0.0179, "epoch": 1.8327093926356373, "grad_norm": 1.0144239664077759, "learning_rate": 1.8149658850334944e-05, "step": 32800 }, { "embedding_loss": 0.0184, "epoch": 1.8355031569536795, "grad_norm": 0.25000908970832825, "learning_rate": 1.8143450485183738e-05, "step": 32850 }, { "embedding_loss": 0.0154, "epoch": 1.8382969212717215, "grad_norm": 0.22557219862937927, "learning_rate": 1.8137242120032535e-05, "step": 32900 }, { "embedding_loss": 0.0177, "epoch": 1.8410906855897635, "grad_norm": 0.2248319387435913, "learning_rate": 1.813103375488133e-05, "step": 32950 }, { "embedding_loss": 0.017, "epoch": 1.8438844499078058, "grad_norm": 0.10214349627494812, "learning_rate": 1.8124825389730123e-05, "step": 33000 }, { "embedding_loss": 0.0192, "epoch": 1.846678214225848, "grad_norm": 0.14973880350589752, "learning_rate": 1.811861702457892e-05, "step": 33050 }, { "embedding_loss": 0.0176, "epoch": 1.84947197854389, "grad_norm": 0.07681486010551453, "learning_rate": 1.8112408659427714e-05, "step": 33100 }, { "embedding_loss": 0.0198, "epoch": 1.852265742861932, "grad_norm": 0.26297181844711304, "learning_rate": 1.810620029427651e-05, "step": 33150 }, { "embedding_loss": 0.0166, "epoch": 1.8550595071799743, "grad_norm": 1.1718438863754272, "learning_rate": 1.8099991929125305e-05, "step": 33200 }, { "embedding_loss": 0.017, "epoch": 1.8578532714980165, "grad_norm": 0.3135221004486084, "learning_rate": 1.8093783563974102e-05, "step": 33250 }, { "embedding_loss": 0.0179, "epoch": 1.8606470358160585, "grad_norm": 0.2634863257408142, "learning_rate": 1.8087575198822896e-05, "step": 33300 }, { "embedding_loss": 0.0184, "epoch": 1.8634408001341007, "grad_norm": 0.36270633339881897, "learning_rate": 1.808136683367169e-05, "step": 33350 }, { "embedding_loss": 0.0148, "epoch": 1.8662345644521428, "grad_norm": 0.08535099029541016, "learning_rate": 1.8075158468520487e-05, "step": 33400 }, { "embedding_loss": 0.0159, "epoch": 1.869028328770185, "grad_norm": 0.059227943420410156, "learning_rate": 1.806895010336928e-05, "step": 33450 }, { "embedding_loss": 0.0176, "epoch": 1.8718220930882272, "grad_norm": 0.14599014818668365, "learning_rate": 1.8062741738218078e-05, "step": 33500 }, { "embedding_loss": 0.0171, "epoch": 1.8746158574062692, "grad_norm": 0.1285848468542099, "learning_rate": 1.8056533373066872e-05, "step": 33550 }, { "embedding_loss": 0.0186, "epoch": 1.8774096217243113, "grad_norm": 0.1319340020418167, "learning_rate": 1.805032500791567e-05, "step": 33600 }, { "embedding_loss": 0.0174, "epoch": 1.8802033860423535, "grad_norm": 0.12645265460014343, "learning_rate": 1.8044116642764463e-05, "step": 33650 }, { "embedding_loss": 0.0174, "epoch": 1.8829971503603957, "grad_norm": 0.29678183794021606, "learning_rate": 1.8037908277613257e-05, "step": 33700 }, { "embedding_loss": 0.0162, "epoch": 1.8857909146784377, "grad_norm": 0.13790033757686615, "learning_rate": 1.8031699912462054e-05, "step": 33750 }, { "embedding_loss": 0.0175, "epoch": 1.8885846789964797, "grad_norm": 0.10275505483150482, "learning_rate": 1.8025491547310848e-05, "step": 33800 }, { "embedding_loss": 0.0154, "epoch": 1.891378443314522, "grad_norm": 0.10015694797039032, "learning_rate": 1.8019283182159645e-05, "step": 33850 }, { "embedding_loss": 0.0189, "epoch": 1.8941722076325642, "grad_norm": 0.15756802260875702, "learning_rate": 1.801307481700844e-05, "step": 33900 }, { "embedding_loss": 0.0165, "epoch": 1.8969659719506062, "grad_norm": 0.16792014241218567, "learning_rate": 1.8006866451857233e-05, "step": 33950 }, { "embedding_loss": 0.0199, "epoch": 1.8997597362686482, "grad_norm": 0.17371293902397156, "learning_rate": 1.800065808670603e-05, "step": 34000 }, { "embedding_loss": 0.0168, "epoch": 1.9025535005866905, "grad_norm": 0.14507268369197845, "learning_rate": 1.7994449721554824e-05, "step": 34050 }, { "embedding_loss": 0.0191, "epoch": 1.9053472649047327, "grad_norm": 1.4730290174484253, "learning_rate": 1.798824135640362e-05, "step": 34100 }, { "embedding_loss": 0.0162, "epoch": 1.9081410292227747, "grad_norm": 0.1214836984872818, "learning_rate": 1.7982032991252415e-05, "step": 34150 }, { "embedding_loss": 0.0163, "epoch": 1.9109347935408167, "grad_norm": 6.867759704589844, "learning_rate": 1.7975824626101212e-05, "step": 34200 }, { "embedding_loss": 0.0176, "epoch": 1.913728557858859, "grad_norm": 0.20122753083705902, "learning_rate": 1.7969616260950006e-05, "step": 34250 }, { "embedding_loss": 0.0162, "epoch": 1.9165223221769012, "grad_norm": 0.08978613466024399, "learning_rate": 1.79634078957988e-05, "step": 34300 }, { "embedding_loss": 0.0204, "epoch": 1.9193160864949435, "grad_norm": 0.2259296476840973, "learning_rate": 1.7957199530647597e-05, "step": 34350 }, { "embedding_loss": 0.0149, "epoch": 1.9221098508129855, "grad_norm": 0.48081275820732117, "learning_rate": 1.795099116549639e-05, "step": 34400 }, { "embedding_loss": 0.0171, "epoch": 1.9249036151310275, "grad_norm": 0.19470106065273285, "learning_rate": 1.7944782800345188e-05, "step": 34450 }, { "embedding_loss": 0.0166, "epoch": 1.9276973794490697, "grad_norm": 0.19985078275203705, "learning_rate": 1.7938574435193982e-05, "step": 34500 }, { "embedding_loss": 0.0174, "epoch": 1.930491143767112, "grad_norm": 0.16239766776561737, "learning_rate": 1.7932366070042776e-05, "step": 34550 }, { "embedding_loss": 0.0173, "epoch": 1.933284908085154, "grad_norm": 0.08894529938697815, "learning_rate": 1.7926157704891573e-05, "step": 34600 }, { "embedding_loss": 0.0198, "epoch": 1.936078672403196, "grad_norm": 0.2552766799926758, "learning_rate": 1.7919949339740367e-05, "step": 34650 }, { "embedding_loss": 0.0167, "epoch": 1.9388724367212382, "grad_norm": 0.17687025666236877, "learning_rate": 1.7913740974589164e-05, "step": 34700 }, { "embedding_loss": 0.0173, "epoch": 1.9416662010392804, "grad_norm": 0.09844773262739182, "learning_rate": 1.7907532609437958e-05, "step": 34750 }, { "embedding_loss": 0.0157, "epoch": 1.9444599653573225, "grad_norm": 0.5955054759979248, "learning_rate": 1.790132424428675e-05, "step": 34800 }, { "embedding_loss": 0.0181, "epoch": 1.9472537296753645, "grad_norm": 0.09214399755001068, "learning_rate": 1.789511587913555e-05, "step": 34850 }, { "embedding_loss": 0.0169, "epoch": 1.9500474939934067, "grad_norm": 0.18111667037010193, "learning_rate": 1.7888907513984342e-05, "step": 34900 }, { "embedding_loss": 0.0189, "epoch": 1.952841258311449, "grad_norm": 0.15212734043598175, "learning_rate": 1.788269914883314e-05, "step": 34950 }, { "embedding_loss": 0.017, "epoch": 1.955635022629491, "grad_norm": 0.12944376468658447, "learning_rate": 1.7876490783681934e-05, "step": 35000 }, { "embedding_loss": 0.0179, "epoch": 1.958428786947533, "grad_norm": 0.17352861166000366, "learning_rate": 1.7870282418530727e-05, "step": 35050 }, { "embedding_loss": 0.016, "epoch": 1.9612225512655752, "grad_norm": 0.14679035544395447, "learning_rate": 1.7864074053379525e-05, "step": 35100 }, { "embedding_loss": 0.0178, "epoch": 1.9640163155836174, "grad_norm": 0.19072739779949188, "learning_rate": 1.785786568822832e-05, "step": 35150 }, { "embedding_loss": 0.0167, "epoch": 1.9668100799016595, "grad_norm": 0.18658140301704407, "learning_rate": 1.7851657323077116e-05, "step": 35200 }, { "embedding_loss": 0.0164, "epoch": 1.9696038442197015, "grad_norm": 0.08994993567466736, "learning_rate": 1.784544895792591e-05, "step": 35250 }, { "embedding_loss": 0.0196, "epoch": 1.9723976085377437, "grad_norm": 0.11216910928487778, "learning_rate": 1.7839240592774707e-05, "step": 35300 }, { "embedding_loss": 0.018, "epoch": 1.975191372855786, "grad_norm": 0.10254685580730438, "learning_rate": 1.78330322276235e-05, "step": 35350 }, { "embedding_loss": 0.0174, "epoch": 1.9779851371738282, "grad_norm": 0.16699863970279694, "learning_rate": 1.7826823862472294e-05, "step": 35400 }, { "embedding_loss": 0.0182, "epoch": 1.9807789014918702, "grad_norm": 0.19556911289691925, "learning_rate": 1.782061549732109e-05, "step": 35450 }, { "embedding_loss": 0.0163, "epoch": 1.9835726658099122, "grad_norm": 0.09969909489154816, "learning_rate": 1.7814407132169885e-05, "step": 35500 }, { "embedding_loss": 0.0153, "epoch": 1.9863664301279544, "grad_norm": 0.12502004206180573, "learning_rate": 1.7808198767018682e-05, "step": 35550 }, { "embedding_loss": 0.0166, "epoch": 1.9891601944459967, "grad_norm": 0.2053370475769043, "learning_rate": 1.7801990401867476e-05, "step": 35600 }, { "embedding_loss": 0.0178, "epoch": 1.9919539587640387, "grad_norm": 0.1355050504207611, "learning_rate": 1.7795782036716274e-05, "step": 35650 }, { "embedding_loss": 0.0165, "epoch": 1.9947477230820807, "grad_norm": 0.2781061828136444, "learning_rate": 1.7789573671565067e-05, "step": 35700 }, { "embedding_loss": 0.0199, "epoch": 1.997541487400123, "grad_norm": 0.16163823008537292, "learning_rate": 1.7783365306413865e-05, "step": 35750 }, { "embedding_loss": 0.0158, "epoch": 2.000335251718165, "grad_norm": 0.17826636135578156, "learning_rate": 1.777715694126266e-05, "step": 35800 }, { "embedding_loss": 0.0165, "epoch": 2.003129016036207, "grad_norm": 0.5439745187759399, "learning_rate": 1.7770948576111452e-05, "step": 35850 }, { "embedding_loss": 0.0185, "epoch": 2.005922780354249, "grad_norm": 1.7997641563415527, "learning_rate": 1.776474021096025e-05, "step": 35900 }, { "embedding_loss": 0.0169, "epoch": 2.0087165446722914, "grad_norm": 0.2726188898086548, "learning_rate": 1.7758531845809043e-05, "step": 35950 }, { "embedding_loss": 0.0161, "epoch": 2.0115103089903337, "grad_norm": 0.15716184675693512, "learning_rate": 1.775232348065784e-05, "step": 36000 }, { "embedding_loss": 0.0186, "epoch": 2.014304073308376, "grad_norm": 0.18701720237731934, "learning_rate": 1.7746115115506634e-05, "step": 36050 }, { "embedding_loss": 0.0168, "epoch": 2.0170978376264177, "grad_norm": 0.054208170622587204, "learning_rate": 1.773990675035543e-05, "step": 36100 }, { "embedding_loss": 0.0173, "epoch": 2.01989160194446, "grad_norm": 0.2835747301578522, "learning_rate": 1.7733698385204225e-05, "step": 36150 }, { "embedding_loss": 0.0169, "epoch": 2.022685366262502, "grad_norm": 0.48652032017707825, "learning_rate": 1.7727490020053022e-05, "step": 36200 }, { "embedding_loss": 0.0159, "epoch": 2.0254791305805444, "grad_norm": 0.2647206783294678, "learning_rate": 1.7721281654901816e-05, "step": 36250 }, { "embedding_loss": 0.0153, "epoch": 2.028272894898586, "grad_norm": 0.1096164733171463, "learning_rate": 1.771507328975061e-05, "step": 36300 }, { "embedding_loss": 0.0181, "epoch": 2.0310666592166284, "grad_norm": 0.9239135980606079, "learning_rate": 1.7708864924599407e-05, "step": 36350 }, { "embedding_loss": 0.0139, "epoch": 2.0338604235346707, "grad_norm": 0.1533185988664627, "learning_rate": 1.77026565594482e-05, "step": 36400 }, { "embedding_loss": 0.0177, "epoch": 2.036654187852713, "grad_norm": 0.4132746458053589, "learning_rate": 1.7696448194297e-05, "step": 36450 }, { "embedding_loss": 0.018, "epoch": 2.0394479521707547, "grad_norm": 0.6468183994293213, "learning_rate": 1.7690239829145792e-05, "step": 36500 }, { "embedding_loss": 0.0181, "epoch": 2.042241716488797, "grad_norm": 0.5461135506629944, "learning_rate": 1.768403146399459e-05, "step": 36550 }, { "embedding_loss": 0.0164, "epoch": 2.045035480806839, "grad_norm": 2.111876964569092, "learning_rate": 1.7677823098843383e-05, "step": 36600 }, { "embedding_loss": 0.0149, "epoch": 2.0478292451248814, "grad_norm": 7.162843704223633, "learning_rate": 1.7671614733692177e-05, "step": 36650 }, { "embedding_loss": 0.017, "epoch": 2.050623009442923, "grad_norm": 0.13612553477287292, "learning_rate": 1.7665406368540974e-05, "step": 36700 }, { "embedding_loss": 0.0177, "epoch": 2.0534167737609654, "grad_norm": 0.3146505057811737, "learning_rate": 1.7659198003389768e-05, "step": 36750 }, { "embedding_loss": 0.0159, "epoch": 2.0562105380790077, "grad_norm": 0.18114805221557617, "learning_rate": 1.7652989638238565e-05, "step": 36800 }, { "embedding_loss": 0.0172, "epoch": 2.05900430239705, "grad_norm": 0.085628941655159, "learning_rate": 1.764678127308736e-05, "step": 36850 }, { "embedding_loss": 0.0195, "epoch": 2.0617980667150917, "grad_norm": 0.07027865946292877, "learning_rate": 1.7640572907936156e-05, "step": 36900 }, { "embedding_loss": 0.017, "epoch": 2.064591831033134, "grad_norm": 0.1707303673028946, "learning_rate": 1.763436454278495e-05, "step": 36950 }, { "embedding_loss": 0.0175, "epoch": 2.067385595351176, "grad_norm": 0.10941057652235031, "learning_rate": 1.7628156177633747e-05, "step": 37000 }, { "embedding_loss": 0.0171, "epoch": 2.0701793596692184, "grad_norm": 0.10658110678195953, "learning_rate": 1.762194781248254e-05, "step": 37050 }, { "embedding_loss": 0.0194, "epoch": 2.0729731239872606, "grad_norm": 0.15819990634918213, "learning_rate": 1.7615739447331335e-05, "step": 37100 }, { "embedding_loss": 0.0175, "epoch": 2.0757668883053024, "grad_norm": 0.11032144725322723, "learning_rate": 1.7609531082180132e-05, "step": 37150 }, { "embedding_loss": 0.0168, "epoch": 2.0785606526233447, "grad_norm": 0.13372543454170227, "learning_rate": 1.7603322717028926e-05, "step": 37200 }, { "embedding_loss": 0.0175, "epoch": 2.081354416941387, "grad_norm": 0.20591333508491516, "learning_rate": 1.7597114351877723e-05, "step": 37250 }, { "embedding_loss": 0.0165, "epoch": 2.084148181259429, "grad_norm": 0.11474156379699707, "learning_rate": 1.7590905986726517e-05, "step": 37300 }, { "embedding_loss": 0.0164, "epoch": 2.086941945577471, "grad_norm": 0.1674887239933014, "learning_rate": 1.7584697621575314e-05, "step": 37350 }, { "embedding_loss": 0.0157, "epoch": 2.089735709895513, "grad_norm": 0.145951509475708, "learning_rate": 1.7578489256424108e-05, "step": 37400 }, { "embedding_loss": 0.0162, "epoch": 2.0925294742135554, "grad_norm": 0.07209179550409317, "learning_rate": 1.7572280891272902e-05, "step": 37450 }, { "embedding_loss": 0.0178, "epoch": 2.0953232385315976, "grad_norm": 0.24616046249866486, "learning_rate": 1.75660725261217e-05, "step": 37500 }, { "embedding_loss": 0.015, "epoch": 2.0981170028496394, "grad_norm": 0.1341942697763443, "learning_rate": 1.7559864160970493e-05, "step": 37550 }, { "embedding_loss": 0.0177, "epoch": 2.1009107671676817, "grad_norm": 0.14635011553764343, "learning_rate": 1.755365579581929e-05, "step": 37600 }, { "embedding_loss": 0.0164, "epoch": 2.103704531485724, "grad_norm": 0.16205435991287231, "learning_rate": 1.7547447430668084e-05, "step": 37650 }, { "embedding_loss": 0.0176, "epoch": 2.106498295803766, "grad_norm": 0.14273400604724884, "learning_rate": 1.754123906551688e-05, "step": 37700 }, { "embedding_loss": 0.0174, "epoch": 2.109292060121808, "grad_norm": 0.18893061578273773, "learning_rate": 1.7535030700365675e-05, "step": 37750 }, { "embedding_loss": 0.0161, "epoch": 2.11208582443985, "grad_norm": 0.09738266468048096, "learning_rate": 1.752882233521447e-05, "step": 37800 }, { "embedding_loss": 0.0182, "epoch": 2.1148795887578924, "grad_norm": 0.17043693363666534, "learning_rate": 1.7522613970063266e-05, "step": 37850 }, { "embedding_loss": 0.0195, "epoch": 2.1176733530759346, "grad_norm": 0.19370906054973602, "learning_rate": 1.751640560491206e-05, "step": 37900 }, { "embedding_loss": 0.0136, "epoch": 2.120467117393977, "grad_norm": 0.13810941576957703, "learning_rate": 1.7510197239760857e-05, "step": 37950 }, { "embedding_loss": 0.0149, "epoch": 2.1232608817120187, "grad_norm": 0.25488778948783875, "learning_rate": 1.750398887460965e-05, "step": 38000 }, { "embedding_loss": 0.0156, "epoch": 2.126054646030061, "grad_norm": 0.15778779983520508, "learning_rate": 1.7497780509458445e-05, "step": 38050 }, { "embedding_loss": 0.0163, "epoch": 2.128848410348103, "grad_norm": 0.07143477350473404, "learning_rate": 1.7491572144307242e-05, "step": 38100 }, { "embedding_loss": 0.0165, "epoch": 2.1316421746661454, "grad_norm": 0.18681664764881134, "learning_rate": 1.7485363779156036e-05, "step": 38150 }, { "embedding_loss": 0.0156, "epoch": 2.134435938984187, "grad_norm": 0.07002172619104385, "learning_rate": 1.7479155414004833e-05, "step": 38200 }, { "embedding_loss": 0.0159, "epoch": 2.1372297033022294, "grad_norm": 0.21546189486980438, "learning_rate": 1.7472947048853627e-05, "step": 38250 }, { "embedding_loss": 0.0173, "epoch": 2.1400234676202716, "grad_norm": 0.28822487592697144, "learning_rate": 1.746673868370242e-05, "step": 38300 }, { "embedding_loss": 0.0172, "epoch": 2.142817231938314, "grad_norm": 0.19455474615097046, "learning_rate": 1.7460530318551218e-05, "step": 38350 }, { "embedding_loss": 0.0159, "epoch": 2.1456109962563557, "grad_norm": 0.08125987648963928, "learning_rate": 1.745432195340001e-05, "step": 38400 }, { "embedding_loss": 0.0162, "epoch": 2.148404760574398, "grad_norm": 0.26057159900665283, "learning_rate": 1.744811358824881e-05, "step": 38450 }, { "embedding_loss": 0.0172, "epoch": 2.15119852489244, "grad_norm": 0.2632230222225189, "learning_rate": 1.7441905223097603e-05, "step": 38500 }, { "embedding_loss": 0.0162, "epoch": 2.1539922892104824, "grad_norm": 0.0710434690117836, "learning_rate": 1.74356968579464e-05, "step": 38550 }, { "embedding_loss": 0.0177, "epoch": 2.156786053528524, "grad_norm": 3.9064769744873047, "learning_rate": 1.7429488492795194e-05, "step": 38600 }, { "embedding_loss": 0.0189, "epoch": 2.1595798178465664, "grad_norm": 0.13290469348430634, "learning_rate": 1.7423280127643987e-05, "step": 38650 }, { "embedding_loss": 0.0184, "epoch": 2.1623735821646086, "grad_norm": 0.09622035175561905, "learning_rate": 1.7417071762492785e-05, "step": 38700 }, { "embedding_loss": 0.0168, "epoch": 2.165167346482651, "grad_norm": 0.13375109434127808, "learning_rate": 1.741086339734158e-05, "step": 38750 }, { "embedding_loss": 0.0149, "epoch": 2.1679611108006926, "grad_norm": 0.2804946005344391, "learning_rate": 1.7404655032190376e-05, "step": 38800 }, { "embedding_loss": 0.0178, "epoch": 2.170754875118735, "grad_norm": 5.22499418258667, "learning_rate": 1.739844666703917e-05, "step": 38850 }, { "embedding_loss": 0.0166, "epoch": 2.173548639436777, "grad_norm": 0.5936800241470337, "learning_rate": 1.7392238301887963e-05, "step": 38900 }, { "embedding_loss": 0.0176, "epoch": 2.1763424037548194, "grad_norm": 0.041446879506111145, "learning_rate": 1.738602993673676e-05, "step": 38950 }, { "embedding_loss": 0.0167, "epoch": 2.179136168072861, "grad_norm": 0.32559362053871155, "learning_rate": 1.7379821571585554e-05, "step": 39000 }, { "embedding_loss": 0.0169, "epoch": 2.1819299323909034, "grad_norm": 1.3919999599456787, "learning_rate": 1.737361320643435e-05, "step": 39050 }, { "embedding_loss": 0.0184, "epoch": 2.1847236967089456, "grad_norm": 0.14979113638401031, "learning_rate": 1.7367404841283145e-05, "step": 39100 }, { "embedding_loss": 0.0158, "epoch": 2.187517461026988, "grad_norm": 0.1549902856349945, "learning_rate": 1.736119647613194e-05, "step": 39150 }, { "embedding_loss": 0.0175, "epoch": 2.19031122534503, "grad_norm": 0.11732825636863708, "learning_rate": 1.7354988110980736e-05, "step": 39200 }, { "embedding_loss": 0.0169, "epoch": 2.193104989663072, "grad_norm": 0.18640615046024323, "learning_rate": 1.734877974582953e-05, "step": 39250 }, { "embedding_loss": 0.0196, "epoch": 2.195898753981114, "grad_norm": 0.1287025660276413, "learning_rate": 1.7342571380678327e-05, "step": 39300 }, { "embedding_loss": 0.016, "epoch": 2.1986925182991564, "grad_norm": 0.406075656414032, "learning_rate": 1.733636301552712e-05, "step": 39350 }, { "embedding_loss": 0.0171, "epoch": 2.2014862826171986, "grad_norm": 0.2718328833580017, "learning_rate": 1.733015465037592e-05, "step": 39400 }, { "embedding_loss": 0.0171, "epoch": 2.2042800469352404, "grad_norm": 0.19534873962402344, "learning_rate": 1.7323946285224712e-05, "step": 39450 }, { "embedding_loss": 0.0176, "epoch": 2.2070738112532826, "grad_norm": 0.20120638608932495, "learning_rate": 1.7317737920073506e-05, "step": 39500 }, { "embedding_loss": 0.0162, "epoch": 2.209867575571325, "grad_norm": 0.6685495972633362, "learning_rate": 1.7311529554922303e-05, "step": 39550 }, { "embedding_loss": 0.0152, "epoch": 2.212661339889367, "grad_norm": 0.11361811310052872, "learning_rate": 1.7305321189771097e-05, "step": 39600 }, { "embedding_loss": 0.0183, "epoch": 2.215455104207409, "grad_norm": 0.0857400894165039, "learning_rate": 1.7299112824619894e-05, "step": 39650 }, { "embedding_loss": 0.0178, "epoch": 2.218248868525451, "grad_norm": 0.23315177857875824, "learning_rate": 1.7292904459468688e-05, "step": 39700 }, { "embedding_loss": 0.0173, "epoch": 2.2210426328434933, "grad_norm": 0.692767858505249, "learning_rate": 1.7286696094317485e-05, "step": 39750 }, { "embedding_loss": 0.0175, "epoch": 2.2238363971615356, "grad_norm": 0.14933888614177704, "learning_rate": 1.728048772916628e-05, "step": 39800 }, { "embedding_loss": 0.0168, "epoch": 2.2266301614795774, "grad_norm": 0.1879744827747345, "learning_rate": 1.7274279364015076e-05, "step": 39850 }, { "embedding_loss": 0.0149, "epoch": 2.2294239257976196, "grad_norm": 0.1358872503042221, "learning_rate": 1.726807099886387e-05, "step": 39900 }, { "embedding_loss": 0.016, "epoch": 2.232217690115662, "grad_norm": 0.11000203341245651, "learning_rate": 1.7261862633712664e-05, "step": 39950 }, { "embedding_loss": 0.0164, "epoch": 2.235011454433704, "grad_norm": 0.13901792466640472, "learning_rate": 1.725565426856146e-05, "step": 40000 }, { "embedding_loss": 0.0193, "epoch": 2.2378052187517463, "grad_norm": 0.11317434161901474, "learning_rate": 1.7249445903410255e-05, "step": 40050 }, { "embedding_loss": 0.0184, "epoch": 2.240598983069788, "grad_norm": 0.09519031643867493, "learning_rate": 1.7243237538259052e-05, "step": 40100 }, { "embedding_loss": 0.0182, "epoch": 2.2433927473878303, "grad_norm": 0.1718509942293167, "learning_rate": 1.7237029173107846e-05, "step": 40150 }, { "embedding_loss": 0.0172, "epoch": 2.2461865117058726, "grad_norm": 0.21955204010009766, "learning_rate": 1.7230820807956643e-05, "step": 40200 }, { "embedding_loss": 0.0172, "epoch": 2.248980276023915, "grad_norm": 0.27736935019493103, "learning_rate": 1.7224612442805437e-05, "step": 40250 }, { "embedding_loss": 0.0149, "epoch": 2.2517740403419566, "grad_norm": 0.19411185383796692, "learning_rate": 1.7218404077654234e-05, "step": 40300 }, { "embedding_loss": 0.0168, "epoch": 2.254567804659999, "grad_norm": 0.09431339055299759, "learning_rate": 1.7212195712503028e-05, "step": 40350 }, { "embedding_loss": 0.0176, "epoch": 2.257361568978041, "grad_norm": 0.07454792410135269, "learning_rate": 1.7205987347351822e-05, "step": 40400 }, { "embedding_loss": 0.0165, "epoch": 2.2601553332960833, "grad_norm": 0.09115911275148392, "learning_rate": 1.719977898220062e-05, "step": 40450 }, { "embedding_loss": 0.0148, "epoch": 2.262949097614125, "grad_norm": 0.10218331962823868, "learning_rate": 1.7193570617049413e-05, "step": 40500 }, { "embedding_loss": 0.0182, "epoch": 2.2657428619321673, "grad_norm": 0.1421951800584793, "learning_rate": 1.718736225189821e-05, "step": 40550 }, { "embedding_loss": 0.0151, "epoch": 2.2685366262502096, "grad_norm": 0.15851043164730072, "learning_rate": 1.7181153886747004e-05, "step": 40600 }, { "embedding_loss": 0.0174, "epoch": 2.271330390568252, "grad_norm": 0.12191366404294968, "learning_rate": 1.71749455215958e-05, "step": 40650 }, { "embedding_loss": 0.0188, "epoch": 2.2741241548862936, "grad_norm": 0.18083854019641876, "learning_rate": 1.7168737156444595e-05, "step": 40700 }, { "embedding_loss": 0.0171, "epoch": 2.276917919204336, "grad_norm": 0.1721925288438797, "learning_rate": 1.716252879129339e-05, "step": 40750 }, { "embedding_loss": 0.0176, "epoch": 2.279711683522378, "grad_norm": 0.09935171902179718, "learning_rate": 1.7156320426142186e-05, "step": 40800 }, { "embedding_loss": 0.0173, "epoch": 2.2825054478404203, "grad_norm": 0.42161786556243896, "learning_rate": 1.715011206099098e-05, "step": 40850 }, { "embedding_loss": 0.0149, "epoch": 2.2852992121584625, "grad_norm": 0.25826480984687805, "learning_rate": 1.7143903695839777e-05, "step": 40900 }, { "embedding_loss": 0.016, "epoch": 2.2880929764765043, "grad_norm": 0.096546970307827, "learning_rate": 1.713769533068857e-05, "step": 40950 }, { "embedding_loss": 0.0164, "epoch": 2.2908867407945466, "grad_norm": 0.08959810435771942, "learning_rate": 1.7131486965537368e-05, "step": 41000 }, { "embedding_loss": 0.0153, "epoch": 2.293680505112589, "grad_norm": 0.16972295939922333, "learning_rate": 1.7125278600386162e-05, "step": 41050 }, { "embedding_loss": 0.0162, "epoch": 2.2964742694306306, "grad_norm": 0.13943640887737274, "learning_rate": 1.711907023523496e-05, "step": 41100 }, { "embedding_loss": 0.0181, "epoch": 2.299268033748673, "grad_norm": 0.21791496872901917, "learning_rate": 1.7112861870083753e-05, "step": 41150 }, { "embedding_loss": 0.017, "epoch": 2.302061798066715, "grad_norm": 0.3212284743785858, "learning_rate": 1.7106653504932547e-05, "step": 41200 }, { "embedding_loss": 0.0156, "epoch": 2.3048555623847573, "grad_norm": 0.0997786670923233, "learning_rate": 1.7100445139781344e-05, "step": 41250 }, { "embedding_loss": 0.017, "epoch": 2.3076493267027995, "grad_norm": 0.0641540065407753, "learning_rate": 1.7094236774630138e-05, "step": 41300 }, { "embedding_loss": 0.0165, "epoch": 2.3104430910208413, "grad_norm": 0.6422967910766602, "learning_rate": 1.7088028409478935e-05, "step": 41350 }, { "embedding_loss": 0.0168, "epoch": 2.3132368553388836, "grad_norm": 0.19409677386283875, "learning_rate": 1.708182004432773e-05, "step": 41400 }, { "embedding_loss": 0.0149, "epoch": 2.316030619656926, "grad_norm": 0.11464004218578339, "learning_rate": 1.7075611679176526e-05, "step": 41450 }, { "embedding_loss": 0.0166, "epoch": 2.318824383974968, "grad_norm": 0.18274611234664917, "learning_rate": 1.706940331402532e-05, "step": 41500 }, { "embedding_loss": 0.0184, "epoch": 2.32161814829301, "grad_norm": 0.2719441056251526, "learning_rate": 1.7063194948874114e-05, "step": 41550 }, { "embedding_loss": 0.0159, "epoch": 2.324411912611052, "grad_norm": 0.10782397538423538, "learning_rate": 1.705698658372291e-05, "step": 41600 }, { "embedding_loss": 0.0167, "epoch": 2.3272056769290943, "grad_norm": 0.12747380137443542, "learning_rate": 1.7050778218571705e-05, "step": 41650 }, { "embedding_loss": 0.0149, "epoch": 2.3299994412471365, "grad_norm": 0.7495676279067993, "learning_rate": 1.7044569853420502e-05, "step": 41700 }, { "embedding_loss": 0.0176, "epoch": 2.3327932055651783, "grad_norm": 0.11739788949489594, "learning_rate": 1.7038361488269296e-05, "step": 41750 }, { "embedding_loss": 0.0158, "epoch": 2.3355869698832206, "grad_norm": 0.09787417948246002, "learning_rate": 1.7032153123118093e-05, "step": 41800 }, { "embedding_loss": 0.0185, "epoch": 2.338380734201263, "grad_norm": 0.128550723195076, "learning_rate": 1.7025944757966887e-05, "step": 41850 }, { "embedding_loss": 0.0157, "epoch": 2.341174498519305, "grad_norm": 0.09368588030338287, "learning_rate": 1.701973639281568e-05, "step": 41900 }, { "embedding_loss": 0.0171, "epoch": 2.343968262837347, "grad_norm": 0.24065691232681274, "learning_rate": 1.7013528027664478e-05, "step": 41950 }, { "embedding_loss": 0.0171, "epoch": 2.346762027155389, "grad_norm": 0.09040940552949905, "learning_rate": 1.700731966251327e-05, "step": 42000 }, { "embedding_loss": 0.0174, "epoch": 2.3495557914734313, "grad_norm": 0.15146592259407043, "learning_rate": 1.700111129736207e-05, "step": 42050 }, { "embedding_loss": 0.0163, "epoch": 2.3523495557914735, "grad_norm": 0.2529771029949188, "learning_rate": 1.6994902932210863e-05, "step": 42100 }, { "embedding_loss": 0.0178, "epoch": 2.3551433201095158, "grad_norm": 0.12372328341007233, "learning_rate": 1.6988694567059656e-05, "step": 42150 }, { "embedding_loss": 0.0168, "epoch": 2.3579370844275576, "grad_norm": 0.18047888576984406, "learning_rate": 1.6982486201908454e-05, "step": 42200 }, { "embedding_loss": 0.0156, "epoch": 2.3607308487456, "grad_norm": 0.12089453637599945, "learning_rate": 1.6976277836757248e-05, "step": 42250 }, { "embedding_loss": 0.0183, "epoch": 2.363524613063642, "grad_norm": 0.07283234596252441, "learning_rate": 1.6970069471606045e-05, "step": 42300 }, { "embedding_loss": 0.0163, "epoch": 2.3663183773816843, "grad_norm": 0.04320630058646202, "learning_rate": 1.696386110645484e-05, "step": 42350 }, { "embedding_loss": 0.0158, "epoch": 2.369112141699726, "grad_norm": 0.04693692922592163, "learning_rate": 1.6957652741303632e-05, "step": 42400 }, { "embedding_loss": 0.0154, "epoch": 2.3719059060177683, "grad_norm": 0.05248791351914406, "learning_rate": 1.695144437615243e-05, "step": 42450 }, { "embedding_loss": 0.0165, "epoch": 2.3746996703358105, "grad_norm": 0.11502902954816818, "learning_rate": 1.6945236011001223e-05, "step": 42500 }, { "embedding_loss": 0.0163, "epoch": 2.3774934346538528, "grad_norm": 0.07500080019235611, "learning_rate": 1.693902764585002e-05, "step": 42550 }, { "embedding_loss": 0.0166, "epoch": 2.3802871989718946, "grad_norm": 0.10995463281869888, "learning_rate": 1.6932819280698814e-05, "step": 42600 }, { "embedding_loss": 0.016, "epoch": 2.383080963289937, "grad_norm": 0.035049546509981155, "learning_rate": 1.6926610915547608e-05, "step": 42650 }, { "embedding_loss": 0.018, "epoch": 2.385874727607979, "grad_norm": 0.9285727143287659, "learning_rate": 1.6920402550396405e-05, "step": 42700 }, { "embedding_loss": 0.0172, "epoch": 2.3886684919260213, "grad_norm": 0.13521267473697662, "learning_rate": 1.69141941852452e-05, "step": 42750 }, { "embedding_loss": 0.016, "epoch": 2.391462256244063, "grad_norm": 0.08192472159862518, "learning_rate": 1.6907985820093996e-05, "step": 42800 }, { "embedding_loss": 0.018, "epoch": 2.3942560205621053, "grad_norm": 0.1297745406627655, "learning_rate": 1.690177745494279e-05, "step": 42850 }, { "embedding_loss": 0.0163, "epoch": 2.3970497848801475, "grad_norm": 0.24478796124458313, "learning_rate": 1.6895569089791588e-05, "step": 42900 }, { "embedding_loss": 0.018, "epoch": 2.3998435491981898, "grad_norm": 0.9241535067558289, "learning_rate": 1.688936072464038e-05, "step": 42950 }, { "embedding_loss": 0.0171, "epoch": 2.402637313516232, "grad_norm": 0.09327184408903122, "learning_rate": 1.6883152359489175e-05, "step": 43000 }, { "embedding_loss": 0.0182, "epoch": 2.405431077834274, "grad_norm": 0.09614718705415726, "learning_rate": 1.6876943994337972e-05, "step": 43050 }, { "embedding_loss": 0.0152, "epoch": 2.408224842152316, "grad_norm": 0.08087404817342758, "learning_rate": 1.6870735629186766e-05, "step": 43100 }, { "embedding_loss": 0.0194, "epoch": 2.4110186064703583, "grad_norm": 0.1645595133304596, "learning_rate": 1.6864527264035563e-05, "step": 43150 }, { "embedding_loss": 0.0157, "epoch": 2.4138123707884, "grad_norm": 0.19709506630897522, "learning_rate": 1.6858318898884357e-05, "step": 43200 }, { "embedding_loss": 0.0162, "epoch": 2.4166061351064423, "grad_norm": 0.17061099410057068, "learning_rate": 1.685211053373315e-05, "step": 43250 }, { "embedding_loss": 0.0158, "epoch": 2.4193998994244845, "grad_norm": 0.12086191773414612, "learning_rate": 1.6845902168581948e-05, "step": 43300 }, { "embedding_loss": 0.0156, "epoch": 2.4221936637425268, "grad_norm": 0.15249724686145782, "learning_rate": 1.6839693803430742e-05, "step": 43350 }, { "embedding_loss": 0.0153, "epoch": 2.424987428060569, "grad_norm": 0.15771323442459106, "learning_rate": 1.683348543827954e-05, "step": 43400 }, { "embedding_loss": 0.0182, "epoch": 2.427781192378611, "grad_norm": 0.27228376269340515, "learning_rate": 1.6827277073128333e-05, "step": 43450 }, { "embedding_loss": 0.0148, "epoch": 2.430574956696653, "grad_norm": 0.06301997601985931, "learning_rate": 1.682106870797713e-05, "step": 43500 }, { "embedding_loss": 0.0166, "epoch": 2.4333687210146953, "grad_norm": 0.21413418650627136, "learning_rate": 1.6814860342825924e-05, "step": 43550 }, { "embedding_loss": 0.0171, "epoch": 2.4361624853327375, "grad_norm": 0.08740256726741791, "learning_rate": 1.6808651977674718e-05, "step": 43600 }, { "embedding_loss": 0.0189, "epoch": 2.4389562496507793, "grad_norm": 0.09682929515838623, "learning_rate": 1.6802443612523515e-05, "step": 43650 }, { "embedding_loss": 0.0157, "epoch": 2.4417500139688215, "grad_norm": 0.14252382516860962, "learning_rate": 1.679623524737231e-05, "step": 43700 }, { "embedding_loss": 0.0177, "epoch": 2.4445437782868638, "grad_norm": 0.10528218746185303, "learning_rate": 1.6790026882221106e-05, "step": 43750 }, { "embedding_loss": 0.0184, "epoch": 2.447337542604906, "grad_norm": 0.09754012525081635, "learning_rate": 1.67838185170699e-05, "step": 43800 }, { "embedding_loss": 0.0168, "epoch": 2.4501313069229482, "grad_norm": 0.5099379420280457, "learning_rate": 1.6777610151918697e-05, "step": 43850 }, { "embedding_loss": 0.0162, "epoch": 2.45292507124099, "grad_norm": 0.13040108978748322, "learning_rate": 1.677140178676749e-05, "step": 43900 }, { "embedding_loss": 0.0163, "epoch": 2.4557188355590323, "grad_norm": 0.19614431262016296, "learning_rate": 1.6765193421616288e-05, "step": 43950 }, { "embedding_loss": 0.0156, "epoch": 2.4585125998770745, "grad_norm": 0.9257204532623291, "learning_rate": 1.6758985056465082e-05, "step": 44000 }, { "embedding_loss": 0.0155, "epoch": 2.4613063641951163, "grad_norm": 0.057261139154434204, "learning_rate": 1.6752776691313876e-05, "step": 44050 }, { "embedding_loss": 0.0169, "epoch": 2.4641001285131585, "grad_norm": 0.2084677517414093, "learning_rate": 1.6746568326162673e-05, "step": 44100 }, { "embedding_loss": 0.0169, "epoch": 2.4668938928312008, "grad_norm": 0.13700753450393677, "learning_rate": 1.6740359961011467e-05, "step": 44150 }, { "embedding_loss": 0.0175, "epoch": 2.469687657149243, "grad_norm": 0.05712662264704704, "learning_rate": 1.6734151595860264e-05, "step": 44200 }, { "embedding_loss": 0.0175, "epoch": 2.4724814214672852, "grad_norm": 0.1795831024646759, "learning_rate": 1.6727943230709058e-05, "step": 44250 }, { "embedding_loss": 0.0191, "epoch": 2.475275185785327, "grad_norm": 0.09736839681863785, "learning_rate": 1.6721734865557855e-05, "step": 44300 }, { "embedding_loss": 0.0188, "epoch": 2.4780689501033693, "grad_norm": 0.06810817867517471, "learning_rate": 1.671552650040665e-05, "step": 44350 }, { "embedding_loss": 0.0161, "epoch": 2.4808627144214115, "grad_norm": 0.5170435905456543, "learning_rate": 1.6709318135255446e-05, "step": 44400 }, { "embedding_loss": 0.0187, "epoch": 2.4836564787394537, "grad_norm": 0.14599338173866272, "learning_rate": 1.670310977010424e-05, "step": 44450 }, { "embedding_loss": 0.0165, "epoch": 2.4864502430574955, "grad_norm": 0.2184564173221588, "learning_rate": 1.6696901404953034e-05, "step": 44500 }, { "embedding_loss": 0.0167, "epoch": 2.4892440073755377, "grad_norm": 0.12261306494474411, "learning_rate": 1.669069303980183e-05, "step": 44550 }, { "embedding_loss": 0.0173, "epoch": 2.49203777169358, "grad_norm": 0.13060683012008667, "learning_rate": 1.6684484674650625e-05, "step": 44600 }, { "embedding_loss": 0.0169, "epoch": 2.494831536011622, "grad_norm": 0.15053945779800415, "learning_rate": 1.6678276309499422e-05, "step": 44650 }, { "embedding_loss": 0.0168, "epoch": 2.497625300329664, "grad_norm": 0.07780053466558456, "learning_rate": 1.6672067944348216e-05, "step": 44700 }, { "embedding_loss": 0.016, "epoch": 2.5004190646477062, "grad_norm": 0.12818460166454315, "learning_rate": 1.6665859579197013e-05, "step": 44750 }, { "embedding_loss": 0.0178, "epoch": 2.5032128289657485, "grad_norm": 1.979898452758789, "learning_rate": 1.6659651214045807e-05, "step": 44800 }, { "embedding_loss": 0.0181, "epoch": 2.5060065932837907, "grad_norm": 0.10634288936853409, "learning_rate": 1.66534428488946e-05, "step": 44850 }, { "embedding_loss": 0.0158, "epoch": 2.5088003576018325, "grad_norm": 0.14199583232402802, "learning_rate": 1.6647234483743398e-05, "step": 44900 }, { "embedding_loss": 0.0157, "epoch": 2.5115941219198747, "grad_norm": 0.10968028753995895, "learning_rate": 1.6641026118592192e-05, "step": 44950 }, { "embedding_loss": 0.0173, "epoch": 2.514387886237917, "grad_norm": 0.13804017007350922, "learning_rate": 1.663481775344099e-05, "step": 45000 }, { "embedding_loss": 0.0173, "epoch": 2.517181650555959, "grad_norm": 0.15219607949256897, "learning_rate": 1.6628609388289783e-05, "step": 45050 }, { "embedding_loss": 0.0162, "epoch": 2.5199754148740015, "grad_norm": 0.1325286328792572, "learning_rate": 1.662240102313858e-05, "step": 45100 }, { "embedding_loss": 0.0179, "epoch": 2.5227691791920432, "grad_norm": 0.09433367103338242, "learning_rate": 1.6616192657987374e-05, "step": 45150 }, { "embedding_loss": 0.0184, "epoch": 2.5255629435100855, "grad_norm": 0.06937281787395477, "learning_rate": 1.660998429283617e-05, "step": 45200 }, { "embedding_loss": 0.0178, "epoch": 2.5283567078281277, "grad_norm": 0.1687895655632019, "learning_rate": 1.6603775927684965e-05, "step": 45250 }, { "embedding_loss": 0.0173, "epoch": 2.5311504721461695, "grad_norm": 0.07250981032848358, "learning_rate": 1.659756756253376e-05, "step": 45300 }, { "embedding_loss": 0.0175, "epoch": 2.5339442364642117, "grad_norm": 1.1047320365905762, "learning_rate": 1.6591359197382556e-05, "step": 45350 }, { "embedding_loss": 0.0162, "epoch": 2.536738000782254, "grad_norm": 0.08403404802083969, "learning_rate": 1.658515083223135e-05, "step": 45400 }, { "embedding_loss": 0.0162, "epoch": 2.539531765100296, "grad_norm": 0.11315282434225082, "learning_rate": 1.6578942467080147e-05, "step": 45450 }, { "embedding_loss": 0.0172, "epoch": 2.5423255294183384, "grad_norm": 0.24464912712574005, "learning_rate": 1.657273410192894e-05, "step": 45500 }, { "embedding_loss": 0.0165, "epoch": 2.5451192937363802, "grad_norm": 0.13639764487743378, "learning_rate": 1.6566525736777738e-05, "step": 45550 }, { "embedding_loss": 0.017, "epoch": 2.5479130580544225, "grad_norm": 0.0968150645494461, "learning_rate": 1.6560317371626532e-05, "step": 45600 }, { "embedding_loss": 0.0171, "epoch": 2.5507068223724647, "grad_norm": 0.18968167901039124, "learning_rate": 1.6554109006475326e-05, "step": 45650 }, { "embedding_loss": 0.0166, "epoch": 2.553500586690507, "grad_norm": 0.31737038493156433, "learning_rate": 1.6547900641324123e-05, "step": 45700 }, { "embedding_loss": 0.0166, "epoch": 2.5562943510085487, "grad_norm": 0.10740742832422256, "learning_rate": 1.6541692276172917e-05, "step": 45750 }, { "embedding_loss": 0.0171, "epoch": 2.559088115326591, "grad_norm": 0.09779264777898788, "learning_rate": 1.6535483911021714e-05, "step": 45800 }, { "embedding_loss": 0.0184, "epoch": 2.561881879644633, "grad_norm": 0.09841367602348328, "learning_rate": 1.6529275545870508e-05, "step": 45850 }, { "embedding_loss": 0.0189, "epoch": 2.5646756439626754, "grad_norm": 0.06580434739589691, "learning_rate": 1.6523067180719305e-05, "step": 45900 }, { "embedding_loss": 0.0169, "epoch": 2.5674694082807177, "grad_norm": 0.1137748435139656, "learning_rate": 1.65168588155681e-05, "step": 45950 }, { "embedding_loss": 0.0157, "epoch": 2.5702631725987595, "grad_norm": 0.14721417427062988, "learning_rate": 1.6510650450416892e-05, "step": 46000 }, { "embedding_loss": 0.0166, "epoch": 2.5730569369168017, "grad_norm": 0.09636078774929047, "learning_rate": 1.650444208526569e-05, "step": 46050 }, { "embedding_loss": 0.0159, "epoch": 2.575850701234844, "grad_norm": 0.5256446599960327, "learning_rate": 1.6498233720114483e-05, "step": 46100 }, { "embedding_loss": 0.0154, "epoch": 2.5786444655528857, "grad_norm": 0.1420203298330307, "learning_rate": 1.649202535496328e-05, "step": 46150 }, { "embedding_loss": 0.018, "epoch": 2.581438229870928, "grad_norm": 0.1753098964691162, "learning_rate": 1.6485816989812075e-05, "step": 46200 }, { "embedding_loss": 0.0185, "epoch": 2.58423199418897, "grad_norm": 0.04596054553985596, "learning_rate": 1.647960862466087e-05, "step": 46250 }, { "embedding_loss": 0.0154, "epoch": 2.5870257585070124, "grad_norm": 0.045322220772504807, "learning_rate": 1.6473400259509666e-05, "step": 46300 }, { "embedding_loss": 0.0171, "epoch": 2.5898195228250547, "grad_norm": 0.10397302359342575, "learning_rate": 1.646719189435846e-05, "step": 46350 }, { "embedding_loss": 0.0172, "epoch": 2.5926132871430965, "grad_norm": 0.06831733137369156, "learning_rate": 1.6460983529207257e-05, "step": 46400 }, { "embedding_loss": 0.0155, "epoch": 2.5954070514611387, "grad_norm": 0.23467573523521423, "learning_rate": 1.645477516405605e-05, "step": 46450 }, { "embedding_loss": 0.0161, "epoch": 2.598200815779181, "grad_norm": 0.13564670085906982, "learning_rate": 1.6448566798904844e-05, "step": 46500 }, { "embedding_loss": 0.0154, "epoch": 2.6009945800972227, "grad_norm": 0.09517131000757217, "learning_rate": 1.644235843375364e-05, "step": 46550 }, { "embedding_loss": 0.0187, "epoch": 2.603788344415265, "grad_norm": 0.0798448920249939, "learning_rate": 1.6436150068602435e-05, "step": 46600 }, { "embedding_loss": 0.0166, "epoch": 2.606582108733307, "grad_norm": 0.11615444719791412, "learning_rate": 1.6429941703451232e-05, "step": 46650 }, { "embedding_loss": 0.0158, "epoch": 2.6093758730513494, "grad_norm": 0.10131309181451797, "learning_rate": 1.6423733338300026e-05, "step": 46700 }, { "embedding_loss": 0.0171, "epoch": 2.6121696373693917, "grad_norm": 0.1051841527223587, "learning_rate": 1.641752497314882e-05, "step": 46750 }, { "embedding_loss": 0.0171, "epoch": 2.614963401687434, "grad_norm": 1.6406341791152954, "learning_rate": 1.6411316607997617e-05, "step": 46800 }, { "embedding_loss": 0.0153, "epoch": 2.6177571660054757, "grad_norm": 0.045248668640851974, "learning_rate": 1.640510824284641e-05, "step": 46850 }, { "embedding_loss": 0.016, "epoch": 2.620550930323518, "grad_norm": 0.07292340695858002, "learning_rate": 1.639889987769521e-05, "step": 46900 }, { "embedding_loss": 0.0147, "epoch": 2.62334469464156, "grad_norm": 0.11624530702829361, "learning_rate": 1.6392691512544002e-05, "step": 46950 }, { "embedding_loss": 0.0171, "epoch": 2.626138458959602, "grad_norm": 0.09457813948392868, "learning_rate": 1.63864831473928e-05, "step": 47000 }, { "embedding_loss": 0.016, "epoch": 2.628932223277644, "grad_norm": 0.3507966995239258, "learning_rate": 1.6380274782241593e-05, "step": 47050 }, { "embedding_loss": 0.0159, "epoch": 2.6317259875956864, "grad_norm": 0.2465100884437561, "learning_rate": 1.6374066417090387e-05, "step": 47100 }, { "embedding_loss": 0.0178, "epoch": 2.6345197519137287, "grad_norm": 0.14050370454788208, "learning_rate": 1.6367858051939184e-05, "step": 47150 }, { "embedding_loss": 0.0159, "epoch": 2.637313516231771, "grad_norm": 0.12464486062526703, "learning_rate": 1.6361649686787978e-05, "step": 47200 }, { "embedding_loss": 0.0156, "epoch": 2.6401072805498127, "grad_norm": 0.12135618925094604, "learning_rate": 1.6355441321636775e-05, "step": 47250 }, { "embedding_loss": 0.0171, "epoch": 2.642901044867855, "grad_norm": 0.07885091006755829, "learning_rate": 1.634923295648557e-05, "step": 47300 }, { "embedding_loss": 0.0173, "epoch": 2.645694809185897, "grad_norm": 0.16365808248519897, "learning_rate": 1.6343024591334363e-05, "step": 47350 }, { "embedding_loss": 0.019, "epoch": 2.648488573503939, "grad_norm": 0.15311506390571594, "learning_rate": 1.633681622618316e-05, "step": 47400 }, { "embedding_loss": 0.0158, "epoch": 2.651282337821981, "grad_norm": 0.1855958104133606, "learning_rate": 1.6330607861031954e-05, "step": 47450 }, { "embedding_loss": 0.0147, "epoch": 2.6540761021400234, "grad_norm": 0.25313621759414673, "learning_rate": 1.632439949588075e-05, "step": 47500 }, { "embedding_loss": 0.015, "epoch": 2.6568698664580657, "grad_norm": 0.10842446982860565, "learning_rate": 1.6318191130729545e-05, "step": 47550 }, { "embedding_loss": 0.018, "epoch": 2.659663630776108, "grad_norm": 0.11216917634010315, "learning_rate": 1.6311982765578342e-05, "step": 47600 }, { "embedding_loss": 0.018, "epoch": 2.6624573950941497, "grad_norm": 0.10711926221847534, "learning_rate": 1.6305774400427136e-05, "step": 47650 }, { "embedding_loss": 0.0183, "epoch": 2.665251159412192, "grad_norm": 0.03633804991841316, "learning_rate": 1.629956603527593e-05, "step": 47700 }, { "embedding_loss": 0.0179, "epoch": 2.668044923730234, "grad_norm": 0.23777492344379425, "learning_rate": 1.6293357670124727e-05, "step": 47750 }, { "embedding_loss": 0.0158, "epoch": 2.6708386880482764, "grad_norm": 0.16753429174423218, "learning_rate": 1.628714930497352e-05, "step": 47800 }, { "embedding_loss": 0.0143, "epoch": 2.673632452366318, "grad_norm": 0.17432713508605957, "learning_rate": 1.6280940939822318e-05, "step": 47850 }, { "embedding_loss": 0.0167, "epoch": 2.6764262166843604, "grad_norm": 0.1960105001926422, "learning_rate": 1.6274732574671112e-05, "step": 47900 }, { "embedding_loss": 0.0167, "epoch": 2.6792199810024027, "grad_norm": 0.04725944250822067, "learning_rate": 1.626852420951991e-05, "step": 47950 }, { "embedding_loss": 0.016, "epoch": 2.682013745320445, "grad_norm": 0.08124231547117233, "learning_rate": 1.6262315844368703e-05, "step": 48000 }, { "embedding_loss": 0.0155, "epoch": 2.684807509638487, "grad_norm": 0.028618505224585533, "learning_rate": 1.62561074792175e-05, "step": 48050 }, { "embedding_loss": 0.0168, "epoch": 2.687601273956529, "grad_norm": 0.16432331502437592, "learning_rate": 1.6249899114066294e-05, "step": 48100 }, { "embedding_loss": 0.0182, "epoch": 2.690395038274571, "grad_norm": 0.09122524410486221, "learning_rate": 1.6243690748915088e-05, "step": 48150 }, { "embedding_loss": 0.0175, "epoch": 2.6931888025926134, "grad_norm": 0.7217292189598083, "learning_rate": 1.6237482383763885e-05, "step": 48200 }, { "embedding_loss": 0.0166, "epoch": 2.695982566910655, "grad_norm": 0.567168653011322, "learning_rate": 1.623127401861268e-05, "step": 48250 }, { "embedding_loss": 0.0153, "epoch": 2.6987763312286974, "grad_norm": 0.04141933098435402, "learning_rate": 1.6225065653461476e-05, "step": 48300 }, { "embedding_loss": 0.0163, "epoch": 2.7015700955467397, "grad_norm": 0.08646483719348907, "learning_rate": 1.621885728831027e-05, "step": 48350 }, { "embedding_loss": 0.0164, "epoch": 2.704363859864782, "grad_norm": 0.10004289448261261, "learning_rate": 1.6212648923159067e-05, "step": 48400 }, { "embedding_loss": 0.0158, "epoch": 2.707157624182824, "grad_norm": 0.0878872200846672, "learning_rate": 1.620644055800786e-05, "step": 48450 }, { "embedding_loss": 0.0175, "epoch": 2.709951388500866, "grad_norm": 3.0428199768066406, "learning_rate": 1.6200232192856658e-05, "step": 48500 }, { "embedding_loss": 0.0167, "epoch": 2.712745152818908, "grad_norm": 0.15698842704296112, "learning_rate": 1.6194023827705452e-05, "step": 48550 }, { "embedding_loss": 0.0168, "epoch": 2.7155389171369504, "grad_norm": 0.10238616168498993, "learning_rate": 1.6187815462554246e-05, "step": 48600 }, { "embedding_loss": 0.0174, "epoch": 2.718332681454992, "grad_norm": 0.1541004627943039, "learning_rate": 1.6181607097403043e-05, "step": 48650 }, { "embedding_loss": 0.0159, "epoch": 2.7211264457730344, "grad_norm": 0.2722359001636505, "learning_rate": 1.6175398732251837e-05, "step": 48700 }, { "embedding_loss": 0.0158, "epoch": 2.7239202100910767, "grad_norm": 0.07985975593328476, "learning_rate": 1.6169190367100634e-05, "step": 48750 }, { "embedding_loss": 0.0166, "epoch": 2.726713974409119, "grad_norm": 0.05857423320412636, "learning_rate": 1.6162982001949428e-05, "step": 48800 }, { "embedding_loss": 0.0152, "epoch": 2.729507738727161, "grad_norm": 0.12879028916358948, "learning_rate": 1.6156773636798225e-05, "step": 48850 }, { "embedding_loss": 0.0172, "epoch": 2.7323015030452034, "grad_norm": 0.07010383158922195, "learning_rate": 1.615056527164702e-05, "step": 48900 }, { "embedding_loss": 0.0175, "epoch": 2.735095267363245, "grad_norm": 0.1771743893623352, "learning_rate": 1.6144356906495813e-05, "step": 48950 }, { "embedding_loss": 0.0173, "epoch": 2.7378890316812874, "grad_norm": 0.05862588435411453, "learning_rate": 1.613814854134461e-05, "step": 49000 }, { "embedding_loss": 0.0168, "epoch": 2.7406827959993296, "grad_norm": 0.06493280082941055, "learning_rate": 1.6131940176193404e-05, "step": 49050 }, { "embedding_loss": 0.016, "epoch": 2.7434765603173714, "grad_norm": 0.0487634651362896, "learning_rate": 1.61257318110422e-05, "step": 49100 }, { "embedding_loss": 0.016, "epoch": 2.7462703246354137, "grad_norm": 0.11700937151908875, "learning_rate": 1.6119523445890995e-05, "step": 49150 }, { "embedding_loss": 0.0137, "epoch": 2.749064088953456, "grad_norm": 0.1360594630241394, "learning_rate": 1.6113315080739792e-05, "step": 49200 }, { "embedding_loss": 0.0168, "epoch": 2.751857853271498, "grad_norm": 0.0881398543715477, "learning_rate": 1.6107106715588586e-05, "step": 49250 }, { "embedding_loss": 0.0158, "epoch": 2.7546516175895404, "grad_norm": 0.14189377427101135, "learning_rate": 1.6100898350437383e-05, "step": 49300 }, { "embedding_loss": 0.0176, "epoch": 2.757445381907582, "grad_norm": 0.18016374111175537, "learning_rate": 1.6094689985286177e-05, "step": 49350 }, { "embedding_loss": 0.0158, "epoch": 2.7602391462256244, "grad_norm": 0.07256406545639038, "learning_rate": 1.608848162013497e-05, "step": 49400 }, { "embedding_loss": 0.017, "epoch": 2.7630329105436666, "grad_norm": 0.1169869676232338, "learning_rate": 1.6082273254983768e-05, "step": 49450 }, { "embedding_loss": 0.0166, "epoch": 2.7658266748617084, "grad_norm": 0.0996696874499321, "learning_rate": 1.607606488983256e-05, "step": 49500 }, { "embedding_loss": 0.0176, "epoch": 2.7686204391797506, "grad_norm": 0.16963067650794983, "learning_rate": 1.606985652468136e-05, "step": 49550 }, { "embedding_loss": 0.0176, "epoch": 2.771414203497793, "grad_norm": 0.043379489332437515, "learning_rate": 1.6063648159530153e-05, "step": 49600 }, { "embedding_loss": 0.0157, "epoch": 2.774207967815835, "grad_norm": 0.05719267576932907, "learning_rate": 1.605743979437895e-05, "step": 49650 }, { "embedding_loss": 0.0166, "epoch": 2.7770017321338774, "grad_norm": 0.16893072426319122, "learning_rate": 1.6051231429227744e-05, "step": 49700 }, { "embedding_loss": 0.0155, "epoch": 2.7797954964519196, "grad_norm": 0.1094290241599083, "learning_rate": 1.6045023064076537e-05, "step": 49750 }, { "embedding_loss": 0.0168, "epoch": 2.7825892607699614, "grad_norm": 0.16731181740760803, "learning_rate": 1.6038814698925335e-05, "step": 49800 }, { "embedding_loss": 0.0165, "epoch": 2.7853830250880036, "grad_norm": 0.14269162714481354, "learning_rate": 1.603260633377413e-05, "step": 49850 }, { "embedding_loss": 0.0169, "epoch": 2.788176789406046, "grad_norm": 0.05558454617857933, "learning_rate": 1.6026397968622926e-05, "step": 49900 }, { "embedding_loss": 0.0141, "epoch": 2.7909705537240876, "grad_norm": 0.03616943210363388, "learning_rate": 1.602018960347172e-05, "step": 49950 }, { "embedding_loss": 0.0163, "epoch": 2.79376431804213, "grad_norm": 0.14351262152194977, "learning_rate": 1.6013981238320517e-05, "step": 50000 }, { "embedding_loss": 0.0149, "epoch": 2.796558082360172, "grad_norm": 0.04680691286921501, "learning_rate": 1.600777287316931e-05, "step": 50050 }, { "embedding_loss": 0.0167, "epoch": 2.7993518466782144, "grad_norm": 0.16730327904224396, "learning_rate": 1.6001564508018104e-05, "step": 50100 }, { "embedding_loss": 0.0169, "epoch": 2.8021456109962566, "grad_norm": 0.09103874862194061, "learning_rate": 1.59953561428669e-05, "step": 50150 }, { "embedding_loss": 0.0143, "epoch": 2.8049393753142984, "grad_norm": 0.04380844905972481, "learning_rate": 1.5989147777715695e-05, "step": 50200 }, { "embedding_loss": 0.0159, "epoch": 2.8077331396323406, "grad_norm": 0.164920836687088, "learning_rate": 1.5982939412564493e-05, "step": 50250 }, { "embedding_loss": 0.0163, "epoch": 2.810526903950383, "grad_norm": 0.08462988585233688, "learning_rate": 1.5976731047413286e-05, "step": 50300 }, { "embedding_loss": 0.0173, "epoch": 2.8133206682684246, "grad_norm": 0.3927801549434662, "learning_rate": 1.597052268226208e-05, "step": 50350 }, { "embedding_loss": 0.017, "epoch": 2.816114432586467, "grad_norm": 1.531477928161621, "learning_rate": 1.5964314317110877e-05, "step": 50400 }, { "embedding_loss": 0.0179, "epoch": 2.818908196904509, "grad_norm": 0.07166694849729538, "learning_rate": 1.595810595195967e-05, "step": 50450 }, { "embedding_loss": 0.0159, "epoch": 2.8217019612225513, "grad_norm": 0.11943697929382324, "learning_rate": 1.595189758680847e-05, "step": 50500 }, { "embedding_loss": 0.0159, "epoch": 2.8244957255405936, "grad_norm": 0.07947058230638504, "learning_rate": 1.5945689221657262e-05, "step": 50550 }, { "embedding_loss": 0.0163, "epoch": 2.8272894898586354, "grad_norm": 0.10713999718427658, "learning_rate": 1.5939480856506056e-05, "step": 50600 }, { "embedding_loss": 0.0181, "epoch": 2.8300832541766776, "grad_norm": 0.05351913347840309, "learning_rate": 1.5933272491354853e-05, "step": 50650 }, { "embedding_loss": 0.0173, "epoch": 2.83287701849472, "grad_norm": 0.22406722605228424, "learning_rate": 1.5927064126203647e-05, "step": 50700 }, { "embedding_loss": 0.0177, "epoch": 2.835670782812762, "grad_norm": 2.142700672149658, "learning_rate": 1.5920855761052444e-05, "step": 50750 }, { "embedding_loss": 0.0156, "epoch": 2.838464547130804, "grad_norm": 0.09379539638757706, "learning_rate": 1.5914647395901238e-05, "step": 50800 }, { "embedding_loss": 0.0161, "epoch": 2.841258311448846, "grad_norm": 0.07283169031143188, "learning_rate": 1.5908439030750032e-05, "step": 50850 }, { "embedding_loss": 0.0168, "epoch": 2.8440520757668883, "grad_norm": 0.07801681756973267, "learning_rate": 1.590223066559883e-05, "step": 50900 }, { "embedding_loss": 0.0171, "epoch": 2.8468458400849306, "grad_norm": 0.09356974810361862, "learning_rate": 1.5896022300447623e-05, "step": 50950 }, { "embedding_loss": 0.0168, "epoch": 2.849639604402973, "grad_norm": 0.06890945881605148, "learning_rate": 1.588981393529642e-05, "step": 51000 }, { "embedding_loss": 0.0179, "epoch": 2.8524333687210146, "grad_norm": 0.07943091541528702, "learning_rate": 1.5883605570145214e-05, "step": 51050 }, { "embedding_loss": 0.0155, "epoch": 2.855227133039057, "grad_norm": 0.10951752960681915, "learning_rate": 1.5877397204994008e-05, "step": 51100 }, { "embedding_loss": 0.0191, "epoch": 2.858020897357099, "grad_norm": 0.04391436651349068, "learning_rate": 1.5871188839842805e-05, "step": 51150 }, { "embedding_loss": 0.0169, "epoch": 2.860814661675141, "grad_norm": 0.11143721640110016, "learning_rate": 1.58649804746916e-05, "step": 51200 }, { "embedding_loss": 0.0154, "epoch": 2.863608425993183, "grad_norm": 0.040650397539138794, "learning_rate": 1.5858772109540396e-05, "step": 51250 }, { "embedding_loss": 0.0171, "epoch": 2.8664021903112253, "grad_norm": 0.08657237142324448, "learning_rate": 1.585256374438919e-05, "step": 51300 }, { "embedding_loss": 0.014, "epoch": 2.8691959546292676, "grad_norm": 0.08874646574258804, "learning_rate": 1.5846355379237987e-05, "step": 51350 }, { "embedding_loss": 0.0151, "epoch": 2.87198971894731, "grad_norm": 0.1176847442984581, "learning_rate": 1.584014701408678e-05, "step": 51400 }, { "embedding_loss": 0.0154, "epoch": 2.8747834832653516, "grad_norm": 0.08161048591136932, "learning_rate": 1.5833938648935575e-05, "step": 51450 }, { "embedding_loss": 0.0155, "epoch": 2.877577247583394, "grad_norm": 0.09063083678483963, "learning_rate": 1.5827730283784372e-05, "step": 51500 }, { "embedding_loss": 0.0173, "epoch": 2.880371011901436, "grad_norm": 0.054431721568107605, "learning_rate": 1.5821521918633166e-05, "step": 51550 }, { "embedding_loss": 0.0166, "epoch": 2.883164776219478, "grad_norm": 0.09779559075832367, "learning_rate": 1.5815313553481963e-05, "step": 51600 }, { "embedding_loss": 0.0161, "epoch": 2.88595854053752, "grad_norm": 0.10543184727430344, "learning_rate": 1.5809105188330757e-05, "step": 51650 }, { "embedding_loss": 0.0175, "epoch": 2.8887523048555623, "grad_norm": 0.12498010694980621, "learning_rate": 1.5802896823179554e-05, "step": 51700 }, { "embedding_loss": 0.0159, "epoch": 2.8915460691736046, "grad_norm": 0.08094384521245956, "learning_rate": 1.5796688458028348e-05, "step": 51750 }, { "embedding_loss": 0.0152, "epoch": 2.894339833491647, "grad_norm": 0.07796797156333923, "learning_rate": 1.579048009287714e-05, "step": 51800 }, { "embedding_loss": 0.0169, "epoch": 2.897133597809689, "grad_norm": 0.16206777095794678, "learning_rate": 1.578427172772594e-05, "step": 51850 }, { "embedding_loss": 0.0163, "epoch": 2.899927362127731, "grad_norm": 0.15665946900844574, "learning_rate": 1.5778063362574733e-05, "step": 51900 }, { "embedding_loss": 0.0166, "epoch": 2.902721126445773, "grad_norm": 0.11035211384296417, "learning_rate": 1.577185499742353e-05, "step": 51950 }, { "embedding_loss": 0.0162, "epoch": 2.9055148907638153, "grad_norm": 0.11409079283475876, "learning_rate": 1.5765646632272324e-05, "step": 52000 }, { "embedding_loss": 0.0154, "epoch": 2.908308655081857, "grad_norm": 0.21083936095237732, "learning_rate": 1.575943826712112e-05, "step": 52050 }, { "embedding_loss": 0.0164, "epoch": 2.9111024193998993, "grad_norm": 0.11316119879484177, "learning_rate": 1.5753229901969915e-05, "step": 52100 }, { "embedding_loss": 0.0159, "epoch": 2.9138961837179416, "grad_norm": 0.17598849534988403, "learning_rate": 1.5747021536818712e-05, "step": 52150 }, { "embedding_loss": 0.0164, "epoch": 2.916689948035984, "grad_norm": 0.15292847156524658, "learning_rate": 1.5740813171667506e-05, "step": 52200 }, { "embedding_loss": 0.0154, "epoch": 2.919483712354026, "grad_norm": 0.13067051768302917, "learning_rate": 1.57346048065163e-05, "step": 52250 }, { "embedding_loss": 0.0161, "epoch": 2.922277476672068, "grad_norm": 0.05509359762072563, "learning_rate": 1.5728396441365097e-05, "step": 52300 }, { "embedding_loss": 0.0156, "epoch": 2.92507124099011, "grad_norm": 0.4551430642604828, "learning_rate": 1.572218807621389e-05, "step": 52350 }, { "embedding_loss": 0.0176, "epoch": 2.9278650053081523, "grad_norm": 0.14360308647155762, "learning_rate": 1.5715979711062688e-05, "step": 52400 }, { "embedding_loss": 0.0155, "epoch": 2.930658769626194, "grad_norm": 0.052099525928497314, "learning_rate": 1.570977134591148e-05, "step": 52450 }, { "embedding_loss": 0.0172, "epoch": 2.9334525339442363, "grad_norm": 1.0601643323898315, "learning_rate": 1.570356298076028e-05, "step": 52500 }, { "embedding_loss": 0.0169, "epoch": 2.9362462982622786, "grad_norm": 0.16926689445972443, "learning_rate": 1.5697354615609073e-05, "step": 52550 }, { "embedding_loss": 0.015, "epoch": 2.939040062580321, "grad_norm": 0.12402792274951935, "learning_rate": 1.569114625045787e-05, "step": 52600 }, { "embedding_loss": 0.0184, "epoch": 2.941833826898363, "grad_norm": 0.07562118023633957, "learning_rate": 1.5684937885306664e-05, "step": 52650 }, { "embedding_loss": 0.0173, "epoch": 2.944627591216405, "grad_norm": 0.10201694816350937, "learning_rate": 1.5678729520155457e-05, "step": 52700 }, { "embedding_loss": 0.0164, "epoch": 2.947421355534447, "grad_norm": 0.08113735914230347, "learning_rate": 1.5672521155004255e-05, "step": 52750 }, { "embedding_loss": 0.0167, "epoch": 2.9502151198524893, "grad_norm": 0.04618079587817192, "learning_rate": 1.566631278985305e-05, "step": 52800 }, { "embedding_loss": 0.0184, "epoch": 2.9530088841705315, "grad_norm": 0.11442209780216217, "learning_rate": 1.5660104424701846e-05, "step": 52850 }, { "embedding_loss": 0.0173, "epoch": 2.9558026484885733, "grad_norm": 0.09475077688694, "learning_rate": 1.565389605955064e-05, "step": 52900 }, { "embedding_loss": 0.0168, "epoch": 2.9585964128066156, "grad_norm": 0.08267086744308472, "learning_rate": 1.5647687694399437e-05, "step": 52950 }, { "embedding_loss": 0.0173, "epoch": 2.961390177124658, "grad_norm": 0.07875095307826996, "learning_rate": 1.564147932924823e-05, "step": 53000 }, { "embedding_loss": 0.0153, "epoch": 2.9641839414427, "grad_norm": 0.12809091806411743, "learning_rate": 1.5635270964097024e-05, "step": 53050 }, { "embedding_loss": 0.0167, "epoch": 2.9669777057607423, "grad_norm": 0.11608462780714035, "learning_rate": 1.562906259894582e-05, "step": 53100 }, { "embedding_loss": 0.0177, "epoch": 2.969771470078784, "grad_norm": 0.042708996683359146, "learning_rate": 1.5622854233794615e-05, "step": 53150 }, { "embedding_loss": 0.018, "epoch": 2.9725652343968263, "grad_norm": 0.07037701457738876, "learning_rate": 1.5616645868643413e-05, "step": 53200 }, { "embedding_loss": 0.0156, "epoch": 2.9753589987148685, "grad_norm": 0.07375723123550415, "learning_rate": 1.5610437503492206e-05, "step": 53250 }, { "embedding_loss": 0.0181, "epoch": 2.9781527630329103, "grad_norm": 0.0696462020277977, "learning_rate": 1.5604229138341004e-05, "step": 53300 }, { "embedding_loss": 0.0151, "epoch": 2.9809465273509526, "grad_norm": 0.06957315653562546, "learning_rate": 1.5598020773189797e-05, "step": 53350 }, { "embedding_loss": 0.0166, "epoch": 2.983740291668995, "grad_norm": 0.1365799903869629, "learning_rate": 1.5591812408038595e-05, "step": 53400 }, { "embedding_loss": 0.0151, "epoch": 2.986534055987037, "grad_norm": 0.04794590547680855, "learning_rate": 1.558560404288739e-05, "step": 53450 }, { "embedding_loss": 0.0196, "epoch": 2.9893278203050793, "grad_norm": 0.051481954753398895, "learning_rate": 1.5579395677736182e-05, "step": 53500 }, { "embedding_loss": 0.0168, "epoch": 2.992121584623121, "grad_norm": 0.45760828256607056, "learning_rate": 1.557318731258498e-05, "step": 53550 }, { "embedding_loss": 0.0191, "epoch": 2.9949153489411633, "grad_norm": 0.4399777948856354, "learning_rate": 1.5566978947433773e-05, "step": 53600 }, { "embedding_loss": 0.017, "epoch": 2.9977091132592055, "grad_norm": 0.04966984689235687, "learning_rate": 1.556077058228257e-05, "step": 53650 }, { "embedding_loss": 0.017, "epoch": 3.0005028775772478, "grad_norm": 0.13330920040607452, "learning_rate": 1.5554562217131364e-05, "step": 53700 }, { "embedding_loss": 0.0153, "epoch": 3.0032966418952896, "grad_norm": 0.16959232091903687, "learning_rate": 1.554835385198016e-05, "step": 53750 }, { "embedding_loss": 0.017, "epoch": 3.006090406213332, "grad_norm": 0.06061380356550217, "learning_rate": 1.5542145486828955e-05, "step": 53800 }, { "embedding_loss": 0.0142, "epoch": 3.008884170531374, "grad_norm": 0.05335818603634834, "learning_rate": 1.553593712167775e-05, "step": 53850 }, { "embedding_loss": 0.0161, "epoch": 3.0116779348494163, "grad_norm": 0.060044437646865845, "learning_rate": 1.5529728756526546e-05, "step": 53900 }, { "embedding_loss": 0.0181, "epoch": 3.014471699167458, "grad_norm": 0.07836839556694031, "learning_rate": 1.552352039137534e-05, "step": 53950 }, { "embedding_loss": 0.0172, "epoch": 3.0172654634855003, "grad_norm": 0.21170204877853394, "learning_rate": 1.5517312026224137e-05, "step": 54000 }, { "embedding_loss": 0.0157, "epoch": 3.0200592278035425, "grad_norm": 0.08790968358516693, "learning_rate": 1.551110366107293e-05, "step": 54050 }, { "embedding_loss": 0.0149, "epoch": 3.0228529921215848, "grad_norm": 0.08240114152431488, "learning_rate": 1.550489529592173e-05, "step": 54100 }, { "embedding_loss": 0.0183, "epoch": 3.0256467564396266, "grad_norm": 0.08234784752130508, "learning_rate": 1.5498686930770522e-05, "step": 54150 }, { "embedding_loss": 0.0159, "epoch": 3.028440520757669, "grad_norm": 0.09992218017578125, "learning_rate": 1.5492478565619316e-05, "step": 54200 }, { "embedding_loss": 0.0154, "epoch": 3.031234285075711, "grad_norm": 0.039403725415468216, "learning_rate": 1.5486270200468113e-05, "step": 54250 }, { "embedding_loss": 0.0181, "epoch": 3.0340280493937533, "grad_norm": 0.11415615677833557, "learning_rate": 1.5480061835316907e-05, "step": 54300 }, { "embedding_loss": 0.0159, "epoch": 3.0368218137117955, "grad_norm": 0.1330028474330902, "learning_rate": 1.5473853470165704e-05, "step": 54350 }, { "embedding_loss": 0.0147, "epoch": 3.0396155780298373, "grad_norm": 0.08054976165294647, "learning_rate": 1.5467645105014498e-05, "step": 54400 }, { "embedding_loss": 0.0171, "epoch": 3.0424093423478795, "grad_norm": 0.07174434512853622, "learning_rate": 1.5461436739863292e-05, "step": 54450 }, { "embedding_loss": 0.0156, "epoch": 3.0452031066659218, "grad_norm": 0.6175737977027893, "learning_rate": 1.545522837471209e-05, "step": 54500 }, { "embedding_loss": 0.0191, "epoch": 3.047996870983964, "grad_norm": 0.1456877887248993, "learning_rate": 1.5449020009560883e-05, "step": 54550 }, { "embedding_loss": 0.017, "epoch": 3.050790635302006, "grad_norm": 0.05615125969052315, "learning_rate": 1.544281164440968e-05, "step": 54600 }, { "embedding_loss": 0.0162, "epoch": 3.053584399620048, "grad_norm": 0.08139850944280624, "learning_rate": 1.5436603279258474e-05, "step": 54650 }, { "embedding_loss": 0.0164, "epoch": 3.0563781639380903, "grad_norm": 0.09070863574743271, "learning_rate": 1.5430394914107268e-05, "step": 54700 }, { "embedding_loss": 0.0159, "epoch": 3.0591719282561325, "grad_norm": 0.07674960047006607, "learning_rate": 1.5424186548956065e-05, "step": 54750 }, { "embedding_loss": 0.0158, "epoch": 3.0619656925741743, "grad_norm": 0.0786769762635231, "learning_rate": 1.541797818380486e-05, "step": 54800 }, { "embedding_loss": 0.0162, "epoch": 3.0647594568922165, "grad_norm": 0.0890473872423172, "learning_rate": 1.5411769818653656e-05, "step": 54850 }, { "embedding_loss": 0.0171, "epoch": 3.0675532212102588, "grad_norm": 0.07106131315231323, "learning_rate": 1.540556145350245e-05, "step": 54900 }, { "embedding_loss": 0.0156, "epoch": 3.070346985528301, "grad_norm": 0.2797349989414215, "learning_rate": 1.5399353088351244e-05, "step": 54950 }, { "embedding_loss": 0.0156, "epoch": 3.073140749846343, "grad_norm": 0.08088713884353638, "learning_rate": 1.539314472320004e-05, "step": 55000 }, { "embedding_loss": 0.0151, "epoch": 3.075934514164385, "grad_norm": 0.11672069877386093, "learning_rate": 1.5386936358048835e-05, "step": 55050 }, { "embedding_loss": 0.0156, "epoch": 3.0787282784824272, "grad_norm": 0.21230578422546387, "learning_rate": 1.5380727992897632e-05, "step": 55100 }, { "embedding_loss": 0.0174, "epoch": 3.0815220428004695, "grad_norm": 0.056653883308172226, "learning_rate": 1.5374519627746426e-05, "step": 55150 }, { "embedding_loss": 0.0151, "epoch": 3.0843158071185113, "grad_norm": 0.06914689391851425, "learning_rate": 1.536831126259522e-05, "step": 55200 }, { "embedding_loss": 0.0173, "epoch": 3.0871095714365535, "grad_norm": 0.07307954132556915, "learning_rate": 1.5362102897444017e-05, "step": 55250 }, { "embedding_loss": 0.0164, "epoch": 3.0899033357545957, "grad_norm": 0.06976005434989929, "learning_rate": 1.535589453229281e-05, "step": 55300 }, { "embedding_loss": 0.017, "epoch": 3.092697100072638, "grad_norm": 0.07621049880981445, "learning_rate": 1.5349686167141608e-05, "step": 55350 }, { "embedding_loss": 0.0185, "epoch": 3.0954908643906798, "grad_norm": 0.04087832570075989, "learning_rate": 1.5343477801990402e-05, "step": 55400 }, { "embedding_loss": 0.016, "epoch": 3.098284628708722, "grad_norm": 0.10743864625692368, "learning_rate": 1.53372694368392e-05, "step": 55450 }, { "embedding_loss": 0.0153, "epoch": 3.1010783930267642, "grad_norm": 0.13710716366767883, "learning_rate": 1.5331061071687993e-05, "step": 55500 }, { "embedding_loss": 0.0158, "epoch": 3.1038721573448065, "grad_norm": 0.12220486253499985, "learning_rate": 1.5324852706536787e-05, "step": 55550 }, { "embedding_loss": 0.0174, "epoch": 3.1066659216628487, "grad_norm": 0.09597012400627136, "learning_rate": 1.5318644341385584e-05, "step": 55600 }, { "embedding_loss": 0.0156, "epoch": 3.1094596859808905, "grad_norm": 0.09638319164514542, "learning_rate": 1.5312435976234378e-05, "step": 55650 }, { "embedding_loss": 0.0152, "epoch": 3.1122534502989327, "grad_norm": 0.09635279327630997, "learning_rate": 1.5306227611083175e-05, "step": 55700 }, { "embedding_loss": 0.0168, "epoch": 3.115047214616975, "grad_norm": 0.10007292032241821, "learning_rate": 1.530001924593197e-05, "step": 55750 }, { "embedding_loss": 0.0159, "epoch": 3.117840978935017, "grad_norm": 0.07540718466043472, "learning_rate": 1.5293810880780766e-05, "step": 55800 }, { "embedding_loss": 0.0158, "epoch": 3.120634743253059, "grad_norm": 0.09931398928165436, "learning_rate": 1.528760251562956e-05, "step": 55850 }, { "embedding_loss": 0.0148, "epoch": 3.1234285075711012, "grad_norm": 0.09706474095582962, "learning_rate": 1.5281394150478353e-05, "step": 55900 }, { "embedding_loss": 0.014, "epoch": 3.1262222718891435, "grad_norm": 0.07098235934972763, "learning_rate": 1.527518578532715e-05, "step": 55950 }, { "embedding_loss": 0.0177, "epoch": 3.1290160362071857, "grad_norm": 0.07582474499940872, "learning_rate": 1.5268977420175945e-05, "step": 56000 }, { "embedding_loss": 0.017, "epoch": 3.1318098005252275, "grad_norm": 0.09147030860185623, "learning_rate": 1.5262769055024742e-05, "step": 56050 }, { "embedding_loss": 0.0168, "epoch": 3.1346035648432697, "grad_norm": 0.0812092125415802, "learning_rate": 1.5256560689873536e-05, "step": 56100 }, { "embedding_loss": 0.0179, "epoch": 3.137397329161312, "grad_norm": 0.08061935007572174, "learning_rate": 1.5250352324722333e-05, "step": 56150 }, { "embedding_loss": 0.014, "epoch": 3.140191093479354, "grad_norm": 0.09138236939907074, "learning_rate": 1.5244143959571127e-05, "step": 56200 }, { "embedding_loss": 0.0163, "epoch": 3.142984857797396, "grad_norm": 0.05458563566207886, "learning_rate": 1.5237935594419924e-05, "step": 56250 }, { "embedding_loss": 0.0155, "epoch": 3.1457786221154382, "grad_norm": 0.15696491301059723, "learning_rate": 1.5231727229268718e-05, "step": 56300 }, { "embedding_loss": 0.0173, "epoch": 3.1485723864334805, "grad_norm": 0.08484967052936554, "learning_rate": 1.5225518864117511e-05, "step": 56350 }, { "embedding_loss": 0.017, "epoch": 3.1513661507515227, "grad_norm": 0.044692397117614746, "learning_rate": 1.5219310498966309e-05, "step": 56400 }, { "embedding_loss": 0.0153, "epoch": 3.154159915069565, "grad_norm": 0.06900777667760849, "learning_rate": 1.5213102133815102e-05, "step": 56450 }, { "embedding_loss": 0.0156, "epoch": 3.1569536793876067, "grad_norm": 0.06301694363355637, "learning_rate": 1.52068937686639e-05, "step": 56500 }, { "embedding_loss": 0.0158, "epoch": 3.159747443705649, "grad_norm": 0.09797124564647675, "learning_rate": 1.5200685403512693e-05, "step": 56550 }, { "embedding_loss": 0.0174, "epoch": 3.162541208023691, "grad_norm": 0.07383287698030472, "learning_rate": 1.519447703836149e-05, "step": 56600 }, { "embedding_loss": 0.0184, "epoch": 3.1653349723417334, "grad_norm": 0.06480072438716888, "learning_rate": 1.5188268673210284e-05, "step": 56650 }, { "embedding_loss": 0.0153, "epoch": 3.1681287366597752, "grad_norm": 0.06945301592350006, "learning_rate": 1.5182060308059082e-05, "step": 56700 }, { "embedding_loss": 0.0151, "epoch": 3.1709225009778175, "grad_norm": 0.14317810535430908, "learning_rate": 1.5175851942907876e-05, "step": 56750 }, { "embedding_loss": 0.0158, "epoch": 3.1737162652958597, "grad_norm": 0.06953635066747665, "learning_rate": 1.516964357775667e-05, "step": 56800 }, { "embedding_loss": 0.0168, "epoch": 3.176510029613902, "grad_norm": 0.06871345639228821, "learning_rate": 1.5163435212605467e-05, "step": 56850 }, { "embedding_loss": 0.0152, "epoch": 3.1793037939319437, "grad_norm": 0.13017289340496063, "learning_rate": 1.515722684745426e-05, "step": 56900 }, { "embedding_loss": 0.0163, "epoch": 3.182097558249986, "grad_norm": 0.08666317164897919, "learning_rate": 1.5151018482303058e-05, "step": 56950 }, { "embedding_loss": 0.0155, "epoch": 3.184891322568028, "grad_norm": 0.16238130629062653, "learning_rate": 1.5144810117151851e-05, "step": 57000 }, { "embedding_loss": 0.0171, "epoch": 3.1876850868860704, "grad_norm": 0.06403030455112457, "learning_rate": 1.5138601752000649e-05, "step": 57050 }, { "embedding_loss": 0.0164, "epoch": 3.1904788512041122, "grad_norm": 0.06301498413085938, "learning_rate": 1.5132393386849442e-05, "step": 57100 }, { "embedding_loss": 0.0165, "epoch": 3.1932726155221545, "grad_norm": 0.23764283955097198, "learning_rate": 1.5126185021698236e-05, "step": 57150 }, { "embedding_loss": 0.0157, "epoch": 3.1960663798401967, "grad_norm": 0.11183730512857437, "learning_rate": 1.5119976656547033e-05, "step": 57200 }, { "embedding_loss": 0.0184, "epoch": 3.198860144158239, "grad_norm": 0.10413751751184464, "learning_rate": 1.5113768291395827e-05, "step": 57250 }, { "embedding_loss": 0.0163, "epoch": 3.201653908476281, "grad_norm": 0.06358394026756287, "learning_rate": 1.5107559926244624e-05, "step": 57300 }, { "embedding_loss": 0.0182, "epoch": 3.204447672794323, "grad_norm": 0.07351323217153549, "learning_rate": 1.5101351561093418e-05, "step": 57350 }, { "embedding_loss": 0.0151, "epoch": 3.207241437112365, "grad_norm": 0.10386596620082855, "learning_rate": 1.5095143195942214e-05, "step": 57400 }, { "embedding_loss": 0.0161, "epoch": 3.2100352014304074, "grad_norm": 0.056265074759721756, "learning_rate": 1.508893483079101e-05, "step": 57450 }, { "embedding_loss": 0.0155, "epoch": 3.2128289657484492, "grad_norm": 0.1125960499048233, "learning_rate": 1.5082726465639805e-05, "step": 57500 }, { "embedding_loss": 0.0163, "epoch": 3.2156227300664915, "grad_norm": 0.11972513794898987, "learning_rate": 1.50765181004886e-05, "step": 57550 }, { "embedding_loss": 0.0195, "epoch": 3.2184164943845337, "grad_norm": 0.09047409892082214, "learning_rate": 1.5070309735337394e-05, "step": 57600 }, { "embedding_loss": 0.0157, "epoch": 3.221210258702576, "grad_norm": 0.06715775281190872, "learning_rate": 1.506410137018619e-05, "step": 57650 }, { "embedding_loss": 0.0174, "epoch": 3.224004023020618, "grad_norm": 0.2761251926422119, "learning_rate": 1.5057893005034985e-05, "step": 57700 }, { "embedding_loss": 0.0167, "epoch": 3.22679778733866, "grad_norm": 0.10870678722858429, "learning_rate": 1.505168463988378e-05, "step": 57750 }, { "embedding_loss": 0.017, "epoch": 3.229591551656702, "grad_norm": 0.08178596198558807, "learning_rate": 1.5045476274732576e-05, "step": 57800 }, { "embedding_loss": 0.0153, "epoch": 3.2323853159747444, "grad_norm": 0.049840763211250305, "learning_rate": 1.5039267909581372e-05, "step": 57850 }, { "embedding_loss": 0.0164, "epoch": 3.2351790802927867, "grad_norm": 0.10061626881361008, "learning_rate": 1.5033059544430166e-05, "step": 57900 }, { "embedding_loss": 0.015, "epoch": 3.2379728446108285, "grad_norm": 0.10133026540279388, "learning_rate": 1.5026851179278961e-05, "step": 57950 }, { "embedding_loss": 0.0156, "epoch": 3.2407666089288707, "grad_norm": 0.11228039115667343, "learning_rate": 1.5020642814127757e-05, "step": 58000 }, { "embedding_loss": 0.0158, "epoch": 3.243560373246913, "grad_norm": 0.11657606810331345, "learning_rate": 1.5014434448976552e-05, "step": 58050 }, { "embedding_loss": 0.0148, "epoch": 3.246354137564955, "grad_norm": 0.1213662326335907, "learning_rate": 1.5008226083825348e-05, "step": 58100 }, { "embedding_loss": 0.0167, "epoch": 3.249147901882997, "grad_norm": 0.09607242047786713, "learning_rate": 1.5002017718674141e-05, "step": 58150 }, { "embedding_loss": 0.0146, "epoch": 3.251941666201039, "grad_norm": 0.07084544748067856, "learning_rate": 1.4995809353522939e-05, "step": 58200 }, { "embedding_loss": 0.0156, "epoch": 3.2547354305190814, "grad_norm": 0.11904755234718323, "learning_rate": 1.4989600988371732e-05, "step": 58250 }, { "embedding_loss": 0.0149, "epoch": 3.2575291948371237, "grad_norm": 0.05242356285452843, "learning_rate": 1.498339262322053e-05, "step": 58300 }, { "embedding_loss": 0.016, "epoch": 3.2603229591551655, "grad_norm": 0.37999510765075684, "learning_rate": 1.4977184258069324e-05, "step": 58350 }, { "embedding_loss": 0.0154, "epoch": 3.2631167234732077, "grad_norm": 0.036752067506313324, "learning_rate": 1.4970975892918117e-05, "step": 58400 }, { "embedding_loss": 0.0154, "epoch": 3.26591048779125, "grad_norm": 0.06058724597096443, "learning_rate": 1.4964767527766915e-05, "step": 58450 }, { "embedding_loss": 0.0159, "epoch": 3.268704252109292, "grad_norm": 0.03217652440071106, "learning_rate": 1.4958559162615708e-05, "step": 58500 }, { "embedding_loss": 0.0173, "epoch": 3.2714980164273344, "grad_norm": 0.09305162727832794, "learning_rate": 1.4952350797464506e-05, "step": 58550 }, { "embedding_loss": 0.0148, "epoch": 3.274291780745376, "grad_norm": 0.10874555259943008, "learning_rate": 1.49461424323133e-05, "step": 58600 }, { "embedding_loss": 0.0154, "epoch": 3.2770855450634184, "grad_norm": 0.08733858168125153, "learning_rate": 1.4939934067162097e-05, "step": 58650 }, { "embedding_loss": 0.0173, "epoch": 3.2798793093814607, "grad_norm": 0.16378502547740936, "learning_rate": 1.493372570201089e-05, "step": 58700 }, { "embedding_loss": 0.0154, "epoch": 3.282673073699503, "grad_norm": 0.050814948976039886, "learning_rate": 1.4927517336859688e-05, "step": 58750 }, { "embedding_loss": 0.0144, "epoch": 3.2854668380175447, "grad_norm": 0.06963200867176056, "learning_rate": 1.4921308971708481e-05, "step": 58800 }, { "embedding_loss": 0.0154, "epoch": 3.288260602335587, "grad_norm": 0.06667467206716537, "learning_rate": 1.4915100606557275e-05, "step": 58850 }, { "embedding_loss": 0.0181, "epoch": 3.291054366653629, "grad_norm": 0.08284938335418701, "learning_rate": 1.4908892241406072e-05, "step": 58900 }, { "embedding_loss": 0.0144, "epoch": 3.2938481309716714, "grad_norm": 0.06747850030660629, "learning_rate": 1.4902683876254866e-05, "step": 58950 }, { "embedding_loss": 0.0148, "epoch": 3.296641895289713, "grad_norm": 0.06907215714454651, "learning_rate": 1.4896475511103664e-05, "step": 59000 }, { "embedding_loss": 0.0157, "epoch": 3.2994356596077554, "grad_norm": 0.04683968052268028, "learning_rate": 1.4890267145952457e-05, "step": 59050 }, { "embedding_loss": 0.0186, "epoch": 3.3022294239257977, "grad_norm": 0.0757768452167511, "learning_rate": 1.4884058780801255e-05, "step": 59100 }, { "embedding_loss": 0.0148, "epoch": 3.30502318824384, "grad_norm": 0.10357359051704407, "learning_rate": 1.4877850415650048e-05, "step": 59150 }, { "embedding_loss": 0.0163, "epoch": 3.3078169525618817, "grad_norm": 0.09822243452072144, "learning_rate": 1.4871642050498842e-05, "step": 59200 }, { "embedding_loss": 0.0167, "epoch": 3.310610716879924, "grad_norm": 0.09923592209815979, "learning_rate": 1.486543368534764e-05, "step": 59250 }, { "embedding_loss": 0.0152, "epoch": 3.313404481197966, "grad_norm": 0.05795478820800781, "learning_rate": 1.4859225320196433e-05, "step": 59300 }, { "embedding_loss": 0.0162, "epoch": 3.3161982455160084, "grad_norm": 0.08099842071533203, "learning_rate": 1.485301695504523e-05, "step": 59350 }, { "embedding_loss": 0.0159, "epoch": 3.3189920098340506, "grad_norm": 0.0876547172665596, "learning_rate": 1.4846808589894024e-05, "step": 59400 }, { "embedding_loss": 0.0163, "epoch": 3.3217857741520924, "grad_norm": 0.09747115522623062, "learning_rate": 1.484060022474282e-05, "step": 59450 }, { "embedding_loss": 0.0159, "epoch": 3.3245795384701347, "grad_norm": 0.14204560220241547, "learning_rate": 1.4834391859591615e-05, "step": 59500 }, { "embedding_loss": 0.0148, "epoch": 3.327373302788177, "grad_norm": 0.10611604154109955, "learning_rate": 1.482818349444041e-05, "step": 59550 }, { "embedding_loss": 0.0173, "epoch": 3.3301670671062187, "grad_norm": 0.09643644094467163, "learning_rate": 1.4821975129289206e-05, "step": 59600 }, { "embedding_loss": 0.0157, "epoch": 3.332960831424261, "grad_norm": 0.07572575658559799, "learning_rate": 1.4815766764138e-05, "step": 59650 }, { "embedding_loss": 0.0163, "epoch": 3.335754595742303, "grad_norm": 0.08681756258010864, "learning_rate": 1.4809558398986796e-05, "step": 59700 }, { "embedding_loss": 0.0193, "epoch": 3.3385483600603454, "grad_norm": 0.1280823051929474, "learning_rate": 1.4803350033835591e-05, "step": 59750 }, { "embedding_loss": 0.017, "epoch": 3.3413421243783876, "grad_norm": 0.03547672554850578, "learning_rate": 1.4797141668684387e-05, "step": 59800 }, { "embedding_loss": 0.0165, "epoch": 3.3441358886964294, "grad_norm": 0.08627686649560928, "learning_rate": 1.4790933303533182e-05, "step": 59850 }, { "embedding_loss": 0.0164, "epoch": 3.3469296530144717, "grad_norm": 0.06184742972254753, "learning_rate": 1.4784724938381978e-05, "step": 59900 }, { "embedding_loss": 0.0158, "epoch": 3.349723417332514, "grad_norm": 0.12432778626680374, "learning_rate": 1.4778516573230772e-05, "step": 59950 }, { "embedding_loss": 0.0162, "epoch": 3.352517181650556, "grad_norm": 0.07285565137863159, "learning_rate": 1.4772308208079567e-05, "step": 60000 }, { "embedding_loss": 0.0185, "epoch": 3.355310945968598, "grad_norm": 0.09606505185365677, "learning_rate": 1.4766099842928363e-05, "step": 60050 }, { "embedding_loss": 0.0168, "epoch": 3.35810471028664, "grad_norm": 0.103633351624012, "learning_rate": 1.4759891477777158e-05, "step": 60100 }, { "embedding_loss": 0.0163, "epoch": 3.3608984746046824, "grad_norm": 0.043925948441028595, "learning_rate": 1.4753683112625954e-05, "step": 60150 }, { "embedding_loss": 0.0165, "epoch": 3.3636922389227246, "grad_norm": 0.08759208768606186, "learning_rate": 1.4747474747474747e-05, "step": 60200 }, { "embedding_loss": 0.0155, "epoch": 3.366486003240767, "grad_norm": 0.12016693502664566, "learning_rate": 1.4741266382323545e-05, "step": 60250 }, { "embedding_loss": 0.0161, "epoch": 3.3692797675588086, "grad_norm": 0.07255938649177551, "learning_rate": 1.4735058017172338e-05, "step": 60300 }, { "embedding_loss": 0.0155, "epoch": 3.372073531876851, "grad_norm": 0.08322722464799881, "learning_rate": 1.4728849652021136e-05, "step": 60350 }, { "embedding_loss": 0.0168, "epoch": 3.374867296194893, "grad_norm": 0.1419384926557541, "learning_rate": 1.472264128686993e-05, "step": 60400 }, { "embedding_loss": 0.0153, "epoch": 3.377661060512935, "grad_norm": 0.059970781207084656, "learning_rate": 1.4716432921718723e-05, "step": 60450 }, { "embedding_loss": 0.0148, "epoch": 3.380454824830977, "grad_norm": 0.04130103439092636, "learning_rate": 1.471022455656752e-05, "step": 60500 }, { "embedding_loss": 0.0164, "epoch": 3.3832485891490194, "grad_norm": 0.13637390732765198, "learning_rate": 1.4704016191416314e-05, "step": 60550 }, { "embedding_loss": 0.0167, "epoch": 3.3860423534670616, "grad_norm": 0.09652264416217804, "learning_rate": 1.4697807826265111e-05, "step": 60600 }, { "embedding_loss": 0.0166, "epoch": 3.388836117785104, "grad_norm": 0.08826347440481186, "learning_rate": 1.4691599461113905e-05, "step": 60650 }, { "embedding_loss": 0.0156, "epoch": 3.3916298821031456, "grad_norm": 0.07587704807519913, "learning_rate": 1.4685391095962703e-05, "step": 60700 }, { "embedding_loss": 0.0147, "epoch": 3.394423646421188, "grad_norm": 0.08993706107139587, "learning_rate": 1.4679182730811496e-05, "step": 60750 }, { "embedding_loss": 0.015, "epoch": 3.39721741073923, "grad_norm": 0.05457685887813568, "learning_rate": 1.4672974365660294e-05, "step": 60800 }, { "embedding_loss": 0.0152, "epoch": 3.4000111750572724, "grad_norm": 0.1418287456035614, "learning_rate": 1.4666766000509087e-05, "step": 60850 }, { "embedding_loss": 0.016, "epoch": 3.402804939375314, "grad_norm": 0.11760576069355011, "learning_rate": 1.4660557635357881e-05, "step": 60900 }, { "embedding_loss": 0.0139, "epoch": 3.4055987036933564, "grad_norm": 0.08234357088804245, "learning_rate": 1.4654349270206678e-05, "step": 60950 }, { "embedding_loss": 0.0167, "epoch": 3.4083924680113986, "grad_norm": 0.1018122136592865, "learning_rate": 1.4648140905055472e-05, "step": 61000 }, { "embedding_loss": 0.0161, "epoch": 3.411186232329441, "grad_norm": 0.047082170844078064, "learning_rate": 1.464193253990427e-05, "step": 61050 }, { "embedding_loss": 0.0149, "epoch": 3.4139799966474826, "grad_norm": 0.06977860629558563, "learning_rate": 1.4635724174753063e-05, "step": 61100 }, { "embedding_loss": 0.0165, "epoch": 3.416773760965525, "grad_norm": 0.22472332417964935, "learning_rate": 1.462951580960186e-05, "step": 61150 }, { "embedding_loss": 0.0159, "epoch": 3.419567525283567, "grad_norm": 0.08405973017215729, "learning_rate": 1.4623307444450654e-05, "step": 61200 }, { "embedding_loss": 0.0182, "epoch": 3.4223612896016093, "grad_norm": 0.15756012499332428, "learning_rate": 1.4617099079299448e-05, "step": 61250 }, { "embedding_loss": 0.0169, "epoch": 3.425155053919651, "grad_norm": 0.09220718592405319, "learning_rate": 1.4610890714148245e-05, "step": 61300 }, { "embedding_loss": 0.0169, "epoch": 3.4279488182376934, "grad_norm": 0.0937676653265953, "learning_rate": 1.4604682348997039e-05, "step": 61350 }, { "embedding_loss": 0.017, "epoch": 3.4307425825557356, "grad_norm": 0.13955208659172058, "learning_rate": 1.4598473983845836e-05, "step": 61400 }, { "embedding_loss": 0.0165, "epoch": 3.433536346873778, "grad_norm": 0.08823235332965851, "learning_rate": 1.459226561869463e-05, "step": 61450 }, { "embedding_loss": 0.0165, "epoch": 3.43633011119182, "grad_norm": 0.06962257623672485, "learning_rate": 1.4586057253543426e-05, "step": 61500 }, { "embedding_loss": 0.0172, "epoch": 3.439123875509862, "grad_norm": 0.10434354096651077, "learning_rate": 1.4579848888392221e-05, "step": 61550 }, { "embedding_loss": 0.0176, "epoch": 3.441917639827904, "grad_norm": 4.944464683532715, "learning_rate": 1.4573640523241017e-05, "step": 61600 }, { "embedding_loss": 0.0165, "epoch": 3.4447114041459463, "grad_norm": 0.14361931383609772, "learning_rate": 1.4567432158089812e-05, "step": 61650 }, { "embedding_loss": 0.016, "epoch": 3.447505168463988, "grad_norm": 0.08393078297376633, "learning_rate": 1.4561223792938606e-05, "step": 61700 }, { "embedding_loss": 0.0144, "epoch": 3.4502989327820304, "grad_norm": 0.06295778602361679, "learning_rate": 1.4555015427787402e-05, "step": 61750 }, { "embedding_loss": 0.0156, "epoch": 3.4530926971000726, "grad_norm": 0.06394372880458832, "learning_rate": 1.4548807062636197e-05, "step": 61800 }, { "embedding_loss": 0.0172, "epoch": 3.455886461418115, "grad_norm": 0.058186259120702744, "learning_rate": 1.4542598697484993e-05, "step": 61850 }, { "embedding_loss": 0.0152, "epoch": 3.458680225736157, "grad_norm": 0.0712067261338234, "learning_rate": 1.4536390332333788e-05, "step": 61900 }, { "embedding_loss": 0.0185, "epoch": 3.461473990054199, "grad_norm": 0.0558539517223835, "learning_rate": 1.4530181967182584e-05, "step": 61950 }, { "embedding_loss": 0.0163, "epoch": 3.464267754372241, "grad_norm": 0.05702478066086769, "learning_rate": 1.4523973602031377e-05, "step": 62000 }, { "embedding_loss": 0.016, "epoch": 3.4670615186902833, "grad_norm": 0.08504299819469452, "learning_rate": 1.4517765236880173e-05, "step": 62050 }, { "embedding_loss": 0.0153, "epoch": 3.4698552830083256, "grad_norm": 0.10056797415018082, "learning_rate": 1.4511556871728968e-05, "step": 62100 }, { "embedding_loss": 0.0156, "epoch": 3.4726490473263674, "grad_norm": 0.10793804377317429, "learning_rate": 1.4505348506577764e-05, "step": 62150 }, { "embedding_loss": 0.0163, "epoch": 3.4754428116444096, "grad_norm": 0.2053692787885666, "learning_rate": 1.449914014142656e-05, "step": 62200 }, { "embedding_loss": 0.0145, "epoch": 3.478236575962452, "grad_norm": 0.08849647641181946, "learning_rate": 1.4492931776275353e-05, "step": 62250 }, { "embedding_loss": 0.015, "epoch": 3.481030340280494, "grad_norm": 0.05130087584257126, "learning_rate": 1.448672341112415e-05, "step": 62300 }, { "embedding_loss": 0.0173, "epoch": 3.4838241045985363, "grad_norm": 0.05412415415048599, "learning_rate": 1.4480515045972944e-05, "step": 62350 }, { "embedding_loss": 0.016, "epoch": 3.486617868916578, "grad_norm": 0.08849117904901505, "learning_rate": 1.4474306680821742e-05, "step": 62400 }, { "embedding_loss": 0.0173, "epoch": 3.4894116332346203, "grad_norm": 0.1450786143541336, "learning_rate": 1.4468098315670535e-05, "step": 62450 }, { "embedding_loss": 0.017, "epoch": 3.4922053975526626, "grad_norm": 0.09223490208387375, "learning_rate": 1.446188995051933e-05, "step": 62500 }, { "embedding_loss": 0.0151, "epoch": 3.4949991618707044, "grad_norm": 0.1490708440542221, "learning_rate": 1.4455681585368126e-05, "step": 62550 }, { "embedding_loss": 0.0159, "epoch": 3.4977929261887466, "grad_norm": 0.10164310783147812, "learning_rate": 1.444947322021692e-05, "step": 62600 }, { "embedding_loss": 0.0169, "epoch": 3.500586690506789, "grad_norm": 0.09819135814905167, "learning_rate": 1.4443264855065717e-05, "step": 62650 }, { "embedding_loss": 0.0151, "epoch": 3.503380454824831, "grad_norm": 0.06607767939567566, "learning_rate": 1.4437056489914511e-05, "step": 62700 }, { "embedding_loss": 0.0159, "epoch": 3.5061742191428733, "grad_norm": 0.11521059274673462, "learning_rate": 1.4430848124763308e-05, "step": 62750 }, { "embedding_loss": 0.0174, "epoch": 3.508967983460915, "grad_norm": 0.1479118913412094, "learning_rate": 1.4424639759612102e-05, "step": 62800 }, { "embedding_loss": 0.0167, "epoch": 3.5117617477789573, "grad_norm": 0.12523439526557922, "learning_rate": 1.44184313944609e-05, "step": 62850 }, { "embedding_loss": 0.0157, "epoch": 3.5145555120969996, "grad_norm": 0.0652926042675972, "learning_rate": 1.4412223029309693e-05, "step": 62900 }, { "embedding_loss": 0.0195, "epoch": 3.5173492764150414, "grad_norm": 0.10129573196172714, "learning_rate": 1.4406014664158487e-05, "step": 62950 }, { "embedding_loss": 0.0146, "epoch": 3.5201430407330836, "grad_norm": 0.05766969919204712, "learning_rate": 1.4399806299007284e-05, "step": 63000 }, { "embedding_loss": 0.0155, "epoch": 3.522936805051126, "grad_norm": 0.06131678447127342, "learning_rate": 1.4393597933856078e-05, "step": 63050 }, { "embedding_loss": 0.015, "epoch": 3.525730569369168, "grad_norm": 0.061930689960718155, "learning_rate": 1.4387389568704875e-05, "step": 63100 }, { "embedding_loss": 0.0154, "epoch": 3.5285243336872103, "grad_norm": 2.4971840381622314, "learning_rate": 1.438118120355367e-05, "step": 63150 }, { "embedding_loss": 0.0163, "epoch": 3.5313180980052525, "grad_norm": 0.116965651512146, "learning_rate": 1.4374972838402466e-05, "step": 63200 }, { "embedding_loss": 0.0157, "epoch": 3.5341118623232943, "grad_norm": 0.07760772854089737, "learning_rate": 1.436876447325126e-05, "step": 63250 }, { "embedding_loss": 0.017, "epoch": 3.5369056266413366, "grad_norm": 0.15855468809604645, "learning_rate": 1.4362556108100054e-05, "step": 63300 }, { "embedding_loss": 0.0171, "epoch": 3.539699390959379, "grad_norm": 0.08224745094776154, "learning_rate": 1.4356347742948851e-05, "step": 63350 }, { "embedding_loss": 0.0137, "epoch": 3.5424931552774206, "grad_norm": 0.07378239184617996, "learning_rate": 1.4350139377797645e-05, "step": 63400 }, { "embedding_loss": 0.0151, "epoch": 3.545286919595463, "grad_norm": 0.05753139778971672, "learning_rate": 1.4343931012646442e-05, "step": 63450 }, { "embedding_loss": 0.0165, "epoch": 3.548080683913505, "grad_norm": 0.10862725973129272, "learning_rate": 1.4337722647495236e-05, "step": 63500 }, { "embedding_loss": 0.0173, "epoch": 3.5508744482315473, "grad_norm": 0.08160730451345444, "learning_rate": 1.4331514282344032e-05, "step": 63550 }, { "embedding_loss": 0.0174, "epoch": 3.5536682125495895, "grad_norm": 0.07029294967651367, "learning_rate": 1.4325305917192827e-05, "step": 63600 }, { "embedding_loss": 0.0157, "epoch": 3.5564619768676313, "grad_norm": 0.07000797241926193, "learning_rate": 1.4319097552041623e-05, "step": 63650 }, { "embedding_loss": 0.0172, "epoch": 3.5592557411856736, "grad_norm": 0.1455189734697342, "learning_rate": 1.4312889186890418e-05, "step": 63700 }, { "embedding_loss": 0.0171, "epoch": 3.562049505503716, "grad_norm": 0.0435003861784935, "learning_rate": 1.4306680821739212e-05, "step": 63750 }, { "embedding_loss": 0.0157, "epoch": 3.5648432698217576, "grad_norm": 0.05767713859677315, "learning_rate": 1.4300472456588007e-05, "step": 63800 }, { "embedding_loss": 0.0177, "epoch": 3.5676370341398, "grad_norm": 0.08042215555906296, "learning_rate": 1.4294264091436803e-05, "step": 63850 }, { "embedding_loss": 0.0161, "epoch": 3.570430798457842, "grad_norm": 0.08446308970451355, "learning_rate": 1.4288055726285599e-05, "step": 63900 }, { "embedding_loss": 0.0173, "epoch": 3.5732245627758843, "grad_norm": 0.050334736704826355, "learning_rate": 1.4281847361134394e-05, "step": 63950 }, { "embedding_loss": 0.0177, "epoch": 3.5760183270939265, "grad_norm": 0.0630493089556694, "learning_rate": 1.427563899598319e-05, "step": 64000 }, { "embedding_loss": 0.0154, "epoch": 3.5788120914119683, "grad_norm": 1.8067386150360107, "learning_rate": 1.4269430630831983e-05, "step": 64050 }, { "embedding_loss": 0.0149, "epoch": 3.5816058557300106, "grad_norm": 0.035506390035152435, "learning_rate": 1.4263222265680779e-05, "step": 64100 }, { "embedding_loss": 0.0167, "epoch": 3.584399620048053, "grad_norm": 0.0448855422437191, "learning_rate": 1.4257013900529574e-05, "step": 64150 }, { "embedding_loss": 0.0155, "epoch": 3.587193384366095, "grad_norm": 0.08290600776672363, "learning_rate": 1.425080553537837e-05, "step": 64200 }, { "embedding_loss": 0.0147, "epoch": 3.589987148684137, "grad_norm": 0.061873432248830795, "learning_rate": 1.4244597170227165e-05, "step": 64250 }, { "embedding_loss": 0.0168, "epoch": 3.592780913002179, "grad_norm": 0.11671735346317291, "learning_rate": 1.423838880507596e-05, "step": 64300 }, { "embedding_loss": 0.0163, "epoch": 3.5955746773202213, "grad_norm": 0.1029561311006546, "learning_rate": 1.4232180439924756e-05, "step": 64350 }, { "embedding_loss": 0.0158, "epoch": 3.5983684416382635, "grad_norm": 0.04080566018819809, "learning_rate": 1.422597207477355e-05, "step": 64400 }, { "embedding_loss": 0.0177, "epoch": 3.6011622059563058, "grad_norm": 0.09417727589607239, "learning_rate": 1.4219763709622347e-05, "step": 64450 }, { "embedding_loss": 0.0155, "epoch": 3.6039559702743476, "grad_norm": 0.07031157612800598, "learning_rate": 1.4213555344471141e-05, "step": 64500 }, { "embedding_loss": 0.0167, "epoch": 3.60674973459239, "grad_norm": 0.09221218526363373, "learning_rate": 1.4207346979319935e-05, "step": 64550 }, { "embedding_loss": 0.0158, "epoch": 3.609543498910432, "grad_norm": 0.04208045452833176, "learning_rate": 1.4201138614168732e-05, "step": 64600 }, { "embedding_loss": 0.0167, "epoch": 3.612337263228474, "grad_norm": 0.032203152775764465, "learning_rate": 1.4194930249017526e-05, "step": 64650 }, { "embedding_loss": 0.0156, "epoch": 3.615131027546516, "grad_norm": 0.08906204253435135, "learning_rate": 1.4188721883866323e-05, "step": 64700 }, { "embedding_loss": 0.0184, "epoch": 3.6179247918645583, "grad_norm": 0.08762535452842712, "learning_rate": 1.4182513518715117e-05, "step": 64750 }, { "embedding_loss": 0.0169, "epoch": 3.6207185561826005, "grad_norm": 0.39351189136505127, "learning_rate": 1.4176305153563914e-05, "step": 64800 }, { "embedding_loss": 0.0163, "epoch": 3.6235123205006428, "grad_norm": 0.05992594361305237, "learning_rate": 1.4170096788412708e-05, "step": 64850 }, { "embedding_loss": 0.0157, "epoch": 3.6263060848186845, "grad_norm": 0.0703836977481842, "learning_rate": 1.4163888423261505e-05, "step": 64900 }, { "embedding_loss": 0.0173, "epoch": 3.629099849136727, "grad_norm": 0.09270811080932617, "learning_rate": 1.41576800581103e-05, "step": 64950 }, { "embedding_loss": 0.0174, "epoch": 3.631893613454769, "grad_norm": 0.07941018790006638, "learning_rate": 1.4151471692959093e-05, "step": 65000 }, { "embedding_loss": 0.0167, "epoch": 3.634687377772811, "grad_norm": 0.04550434276461601, "learning_rate": 1.414526332780789e-05, "step": 65050 }, { "embedding_loss": 0.016, "epoch": 3.637481142090853, "grad_norm": 0.21682071685791016, "learning_rate": 1.4139054962656684e-05, "step": 65100 }, { "embedding_loss": 0.0153, "epoch": 3.6402749064088953, "grad_norm": 0.03774628788232803, "learning_rate": 1.4132846597505481e-05, "step": 65150 }, { "embedding_loss": 0.0178, "epoch": 3.6430686707269375, "grad_norm": 0.09739317744970322, "learning_rate": 1.4126638232354275e-05, "step": 65200 }, { "embedding_loss": 0.0154, "epoch": 3.6458624350449798, "grad_norm": 0.07554320991039276, "learning_rate": 1.4120429867203072e-05, "step": 65250 }, { "embedding_loss": 0.0148, "epoch": 3.648656199363022, "grad_norm": 0.08303197473287582, "learning_rate": 1.4114221502051866e-05, "step": 65300 }, { "embedding_loss": 0.0179, "epoch": 3.651449963681064, "grad_norm": 0.07035436481237411, "learning_rate": 1.410801313690066e-05, "step": 65350 }, { "embedding_loss": 0.0153, "epoch": 3.654243727999106, "grad_norm": 0.10287481546401978, "learning_rate": 1.4101804771749457e-05, "step": 65400 }, { "embedding_loss": 0.0147, "epoch": 3.6570374923171483, "grad_norm": 0.14734047651290894, "learning_rate": 1.4095596406598251e-05, "step": 65450 }, { "embedding_loss": 0.0169, "epoch": 3.65983125663519, "grad_norm": 0.04035856947302818, "learning_rate": 1.4089388041447048e-05, "step": 65500 }, { "embedding_loss": 0.0165, "epoch": 3.6626250209532323, "grad_norm": 0.08803558349609375, "learning_rate": 1.4083179676295842e-05, "step": 65550 }, { "embedding_loss": 0.0156, "epoch": 3.6654187852712745, "grad_norm": 0.11001813411712646, "learning_rate": 1.4076971311144638e-05, "step": 65600 }, { "embedding_loss": 0.0159, "epoch": 3.6682125495893168, "grad_norm": 0.08219823241233826, "learning_rate": 1.4070762945993433e-05, "step": 65650 }, { "embedding_loss": 0.0152, "epoch": 3.671006313907359, "grad_norm": 0.05899304151535034, "learning_rate": 1.4064554580842229e-05, "step": 65700 }, { "embedding_loss": 0.0155, "epoch": 3.673800078225401, "grad_norm": 0.06681883335113525, "learning_rate": 1.4058346215691024e-05, "step": 65750 }, { "embedding_loss": 0.0159, "epoch": 3.676593842543443, "grad_norm": 0.03789963573217392, "learning_rate": 1.4052137850539818e-05, "step": 65800 }, { "embedding_loss": 0.0189, "epoch": 3.6793876068614852, "grad_norm": 0.07150746136903763, "learning_rate": 1.4045929485388613e-05, "step": 65850 }, { "embedding_loss": 0.0159, "epoch": 3.682181371179527, "grad_norm": 0.062368135899305344, "learning_rate": 1.4039721120237409e-05, "step": 65900 }, { "embedding_loss": 0.016, "epoch": 3.6849751354975693, "grad_norm": 0.06801111996173859, "learning_rate": 1.4033512755086204e-05, "step": 65950 }, { "embedding_loss": 0.0152, "epoch": 3.6877688998156115, "grad_norm": 0.10438065230846405, "learning_rate": 1.4027304389935e-05, "step": 66000 }, { "embedding_loss": 0.0158, "epoch": 3.6905626641336537, "grad_norm": 0.06117303669452667, "learning_rate": 1.4021096024783795e-05, "step": 66050 }, { "embedding_loss": 0.0172, "epoch": 3.693356428451696, "grad_norm": 0.09625016897916794, "learning_rate": 1.401488765963259e-05, "step": 66100 }, { "embedding_loss": 0.0148, "epoch": 3.696150192769738, "grad_norm": 0.12556912004947662, "learning_rate": 1.4008679294481385e-05, "step": 66150 }, { "embedding_loss": 0.0168, "epoch": 3.69894395708778, "grad_norm": 0.08077012747526169, "learning_rate": 1.400247092933018e-05, "step": 66200 }, { "embedding_loss": 0.0159, "epoch": 3.7017377214058222, "grad_norm": 0.09925184398889542, "learning_rate": 1.3996262564178976e-05, "step": 66250 }, { "embedding_loss": 0.017, "epoch": 3.7045314857238645, "grad_norm": 0.12260474264621735, "learning_rate": 1.3990054199027771e-05, "step": 66300 }, { "embedding_loss": 0.0161, "epoch": 3.7073252500419063, "grad_norm": 0.10741942375898361, "learning_rate": 1.3983845833876565e-05, "step": 66350 }, { "embedding_loss": 0.0166, "epoch": 3.7101190143599485, "grad_norm": 0.06796741485595703, "learning_rate": 1.3977637468725362e-05, "step": 66400 }, { "embedding_loss": 0.0161, "epoch": 3.7129127786779907, "grad_norm": 0.05678020790219307, "learning_rate": 1.3971429103574156e-05, "step": 66450 }, { "embedding_loss": 0.0146, "epoch": 3.715706542996033, "grad_norm": 0.032734666019678116, "learning_rate": 1.3965220738422953e-05, "step": 66500 }, { "embedding_loss": 0.0158, "epoch": 3.718500307314075, "grad_norm": 0.07617931067943573, "learning_rate": 1.3959012373271747e-05, "step": 66550 }, { "embedding_loss": 0.016, "epoch": 3.721294071632117, "grad_norm": 0.0860670730471611, "learning_rate": 1.3952804008120541e-05, "step": 66600 }, { "embedding_loss": 0.0169, "epoch": 3.7240878359501592, "grad_norm": 0.05067717283964157, "learning_rate": 1.3946595642969338e-05, "step": 66650 }, { "embedding_loss": 0.0175, "epoch": 3.7268816002682015, "grad_norm": 0.12581771612167358, "learning_rate": 1.3940387277818132e-05, "step": 66700 }, { "embedding_loss": 0.0151, "epoch": 3.7296753645862433, "grad_norm": 0.057834506034851074, "learning_rate": 1.393417891266693e-05, "step": 66750 }, { "embedding_loss": 0.0174, "epoch": 3.7324691289042855, "grad_norm": 0.06856948882341385, "learning_rate": 1.3927970547515723e-05, "step": 66800 }, { "embedding_loss": 0.0165, "epoch": 3.7352628932223277, "grad_norm": 0.056936249136924744, "learning_rate": 1.392176218236452e-05, "step": 66850 }, { "embedding_loss": 0.0163, "epoch": 3.73805665754037, "grad_norm": 0.06208829954266548, "learning_rate": 1.3915553817213314e-05, "step": 66900 }, { "embedding_loss": 0.0166, "epoch": 3.740850421858412, "grad_norm": 0.1363084316253662, "learning_rate": 1.3909345452062111e-05, "step": 66950 }, { "embedding_loss": 0.0173, "epoch": 3.743644186176454, "grad_norm": 0.0655842050909996, "learning_rate": 1.3903137086910905e-05, "step": 67000 }, { "embedding_loss": 0.0171, "epoch": 3.7464379504944962, "grad_norm": 0.07802671939134598, "learning_rate": 1.3896928721759699e-05, "step": 67050 }, { "embedding_loss": 0.0174, "epoch": 3.7492317148125385, "grad_norm": 0.07455764710903168, "learning_rate": 1.3890720356608496e-05, "step": 67100 }, { "embedding_loss": 0.0157, "epoch": 3.7520254791305807, "grad_norm": 0.08284629136323929, "learning_rate": 1.388451199145729e-05, "step": 67150 }, { "embedding_loss": 0.0166, "epoch": 3.7548192434486225, "grad_norm": 0.04045828804373741, "learning_rate": 1.3878303626306087e-05, "step": 67200 }, { "embedding_loss": 0.0161, "epoch": 3.7576130077666647, "grad_norm": 0.08045527338981628, "learning_rate": 1.3872095261154881e-05, "step": 67250 }, { "embedding_loss": 0.0167, "epoch": 3.760406772084707, "grad_norm": 0.05626504495739937, "learning_rate": 1.3865886896003678e-05, "step": 67300 }, { "embedding_loss": 0.0178, "epoch": 3.763200536402749, "grad_norm": 0.08056139200925827, "learning_rate": 1.3859678530852472e-05, "step": 67350 }, { "embedding_loss": 0.0159, "epoch": 3.7659943007207914, "grad_norm": 0.07757702469825745, "learning_rate": 1.3853470165701266e-05, "step": 67400 }, { "embedding_loss": 0.0162, "epoch": 3.7687880650388332, "grad_norm": 0.21029260754585266, "learning_rate": 1.3847261800550063e-05, "step": 67450 }, { "embedding_loss": 0.016, "epoch": 3.7715818293568755, "grad_norm": 0.054988693445920944, "learning_rate": 1.3841053435398857e-05, "step": 67500 }, { "embedding_loss": 0.0184, "epoch": 3.7743755936749177, "grad_norm": 0.036203619092702866, "learning_rate": 1.3834845070247654e-05, "step": 67550 }, { "embedding_loss": 0.0164, "epoch": 3.7771693579929595, "grad_norm": 0.025842688977718353, "learning_rate": 1.3828636705096448e-05, "step": 67600 }, { "embedding_loss": 0.0161, "epoch": 3.7799631223110017, "grad_norm": 0.06484280526638031, "learning_rate": 1.3822428339945243e-05, "step": 67650 }, { "embedding_loss": 0.0158, "epoch": 3.782756886629044, "grad_norm": 0.09591369330883026, "learning_rate": 1.3816219974794039e-05, "step": 67700 }, { "embedding_loss": 0.015, "epoch": 3.785550650947086, "grad_norm": 0.12167982012033463, "learning_rate": 1.3810011609642834e-05, "step": 67750 }, { "embedding_loss": 0.0176, "epoch": 3.7883444152651284, "grad_norm": 0.26147526502609253, "learning_rate": 1.380380324449163e-05, "step": 67800 }, { "embedding_loss": 0.0137, "epoch": 3.7911381795831702, "grad_norm": 0.06845135986804962, "learning_rate": 1.3797594879340424e-05, "step": 67850 }, { "embedding_loss": 0.0167, "epoch": 3.7939319439012125, "grad_norm": 0.09817326813936234, "learning_rate": 1.379138651418922e-05, "step": 67900 }, { "embedding_loss": 0.0152, "epoch": 3.7967257082192547, "grad_norm": 0.06010628491640091, "learning_rate": 1.3785178149038015e-05, "step": 67950 }, { "embedding_loss": 0.0172, "epoch": 3.7995194725372965, "grad_norm": 0.08783887326717377, "learning_rate": 1.377896978388681e-05, "step": 68000 }, { "embedding_loss": 0.0158, "epoch": 3.8023132368553387, "grad_norm": 0.04752453789114952, "learning_rate": 1.3772761418735606e-05, "step": 68050 }, { "embedding_loss": 0.0159, "epoch": 3.805107001173381, "grad_norm": 0.09436281770467758, "learning_rate": 1.3766553053584401e-05, "step": 68100 }, { "embedding_loss": 0.0165, "epoch": 3.807900765491423, "grad_norm": 0.05882437154650688, "learning_rate": 1.3760344688433195e-05, "step": 68150 }, { "embedding_loss": 0.0169, "epoch": 3.8106945298094654, "grad_norm": 0.14768095314502716, "learning_rate": 1.375413632328199e-05, "step": 68200 }, { "embedding_loss": 0.0156, "epoch": 3.8134882941275077, "grad_norm": 0.025641750544309616, "learning_rate": 1.3747927958130786e-05, "step": 68250 }, { "embedding_loss": 0.0162, "epoch": 3.8162820584455495, "grad_norm": 0.04619910940527916, "learning_rate": 1.3741719592979582e-05, "step": 68300 }, { "embedding_loss": 0.014, "epoch": 3.8190758227635917, "grad_norm": 0.05379455164074898, "learning_rate": 1.3735511227828377e-05, "step": 68350 }, { "embedding_loss": 0.0147, "epoch": 3.821869587081634, "grad_norm": 0.058710385113954544, "learning_rate": 1.3729302862677171e-05, "step": 68400 }, { "embedding_loss": 0.0176, "epoch": 3.8246633513996757, "grad_norm": 0.05300043523311615, "learning_rate": 1.3723094497525968e-05, "step": 68450 }, { "embedding_loss": 0.0155, "epoch": 3.827457115717718, "grad_norm": 0.0587935708463192, "learning_rate": 1.3716886132374762e-05, "step": 68500 }, { "embedding_loss": 0.0159, "epoch": 3.83025088003576, "grad_norm": 0.026540197432041168, "learning_rate": 1.371067776722356e-05, "step": 68550 }, { "embedding_loss": 0.0157, "epoch": 3.8330446443538024, "grad_norm": 0.10184885561466217, "learning_rate": 1.3704469402072353e-05, "step": 68600 }, { "embedding_loss": 0.0156, "epoch": 3.8358384086718447, "grad_norm": 0.05687949061393738, "learning_rate": 1.3698261036921147e-05, "step": 68650 }, { "embedding_loss": 0.0161, "epoch": 3.8386321729898865, "grad_norm": 0.0639205202460289, "learning_rate": 1.3692052671769944e-05, "step": 68700 }, { "embedding_loss": 0.0159, "epoch": 3.8414259373079287, "grad_norm": 0.09260307252407074, "learning_rate": 1.3685844306618738e-05, "step": 68750 }, { "embedding_loss": 0.0158, "epoch": 3.844219701625971, "grad_norm": 0.0718439444899559, "learning_rate": 1.3679635941467535e-05, "step": 68800 }, { "embedding_loss": 0.0168, "epoch": 3.8470134659440127, "grad_norm": 0.10800089687108994, "learning_rate": 1.3673427576316329e-05, "step": 68850 }, { "embedding_loss": 0.016, "epoch": 3.849807230262055, "grad_norm": 0.06061045452952385, "learning_rate": 1.3667219211165126e-05, "step": 68900 }, { "embedding_loss": 0.0142, "epoch": 3.852600994580097, "grad_norm": 0.10519209504127502, "learning_rate": 1.366101084601392e-05, "step": 68950 }, { "embedding_loss": 0.0156, "epoch": 3.8553947588981394, "grad_norm": 0.07216429710388184, "learning_rate": 1.3654802480862717e-05, "step": 69000 }, { "embedding_loss": 0.0168, "epoch": 3.8581885232161817, "grad_norm": 0.11929565668106079, "learning_rate": 1.3648594115711511e-05, "step": 69050 }, { "embedding_loss": 0.0195, "epoch": 3.8609822875342235, "grad_norm": 0.10613168776035309, "learning_rate": 1.3642385750560305e-05, "step": 69100 }, { "embedding_loss": 0.0158, "epoch": 3.8637760518522657, "grad_norm": 0.09565547108650208, "learning_rate": 1.3636177385409102e-05, "step": 69150 }, { "embedding_loss": 0.0182, "epoch": 3.866569816170308, "grad_norm": 1.1654304265975952, "learning_rate": 1.3629969020257896e-05, "step": 69200 }, { "embedding_loss": 0.0162, "epoch": 3.86936358048835, "grad_norm": 0.5502240061759949, "learning_rate": 1.3623760655106693e-05, "step": 69250 }, { "embedding_loss": 0.0149, "epoch": 3.872157344806392, "grad_norm": 0.04533680900931358, "learning_rate": 1.3617552289955487e-05, "step": 69300 }, { "embedding_loss": 0.0174, "epoch": 3.874951109124434, "grad_norm": 0.06795687228441238, "learning_rate": 1.3611343924804284e-05, "step": 69350 }, { "embedding_loss": 0.0165, "epoch": 3.8777448734424764, "grad_norm": 0.17954571545124054, "learning_rate": 1.3605135559653078e-05, "step": 69400 }, { "embedding_loss": 0.0166, "epoch": 3.8805386377605187, "grad_norm": 0.14052584767341614, "learning_rate": 1.3598927194501872e-05, "step": 69450 }, { "embedding_loss": 0.0166, "epoch": 3.883332402078561, "grad_norm": 0.15988630056381226, "learning_rate": 1.3592718829350669e-05, "step": 69500 }, { "embedding_loss": 0.016, "epoch": 3.8861261663966027, "grad_norm": 0.0401727668941021, "learning_rate": 1.3586510464199463e-05, "step": 69550 }, { "embedding_loss": 0.0169, "epoch": 3.888919930714645, "grad_norm": 0.036572571843862534, "learning_rate": 1.358030209904826e-05, "step": 69600 }, { "embedding_loss": 0.0156, "epoch": 3.891713695032687, "grad_norm": 0.07787728309631348, "learning_rate": 1.3574093733897054e-05, "step": 69650 }, { "embedding_loss": 0.015, "epoch": 3.894507459350729, "grad_norm": 0.07208219915628433, "learning_rate": 1.356788536874585e-05, "step": 69700 }, { "embedding_loss": 0.0156, "epoch": 3.897301223668771, "grad_norm": 0.023180922493338585, "learning_rate": 1.3561677003594645e-05, "step": 69750 }, { "embedding_loss": 0.0162, "epoch": 3.9000949879868134, "grad_norm": 0.05877944454550743, "learning_rate": 1.355546863844344e-05, "step": 69800 }, { "embedding_loss": 0.0155, "epoch": 3.9028887523048557, "grad_norm": 0.04071388021111488, "learning_rate": 1.3549260273292236e-05, "step": 69850 }, { "embedding_loss": 0.0159, "epoch": 3.905682516622898, "grad_norm": 0.06970974057912827, "learning_rate": 1.354305190814103e-05, "step": 69900 }, { "embedding_loss": 0.0169, "epoch": 3.9084762809409397, "grad_norm": 0.14981989562511444, "learning_rate": 1.3536843542989825e-05, "step": 69950 }, { "embedding_loss": 0.0141, "epoch": 3.911270045258982, "grad_norm": 0.08438153564929962, "learning_rate": 1.353063517783862e-05, "step": 70000 }, { "embedding_loss": 0.0164, "epoch": 3.914063809577024, "grad_norm": 0.08566228300333023, "learning_rate": 1.3524426812687416e-05, "step": 70050 }, { "embedding_loss": 0.0144, "epoch": 3.916857573895066, "grad_norm": 0.035423219203948975, "learning_rate": 1.3518218447536212e-05, "step": 70100 }, { "embedding_loss": 0.0167, "epoch": 3.919651338213108, "grad_norm": 0.1970055103302002, "learning_rate": 1.3512010082385007e-05, "step": 70150 }, { "embedding_loss": 0.0159, "epoch": 3.9224451025311504, "grad_norm": 0.04837533459067345, "learning_rate": 1.3505801717233801e-05, "step": 70200 }, { "embedding_loss": 0.0136, "epoch": 3.9252388668491927, "grad_norm": 0.06001395359635353, "learning_rate": 1.3499593352082597e-05, "step": 70250 }, { "embedding_loss": 0.0177, "epoch": 3.928032631167235, "grad_norm": 0.08823635429143906, "learning_rate": 1.3493384986931392e-05, "step": 70300 }, { "embedding_loss": 0.0171, "epoch": 3.930826395485277, "grad_norm": 0.06792716681957245, "learning_rate": 1.3487176621780188e-05, "step": 70350 }, { "embedding_loss": 0.0149, "epoch": 3.933620159803319, "grad_norm": 0.07344020158052444, "learning_rate": 1.3480968256628983e-05, "step": 70400 }, { "embedding_loss": 0.0163, "epoch": 3.936413924121361, "grad_norm": 0.04783507063984871, "learning_rate": 1.3474759891477777e-05, "step": 70450 }, { "embedding_loss": 0.0165, "epoch": 3.9392076884394034, "grad_norm": 0.05495217442512512, "learning_rate": 1.3468551526326574e-05, "step": 70500 }, { "embedding_loss": 0.0152, "epoch": 3.942001452757445, "grad_norm": 0.04969799146056175, "learning_rate": 1.3462343161175368e-05, "step": 70550 }, { "embedding_loss": 0.0154, "epoch": 3.9447952170754874, "grad_norm": 0.10347384959459305, "learning_rate": 1.3456134796024165e-05, "step": 70600 }, { "embedding_loss": 0.0172, "epoch": 3.9475889813935297, "grad_norm": 0.01931648887693882, "learning_rate": 1.3449926430872959e-05, "step": 70650 }, { "embedding_loss": 0.0162, "epoch": 3.950382745711572, "grad_norm": 0.03323715925216675, "learning_rate": 1.3443718065721753e-05, "step": 70700 }, { "embedding_loss": 0.0155, "epoch": 3.953176510029614, "grad_norm": 0.11696375906467438, "learning_rate": 1.343750970057055e-05, "step": 70750 }, { "embedding_loss": 0.0155, "epoch": 3.955970274347656, "grad_norm": 0.05000406503677368, "learning_rate": 1.3431301335419344e-05, "step": 70800 }, { "embedding_loss": 0.0149, "epoch": 3.958764038665698, "grad_norm": 0.03424100950360298, "learning_rate": 1.3425092970268141e-05, "step": 70850 }, { "embedding_loss": 0.0157, "epoch": 3.9615578029837404, "grad_norm": 0.08134534955024719, "learning_rate": 1.3418884605116935e-05, "step": 70900 }, { "embedding_loss": 0.0158, "epoch": 3.964351567301782, "grad_norm": 0.10430175065994263, "learning_rate": 1.3412676239965732e-05, "step": 70950 }, { "embedding_loss": 0.0157, "epoch": 3.9671453316198244, "grad_norm": 0.036871083080768585, "learning_rate": 1.3406467874814526e-05, "step": 71000 }, { "embedding_loss": 0.0174, "epoch": 3.9699390959378666, "grad_norm": 0.08495891094207764, "learning_rate": 1.3400259509663323e-05, "step": 71050 }, { "embedding_loss": 0.0172, "epoch": 3.972732860255909, "grad_norm": 0.05372534319758415, "learning_rate": 1.3394051144512117e-05, "step": 71100 }, { "embedding_loss": 0.0171, "epoch": 3.975526624573951, "grad_norm": 0.08770659565925598, "learning_rate": 1.338784277936091e-05, "step": 71150 }, { "embedding_loss": 0.0167, "epoch": 3.978320388891993, "grad_norm": 0.050110552459955215, "learning_rate": 1.3381634414209708e-05, "step": 71200 }, { "embedding_loss": 0.0165, "epoch": 3.981114153210035, "grad_norm": 0.07471231371164322, "learning_rate": 1.3375426049058502e-05, "step": 71250 }, { "embedding_loss": 0.0158, "epoch": 3.9839079175280774, "grad_norm": 0.07288473099470139, "learning_rate": 1.3369217683907299e-05, "step": 71300 }, { "embedding_loss": 0.0163, "epoch": 3.9867016818461196, "grad_norm": 0.04829687625169754, "learning_rate": 1.3363009318756093e-05, "step": 71350 }, { "embedding_loss": 0.0159, "epoch": 3.9894954461641614, "grad_norm": 0.08314380049705505, "learning_rate": 1.335680095360489e-05, "step": 71400 }, { "embedding_loss": 0.0166, "epoch": 3.9922892104822036, "grad_norm": 0.12811216711997986, "learning_rate": 1.3350592588453684e-05, "step": 71450 }, { "embedding_loss": 0.016, "epoch": 3.995082974800246, "grad_norm": 0.07488440722227097, "learning_rate": 1.3344384223302478e-05, "step": 71500 }, { "embedding_loss": 0.0168, "epoch": 3.997876739118288, "grad_norm": 0.039746351540088654, "learning_rate": 1.3338175858151275e-05, "step": 71550 }, { "embedding_loss": 0.0182, "epoch": 4.00067050343633, "grad_norm": 0.061716124415397644, "learning_rate": 1.3331967493000069e-05, "step": 71600 }, { "embedding_loss": 0.0159, "epoch": 4.003464267754373, "grad_norm": 0.04612473025918007, "learning_rate": 1.3325759127848866e-05, "step": 71650 }, { "embedding_loss": 0.0161, "epoch": 4.006258032072414, "grad_norm": 0.05469907820224762, "learning_rate": 1.331955076269766e-05, "step": 71700 }, { "embedding_loss": 0.0175, "epoch": 4.009051796390456, "grad_norm": 0.0430472269654274, "learning_rate": 1.3313342397546455e-05, "step": 71750 }, { "embedding_loss": 0.0151, "epoch": 4.011845560708498, "grad_norm": 0.07113539427518845, "learning_rate": 1.330713403239525e-05, "step": 71800 }, { "embedding_loss": 0.0182, "epoch": 4.014639325026541, "grad_norm": 0.09788942337036133, "learning_rate": 1.3300925667244046e-05, "step": 71850 }, { "embedding_loss": 0.0156, "epoch": 4.017433089344583, "grad_norm": 0.03722594305872917, "learning_rate": 1.3294717302092842e-05, "step": 71900 }, { "embedding_loss": 0.0167, "epoch": 4.020226853662625, "grad_norm": 0.11549200117588043, "learning_rate": 1.3288508936941636e-05, "step": 71950 }, { "embedding_loss": 0.0152, "epoch": 4.023020617980667, "grad_norm": 0.046782538294792175, "learning_rate": 1.3282300571790431e-05, "step": 72000 }, { "embedding_loss": 0.0151, "epoch": 4.02581438229871, "grad_norm": 0.09480701386928558, "learning_rate": 1.3276092206639227e-05, "step": 72050 }, { "embedding_loss": 0.0165, "epoch": 4.028608146616752, "grad_norm": 0.06484128534793854, "learning_rate": 1.3269883841488022e-05, "step": 72100 }, { "embedding_loss": 0.0156, "epoch": 4.031401910934793, "grad_norm": 0.06583870947360992, "learning_rate": 1.3263675476336818e-05, "step": 72150 }, { "embedding_loss": 0.0157, "epoch": 4.034195675252835, "grad_norm": 0.06116873398423195, "learning_rate": 1.3257467111185613e-05, "step": 72200 }, { "embedding_loss": 0.0161, "epoch": 4.036989439570878, "grad_norm": 0.12423747032880783, "learning_rate": 1.3251258746034407e-05, "step": 72250 }, { "embedding_loss": 0.0161, "epoch": 4.03978320388892, "grad_norm": 0.10492444038391113, "learning_rate": 1.3245050380883203e-05, "step": 72300 }, { "embedding_loss": 0.0171, "epoch": 4.042576968206962, "grad_norm": 0.07231979072093964, "learning_rate": 1.3238842015731998e-05, "step": 72350 }, { "embedding_loss": 0.0153, "epoch": 4.045370732525004, "grad_norm": 0.06069089472293854, "learning_rate": 1.3232633650580794e-05, "step": 72400 }, { "embedding_loss": 0.0165, "epoch": 4.048164496843047, "grad_norm": 0.5161132216453552, "learning_rate": 1.3226425285429589e-05, "step": 72450 }, { "embedding_loss": 0.0161, "epoch": 4.050958261161089, "grad_norm": 0.07453744858503342, "learning_rate": 1.3220216920278383e-05, "step": 72500 }, { "embedding_loss": 0.0152, "epoch": 4.05375202547913, "grad_norm": 0.054364338517189026, "learning_rate": 1.321400855512718e-05, "step": 72550 }, { "embedding_loss": 0.0191, "epoch": 4.056545789797172, "grad_norm": 0.08722380548715591, "learning_rate": 1.3207800189975974e-05, "step": 72600 }, { "embedding_loss": 0.0155, "epoch": 4.059339554115215, "grad_norm": 0.0929870754480362, "learning_rate": 1.3201591824824771e-05, "step": 72650 }, { "embedding_loss": 0.0168, "epoch": 4.062133318433257, "grad_norm": 0.0671839788556099, "learning_rate": 1.3195383459673565e-05, "step": 72700 }, { "embedding_loss": 0.0168, "epoch": 4.064927082751299, "grad_norm": 0.09020929783582687, "learning_rate": 1.3189175094522359e-05, "step": 72750 }, { "embedding_loss": 0.017, "epoch": 4.067720847069341, "grad_norm": 0.0759124606847763, "learning_rate": 1.3182966729371156e-05, "step": 72800 }, { "embedding_loss": 0.0155, "epoch": 4.070514611387384, "grad_norm": 0.04290309548377991, "learning_rate": 1.317675836421995e-05, "step": 72850 }, { "embedding_loss": 0.0161, "epoch": 4.073308375705426, "grad_norm": 0.06841205805540085, "learning_rate": 1.3170549999068747e-05, "step": 72900 }, { "embedding_loss": 0.0159, "epoch": 4.076102140023468, "grad_norm": 0.1329040378332138, "learning_rate": 1.3164341633917541e-05, "step": 72950 }, { "embedding_loss": 0.0168, "epoch": 4.078895904341509, "grad_norm": 0.07429823279380798, "learning_rate": 1.3158133268766338e-05, "step": 73000 }, { "embedding_loss": 0.0163, "epoch": 4.081689668659552, "grad_norm": 0.09721590578556061, "learning_rate": 1.3151924903615132e-05, "step": 73050 }, { "embedding_loss": 0.0163, "epoch": 4.084483432977594, "grad_norm": 0.05783107504248619, "learning_rate": 1.3145716538463929e-05, "step": 73100 }, { "embedding_loss": 0.0169, "epoch": 4.087277197295636, "grad_norm": 0.030091717839241028, "learning_rate": 1.3139508173312723e-05, "step": 73150 }, { "embedding_loss": 0.0165, "epoch": 4.090070961613678, "grad_norm": 0.08006572723388672, "learning_rate": 1.3133299808161517e-05, "step": 73200 }, { "embedding_loss": 0.0143, "epoch": 4.092864725931721, "grad_norm": 0.04080783203244209, "learning_rate": 1.3127091443010314e-05, "step": 73250 }, { "embedding_loss": 0.0151, "epoch": 4.095658490249763, "grad_norm": 0.4964849352836609, "learning_rate": 1.3120883077859108e-05, "step": 73300 }, { "embedding_loss": 0.0156, "epoch": 4.098452254567805, "grad_norm": 0.0781889259815216, "learning_rate": 1.3114674712707905e-05, "step": 73350 }, { "embedding_loss": 0.0169, "epoch": 4.101246018885846, "grad_norm": 0.08352246880531311, "learning_rate": 1.3108466347556699e-05, "step": 73400 }, { "embedding_loss": 0.0161, "epoch": 4.104039783203889, "grad_norm": 0.06143956258893013, "learning_rate": 1.3102257982405496e-05, "step": 73450 }, { "embedding_loss": 0.0159, "epoch": 4.106833547521931, "grad_norm": 0.12455056607723236, "learning_rate": 1.309604961725429e-05, "step": 73500 }, { "embedding_loss": 0.0163, "epoch": 4.109627311839973, "grad_norm": 0.04458624869585037, "learning_rate": 1.3089841252103084e-05, "step": 73550 }, { "embedding_loss": 0.0162, "epoch": 4.112421076158015, "grad_norm": 0.09939803183078766, "learning_rate": 1.3083632886951881e-05, "step": 73600 }, { "embedding_loss": 0.0142, "epoch": 4.115214840476058, "grad_norm": 0.07721063494682312, "learning_rate": 1.3077424521800675e-05, "step": 73650 }, { "embedding_loss": 0.0175, "epoch": 4.1180086047941, "grad_norm": 0.05934765562415123, "learning_rate": 1.3071216156649472e-05, "step": 73700 }, { "embedding_loss": 0.0155, "epoch": 4.120802369112142, "grad_norm": 0.07358893007040024, "learning_rate": 1.3065007791498266e-05, "step": 73750 }, { "embedding_loss": 0.0172, "epoch": 4.123596133430183, "grad_norm": 0.06961425393819809, "learning_rate": 1.3058799426347061e-05, "step": 73800 }, { "embedding_loss": 0.0167, "epoch": 4.126389897748226, "grad_norm": 0.17337599396705627, "learning_rate": 1.3052591061195857e-05, "step": 73850 }, { "embedding_loss": 0.0146, "epoch": 4.129183662066268, "grad_norm": 0.0411703884601593, "learning_rate": 1.3046382696044652e-05, "step": 73900 }, { "embedding_loss": 0.0164, "epoch": 4.13197742638431, "grad_norm": 0.07022657990455627, "learning_rate": 1.3040174330893448e-05, "step": 73950 }, { "embedding_loss": 0.015, "epoch": 4.134771190702352, "grad_norm": 0.06088469177484512, "learning_rate": 1.3033965965742242e-05, "step": 74000 }, { "embedding_loss": 0.0171, "epoch": 4.137564955020395, "grad_norm": 0.16296818852424622, "learning_rate": 1.3027757600591037e-05, "step": 74050 }, { "embedding_loss": 0.0169, "epoch": 4.140358719338437, "grad_norm": 0.08927388489246368, "learning_rate": 1.3021549235439833e-05, "step": 74100 }, { "embedding_loss": 0.0137, "epoch": 4.143152483656479, "grad_norm": 0.0733855590224266, "learning_rate": 1.3015340870288628e-05, "step": 74150 }, { "embedding_loss": 0.0145, "epoch": 4.145946247974521, "grad_norm": 0.15029944479465485, "learning_rate": 1.3009132505137424e-05, "step": 74200 }, { "embedding_loss": 0.0157, "epoch": 4.148740012292563, "grad_norm": 0.22139666974544525, "learning_rate": 1.3002924139986219e-05, "step": 74250 }, { "embedding_loss": 0.0147, "epoch": 4.151533776610605, "grad_norm": 0.09363770484924316, "learning_rate": 1.2996715774835013e-05, "step": 74300 }, { "embedding_loss": 0.0157, "epoch": 4.154327540928647, "grad_norm": 0.041247475892305374, "learning_rate": 1.2990507409683808e-05, "step": 74350 }, { "embedding_loss": 0.0138, "epoch": 4.157121305246689, "grad_norm": 0.09664350748062134, "learning_rate": 1.2984299044532604e-05, "step": 74400 }, { "embedding_loss": 0.0171, "epoch": 4.159915069564732, "grad_norm": 0.09169263392686844, "learning_rate": 1.29780906793814e-05, "step": 74450 }, { "embedding_loss": 0.0168, "epoch": 4.162708833882774, "grad_norm": 0.039420053362846375, "learning_rate": 1.2971882314230195e-05, "step": 74500 }, { "embedding_loss": 0.0176, "epoch": 4.165502598200816, "grad_norm": 0.10537154227495193, "learning_rate": 1.2965673949078989e-05, "step": 74550 }, { "embedding_loss": 0.0151, "epoch": 4.168296362518858, "grad_norm": 0.07545773684978485, "learning_rate": 1.2959465583927786e-05, "step": 74600 }, { "embedding_loss": 0.0148, "epoch": 4.1710901268369, "grad_norm": 0.2855670154094696, "learning_rate": 1.295325721877658e-05, "step": 74650 }, { "embedding_loss": 0.0163, "epoch": 4.173883891154942, "grad_norm": 0.11504363268613815, "learning_rate": 1.2947048853625377e-05, "step": 74700 }, { "embedding_loss": 0.0166, "epoch": 4.176677655472984, "grad_norm": 0.08400005102157593, "learning_rate": 1.2940840488474171e-05, "step": 74750 }, { "embedding_loss": 0.0146, "epoch": 4.179471419791026, "grad_norm": 0.05437540262937546, "learning_rate": 1.2934632123322965e-05, "step": 74800 }, { "embedding_loss": 0.0154, "epoch": 4.182265184109069, "grad_norm": 0.027178218588232994, "learning_rate": 1.2928423758171762e-05, "step": 74850 }, { "embedding_loss": 0.0158, "epoch": 4.185058948427111, "grad_norm": 0.04279623180627823, "learning_rate": 1.2922215393020556e-05, "step": 74900 }, { "embedding_loss": 0.0154, "epoch": 4.187852712745153, "grad_norm": 0.09402908384799957, "learning_rate": 1.2916007027869353e-05, "step": 74950 }, { "embedding_loss": 0.016, "epoch": 4.190646477063195, "grad_norm": 0.2409674972295761, "learning_rate": 1.2909798662718147e-05, "step": 75000 }, { "embedding_loss": 0.0153, "epoch": 4.1934402413812375, "grad_norm": 0.04443148896098137, "learning_rate": 1.2903590297566944e-05, "step": 75050 }, { "embedding_loss": 0.0161, "epoch": 4.196234005699279, "grad_norm": 0.07477457076311111, "learning_rate": 1.2897381932415738e-05, "step": 75100 }, { "embedding_loss": 0.0145, "epoch": 4.199027770017321, "grad_norm": 0.07581410557031631, "learning_rate": 1.2891173567264535e-05, "step": 75150 }, { "embedding_loss": 0.0158, "epoch": 4.201821534335363, "grad_norm": 0.08738112449645996, "learning_rate": 1.2884965202113329e-05, "step": 75200 }, { "embedding_loss": 0.0171, "epoch": 4.2046152986534056, "grad_norm": 0.04406674578785896, "learning_rate": 1.2878756836962123e-05, "step": 75250 }, { "embedding_loss": 0.0157, "epoch": 4.207409062971448, "grad_norm": 0.059144120663404465, "learning_rate": 1.287254847181092e-05, "step": 75300 }, { "embedding_loss": 0.016, "epoch": 4.21020282728949, "grad_norm": 0.08345773816108704, "learning_rate": 1.2866340106659714e-05, "step": 75350 }, { "embedding_loss": 0.017, "epoch": 4.212996591607532, "grad_norm": 0.05116698145866394, "learning_rate": 1.2860131741508511e-05, "step": 75400 }, { "embedding_loss": 0.0161, "epoch": 4.2157903559255745, "grad_norm": 0.0331759974360466, "learning_rate": 1.2853923376357305e-05, "step": 75450 }, { "embedding_loss": 0.0162, "epoch": 4.218584120243616, "grad_norm": 0.06293383985757828, "learning_rate": 1.2847715011206102e-05, "step": 75500 }, { "embedding_loss": 0.0148, "epoch": 4.221377884561658, "grad_norm": 0.06967750936746597, "learning_rate": 1.2841506646054896e-05, "step": 75550 }, { "embedding_loss": 0.0162, "epoch": 4.2241716488797, "grad_norm": 0.04429614543914795, "learning_rate": 1.283529828090369e-05, "step": 75600 }, { "embedding_loss": 0.0158, "epoch": 4.2269654131977425, "grad_norm": 0.024388661608099937, "learning_rate": 1.2829089915752487e-05, "step": 75650 }, { "embedding_loss": 0.0162, "epoch": 4.229759177515785, "grad_norm": 0.06853888928890228, "learning_rate": 1.282288155060128e-05, "step": 75700 }, { "embedding_loss": 0.0168, "epoch": 4.232552941833827, "grad_norm": 0.06855349242687225, "learning_rate": 1.2816673185450078e-05, "step": 75750 }, { "embedding_loss": 0.0168, "epoch": 4.235346706151869, "grad_norm": 0.07484278827905655, "learning_rate": 1.2810464820298872e-05, "step": 75800 }, { "embedding_loss": 0.0162, "epoch": 4.2381404704699115, "grad_norm": 0.07473695278167725, "learning_rate": 1.2804256455147667e-05, "step": 75850 }, { "embedding_loss": 0.0154, "epoch": 4.240934234787954, "grad_norm": 0.07782084494829178, "learning_rate": 1.2798048089996463e-05, "step": 75900 }, { "embedding_loss": 0.0155, "epoch": 4.243727999105995, "grad_norm": 0.08085021376609802, "learning_rate": 1.2791839724845258e-05, "step": 75950 }, { "embedding_loss": 0.0164, "epoch": 4.246521763424037, "grad_norm": 0.043930429965257645, "learning_rate": 1.2785631359694054e-05, "step": 76000 }, { "embedding_loss": 0.0157, "epoch": 4.2493155277420795, "grad_norm": 0.07429458200931549, "learning_rate": 1.2779422994542848e-05, "step": 76050 }, { "embedding_loss": 0.015, "epoch": 4.252109292060122, "grad_norm": 0.1027563139796257, "learning_rate": 1.2773214629391643e-05, "step": 76100 }, { "embedding_loss": 0.0166, "epoch": 4.254903056378164, "grad_norm": 0.12073292583227158, "learning_rate": 1.2767006264240439e-05, "step": 76150 }, { "embedding_loss": 0.0171, "epoch": 4.257696820696206, "grad_norm": 0.12066484242677689, "learning_rate": 1.2760797899089234e-05, "step": 76200 }, { "embedding_loss": 0.0164, "epoch": 4.2604905850142485, "grad_norm": 0.08928145468235016, "learning_rate": 1.275458953393803e-05, "step": 76250 }, { "embedding_loss": 0.0163, "epoch": 4.263284349332291, "grad_norm": 0.10890141874551773, "learning_rate": 1.2748381168786825e-05, "step": 76300 }, { "embedding_loss": 0.0145, "epoch": 4.266078113650332, "grad_norm": 0.05844984948635101, "learning_rate": 1.2742172803635619e-05, "step": 76350 }, { "embedding_loss": 0.0153, "epoch": 4.268871877968374, "grad_norm": 0.4351555109024048, "learning_rate": 1.2735964438484414e-05, "step": 76400 }, { "embedding_loss": 0.015, "epoch": 4.2716656422864165, "grad_norm": 0.06795413792133331, "learning_rate": 1.272975607333321e-05, "step": 76450 }, { "embedding_loss": 0.0163, "epoch": 4.274459406604459, "grad_norm": 0.06155725568532944, "learning_rate": 1.2723547708182005e-05, "step": 76500 }, { "embedding_loss": 0.0179, "epoch": 4.277253170922501, "grad_norm": 0.052505917847156525, "learning_rate": 1.2717339343030801e-05, "step": 76550 }, { "embedding_loss": 0.0166, "epoch": 4.280046935240543, "grad_norm": 0.04832730442285538, "learning_rate": 1.2711130977879595e-05, "step": 76600 }, { "embedding_loss": 0.015, "epoch": 4.2828406995585855, "grad_norm": 0.0851714089512825, "learning_rate": 1.2704922612728392e-05, "step": 76650 }, { "embedding_loss": 0.0165, "epoch": 4.285634463876628, "grad_norm": 0.050961267203092575, "learning_rate": 1.2698714247577186e-05, "step": 76700 }, { "embedding_loss": 0.0169, "epoch": 4.28842822819467, "grad_norm": 0.09720955789089203, "learning_rate": 1.2692505882425983e-05, "step": 76750 }, { "embedding_loss": 0.0167, "epoch": 4.291221992512711, "grad_norm": 0.06262169778347015, "learning_rate": 1.2686297517274777e-05, "step": 76800 }, { "embedding_loss": 0.0157, "epoch": 4.2940157568307535, "grad_norm": 0.03940849378705025, "learning_rate": 1.268008915212357e-05, "step": 76850 }, { "embedding_loss": 0.0166, "epoch": 4.296809521148796, "grad_norm": 0.15363329648971558, "learning_rate": 1.2673880786972368e-05, "step": 76900 }, { "embedding_loss": 0.0149, "epoch": 4.299603285466838, "grad_norm": 0.06238480657339096, "learning_rate": 1.2667672421821162e-05, "step": 76950 }, { "embedding_loss": 0.0151, "epoch": 4.30239704978488, "grad_norm": 0.07697327435016632, "learning_rate": 1.2661464056669959e-05, "step": 77000 }, { "embedding_loss": 0.0164, "epoch": 4.3051908141029225, "grad_norm": 0.04467165097594261, "learning_rate": 1.2655255691518753e-05, "step": 77050 }, { "embedding_loss": 0.0158, "epoch": 4.307984578420965, "grad_norm": 0.05053519457578659, "learning_rate": 1.264904732636755e-05, "step": 77100 }, { "embedding_loss": 0.018, "epoch": 4.310778342739007, "grad_norm": 0.13906948268413544, "learning_rate": 1.2642838961216344e-05, "step": 77150 }, { "embedding_loss": 0.0164, "epoch": 4.313572107057048, "grad_norm": 0.03770260140299797, "learning_rate": 1.2636630596065141e-05, "step": 77200 }, { "embedding_loss": 0.017, "epoch": 4.3163658713750905, "grad_norm": 0.042739756405353546, "learning_rate": 1.2630422230913935e-05, "step": 77250 }, { "embedding_loss": 0.0159, "epoch": 4.319159635693133, "grad_norm": 0.07453246414661407, "learning_rate": 1.2624213865762729e-05, "step": 77300 }, { "embedding_loss": 0.0185, "epoch": 4.321953400011175, "grad_norm": 0.07838981598615646, "learning_rate": 1.2618005500611526e-05, "step": 77350 }, { "embedding_loss": 0.0164, "epoch": 4.324747164329217, "grad_norm": 0.05252889171242714, "learning_rate": 1.261179713546032e-05, "step": 77400 }, { "embedding_loss": 0.0163, "epoch": 4.3275409286472595, "grad_norm": 0.028422033414244652, "learning_rate": 1.2605588770309117e-05, "step": 77450 }, { "embedding_loss": 0.0159, "epoch": 4.330334692965302, "grad_norm": 0.0454322025179863, "learning_rate": 1.259938040515791e-05, "step": 77500 }, { "embedding_loss": 0.0163, "epoch": 4.333128457283344, "grad_norm": 0.11268892139196396, "learning_rate": 1.2593172040006708e-05, "step": 77550 }, { "embedding_loss": 0.017, "epoch": 4.335922221601385, "grad_norm": 0.07319564372301102, "learning_rate": 1.2586963674855502e-05, "step": 77600 }, { "embedding_loss": 0.0155, "epoch": 4.3387159859194275, "grad_norm": 0.05263743922114372, "learning_rate": 1.2580755309704295e-05, "step": 77650 }, { "embedding_loss": 0.016, "epoch": 4.34150975023747, "grad_norm": 0.054766859859228134, "learning_rate": 1.2574546944553093e-05, "step": 77700 }, { "embedding_loss": 0.016, "epoch": 4.344303514555512, "grad_norm": 0.06865139305591583, "learning_rate": 1.2568338579401887e-05, "step": 77750 }, { "embedding_loss": 0.017, "epoch": 4.347097278873554, "grad_norm": 0.5283175110816956, "learning_rate": 1.2562130214250684e-05, "step": 77800 }, { "embedding_loss": 0.0149, "epoch": 4.3498910431915965, "grad_norm": 0.1118299812078476, "learning_rate": 1.2555921849099478e-05, "step": 77850 }, { "embedding_loss": 0.0155, "epoch": 4.352684807509639, "grad_norm": 0.05832812562584877, "learning_rate": 1.2549713483948273e-05, "step": 77900 }, { "embedding_loss": 0.0171, "epoch": 4.355478571827681, "grad_norm": 0.0666305422782898, "learning_rate": 1.2543505118797069e-05, "step": 77950 }, { "embedding_loss": 0.0168, "epoch": 4.358272336145722, "grad_norm": 0.060663215816020966, "learning_rate": 1.2537296753645864e-05, "step": 78000 }, { "embedding_loss": 0.0159, "epoch": 4.3610661004637645, "grad_norm": 0.04881270229816437, "learning_rate": 1.253108838849466e-05, "step": 78050 }, { "embedding_loss": 0.0141, "epoch": 4.363859864781807, "grad_norm": 0.04664290323853493, "learning_rate": 1.2524880023343453e-05, "step": 78100 }, { "embedding_loss": 0.0179, "epoch": 4.366653629099849, "grad_norm": 0.05905468389391899, "learning_rate": 1.2518671658192249e-05, "step": 78150 }, { "embedding_loss": 0.0178, "epoch": 4.369447393417891, "grad_norm": 0.0943203866481781, "learning_rate": 1.2512463293041044e-05, "step": 78200 }, { "embedding_loss": 0.0158, "epoch": 4.3722411577359335, "grad_norm": 0.14120005071163177, "learning_rate": 1.250625492788984e-05, "step": 78250 }, { "embedding_loss": 0.016, "epoch": 4.375034922053976, "grad_norm": 0.09808023273944855, "learning_rate": 1.2500046562738635e-05, "step": 78300 }, { "embedding_loss": 0.015, "epoch": 4.377828686372018, "grad_norm": 0.05275879055261612, "learning_rate": 1.2493838197587431e-05, "step": 78350 }, { "embedding_loss": 0.0167, "epoch": 4.38062245069006, "grad_norm": 0.05891604721546173, "learning_rate": 1.2487629832436225e-05, "step": 78400 }, { "embedding_loss": 0.016, "epoch": 4.3834162150081015, "grad_norm": 0.15164132416248322, "learning_rate": 1.248142146728502e-05, "step": 78450 }, { "embedding_loss": 0.0152, "epoch": 4.386209979326144, "grad_norm": 0.07336529344320297, "learning_rate": 1.2475213102133816e-05, "step": 78500 }, { "embedding_loss": 0.0156, "epoch": 4.389003743644186, "grad_norm": 0.04434202238917351, "learning_rate": 1.2469004736982611e-05, "step": 78550 }, { "embedding_loss": 0.0174, "epoch": 4.391797507962228, "grad_norm": 0.07860541343688965, "learning_rate": 1.2462796371831407e-05, "step": 78600 }, { "embedding_loss": 0.0166, "epoch": 4.3945912722802705, "grad_norm": 0.08600371330976486, "learning_rate": 1.24565880066802e-05, "step": 78650 }, { "embedding_loss": 0.016, "epoch": 4.397385036598313, "grad_norm": 0.08696223050355911, "learning_rate": 1.2450379641528998e-05, "step": 78700 }, { "embedding_loss": 0.0171, "epoch": 4.400178800916355, "grad_norm": 0.09998881071805954, "learning_rate": 1.2444171276377792e-05, "step": 78750 }, { "embedding_loss": 0.0162, "epoch": 4.402972565234397, "grad_norm": 0.06174421310424805, "learning_rate": 1.2437962911226589e-05, "step": 78800 }, { "embedding_loss": 0.0142, "epoch": 4.4057663295524385, "grad_norm": 0.04622115194797516, "learning_rate": 1.2431754546075383e-05, "step": 78850 }, { "embedding_loss": 0.0152, "epoch": 4.408560093870481, "grad_norm": 0.029796816408634186, "learning_rate": 1.2425546180924177e-05, "step": 78900 }, { "embedding_loss": 0.0164, "epoch": 4.411353858188523, "grad_norm": 0.05606171116232872, "learning_rate": 1.2419337815772974e-05, "step": 78950 }, { "embedding_loss": 0.0164, "epoch": 4.414147622506565, "grad_norm": 0.0543440617620945, "learning_rate": 1.2413129450621768e-05, "step": 79000 }, { "embedding_loss": 0.0158, "epoch": 4.4169413868246075, "grad_norm": 1.9134448766708374, "learning_rate": 1.2406921085470565e-05, "step": 79050 }, { "embedding_loss": 0.0165, "epoch": 4.41973515114265, "grad_norm": 0.0465313121676445, "learning_rate": 1.2400712720319359e-05, "step": 79100 }, { "embedding_loss": 0.0181, "epoch": 4.422528915460692, "grad_norm": 0.0342165045440197, "learning_rate": 1.2394504355168156e-05, "step": 79150 }, { "embedding_loss": 0.0156, "epoch": 4.425322679778734, "grad_norm": 0.0692405253648758, "learning_rate": 1.238829599001695e-05, "step": 79200 }, { "embedding_loss": 0.0148, "epoch": 4.428116444096776, "grad_norm": 0.030866388231515884, "learning_rate": 1.2382087624865747e-05, "step": 79250 }, { "embedding_loss": 0.0167, "epoch": 4.430910208414818, "grad_norm": 0.04283587262034416, "learning_rate": 1.237587925971454e-05, "step": 79300 }, { "embedding_loss": 0.0158, "epoch": 4.43370397273286, "grad_norm": 0.06964650750160217, "learning_rate": 1.2369670894563335e-05, "step": 79350 }, { "embedding_loss": 0.0172, "epoch": 4.436497737050902, "grad_norm": 0.03488006815314293, "learning_rate": 1.2363462529412132e-05, "step": 79400 }, { "embedding_loss": 0.0157, "epoch": 4.4392915013689445, "grad_norm": 0.03613964468240738, "learning_rate": 1.2357254164260926e-05, "step": 79450 }, { "embedding_loss": 0.0149, "epoch": 4.442085265686987, "grad_norm": 0.13221246004104614, "learning_rate": 1.2351045799109723e-05, "step": 79500 }, { "embedding_loss": 0.0148, "epoch": 4.444879030005029, "grad_norm": 0.06823229044675827, "learning_rate": 1.2344837433958517e-05, "step": 79550 }, { "embedding_loss": 0.0165, "epoch": 4.447672794323071, "grad_norm": 0.07200782001018524, "learning_rate": 1.2338629068807312e-05, "step": 79600 }, { "embedding_loss": 0.0144, "epoch": 4.450466558641113, "grad_norm": 0.05653342977166176, "learning_rate": 1.2332420703656108e-05, "step": 79650 }, { "embedding_loss": 0.0152, "epoch": 4.453260322959155, "grad_norm": 0.045488934963941574, "learning_rate": 1.2326212338504901e-05, "step": 79700 }, { "embedding_loss": 0.0177, "epoch": 4.456054087277197, "grad_norm": 0.05035526305437088, "learning_rate": 1.2320003973353699e-05, "step": 79750 }, { "embedding_loss": 0.016, "epoch": 4.458847851595239, "grad_norm": 0.052263639867305756, "learning_rate": 1.2313795608202492e-05, "step": 79800 }, { "embedding_loss": 0.0165, "epoch": 4.4616416159132815, "grad_norm": 0.11251475661993027, "learning_rate": 1.230758724305129e-05, "step": 79850 }, { "embedding_loss": 0.0152, "epoch": 4.464435380231324, "grad_norm": 0.0706251934170723, "learning_rate": 1.2301378877900083e-05, "step": 79900 }, { "embedding_loss": 0.0166, "epoch": 4.467229144549366, "grad_norm": 0.053479742258787155, "learning_rate": 1.2295170512748879e-05, "step": 79950 }, { "embedding_loss": 0.0142, "epoch": 4.470022908867408, "grad_norm": 0.09645888954401016, "learning_rate": 1.2288962147597675e-05, "step": 80000 }, { "embedding_loss": 0.018, "epoch": 4.47281667318545, "grad_norm": 0.0384182408452034, "learning_rate": 1.228275378244647e-05, "step": 80050 }, { "embedding_loss": 0.0138, "epoch": 4.475610437503493, "grad_norm": 0.035119347274303436, "learning_rate": 1.2276545417295266e-05, "step": 80100 }, { "embedding_loss": 0.0154, "epoch": 4.478404201821534, "grad_norm": 0.01662597991526127, "learning_rate": 1.227033705214406e-05, "step": 80150 }, { "embedding_loss": 0.0162, "epoch": 4.481197966139576, "grad_norm": 0.039083246141672134, "learning_rate": 1.2264128686992855e-05, "step": 80200 }, { "embedding_loss": 0.0165, "epoch": 4.4839917304576185, "grad_norm": 0.057060472667217255, "learning_rate": 1.225792032184165e-05, "step": 80250 }, { "embedding_loss": 0.0171, "epoch": 4.486785494775661, "grad_norm": 0.15727172791957855, "learning_rate": 1.2251711956690446e-05, "step": 80300 }, { "embedding_loss": 0.0156, "epoch": 4.489579259093703, "grad_norm": 0.06040970981121063, "learning_rate": 1.2245503591539241e-05, "step": 80350 }, { "embedding_loss": 0.0147, "epoch": 4.492373023411745, "grad_norm": 0.12805283069610596, "learning_rate": 1.2239295226388037e-05, "step": 80400 }, { "embedding_loss": 0.0141, "epoch": 4.495166787729787, "grad_norm": 0.06356023252010345, "learning_rate": 1.223308686123683e-05, "step": 80450 }, { "embedding_loss": 0.0163, "epoch": 4.49796055204783, "grad_norm": 0.023887569084763527, "learning_rate": 1.2226878496085626e-05, "step": 80500 }, { "embedding_loss": 0.0148, "epoch": 4.500754316365871, "grad_norm": 0.08057462424039841, "learning_rate": 1.2220670130934422e-05, "step": 80550 }, { "embedding_loss": 0.0155, "epoch": 4.503548080683913, "grad_norm": 0.06846486777067184, "learning_rate": 1.2214461765783217e-05, "step": 80600 }, { "embedding_loss": 0.0153, "epoch": 4.5063418450019554, "grad_norm": 0.04434897005558014, "learning_rate": 1.2208253400632013e-05, "step": 80650 }, { "embedding_loss": 0.0144, "epoch": 4.509135609319998, "grad_norm": 0.09237220883369446, "learning_rate": 1.2202045035480807e-05, "step": 80700 }, { "embedding_loss": 0.0171, "epoch": 4.51192937363804, "grad_norm": 0.05442893132567406, "learning_rate": 1.2195836670329604e-05, "step": 80750 }, { "embedding_loss": 0.0155, "epoch": 4.514723137956082, "grad_norm": 0.10857699811458588, "learning_rate": 1.2189628305178398e-05, "step": 80800 }, { "embedding_loss": 0.0159, "epoch": 4.517516902274124, "grad_norm": 0.08100040256977081, "learning_rate": 1.2183419940027195e-05, "step": 80850 }, { "embedding_loss": 0.0166, "epoch": 4.520310666592167, "grad_norm": 0.04655581712722778, "learning_rate": 1.2177211574875989e-05, "step": 80900 }, { "embedding_loss": 0.0176, "epoch": 4.523104430910209, "grad_norm": 0.053102340549230576, "learning_rate": 1.2171003209724782e-05, "step": 80950 }, { "embedding_loss": 0.0137, "epoch": 4.52589819522825, "grad_norm": 0.11141708493232727, "learning_rate": 1.216479484457358e-05, "step": 81000 }, { "embedding_loss": 0.0168, "epoch": 4.5286919595462924, "grad_norm": 0.04485846683382988, "learning_rate": 1.2158586479422374e-05, "step": 81050 }, { "embedding_loss": 0.0169, "epoch": 4.531485723864335, "grad_norm": 0.08842894434928894, "learning_rate": 1.215237811427117e-05, "step": 81100 }, { "embedding_loss": 0.0154, "epoch": 4.534279488182377, "grad_norm": 0.06665049493312836, "learning_rate": 1.2146169749119965e-05, "step": 81150 }, { "embedding_loss": 0.0169, "epoch": 4.537073252500419, "grad_norm": 0.10172074288129807, "learning_rate": 1.2139961383968762e-05, "step": 81200 }, { "embedding_loss": 0.0169, "epoch": 4.539867016818461, "grad_norm": 0.0564190074801445, "learning_rate": 1.2133753018817556e-05, "step": 81250 }, { "embedding_loss": 0.0153, "epoch": 4.542660781136504, "grad_norm": 0.033810876309871674, "learning_rate": 1.2127544653666353e-05, "step": 81300 }, { "embedding_loss": 0.015, "epoch": 4.545454545454545, "grad_norm": 0.059025876224040985, "learning_rate": 1.2121336288515147e-05, "step": 81350 }, { "embedding_loss": 0.0152, "epoch": 4.548248309772587, "grad_norm": 0.09430671483278275, "learning_rate": 1.211512792336394e-05, "step": 81400 }, { "embedding_loss": 0.0151, "epoch": 4.551042074090629, "grad_norm": 0.053324904292821884, "learning_rate": 1.2108919558212738e-05, "step": 81450 }, { "embedding_loss": 0.0157, "epoch": 4.553835838408672, "grad_norm": 0.05495639145374298, "learning_rate": 1.2102711193061531e-05, "step": 81500 }, { "embedding_loss": 0.0165, "epoch": 4.556629602726714, "grad_norm": 0.044068727642297745, "learning_rate": 1.2096502827910329e-05, "step": 81550 }, { "embedding_loss": 0.0157, "epoch": 4.559423367044756, "grad_norm": 0.028268221765756607, "learning_rate": 1.2090294462759122e-05, "step": 81600 }, { "embedding_loss": 0.0158, "epoch": 4.562217131362798, "grad_norm": 0.05989985540509224, "learning_rate": 1.2084086097607918e-05, "step": 81650 }, { "embedding_loss": 0.016, "epoch": 4.565010895680841, "grad_norm": 0.050333283841609955, "learning_rate": 1.2077877732456714e-05, "step": 81700 }, { "embedding_loss": 0.0158, "epoch": 4.567804659998883, "grad_norm": 0.03858734667301178, "learning_rate": 1.2071669367305507e-05, "step": 81750 }, { "embedding_loss": 0.016, "epoch": 4.570598424316925, "grad_norm": 0.05940263345837593, "learning_rate": 1.2065461002154305e-05, "step": 81800 }, { "embedding_loss": 0.0163, "epoch": 4.573392188634966, "grad_norm": 0.047692444175481796, "learning_rate": 1.2059252637003098e-05, "step": 81850 }, { "embedding_loss": 0.0161, "epoch": 4.576185952953009, "grad_norm": 0.07782459259033203, "learning_rate": 1.2053044271851896e-05, "step": 81900 }, { "embedding_loss": 0.0161, "epoch": 4.578979717271051, "grad_norm": 0.0711689218878746, "learning_rate": 1.204683590670069e-05, "step": 81950 }, { "embedding_loss": 0.0181, "epoch": 4.581773481589093, "grad_norm": 0.05138721689581871, "learning_rate": 1.2040627541549485e-05, "step": 82000 }, { "embedding_loss": 0.0176, "epoch": 4.584567245907135, "grad_norm": 0.11669877916574478, "learning_rate": 1.203441917639828e-05, "step": 82050 }, { "embedding_loss": 0.0152, "epoch": 4.587361010225178, "grad_norm": 0.08897171914577484, "learning_rate": 1.2028210811247076e-05, "step": 82100 }, { "embedding_loss": 0.0171, "epoch": 4.59015477454322, "grad_norm": 0.09488705545663834, "learning_rate": 1.2022002446095871e-05, "step": 82150 }, { "embedding_loss": 0.0166, "epoch": 4.592948538861261, "grad_norm": 0.08550915122032166, "learning_rate": 1.2015794080944665e-05, "step": 82200 }, { "embedding_loss": 0.0142, "epoch": 4.595742303179303, "grad_norm": 0.07786401361227036, "learning_rate": 1.200958571579346e-05, "step": 82250 }, { "embedding_loss": 0.0175, "epoch": 4.598536067497346, "grad_norm": 0.0943010076880455, "learning_rate": 1.2003377350642256e-05, "step": 82300 }, { "embedding_loss": 0.0167, "epoch": 4.601329831815388, "grad_norm": 0.05855938792228699, "learning_rate": 1.1997168985491052e-05, "step": 82350 }, { "embedding_loss": 0.0161, "epoch": 4.60412359613343, "grad_norm": 0.05475907400250435, "learning_rate": 1.1990960620339847e-05, "step": 82400 }, { "embedding_loss": 0.0169, "epoch": 4.606917360451472, "grad_norm": 0.05655381828546524, "learning_rate": 1.1984752255188643e-05, "step": 82450 }, { "embedding_loss": 0.0177, "epoch": 4.609711124769515, "grad_norm": 0.47187140583992004, "learning_rate": 1.1978543890037437e-05, "step": 82500 }, { "embedding_loss": 0.0162, "epoch": 4.612504889087557, "grad_norm": 0.0888262614607811, "learning_rate": 1.1972335524886232e-05, "step": 82550 }, { "embedding_loss": 0.0156, "epoch": 4.615298653405599, "grad_norm": 0.07674894481897354, "learning_rate": 1.1966127159735028e-05, "step": 82600 }, { "embedding_loss": 0.0159, "epoch": 4.618092417723641, "grad_norm": 0.07480861991643906, "learning_rate": 1.1959918794583823e-05, "step": 82650 }, { "embedding_loss": 0.0179, "epoch": 4.620886182041683, "grad_norm": 0.0883016362786293, "learning_rate": 1.1953710429432619e-05, "step": 82700 }, { "embedding_loss": 0.0162, "epoch": 4.623679946359725, "grad_norm": 0.05029178410768509, "learning_rate": 1.1947502064281413e-05, "step": 82750 }, { "embedding_loss": 0.0167, "epoch": 4.626473710677767, "grad_norm": 0.13558712601661682, "learning_rate": 1.194129369913021e-05, "step": 82800 }, { "embedding_loss": 0.0146, "epoch": 4.629267474995809, "grad_norm": 0.07936955243349075, "learning_rate": 1.1935085333979004e-05, "step": 82850 }, { "embedding_loss": 0.0167, "epoch": 4.632061239313852, "grad_norm": 0.08140017092227936, "learning_rate": 1.19288769688278e-05, "step": 82900 }, { "embedding_loss": 0.0148, "epoch": 4.634855003631894, "grad_norm": 0.25676438212394714, "learning_rate": 1.1922668603676595e-05, "step": 82950 }, { "embedding_loss": 0.0161, "epoch": 4.637648767949936, "grad_norm": 0.05089529976248741, "learning_rate": 1.1916460238525388e-05, "step": 83000 }, { "embedding_loss": 0.0165, "epoch": 4.640442532267977, "grad_norm": 0.07779458910226822, "learning_rate": 1.1910251873374186e-05, "step": 83050 }, { "embedding_loss": 0.0173, "epoch": 4.64323629658602, "grad_norm": 0.07045720517635345, "learning_rate": 1.190404350822298e-05, "step": 83100 }, { "embedding_loss": 0.0152, "epoch": 4.646030060904062, "grad_norm": 0.08144287019968033, "learning_rate": 1.1897835143071777e-05, "step": 83150 }, { "embedding_loss": 0.0158, "epoch": 4.648823825222104, "grad_norm": 0.11142279952764511, "learning_rate": 1.189162677792057e-05, "step": 83200 }, { "embedding_loss": 0.016, "epoch": 4.651617589540146, "grad_norm": 0.12725767493247986, "learning_rate": 1.1885418412769368e-05, "step": 83250 }, { "embedding_loss": 0.0155, "epoch": 4.654411353858189, "grad_norm": 0.07215648144483566, "learning_rate": 1.1879210047618162e-05, "step": 83300 }, { "embedding_loss": 0.0165, "epoch": 4.657205118176231, "grad_norm": 0.11462417989969254, "learning_rate": 1.1873001682466959e-05, "step": 83350 }, { "embedding_loss": 0.0161, "epoch": 4.659998882494273, "grad_norm": 0.08495666086673737, "learning_rate": 1.1866793317315753e-05, "step": 83400 }, { "embedding_loss": 0.0172, "epoch": 4.662792646812315, "grad_norm": 0.04092303663492203, "learning_rate": 1.1860584952164546e-05, "step": 83450 }, { "embedding_loss": 0.0168, "epoch": 4.665586411130357, "grad_norm": 0.08553148806095123, "learning_rate": 1.1854376587013344e-05, "step": 83500 }, { "embedding_loss": 0.0177, "epoch": 4.668380175448399, "grad_norm": 0.08766449242830276, "learning_rate": 1.1848168221862137e-05, "step": 83550 }, { "embedding_loss": 0.0151, "epoch": 4.671173939766441, "grad_norm": 0.058372437953948975, "learning_rate": 1.1841959856710935e-05, "step": 83600 }, { "embedding_loss": 0.0171, "epoch": 4.673967704084483, "grad_norm": 0.055435314774513245, "learning_rate": 1.1835751491559728e-05, "step": 83650 }, { "embedding_loss": 0.0153, "epoch": 4.676761468402526, "grad_norm": 0.04134651646018028, "learning_rate": 1.1829543126408524e-05, "step": 83700 }, { "embedding_loss": 0.016, "epoch": 4.679555232720568, "grad_norm": 0.04847203940153122, "learning_rate": 1.182333476125732e-05, "step": 83750 }, { "embedding_loss": 0.0158, "epoch": 4.68234899703861, "grad_norm": 0.07876107096672058, "learning_rate": 1.1817126396106113e-05, "step": 83800 }, { "embedding_loss": 0.017, "epoch": 4.685142761356652, "grad_norm": 0.04590310901403427, "learning_rate": 1.181091803095491e-05, "step": 83850 }, { "embedding_loss": 0.0156, "epoch": 4.687936525674694, "grad_norm": 0.04119446501135826, "learning_rate": 1.1804709665803704e-05, "step": 83900 }, { "embedding_loss": 0.018, "epoch": 4.690730289992736, "grad_norm": 0.06028982251882553, "learning_rate": 1.1798501300652502e-05, "step": 83950 }, { "embedding_loss": 0.0157, "epoch": 4.693524054310778, "grad_norm": 0.060894958674907684, "learning_rate": 1.1792292935501295e-05, "step": 84000 }, { "embedding_loss": 0.0161, "epoch": 4.69631781862882, "grad_norm": 0.05539907142519951, "learning_rate": 1.178608457035009e-05, "step": 84050 }, { "embedding_loss": 0.0159, "epoch": 4.699111582946863, "grad_norm": 0.045127350836992264, "learning_rate": 1.1779876205198886e-05, "step": 84100 }, { "embedding_loss": 0.0172, "epoch": 4.701905347264905, "grad_norm": 0.06414756923913956, "learning_rate": 1.1773667840047682e-05, "step": 84150 }, { "embedding_loss": 0.0159, "epoch": 4.704699111582947, "grad_norm": 0.08869428187608719, "learning_rate": 1.1767459474896477e-05, "step": 84200 }, { "embedding_loss": 0.0164, "epoch": 4.707492875900989, "grad_norm": 0.07992931455373764, "learning_rate": 1.1761251109745271e-05, "step": 84250 }, { "embedding_loss": 0.0151, "epoch": 4.7102866402190315, "grad_norm": 0.0341317355632782, "learning_rate": 1.1755042744594067e-05, "step": 84300 }, { "embedding_loss": 0.0162, "epoch": 4.713080404537073, "grad_norm": 0.055489689111709595, "learning_rate": 1.1748834379442862e-05, "step": 84350 }, { "embedding_loss": 0.016, "epoch": 4.715874168855115, "grad_norm": 0.0774126946926117, "learning_rate": 1.1742626014291658e-05, "step": 84400 }, { "embedding_loss": 0.0166, "epoch": 4.718667933173157, "grad_norm": 0.06225760653614998, "learning_rate": 1.1736417649140453e-05, "step": 84450 }, { "embedding_loss": 0.016, "epoch": 4.7214616974912, "grad_norm": 0.06454392522573471, "learning_rate": 1.1730209283989249e-05, "step": 84500 }, { "embedding_loss": 0.017, "epoch": 4.724255461809242, "grad_norm": 0.04687155783176422, "learning_rate": 1.1724000918838043e-05, "step": 84550 }, { "embedding_loss": 0.0145, "epoch": 4.727049226127284, "grad_norm": 0.06152040511369705, "learning_rate": 1.1717792553686838e-05, "step": 84600 }, { "embedding_loss": 0.0148, "epoch": 4.729842990445326, "grad_norm": 0.04747496545314789, "learning_rate": 1.1711584188535634e-05, "step": 84650 }, { "embedding_loss": 0.0163, "epoch": 4.7326367547633685, "grad_norm": 0.04304226487874985, "learning_rate": 1.1705375823384429e-05, "step": 84700 }, { "embedding_loss": 0.015, "epoch": 4.73543051908141, "grad_norm": 0.07236947864294052, "learning_rate": 1.1699167458233225e-05, "step": 84750 }, { "embedding_loss": 0.016, "epoch": 4.738224283399452, "grad_norm": 0.09149173647165298, "learning_rate": 1.1692959093082018e-05, "step": 84800 }, { "embedding_loss": 0.0167, "epoch": 4.741018047717494, "grad_norm": 0.07795878499746323, "learning_rate": 1.1686750727930816e-05, "step": 84850 }, { "embedding_loss": 0.0151, "epoch": 4.743811812035537, "grad_norm": 0.06309416890144348, "learning_rate": 1.168054236277961e-05, "step": 84900 }, { "embedding_loss": 0.0159, "epoch": 4.746605576353579, "grad_norm": 0.040282152593135834, "learning_rate": 1.1674333997628407e-05, "step": 84950 }, { "embedding_loss": 0.016, "epoch": 4.749399340671621, "grad_norm": 0.09220372140407562, "learning_rate": 1.16681256324772e-05, "step": 85000 }, { "embedding_loss": 0.0142, "epoch": 4.752193104989663, "grad_norm": 0.10688124597072601, "learning_rate": 1.1661917267325994e-05, "step": 85050 }, { "embedding_loss": 0.0164, "epoch": 4.7549868693077055, "grad_norm": 0.11524878442287445, "learning_rate": 1.1655708902174792e-05, "step": 85100 }, { "embedding_loss": 0.0182, "epoch": 4.757780633625748, "grad_norm": 0.04111326113343239, "learning_rate": 1.1649500537023585e-05, "step": 85150 }, { "embedding_loss": 0.017, "epoch": 4.760574397943789, "grad_norm": 0.039056446403265, "learning_rate": 1.1643292171872383e-05, "step": 85200 }, { "embedding_loss": 0.0175, "epoch": 4.763368162261831, "grad_norm": 0.23826438188552856, "learning_rate": 1.1637083806721176e-05, "step": 85250 }, { "embedding_loss": 0.0153, "epoch": 4.766161926579874, "grad_norm": 0.05441135913133621, "learning_rate": 1.1630875441569974e-05, "step": 85300 }, { "embedding_loss": 0.017, "epoch": 4.768955690897916, "grad_norm": 0.12558680772781372, "learning_rate": 1.1624667076418767e-05, "step": 85350 }, { "embedding_loss": 0.015, "epoch": 4.771749455215958, "grad_norm": 0.06760746985673904, "learning_rate": 1.1618458711267565e-05, "step": 85400 }, { "embedding_loss": 0.0155, "epoch": 4.774543219534, "grad_norm": 0.08541610836982727, "learning_rate": 1.1612250346116358e-05, "step": 85450 }, { "embedding_loss": 0.0177, "epoch": 4.7773369838520425, "grad_norm": 0.04302240535616875, "learning_rate": 1.1606041980965152e-05, "step": 85500 }, { "embedding_loss": 0.0164, "epoch": 4.780130748170084, "grad_norm": 0.0739663690328598, "learning_rate": 1.159983361581395e-05, "step": 85550 }, { "embedding_loss": 0.0143, "epoch": 4.782924512488126, "grad_norm": 0.03223133459687233, "learning_rate": 1.1593625250662743e-05, "step": 85600 }, { "embedding_loss": 0.0162, "epoch": 4.785718276806168, "grad_norm": 0.09162987023591995, "learning_rate": 1.158741688551154e-05, "step": 85650 }, { "embedding_loss": 0.0171, "epoch": 4.788512041124211, "grad_norm": 0.0821610614657402, "learning_rate": 1.1581208520360334e-05, "step": 85700 }, { "embedding_loss": 0.0164, "epoch": 4.791305805442253, "grad_norm": 0.07440923899412155, "learning_rate": 1.157500015520913e-05, "step": 85750 }, { "embedding_loss": 0.0156, "epoch": 4.794099569760295, "grad_norm": 0.11659399420022964, "learning_rate": 1.1568791790057925e-05, "step": 85800 }, { "embedding_loss": 0.0157, "epoch": 4.796893334078337, "grad_norm": 0.12041584402322769, "learning_rate": 1.156258342490672e-05, "step": 85850 }, { "embedding_loss": 0.016, "epoch": 4.7996870983963795, "grad_norm": 0.052562128752470016, "learning_rate": 1.1556375059755516e-05, "step": 85900 }, { "embedding_loss": 0.0156, "epoch": 4.802480862714422, "grad_norm": 0.06858882308006287, "learning_rate": 1.155016669460431e-05, "step": 85950 }, { "embedding_loss": 0.0169, "epoch": 4.805274627032464, "grad_norm": 0.08864005655050278, "learning_rate": 1.1543958329453106e-05, "step": 86000 }, { "embedding_loss": 0.0149, "epoch": 4.808068391350505, "grad_norm": 0.04079374298453331, "learning_rate": 1.1537749964301901e-05, "step": 86050 }, { "embedding_loss": 0.0146, "epoch": 4.810862155668548, "grad_norm": 0.08795464783906937, "learning_rate": 1.1531541599150697e-05, "step": 86100 }, { "embedding_loss": 0.014, "epoch": 4.81365591998659, "grad_norm": 0.059991005808115005, "learning_rate": 1.1525333233999492e-05, "step": 86150 }, { "embedding_loss": 0.0145, "epoch": 4.816449684304632, "grad_norm": 0.04550354182720184, "learning_rate": 1.1519124868848288e-05, "step": 86200 }, { "embedding_loss": 0.0167, "epoch": 4.819243448622674, "grad_norm": 0.02800612896680832, "learning_rate": 1.1512916503697083e-05, "step": 86250 }, { "embedding_loss": 0.016, "epoch": 4.8220372129407165, "grad_norm": 0.05703519284725189, "learning_rate": 1.1506708138545877e-05, "step": 86300 }, { "embedding_loss": 0.0151, "epoch": 4.824830977258759, "grad_norm": 0.30840766429901123, "learning_rate": 1.1500499773394673e-05, "step": 86350 }, { "embedding_loss": 0.0161, "epoch": 4.8276247415768, "grad_norm": 0.05639660358428955, "learning_rate": 1.1494291408243468e-05, "step": 86400 }, { "embedding_loss": 0.0167, "epoch": 4.830418505894842, "grad_norm": 0.06158983334898949, "learning_rate": 1.1488083043092264e-05, "step": 86450 }, { "embedding_loss": 0.0181, "epoch": 4.833212270212885, "grad_norm": 0.08778326213359833, "learning_rate": 1.148187467794106e-05, "step": 86500 }, { "embedding_loss": 0.0141, "epoch": 4.836006034530927, "grad_norm": 0.08474594354629517, "learning_rate": 1.1475666312789855e-05, "step": 86550 }, { "embedding_loss": 0.0163, "epoch": 4.838799798848969, "grad_norm": 0.03059311956167221, "learning_rate": 1.1469457947638649e-05, "step": 86600 }, { "embedding_loss": 0.016, "epoch": 4.841593563167011, "grad_norm": 0.03569205105304718, "learning_rate": 1.1463249582487444e-05, "step": 86650 }, { "embedding_loss": 0.015, "epoch": 4.8443873274850535, "grad_norm": 0.3209797441959381, "learning_rate": 1.145704121733624e-05, "step": 86700 }, { "embedding_loss": 0.0152, "epoch": 4.847181091803096, "grad_norm": 0.08942540735006332, "learning_rate": 1.1450832852185035e-05, "step": 86750 }, { "embedding_loss": 0.0156, "epoch": 4.849974856121138, "grad_norm": 0.04483160376548767, "learning_rate": 1.144462448703383e-05, "step": 86800 }, { "embedding_loss": 0.0142, "epoch": 4.85276862043918, "grad_norm": 0.06087794527411461, "learning_rate": 1.1438416121882624e-05, "step": 86850 }, { "embedding_loss": 0.0155, "epoch": 4.855562384757222, "grad_norm": 0.11693094670772552, "learning_rate": 1.1432207756731422e-05, "step": 86900 }, { "embedding_loss": 0.0173, "epoch": 4.858356149075264, "grad_norm": 0.05042322725057602, "learning_rate": 1.1425999391580215e-05, "step": 86950 }, { "embedding_loss": 0.0171, "epoch": 4.861149913393306, "grad_norm": 0.11290831863880157, "learning_rate": 1.1419791026429013e-05, "step": 87000 }, { "embedding_loss": 0.0179, "epoch": 4.863943677711348, "grad_norm": 0.04760397970676422, "learning_rate": 1.1413582661277806e-05, "step": 87050 }, { "embedding_loss": 0.0156, "epoch": 4.8667374420293905, "grad_norm": 0.04509945958852768, "learning_rate": 1.14073742961266e-05, "step": 87100 }, { "embedding_loss": 0.0153, "epoch": 4.869531206347433, "grad_norm": 0.07312110811471939, "learning_rate": 1.1401165930975397e-05, "step": 87150 }, { "embedding_loss": 0.0161, "epoch": 4.872324970665475, "grad_norm": 0.04675517976284027, "learning_rate": 1.1394957565824191e-05, "step": 87200 }, { "embedding_loss": 0.0146, "epoch": 4.875118734983516, "grad_norm": 0.04758315160870552, "learning_rate": 1.1388749200672989e-05, "step": 87250 }, { "embedding_loss": 0.0165, "epoch": 4.877912499301559, "grad_norm": 2.1649484634399414, "learning_rate": 1.1382540835521782e-05, "step": 87300 }, { "embedding_loss": 0.0148, "epoch": 4.880706263619601, "grad_norm": 0.029610656201839447, "learning_rate": 1.137633247037058e-05, "step": 87350 }, { "embedding_loss": 0.0158, "epoch": 4.883500027937643, "grad_norm": 0.062485937029123306, "learning_rate": 1.1370124105219373e-05, "step": 87400 }, { "embedding_loss": 0.0147, "epoch": 4.886293792255685, "grad_norm": 0.06958562135696411, "learning_rate": 1.136391574006817e-05, "step": 87450 }, { "embedding_loss": 0.0153, "epoch": 4.8890875565737275, "grad_norm": 0.05534058064222336, "learning_rate": 1.1357707374916964e-05, "step": 87500 }, { "embedding_loss": 0.0136, "epoch": 4.89188132089177, "grad_norm": 0.1041051521897316, "learning_rate": 1.1351499009765758e-05, "step": 87550 }, { "embedding_loss": 0.0153, "epoch": 4.894675085209812, "grad_norm": 0.059199728071689606, "learning_rate": 1.1345290644614555e-05, "step": 87600 }, { "embedding_loss": 0.0159, "epoch": 4.897468849527854, "grad_norm": 0.04462113976478577, "learning_rate": 1.133908227946335e-05, "step": 87650 }, { "embedding_loss": 0.0164, "epoch": 4.9002626138458965, "grad_norm": 0.09946616739034653, "learning_rate": 1.1332873914312146e-05, "step": 87700 }, { "embedding_loss": 0.0157, "epoch": 4.903056378163938, "grad_norm": 0.07409241050481796, "learning_rate": 1.132666554916094e-05, "step": 87750 }, { "embedding_loss": 0.0133, "epoch": 4.90585014248198, "grad_norm": 0.08518587797880173, "learning_rate": 1.1320457184009736e-05, "step": 87800 }, { "embedding_loss": 0.0162, "epoch": 4.908643906800022, "grad_norm": 0.16306351125240326, "learning_rate": 1.1314248818858531e-05, "step": 87850 }, { "embedding_loss": 0.0142, "epoch": 4.9114376711180645, "grad_norm": 0.12218185514211655, "learning_rate": 1.1308040453707325e-05, "step": 87900 }, { "embedding_loss": 0.016, "epoch": 4.914231435436107, "grad_norm": 0.07847185432910919, "learning_rate": 1.1301832088556122e-05, "step": 87950 }, { "embedding_loss": 0.0158, "epoch": 4.917025199754149, "grad_norm": 0.07826220989227295, "learning_rate": 1.1295623723404916e-05, "step": 88000 }, { "embedding_loss": 0.0151, "epoch": 4.919818964072191, "grad_norm": 0.06350817531347275, "learning_rate": 1.1289415358253712e-05, "step": 88050 }, { "embedding_loss": 0.0167, "epoch": 4.922612728390233, "grad_norm": 0.05898492410778999, "learning_rate": 1.1283206993102507e-05, "step": 88100 }, { "embedding_loss": 0.0164, "epoch": 4.925406492708275, "grad_norm": 0.05880478397011757, "learning_rate": 1.1276998627951303e-05, "step": 88150 }, { "embedding_loss": 0.0161, "epoch": 4.928200257026317, "grad_norm": 0.6811013221740723, "learning_rate": 1.1270790262800098e-05, "step": 88200 }, { "embedding_loss": 0.0153, "epoch": 4.930994021344359, "grad_norm": 0.025427179411053658, "learning_rate": 1.1264581897648894e-05, "step": 88250 }, { "embedding_loss": 0.015, "epoch": 4.9337877856624015, "grad_norm": 0.04850493744015694, "learning_rate": 1.125837353249769e-05, "step": 88300 }, { "embedding_loss": 0.0146, "epoch": 4.936581549980444, "grad_norm": 0.036485180258750916, "learning_rate": 1.1252165167346483e-05, "step": 88350 }, { "embedding_loss": 0.0172, "epoch": 4.939375314298486, "grad_norm": 0.057537633925676346, "learning_rate": 1.1245956802195279e-05, "step": 88400 }, { "embedding_loss": 0.0155, "epoch": 4.942169078616528, "grad_norm": 0.05739140138030052, "learning_rate": 1.1239748437044074e-05, "step": 88450 }, { "embedding_loss": 0.0152, "epoch": 4.9449628429345704, "grad_norm": 0.05737254396080971, "learning_rate": 1.123354007189287e-05, "step": 88500 }, { "embedding_loss": 0.0156, "epoch": 4.947756607252612, "grad_norm": 0.021410031244158745, "learning_rate": 1.1227331706741665e-05, "step": 88550 }, { "embedding_loss": 0.0171, "epoch": 4.950550371570654, "grad_norm": 0.05457943305373192, "learning_rate": 1.122112334159046e-05, "step": 88600 }, { "embedding_loss": 0.0157, "epoch": 4.953344135888696, "grad_norm": 0.06628642231225967, "learning_rate": 1.1214914976439254e-05, "step": 88650 }, { "embedding_loss": 0.0165, "epoch": 4.9561379002067385, "grad_norm": 0.028495484963059425, "learning_rate": 1.120870661128805e-05, "step": 88700 }, { "embedding_loss": 0.0165, "epoch": 4.958931664524781, "grad_norm": 0.04415672644972801, "learning_rate": 1.1202498246136845e-05, "step": 88750 }, { "embedding_loss": 0.0165, "epoch": 4.961725428842823, "grad_norm": 0.03747834637761116, "learning_rate": 1.1196289880985641e-05, "step": 88800 }, { "embedding_loss": 0.0153, "epoch": 4.964519193160865, "grad_norm": 0.04223033785820007, "learning_rate": 1.1190081515834436e-05, "step": 88850 }, { "embedding_loss": 0.0141, "epoch": 4.967312957478907, "grad_norm": 0.057472798973321915, "learning_rate": 1.118387315068323e-05, "step": 88900 }, { "embedding_loss": 0.0156, "epoch": 4.970106721796949, "grad_norm": 0.045666515827178955, "learning_rate": 1.1177664785532028e-05, "step": 88950 }, { "embedding_loss": 0.0152, "epoch": 4.972900486114991, "grad_norm": 0.1038128137588501, "learning_rate": 1.1171456420380821e-05, "step": 89000 }, { "embedding_loss": 0.0157, "epoch": 4.975694250433033, "grad_norm": 0.09196501225233078, "learning_rate": 1.1165248055229619e-05, "step": 89050 }, { "embedding_loss": 0.0157, "epoch": 4.9784880147510755, "grad_norm": 0.03464238718152046, "learning_rate": 1.1159039690078412e-05, "step": 89100 }, { "embedding_loss": 0.0168, "epoch": 4.981281779069118, "grad_norm": 0.8400146961212158, "learning_rate": 1.1152831324927206e-05, "step": 89150 }, { "embedding_loss": 0.0163, "epoch": 4.98407554338716, "grad_norm": 0.04518213868141174, "learning_rate": 1.1146622959776003e-05, "step": 89200 }, { "embedding_loss": 0.0161, "epoch": 4.986869307705202, "grad_norm": 0.04030297324061394, "learning_rate": 1.1140414594624797e-05, "step": 89250 }, { "embedding_loss": 0.0142, "epoch": 4.989663072023244, "grad_norm": 0.09017229825258255, "learning_rate": 1.1134206229473594e-05, "step": 89300 }, { "embedding_loss": 0.0148, "epoch": 4.992456836341287, "grad_norm": 0.04734496399760246, "learning_rate": 1.1127997864322388e-05, "step": 89350 }, { "embedding_loss": 0.0167, "epoch": 4.995250600659328, "grad_norm": 0.09140326082706451, "learning_rate": 1.1121789499171185e-05, "step": 89400 }, { "embedding_loss": 0.017, "epoch": 4.99804436497737, "grad_norm": 0.08538486808538437, "learning_rate": 1.111558113401998e-05, "step": 89450 }, { "embedding_loss": 0.0149, "epoch": 5.0008381292954125, "grad_norm": 0.06004970148205757, "learning_rate": 1.1109372768868776e-05, "step": 89500 }, { "embedding_loss": 0.0143, "epoch": 5.003631893613455, "grad_norm": 0.040674839168787, "learning_rate": 1.110316440371757e-05, "step": 89550 }, { "embedding_loss": 0.0161, "epoch": 5.006425657931497, "grad_norm": 0.0652661994099617, "learning_rate": 1.1096956038566364e-05, "step": 89600 }, { "embedding_loss": 0.0146, "epoch": 5.009219422249539, "grad_norm": 0.07770649343729019, "learning_rate": 1.1090747673415161e-05, "step": 89650 }, { "embedding_loss": 0.0152, "epoch": 5.012013186567581, "grad_norm": 0.05440425127744675, "learning_rate": 1.1084539308263955e-05, "step": 89700 }, { "embedding_loss": 0.016, "epoch": 5.014806950885624, "grad_norm": 0.047864679247140884, "learning_rate": 1.1078330943112752e-05, "step": 89750 }, { "embedding_loss": 0.016, "epoch": 5.017600715203665, "grad_norm": 0.05374499782919884, "learning_rate": 1.1072122577961546e-05, "step": 89800 }, { "embedding_loss": 0.014, "epoch": 5.020394479521707, "grad_norm": 0.04659700766205788, "learning_rate": 1.1065914212810342e-05, "step": 89850 }, { "embedding_loss": 0.015, "epoch": 5.0231882438397495, "grad_norm": 0.08015410602092743, "learning_rate": 1.1059705847659137e-05, "step": 89900 }, { "embedding_loss": 0.0178, "epoch": 5.025982008157792, "grad_norm": 0.08702098578214645, "learning_rate": 1.1053497482507931e-05, "step": 89950 }, { "embedding_loss": 0.016, "epoch": 5.028775772475834, "grad_norm": 0.08179530501365662, "learning_rate": 1.1047289117356728e-05, "step": 90000 }, { "embedding_loss": 0.017, "epoch": 5.031569536793876, "grad_norm": 0.05700922757387161, "learning_rate": 1.1041080752205522e-05, "step": 90050 }, { "embedding_loss": 0.0157, "epoch": 5.034363301111918, "grad_norm": 0.10996080935001373, "learning_rate": 1.1034872387054318e-05, "step": 90100 }, { "embedding_loss": 0.0149, "epoch": 5.037157065429961, "grad_norm": 0.036482810974121094, "learning_rate": 1.1028664021903113e-05, "step": 90150 }, { "embedding_loss": 0.0151, "epoch": 5.039950829748003, "grad_norm": 0.06888780742883682, "learning_rate": 1.1022455656751909e-05, "step": 90200 }, { "embedding_loss": 0.0169, "epoch": 5.042744594066044, "grad_norm": 0.06221791356801987, "learning_rate": 1.1016247291600704e-05, "step": 90250 }, { "embedding_loss": 0.0143, "epoch": 5.0455383583840865, "grad_norm": 0.03854485973715782, "learning_rate": 1.10100389264495e-05, "step": 90300 }, { "embedding_loss": 0.0161, "epoch": 5.048332122702129, "grad_norm": 0.27463874220848083, "learning_rate": 1.1003830561298293e-05, "step": 90350 }, { "embedding_loss": 0.0157, "epoch": 5.051125887020171, "grad_norm": 0.03906189277768135, "learning_rate": 1.0997622196147089e-05, "step": 90400 }, { "embedding_loss": 0.0158, "epoch": 5.053919651338213, "grad_norm": 0.05959931015968323, "learning_rate": 1.0991413830995884e-05, "step": 90450 }, { "embedding_loss": 0.0139, "epoch": 5.056713415656255, "grad_norm": 0.04372347518801689, "learning_rate": 1.098520546584468e-05, "step": 90500 }, { "embedding_loss": 0.0142, "epoch": 5.059507179974298, "grad_norm": 0.06188967078924179, "learning_rate": 1.0978997100693476e-05, "step": 90550 }, { "embedding_loss": 0.0159, "epoch": 5.06230094429234, "grad_norm": 0.05990060791373253, "learning_rate": 1.0972788735542271e-05, "step": 90600 }, { "embedding_loss": 0.0172, "epoch": 5.065094708610381, "grad_norm": 0.11858756095170975, "learning_rate": 1.0966580370391067e-05, "step": 90650 }, { "embedding_loss": 0.0158, "epoch": 5.0678884729284235, "grad_norm": 0.08373738080263138, "learning_rate": 1.096037200523986e-05, "step": 90700 }, { "embedding_loss": 0.0158, "epoch": 5.070682237246466, "grad_norm": 0.07499847561120987, "learning_rate": 1.0954163640088656e-05, "step": 90750 }, { "embedding_loss": 0.0165, "epoch": 5.073476001564508, "grad_norm": 0.03891967982053757, "learning_rate": 1.0947955274937451e-05, "step": 90800 }, { "embedding_loss": 0.0165, "epoch": 5.07626976588255, "grad_norm": 0.09228294342756271, "learning_rate": 1.0941746909786247e-05, "step": 90850 }, { "embedding_loss": 0.0173, "epoch": 5.079063530200592, "grad_norm": 0.05076824501156807, "learning_rate": 1.0935538544635042e-05, "step": 90900 }, { "embedding_loss": 0.0154, "epoch": 5.081857294518635, "grad_norm": 0.056932829320430756, "learning_rate": 1.0929330179483836e-05, "step": 90950 }, { "embedding_loss": 0.0154, "epoch": 5.084651058836677, "grad_norm": 0.0507420115172863, "learning_rate": 1.0923121814332633e-05, "step": 91000 }, { "embedding_loss": 0.017, "epoch": 5.087444823154718, "grad_norm": 0.0652204379439354, "learning_rate": 1.0916913449181427e-05, "step": 91050 }, { "embedding_loss": 0.0156, "epoch": 5.0902385874727605, "grad_norm": 0.2834748923778534, "learning_rate": 1.0910705084030224e-05, "step": 91100 }, { "embedding_loss": 0.0173, "epoch": 5.093032351790803, "grad_norm": 0.0614762119948864, "learning_rate": 1.0904496718879018e-05, "step": 91150 }, { "embedding_loss": 0.0169, "epoch": 5.095826116108845, "grad_norm": 0.05106177181005478, "learning_rate": 1.0898288353727812e-05, "step": 91200 }, { "embedding_loss": 0.0155, "epoch": 5.098619880426887, "grad_norm": 0.09053416550159454, "learning_rate": 1.089207998857661e-05, "step": 91250 }, { "embedding_loss": 0.0163, "epoch": 5.101413644744929, "grad_norm": 0.10259773582220078, "learning_rate": 1.0885871623425403e-05, "step": 91300 }, { "embedding_loss": 0.0144, "epoch": 5.104207409062972, "grad_norm": 0.0576709508895874, "learning_rate": 1.08796632582742e-05, "step": 91350 }, { "embedding_loss": 0.015, "epoch": 5.107001173381014, "grad_norm": 0.09734645485877991, "learning_rate": 1.0873454893122994e-05, "step": 91400 }, { "embedding_loss": 0.017, "epoch": 5.109794937699056, "grad_norm": 0.13361917436122894, "learning_rate": 1.0867246527971791e-05, "step": 91450 }, { "embedding_loss": 0.0138, "epoch": 5.1125887020170975, "grad_norm": 0.04032347351312637, "learning_rate": 1.0861038162820585e-05, "step": 91500 }, { "embedding_loss": 0.0172, "epoch": 5.11538246633514, "grad_norm": 0.08210046589374542, "learning_rate": 1.0854829797669382e-05, "step": 91550 }, { "embedding_loss": 0.0159, "epoch": 5.118176230653182, "grad_norm": 0.04050448536872864, "learning_rate": 1.0848621432518176e-05, "step": 91600 }, { "embedding_loss": 0.0136, "epoch": 5.120969994971224, "grad_norm": 0.04431804269552231, "learning_rate": 1.084241306736697e-05, "step": 91650 }, { "embedding_loss": 0.0162, "epoch": 5.123763759289266, "grad_norm": 0.05799337103962898, "learning_rate": 1.0836204702215767e-05, "step": 91700 }, { "embedding_loss": 0.0167, "epoch": 5.126557523607309, "grad_norm": 0.05859442055225372, "learning_rate": 1.0829996337064561e-05, "step": 91750 }, { "embedding_loss": 0.0148, "epoch": 5.129351287925351, "grad_norm": 0.04649636894464493, "learning_rate": 1.0823787971913358e-05, "step": 91800 }, { "embedding_loss": 0.0149, "epoch": 5.132145052243393, "grad_norm": 0.04598875343799591, "learning_rate": 1.0817579606762152e-05, "step": 91850 }, { "embedding_loss": 0.0159, "epoch": 5.1349388165614345, "grad_norm": 0.06723975390195847, "learning_rate": 1.0811371241610948e-05, "step": 91900 }, { "embedding_loss": 0.0173, "epoch": 5.137732580879477, "grad_norm": 0.024851223453879356, "learning_rate": 1.0805162876459743e-05, "step": 91950 }, { "embedding_loss": 0.0154, "epoch": 5.140526345197519, "grad_norm": 0.05461699143052101, "learning_rate": 1.0798954511308537e-05, "step": 92000 }, { "embedding_loss": 0.0149, "epoch": 5.143320109515561, "grad_norm": 0.07281236350536346, "learning_rate": 1.0792746146157334e-05, "step": 92050 }, { "embedding_loss": 0.0133, "epoch": 5.146113873833603, "grad_norm": 0.047439444810152054, "learning_rate": 1.0786537781006128e-05, "step": 92100 }, { "embedding_loss": 0.0129, "epoch": 5.148907638151646, "grad_norm": 0.10424092411994934, "learning_rate": 1.0780329415854924e-05, "step": 92150 }, { "embedding_loss": 0.0183, "epoch": 5.151701402469688, "grad_norm": 0.06679093092679977, "learning_rate": 1.0774121050703719e-05, "step": 92200 }, { "embedding_loss": 0.0135, "epoch": 5.15449516678773, "grad_norm": 0.052083007991313934, "learning_rate": 1.0767912685552515e-05, "step": 92250 }, { "embedding_loss": 0.0156, "epoch": 5.1572889311057715, "grad_norm": 0.0750984251499176, "learning_rate": 1.076170432040131e-05, "step": 92300 }, { "embedding_loss": 0.0181, "epoch": 5.160082695423814, "grad_norm": 0.03514746204018593, "learning_rate": 1.0755495955250106e-05, "step": 92350 }, { "embedding_loss": 0.0151, "epoch": 5.162876459741856, "grad_norm": 0.07536029070615768, "learning_rate": 1.07492875900989e-05, "step": 92400 }, { "embedding_loss": 0.016, "epoch": 5.165670224059898, "grad_norm": 0.10701073706150055, "learning_rate": 1.0743079224947695e-05, "step": 92450 }, { "embedding_loss": 0.0149, "epoch": 5.16846398837794, "grad_norm": 0.06109026074409485, "learning_rate": 1.073687085979649e-05, "step": 92500 }, { "embedding_loss": 0.0153, "epoch": 5.171257752695983, "grad_norm": 0.05963483080267906, "learning_rate": 1.0730662494645286e-05, "step": 92550 }, { "embedding_loss": 0.0154, "epoch": 5.174051517014025, "grad_norm": 0.06164640560746193, "learning_rate": 1.0724454129494081e-05, "step": 92600 }, { "embedding_loss": 0.0147, "epoch": 5.176845281332067, "grad_norm": 0.046850621700286865, "learning_rate": 1.0718245764342877e-05, "step": 92650 }, { "embedding_loss": 0.0158, "epoch": 5.179639045650109, "grad_norm": 0.037290968000888824, "learning_rate": 1.0712037399191672e-05, "step": 92700 }, { "embedding_loss": 0.0157, "epoch": 5.182432809968151, "grad_norm": 0.05649701878428459, "learning_rate": 1.0705829034040466e-05, "step": 92750 }, { "embedding_loss": 0.0182, "epoch": 5.185226574286193, "grad_norm": 0.08650650084018707, "learning_rate": 1.0699620668889262e-05, "step": 92800 }, { "embedding_loss": 0.0156, "epoch": 5.188020338604235, "grad_norm": 0.05208244174718857, "learning_rate": 1.0693412303738057e-05, "step": 92850 }, { "embedding_loss": 0.0154, "epoch": 5.190814102922277, "grad_norm": 0.2358648031949997, "learning_rate": 1.0687203938586853e-05, "step": 92900 }, { "embedding_loss": 0.0178, "epoch": 5.19360786724032, "grad_norm": 0.06256962567567825, "learning_rate": 1.0680995573435648e-05, "step": 92950 }, { "embedding_loss": 0.0149, "epoch": 5.196401631558362, "grad_norm": 0.05258328840136528, "learning_rate": 1.0674787208284442e-05, "step": 93000 }, { "embedding_loss": 0.0145, "epoch": 5.199195395876404, "grad_norm": 0.06209081783890724, "learning_rate": 1.066857884313324e-05, "step": 93050 }, { "embedding_loss": 0.0157, "epoch": 5.201989160194446, "grad_norm": 0.06745569407939911, "learning_rate": 1.0662370477982033e-05, "step": 93100 }, { "embedding_loss": 0.0167, "epoch": 5.204782924512488, "grad_norm": 1.6964589357376099, "learning_rate": 1.065616211283083e-05, "step": 93150 }, { "embedding_loss": 0.016, "epoch": 5.20757668883053, "grad_norm": 0.03966876119375229, "learning_rate": 1.0649953747679624e-05, "step": 93200 }, { "embedding_loss": 0.0149, "epoch": 5.210370453148572, "grad_norm": 0.03473056107759476, "learning_rate": 1.0643745382528418e-05, "step": 93250 }, { "embedding_loss": 0.0149, "epoch": 5.213164217466614, "grad_norm": 0.09497449547052383, "learning_rate": 1.0637537017377215e-05, "step": 93300 }, { "embedding_loss": 0.0159, "epoch": 5.215957981784657, "grad_norm": 0.1981734037399292, "learning_rate": 1.0631328652226009e-05, "step": 93350 }, { "embedding_loss": 0.0163, "epoch": 5.218751746102699, "grad_norm": 0.26169273257255554, "learning_rate": 1.0625120287074806e-05, "step": 93400 }, { "embedding_loss": 0.0142, "epoch": 5.221545510420741, "grad_norm": 0.050725992769002914, "learning_rate": 1.06189119219236e-05, "step": 93450 }, { "embedding_loss": 0.0152, "epoch": 5.224339274738783, "grad_norm": 0.07805200666189194, "learning_rate": 1.0612703556772397e-05, "step": 93500 }, { "embedding_loss": 0.0169, "epoch": 5.227133039056826, "grad_norm": 0.08784621208906174, "learning_rate": 1.0606495191621191e-05, "step": 93550 }, { "embedding_loss": 0.0168, "epoch": 5.229926803374867, "grad_norm": 0.06918930262327194, "learning_rate": 1.0600286826469988e-05, "step": 93600 }, { "embedding_loss": 0.016, "epoch": 5.232720567692909, "grad_norm": 0.03698456287384033, "learning_rate": 1.0594078461318782e-05, "step": 93650 }, { "embedding_loss": 0.015, "epoch": 5.235514332010951, "grad_norm": 0.1537168323993683, "learning_rate": 1.0587870096167576e-05, "step": 93700 }, { "embedding_loss": 0.0162, "epoch": 5.238308096328994, "grad_norm": 0.06485524773597717, "learning_rate": 1.0581661731016373e-05, "step": 93750 }, { "embedding_loss": 0.0174, "epoch": 5.241101860647036, "grad_norm": 0.08967548608779907, "learning_rate": 1.0575453365865167e-05, "step": 93800 }, { "embedding_loss": 0.0166, "epoch": 5.243895624965078, "grad_norm": 0.0509321391582489, "learning_rate": 1.0569245000713964e-05, "step": 93850 }, { "embedding_loss": 0.0155, "epoch": 5.24668938928312, "grad_norm": 0.04130686819553375, "learning_rate": 1.0563036635562758e-05, "step": 93900 }, { "embedding_loss": 0.0158, "epoch": 5.249483153601163, "grad_norm": 0.09664051979780197, "learning_rate": 1.0556828270411554e-05, "step": 93950 }, { "embedding_loss": 0.0165, "epoch": 5.252276917919204, "grad_norm": 0.04659804329276085, "learning_rate": 1.0550619905260349e-05, "step": 94000 }, { "embedding_loss": 0.0172, "epoch": 5.255070682237246, "grad_norm": 0.07526962459087372, "learning_rate": 1.0544411540109143e-05, "step": 94050 }, { "embedding_loss": 0.0154, "epoch": 5.257864446555288, "grad_norm": 0.05522993206977844, "learning_rate": 1.053820317495794e-05, "step": 94100 }, { "embedding_loss": 0.016, "epoch": 5.260658210873331, "grad_norm": 0.09104318171739578, "learning_rate": 1.0531994809806734e-05, "step": 94150 }, { "embedding_loss": 0.0153, "epoch": 5.263451975191373, "grad_norm": 0.04526481777429581, "learning_rate": 1.052578644465553e-05, "step": 94200 }, { "embedding_loss": 0.0163, "epoch": 5.266245739509415, "grad_norm": 0.06631246209144592, "learning_rate": 1.0519578079504325e-05, "step": 94250 }, { "embedding_loss": 0.0167, "epoch": 5.269039503827457, "grad_norm": 0.065740667283535, "learning_rate": 1.051336971435312e-05, "step": 94300 }, { "embedding_loss": 0.0151, "epoch": 5.2718332681455, "grad_norm": 0.06833446025848389, "learning_rate": 1.0507161349201916e-05, "step": 94350 }, { "embedding_loss": 0.0181, "epoch": 5.274627032463542, "grad_norm": 0.2738802433013916, "learning_rate": 1.0500952984050711e-05, "step": 94400 }, { "embedding_loss": 0.0163, "epoch": 5.277420796781583, "grad_norm": 0.07120973616838455, "learning_rate": 1.0494744618899505e-05, "step": 94450 }, { "embedding_loss": 0.0141, "epoch": 5.280214561099625, "grad_norm": 0.09698312729597092, "learning_rate": 1.04885362537483e-05, "step": 94500 }, { "embedding_loss": 0.0177, "epoch": 5.283008325417668, "grad_norm": 0.820493757724762, "learning_rate": 1.0482327888597096e-05, "step": 94550 }, { "embedding_loss": 0.0151, "epoch": 5.28580208973571, "grad_norm": 0.06611043959856033, "learning_rate": 1.0476119523445892e-05, "step": 94600 }, { "embedding_loss": 0.0185, "epoch": 5.288595854053752, "grad_norm": 0.07692337781190872, "learning_rate": 1.0469911158294687e-05, "step": 94650 }, { "embedding_loss": 0.0166, "epoch": 5.291389618371794, "grad_norm": 0.7415544986724854, "learning_rate": 1.0463702793143483e-05, "step": 94700 }, { "embedding_loss": 0.0152, "epoch": 5.294183382689837, "grad_norm": 0.04513034597039223, "learning_rate": 1.0457494427992278e-05, "step": 94750 }, { "embedding_loss": 0.0148, "epoch": 5.296977147007879, "grad_norm": 0.05909308046102524, "learning_rate": 1.0451286062841072e-05, "step": 94800 }, { "embedding_loss": 0.0163, "epoch": 5.29977091132592, "grad_norm": 0.09354628622531891, "learning_rate": 1.0445077697689868e-05, "step": 94850 }, { "embedding_loss": 0.0157, "epoch": 5.302564675643962, "grad_norm": 0.07116477936506271, "learning_rate": 1.0438869332538663e-05, "step": 94900 }, { "embedding_loss": 0.013, "epoch": 5.305358439962005, "grad_norm": 0.03294273465871811, "learning_rate": 1.0432660967387459e-05, "step": 94950 }, { "embedding_loss": 0.0177, "epoch": 5.308152204280047, "grad_norm": 0.07885225117206573, "learning_rate": 1.0426452602236254e-05, "step": 95000 }, { "embedding_loss": 0.0151, "epoch": 5.310945968598089, "grad_norm": 0.06741628795862198, "learning_rate": 1.0420244237085048e-05, "step": 95050 }, { "embedding_loss": 0.0166, "epoch": 5.313739732916131, "grad_norm": 0.062811940908432, "learning_rate": 1.0414035871933845e-05, "step": 95100 }, { "embedding_loss": 0.0165, "epoch": 5.316533497234174, "grad_norm": 0.03716414421796799, "learning_rate": 1.0407827506782639e-05, "step": 95150 }, { "embedding_loss": 0.0148, "epoch": 5.319327261552216, "grad_norm": 0.06313669681549072, "learning_rate": 1.0401619141631436e-05, "step": 95200 }, { "embedding_loss": 0.0159, "epoch": 5.322121025870258, "grad_norm": 0.056762631982564926, "learning_rate": 1.039541077648023e-05, "step": 95250 }, { "embedding_loss": 0.0165, "epoch": 5.324914790188299, "grad_norm": 0.11099851876497269, "learning_rate": 1.0389202411329024e-05, "step": 95300 }, { "embedding_loss": 0.0168, "epoch": 5.327708554506342, "grad_norm": 0.07540174573659897, "learning_rate": 1.0382994046177821e-05, "step": 95350 }, { "embedding_loss": 0.0145, "epoch": 5.330502318824384, "grad_norm": 0.05014439672231674, "learning_rate": 1.0376785681026615e-05, "step": 95400 }, { "embedding_loss": 0.0162, "epoch": 5.333296083142426, "grad_norm": 0.07147827744483948, "learning_rate": 1.0370577315875412e-05, "step": 95450 }, { "embedding_loss": 0.0148, "epoch": 5.336089847460468, "grad_norm": 0.08208739012479782, "learning_rate": 1.0364368950724206e-05, "step": 95500 }, { "embedding_loss": 0.0153, "epoch": 5.338883611778511, "grad_norm": 0.16903583705425262, "learning_rate": 1.0358160585573003e-05, "step": 95550 }, { "embedding_loss": 0.0156, "epoch": 5.341677376096553, "grad_norm": 0.04609150066971779, "learning_rate": 1.0351952220421797e-05, "step": 95600 }, { "embedding_loss": 0.0146, "epoch": 5.344471140414595, "grad_norm": 0.049917321652173996, "learning_rate": 1.0345743855270594e-05, "step": 95650 }, { "embedding_loss": 0.014, "epoch": 5.347264904732636, "grad_norm": 0.06373244524002075, "learning_rate": 1.0339535490119388e-05, "step": 95700 }, { "embedding_loss": 0.0161, "epoch": 5.350058669050679, "grad_norm": 0.07134360074996948, "learning_rate": 1.0333327124968182e-05, "step": 95750 }, { "embedding_loss": 0.016, "epoch": 5.352852433368721, "grad_norm": 0.032827045768499374, "learning_rate": 1.0327118759816979e-05, "step": 95800 }, { "embedding_loss": 0.016, "epoch": 5.355646197686763, "grad_norm": 0.10427302867174149, "learning_rate": 1.0320910394665773e-05, "step": 95850 }, { "embedding_loss": 0.0146, "epoch": 5.358439962004805, "grad_norm": 0.05384469032287598, "learning_rate": 1.031470202951457e-05, "step": 95900 }, { "embedding_loss": 0.0158, "epoch": 5.361233726322848, "grad_norm": 0.048924319446086884, "learning_rate": 1.0308493664363364e-05, "step": 95950 }, { "embedding_loss": 0.0152, "epoch": 5.36402749064089, "grad_norm": 0.07664570957422256, "learning_rate": 1.030228529921216e-05, "step": 96000 }, { "embedding_loss": 0.015, "epoch": 5.366821254958932, "grad_norm": 0.040698885917663574, "learning_rate": 1.0296076934060955e-05, "step": 96050 }, { "embedding_loss": 0.0155, "epoch": 5.369615019276974, "grad_norm": 0.03694605827331543, "learning_rate": 1.0289868568909749e-05, "step": 96100 }, { "embedding_loss": 0.0155, "epoch": 5.372408783595016, "grad_norm": 0.05922548472881317, "learning_rate": 1.0283660203758546e-05, "step": 96150 }, { "embedding_loss": 0.0165, "epoch": 5.375202547913058, "grad_norm": 0.09288611263036728, "learning_rate": 1.027745183860734e-05, "step": 96200 }, { "embedding_loss": 0.0165, "epoch": 5.3779963122311, "grad_norm": 0.13849571347236633, "learning_rate": 1.0271243473456135e-05, "step": 96250 }, { "embedding_loss": 0.0164, "epoch": 5.380790076549142, "grad_norm": 0.03724781051278114, "learning_rate": 1.0265035108304931e-05, "step": 96300 }, { "embedding_loss": 0.0156, "epoch": 5.3835838408671846, "grad_norm": 0.0943017527461052, "learning_rate": 1.0258826743153726e-05, "step": 96350 }, { "embedding_loss": 0.0147, "epoch": 5.386377605185227, "grad_norm": 0.05005461722612381, "learning_rate": 1.0252618378002522e-05, "step": 96400 }, { "embedding_loss": 0.0158, "epoch": 5.389171369503269, "grad_norm": 0.07624027132987976, "learning_rate": 1.0246410012851317e-05, "step": 96450 }, { "embedding_loss": 0.0148, "epoch": 5.39196513382131, "grad_norm": 0.0681694969534874, "learning_rate": 1.0240201647700111e-05, "step": 96500 }, { "embedding_loss": 0.0147, "epoch": 5.394758898139353, "grad_norm": 0.03893858939409256, "learning_rate": 1.0233993282548907e-05, "step": 96550 }, { "embedding_loss": 0.0153, "epoch": 5.397552662457395, "grad_norm": 0.04285294562578201, "learning_rate": 1.0227784917397702e-05, "step": 96600 }, { "embedding_loss": 0.017, "epoch": 5.400346426775437, "grad_norm": 1.3456002473831177, "learning_rate": 1.0221576552246498e-05, "step": 96650 }, { "embedding_loss": 0.0173, "epoch": 5.403140191093479, "grad_norm": 0.0767652615904808, "learning_rate": 1.0215368187095293e-05, "step": 96700 }, { "embedding_loss": 0.017, "epoch": 5.4059339554115216, "grad_norm": 0.07762341946363449, "learning_rate": 1.0209159821944087e-05, "step": 96750 }, { "embedding_loss": 0.0149, "epoch": 5.408727719729564, "grad_norm": 0.04020615667104721, "learning_rate": 1.0202951456792884e-05, "step": 96800 }, { "embedding_loss": 0.0166, "epoch": 5.411521484047606, "grad_norm": 0.059502020478248596, "learning_rate": 1.0196743091641678e-05, "step": 96850 }, { "embedding_loss": 0.0163, "epoch": 5.414315248365648, "grad_norm": 0.07205372303724289, "learning_rate": 1.0190534726490474e-05, "step": 96900 }, { "embedding_loss": 0.0157, "epoch": 5.41710901268369, "grad_norm": 0.05349917337298393, "learning_rate": 1.018432636133927e-05, "step": 96950 }, { "embedding_loss": 0.0167, "epoch": 5.419902777001732, "grad_norm": 0.04318578168749809, "learning_rate": 1.0178117996188065e-05, "step": 97000 }, { "embedding_loss": 0.016, "epoch": 5.422696541319774, "grad_norm": 0.021379543468356133, "learning_rate": 1.017190963103686e-05, "step": 97050 }, { "embedding_loss": 0.0175, "epoch": 5.425490305637816, "grad_norm": 0.09314034134149551, "learning_rate": 1.0165701265885654e-05, "step": 97100 }, { "embedding_loss": 0.0153, "epoch": 5.4282840699558585, "grad_norm": 0.09744363278150558, "learning_rate": 1.0159492900734451e-05, "step": 97150 }, { "embedding_loss": 0.0156, "epoch": 5.431077834273901, "grad_norm": 0.06145336851477623, "learning_rate": 1.0153284535583245e-05, "step": 97200 }, { "embedding_loss": 0.0171, "epoch": 5.433871598591943, "grad_norm": 0.08606644719839096, "learning_rate": 1.0147076170432042e-05, "step": 97250 }, { "embedding_loss": 0.016, "epoch": 5.436665362909985, "grad_norm": 0.08077038824558258, "learning_rate": 1.0140867805280836e-05, "step": 97300 }, { "embedding_loss": 0.0143, "epoch": 5.439459127228027, "grad_norm": 0.09422262758016586, "learning_rate": 1.013465944012963e-05, "step": 97350 }, { "embedding_loss": 0.0151, "epoch": 5.442252891546069, "grad_norm": 0.06664010882377625, "learning_rate": 1.0128451074978427e-05, "step": 97400 }, { "embedding_loss": 0.0137, "epoch": 5.445046655864111, "grad_norm": 0.0781770721077919, "learning_rate": 1.0122242709827221e-05, "step": 97450 }, { "embedding_loss": 0.0158, "epoch": 5.447840420182153, "grad_norm": 0.06077120825648308, "learning_rate": 1.0116034344676018e-05, "step": 97500 }, { "embedding_loss": 0.0165, "epoch": 5.4506341845001955, "grad_norm": 0.06399764865636826, "learning_rate": 1.0109825979524812e-05, "step": 97550 }, { "embedding_loss": 0.0158, "epoch": 5.453427948818238, "grad_norm": 0.04253954440355301, "learning_rate": 1.010361761437361e-05, "step": 97600 }, { "embedding_loss": 0.0157, "epoch": 5.45622171313628, "grad_norm": 0.07813206315040588, "learning_rate": 1.0097409249222403e-05, "step": 97650 }, { "embedding_loss": 0.0149, "epoch": 5.459015477454322, "grad_norm": 0.06559271365404129, "learning_rate": 1.00912008840712e-05, "step": 97700 }, { "embedding_loss": 0.0155, "epoch": 5.4618092417723645, "grad_norm": 0.046932291239500046, "learning_rate": 1.0084992518919994e-05, "step": 97750 }, { "embedding_loss": 0.0162, "epoch": 5.464603006090406, "grad_norm": 0.02986653335392475, "learning_rate": 1.0078784153768788e-05, "step": 97800 }, { "embedding_loss": 0.0153, "epoch": 5.467396770408448, "grad_norm": 0.06518872082233429, "learning_rate": 1.0072575788617585e-05, "step": 97850 }, { "embedding_loss": 0.0169, "epoch": 5.47019053472649, "grad_norm": 0.04042830318212509, "learning_rate": 1.0066367423466379e-05, "step": 97900 }, { "embedding_loss": 0.0155, "epoch": 5.4729842990445325, "grad_norm": 0.04810776561498642, "learning_rate": 1.0060159058315176e-05, "step": 97950 }, { "embedding_loss": 0.0147, "epoch": 5.475778063362575, "grad_norm": 0.08379826694726944, "learning_rate": 1.005395069316397e-05, "step": 98000 }, { "embedding_loss": 0.0162, "epoch": 5.478571827680617, "grad_norm": 0.12663666903972626, "learning_rate": 1.0047742328012765e-05, "step": 98050 }, { "embedding_loss": 0.0155, "epoch": 5.481365591998659, "grad_norm": 0.05131833255290985, "learning_rate": 1.0041533962861561e-05, "step": 98100 }, { "embedding_loss": 0.0167, "epoch": 5.4841593563167015, "grad_norm": 0.05770955607295036, "learning_rate": 1.0035325597710355e-05, "step": 98150 }, { "embedding_loss": 0.0167, "epoch": 5.486953120634743, "grad_norm": 0.051694005727767944, "learning_rate": 1.0029117232559152e-05, "step": 98200 }, { "embedding_loss": 0.0161, "epoch": 5.489746884952785, "grad_norm": 0.07899698615074158, "learning_rate": 1.0022908867407946e-05, "step": 98250 }, { "embedding_loss": 0.0148, "epoch": 5.492540649270827, "grad_norm": 0.06934590637683868, "learning_rate": 1.0016700502256741e-05, "step": 98300 }, { "embedding_loss": 0.0156, "epoch": 5.4953344135888695, "grad_norm": 0.057976506650447845, "learning_rate": 1.0010492137105537e-05, "step": 98350 }, { "embedding_loss": 0.0143, "epoch": 5.498128177906912, "grad_norm": 0.08699237555265427, "learning_rate": 1.0004283771954332e-05, "step": 98400 }, { "embedding_loss": 0.0158, "epoch": 5.500921942224954, "grad_norm": 0.1759660243988037, "learning_rate": 9.998075406803128e-06, "step": 98450 }, { "embedding_loss": 0.0157, "epoch": 5.503715706542996, "grad_norm": 0.057625915855169296, "learning_rate": 9.991867041651923e-06, "step": 98500 }, { "embedding_loss": 0.0158, "epoch": 5.5065094708610385, "grad_norm": 0.05510551482439041, "learning_rate": 9.985658676500717e-06, "step": 98550 }, { "embedding_loss": 0.0152, "epoch": 5.509303235179081, "grad_norm": 0.04085443541407585, "learning_rate": 9.979450311349513e-06, "step": 98600 }, { "embedding_loss": 0.0161, "epoch": 5.512096999497122, "grad_norm": 0.050827570259571075, "learning_rate": 9.973241946198308e-06, "step": 98650 }, { "embedding_loss": 0.0158, "epoch": 5.514890763815164, "grad_norm": 0.07945003360509872, "learning_rate": 9.967033581047104e-06, "step": 98700 }, { "embedding_loss": 0.0163, "epoch": 5.5176845281332065, "grad_norm": 0.08681822568178177, "learning_rate": 9.9608252158959e-06, "step": 98750 }, { "embedding_loss": 0.0165, "epoch": 5.520478292451249, "grad_norm": 0.0710330531001091, "learning_rate": 9.954616850744693e-06, "step": 98800 }, { "embedding_loss": 0.015, "epoch": 5.523272056769291, "grad_norm": 0.07567067444324493, "learning_rate": 9.948408485593489e-06, "step": 98850 }, { "embedding_loss": 0.0158, "epoch": 5.526065821087333, "grad_norm": 0.048341915011405945, "learning_rate": 9.942200120442284e-06, "step": 98900 }, { "embedding_loss": 0.018, "epoch": 5.5288595854053755, "grad_norm": 0.05280967801809311, "learning_rate": 9.93599175529108e-06, "step": 98950 }, { "embedding_loss": 0.0164, "epoch": 5.531653349723418, "grad_norm": 0.08527275919914246, "learning_rate": 9.929783390139875e-06, "step": 99000 }, { "embedding_loss": 0.0162, "epoch": 5.534447114041459, "grad_norm": 0.06791257113218307, "learning_rate": 9.92357502498867e-06, "step": 99050 }, { "embedding_loss": 0.0145, "epoch": 5.537240878359501, "grad_norm": 0.034521616995334625, "learning_rate": 9.917366659837466e-06, "step": 99100 }, { "embedding_loss": 0.0165, "epoch": 5.5400346426775435, "grad_norm": 0.07993583381175995, "learning_rate": 9.91115829468626e-06, "step": 99150 }, { "embedding_loss": 0.0158, "epoch": 5.542828406995586, "grad_norm": 0.056182872503995895, "learning_rate": 9.904949929535055e-06, "step": 99200 }, { "embedding_loss": 0.0174, "epoch": 5.545622171313628, "grad_norm": 0.05506473779678345, "learning_rate": 9.898741564383851e-06, "step": 99250 }, { "embedding_loss": 0.0144, "epoch": 5.54841593563167, "grad_norm": 0.1017434298992157, "learning_rate": 9.892533199232646e-06, "step": 99300 }, { "embedding_loss": 0.0165, "epoch": 5.5512096999497125, "grad_norm": 0.05002304166555405, "learning_rate": 9.886324834081442e-06, "step": 99350 }, { "embedding_loss": 0.0146, "epoch": 5.554003464267755, "grad_norm": 0.07684843987226486, "learning_rate": 9.880116468930238e-06, "step": 99400 }, { "embedding_loss": 0.0156, "epoch": 5.556797228585797, "grad_norm": 0.04431593045592308, "learning_rate": 9.873908103779033e-06, "step": 99450 }, { "embedding_loss": 0.0169, "epoch": 5.559590992903838, "grad_norm": 0.023779423907399178, "learning_rate": 9.867699738627829e-06, "step": 99500 }, { "embedding_loss": 0.0167, "epoch": 5.5623847572218805, "grad_norm": 0.11598797887563705, "learning_rate": 9.861491373476622e-06, "step": 99550 }, { "embedding_loss": 0.0164, "epoch": 5.565178521539923, "grad_norm": 0.1564791053533554, "learning_rate": 9.855283008325418e-06, "step": 99600 }, { "embedding_loss": 0.0151, "epoch": 5.567972285857965, "grad_norm": 0.09342484176158905, "learning_rate": 9.849074643174213e-06, "step": 99650 }, { "embedding_loss": 0.0165, "epoch": 5.570766050176007, "grad_norm": 0.03713317587971687, "learning_rate": 9.842866278023009e-06, "step": 99700 }, { "embedding_loss": 0.017, "epoch": 5.5735598144940495, "grad_norm": 0.05288253724575043, "learning_rate": 9.836657912871804e-06, "step": 99750 }, { "embedding_loss": 0.0144, "epoch": 5.576353578812092, "grad_norm": 0.041654184460639954, "learning_rate": 9.8304495477206e-06, "step": 99800 }, { "embedding_loss": 0.016, "epoch": 5.579147343130133, "grad_norm": 0.03141145780682564, "learning_rate": 9.824241182569395e-06, "step": 99850 }, { "embedding_loss": 0.0168, "epoch": 5.581941107448175, "grad_norm": 0.05531300604343414, "learning_rate": 9.818032817418191e-06, "step": 99900 }, { "embedding_loss": 0.0152, "epoch": 5.5847348717662175, "grad_norm": 0.058415789157152176, "learning_rate": 9.811824452266986e-06, "step": 99950 }, { "embedding_loss": 0.0149, "epoch": 5.58752863608426, "grad_norm": 0.06668351590633392, "learning_rate": 9.80561608711578e-06, "step": 100000 }, { "embedding_loss": 0.0156, "epoch": 5.590322400402302, "grad_norm": 0.0655505582690239, "learning_rate": 9.799407721964576e-06, "step": 100050 }, { "embedding_loss": 0.0165, "epoch": 5.593116164720344, "grad_norm": 0.04853115603327751, "learning_rate": 9.793199356813371e-06, "step": 100100 }, { "embedding_loss": 0.0152, "epoch": 5.5959099290383865, "grad_norm": 0.07904411107301712, "learning_rate": 9.786990991662167e-06, "step": 100150 }, { "embedding_loss": 0.0157, "epoch": 5.598703693356429, "grad_norm": 0.21376940608024597, "learning_rate": 9.780782626510962e-06, "step": 100200 }, { "embedding_loss": 0.0153, "epoch": 5.601497457674471, "grad_norm": 0.03863833099603653, "learning_rate": 9.774574261359758e-06, "step": 100250 }, { "embedding_loss": 0.0172, "epoch": 5.604291221992513, "grad_norm": 0.054355863481760025, "learning_rate": 9.768365896208553e-06, "step": 100300 }, { "embedding_loss": 0.0159, "epoch": 5.6070849863105545, "grad_norm": 0.0639168918132782, "learning_rate": 9.762157531057347e-06, "step": 100350 }, { "embedding_loss": 0.0162, "epoch": 5.609878750628597, "grad_norm": 0.07354596257209778, "learning_rate": 9.755949165906143e-06, "step": 100400 }, { "embedding_loss": 0.0169, "epoch": 5.612672514946639, "grad_norm": 0.8064032196998596, "learning_rate": 9.749740800754938e-06, "step": 100450 }, { "embedding_loss": 0.0171, "epoch": 5.615466279264681, "grad_norm": 0.11467508971691132, "learning_rate": 9.743532435603734e-06, "step": 100500 }, { "embedding_loss": 0.0145, "epoch": 5.6182600435827235, "grad_norm": 0.051842499524354935, "learning_rate": 9.73732407045253e-06, "step": 100550 }, { "embedding_loss": 0.0179, "epoch": 5.621053807900766, "grad_norm": 0.07193715870380402, "learning_rate": 9.731115705301323e-06, "step": 100600 }, { "embedding_loss": 0.0158, "epoch": 5.623847572218808, "grad_norm": 0.05966987833380699, "learning_rate": 9.724907340150119e-06, "step": 100650 }, { "embedding_loss": 0.0145, "epoch": 5.626641336536849, "grad_norm": 0.03577367216348648, "learning_rate": 9.718698974998914e-06, "step": 100700 }, { "embedding_loss": 0.0155, "epoch": 5.6294351008548915, "grad_norm": 0.04335552453994751, "learning_rate": 9.71249060984771e-06, "step": 100750 }, { "embedding_loss": 0.0168, "epoch": 5.632228865172934, "grad_norm": 0.07316351681947708, "learning_rate": 9.706282244696505e-06, "step": 100800 }, { "embedding_loss": 0.0156, "epoch": 5.635022629490976, "grad_norm": 0.05413438007235527, "learning_rate": 9.700073879545299e-06, "step": 100850 }, { "embedding_loss": 0.0156, "epoch": 5.637816393809018, "grad_norm": 0.03375181183218956, "learning_rate": 9.693865514394094e-06, "step": 100900 }, { "embedding_loss": 0.0138, "epoch": 5.6406101581270605, "grad_norm": 0.043028540909290314, "learning_rate": 9.68765714924289e-06, "step": 100950 }, { "embedding_loss": 0.0172, "epoch": 5.643403922445103, "grad_norm": 0.03824777156114578, "learning_rate": 9.681448784091685e-06, "step": 101000 }, { "embedding_loss": 0.0168, "epoch": 5.646197686763145, "grad_norm": 0.03832137957215309, "learning_rate": 9.675240418940481e-06, "step": 101050 }, { "embedding_loss": 0.0162, "epoch": 5.648991451081187, "grad_norm": 0.11636210232973099, "learning_rate": 9.669032053789277e-06, "step": 101100 }, { "embedding_loss": 0.0153, "epoch": 5.651785215399229, "grad_norm": 0.025669284164905548, "learning_rate": 9.662823688638072e-06, "step": 101150 }, { "embedding_loss": 0.015, "epoch": 5.654578979717271, "grad_norm": 0.07291487604379654, "learning_rate": 9.656615323486866e-06, "step": 101200 }, { "embedding_loss": 0.0172, "epoch": 5.657372744035313, "grad_norm": 0.036202237010002136, "learning_rate": 9.650406958335661e-06, "step": 101250 }, { "embedding_loss": 0.0177, "epoch": 5.660166508353355, "grad_norm": 0.05650855600833893, "learning_rate": 9.644198593184457e-06, "step": 101300 }, { "embedding_loss": 0.0162, "epoch": 5.6629602726713975, "grad_norm": 0.04316619411110878, "learning_rate": 9.637990228033252e-06, "step": 101350 }, { "embedding_loss": 0.013, "epoch": 5.66575403698944, "grad_norm": 0.09185607731342316, "learning_rate": 9.631781862882048e-06, "step": 101400 }, { "embedding_loss": 0.0163, "epoch": 5.668547801307482, "grad_norm": 0.09788227826356888, "learning_rate": 9.625573497730843e-06, "step": 101450 }, { "embedding_loss": 0.0149, "epoch": 5.671341565625524, "grad_norm": 0.04542144387960434, "learning_rate": 9.619365132579639e-06, "step": 101500 }, { "embedding_loss": 0.0144, "epoch": 5.6741353299435655, "grad_norm": 0.062012460082769394, "learning_rate": 9.613156767428434e-06, "step": 101550 }, { "embedding_loss": 0.0151, "epoch": 5.676929094261608, "grad_norm": 0.04946340247988701, "learning_rate": 9.606948402277228e-06, "step": 101600 }, { "embedding_loss": 0.0152, "epoch": 5.67972285857965, "grad_norm": 0.020871104672551155, "learning_rate": 9.600740037126024e-06, "step": 101650 }, { "embedding_loss": 0.0178, "epoch": 5.682516622897692, "grad_norm": 0.059410955756902695, "learning_rate": 9.59453167197482e-06, "step": 101700 }, { "embedding_loss": 0.0154, "epoch": 5.6853103872157345, "grad_norm": 0.05543459579348564, "learning_rate": 9.588323306823615e-06, "step": 101750 }, { "embedding_loss": 0.0153, "epoch": 5.688104151533777, "grad_norm": 0.05587749928236008, "learning_rate": 9.58211494167241e-06, "step": 101800 }, { "embedding_loss": 0.0148, "epoch": 5.690897915851819, "grad_norm": 0.031140653416514397, "learning_rate": 9.575906576521206e-06, "step": 101850 }, { "embedding_loss": 0.0173, "epoch": 5.693691680169861, "grad_norm": 0.07330740988254547, "learning_rate": 9.569698211370001e-06, "step": 101900 }, { "embedding_loss": 0.0161, "epoch": 5.696485444487903, "grad_norm": 0.013740414753556252, "learning_rate": 9.563489846218797e-06, "step": 101950 }, { "embedding_loss": 0.0152, "epoch": 5.699279208805946, "grad_norm": 0.0744500383734703, "learning_rate": 9.557281481067592e-06, "step": 102000 }, { "embedding_loss": 0.0146, "epoch": 5.702072973123987, "grad_norm": 0.02889496646821499, "learning_rate": 9.551073115916386e-06, "step": 102050 }, { "embedding_loss": 0.0158, "epoch": 5.704866737442029, "grad_norm": 0.08042611181735992, "learning_rate": 9.544864750765182e-06, "step": 102100 }, { "embedding_loss": 0.0145, "epoch": 5.7076605017600714, "grad_norm": 0.06363100558519363, "learning_rate": 9.538656385613977e-06, "step": 102150 }, { "embedding_loss": 0.016, "epoch": 5.710454266078114, "grad_norm": 0.061320312321186066, "learning_rate": 9.532448020462773e-06, "step": 102200 }, { "embedding_loss": 0.0159, "epoch": 5.713248030396156, "grad_norm": 0.043268539011478424, "learning_rate": 9.526239655311568e-06, "step": 102250 }, { "embedding_loss": 0.0153, "epoch": 5.716041794714198, "grad_norm": 0.08508063852787018, "learning_rate": 9.520031290160364e-06, "step": 102300 }, { "embedding_loss": 0.014, "epoch": 5.71883555903224, "grad_norm": 0.049274176359176636, "learning_rate": 9.51382292500916e-06, "step": 102350 }, { "embedding_loss": 0.0155, "epoch": 5.721629323350282, "grad_norm": 0.05279829353094101, "learning_rate": 9.507614559857953e-06, "step": 102400 }, { "embedding_loss": 0.0159, "epoch": 5.724423087668324, "grad_norm": 0.054348334670066833, "learning_rate": 9.501406194706749e-06, "step": 102450 }, { "embedding_loss": 0.0147, "epoch": 5.727216851986366, "grad_norm": 0.04712050035595894, "learning_rate": 9.495197829555544e-06, "step": 102500 }, { "embedding_loss": 0.0152, "epoch": 5.730010616304408, "grad_norm": 0.06653130799531937, "learning_rate": 9.48898946440434e-06, "step": 102550 }, { "embedding_loss": 0.0158, "epoch": 5.732804380622451, "grad_norm": 0.04192822799086571, "learning_rate": 9.482781099253135e-06, "step": 102600 }, { "embedding_loss": 0.0163, "epoch": 5.735598144940493, "grad_norm": 0.07935847342014313, "learning_rate": 9.476572734101929e-06, "step": 102650 }, { "embedding_loss": 0.0152, "epoch": 5.738391909258535, "grad_norm": 0.053832899779081345, "learning_rate": 9.470364368950725e-06, "step": 102700 }, { "embedding_loss": 0.0165, "epoch": 5.741185673576577, "grad_norm": 0.04349770396947861, "learning_rate": 9.46415600379952e-06, "step": 102750 }, { "embedding_loss": 0.0159, "epoch": 5.74397943789462, "grad_norm": 0.1391909122467041, "learning_rate": 9.457947638648316e-06, "step": 102800 }, { "embedding_loss": 0.0166, "epoch": 5.746773202212661, "grad_norm": 0.045904312282800674, "learning_rate": 9.451739273497111e-06, "step": 102850 }, { "embedding_loss": 0.0169, "epoch": 5.749566966530703, "grad_norm": 0.07364467531442642, "learning_rate": 9.445530908345905e-06, "step": 102900 }, { "embedding_loss": 0.0171, "epoch": 5.752360730848745, "grad_norm": 0.05059810355305672, "learning_rate": 9.4393225431947e-06, "step": 102950 }, { "embedding_loss": 0.0153, "epoch": 5.755154495166788, "grad_norm": 0.03603336587548256, "learning_rate": 9.433114178043496e-06, "step": 103000 }, { "embedding_loss": 0.016, "epoch": 5.75794825948483, "grad_norm": 0.06496428698301315, "learning_rate": 9.426905812892291e-06, "step": 103050 }, { "embedding_loss": 0.0148, "epoch": 5.760742023802872, "grad_norm": 0.052661970257759094, "learning_rate": 9.420697447741087e-06, "step": 103100 }, { "embedding_loss": 0.0168, "epoch": 5.763535788120914, "grad_norm": 0.05237967148423195, "learning_rate": 9.414489082589882e-06, "step": 103150 }, { "embedding_loss": 0.0155, "epoch": 5.766329552438957, "grad_norm": 0.0499303936958313, "learning_rate": 9.408280717438678e-06, "step": 103200 }, { "embedding_loss": 0.0166, "epoch": 5.769123316756998, "grad_norm": 0.040169619023799896, "learning_rate": 9.402072352287472e-06, "step": 103250 }, { "embedding_loss": 0.0151, "epoch": 5.77191708107504, "grad_norm": 0.08143165707588196, "learning_rate": 9.395863987136267e-06, "step": 103300 }, { "embedding_loss": 0.0173, "epoch": 5.774710845393082, "grad_norm": 0.07904316484928131, "learning_rate": 9.389655621985063e-06, "step": 103350 }, { "embedding_loss": 0.0138, "epoch": 5.777504609711125, "grad_norm": 0.11068825423717499, "learning_rate": 9.383447256833858e-06, "step": 103400 }, { "embedding_loss": 0.0151, "epoch": 5.780298374029167, "grad_norm": 0.047108180820941925, "learning_rate": 9.377238891682654e-06, "step": 103450 }, { "embedding_loss": 0.0163, "epoch": 5.783092138347209, "grad_norm": 0.055089592933654785, "learning_rate": 9.37103052653145e-06, "step": 103500 }, { "embedding_loss": 0.0149, "epoch": 5.785885902665251, "grad_norm": 0.07989136874675751, "learning_rate": 9.364822161380245e-06, "step": 103550 }, { "embedding_loss": 0.0154, "epoch": 5.788679666983294, "grad_norm": 0.03635120019316673, "learning_rate": 9.35861379622904e-06, "step": 103600 }, { "embedding_loss": 0.0164, "epoch": 5.791473431301336, "grad_norm": 0.05390840768814087, "learning_rate": 9.352405431077834e-06, "step": 103650 }, { "embedding_loss": 0.016, "epoch": 5.794267195619377, "grad_norm": 0.035512253642082214, "learning_rate": 9.34619706592663e-06, "step": 103700 }, { "embedding_loss": 0.0159, "epoch": 5.797060959937419, "grad_norm": 0.05652906745672226, "learning_rate": 9.339988700775425e-06, "step": 103750 }, { "embedding_loss": 0.0171, "epoch": 5.799854724255462, "grad_norm": 0.05431665852665901, "learning_rate": 9.33378033562422e-06, "step": 103800 }, { "embedding_loss": 0.0162, "epoch": 5.802648488573504, "grad_norm": 0.06480461359024048, "learning_rate": 9.327571970473016e-06, "step": 103850 }, { "embedding_loss": 0.0159, "epoch": 5.805442252891546, "grad_norm": 0.11303944140672684, "learning_rate": 9.321363605321812e-06, "step": 103900 }, { "embedding_loss": 0.0163, "epoch": 5.808236017209588, "grad_norm": 0.044073790311813354, "learning_rate": 9.315155240170607e-06, "step": 103950 }, { "embedding_loss": 0.0165, "epoch": 5.811029781527631, "grad_norm": 0.05992767587304115, "learning_rate": 9.308946875019403e-06, "step": 104000 }, { "embedding_loss": 0.0154, "epoch": 5.813823545845673, "grad_norm": 0.0484279990196228, "learning_rate": 9.302738509868198e-06, "step": 104050 }, { "embedding_loss": 0.0146, "epoch": 5.816617310163714, "grad_norm": 0.06219727173447609, "learning_rate": 9.296530144716992e-06, "step": 104100 }, { "embedding_loss": 0.0167, "epoch": 5.819411074481756, "grad_norm": 0.06364631652832031, "learning_rate": 9.290321779565788e-06, "step": 104150 }, { "embedding_loss": 0.0158, "epoch": 5.822204838799799, "grad_norm": 0.04901329055428505, "learning_rate": 9.284113414414583e-06, "step": 104200 }, { "embedding_loss": 0.0176, "epoch": 5.824998603117841, "grad_norm": 0.06980184465646744, "learning_rate": 9.277905049263379e-06, "step": 104250 }, { "embedding_loss": 0.0158, "epoch": 5.827792367435883, "grad_norm": 0.07012127339839935, "learning_rate": 9.271696684112174e-06, "step": 104300 }, { "embedding_loss": 0.0156, "epoch": 5.830586131753925, "grad_norm": 0.03036240115761757, "learning_rate": 9.26548831896097e-06, "step": 104350 }, { "embedding_loss": 0.0133, "epoch": 5.833379896071968, "grad_norm": 0.03921573981642723, "learning_rate": 9.259279953809765e-06, "step": 104400 }, { "embedding_loss": 0.0171, "epoch": 5.83617366039001, "grad_norm": 0.07803326100111008, "learning_rate": 9.253071588658559e-06, "step": 104450 }, { "embedding_loss": 0.0166, "epoch": 5.838967424708052, "grad_norm": 0.04757678136229515, "learning_rate": 9.246863223507355e-06, "step": 104500 }, { "embedding_loss": 0.0182, "epoch": 5.841761189026093, "grad_norm": 0.08087670058012009, "learning_rate": 9.24065485835615e-06, "step": 104550 }, { "embedding_loss": 0.0158, "epoch": 5.844554953344136, "grad_norm": 0.04701922833919525, "learning_rate": 9.234446493204946e-06, "step": 104600 }, { "embedding_loss": 0.0164, "epoch": 5.847348717662178, "grad_norm": 0.05157835781574249, "learning_rate": 9.228238128053741e-06, "step": 104650 }, { "embedding_loss": 0.0155, "epoch": 5.85014248198022, "grad_norm": 0.04575811326503754, "learning_rate": 9.222029762902535e-06, "step": 104700 }, { "embedding_loss": 0.017, "epoch": 5.852936246298262, "grad_norm": 0.03679355978965759, "learning_rate": 9.21582139775133e-06, "step": 104750 }, { "embedding_loss": 0.0159, "epoch": 5.855730010616305, "grad_norm": 0.04346170276403427, "learning_rate": 9.209613032600126e-06, "step": 104800 }, { "embedding_loss": 0.0137, "epoch": 5.858523774934347, "grad_norm": 0.0342545360326767, "learning_rate": 9.203404667448921e-06, "step": 104850 }, { "embedding_loss": 0.0156, "epoch": 5.861317539252388, "grad_norm": 0.04222829267382622, "learning_rate": 9.197196302297717e-06, "step": 104900 }, { "embedding_loss": 0.0151, "epoch": 5.86411130357043, "grad_norm": 0.040028009563684464, "learning_rate": 9.19098793714651e-06, "step": 104950 }, { "embedding_loss": 0.0156, "epoch": 5.866905067888473, "grad_norm": 0.06355522572994232, "learning_rate": 9.184779571995306e-06, "step": 105000 }, { "embedding_loss": 0.0167, "epoch": 5.869698832206515, "grad_norm": 0.1320505440235138, "learning_rate": 9.178571206844102e-06, "step": 105050 }, { "embedding_loss": 0.0156, "epoch": 5.872492596524557, "grad_norm": 0.08957044780254364, "learning_rate": 9.172362841692897e-06, "step": 105100 }, { "embedding_loss": 0.0163, "epoch": 5.875286360842599, "grad_norm": 0.07053355872631073, "learning_rate": 9.166154476541693e-06, "step": 105150 }, { "embedding_loss": 0.0167, "epoch": 5.878080125160642, "grad_norm": 0.06330220401287079, "learning_rate": 9.159946111390488e-06, "step": 105200 }, { "embedding_loss": 0.0159, "epoch": 5.880873889478684, "grad_norm": 0.06338316202163696, "learning_rate": 9.153737746239284e-06, "step": 105250 }, { "embedding_loss": 0.0165, "epoch": 5.883667653796726, "grad_norm": 0.07357938587665558, "learning_rate": 9.147529381088078e-06, "step": 105300 }, { "embedding_loss": 0.017, "epoch": 5.886461418114768, "grad_norm": 0.05482756346464157, "learning_rate": 9.141321015936873e-06, "step": 105350 }, { "embedding_loss": 0.0162, "epoch": 5.88925518243281, "grad_norm": 0.038082998245954514, "learning_rate": 9.135112650785669e-06, "step": 105400 }, { "embedding_loss": 0.0154, "epoch": 5.892048946750852, "grad_norm": 0.059268705546855927, "learning_rate": 9.128904285634464e-06, "step": 105450 }, { "embedding_loss": 0.0159, "epoch": 5.894842711068894, "grad_norm": 0.058936137706041336, "learning_rate": 9.12269592048326e-06, "step": 105500 }, { "embedding_loss": 0.0156, "epoch": 5.897636475386936, "grad_norm": 0.04192807152867317, "learning_rate": 9.116487555332055e-06, "step": 105550 }, { "embedding_loss": 0.0155, "epoch": 5.900430239704979, "grad_norm": 0.035481881350278854, "learning_rate": 9.11027919018085e-06, "step": 105600 }, { "embedding_loss": 0.0154, "epoch": 5.903224004023021, "grad_norm": 0.046245962381362915, "learning_rate": 9.104070825029646e-06, "step": 105650 }, { "embedding_loss": 0.0167, "epoch": 5.906017768341063, "grad_norm": 0.06673267483711243, "learning_rate": 9.09786245987844e-06, "step": 105700 }, { "embedding_loss": 0.0153, "epoch": 5.908811532659104, "grad_norm": 0.06362496316432953, "learning_rate": 9.091654094727236e-06, "step": 105750 }, { "embedding_loss": 0.0146, "epoch": 5.911605296977147, "grad_norm": 0.06827598065137863, "learning_rate": 9.085445729576031e-06, "step": 105800 }, { "embedding_loss": 0.0156, "epoch": 5.914399061295189, "grad_norm": 0.052432917058467865, "learning_rate": 9.079237364424827e-06, "step": 105850 }, { "embedding_loss": 0.0172, "epoch": 5.917192825613231, "grad_norm": 0.04865510016679764, "learning_rate": 9.073028999273622e-06, "step": 105900 }, { "embedding_loss": 0.0169, "epoch": 5.919986589931273, "grad_norm": 0.0785992369055748, "learning_rate": 9.066820634122418e-06, "step": 105950 }, { "embedding_loss": 0.0142, "epoch": 5.922780354249316, "grad_norm": 0.05793536454439163, "learning_rate": 9.060612268971213e-06, "step": 106000 }, { "embedding_loss": 0.0167, "epoch": 5.925574118567358, "grad_norm": 0.06424162536859512, "learning_rate": 9.054403903820009e-06, "step": 106050 }, { "embedding_loss": 0.0149, "epoch": 5.9283678828854, "grad_norm": 0.05023278668522835, "learning_rate": 9.048195538668804e-06, "step": 106100 }, { "embedding_loss": 0.0159, "epoch": 5.931161647203442, "grad_norm": 0.04523177072405815, "learning_rate": 9.041987173517598e-06, "step": 106150 }, { "embedding_loss": 0.0164, "epoch": 5.9339554115214845, "grad_norm": 0.06717188656330109, "learning_rate": 9.035778808366394e-06, "step": 106200 }, { "embedding_loss": 0.017, "epoch": 5.936749175839526, "grad_norm": 0.052329760044813156, "learning_rate": 9.029570443215189e-06, "step": 106250 }, { "embedding_loss": 0.0148, "epoch": 5.939542940157568, "grad_norm": 0.04855384677648544, "learning_rate": 9.023362078063985e-06, "step": 106300 }, { "embedding_loss": 0.0167, "epoch": 5.94233670447561, "grad_norm": 0.06384146213531494, "learning_rate": 9.01715371291278e-06, "step": 106350 }, { "embedding_loss": 0.0148, "epoch": 5.945130468793653, "grad_norm": 0.04774003103375435, "learning_rate": 9.010945347761576e-06, "step": 106400 }, { "embedding_loss": 0.0172, "epoch": 5.947924233111695, "grad_norm": 0.059157997369766235, "learning_rate": 9.004736982610371e-06, "step": 106450 }, { "embedding_loss": 0.0156, "epoch": 5.950717997429737, "grad_norm": 0.06057243421673775, "learning_rate": 8.998528617459165e-06, "step": 106500 }, { "embedding_loss": 0.0179, "epoch": 5.953511761747779, "grad_norm": 0.06709274649620056, "learning_rate": 8.99232025230796e-06, "step": 106550 }, { "embedding_loss": 0.0145, "epoch": 5.956305526065821, "grad_norm": 0.04538804292678833, "learning_rate": 8.986111887156756e-06, "step": 106600 }, { "embedding_loss": 0.0164, "epoch": 5.959099290383863, "grad_norm": 0.0714077278971672, "learning_rate": 8.979903522005552e-06, "step": 106650 }, { "embedding_loss": 0.0157, "epoch": 5.961893054701905, "grad_norm": 0.08324045687913895, "learning_rate": 8.973695156854347e-06, "step": 106700 }, { "embedding_loss": 0.0159, "epoch": 5.964686819019947, "grad_norm": 0.02255346067249775, "learning_rate": 8.967486791703141e-06, "step": 106750 }, { "embedding_loss": 0.0156, "epoch": 5.96748058333799, "grad_norm": 0.058159757405519485, "learning_rate": 8.961278426551936e-06, "step": 106800 }, { "embedding_loss": 0.0159, "epoch": 5.970274347656032, "grad_norm": 0.050547562539577484, "learning_rate": 8.955070061400732e-06, "step": 106850 }, { "embedding_loss": 0.0156, "epoch": 5.973068111974074, "grad_norm": 0.10003169625997543, "learning_rate": 8.948861696249527e-06, "step": 106900 }, { "embedding_loss": 0.0162, "epoch": 5.975861876292116, "grad_norm": 0.030234837904572487, "learning_rate": 8.942653331098323e-06, "step": 106950 }, { "embedding_loss": 0.0161, "epoch": 5.9786556406101585, "grad_norm": 0.0573028028011322, "learning_rate": 8.936444965947117e-06, "step": 107000 }, { "embedding_loss": 0.0161, "epoch": 5.9814494049282, "grad_norm": 0.03465546295046806, "learning_rate": 8.930236600795912e-06, "step": 107050 }, { "embedding_loss": 0.0153, "epoch": 5.984243169246242, "grad_norm": 0.07231830805540085, "learning_rate": 8.924028235644708e-06, "step": 107100 }, { "embedding_loss": 0.0161, "epoch": 5.987036933564284, "grad_norm": 0.054215170443058014, "learning_rate": 8.917819870493503e-06, "step": 107150 }, { "embedding_loss": 0.0155, "epoch": 5.989830697882327, "grad_norm": 0.06423011422157288, "learning_rate": 8.911611505342299e-06, "step": 107200 }, { "embedding_loss": 0.0158, "epoch": 5.992624462200369, "grad_norm": 0.08137304335832596, "learning_rate": 8.905403140191094e-06, "step": 107250 }, { "embedding_loss": 0.0164, "epoch": 5.995418226518411, "grad_norm": 0.13593100011348724, "learning_rate": 8.89919477503989e-06, "step": 107300 }, { "embedding_loss": 0.0164, "epoch": 5.998211990836453, "grad_norm": 0.05986414849758148, "learning_rate": 8.892986409888684e-06, "step": 107350 }, { "embedding_loss": 0.0158, "epoch": 6.0010057551544955, "grad_norm": 0.07634907960891724, "learning_rate": 8.886778044737479e-06, "step": 107400 }, { "embedding_loss": 0.0142, "epoch": 6.003799519472537, "grad_norm": 0.08174613118171692, "learning_rate": 8.880569679586275e-06, "step": 107450 }, { "embedding_loss": 0.0152, "epoch": 6.006593283790579, "grad_norm": 0.04269128292798996, "learning_rate": 8.87436131443507e-06, "step": 107500 }, { "embedding_loss": 0.0169, "epoch": 6.009387048108621, "grad_norm": 0.03271391615271568, "learning_rate": 8.868152949283866e-06, "step": 107550 }, { "embedding_loss": 0.0147, "epoch": 6.012180812426664, "grad_norm": 0.07430098205804825, "learning_rate": 8.861944584132661e-06, "step": 107600 }, { "embedding_loss": 0.0166, "epoch": 6.014974576744706, "grad_norm": 0.08635520190000534, "learning_rate": 8.855736218981457e-06, "step": 107650 }, { "embedding_loss": 0.0157, "epoch": 6.017768341062748, "grad_norm": 0.0626230463385582, "learning_rate": 8.849527853830252e-06, "step": 107700 }, { "embedding_loss": 0.0136, "epoch": 6.02056210538079, "grad_norm": 0.0689324364066124, "learning_rate": 8.843319488679046e-06, "step": 107750 }, { "embedding_loss": 0.0178, "epoch": 6.0233558696988325, "grad_norm": 0.06722570955753326, "learning_rate": 8.837111123527842e-06, "step": 107800 }, { "embedding_loss": 0.0163, "epoch": 6.026149634016875, "grad_norm": 0.09286294132471085, "learning_rate": 8.830902758376637e-06, "step": 107850 }, { "embedding_loss": 0.015, "epoch": 6.028943398334916, "grad_norm": 0.03652914613485336, "learning_rate": 8.824694393225433e-06, "step": 107900 }, { "embedding_loss": 0.0161, "epoch": 6.031737162652958, "grad_norm": 0.05358687415719032, "learning_rate": 8.818486028074228e-06, "step": 107950 }, { "embedding_loss": 0.0157, "epoch": 6.034530926971001, "grad_norm": 0.042199354618787766, "learning_rate": 8.812277662923024e-06, "step": 108000 }, { "embedding_loss": 0.0179, "epoch": 6.037324691289043, "grad_norm": 0.0504700131714344, "learning_rate": 8.806069297771819e-06, "step": 108050 }, { "embedding_loss": 0.0168, "epoch": 6.040118455607085, "grad_norm": 0.18795189261436462, "learning_rate": 8.799860932620615e-06, "step": 108100 }, { "embedding_loss": 0.0159, "epoch": 6.042912219925127, "grad_norm": 0.06643370538949966, "learning_rate": 8.79365256746941e-06, "step": 108150 }, { "embedding_loss": 0.0147, "epoch": 6.0457059842431695, "grad_norm": 0.059068623930215836, "learning_rate": 8.787444202318204e-06, "step": 108200 }, { "embedding_loss": 0.0166, "epoch": 6.048499748561212, "grad_norm": 0.036493491381406784, "learning_rate": 8.781235837167e-06, "step": 108250 }, { "embedding_loss": 0.0164, "epoch": 6.051293512879253, "grad_norm": 0.06689608842134476, "learning_rate": 8.775027472015795e-06, "step": 108300 }, { "embedding_loss": 0.016, "epoch": 6.054087277197295, "grad_norm": 0.0776507705450058, "learning_rate": 8.76881910686459e-06, "step": 108350 }, { "embedding_loss": 0.0153, "epoch": 6.056881041515338, "grad_norm": 0.05526261404156685, "learning_rate": 8.762610741713386e-06, "step": 108400 }, { "embedding_loss": 0.0167, "epoch": 6.05967480583338, "grad_norm": 0.063357412815094, "learning_rate": 8.756402376562182e-06, "step": 108450 }, { "embedding_loss": 0.0166, "epoch": 6.062468570151422, "grad_norm": 0.11479425430297852, "learning_rate": 8.750194011410977e-06, "step": 108500 }, { "embedding_loss": 0.0156, "epoch": 6.065262334469464, "grad_norm": 0.033919982612133026, "learning_rate": 8.743985646259771e-06, "step": 108550 }, { "embedding_loss": 0.0145, "epoch": 6.0680560987875065, "grad_norm": 0.026380721479654312, "learning_rate": 8.737777281108566e-06, "step": 108600 }, { "embedding_loss": 0.0177, "epoch": 6.070849863105549, "grad_norm": 0.06048324331641197, "learning_rate": 8.731568915957362e-06, "step": 108650 }, { "embedding_loss": 0.0154, "epoch": 6.073643627423591, "grad_norm": 0.06959399580955505, "learning_rate": 8.725360550806157e-06, "step": 108700 }, { "embedding_loss": 0.0161, "epoch": 6.076437391741632, "grad_norm": 0.04996245726943016, "learning_rate": 8.719152185654953e-06, "step": 108750 }, { "embedding_loss": 0.0152, "epoch": 6.079231156059675, "grad_norm": 0.09241630882024765, "learning_rate": 8.712943820503747e-06, "step": 108800 }, { "embedding_loss": 0.0167, "epoch": 6.082024920377717, "grad_norm": 0.03774264082312584, "learning_rate": 8.706735455352542e-06, "step": 108850 }, { "embedding_loss": 0.0154, "epoch": 6.084818684695759, "grad_norm": 0.053527407348155975, "learning_rate": 8.700527090201338e-06, "step": 108900 }, { "embedding_loss": 0.0164, "epoch": 6.087612449013801, "grad_norm": 0.06560757011175156, "learning_rate": 8.694318725050133e-06, "step": 108950 }, { "embedding_loss": 0.016, "epoch": 6.0904062133318435, "grad_norm": 0.059011269360780716, "learning_rate": 8.688110359898929e-06, "step": 109000 }, { "embedding_loss": 0.0171, "epoch": 6.093199977649886, "grad_norm": 0.08763888478279114, "learning_rate": 8.681901994747723e-06, "step": 109050 }, { "embedding_loss": 0.0158, "epoch": 6.095993741967928, "grad_norm": 0.31079623103141785, "learning_rate": 8.675693629596518e-06, "step": 109100 }, { "embedding_loss": 0.0142, "epoch": 6.098787506285969, "grad_norm": 0.03550941124558449, "learning_rate": 8.669485264445314e-06, "step": 109150 }, { "embedding_loss": 0.0162, "epoch": 6.101581270604012, "grad_norm": 0.20576918125152588, "learning_rate": 8.66327689929411e-06, "step": 109200 }, { "embedding_loss": 0.0157, "epoch": 6.104375034922054, "grad_norm": 0.08025240898132324, "learning_rate": 8.657068534142905e-06, "step": 109250 }, { "embedding_loss": 0.0159, "epoch": 6.107168799240096, "grad_norm": 0.048530615866184235, "learning_rate": 8.6508601689917e-06, "step": 109300 }, { "embedding_loss": 0.0145, "epoch": 6.109962563558138, "grad_norm": 0.02496921829879284, "learning_rate": 8.644651803840496e-06, "step": 109350 }, { "embedding_loss": 0.016, "epoch": 6.1127563278761805, "grad_norm": 0.06659088283777237, "learning_rate": 8.63844343868929e-06, "step": 109400 }, { "embedding_loss": 0.0145, "epoch": 6.115550092194223, "grad_norm": 0.06159112975001335, "learning_rate": 8.632235073538085e-06, "step": 109450 }, { "embedding_loss": 0.0158, "epoch": 6.118343856512265, "grad_norm": 0.053332436829805374, "learning_rate": 8.62602670838688e-06, "step": 109500 }, { "embedding_loss": 0.0153, "epoch": 6.121137620830306, "grad_norm": 0.03508515655994415, "learning_rate": 8.619818343235676e-06, "step": 109550 }, { "embedding_loss": 0.0147, "epoch": 6.123931385148349, "grad_norm": 0.07652411609888077, "learning_rate": 8.613609978084472e-06, "step": 109600 }, { "embedding_loss": 0.0183, "epoch": 6.126725149466391, "grad_norm": 0.10607723891735077, "learning_rate": 8.607401612933267e-06, "step": 109650 }, { "embedding_loss": 0.0148, "epoch": 6.129518913784433, "grad_norm": 0.05137154459953308, "learning_rate": 8.601193247782063e-06, "step": 109700 }, { "embedding_loss": 0.015, "epoch": 6.132312678102475, "grad_norm": 0.06274483352899551, "learning_rate": 8.594984882630858e-06, "step": 109750 }, { "embedding_loss": 0.0149, "epoch": 6.1351064424205175, "grad_norm": 0.03580088168382645, "learning_rate": 8.588776517479652e-06, "step": 109800 }, { "embedding_loss": 0.0144, "epoch": 6.13790020673856, "grad_norm": 0.0432235449552536, "learning_rate": 8.582568152328447e-06, "step": 109850 }, { "embedding_loss": 0.0145, "epoch": 6.140693971056602, "grad_norm": 0.05804803594946861, "learning_rate": 8.576359787177243e-06, "step": 109900 }, { "embedding_loss": 0.0174, "epoch": 6.143487735374644, "grad_norm": 0.05548759549856186, "learning_rate": 8.570151422026039e-06, "step": 109950 }, { "embedding_loss": 0.0149, "epoch": 6.146281499692686, "grad_norm": 0.060697343200445175, "learning_rate": 8.563943056874834e-06, "step": 110000 }, { "embedding_loss": 0.0158, "epoch": 6.149075264010728, "grad_norm": 0.04218410700559616, "learning_rate": 8.55773469172363e-06, "step": 110050 }, { "embedding_loss": 0.0148, "epoch": 6.15186902832877, "grad_norm": 0.0747370794415474, "learning_rate": 8.551526326572425e-06, "step": 110100 }, { "embedding_loss": 0.0167, "epoch": 6.154662792646812, "grad_norm": 0.043206166476011276, "learning_rate": 8.54531796142122e-06, "step": 110150 }, { "embedding_loss": 0.0147, "epoch": 6.1574565569648545, "grad_norm": 0.05829416587948799, "learning_rate": 8.539109596270016e-06, "step": 110200 }, { "embedding_loss": 0.0176, "epoch": 6.160250321282897, "grad_norm": 0.07679557055234909, "learning_rate": 8.53290123111881e-06, "step": 110250 }, { "embedding_loss": 0.0162, "epoch": 6.163044085600939, "grad_norm": 0.051805783063173294, "learning_rate": 8.526692865967605e-06, "step": 110300 }, { "embedding_loss": 0.0155, "epoch": 6.165837849918981, "grad_norm": 0.03660309314727783, "learning_rate": 8.520484500816401e-06, "step": 110350 }, { "embedding_loss": 0.0145, "epoch": 6.1686316142370226, "grad_norm": 0.04567583277821541, "learning_rate": 8.514276135665196e-06, "step": 110400 }, { "embedding_loss": 0.0161, "epoch": 6.171425378555065, "grad_norm": 0.03527223318815231, "learning_rate": 8.508067770513992e-06, "step": 110450 }, { "embedding_loss": 0.0151, "epoch": 6.174219142873107, "grad_norm": 0.09020619094371796, "learning_rate": 8.501859405362787e-06, "step": 110500 }, { "embedding_loss": 0.015, "epoch": 6.177012907191149, "grad_norm": 0.06386974453926086, "learning_rate": 8.495651040211583e-06, "step": 110550 }, { "embedding_loss": 0.0152, "epoch": 6.1798066715091915, "grad_norm": 0.040173593908548355, "learning_rate": 8.489442675060377e-06, "step": 110600 }, { "embedding_loss": 0.0162, "epoch": 6.182600435827234, "grad_norm": 0.051913097500801086, "learning_rate": 8.483234309909172e-06, "step": 110650 }, { "embedding_loss": 0.0135, "epoch": 6.185394200145276, "grad_norm": 0.07246961444616318, "learning_rate": 8.477025944757968e-06, "step": 110700 }, { "embedding_loss": 0.0155, "epoch": 6.188187964463318, "grad_norm": 0.08175276219844818, "learning_rate": 8.470817579606763e-06, "step": 110750 }, { "embedding_loss": 0.0163, "epoch": 6.1909817287813595, "grad_norm": 0.06937560439109802, "learning_rate": 8.464609214455559e-06, "step": 110800 }, { "embedding_loss": 0.0165, "epoch": 6.193775493099402, "grad_norm": 0.12556493282318115, "learning_rate": 8.458400849304353e-06, "step": 110850 }, { "embedding_loss": 0.0156, "epoch": 6.196569257417444, "grad_norm": 0.056679412722587585, "learning_rate": 8.452192484153148e-06, "step": 110900 }, { "embedding_loss": 0.0164, "epoch": 6.199363021735486, "grad_norm": 0.07167758047580719, "learning_rate": 8.445984119001944e-06, "step": 110950 }, { "embedding_loss": 0.0175, "epoch": 6.2021567860535285, "grad_norm": 0.048919159919023514, "learning_rate": 8.43977575385074e-06, "step": 111000 }, { "embedding_loss": 0.0162, "epoch": 6.204950550371571, "grad_norm": 0.03226659074425697, "learning_rate": 8.433567388699535e-06, "step": 111050 }, { "embedding_loss": 0.0168, "epoch": 6.207744314689613, "grad_norm": 0.06245211139321327, "learning_rate": 8.427359023548329e-06, "step": 111100 }, { "embedding_loss": 0.0153, "epoch": 6.210538079007655, "grad_norm": 0.03879906237125397, "learning_rate": 8.421150658397124e-06, "step": 111150 }, { "embedding_loss": 0.0156, "epoch": 6.213331843325697, "grad_norm": 0.04094162955880165, "learning_rate": 8.41494229324592e-06, "step": 111200 }, { "embedding_loss": 0.0166, "epoch": 6.216125607643739, "grad_norm": 0.03628602251410484, "learning_rate": 8.408733928094715e-06, "step": 111250 }, { "embedding_loss": 0.0159, "epoch": 6.218919371961781, "grad_norm": 0.10990241169929504, "learning_rate": 8.40252556294351e-06, "step": 111300 }, { "embedding_loss": 0.0162, "epoch": 6.221713136279823, "grad_norm": 0.1103757992386818, "learning_rate": 8.396317197792306e-06, "step": 111350 }, { "embedding_loss": 0.0153, "epoch": 6.2245069005978655, "grad_norm": 0.0646350160241127, "learning_rate": 8.390108832641102e-06, "step": 111400 }, { "embedding_loss": 0.0159, "epoch": 6.227300664915908, "grad_norm": 0.022643115371465683, "learning_rate": 8.383900467489895e-06, "step": 111450 }, { "embedding_loss": 0.0152, "epoch": 6.23009442923395, "grad_norm": 0.052732620388269424, "learning_rate": 8.377692102338691e-06, "step": 111500 }, { "embedding_loss": 0.0163, "epoch": 6.232888193551992, "grad_norm": 0.05946877598762512, "learning_rate": 8.371483737187487e-06, "step": 111550 }, { "embedding_loss": 0.0162, "epoch": 6.235681957870034, "grad_norm": 0.06976132839918137, "learning_rate": 8.365275372036282e-06, "step": 111600 }, { "embedding_loss": 0.0157, "epoch": 6.238475722188076, "grad_norm": 0.05256889760494232, "learning_rate": 8.359067006885078e-06, "step": 111650 }, { "embedding_loss": 0.0172, "epoch": 6.241269486506118, "grad_norm": 0.05325094237923622, "learning_rate": 8.352858641733873e-06, "step": 111700 }, { "embedding_loss": 0.0163, "epoch": 6.24406325082416, "grad_norm": 0.061098694801330566, "learning_rate": 8.346650276582669e-06, "step": 111750 }, { "embedding_loss": 0.0159, "epoch": 6.2468570151422025, "grad_norm": 0.04658793285489082, "learning_rate": 8.340441911431464e-06, "step": 111800 }, { "embedding_loss": 0.0145, "epoch": 6.249650779460245, "grad_norm": 0.06493590027093887, "learning_rate": 8.334233546280258e-06, "step": 111850 }, { "embedding_loss": 0.0176, "epoch": 6.252444543778287, "grad_norm": 0.06759931892156601, "learning_rate": 8.328025181129053e-06, "step": 111900 }, { "embedding_loss": 0.0152, "epoch": 6.255238308096329, "grad_norm": 0.11386681348085403, "learning_rate": 8.321816815977849e-06, "step": 111950 }, { "embedding_loss": 0.0153, "epoch": 6.258032072414371, "grad_norm": 0.10054360330104828, "learning_rate": 8.315608450826644e-06, "step": 112000 }, { "embedding_loss": 0.015, "epoch": 6.260825836732414, "grad_norm": 0.057152941823005676, "learning_rate": 8.30940008567544e-06, "step": 112050 }, { "embedding_loss": 0.0154, "epoch": 6.263619601050455, "grad_norm": 0.042303673923015594, "learning_rate": 8.303191720524235e-06, "step": 112100 }, { "embedding_loss": 0.0153, "epoch": 6.266413365368497, "grad_norm": 0.05625724792480469, "learning_rate": 8.296983355373031e-06, "step": 112150 }, { "embedding_loss": 0.0159, "epoch": 6.2692071296865395, "grad_norm": 0.05881032347679138, "learning_rate": 8.290774990221827e-06, "step": 112200 }, { "embedding_loss": 0.0155, "epoch": 6.272000894004582, "grad_norm": 0.049773406237363815, "learning_rate": 8.284566625070622e-06, "step": 112250 }, { "embedding_loss": 0.0149, "epoch": 6.274794658322624, "grad_norm": 0.0637885183095932, "learning_rate": 8.278358259919416e-06, "step": 112300 }, { "embedding_loss": 0.0154, "epoch": 6.277588422640666, "grad_norm": 0.06211785972118378, "learning_rate": 8.272149894768211e-06, "step": 112350 }, { "embedding_loss": 0.0164, "epoch": 6.280382186958708, "grad_norm": 0.0362621434032917, "learning_rate": 8.265941529617007e-06, "step": 112400 }, { "embedding_loss": 0.0154, "epoch": 6.283175951276751, "grad_norm": 0.040084075182676315, "learning_rate": 8.259733164465802e-06, "step": 112450 }, { "embedding_loss": 0.0168, "epoch": 6.285969715594792, "grad_norm": 0.057392019778490067, "learning_rate": 8.253524799314598e-06, "step": 112500 }, { "embedding_loss": 0.016, "epoch": 6.288763479912834, "grad_norm": 0.07236210256814957, "learning_rate": 8.247316434163393e-06, "step": 112550 }, { "embedding_loss": 0.0141, "epoch": 6.2915572442308765, "grad_norm": 0.42183858156204224, "learning_rate": 8.241108069012187e-06, "step": 112600 }, { "embedding_loss": 0.015, "epoch": 6.294351008548919, "grad_norm": 0.055525027215480804, "learning_rate": 8.234899703860983e-06, "step": 112650 }, { "embedding_loss": 0.0155, "epoch": 6.297144772866961, "grad_norm": 0.05253464728593826, "learning_rate": 8.228691338709778e-06, "step": 112700 }, { "embedding_loss": 0.0141, "epoch": 6.299938537185003, "grad_norm": 0.04619992524385452, "learning_rate": 8.222482973558574e-06, "step": 112750 }, { "embedding_loss": 0.0172, "epoch": 6.302732301503045, "grad_norm": 0.04845918342471123, "learning_rate": 8.21627460840737e-06, "step": 112800 }, { "embedding_loss": 0.0136, "epoch": 6.305526065821088, "grad_norm": 0.027917658910155296, "learning_rate": 8.210066243256165e-06, "step": 112850 }, { "embedding_loss": 0.0158, "epoch": 6.30831983013913, "grad_norm": 0.08190688490867615, "learning_rate": 8.203857878104959e-06, "step": 112900 }, { "embedding_loss": 0.014, "epoch": 6.311113594457171, "grad_norm": 0.04489002004265785, "learning_rate": 8.197649512953754e-06, "step": 112950 }, { "embedding_loss": 0.0159, "epoch": 6.3139073587752135, "grad_norm": 0.054548244923353195, "learning_rate": 8.19144114780255e-06, "step": 113000 }, { "embedding_loss": 0.0151, "epoch": 6.316701123093256, "grad_norm": 0.0672951340675354, "learning_rate": 8.185232782651345e-06, "step": 113050 }, { "embedding_loss": 0.016, "epoch": 6.319494887411298, "grad_norm": 0.04302044212818146, "learning_rate": 8.17902441750014e-06, "step": 113100 }, { "embedding_loss": 0.0146, "epoch": 6.32228865172934, "grad_norm": 0.07114610821008682, "learning_rate": 8.172816052348934e-06, "step": 113150 }, { "embedding_loss": 0.0162, "epoch": 6.325082416047382, "grad_norm": 0.05586700141429901, "learning_rate": 8.16660768719773e-06, "step": 113200 }, { "embedding_loss": 0.0166, "epoch": 6.327876180365425, "grad_norm": 0.076819509267807, "learning_rate": 8.160399322046526e-06, "step": 113250 }, { "embedding_loss": 0.0168, "epoch": 6.330669944683467, "grad_norm": 0.08422467112541199, "learning_rate": 8.154190956895321e-06, "step": 113300 }, { "embedding_loss": 0.0151, "epoch": 6.333463709001508, "grad_norm": 0.07333653420209885, "learning_rate": 8.147982591744117e-06, "step": 113350 }, { "embedding_loss": 0.0163, "epoch": 6.3362574733195505, "grad_norm": 0.04838164523243904, "learning_rate": 8.141774226592912e-06, "step": 113400 }, { "embedding_loss": 0.015, "epoch": 6.339051237637593, "grad_norm": 0.04581170529127121, "learning_rate": 8.135565861441708e-06, "step": 113450 }, { "embedding_loss": 0.0178, "epoch": 6.341845001955635, "grad_norm": 0.04499870538711548, "learning_rate": 8.129357496290501e-06, "step": 113500 }, { "embedding_loss": 0.0161, "epoch": 6.344638766273677, "grad_norm": 0.0639326274394989, "learning_rate": 8.123149131139297e-06, "step": 113550 }, { "embedding_loss": 0.0168, "epoch": 6.347432530591719, "grad_norm": 0.04103877767920494, "learning_rate": 8.116940765988092e-06, "step": 113600 }, { "embedding_loss": 0.0149, "epoch": 6.350226294909762, "grad_norm": 0.06601230055093765, "learning_rate": 8.110732400836888e-06, "step": 113650 }, { "embedding_loss": 0.0138, "epoch": 6.353020059227804, "grad_norm": 0.04997080937027931, "learning_rate": 8.104524035685683e-06, "step": 113700 }, { "embedding_loss": 0.0154, "epoch": 6.355813823545846, "grad_norm": 4.329464912414551, "learning_rate": 8.098315670534479e-06, "step": 113750 }, { "embedding_loss": 0.0155, "epoch": 6.3586075878638875, "grad_norm": 0.05528069660067558, "learning_rate": 8.092107305383274e-06, "step": 113800 }, { "embedding_loss": 0.0138, "epoch": 6.36140135218193, "grad_norm": 0.029992148280143738, "learning_rate": 8.08589894023207e-06, "step": 113850 }, { "embedding_loss": 0.0173, "epoch": 6.364195116499972, "grad_norm": 0.05381214618682861, "learning_rate": 8.079690575080864e-06, "step": 113900 }, { "embedding_loss": 0.0148, "epoch": 6.366988880818014, "grad_norm": 0.03535177558660507, "learning_rate": 8.07348220992966e-06, "step": 113950 }, { "embedding_loss": 0.0146, "epoch": 6.369782645136056, "grad_norm": 0.07360133528709412, "learning_rate": 8.067273844778455e-06, "step": 114000 }, { "embedding_loss": 0.0168, "epoch": 6.372576409454099, "grad_norm": 0.0749272033572197, "learning_rate": 8.06106547962725e-06, "step": 114050 }, { "embedding_loss": 0.0164, "epoch": 6.375370173772141, "grad_norm": 0.08845933526754379, "learning_rate": 8.054857114476046e-06, "step": 114100 }, { "embedding_loss": 0.0165, "epoch": 6.378163938090183, "grad_norm": 0.05570349097251892, "learning_rate": 8.048648749324841e-06, "step": 114150 }, { "embedding_loss": 0.0153, "epoch": 6.3809577024082245, "grad_norm": 0.062146127223968506, "learning_rate": 8.042440384173637e-06, "step": 114200 }, { "embedding_loss": 0.0162, "epoch": 6.383751466726267, "grad_norm": 0.04918602481484413, "learning_rate": 8.036232019022432e-06, "step": 114250 }, { "embedding_loss": 0.0157, "epoch": 6.386545231044309, "grad_norm": 0.05638418719172478, "learning_rate": 8.030023653871228e-06, "step": 114300 }, { "embedding_loss": 0.0158, "epoch": 6.389338995362351, "grad_norm": 0.06302925944328308, "learning_rate": 8.023815288720022e-06, "step": 114350 }, { "embedding_loss": 0.0158, "epoch": 6.392132759680393, "grad_norm": 0.04540010169148445, "learning_rate": 8.017606923568817e-06, "step": 114400 }, { "embedding_loss": 0.0158, "epoch": 6.394926523998436, "grad_norm": 0.08267533779144287, "learning_rate": 8.011398558417613e-06, "step": 114450 }, { "embedding_loss": 0.0163, "epoch": 6.397720288316478, "grad_norm": 0.030309099704027176, "learning_rate": 8.005190193266408e-06, "step": 114500 }, { "embedding_loss": 0.0155, "epoch": 6.40051405263452, "grad_norm": 0.05635339394211769, "learning_rate": 7.998981828115204e-06, "step": 114550 }, { "embedding_loss": 0.0146, "epoch": 6.403307816952562, "grad_norm": 0.077373206615448, "learning_rate": 7.992773462964e-06, "step": 114600 }, { "embedding_loss": 0.0163, "epoch": 6.406101581270604, "grad_norm": 0.029802948236465454, "learning_rate": 7.986565097812793e-06, "step": 114650 }, { "embedding_loss": 0.0156, "epoch": 6.408895345588646, "grad_norm": 0.04904951900243759, "learning_rate": 7.980356732661589e-06, "step": 114700 }, { "embedding_loss": 0.0155, "epoch": 6.411689109906688, "grad_norm": 0.06064911186695099, "learning_rate": 7.974148367510384e-06, "step": 114750 }, { "embedding_loss": 0.0152, "epoch": 6.41448287422473, "grad_norm": 0.06018833443522453, "learning_rate": 7.96794000235918e-06, "step": 114800 }, { "embedding_loss": 0.0157, "epoch": 6.417276638542773, "grad_norm": 0.09397483617067337, "learning_rate": 7.961731637207975e-06, "step": 114850 }, { "embedding_loss": 0.0157, "epoch": 6.420070402860815, "grad_norm": 0.04569234699010849, "learning_rate": 7.95552327205677e-06, "step": 114900 }, { "embedding_loss": 0.0139, "epoch": 6.422864167178857, "grad_norm": 0.0748940259218216, "learning_rate": 7.949314906905565e-06, "step": 114950 }, { "embedding_loss": 0.0151, "epoch": 6.4256579314968985, "grad_norm": 0.04853441193699837, "learning_rate": 7.94310654175436e-06, "step": 115000 }, { "embedding_loss": 0.0175, "epoch": 6.428451695814941, "grad_norm": 0.04859469458460808, "learning_rate": 7.936898176603156e-06, "step": 115050 }, { "embedding_loss": 0.0162, "epoch": 6.431245460132983, "grad_norm": 0.11717961728572845, "learning_rate": 7.930689811451951e-06, "step": 115100 }, { "embedding_loss": 0.016, "epoch": 6.434039224451025, "grad_norm": 0.04120974987745285, "learning_rate": 7.924481446300747e-06, "step": 115150 }, { "embedding_loss": 0.0149, "epoch": 6.436832988769067, "grad_norm": 0.33747419714927673, "learning_rate": 7.91827308114954e-06, "step": 115200 }, { "embedding_loss": 0.0143, "epoch": 6.43962675308711, "grad_norm": 0.047322504222393036, "learning_rate": 7.912064715998336e-06, "step": 115250 }, { "embedding_loss": 0.0145, "epoch": 6.442420517405152, "grad_norm": 0.05937094986438751, "learning_rate": 7.905856350847131e-06, "step": 115300 }, { "embedding_loss": 0.0163, "epoch": 6.445214281723194, "grad_norm": 0.04787561669945717, "learning_rate": 7.899647985695927e-06, "step": 115350 }, { "embedding_loss": 0.0155, "epoch": 6.448008046041236, "grad_norm": 0.07948049157857895, "learning_rate": 7.893439620544722e-06, "step": 115400 }, { "embedding_loss": 0.0155, "epoch": 6.450801810359278, "grad_norm": 0.06410897523164749, "learning_rate": 7.887231255393518e-06, "step": 115450 }, { "embedding_loss": 0.0178, "epoch": 6.45359557467732, "grad_norm": 0.03232398256659508, "learning_rate": 7.881022890242314e-06, "step": 115500 }, { "embedding_loss": 0.0151, "epoch": 6.456389338995362, "grad_norm": 0.05939759686589241, "learning_rate": 7.874814525091107e-06, "step": 115550 }, { "embedding_loss": 0.0161, "epoch": 6.459183103313404, "grad_norm": 0.10890965908765793, "learning_rate": 7.868606159939903e-06, "step": 115600 }, { "embedding_loss": 0.016, "epoch": 6.461976867631447, "grad_norm": 0.03824763745069504, "learning_rate": 7.862397794788698e-06, "step": 115650 }, { "embedding_loss": 0.0159, "epoch": 6.464770631949489, "grad_norm": 0.04373810067772865, "learning_rate": 7.856189429637494e-06, "step": 115700 }, { "embedding_loss": 0.0162, "epoch": 6.467564396267531, "grad_norm": 0.0714171752333641, "learning_rate": 7.84998106448629e-06, "step": 115750 }, { "embedding_loss": 0.0161, "epoch": 6.470358160585573, "grad_norm": 0.03638332709670067, "learning_rate": 7.843772699335085e-06, "step": 115800 }, { "embedding_loss": 0.0175, "epoch": 6.473151924903615, "grad_norm": 0.0641186386346817, "learning_rate": 7.83756433418388e-06, "step": 115850 }, { "embedding_loss": 0.0165, "epoch": 6.475945689221657, "grad_norm": 0.0883864015340805, "learning_rate": 7.831355969032676e-06, "step": 115900 }, { "embedding_loss": 0.0159, "epoch": 6.478739453539699, "grad_norm": 0.06475694477558136, "learning_rate": 7.82514760388147e-06, "step": 115950 }, { "embedding_loss": 0.0151, "epoch": 6.481533217857741, "grad_norm": 0.03954308480024338, "learning_rate": 7.818939238730265e-06, "step": 116000 }, { "embedding_loss": 0.0147, "epoch": 6.484326982175784, "grad_norm": 0.05403446778655052, "learning_rate": 7.81273087357906e-06, "step": 116050 }, { "embedding_loss": 0.0155, "epoch": 6.487120746493826, "grad_norm": 0.04268207401037216, "learning_rate": 7.806522508427856e-06, "step": 116100 }, { "embedding_loss": 0.015, "epoch": 6.489914510811868, "grad_norm": 0.07818350195884705, "learning_rate": 7.800314143276652e-06, "step": 116150 }, { "embedding_loss": 0.015, "epoch": 6.49270827512991, "grad_norm": 0.04580368101596832, "learning_rate": 7.794105778125447e-06, "step": 116200 }, { "embedding_loss": 0.0165, "epoch": 6.495502039447953, "grad_norm": 0.04260854050517082, "learning_rate": 7.787897412974243e-06, "step": 116250 }, { "embedding_loss": 0.0158, "epoch": 6.498295803765994, "grad_norm": 0.04666067287325859, "learning_rate": 7.781689047823038e-06, "step": 116300 }, { "embedding_loss": 0.0178, "epoch": 6.501089568084036, "grad_norm": 0.04727206379175186, "learning_rate": 7.775480682671834e-06, "step": 116350 }, { "embedding_loss": 0.0179, "epoch": 6.503883332402078, "grad_norm": 0.07129380106925964, "learning_rate": 7.769272317520628e-06, "step": 116400 }, { "embedding_loss": 0.015, "epoch": 6.506677096720121, "grad_norm": 0.06838800758123398, "learning_rate": 7.763063952369423e-06, "step": 116450 }, { "embedding_loss": 0.0156, "epoch": 6.509470861038163, "grad_norm": 0.04991946741938591, "learning_rate": 7.756855587218219e-06, "step": 116500 }, { "embedding_loss": 0.0154, "epoch": 6.512264625356205, "grad_norm": 0.06805353611707687, "learning_rate": 7.750647222067014e-06, "step": 116550 }, { "embedding_loss": 0.0148, "epoch": 6.515058389674247, "grad_norm": 0.051499299705028534, "learning_rate": 7.74443885691581e-06, "step": 116600 }, { "embedding_loss": 0.0163, "epoch": 6.51785215399229, "grad_norm": 0.13762328028678894, "learning_rate": 7.738230491764605e-06, "step": 116650 }, { "embedding_loss": 0.016, "epoch": 6.520645918310331, "grad_norm": 0.06045481562614441, "learning_rate": 7.732022126613399e-06, "step": 116700 }, { "embedding_loss": 0.0162, "epoch": 6.523439682628373, "grad_norm": 0.06733287125825882, "learning_rate": 7.725813761462195e-06, "step": 116750 }, { "embedding_loss": 0.016, "epoch": 6.526233446946415, "grad_norm": 0.1436520367860794, "learning_rate": 7.71960539631099e-06, "step": 116800 }, { "embedding_loss": 0.0162, "epoch": 6.529027211264458, "grad_norm": 0.046463582664728165, "learning_rate": 7.713397031159786e-06, "step": 116850 }, { "embedding_loss": 0.0171, "epoch": 6.5318209755825, "grad_norm": 0.07185880094766617, "learning_rate": 7.707188666008581e-06, "step": 116900 }, { "embedding_loss": 0.0157, "epoch": 6.534614739900542, "grad_norm": 0.04449654743075371, "learning_rate": 7.700980300857377e-06, "step": 116950 }, { "embedding_loss": 0.0179, "epoch": 6.537408504218584, "grad_norm": 0.055425114929676056, "learning_rate": 7.69477193570617e-06, "step": 117000 }, { "embedding_loss": 0.0144, "epoch": 6.540202268536627, "grad_norm": 0.05466505140066147, "learning_rate": 7.688563570554966e-06, "step": 117050 }, { "embedding_loss": 0.0169, "epoch": 6.542996032854669, "grad_norm": 0.053676947951316833, "learning_rate": 7.682355205403761e-06, "step": 117100 }, { "embedding_loss": 0.0164, "epoch": 6.54578979717271, "grad_norm": 0.07387387007474899, "learning_rate": 7.676146840252557e-06, "step": 117150 }, { "embedding_loss": 0.0148, "epoch": 6.548583561490752, "grad_norm": 0.06936652958393097, "learning_rate": 7.669938475101353e-06, "step": 117200 }, { "embedding_loss": 0.0131, "epoch": 6.551377325808795, "grad_norm": 0.05235354229807854, "learning_rate": 7.663730109950146e-06, "step": 117250 }, { "embedding_loss": 0.0147, "epoch": 6.554171090126837, "grad_norm": 0.049179334193468094, "learning_rate": 7.657521744798942e-06, "step": 117300 }, { "embedding_loss": 0.0155, "epoch": 6.556964854444879, "grad_norm": 0.07028753310441971, "learning_rate": 7.651313379647737e-06, "step": 117350 }, { "embedding_loss": 0.0145, "epoch": 6.559758618762921, "grad_norm": 0.05477194860577583, "learning_rate": 7.645105014496533e-06, "step": 117400 }, { "embedding_loss": 0.0157, "epoch": 6.562552383080964, "grad_norm": 0.029650770127773285, "learning_rate": 7.638896649345328e-06, "step": 117450 }, { "embedding_loss": 0.0167, "epoch": 6.565346147399006, "grad_norm": 0.07423653453588486, "learning_rate": 7.632688284194124e-06, "step": 117500 }, { "embedding_loss": 0.0161, "epoch": 6.568139911717047, "grad_norm": 0.05360526219010353, "learning_rate": 7.6264799190429194e-06, "step": 117550 }, { "embedding_loss": 0.0169, "epoch": 6.570933676035089, "grad_norm": 0.05349397286772728, "learning_rate": 7.620271553891714e-06, "step": 117600 }, { "embedding_loss": 0.0164, "epoch": 6.573727440353132, "grad_norm": 0.05648175999522209, "learning_rate": 7.61406318874051e-06, "step": 117650 }, { "embedding_loss": 0.015, "epoch": 6.576521204671174, "grad_norm": 0.09666883200407028, "learning_rate": 7.607854823589304e-06, "step": 117700 }, { "embedding_loss": 0.0166, "epoch": 6.579314968989216, "grad_norm": 0.06362679600715637, "learning_rate": 7.6016464584381e-06, "step": 117750 }, { "embedding_loss": 0.0143, "epoch": 6.582108733307258, "grad_norm": 0.045859623700380325, "learning_rate": 7.595438093286895e-06, "step": 117800 }, { "embedding_loss": 0.0171, "epoch": 6.5849024976253006, "grad_norm": 0.04494471475481987, "learning_rate": 7.589229728135691e-06, "step": 117850 }, { "embedding_loss": 0.0139, "epoch": 6.587696261943343, "grad_norm": 0.03776172548532486, "learning_rate": 7.583021362984486e-06, "step": 117900 }, { "embedding_loss": 0.0145, "epoch": 6.590490026261385, "grad_norm": 0.04627622291445732, "learning_rate": 7.576812997833282e-06, "step": 117950 }, { "embedding_loss": 0.0164, "epoch": 6.593283790579426, "grad_norm": 0.0318390317261219, "learning_rate": 7.570604632682076e-06, "step": 118000 }, { "embedding_loss": 0.0171, "epoch": 6.596077554897469, "grad_norm": 0.055520739406347275, "learning_rate": 7.564396267530871e-06, "step": 118050 }, { "embedding_loss": 0.0149, "epoch": 6.598871319215511, "grad_norm": 0.0432266928255558, "learning_rate": 7.558187902379667e-06, "step": 118100 }, { "embedding_loss": 0.0154, "epoch": 6.601665083533553, "grad_norm": 0.10743258893489838, "learning_rate": 7.551979537228462e-06, "step": 118150 }, { "embedding_loss": 0.0161, "epoch": 6.604458847851595, "grad_norm": 0.0328730046749115, "learning_rate": 7.545771172077258e-06, "step": 118200 }, { "embedding_loss": 0.0161, "epoch": 6.6072526121696376, "grad_norm": 0.05345474183559418, "learning_rate": 7.539562806926053e-06, "step": 118250 }, { "embedding_loss": 0.0157, "epoch": 6.61004637648768, "grad_norm": 0.027642004191875458, "learning_rate": 7.533354441774848e-06, "step": 118300 }, { "embedding_loss": 0.0142, "epoch": 6.612840140805721, "grad_norm": 0.05494844913482666, "learning_rate": 7.5271460766236434e-06, "step": 118350 }, { "embedding_loss": 0.017, "epoch": 6.615633905123763, "grad_norm": 0.04861176386475563, "learning_rate": 7.520937711472439e-06, "step": 118400 }, { "embedding_loss": 0.0163, "epoch": 6.618427669441806, "grad_norm": 0.04718920215964317, "learning_rate": 7.514729346321234e-06, "step": 118450 }, { "embedding_loss": 0.0163, "epoch": 6.621221433759848, "grad_norm": 0.059340909123420715, "learning_rate": 7.508520981170029e-06, "step": 118500 }, { "embedding_loss": 0.0146, "epoch": 6.62401519807789, "grad_norm": 0.041155245155096054, "learning_rate": 7.502312616018825e-06, "step": 118550 }, { "embedding_loss": 0.0161, "epoch": 6.626808962395932, "grad_norm": 0.052242446690797806, "learning_rate": 7.496104250867619e-06, "step": 118600 }, { "embedding_loss": 0.0159, "epoch": 6.6296027267139745, "grad_norm": 0.04894329234957695, "learning_rate": 7.489895885716415e-06, "step": 118650 }, { "embedding_loss": 0.0174, "epoch": 6.632396491032017, "grad_norm": 0.15718454122543335, "learning_rate": 7.48368752056521e-06, "step": 118700 }, { "embedding_loss": 0.0167, "epoch": 6.635190255350059, "grad_norm": 0.034374725073575974, "learning_rate": 7.477479155414006e-06, "step": 118750 }, { "embedding_loss": 0.0158, "epoch": 6.637984019668101, "grad_norm": 0.045780591666698456, "learning_rate": 7.471270790262801e-06, "step": 118800 }, { "embedding_loss": 0.0148, "epoch": 6.640777783986143, "grad_norm": 0.039088353514671326, "learning_rate": 7.465062425111595e-06, "step": 118850 }, { "embedding_loss": 0.0159, "epoch": 6.643571548304185, "grad_norm": 0.07713353633880615, "learning_rate": 7.458854059960391e-06, "step": 118900 }, { "embedding_loss": 0.0163, "epoch": 6.646365312622227, "grad_norm": 0.02455860748887062, "learning_rate": 7.452645694809186e-06, "step": 118950 }, { "embedding_loss": 0.0158, "epoch": 6.649159076940269, "grad_norm": 0.055904217064380646, "learning_rate": 7.446437329657982e-06, "step": 119000 }, { "embedding_loss": 0.0161, "epoch": 6.6519528412583115, "grad_norm": 0.06128691881895065, "learning_rate": 7.440228964506777e-06, "step": 119050 }, { "embedding_loss": 0.0158, "epoch": 6.654746605576354, "grad_norm": 0.060874439775943756, "learning_rate": 7.434020599355573e-06, "step": 119100 }, { "embedding_loss": 0.0164, "epoch": 6.657540369894396, "grad_norm": 0.07163013517856598, "learning_rate": 7.427812234204368e-06, "step": 119150 }, { "embedding_loss": 0.0158, "epoch": 6.660334134212437, "grad_norm": 0.04256109520792961, "learning_rate": 7.421603869053163e-06, "step": 119200 }, { "embedding_loss": 0.0168, "epoch": 6.66312789853048, "grad_norm": 0.04533923417329788, "learning_rate": 7.415395503901958e-06, "step": 119250 }, { "embedding_loss": 0.0152, "epoch": 6.665921662848522, "grad_norm": 0.04747192561626434, "learning_rate": 7.409187138750753e-06, "step": 119300 }, { "embedding_loss": 0.0156, "epoch": 6.668715427166564, "grad_norm": 0.06614654511213303, "learning_rate": 7.402978773599549e-06, "step": 119350 }, { "embedding_loss": 0.0156, "epoch": 6.671509191484606, "grad_norm": 0.05609576776623726, "learning_rate": 7.396770408448344e-06, "step": 119400 }, { "embedding_loss": 0.0153, "epoch": 6.6743029558026485, "grad_norm": 0.07295659184455872, "learning_rate": 7.390562043297139e-06, "step": 119450 }, { "embedding_loss": 0.016, "epoch": 6.677096720120691, "grad_norm": 0.08116994053125381, "learning_rate": 7.384353678145934e-06, "step": 119500 }, { "embedding_loss": 0.0168, "epoch": 6.679890484438733, "grad_norm": 0.056772779673337936, "learning_rate": 7.37814531299473e-06, "step": 119550 }, { "embedding_loss": 0.0135, "epoch": 6.682684248756775, "grad_norm": 0.21242111921310425, "learning_rate": 7.371936947843525e-06, "step": 119600 }, { "embedding_loss": 0.016, "epoch": 6.6854780130748175, "grad_norm": 0.02652454376220703, "learning_rate": 7.36572858269232e-06, "step": 119650 }, { "embedding_loss": 0.0152, "epoch": 6.688271777392859, "grad_norm": 0.5687534213066101, "learning_rate": 7.3595202175411155e-06, "step": 119700 }, { "embedding_loss": 0.0173, "epoch": 6.691065541710901, "grad_norm": 0.09691095352172852, "learning_rate": 7.35331185238991e-06, "step": 119750 }, { "embedding_loss": 0.0148, "epoch": 6.693859306028943, "grad_norm": 0.04152536392211914, "learning_rate": 7.347103487238706e-06, "step": 119800 }, { "embedding_loss": 0.0164, "epoch": 6.6966530703469855, "grad_norm": 0.09206335991621017, "learning_rate": 7.340895122087501e-06, "step": 119850 }, { "embedding_loss": 0.0142, "epoch": 6.699446834665028, "grad_norm": 0.2590373754501343, "learning_rate": 7.334686756936297e-06, "step": 119900 }, { "embedding_loss": 0.0138, "epoch": 6.70224059898307, "grad_norm": 0.027605261653661728, "learning_rate": 7.328478391785092e-06, "step": 119950 }, { "embedding_loss": 0.0147, "epoch": 6.705034363301112, "grad_norm": 0.05919680371880531, "learning_rate": 7.322270026633888e-06, "step": 120000 }, { "embedding_loss": 0.0157, "epoch": 6.707828127619154, "grad_norm": 0.06217193603515625, "learning_rate": 7.316061661482682e-06, "step": 120050 }, { "embedding_loss": 0.0157, "epoch": 6.710621891937196, "grad_norm": 0.04540227726101875, "learning_rate": 7.309853296331477e-06, "step": 120100 }, { "embedding_loss": 0.0147, "epoch": 6.713415656255238, "grad_norm": 0.05928482115268707, "learning_rate": 7.303644931180273e-06, "step": 120150 }, { "embedding_loss": 0.0148, "epoch": 6.71620942057328, "grad_norm": 0.04733273759484291, "learning_rate": 7.297436566029068e-06, "step": 120200 }, { "embedding_loss": 0.0155, "epoch": 6.7190031848913225, "grad_norm": 0.05549396201968193, "learning_rate": 7.291228200877864e-06, "step": 120250 }, { "embedding_loss": 0.0141, "epoch": 6.721796949209365, "grad_norm": 0.04241145774722099, "learning_rate": 7.285019835726659e-06, "step": 120300 }, { "embedding_loss": 0.0158, "epoch": 6.724590713527407, "grad_norm": 0.059867456555366516, "learning_rate": 7.278811470575454e-06, "step": 120350 }, { "embedding_loss": 0.0163, "epoch": 6.727384477845449, "grad_norm": 0.14148882031440735, "learning_rate": 7.272603105424249e-06, "step": 120400 }, { "embedding_loss": 0.0169, "epoch": 6.7301782421634915, "grad_norm": 0.05824155732989311, "learning_rate": 7.266394740273045e-06, "step": 120450 }, { "embedding_loss": 0.0151, "epoch": 6.732972006481534, "grad_norm": 0.043122321367263794, "learning_rate": 7.2601863751218395e-06, "step": 120500 }, { "embedding_loss": 0.0149, "epoch": 6.735765770799575, "grad_norm": 0.05774736404418945, "learning_rate": 7.253978009970635e-06, "step": 120550 }, { "embedding_loss": 0.0168, "epoch": 6.738559535117617, "grad_norm": 0.043133385479450226, "learning_rate": 7.24776964481943e-06, "step": 120600 }, { "embedding_loss": 0.0156, "epoch": 6.7413532994356595, "grad_norm": 0.07063376903533936, "learning_rate": 7.241561279668225e-06, "step": 120650 }, { "embedding_loss": 0.014, "epoch": 6.744147063753702, "grad_norm": 0.02919379435479641, "learning_rate": 7.235352914517021e-06, "step": 120700 }, { "embedding_loss": 0.0153, "epoch": 6.746940828071744, "grad_norm": 0.05175863951444626, "learning_rate": 7.229144549365816e-06, "step": 120750 }, { "embedding_loss": 0.0145, "epoch": 6.749734592389786, "grad_norm": 0.05438996106386185, "learning_rate": 7.222936184214612e-06, "step": 120800 }, { "embedding_loss": 0.015, "epoch": 6.7525283567078285, "grad_norm": 0.06414154171943665, "learning_rate": 7.216727819063407e-06, "step": 120850 }, { "embedding_loss": 0.0161, "epoch": 6.75532212102587, "grad_norm": 0.10892228782176971, "learning_rate": 7.210519453912201e-06, "step": 120900 }, { "embedding_loss": 0.0154, "epoch": 6.758115885343912, "grad_norm": 0.13763527572155, "learning_rate": 7.204311088760997e-06, "step": 120950 }, { "embedding_loss": 0.0164, "epoch": 6.760909649661954, "grad_norm": 0.06769434362649918, "learning_rate": 7.198102723609792e-06, "step": 121000 }, { "embedding_loss": 0.0158, "epoch": 6.7637034139799965, "grad_norm": 0.06501352041959763, "learning_rate": 7.191894358458588e-06, "step": 121050 }, { "embedding_loss": 0.0146, "epoch": 6.766497178298039, "grad_norm": 0.02371823415160179, "learning_rate": 7.185685993307383e-06, "step": 121100 }, { "embedding_loss": 0.0147, "epoch": 6.769290942616081, "grad_norm": 0.0937359407544136, "learning_rate": 7.179477628156179e-06, "step": 121150 }, { "embedding_loss": 0.0151, "epoch": 6.772084706934123, "grad_norm": 0.045178208500146866, "learning_rate": 7.173269263004974e-06, "step": 121200 }, { "embedding_loss": 0.0167, "epoch": 6.7748784712521655, "grad_norm": 0.07762327045202255, "learning_rate": 7.167060897853769e-06, "step": 121250 }, { "embedding_loss": 0.0135, "epoch": 6.777672235570208, "grad_norm": 0.05087947100400925, "learning_rate": 7.1608525327025635e-06, "step": 121300 }, { "embedding_loss": 0.016, "epoch": 6.780465999888249, "grad_norm": 0.02567402645945549, "learning_rate": 7.154644167551359e-06, "step": 121350 }, { "embedding_loss": 0.0152, "epoch": 6.783259764206291, "grad_norm": 0.03237712383270264, "learning_rate": 7.1484358024001546e-06, "step": 121400 }, { "embedding_loss": 0.0163, "epoch": 6.7860535285243335, "grad_norm": 0.04124288260936737, "learning_rate": 7.14222743724895e-06, "step": 121450 }, { "embedding_loss": 0.0164, "epoch": 6.788847292842376, "grad_norm": 0.0379829928278923, "learning_rate": 7.136019072097745e-06, "step": 121500 }, { "embedding_loss": 0.0154, "epoch": 6.791641057160418, "grad_norm": 0.0873032957315445, "learning_rate": 7.12981070694654e-06, "step": 121550 }, { "embedding_loss": 0.0161, "epoch": 6.79443482147846, "grad_norm": 0.020691150799393654, "learning_rate": 7.123602341795336e-06, "step": 121600 }, { "embedding_loss": 0.0153, "epoch": 6.7972285857965025, "grad_norm": 0.0400848463177681, "learning_rate": 7.117393976644131e-06, "step": 121650 }, { "embedding_loss": 0.0158, "epoch": 6.800022350114545, "grad_norm": 0.14608672261238098, "learning_rate": 7.111185611492926e-06, "step": 121700 }, { "embedding_loss": 0.0176, "epoch": 6.802816114432586, "grad_norm": 0.03956136852502823, "learning_rate": 7.104977246341721e-06, "step": 121750 }, { "embedding_loss": 0.0167, "epoch": 6.805609878750628, "grad_norm": 0.05567660555243492, "learning_rate": 7.098768881190516e-06, "step": 121800 }, { "embedding_loss": 0.0181, "epoch": 6.8084036430686705, "grad_norm": 0.07364827394485474, "learning_rate": 7.092560516039312e-06, "step": 121850 }, { "embedding_loss": 0.0149, "epoch": 6.811197407386713, "grad_norm": 0.03574289008975029, "learning_rate": 7.086352150888107e-06, "step": 121900 }, { "embedding_loss": 0.0149, "epoch": 6.813991171704755, "grad_norm": 0.059383999556303024, "learning_rate": 7.080143785736903e-06, "step": 121950 }, { "embedding_loss": 0.0138, "epoch": 6.816784936022797, "grad_norm": 0.10033558309078217, "learning_rate": 7.073935420585698e-06, "step": 122000 }, { "embedding_loss": 0.0163, "epoch": 6.8195787003408395, "grad_norm": 0.054619647562503815, "learning_rate": 7.067727055434494e-06, "step": 122050 }, { "embedding_loss": 0.0148, "epoch": 6.822372464658882, "grad_norm": 0.04414360970258713, "learning_rate": 7.0615186902832875e-06, "step": 122100 }, { "embedding_loss": 0.0152, "epoch": 6.825166228976924, "grad_norm": 0.021419361233711243, "learning_rate": 7.055310325132083e-06, "step": 122150 }, { "embedding_loss": 0.0154, "epoch": 6.827959993294965, "grad_norm": 0.05137372016906738, "learning_rate": 7.0491019599808785e-06, "step": 122200 }, { "embedding_loss": 0.0156, "epoch": 6.8307537576130075, "grad_norm": 0.086142398416996, "learning_rate": 7.042893594829674e-06, "step": 122250 }, { "embedding_loss": 0.0152, "epoch": 6.83354752193105, "grad_norm": 0.049509257078170776, "learning_rate": 7.03668522967847e-06, "step": 122300 }, { "embedding_loss": 0.0159, "epoch": 6.836341286249092, "grad_norm": 0.03887538984417915, "learning_rate": 7.030476864527265e-06, "step": 122350 }, { "embedding_loss": 0.0168, "epoch": 6.839135050567134, "grad_norm": 0.04265086352825165, "learning_rate": 7.02426849937606e-06, "step": 122400 }, { "embedding_loss": 0.0154, "epoch": 6.8419288148851765, "grad_norm": 0.08086023479700089, "learning_rate": 7.018060134224855e-06, "step": 122450 }, { "embedding_loss": 0.0168, "epoch": 6.844722579203219, "grad_norm": 0.038332074880599976, "learning_rate": 7.011851769073651e-06, "step": 122500 }, { "embedding_loss": 0.0151, "epoch": 6.847516343521261, "grad_norm": 0.0479397252202034, "learning_rate": 7.0056434039224455e-06, "step": 122550 }, { "embedding_loss": 0.0164, "epoch": 6.850310107839302, "grad_norm": 0.02427505888044834, "learning_rate": 6.999435038771241e-06, "step": 122600 }, { "embedding_loss": 0.016, "epoch": 6.8531038721573445, "grad_norm": 0.06853608787059784, "learning_rate": 6.993226673620036e-06, "step": 122650 }, { "embedding_loss": 0.0149, "epoch": 6.855897636475387, "grad_norm": 0.07144781947135925, "learning_rate": 6.987018308468831e-06, "step": 122700 }, { "embedding_loss": 0.0171, "epoch": 6.858691400793429, "grad_norm": 0.18678221106529236, "learning_rate": 6.980809943317627e-06, "step": 122750 }, { "embedding_loss": 0.0152, "epoch": 6.861485165111471, "grad_norm": 0.046737924218177795, "learning_rate": 6.974601578166422e-06, "step": 122800 }, { "embedding_loss": 0.0188, "epoch": 6.8642789294295135, "grad_norm": 0.13632963597774506, "learning_rate": 6.968393213015218e-06, "step": 122850 }, { "embedding_loss": 0.0158, "epoch": 6.867072693747556, "grad_norm": 0.058363985270261765, "learning_rate": 6.962184847864013e-06, "step": 122900 }, { "embedding_loss": 0.0151, "epoch": 6.869866458065598, "grad_norm": 0.04980282112956047, "learning_rate": 6.955976482712807e-06, "step": 122950 }, { "embedding_loss": 0.0186, "epoch": 6.87266022238364, "grad_norm": 0.04107705131173134, "learning_rate": 6.9497681175616025e-06, "step": 123000 }, { "embedding_loss": 0.0148, "epoch": 6.8754539867016815, "grad_norm": 0.048957452178001404, "learning_rate": 6.943559752410398e-06, "step": 123050 }, { "embedding_loss": 0.015, "epoch": 6.878247751019724, "grad_norm": 0.05793645977973938, "learning_rate": 6.9373513872591936e-06, "step": 123100 }, { "embedding_loss": 0.0144, "epoch": 6.881041515337766, "grad_norm": 0.041470132768154144, "learning_rate": 6.931143022107989e-06, "step": 123150 }, { "embedding_loss": 0.0155, "epoch": 6.883835279655808, "grad_norm": 0.044091373682022095, "learning_rate": 6.924934656956785e-06, "step": 123200 }, { "embedding_loss": 0.0163, "epoch": 6.8866290439738505, "grad_norm": 0.08897567540407181, "learning_rate": 6.91872629180558e-06, "step": 123250 }, { "embedding_loss": 0.0162, "epoch": 6.889422808291893, "grad_norm": 0.017512209713459015, "learning_rate": 6.912517926654375e-06, "step": 123300 }, { "embedding_loss": 0.0143, "epoch": 6.892216572609935, "grad_norm": 0.05894048884510994, "learning_rate": 6.9063095615031694e-06, "step": 123350 }, { "embedding_loss": 0.0149, "epoch": 6.895010336927976, "grad_norm": 0.06829342246055603, "learning_rate": 6.900101196351965e-06, "step": 123400 }, { "embedding_loss": 0.0164, "epoch": 6.8978041012460185, "grad_norm": 0.05823536962270737, "learning_rate": 6.8938928312007605e-06, "step": 123450 }, { "embedding_loss": 0.0159, "epoch": 6.900597865564061, "grad_norm": 0.023914635181427002, "learning_rate": 6.887684466049556e-06, "step": 123500 }, { "embedding_loss": 0.0157, "epoch": 6.903391629882103, "grad_norm": 0.0668298676609993, "learning_rate": 6.881476100898351e-06, "step": 123550 }, { "embedding_loss": 0.0165, "epoch": 6.906185394200145, "grad_norm": 0.29945459961891174, "learning_rate": 6.875267735747146e-06, "step": 123600 }, { "embedding_loss": 0.0164, "epoch": 6.9089791585181874, "grad_norm": 0.05635469779372215, "learning_rate": 6.869059370595942e-06, "step": 123650 }, { "embedding_loss": 0.0165, "epoch": 6.91177292283623, "grad_norm": 0.07351613789796829, "learning_rate": 6.862851005444737e-06, "step": 123700 }, { "embedding_loss": 0.0146, "epoch": 6.914566687154272, "grad_norm": 0.032958269119262695, "learning_rate": 6.856642640293532e-06, "step": 123750 }, { "embedding_loss": 0.0155, "epoch": 6.917360451472314, "grad_norm": 0.03694605827331543, "learning_rate": 6.8504342751423265e-06, "step": 123800 }, { "embedding_loss": 0.0137, "epoch": 6.920154215790356, "grad_norm": 0.07552290707826614, "learning_rate": 6.844225909991122e-06, "step": 123850 }, { "embedding_loss": 0.0163, "epoch": 6.922947980108398, "grad_norm": 0.05481826141476631, "learning_rate": 6.8380175448399176e-06, "step": 123900 }, { "embedding_loss": 0.0156, "epoch": 6.92574174442644, "grad_norm": 0.042147524654865265, "learning_rate": 6.831809179688713e-06, "step": 123950 }, { "embedding_loss": 0.0138, "epoch": 6.928535508744482, "grad_norm": 0.04023466259241104, "learning_rate": 6.825600814537509e-06, "step": 124000 }, { "embedding_loss": 0.0178, "epoch": 6.931329273062524, "grad_norm": 0.0400567352771759, "learning_rate": 6.819392449386304e-06, "step": 124050 }, { "embedding_loss": 0.0158, "epoch": 6.934123037380567, "grad_norm": 0.04823686555027962, "learning_rate": 6.8131840842351e-06, "step": 124100 }, { "embedding_loss": 0.0166, "epoch": 6.936916801698609, "grad_norm": 0.09561844915151596, "learning_rate": 6.8069757190838934e-06, "step": 124150 }, { "embedding_loss": 0.0164, "epoch": 6.939710566016651, "grad_norm": 0.087944395840168, "learning_rate": 6.800767353932689e-06, "step": 124200 }, { "embedding_loss": 0.0158, "epoch": 6.9425043303346925, "grad_norm": 0.05118734389543533, "learning_rate": 6.7945589887814845e-06, "step": 124250 }, { "embedding_loss": 0.0164, "epoch": 6.945298094652735, "grad_norm": 0.039666350930929184, "learning_rate": 6.78835062363028e-06, "step": 124300 }, { "embedding_loss": 0.0157, "epoch": 6.948091858970777, "grad_norm": 0.07868050038814545, "learning_rate": 6.7821422584790755e-06, "step": 124350 }, { "embedding_loss": 0.0144, "epoch": 6.950885623288819, "grad_norm": 0.06077270582318306, "learning_rate": 6.775933893327871e-06, "step": 124400 }, { "embedding_loss": 0.0143, "epoch": 6.953679387606861, "grad_norm": 0.03127770870923996, "learning_rate": 6.769725528176666e-06, "step": 124450 }, { "embedding_loss": 0.0146, "epoch": 6.956473151924904, "grad_norm": 0.0241890549659729, "learning_rate": 6.763517163025461e-06, "step": 124500 }, { "embedding_loss": 0.0166, "epoch": 6.959266916242946, "grad_norm": 0.07079970091581345, "learning_rate": 6.757308797874257e-06, "step": 124550 }, { "embedding_loss": 0.0147, "epoch": 6.962060680560988, "grad_norm": 0.04756411164999008, "learning_rate": 6.751100432723051e-06, "step": 124600 }, { "embedding_loss": 0.0136, "epoch": 6.96485444487903, "grad_norm": 0.1317901462316513, "learning_rate": 6.744892067571847e-06, "step": 124650 }, { "embedding_loss": 0.0164, "epoch": 6.967648209197073, "grad_norm": 0.08515078574419022, "learning_rate": 6.7386837024206416e-06, "step": 124700 }, { "embedding_loss": 0.0151, "epoch": 6.970441973515114, "grad_norm": 0.06111662834882736, "learning_rate": 6.732475337269437e-06, "step": 124750 }, { "embedding_loss": 0.0176, "epoch": 6.973235737833156, "grad_norm": 0.046287134289741516, "learning_rate": 6.726266972118233e-06, "step": 124800 }, { "embedding_loss": 0.0155, "epoch": 6.976029502151198, "grad_norm": 0.04581582918763161, "learning_rate": 6.720058606967028e-06, "step": 124850 }, { "embedding_loss": 0.0157, "epoch": 6.978823266469241, "grad_norm": 0.0813995972275734, "learning_rate": 6.713850241815824e-06, "step": 124900 }, { "embedding_loss": 0.0143, "epoch": 6.981617030787283, "grad_norm": 0.08016880601644516, "learning_rate": 6.707641876664619e-06, "step": 124950 }, { "embedding_loss": 0.0155, "epoch": 6.984410795105325, "grad_norm": 0.047971535474061966, "learning_rate": 6.701433511513413e-06, "step": 125000 }, { "embedding_loss": 0.0154, "epoch": 6.987204559423367, "grad_norm": 0.06058749929070473, "learning_rate": 6.6952251463622085e-06, "step": 125050 }, { "embedding_loss": 0.0165, "epoch": 6.989998323741409, "grad_norm": 0.09321723133325577, "learning_rate": 6.689016781211004e-06, "step": 125100 }, { "embedding_loss": 0.016, "epoch": 6.992792088059451, "grad_norm": 0.08071985840797424, "learning_rate": 6.6828084160597995e-06, "step": 125150 }, { "embedding_loss": 0.0168, "epoch": 6.995585852377493, "grad_norm": 0.031407516449689865, "learning_rate": 6.676600050908595e-06, "step": 125200 }, { "embedding_loss": 0.0159, "epoch": 6.998379616695535, "grad_norm": 0.08357969671487808, "learning_rate": 6.6703916857573905e-06, "step": 125250 }, { "embedding_loss": 0.0158, "epoch": 7.001173381013578, "grad_norm": 0.04072713851928711, "learning_rate": 6.664183320606186e-06, "step": 125300 }, { "embedding_loss": 0.0144, "epoch": 7.00396714533162, "grad_norm": 0.03253120183944702, "learning_rate": 6.657974955454981e-06, "step": 125350 }, { "embedding_loss": 0.0161, "epoch": 7.006760909649662, "grad_norm": 0.06688445806503296, "learning_rate": 6.651766590303775e-06, "step": 125400 }, { "embedding_loss": 0.0136, "epoch": 7.009554673967704, "grad_norm": 0.07059884071350098, "learning_rate": 6.645558225152571e-06, "step": 125450 }, { "embedding_loss": 0.0153, "epoch": 7.012348438285747, "grad_norm": 0.09571552276611328, "learning_rate": 6.639349860001366e-06, "step": 125500 }, { "embedding_loss": 0.0152, "epoch": 7.015142202603788, "grad_norm": 0.03748427703976631, "learning_rate": 6.633141494850162e-06, "step": 125550 }, { "embedding_loss": 0.015, "epoch": 7.01793596692183, "grad_norm": 0.05773162841796875, "learning_rate": 6.626933129698957e-06, "step": 125600 }, { "embedding_loss": 0.0134, "epoch": 7.020729731239872, "grad_norm": 0.04907944053411484, "learning_rate": 6.620724764547752e-06, "step": 125650 }, { "embedding_loss": 0.0155, "epoch": 7.023523495557915, "grad_norm": 0.042922165244817734, "learning_rate": 6.614516399396548e-06, "step": 125700 }, { "embedding_loss": 0.0163, "epoch": 7.026317259875957, "grad_norm": 0.07573281973600388, "learning_rate": 6.608308034245343e-06, "step": 125750 }, { "embedding_loss": 0.0163, "epoch": 7.029111024193999, "grad_norm": 0.04789429157972336, "learning_rate": 6.602099669094138e-06, "step": 125800 }, { "embedding_loss": 0.0176, "epoch": 7.031904788512041, "grad_norm": 0.06962573528289795, "learning_rate": 6.5958913039429325e-06, "step": 125850 }, { "embedding_loss": 0.0142, "epoch": 7.034698552830084, "grad_norm": 0.04586207866668701, "learning_rate": 6.589682938791728e-06, "step": 125900 }, { "embedding_loss": 0.0161, "epoch": 7.037492317148125, "grad_norm": 0.02872786484658718, "learning_rate": 6.5834745736405235e-06, "step": 125950 }, { "embedding_loss": 0.0159, "epoch": 7.040286081466167, "grad_norm": 0.07625932991504669, "learning_rate": 6.577266208489319e-06, "step": 126000 }, { "embedding_loss": 0.0162, "epoch": 7.043079845784209, "grad_norm": 0.06585145741701126, "learning_rate": 6.5710578433381145e-06, "step": 126050 }, { "embedding_loss": 0.0173, "epoch": 7.045873610102252, "grad_norm": 0.10921823978424072, "learning_rate": 6.56484947818691e-06, "step": 126100 }, { "embedding_loss": 0.0146, "epoch": 7.048667374420294, "grad_norm": 0.05330265685915947, "learning_rate": 6.5586411130357055e-06, "step": 126150 }, { "embedding_loss": 0.0159, "epoch": 7.051461138738336, "grad_norm": 0.06830411404371262, "learning_rate": 6.552432747884499e-06, "step": 126200 }, { "embedding_loss": 0.0183, "epoch": 7.054254903056378, "grad_norm": 0.10270995646715164, "learning_rate": 6.546224382733295e-06, "step": 126250 }, { "embedding_loss": 0.0146, "epoch": 7.057048667374421, "grad_norm": 0.05576518177986145, "learning_rate": 6.54001601758209e-06, "step": 126300 }, { "embedding_loss": 0.0159, "epoch": 7.059842431692463, "grad_norm": 0.03425639495253563, "learning_rate": 6.533807652430886e-06, "step": 126350 }, { "embedding_loss": 0.0156, "epoch": 7.062636196010504, "grad_norm": 0.07367387413978577, "learning_rate": 6.5275992872796814e-06, "step": 126400 }, { "embedding_loss": 0.0166, "epoch": 7.065429960328546, "grad_norm": 0.068904809653759, "learning_rate": 6.521390922128477e-06, "step": 126450 }, { "embedding_loss": 0.0163, "epoch": 7.068223724646589, "grad_norm": 0.07591640204191208, "learning_rate": 6.515182556977272e-06, "step": 126500 }, { "embedding_loss": 0.0155, "epoch": 7.071017488964631, "grad_norm": 0.0423586368560791, "learning_rate": 6.508974191826067e-06, "step": 126550 }, { "embedding_loss": 0.0173, "epoch": 7.073811253282673, "grad_norm": 0.06538179516792297, "learning_rate": 6.502765826674863e-06, "step": 126600 }, { "embedding_loss": 0.0169, "epoch": 7.076605017600715, "grad_norm": 0.10358361899852753, "learning_rate": 6.496557461523657e-06, "step": 126650 }, { "embedding_loss": 0.0156, "epoch": 7.079398781918758, "grad_norm": 0.10240999609231949, "learning_rate": 6.490349096372453e-06, "step": 126700 }, { "embedding_loss": 0.0156, "epoch": 7.0821925462368, "grad_norm": 0.04419811815023422, "learning_rate": 6.4841407312212475e-06, "step": 126750 }, { "embedding_loss": 0.015, "epoch": 7.084986310554841, "grad_norm": 0.12192144989967346, "learning_rate": 6.477932366070043e-06, "step": 126800 }, { "embedding_loss": 0.0159, "epoch": 7.087780074872883, "grad_norm": 0.03584303334355354, "learning_rate": 6.4717240009188385e-06, "step": 126850 }, { "embedding_loss": 0.0155, "epoch": 7.090573839190926, "grad_norm": 0.04670807719230652, "learning_rate": 6.465515635767634e-06, "step": 126900 }, { "embedding_loss": 0.0167, "epoch": 7.093367603508968, "grad_norm": 0.10209380090236664, "learning_rate": 6.4593072706164295e-06, "step": 126950 }, { "embedding_loss": 0.0163, "epoch": 7.09616136782701, "grad_norm": 0.0531579852104187, "learning_rate": 6.453098905465225e-06, "step": 127000 }, { "embedding_loss": 0.0177, "epoch": 7.098955132145052, "grad_norm": 0.06941573321819305, "learning_rate": 6.446890540314019e-06, "step": 127050 }, { "embedding_loss": 0.0162, "epoch": 7.101748896463095, "grad_norm": 0.0624704547226429, "learning_rate": 6.440682175162814e-06, "step": 127100 }, { "embedding_loss": 0.0166, "epoch": 7.104542660781137, "grad_norm": 0.03859766572713852, "learning_rate": 6.43447381001161e-06, "step": 127150 }, { "embedding_loss": 0.0152, "epoch": 7.107336425099179, "grad_norm": 0.022667037323117256, "learning_rate": 6.428265444860405e-06, "step": 127200 }, { "embedding_loss": 0.0148, "epoch": 7.11013018941722, "grad_norm": 0.04574231058359146, "learning_rate": 6.422057079709201e-06, "step": 127250 }, { "embedding_loss": 0.0164, "epoch": 7.112923953735263, "grad_norm": 0.09998048841953278, "learning_rate": 6.4158487145579964e-06, "step": 127300 }, { "embedding_loss": 0.0167, "epoch": 7.115717718053305, "grad_norm": 0.03885971009731293, "learning_rate": 6.409640349406792e-06, "step": 127350 }, { "embedding_loss": 0.0169, "epoch": 7.118511482371347, "grad_norm": 0.055479686707258224, "learning_rate": 6.403431984255587e-06, "step": 127400 }, { "embedding_loss": 0.0151, "epoch": 7.121305246689389, "grad_norm": 0.08464046567678452, "learning_rate": 6.397223619104381e-06, "step": 127450 }, { "embedding_loss": 0.0153, "epoch": 7.124099011007432, "grad_norm": 0.07091924548149109, "learning_rate": 6.391015253953177e-06, "step": 127500 }, { "embedding_loss": 0.0172, "epoch": 7.126892775325474, "grad_norm": 0.04840958118438721, "learning_rate": 6.384806888801972e-06, "step": 127550 }, { "embedding_loss": 0.0186, "epoch": 7.129686539643516, "grad_norm": 0.0454111322760582, "learning_rate": 6.378598523650768e-06, "step": 127600 }, { "embedding_loss": 0.0153, "epoch": 7.132480303961557, "grad_norm": 0.08608731627464294, "learning_rate": 6.3723901584995625e-06, "step": 127650 }, { "embedding_loss": 0.0152, "epoch": 7.1352740682796, "grad_norm": 0.03832891955971718, "learning_rate": 6.366181793348358e-06, "step": 127700 }, { "embedding_loss": 0.0151, "epoch": 7.138067832597642, "grad_norm": 0.09290667623281479, "learning_rate": 6.3599734281971535e-06, "step": 127750 }, { "embedding_loss": 0.0146, "epoch": 7.140861596915684, "grad_norm": 0.08212405443191528, "learning_rate": 6.353765063045949e-06, "step": 127800 }, { "embedding_loss": 0.0142, "epoch": 7.143655361233726, "grad_norm": 0.04998835548758507, "learning_rate": 6.347556697894744e-06, "step": 127850 }, { "embedding_loss": 0.0139, "epoch": 7.146449125551769, "grad_norm": 0.03884972631931305, "learning_rate": 6.341348332743538e-06, "step": 127900 }, { "embedding_loss": 0.0145, "epoch": 7.149242889869811, "grad_norm": 0.05149669572710991, "learning_rate": 6.335139967592334e-06, "step": 127950 }, { "embedding_loss": 0.0149, "epoch": 7.152036654187853, "grad_norm": 0.051544446498155594, "learning_rate": 6.328931602441129e-06, "step": 128000 }, { "embedding_loss": 0.0163, "epoch": 7.154830418505895, "grad_norm": 0.06401709467172623, "learning_rate": 6.322723237289925e-06, "step": 128050 }, { "embedding_loss": 0.0154, "epoch": 7.157624182823937, "grad_norm": 0.0651194229722023, "learning_rate": 6.3165148721387204e-06, "step": 128100 }, { "embedding_loss": 0.0159, "epoch": 7.160417947141979, "grad_norm": 0.06471746414899826, "learning_rate": 6.310306506987516e-06, "step": 128150 }, { "embedding_loss": 0.0153, "epoch": 7.163211711460021, "grad_norm": 0.08151091635227203, "learning_rate": 6.3040981418363115e-06, "step": 128200 }, { "embedding_loss": 0.0166, "epoch": 7.166005475778063, "grad_norm": 0.05856778100132942, "learning_rate": 6.297889776685105e-06, "step": 128250 }, { "embedding_loss": 0.017, "epoch": 7.168799240096106, "grad_norm": 0.05329563096165657, "learning_rate": 6.291681411533901e-06, "step": 128300 }, { "embedding_loss": 0.0158, "epoch": 7.171593004414148, "grad_norm": 0.04574774578213692, "learning_rate": 6.285473046382696e-06, "step": 128350 }, { "embedding_loss": 0.0147, "epoch": 7.17438676873219, "grad_norm": 0.060682423412799835, "learning_rate": 6.279264681231492e-06, "step": 128400 }, { "embedding_loss": 0.0161, "epoch": 7.177180533050232, "grad_norm": 0.05462687090039253, "learning_rate": 6.273056316080287e-06, "step": 128450 }, { "embedding_loss": 0.0135, "epoch": 7.179974297368274, "grad_norm": 0.029793836176395416, "learning_rate": 6.266847950929083e-06, "step": 128500 }, { "embedding_loss": 0.0144, "epoch": 7.182768061686316, "grad_norm": 0.05813127011060715, "learning_rate": 6.2606395857778775e-06, "step": 128550 }, { "embedding_loss": 0.0155, "epoch": 7.185561826004358, "grad_norm": 0.070486880838871, "learning_rate": 6.254431220626673e-06, "step": 128600 }, { "embedding_loss": 0.0158, "epoch": 7.1883555903224, "grad_norm": 0.05644659698009491, "learning_rate": 6.2482228554754686e-06, "step": 128650 }, { "embedding_loss": 0.0151, "epoch": 7.191149354640443, "grad_norm": 0.04849706590175629, "learning_rate": 6.242014490324263e-06, "step": 128700 }, { "embedding_loss": 0.0148, "epoch": 7.193943118958485, "grad_norm": 0.07241576910018921, "learning_rate": 6.235806125173059e-06, "step": 128750 }, { "embedding_loss": 0.0167, "epoch": 7.196736883276527, "grad_norm": 0.24801692366600037, "learning_rate": 6.229597760021853e-06, "step": 128800 }, { "embedding_loss": 0.0173, "epoch": 7.199530647594569, "grad_norm": 0.04555341228842735, "learning_rate": 6.223389394870649e-06, "step": 128850 }, { "embedding_loss": 0.0153, "epoch": 7.202324411912611, "grad_norm": 0.06220013275742531, "learning_rate": 6.2171810297194444e-06, "step": 128900 }, { "embedding_loss": 0.017, "epoch": 7.205118176230653, "grad_norm": 0.06438829004764557, "learning_rate": 6.21097266456824e-06, "step": 128950 }, { "embedding_loss": 0.0143, "epoch": 7.207911940548695, "grad_norm": 0.06800001114606857, "learning_rate": 6.2047642994170355e-06, "step": 129000 }, { "embedding_loss": 0.0153, "epoch": 7.210705704866737, "grad_norm": 0.061178624629974365, "learning_rate": 6.198555934265831e-06, "step": 129050 }, { "embedding_loss": 0.0165, "epoch": 7.21349946918478, "grad_norm": 0.04741508141160011, "learning_rate": 6.192347569114625e-06, "step": 129100 }, { "embedding_loss": 0.0166, "epoch": 7.216293233502822, "grad_norm": 0.07182268798351288, "learning_rate": 6.18613920396342e-06, "step": 129150 }, { "embedding_loss": 0.0145, "epoch": 7.219086997820864, "grad_norm": 0.03916815668344498, "learning_rate": 6.179930838812216e-06, "step": 129200 }, { "embedding_loss": 0.0158, "epoch": 7.221880762138906, "grad_norm": 0.042909204959869385, "learning_rate": 6.173722473661011e-06, "step": 129250 }, { "embedding_loss": 0.0145, "epoch": 7.2246745264569485, "grad_norm": 0.04781007021665573, "learning_rate": 6.167514108509807e-06, "step": 129300 }, { "embedding_loss": 0.0149, "epoch": 7.22746829077499, "grad_norm": 0.04596741497516632, "learning_rate": 6.161305743358602e-06, "step": 129350 }, { "embedding_loss": 0.0163, "epoch": 7.230262055093032, "grad_norm": 0.10431301593780518, "learning_rate": 6.155097378207398e-06, "step": 129400 }, { "embedding_loss": 0.0164, "epoch": 7.233055819411074, "grad_norm": 0.05575423687696457, "learning_rate": 6.1488890130561926e-06, "step": 129450 }, { "embedding_loss": 0.0157, "epoch": 7.235849583729117, "grad_norm": 0.07737904787063599, "learning_rate": 6.142680647904987e-06, "step": 129500 }, { "embedding_loss": 0.015, "epoch": 7.238643348047159, "grad_norm": 0.05878661572933197, "learning_rate": 6.136472282753783e-06, "step": 129550 }, { "embedding_loss": 0.0151, "epoch": 7.241437112365201, "grad_norm": 0.06560530513525009, "learning_rate": 6.130263917602578e-06, "step": 129600 }, { "embedding_loss": 0.0152, "epoch": 7.244230876683243, "grad_norm": 0.07853823155164719, "learning_rate": 6.124055552451374e-06, "step": 129650 }, { "embedding_loss": 0.0155, "epoch": 7.2470246410012855, "grad_norm": 0.0717538595199585, "learning_rate": 6.1178471873001684e-06, "step": 129700 }, { "embedding_loss": 0.0166, "epoch": 7.249818405319327, "grad_norm": 0.0748300775885582, "learning_rate": 6.111638822148964e-06, "step": 129750 }, { "embedding_loss": 0.0167, "epoch": 7.252612169637369, "grad_norm": 0.046986520290374756, "learning_rate": 6.1054304569977595e-06, "step": 129800 }, { "embedding_loss": 0.0143, "epoch": 7.255405933955411, "grad_norm": 0.05497590824961662, "learning_rate": 6.099222091846555e-06, "step": 129850 }, { "embedding_loss": 0.0146, "epoch": 7.258199698273454, "grad_norm": 0.07180599868297577, "learning_rate": 6.09301372669535e-06, "step": 129900 }, { "embedding_loss": 0.0169, "epoch": 7.260993462591496, "grad_norm": 0.06333713233470917, "learning_rate": 6.086805361544144e-06, "step": 129950 }, { "embedding_loss": 0.015, "epoch": 7.263787226909538, "grad_norm": 0.20727849006652832, "learning_rate": 6.08059699639294e-06, "step": 130000 }, { "embedding_loss": 0.0166, "epoch": 7.26658099122758, "grad_norm": 0.03129785135388374, "learning_rate": 6.074388631241735e-06, "step": 130050 }, { "embedding_loss": 0.0162, "epoch": 7.2693747555456225, "grad_norm": 0.05227687582373619, "learning_rate": 6.068180266090531e-06, "step": 130100 }, { "embedding_loss": 0.0152, "epoch": 7.272168519863664, "grad_norm": 0.0558108389377594, "learning_rate": 6.061971900939326e-06, "step": 130150 }, { "embedding_loss": 0.0158, "epoch": 7.274962284181706, "grad_norm": 0.028995152562856674, "learning_rate": 6.055763535788122e-06, "step": 130200 }, { "embedding_loss": 0.0153, "epoch": 7.277756048499748, "grad_norm": 0.024030301719903946, "learning_rate": 6.049555170636917e-06, "step": 130250 }, { "embedding_loss": 0.017, "epoch": 7.280549812817791, "grad_norm": 0.11168868839740753, "learning_rate": 6.043346805485711e-06, "step": 130300 }, { "embedding_loss": 0.0159, "epoch": 7.283343577135833, "grad_norm": 0.04670443385839462, "learning_rate": 6.037138440334507e-06, "step": 130350 }, { "embedding_loss": 0.0139, "epoch": 7.286137341453875, "grad_norm": 0.10887888073921204, "learning_rate": 6.030930075183302e-06, "step": 130400 }, { "embedding_loss": 0.016, "epoch": 7.288931105771917, "grad_norm": 0.06960166990756989, "learning_rate": 6.024721710032098e-06, "step": 130450 }, { "embedding_loss": 0.0154, "epoch": 7.2917248700899595, "grad_norm": 0.05914528667926788, "learning_rate": 6.018513344880893e-06, "step": 130500 }, { "embedding_loss": 0.0153, "epoch": 7.294518634408002, "grad_norm": 0.03662776201963425, "learning_rate": 6.012304979729689e-06, "step": 130550 }, { "embedding_loss": 0.0162, "epoch": 7.297312398726043, "grad_norm": 0.05944181978702545, "learning_rate": 6.0060966145784835e-06, "step": 130600 }, { "embedding_loss": 0.017, "epoch": 7.300106163044085, "grad_norm": 0.06354328244924545, "learning_rate": 5.999888249427279e-06, "step": 130650 }, { "embedding_loss": 0.0154, "epoch": 7.302899927362128, "grad_norm": 0.03889360651373863, "learning_rate": 5.9936798842760745e-06, "step": 130700 }, { "embedding_loss": 0.0147, "epoch": 7.30569369168017, "grad_norm": 0.0615963488817215, "learning_rate": 5.987471519124869e-06, "step": 130750 }, { "embedding_loss": 0.0132, "epoch": 7.308487455998212, "grad_norm": 0.04187722131609917, "learning_rate": 5.981263153973665e-06, "step": 130800 }, { "embedding_loss": 0.0156, "epoch": 7.311281220316254, "grad_norm": 0.04623600095510483, "learning_rate": 5.975054788822459e-06, "step": 130850 }, { "embedding_loss": 0.0157, "epoch": 7.3140749846342965, "grad_norm": 0.036469824612140656, "learning_rate": 5.968846423671255e-06, "step": 130900 }, { "embedding_loss": 0.0175, "epoch": 7.316868748952339, "grad_norm": 0.053088150918483734, "learning_rate": 5.96263805852005e-06, "step": 130950 }, { "embedding_loss": 0.0153, "epoch": 7.31966251327038, "grad_norm": 0.06933115422725677, "learning_rate": 5.956429693368846e-06, "step": 131000 }, { "embedding_loss": 0.0144, "epoch": 7.322456277588422, "grad_norm": 0.07866281270980835, "learning_rate": 5.950221328217641e-06, "step": 131050 }, { "embedding_loss": 0.0175, "epoch": 7.325250041906465, "grad_norm": 0.12231084704399109, "learning_rate": 5.944012963066437e-06, "step": 131100 }, { "embedding_loss": 0.0177, "epoch": 7.328043806224507, "grad_norm": 0.08394178748130798, "learning_rate": 5.937804597915231e-06, "step": 131150 }, { "embedding_loss": 0.0166, "epoch": 7.330837570542549, "grad_norm": 0.06125438213348389, "learning_rate": 5.931596232764026e-06, "step": 131200 }, { "embedding_loss": 0.0154, "epoch": 7.333631334860591, "grad_norm": 0.038577690720558167, "learning_rate": 5.925387867612822e-06, "step": 131250 }, { "embedding_loss": 0.0161, "epoch": 7.3364250991786335, "grad_norm": 0.03790817782282829, "learning_rate": 5.919179502461617e-06, "step": 131300 }, { "embedding_loss": 0.0144, "epoch": 7.339218863496676, "grad_norm": 0.03538224473595619, "learning_rate": 5.912971137310413e-06, "step": 131350 }, { "embedding_loss": 0.0152, "epoch": 7.342012627814718, "grad_norm": 0.03958588093519211, "learning_rate": 5.906762772159208e-06, "step": 131400 }, { "embedding_loss": 0.0161, "epoch": 7.344806392132759, "grad_norm": 0.07985013723373413, "learning_rate": 5.900554407008004e-06, "step": 131450 }, { "embedding_loss": 0.0149, "epoch": 7.347600156450802, "grad_norm": 0.05969721078872681, "learning_rate": 5.8943460418567985e-06, "step": 131500 }, { "embedding_loss": 0.0144, "epoch": 7.350393920768844, "grad_norm": 0.033311519771814346, "learning_rate": 5.888137676705593e-06, "step": 131550 }, { "embedding_loss": 0.0161, "epoch": 7.353187685086886, "grad_norm": 0.04158949851989746, "learning_rate": 5.881929311554389e-06, "step": 131600 }, { "embedding_loss": 0.0154, "epoch": 7.355981449404928, "grad_norm": 0.06804822385311127, "learning_rate": 5.875720946403184e-06, "step": 131650 }, { "embedding_loss": 0.0154, "epoch": 7.3587752137229705, "grad_norm": 0.0667879655957222, "learning_rate": 5.86951258125198e-06, "step": 131700 }, { "embedding_loss": 0.0171, "epoch": 7.361568978041013, "grad_norm": 0.07973307371139526, "learning_rate": 5.863304216100774e-06, "step": 131750 }, { "embedding_loss": 0.0143, "epoch": 7.364362742359055, "grad_norm": 0.05507231131196022, "learning_rate": 5.85709585094957e-06, "step": 131800 }, { "embedding_loss": 0.0159, "epoch": 7.367156506677096, "grad_norm": 0.07874174416065216, "learning_rate": 5.850887485798365e-06, "step": 131850 }, { "embedding_loss": 0.0152, "epoch": 7.3699502709951386, "grad_norm": 0.08796451985836029, "learning_rate": 5.844679120647161e-06, "step": 131900 }, { "embedding_loss": 0.0165, "epoch": 7.372744035313181, "grad_norm": 0.06416477262973785, "learning_rate": 5.8384707554959556e-06, "step": 131950 }, { "embedding_loss": 0.0159, "epoch": 7.375537799631223, "grad_norm": 0.07229957729578018, "learning_rate": 5.83226239034475e-06, "step": 132000 }, { "embedding_loss": 0.0173, "epoch": 7.378331563949265, "grad_norm": 0.07061868160963058, "learning_rate": 5.826054025193546e-06, "step": 132050 }, { "embedding_loss": 0.015, "epoch": 7.3811253282673075, "grad_norm": 0.052631083875894547, "learning_rate": 5.819845660042341e-06, "step": 132100 }, { "embedding_loss": 0.0174, "epoch": 7.38391909258535, "grad_norm": 0.0545504130423069, "learning_rate": 5.813637294891137e-06, "step": 132150 }, { "embedding_loss": 0.0163, "epoch": 7.386712856903392, "grad_norm": 0.06425320357084274, "learning_rate": 5.807428929739932e-06, "step": 132200 }, { "embedding_loss": 0.0144, "epoch": 7.389506621221434, "grad_norm": 0.0748821571469307, "learning_rate": 5.801220564588728e-06, "step": 132250 }, { "embedding_loss": 0.0166, "epoch": 7.3923003855394755, "grad_norm": 0.05936644226312637, "learning_rate": 5.795012199437523e-06, "step": 132300 }, { "embedding_loss": 0.0151, "epoch": 7.395094149857518, "grad_norm": 0.0709327682852745, "learning_rate": 5.788803834286317e-06, "step": 132350 }, { "embedding_loss": 0.0178, "epoch": 7.39788791417556, "grad_norm": 0.047576699405908585, "learning_rate": 5.782595469135113e-06, "step": 132400 }, { "embedding_loss": 0.0145, "epoch": 7.400681678493602, "grad_norm": 0.04976436868309975, "learning_rate": 5.776387103983908e-06, "step": 132450 }, { "embedding_loss": 0.0151, "epoch": 7.4034754428116445, "grad_norm": 0.049262844026088715, "learning_rate": 5.770178738832704e-06, "step": 132500 }, { "embedding_loss": 0.0157, "epoch": 7.406269207129687, "grad_norm": 0.09796982258558273, "learning_rate": 5.763970373681499e-06, "step": 132550 }, { "embedding_loss": 0.0177, "epoch": 7.409062971447729, "grad_norm": 0.03958730399608612, "learning_rate": 5.757762008530295e-06, "step": 132600 }, { "embedding_loss": 0.016, "epoch": 7.411856735765771, "grad_norm": 0.03641224279999733, "learning_rate": 5.751553643379089e-06, "step": 132650 }, { "embedding_loss": 0.0161, "epoch": 7.4146505000838125, "grad_norm": 0.08262453228235245, "learning_rate": 5.745345278227885e-06, "step": 132700 }, { "embedding_loss": 0.0159, "epoch": 7.417444264401855, "grad_norm": 0.050462834537029266, "learning_rate": 5.73913691307668e-06, "step": 132750 }, { "embedding_loss": 0.0164, "epoch": 7.420238028719897, "grad_norm": 0.045736391097307205, "learning_rate": 5.732928547925475e-06, "step": 132800 }, { "embedding_loss": 0.0142, "epoch": 7.423031793037939, "grad_norm": 0.033085282891988754, "learning_rate": 5.726720182774271e-06, "step": 132850 }, { "embedding_loss": 0.0135, "epoch": 7.4258255573559815, "grad_norm": 0.18250994384288788, "learning_rate": 5.720511817623065e-06, "step": 132900 }, { "embedding_loss": 0.0148, "epoch": 7.428619321674024, "grad_norm": 0.05505122244358063, "learning_rate": 5.714303452471861e-06, "step": 132950 }, { "embedding_loss": 0.0158, "epoch": 7.431413085992066, "grad_norm": 0.07550457119941711, "learning_rate": 5.708095087320656e-06, "step": 133000 }, { "embedding_loss": 0.0148, "epoch": 7.434206850310108, "grad_norm": 0.0361814871430397, "learning_rate": 5.701886722169452e-06, "step": 133050 }, { "embedding_loss": 0.0147, "epoch": 7.43700061462815, "grad_norm": 0.04689343273639679, "learning_rate": 5.695678357018247e-06, "step": 133100 }, { "embedding_loss": 0.0158, "epoch": 7.439794378946192, "grad_norm": 0.07154731452465057, "learning_rate": 5.689469991867043e-06, "step": 133150 }, { "embedding_loss": 0.0152, "epoch": 7.442588143264234, "grad_norm": 0.04977073147892952, "learning_rate": 5.683261626715837e-06, "step": 133200 }, { "embedding_loss": 0.0141, "epoch": 7.445381907582276, "grad_norm": 0.04541080817580223, "learning_rate": 5.677053261564632e-06, "step": 133250 }, { "embedding_loss": 0.0151, "epoch": 7.4481756719003185, "grad_norm": 0.05884585529565811, "learning_rate": 5.670844896413428e-06, "step": 133300 }, { "embedding_loss": 0.0164, "epoch": 7.450969436218361, "grad_norm": 0.0625000074505806, "learning_rate": 5.664636531262223e-06, "step": 133350 }, { "embedding_loss": 0.0161, "epoch": 7.453763200536403, "grad_norm": 0.059063468128442764, "learning_rate": 5.658428166111019e-06, "step": 133400 }, { "embedding_loss": 0.0151, "epoch": 7.456556964854445, "grad_norm": 0.06604129076004028, "learning_rate": 5.652219800959814e-06, "step": 133450 }, { "embedding_loss": 0.015, "epoch": 7.4593507291724865, "grad_norm": 0.058580316603183746, "learning_rate": 5.64601143580861e-06, "step": 133500 }, { "embedding_loss": 0.0135, "epoch": 7.462144493490529, "grad_norm": 0.09610086679458618, "learning_rate": 5.639803070657404e-06, "step": 133550 }, { "embedding_loss": 0.0146, "epoch": 7.464938257808571, "grad_norm": 0.07526242733001709, "learning_rate": 5.633594705506199e-06, "step": 133600 }, { "embedding_loss": 0.0153, "epoch": 7.467732022126613, "grad_norm": 0.04899730160832405, "learning_rate": 5.627386340354995e-06, "step": 133650 }, { "embedding_loss": 0.0156, "epoch": 7.4705257864446555, "grad_norm": 0.07121424376964569, "learning_rate": 5.62117797520379e-06, "step": 133700 }, { "embedding_loss": 0.0148, "epoch": 7.473319550762698, "grad_norm": 0.0699893981218338, "learning_rate": 5.614969610052586e-06, "step": 133750 }, { "embedding_loss": 0.0139, "epoch": 7.47611331508074, "grad_norm": 0.07073114812374115, "learning_rate": 5.60876124490138e-06, "step": 133800 }, { "embedding_loss": 0.0165, "epoch": 7.478907079398782, "grad_norm": 0.05045955628156662, "learning_rate": 5.602552879750176e-06, "step": 133850 }, { "embedding_loss": 0.0159, "epoch": 7.481700843716824, "grad_norm": 0.09479732066392899, "learning_rate": 5.596344514598971e-06, "step": 133900 }, { "embedding_loss": 0.0141, "epoch": 7.484494608034866, "grad_norm": 0.04172611981630325, "learning_rate": 5.590136149447767e-06, "step": 133950 }, { "embedding_loss": 0.0163, "epoch": 7.487288372352908, "grad_norm": 0.04535437375307083, "learning_rate": 5.5839277842965615e-06, "step": 134000 }, { "embedding_loss": 0.0158, "epoch": 7.49008213667095, "grad_norm": 0.045710545033216476, "learning_rate": 5.577719419145356e-06, "step": 134050 }, { "embedding_loss": 0.0142, "epoch": 7.4928759009889925, "grad_norm": 0.09403838217258453, "learning_rate": 5.571511053994152e-06, "step": 134100 }, { "embedding_loss": 0.0148, "epoch": 7.495669665307035, "grad_norm": 0.08599317073822021, "learning_rate": 5.565302688842947e-06, "step": 134150 }, { "embedding_loss": 0.0158, "epoch": 7.498463429625077, "grad_norm": 0.08361662179231644, "learning_rate": 5.559094323691743e-06, "step": 134200 }, { "embedding_loss": 0.0156, "epoch": 7.501257193943119, "grad_norm": 0.034180738031864166, "learning_rate": 5.552885958540538e-06, "step": 134250 }, { "embedding_loss": 0.0157, "epoch": 7.504050958261161, "grad_norm": 0.030917974188923836, "learning_rate": 5.546677593389334e-06, "step": 134300 }, { "embedding_loss": 0.0173, "epoch": 7.506844722579203, "grad_norm": 0.03797817602753639, "learning_rate": 5.540469228238129e-06, "step": 134350 }, { "embedding_loss": 0.016, "epoch": 7.509638486897245, "grad_norm": 0.05274832993745804, "learning_rate": 5.534260863086923e-06, "step": 134400 }, { "embedding_loss": 0.0147, "epoch": 7.512432251215287, "grad_norm": 0.05753246322274208, "learning_rate": 5.528052497935719e-06, "step": 134450 }, { "embedding_loss": 0.0156, "epoch": 7.5152260155333295, "grad_norm": 0.0655054822564125, "learning_rate": 5.521844132784514e-06, "step": 134500 }, { "embedding_loss": 0.015, "epoch": 7.518019779851372, "grad_norm": 0.04647801071405411, "learning_rate": 5.51563576763331e-06, "step": 134550 }, { "embedding_loss": 0.0151, "epoch": 7.520813544169414, "grad_norm": 0.039318978786468506, "learning_rate": 5.509427402482105e-06, "step": 134600 }, { "embedding_loss": 0.0158, "epoch": 7.523607308487456, "grad_norm": 0.03617199510335922, "learning_rate": 5.503219037330901e-06, "step": 134650 }, { "embedding_loss": 0.0173, "epoch": 7.526401072805498, "grad_norm": 0.05252876132726669, "learning_rate": 5.497010672179695e-06, "step": 134700 }, { "embedding_loss": 0.0159, "epoch": 7.529194837123541, "grad_norm": 0.04787129908800125, "learning_rate": 5.490802307028491e-06, "step": 134750 }, { "embedding_loss": 0.0161, "epoch": 7.531988601441583, "grad_norm": 0.045768942683935165, "learning_rate": 5.484593941877286e-06, "step": 134800 }, { "embedding_loss": 0.0154, "epoch": 7.534782365759624, "grad_norm": 0.045027539134025574, "learning_rate": 5.478385576726081e-06, "step": 134850 }, { "embedding_loss": 0.0158, "epoch": 7.5375761300776665, "grad_norm": 0.02997432090342045, "learning_rate": 5.4721772115748765e-06, "step": 134900 }, { "embedding_loss": 0.0161, "epoch": 7.540369894395709, "grad_norm": 0.0521201565861702, "learning_rate": 5.465968846423671e-06, "step": 134950 }, { "embedding_loss": 0.0153, "epoch": 7.543163658713751, "grad_norm": 0.028852906078100204, "learning_rate": 5.459760481272467e-06, "step": 135000 }, { "embedding_loss": 0.0166, "epoch": 7.545957423031793, "grad_norm": 0.03352310135960579, "learning_rate": 5.453552116121262e-06, "step": 135050 }, { "embedding_loss": 0.0159, "epoch": 7.548751187349835, "grad_norm": 0.0638401210308075, "learning_rate": 5.447343750970058e-06, "step": 135100 }, { "embedding_loss": 0.016, "epoch": 7.551544951667878, "grad_norm": 0.074251689016819, "learning_rate": 5.441135385818853e-06, "step": 135150 }, { "embedding_loss": 0.015, "epoch": 7.554338715985919, "grad_norm": 0.04788848012685776, "learning_rate": 5.434927020667649e-06, "step": 135200 }, { "embedding_loss": 0.0159, "epoch": 7.557132480303961, "grad_norm": 0.06266861408948898, "learning_rate": 5.4287186555164426e-06, "step": 135250 }, { "embedding_loss": 0.0182, "epoch": 7.5599262446220035, "grad_norm": 0.07460419088602066, "learning_rate": 5.422510290365238e-06, "step": 135300 }, { "embedding_loss": 0.0139, "epoch": 7.562720008940046, "grad_norm": 0.05039200931787491, "learning_rate": 5.416301925214034e-06, "step": 135350 }, { "embedding_loss": 0.0168, "epoch": 7.565513773258088, "grad_norm": 0.06373965740203857, "learning_rate": 5.410093560062829e-06, "step": 135400 }, { "embedding_loss": 0.0157, "epoch": 7.56830753757613, "grad_norm": 0.04785095155239105, "learning_rate": 5.403885194911625e-06, "step": 135450 }, { "embedding_loss": 0.016, "epoch": 7.571101301894172, "grad_norm": 0.031995419412851334, "learning_rate": 5.39767682976042e-06, "step": 135500 }, { "embedding_loss": 0.015, "epoch": 7.573895066212215, "grad_norm": 0.03164113312959671, "learning_rate": 5.391468464609216e-06, "step": 135550 }, { "embedding_loss": 0.0142, "epoch": 7.576688830530257, "grad_norm": 0.13557343184947968, "learning_rate": 5.38526009945801e-06, "step": 135600 }, { "embedding_loss": 0.0155, "epoch": 7.579482594848298, "grad_norm": 0.052740391343832016, "learning_rate": 5.379051734306805e-06, "step": 135650 }, { "embedding_loss": 0.0173, "epoch": 7.5822763591663405, "grad_norm": 0.056343477219343185, "learning_rate": 5.3728433691556005e-06, "step": 135700 }, { "embedding_loss": 0.0157, "epoch": 7.585070123484383, "grad_norm": 0.023987440392374992, "learning_rate": 5.366635004004396e-06, "step": 135750 }, { "embedding_loss": 0.0148, "epoch": 7.587863887802425, "grad_norm": 0.09078365564346313, "learning_rate": 5.3604266388531915e-06, "step": 135800 }, { "embedding_loss": 0.0161, "epoch": 7.590657652120467, "grad_norm": 0.04718571901321411, "learning_rate": 5.354218273701986e-06, "step": 135850 }, { "embedding_loss": 0.0154, "epoch": 7.593451416438509, "grad_norm": 0.06694589555263519, "learning_rate": 5.348009908550782e-06, "step": 135900 }, { "embedding_loss": 0.0158, "epoch": 7.596245180756552, "grad_norm": 0.06492333114147186, "learning_rate": 5.341801543399577e-06, "step": 135950 }, { "embedding_loss": 0.0157, "epoch": 7.599038945074594, "grad_norm": 0.03726405277848244, "learning_rate": 5.335593178248373e-06, "step": 136000 }, { "embedding_loss": 0.0142, "epoch": 7.601832709392635, "grad_norm": 0.05718429759144783, "learning_rate": 5.329384813097167e-06, "step": 136050 }, { "embedding_loss": 0.015, "epoch": 7.6046264737106775, "grad_norm": 0.053512223064899445, "learning_rate": 5.323176447945962e-06, "step": 136100 }, { "embedding_loss": 0.0148, "epoch": 7.60742023802872, "grad_norm": 0.04575628787279129, "learning_rate": 5.316968082794758e-06, "step": 136150 }, { "embedding_loss": 0.0162, "epoch": 7.610214002346762, "grad_norm": 0.05224055051803589, "learning_rate": 5.310759717643553e-06, "step": 136200 }, { "embedding_loss": 0.0145, "epoch": 7.613007766664804, "grad_norm": 0.05657549202442169, "learning_rate": 5.304551352492349e-06, "step": 136250 }, { "embedding_loss": 0.0145, "epoch": 7.615801530982846, "grad_norm": 0.0585649348795414, "learning_rate": 5.298342987341144e-06, "step": 136300 }, { "embedding_loss": 0.0176, "epoch": 7.618595295300889, "grad_norm": 0.044184207916259766, "learning_rate": 5.29213462218994e-06, "step": 136350 }, { "embedding_loss": 0.0159, "epoch": 7.621389059618931, "grad_norm": 0.09535758942365646, "learning_rate": 5.285926257038735e-06, "step": 136400 }, { "embedding_loss": 0.016, "epoch": 7.624182823936973, "grad_norm": 0.08637368679046631, "learning_rate": 5.279717891887529e-06, "step": 136450 }, { "embedding_loss": 0.0175, "epoch": 7.6269765882550145, "grad_norm": 0.04838310927152634, "learning_rate": 5.2735095267363245e-06, "step": 136500 }, { "embedding_loss": 0.016, "epoch": 7.629770352573057, "grad_norm": 0.044118721038103104, "learning_rate": 5.26730116158512e-06, "step": 136550 }, { "embedding_loss": 0.0147, "epoch": 7.632564116891099, "grad_norm": 0.05959166958928108, "learning_rate": 5.2610927964339155e-06, "step": 136600 }, { "embedding_loss": 0.0154, "epoch": 7.635357881209141, "grad_norm": 0.06502282619476318, "learning_rate": 5.254884431282711e-06, "step": 136650 }, { "embedding_loss": 0.0145, "epoch": 7.638151645527183, "grad_norm": 0.03438902273774147, "learning_rate": 5.2486760661315066e-06, "step": 136700 }, { "embedding_loss": 0.0152, "epoch": 7.640945409845226, "grad_norm": 0.042924586683511734, "learning_rate": 5.242467700980301e-06, "step": 136750 }, { "embedding_loss": 0.0171, "epoch": 7.643739174163268, "grad_norm": 0.072599396109581, "learning_rate": 5.236259335829097e-06, "step": 136800 }, { "embedding_loss": 0.016, "epoch": 7.64653293848131, "grad_norm": 0.07162138819694519, "learning_rate": 5.230050970677892e-06, "step": 136850 }, { "embedding_loss": 0.0146, "epoch": 7.6493267027993515, "grad_norm": 0.049751680344343185, "learning_rate": 5.223842605526687e-06, "step": 136900 }, { "embedding_loss": 0.0147, "epoch": 7.652120467117394, "grad_norm": 0.08118194341659546, "learning_rate": 5.2176342403754824e-06, "step": 136950 }, { "embedding_loss": 0.015, "epoch": 7.654914231435436, "grad_norm": 0.0778822973370552, "learning_rate": 5.211425875224277e-06, "step": 137000 }, { "embedding_loss": 0.0157, "epoch": 7.657707995753478, "grad_norm": 0.0957423746585846, "learning_rate": 5.205217510073073e-06, "step": 137050 }, { "embedding_loss": 0.0167, "epoch": 7.66050176007152, "grad_norm": 0.04951760917901993, "learning_rate": 5.199009144921868e-06, "step": 137100 }, { "embedding_loss": 0.0143, "epoch": 7.663295524389563, "grad_norm": 0.05426972731947899, "learning_rate": 5.192800779770664e-06, "step": 137150 }, { "embedding_loss": 0.0157, "epoch": 7.666089288707605, "grad_norm": 0.10137445479631424, "learning_rate": 5.186592414619459e-06, "step": 137200 }, { "embedding_loss": 0.0151, "epoch": 7.668883053025647, "grad_norm": 0.09467735141515732, "learning_rate": 5.180384049468255e-06, "step": 137250 }, { "embedding_loss": 0.0156, "epoch": 7.671676817343689, "grad_norm": 0.04260128363966942, "learning_rate": 5.1741756843170485e-06, "step": 137300 }, { "embedding_loss": 0.0173, "epoch": 7.674470581661731, "grad_norm": 0.06287594139575958, "learning_rate": 5.167967319165844e-06, "step": 137350 }, { "embedding_loss": 0.0159, "epoch": 7.677264345979773, "grad_norm": 0.060747742652893066, "learning_rate": 5.1617589540146395e-06, "step": 137400 }, { "embedding_loss": 0.016, "epoch": 7.680058110297815, "grad_norm": 0.05914287269115448, "learning_rate": 5.155550588863435e-06, "step": 137450 }, { "embedding_loss": 0.014, "epoch": 7.682851874615857, "grad_norm": 0.04605783894658089, "learning_rate": 5.1493422237122306e-06, "step": 137500 }, { "embedding_loss": 0.0157, "epoch": 7.6856456389339, "grad_norm": 0.07171718776226044, "learning_rate": 5.143133858561026e-06, "step": 137550 }, { "embedding_loss": 0.016, "epoch": 7.688439403251942, "grad_norm": 0.0417226105928421, "learning_rate": 5.136925493409822e-06, "step": 137600 }, { "embedding_loss": 0.0139, "epoch": 7.691233167569984, "grad_norm": 0.044290002435445786, "learning_rate": 5.130717128258616e-06, "step": 137650 }, { "embedding_loss": 0.0157, "epoch": 7.6940269318880254, "grad_norm": 0.05429627746343613, "learning_rate": 5.124508763107411e-06, "step": 137700 }, { "embedding_loss": 0.0153, "epoch": 7.696820696206068, "grad_norm": 0.06612218916416168, "learning_rate": 5.1183003979562064e-06, "step": 137750 }, { "embedding_loss": 0.016, "epoch": 7.69961446052411, "grad_norm": 0.02817896381020546, "learning_rate": 5.112092032805002e-06, "step": 137800 }, { "embedding_loss": 0.0147, "epoch": 7.702408224842152, "grad_norm": 0.041813578456640244, "learning_rate": 5.1058836676537975e-06, "step": 137850 }, { "embedding_loss": 0.0158, "epoch": 7.705201989160194, "grad_norm": 0.08163344115018845, "learning_rate": 5.099675302502592e-06, "step": 137900 }, { "embedding_loss": 0.0169, "epoch": 7.707995753478237, "grad_norm": 0.04607648402452469, "learning_rate": 5.093466937351388e-06, "step": 137950 }, { "embedding_loss": 0.015, "epoch": 7.710789517796279, "grad_norm": 0.04595935717225075, "learning_rate": 5.087258572200183e-06, "step": 138000 }, { "embedding_loss": 0.0157, "epoch": 7.713583282114321, "grad_norm": 0.03516564518213272, "learning_rate": 5.081050207048979e-06, "step": 138050 }, { "embedding_loss": 0.0147, "epoch": 7.716377046432363, "grad_norm": 0.10278575867414474, "learning_rate": 5.074841841897773e-06, "step": 138100 }, { "embedding_loss": 0.0153, "epoch": 7.719170810750406, "grad_norm": 0.05596132576465607, "learning_rate": 5.068633476746568e-06, "step": 138150 }, { "embedding_loss": 0.0146, "epoch": 7.721964575068447, "grad_norm": 0.07665430754423141, "learning_rate": 5.0624251115953635e-06, "step": 138200 }, { "embedding_loss": 0.0157, "epoch": 7.724758339386489, "grad_norm": 0.08114848285913467, "learning_rate": 5.056216746444159e-06, "step": 138250 }, { "embedding_loss": 0.0172, "epoch": 7.727552103704531, "grad_norm": 0.08287734538316727, "learning_rate": 5.0500083812929545e-06, "step": 138300 }, { "embedding_loss": 0.0167, "epoch": 7.730345868022574, "grad_norm": 0.07547027617692947, "learning_rate": 5.04380001614175e-06, "step": 138350 }, { "embedding_loss": 0.0169, "epoch": 7.733139632340616, "grad_norm": 0.0662161111831665, "learning_rate": 5.037591650990546e-06, "step": 138400 }, { "embedding_loss": 0.0141, "epoch": 7.735933396658658, "grad_norm": 0.08456622809171677, "learning_rate": 5.031383285839341e-06, "step": 138450 }, { "embedding_loss": 0.0121, "epoch": 7.7387271609767, "grad_norm": 0.019242485985159874, "learning_rate": 5.025174920688135e-06, "step": 138500 }, { "embedding_loss": 0.0157, "epoch": 7.741520925294742, "grad_norm": 0.06019526720046997, "learning_rate": 5.0189665555369304e-06, "step": 138550 }, { "embedding_loss": 0.0159, "epoch": 7.744314689612784, "grad_norm": 0.04803960770368576, "learning_rate": 5.012758190385726e-06, "step": 138600 }, { "embedding_loss": 0.0139, "epoch": 7.747108453930826, "grad_norm": 0.0839293897151947, "learning_rate": 5.0065498252345215e-06, "step": 138650 }, { "embedding_loss": 0.0151, "epoch": 7.749902218248868, "grad_norm": 0.06496819853782654, "learning_rate": 5.000341460083317e-06, "step": 138700 }, { "embedding_loss": 0.0155, "epoch": 7.752695982566911, "grad_norm": 0.061302460730075836, "learning_rate": 4.9941330949321125e-06, "step": 138750 }, { "embedding_loss": 0.0157, "epoch": 7.755489746884953, "grad_norm": 0.06493088603019714, "learning_rate": 4.987924729780907e-06, "step": 138800 }, { "embedding_loss": 0.0149, "epoch": 7.758283511202995, "grad_norm": 0.05394092574715614, "learning_rate": 4.981716364629703e-06, "step": 138850 }, { "embedding_loss": 0.0178, "epoch": 7.761077275521037, "grad_norm": 0.07571502029895782, "learning_rate": 4.975507999478497e-06, "step": 138900 }, { "embedding_loss": 0.0149, "epoch": 7.76387103983908, "grad_norm": 0.05186597630381584, "learning_rate": 4.969299634327293e-06, "step": 138950 }, { "embedding_loss": 0.0136, "epoch": 7.766664804157122, "grad_norm": 0.05231587588787079, "learning_rate": 4.963091269176088e-06, "step": 139000 }, { "embedding_loss": 0.0135, "epoch": 7.769458568475163, "grad_norm": 0.03708725422620773, "learning_rate": 4.956882904024883e-06, "step": 139050 }, { "embedding_loss": 0.0166, "epoch": 7.772252332793205, "grad_norm": 0.0521240159869194, "learning_rate": 4.9506745388736785e-06, "step": 139100 }, { "embedding_loss": 0.0149, "epoch": 7.775046097111248, "grad_norm": 0.21957917511463165, "learning_rate": 4.944466173722474e-06, "step": 139150 }, { "embedding_loss": 0.0153, "epoch": 7.77783986142929, "grad_norm": 0.04781011492013931, "learning_rate": 4.9382578085712696e-06, "step": 139200 }, { "embedding_loss": 0.0156, "epoch": 7.780633625747332, "grad_norm": 0.02043551206588745, "learning_rate": 4.932049443420064e-06, "step": 139250 }, { "embedding_loss": 0.0166, "epoch": 7.783427390065374, "grad_norm": 0.101578488945961, "learning_rate": 4.92584107826886e-06, "step": 139300 }, { "embedding_loss": 0.0167, "epoch": 7.7862211543834166, "grad_norm": 0.05577180162072182, "learning_rate": 4.919632713117655e-06, "step": 139350 }, { "embedding_loss": 0.0152, "epoch": 7.789014918701458, "grad_norm": 0.050792302936315536, "learning_rate": 4.913424347966451e-06, "step": 139400 }, { "embedding_loss": 0.0169, "epoch": 7.7918086830195, "grad_norm": 0.08153994381427765, "learning_rate": 4.9072159828152454e-06, "step": 139450 }, { "embedding_loss": 0.0153, "epoch": 7.794602447337542, "grad_norm": 0.056249137967824936, "learning_rate": 4.901007617664041e-06, "step": 139500 }, { "embedding_loss": 0.0148, "epoch": 7.797396211655585, "grad_norm": 0.10349223762750626, "learning_rate": 4.8947992525128365e-06, "step": 139550 }, { "embedding_loss": 0.0143, "epoch": 7.800189975973627, "grad_norm": 0.03883183002471924, "learning_rate": 4.888590887361632e-06, "step": 139600 }, { "embedding_loss": 0.0158, "epoch": 7.802983740291669, "grad_norm": 0.029566630721092224, "learning_rate": 4.882382522210427e-06, "step": 139650 }, { "embedding_loss": 0.0158, "epoch": 7.805777504609711, "grad_norm": 0.0717063769698143, "learning_rate": 4.876174157059222e-06, "step": 139700 }, { "embedding_loss": 0.0157, "epoch": 7.8085712689277536, "grad_norm": 0.056294482201337814, "learning_rate": 4.869965791908018e-06, "step": 139750 }, { "embedding_loss": 0.017, "epoch": 7.811365033245796, "grad_norm": 0.04951024055480957, "learning_rate": 4.863757426756812e-06, "step": 139800 }, { "embedding_loss": 0.0153, "epoch": 7.814158797563837, "grad_norm": 0.07926607131958008, "learning_rate": 4.857549061605608e-06, "step": 139850 }, { "embedding_loss": 0.0145, "epoch": 7.816952561881879, "grad_norm": 0.06322721391916275, "learning_rate": 4.851340696454403e-06, "step": 139900 }, { "embedding_loss": 0.0163, "epoch": 7.819746326199922, "grad_norm": 0.048883240669965744, "learning_rate": 4.845132331303198e-06, "step": 139950 }, { "embedding_loss": 0.0167, "epoch": 7.822540090517964, "grad_norm": 0.04128299281001091, "learning_rate": 4.8389239661519936e-06, "step": 140000 }, { "embedding_loss": 0.016, "epoch": 7.825333854836006, "grad_norm": 0.056100860238075256, "learning_rate": 4.832715601000788e-06, "step": 140050 }, { "embedding_loss": 0.0158, "epoch": 7.828127619154048, "grad_norm": 0.044209595769643784, "learning_rate": 4.826507235849584e-06, "step": 140100 }, { "embedding_loss": 0.0171, "epoch": 7.8309213834720905, "grad_norm": 0.03360556811094284, "learning_rate": 4.820298870698379e-06, "step": 140150 }, { "embedding_loss": 0.0156, "epoch": 7.833715147790133, "grad_norm": 0.06806418299674988, "learning_rate": 4.814090505547175e-06, "step": 140200 }, { "embedding_loss": 0.0148, "epoch": 7.836508912108174, "grad_norm": 0.04394381865859032, "learning_rate": 4.8078821403959694e-06, "step": 140250 }, { "embedding_loss": 0.0166, "epoch": 7.839302676426216, "grad_norm": 0.03416161984205246, "learning_rate": 4.801673775244765e-06, "step": 140300 }, { "embedding_loss": 0.0158, "epoch": 7.842096440744259, "grad_norm": 0.06977276504039764, "learning_rate": 4.7954654100935605e-06, "step": 140350 }, { "embedding_loss": 0.0161, "epoch": 7.844890205062301, "grad_norm": 0.04947476089000702, "learning_rate": 4.789257044942356e-06, "step": 140400 }, { "embedding_loss": 0.0176, "epoch": 7.847683969380343, "grad_norm": 0.0475824810564518, "learning_rate": 4.783048679791151e-06, "step": 140450 }, { "embedding_loss": 0.0162, "epoch": 7.850477733698385, "grad_norm": 0.036376964300870895, "learning_rate": 4.776840314639946e-06, "step": 140500 }, { "embedding_loss": 0.0151, "epoch": 7.8532714980164275, "grad_norm": 0.05750812217593193, "learning_rate": 4.770631949488742e-06, "step": 140550 }, { "embedding_loss": 0.0143, "epoch": 7.85606526233447, "grad_norm": 0.07255418598651886, "learning_rate": 4.764423584337537e-06, "step": 140600 }, { "embedding_loss": 0.0156, "epoch": 7.858859026652512, "grad_norm": 0.1038205549120903, "learning_rate": 4.758215219186332e-06, "step": 140650 }, { "embedding_loss": 0.0156, "epoch": 7.861652790970553, "grad_norm": 0.08250603824853897, "learning_rate": 4.752006854035127e-06, "step": 140700 }, { "embedding_loss": 0.0176, "epoch": 7.864446555288596, "grad_norm": 0.057784564793109894, "learning_rate": 4.745798488883923e-06, "step": 140750 }, { "embedding_loss": 0.016, "epoch": 7.867240319606638, "grad_norm": 0.1337995082139969, "learning_rate": 4.739590123732718e-06, "step": 140800 }, { "embedding_loss": 0.0142, "epoch": 7.87003408392468, "grad_norm": 0.0698920413851738, "learning_rate": 4.733381758581513e-06, "step": 140850 }, { "embedding_loss": 0.0169, "epoch": 7.872827848242722, "grad_norm": 0.05566681548953056, "learning_rate": 4.727173393430309e-06, "step": 140900 }, { "embedding_loss": 0.0156, "epoch": 7.8756216125607645, "grad_norm": 0.03844939544796944, "learning_rate": 4.720965028279103e-06, "step": 140950 }, { "embedding_loss": 0.0152, "epoch": 7.878415376878807, "grad_norm": 0.05615454539656639, "learning_rate": 4.714756663127899e-06, "step": 141000 }, { "embedding_loss": 0.0137, "epoch": 7.881209141196849, "grad_norm": 0.06420154124498367, "learning_rate": 4.708548297976694e-06, "step": 141050 }, { "embedding_loss": 0.0144, "epoch": 7.88400290551489, "grad_norm": 0.061636783182621, "learning_rate": 4.702339932825489e-06, "step": 141100 }, { "embedding_loss": 0.0155, "epoch": 7.886796669832933, "grad_norm": 0.05933433398604393, "learning_rate": 4.6961315676742845e-06, "step": 141150 }, { "embedding_loss": 0.0146, "epoch": 7.889590434150975, "grad_norm": 0.05684826523065567, "learning_rate": 4.68992320252308e-06, "step": 141200 }, { "embedding_loss": 0.0147, "epoch": 7.892384198469017, "grad_norm": 0.05655647814273834, "learning_rate": 4.6837148373718755e-06, "step": 141250 }, { "embedding_loss": 0.0142, "epoch": 7.895177962787059, "grad_norm": 0.0659424290060997, "learning_rate": 4.67750647222067e-06, "step": 141300 }, { "embedding_loss": 0.0164, "epoch": 7.8979717271051015, "grad_norm": 0.07677673548460007, "learning_rate": 4.671298107069466e-06, "step": 141350 }, { "embedding_loss": 0.0161, "epoch": 7.900765491423144, "grad_norm": 0.05039184167981148, "learning_rate": 4.665089741918261e-06, "step": 141400 }, { "embedding_loss": 0.0187, "epoch": 7.903559255741186, "grad_norm": 0.025092948228120804, "learning_rate": 4.658881376767057e-06, "step": 141450 }, { "embedding_loss": 0.015, "epoch": 7.906353020059228, "grad_norm": 0.034462593495845795, "learning_rate": 4.652673011615851e-06, "step": 141500 }, { "embedding_loss": 0.015, "epoch": 7.90914678437727, "grad_norm": 0.05116935074329376, "learning_rate": 4.646464646464647e-06, "step": 141550 }, { "embedding_loss": 0.0129, "epoch": 7.911940548695312, "grad_norm": 0.047426752746105194, "learning_rate": 4.640256281313442e-06, "step": 141600 }, { "embedding_loss": 0.0143, "epoch": 7.914734313013354, "grad_norm": 0.055629000067710876, "learning_rate": 4.634047916162238e-06, "step": 141650 }, { "embedding_loss": 0.0168, "epoch": 7.917528077331396, "grad_norm": 0.032767269760370255, "learning_rate": 4.627839551011033e-06, "step": 141700 }, { "embedding_loss": 0.0153, "epoch": 7.9203218416494385, "grad_norm": 0.048217229545116425, "learning_rate": 4.621631185859828e-06, "step": 141750 }, { "embedding_loss": 0.0166, "epoch": 7.923115605967481, "grad_norm": 0.07299972325563431, "learning_rate": 4.615422820708624e-06, "step": 141800 }, { "embedding_loss": 0.0159, "epoch": 7.925909370285523, "grad_norm": 0.04857059195637703, "learning_rate": 4.609214455557418e-06, "step": 141850 }, { "embedding_loss": 0.0144, "epoch": 7.928703134603564, "grad_norm": 0.05191836506128311, "learning_rate": 4.603006090406214e-06, "step": 141900 }, { "embedding_loss": 0.0158, "epoch": 7.931496898921607, "grad_norm": 0.04399538040161133, "learning_rate": 4.596797725255009e-06, "step": 141950 }, { "embedding_loss": 0.0162, "epoch": 7.934290663239649, "grad_norm": 0.0775209292769432, "learning_rate": 4.590589360103804e-06, "step": 142000 }, { "embedding_loss": 0.0143, "epoch": 7.937084427557691, "grad_norm": 0.047511227428913116, "learning_rate": 4.5843809949525995e-06, "step": 142050 }, { "embedding_loss": 0.0144, "epoch": 7.939878191875733, "grad_norm": 0.07279589772224426, "learning_rate": 4.578172629801394e-06, "step": 142100 }, { "embedding_loss": 0.0172, "epoch": 7.9426719561937755, "grad_norm": 0.06378699839115143, "learning_rate": 4.57196426465019e-06, "step": 142150 }, { "embedding_loss": 0.0149, "epoch": 7.945465720511818, "grad_norm": 0.04138970375061035, "learning_rate": 4.565755899498985e-06, "step": 142200 }, { "embedding_loss": 0.016, "epoch": 7.94825948482986, "grad_norm": 0.06876478344202042, "learning_rate": 4.559547534347781e-06, "step": 142250 }, { "embedding_loss": 0.0146, "epoch": 7.951053249147902, "grad_norm": 0.04213244467973709, "learning_rate": 4.553339169196575e-06, "step": 142300 }, { "embedding_loss": 0.0142, "epoch": 7.9538470134659445, "grad_norm": 0.027315618470311165, "learning_rate": 4.547130804045371e-06, "step": 142350 }, { "embedding_loss": 0.0162, "epoch": 7.956640777783986, "grad_norm": 0.061694882810115814, "learning_rate": 4.540922438894166e-06, "step": 142400 }, { "embedding_loss": 0.0158, "epoch": 7.959434542102028, "grad_norm": 0.06917886435985565, "learning_rate": 4.534714073742962e-06, "step": 142450 }, { "embedding_loss": 0.016, "epoch": 7.96222830642007, "grad_norm": 0.07773596048355103, "learning_rate": 4.528505708591757e-06, "step": 142500 }, { "embedding_loss": 0.016, "epoch": 7.9650220707381125, "grad_norm": 0.05112120509147644, "learning_rate": 4.522297343440552e-06, "step": 142550 }, { "embedding_loss": 0.015, "epoch": 7.967815835056155, "grad_norm": 0.042156390845775604, "learning_rate": 4.516088978289348e-06, "step": 142600 }, { "embedding_loss": 0.0155, "epoch": 7.970609599374197, "grad_norm": 0.025161266326904297, "learning_rate": 4.509880613138143e-06, "step": 142650 }, { "embedding_loss": 0.0156, "epoch": 7.973403363692239, "grad_norm": 0.10848912596702576, "learning_rate": 4.503672247986938e-06, "step": 142700 }, { "embedding_loss": 0.0154, "epoch": 7.976197128010281, "grad_norm": 0.036289282143116, "learning_rate": 4.497463882835733e-06, "step": 142750 }, { "embedding_loss": 0.0172, "epoch": 7.978990892328323, "grad_norm": 0.06117171049118042, "learning_rate": 4.491255517684529e-06, "step": 142800 }, { "embedding_loss": 0.0148, "epoch": 7.981784656646365, "grad_norm": 0.06379450857639313, "learning_rate": 4.4850471525333235e-06, "step": 142850 }, { "embedding_loss": 0.0165, "epoch": 7.984578420964407, "grad_norm": 0.04176761209964752, "learning_rate": 4.478838787382119e-06, "step": 142900 }, { "embedding_loss": 0.016, "epoch": 7.9873721852824495, "grad_norm": 0.027536462992429733, "learning_rate": 4.4726304222309145e-06, "step": 142950 }, { "embedding_loss": 0.016, "epoch": 7.990165949600492, "grad_norm": 0.043112851679325104, "learning_rate": 4.466422057079709e-06, "step": 143000 }, { "embedding_loss": 0.0149, "epoch": 7.992959713918534, "grad_norm": 0.5323634147644043, "learning_rate": 4.460213691928505e-06, "step": 143050 }, { "embedding_loss": 0.0149, "epoch": 7.995753478236576, "grad_norm": 0.04633312299847603, "learning_rate": 4.4540053267773e-06, "step": 143100 }, { "embedding_loss": 0.0152, "epoch": 7.9985472425546185, "grad_norm": 0.04311442747712135, "learning_rate": 4.447796961626095e-06, "step": 143150 }, { "embedding_loss": 0.0162, "epoch": 8.00134100687266, "grad_norm": 0.06420165300369263, "learning_rate": 4.44158859647489e-06, "step": 143200 }, { "embedding_loss": 0.0159, "epoch": 8.004134771190703, "grad_norm": 0.05892159044742584, "learning_rate": 4.435380231323686e-06, "step": 143250 }, { "embedding_loss": 0.0144, "epoch": 8.006928535508745, "grad_norm": 0.060875304043293, "learning_rate": 4.429171866172481e-06, "step": 143300 }, { "embedding_loss": 0.016, "epoch": 8.009722299826787, "grad_norm": 0.056694839149713516, "learning_rate": 4.422963501021276e-06, "step": 143350 }, { "embedding_loss": 0.0166, "epoch": 8.012516064144828, "grad_norm": 0.03988644853234291, "learning_rate": 4.416755135870072e-06, "step": 143400 }, { "embedding_loss": 0.0148, "epoch": 8.01530982846287, "grad_norm": 0.060001764446496964, "learning_rate": 4.410546770718867e-06, "step": 143450 }, { "embedding_loss": 0.015, "epoch": 8.018103592780912, "grad_norm": 0.04318331554532051, "learning_rate": 4.404338405567663e-06, "step": 143500 }, { "embedding_loss": 0.0163, "epoch": 8.020897357098955, "grad_norm": 0.07054218649864197, "learning_rate": 4.398130040416457e-06, "step": 143550 }, { "embedding_loss": 0.0156, "epoch": 8.023691121416997, "grad_norm": 0.08779139816761017, "learning_rate": 4.391921675265253e-06, "step": 143600 }, { "embedding_loss": 0.0152, "epoch": 8.026484885735039, "grad_norm": 0.045149099081754684, "learning_rate": 4.385713310114048e-06, "step": 143650 }, { "embedding_loss": 0.0164, "epoch": 8.029278650053081, "grad_norm": 0.049239855259656906, "learning_rate": 4.379504944962844e-06, "step": 143700 }, { "embedding_loss": 0.0149, "epoch": 8.032072414371124, "grad_norm": 0.0310216024518013, "learning_rate": 4.3732965798116385e-06, "step": 143750 }, { "embedding_loss": 0.0147, "epoch": 8.034866178689166, "grad_norm": 0.0534900538623333, "learning_rate": 4.367088214660434e-06, "step": 143800 }, { "embedding_loss": 0.0139, "epoch": 8.037659943007208, "grad_norm": 0.03358186036348343, "learning_rate": 4.3608798495092295e-06, "step": 143850 }, { "embedding_loss": 0.0151, "epoch": 8.04045370732525, "grad_norm": 0.05215112119913101, "learning_rate": 4.354671484358024e-06, "step": 143900 }, { "embedding_loss": 0.0169, "epoch": 8.043247471643292, "grad_norm": 0.07432570308446884, "learning_rate": 4.34846311920682e-06, "step": 143950 }, { "embedding_loss": 0.0164, "epoch": 8.046041235961335, "grad_norm": 0.06607092171907425, "learning_rate": 4.342254754055614e-06, "step": 144000 }, { "embedding_loss": 0.0173, "epoch": 8.048835000279377, "grad_norm": 0.041787005960941315, "learning_rate": 4.33604638890441e-06, "step": 144050 }, { "embedding_loss": 0.0166, "epoch": 8.05162876459742, "grad_norm": 0.05770726129412651, "learning_rate": 4.329838023753205e-06, "step": 144100 }, { "embedding_loss": 0.0151, "epoch": 8.054422528915461, "grad_norm": 0.05363460257649422, "learning_rate": 4.323629658602e-06, "step": 144150 }, { "embedding_loss": 0.0172, "epoch": 8.057216293233504, "grad_norm": 0.03553815558552742, "learning_rate": 4.317421293450796e-06, "step": 144200 }, { "embedding_loss": 0.0156, "epoch": 8.060010057551544, "grad_norm": 0.041245367377996445, "learning_rate": 4.311212928299591e-06, "step": 144250 }, { "embedding_loss": 0.0161, "epoch": 8.062803821869586, "grad_norm": 0.043860141187906265, "learning_rate": 4.305004563148387e-06, "step": 144300 }, { "embedding_loss": 0.0155, "epoch": 8.065597586187629, "grad_norm": 0.056407809257507324, "learning_rate": 4.298796197997181e-06, "step": 144350 }, { "embedding_loss": 0.0145, "epoch": 8.06839135050567, "grad_norm": 0.03490234911441803, "learning_rate": 4.292587832845977e-06, "step": 144400 }, { "embedding_loss": 0.0162, "epoch": 8.071185114823713, "grad_norm": 0.06116282939910889, "learning_rate": 4.286379467694772e-06, "step": 144450 }, { "embedding_loss": 0.0179, "epoch": 8.073978879141755, "grad_norm": 0.11266177147626877, "learning_rate": 4.280171102543568e-06, "step": 144500 }, { "embedding_loss": 0.0154, "epoch": 8.076772643459798, "grad_norm": 0.0698772519826889, "learning_rate": 4.2739627373923625e-06, "step": 144550 }, { "embedding_loss": 0.0154, "epoch": 8.07956640777784, "grad_norm": 0.030191976577043533, "learning_rate": 4.267754372241158e-06, "step": 144600 }, { "embedding_loss": 0.0168, "epoch": 8.082360172095882, "grad_norm": 0.06653487682342529, "learning_rate": 4.2615460070899535e-06, "step": 144650 }, { "embedding_loss": 0.015, "epoch": 8.085153936413924, "grad_norm": 0.05131590738892555, "learning_rate": 4.255337641938749e-06, "step": 144700 }, { "embedding_loss": 0.0164, "epoch": 8.087947700731966, "grad_norm": 0.02848934754729271, "learning_rate": 4.249129276787544e-06, "step": 144750 }, { "embedding_loss": 0.0171, "epoch": 8.090741465050009, "grad_norm": 0.06961118429899216, "learning_rate": 4.242920911636339e-06, "step": 144800 }, { "embedding_loss": 0.0154, "epoch": 8.093535229368051, "grad_norm": 0.06119099259376526, "learning_rate": 4.236712546485135e-06, "step": 144850 }, { "embedding_loss": 0.015, "epoch": 8.096328993686093, "grad_norm": 0.05504428222775459, "learning_rate": 4.230504181333929e-06, "step": 144900 }, { "embedding_loss": 0.0141, "epoch": 8.099122758004135, "grad_norm": 0.06728274375200272, "learning_rate": 4.224295816182725e-06, "step": 144950 }, { "embedding_loss": 0.0165, "epoch": 8.101916522322178, "grad_norm": 0.052492547780275345, "learning_rate": 4.2180874510315204e-06, "step": 145000 }, { "embedding_loss": 0.0164, "epoch": 8.10471028664022, "grad_norm": 0.0477912463247776, "learning_rate": 4.211879085880315e-06, "step": 145050 }, { "embedding_loss": 0.0154, "epoch": 8.10750405095826, "grad_norm": 0.03573684021830559, "learning_rate": 4.205670720729111e-06, "step": 145100 }, { "embedding_loss": 0.0145, "epoch": 8.110297815276303, "grad_norm": 0.08818930387496948, "learning_rate": 4.199462355577906e-06, "step": 145150 }, { "embedding_loss": 0.0157, "epoch": 8.113091579594345, "grad_norm": 0.03861730545759201, "learning_rate": 4.193253990426701e-06, "step": 145200 }, { "embedding_loss": 0.0168, "epoch": 8.115885343912387, "grad_norm": 0.09663066267967224, "learning_rate": 4.187045625275496e-06, "step": 145250 }, { "embedding_loss": 0.015, "epoch": 8.11867910823043, "grad_norm": 0.06574726104736328, "learning_rate": 4.180837260124292e-06, "step": 145300 }, { "embedding_loss": 0.0151, "epoch": 8.121472872548471, "grad_norm": 0.04052649065852165, "learning_rate": 4.174628894973087e-06, "step": 145350 }, { "embedding_loss": 0.0151, "epoch": 8.124266636866514, "grad_norm": 0.053414154797792435, "learning_rate": 4.168420529821882e-06, "step": 145400 }, { "embedding_loss": 0.0156, "epoch": 8.127060401184556, "grad_norm": 0.04597848653793335, "learning_rate": 4.1622121646706775e-06, "step": 145450 }, { "embedding_loss": 0.0169, "epoch": 8.129854165502598, "grad_norm": 0.07772082090377808, "learning_rate": 4.156003799519473e-06, "step": 145500 }, { "embedding_loss": 0.016, "epoch": 8.13264792982064, "grad_norm": 0.04812376946210861, "learning_rate": 4.1497954343682686e-06, "step": 145550 }, { "embedding_loss": 0.0166, "epoch": 8.135441694138683, "grad_norm": 0.06415215879678726, "learning_rate": 4.143587069217063e-06, "step": 145600 }, { "embedding_loss": 0.0157, "epoch": 8.138235458456725, "grad_norm": 0.06860022246837616, "learning_rate": 4.137378704065859e-06, "step": 145650 }, { "embedding_loss": 0.0139, "epoch": 8.141029222774767, "grad_norm": 0.08358192443847656, "learning_rate": 4.131170338914654e-06, "step": 145700 }, { "embedding_loss": 0.0159, "epoch": 8.14382298709281, "grad_norm": 0.049305979162454605, "learning_rate": 4.12496197376345e-06, "step": 145750 }, { "embedding_loss": 0.0158, "epoch": 8.146616751410852, "grad_norm": 0.024661963805556297, "learning_rate": 4.1187536086122444e-06, "step": 145800 }, { "embedding_loss": 0.0167, "epoch": 8.149410515728894, "grad_norm": 0.06120772659778595, "learning_rate": 4.11254524346104e-06, "step": 145850 }, { "embedding_loss": 0.0165, "epoch": 8.152204280046936, "grad_norm": 0.04738255590200424, "learning_rate": 4.1063368783098355e-06, "step": 145900 }, { "embedding_loss": 0.0167, "epoch": 8.154998044364977, "grad_norm": 0.10655181109905243, "learning_rate": 4.10012851315863e-06, "step": 145950 }, { "embedding_loss": 0.0147, "epoch": 8.157791808683019, "grad_norm": 0.05456550791859627, "learning_rate": 4.093920148007426e-06, "step": 146000 }, { "embedding_loss": 0.0159, "epoch": 8.160585573001061, "grad_norm": 0.043607838451862335, "learning_rate": 4.08771178285622e-06, "step": 146050 }, { "embedding_loss": 0.0136, "epoch": 8.163379337319103, "grad_norm": 0.026854954659938812, "learning_rate": 4.081503417705016e-06, "step": 146100 }, { "embedding_loss": 0.0161, "epoch": 8.166173101637145, "grad_norm": 0.0949036255478859, "learning_rate": 4.075295052553811e-06, "step": 146150 }, { "embedding_loss": 0.0163, "epoch": 8.168966865955188, "grad_norm": 0.052596479654312134, "learning_rate": 4.069086687402606e-06, "step": 146200 }, { "embedding_loss": 0.0162, "epoch": 8.17176063027323, "grad_norm": 0.08315473794937134, "learning_rate": 4.0628783222514015e-06, "step": 146250 }, { "embedding_loss": 0.0151, "epoch": 8.174554394591272, "grad_norm": 0.058567069470882416, "learning_rate": 4.056669957100197e-06, "step": 146300 }, { "embedding_loss": 0.0158, "epoch": 8.177348158909314, "grad_norm": 0.08132802695035934, "learning_rate": 4.0504615919489925e-06, "step": 146350 }, { "embedding_loss": 0.0157, "epoch": 8.180141923227357, "grad_norm": 0.04848384112119675, "learning_rate": 4.044253226797787e-06, "step": 146400 }, { "embedding_loss": 0.0157, "epoch": 8.182935687545399, "grad_norm": 0.05570460483431816, "learning_rate": 4.038044861646583e-06, "step": 146450 }, { "embedding_loss": 0.0159, "epoch": 8.185729451863441, "grad_norm": 0.05229785293340683, "learning_rate": 4.031836496495378e-06, "step": 146500 }, { "embedding_loss": 0.0151, "epoch": 8.188523216181483, "grad_norm": 0.08448002487421036, "learning_rate": 4.025628131344174e-06, "step": 146550 }, { "embedding_loss": 0.0152, "epoch": 8.191316980499526, "grad_norm": 0.044427912682294846, "learning_rate": 4.0194197661929684e-06, "step": 146600 }, { "embedding_loss": 0.015, "epoch": 8.194110744817568, "grad_norm": 0.045312706381082535, "learning_rate": 4.013211401041764e-06, "step": 146650 }, { "embedding_loss": 0.0168, "epoch": 8.19690450913561, "grad_norm": 0.07529039680957794, "learning_rate": 4.0070030358905595e-06, "step": 146700 }, { "embedding_loss": 0.0153, "epoch": 8.199698273453652, "grad_norm": 0.04421663284301758, "learning_rate": 4.000794670739355e-06, "step": 146750 }, { "embedding_loss": 0.0142, "epoch": 8.202492037771693, "grad_norm": 0.05931408330798149, "learning_rate": 3.99458630558815e-06, "step": 146800 }, { "embedding_loss": 0.0149, "epoch": 8.205285802089735, "grad_norm": 0.0630725622177124, "learning_rate": 3.988377940436945e-06, "step": 146850 }, { "embedding_loss": 0.0162, "epoch": 8.208079566407777, "grad_norm": 0.0778961330652237, "learning_rate": 3.982169575285741e-06, "step": 146900 }, { "embedding_loss": 0.0168, "epoch": 8.21087333072582, "grad_norm": 0.07034385204315186, "learning_rate": 3.975961210134535e-06, "step": 146950 }, { "embedding_loss": 0.0144, "epoch": 8.213667095043862, "grad_norm": 0.05898449197411537, "learning_rate": 3.969752844983331e-06, "step": 147000 }, { "embedding_loss": 0.0135, "epoch": 8.216460859361904, "grad_norm": 0.05083516985177994, "learning_rate": 3.963544479832126e-06, "step": 147050 }, { "embedding_loss": 0.0147, "epoch": 8.219254623679946, "grad_norm": 0.04925710707902908, "learning_rate": 3.957336114680921e-06, "step": 147100 }, { "embedding_loss": 0.0162, "epoch": 8.222048387997988, "grad_norm": 0.03163077309727669, "learning_rate": 3.9511277495297165e-06, "step": 147150 }, { "embedding_loss": 0.0171, "epoch": 8.22484215231603, "grad_norm": 0.025726785883307457, "learning_rate": 3.944919384378512e-06, "step": 147200 }, { "embedding_loss": 0.0146, "epoch": 8.227635916634073, "grad_norm": 0.05579661950469017, "learning_rate": 3.938711019227307e-06, "step": 147250 }, { "embedding_loss": 0.0141, "epoch": 8.230429680952115, "grad_norm": 0.056074850261211395, "learning_rate": 3.932502654076102e-06, "step": 147300 }, { "embedding_loss": 0.0166, "epoch": 8.233223445270157, "grad_norm": 0.04700454697012901, "learning_rate": 3.926294288924898e-06, "step": 147350 }, { "embedding_loss": 0.0165, "epoch": 8.2360172095882, "grad_norm": 0.03531279042363167, "learning_rate": 3.920085923773693e-06, "step": 147400 }, { "embedding_loss": 0.0154, "epoch": 8.238810973906242, "grad_norm": 0.061014942824840546, "learning_rate": 3.913877558622488e-06, "step": 147450 }, { "embedding_loss": 0.0147, "epoch": 8.241604738224284, "grad_norm": 0.03606068715453148, "learning_rate": 3.9076691934712834e-06, "step": 147500 }, { "embedding_loss": 0.0173, "epoch": 8.244398502542326, "grad_norm": 0.0627874881029129, "learning_rate": 3.901460828320079e-06, "step": 147550 }, { "embedding_loss": 0.016, "epoch": 8.247192266860367, "grad_norm": 0.033137623220682144, "learning_rate": 3.8952524631688745e-06, "step": 147600 }, { "embedding_loss": 0.0139, "epoch": 8.249986031178409, "grad_norm": 0.04442247375845909, "learning_rate": 3.889044098017669e-06, "step": 147650 }, { "embedding_loss": 0.0171, "epoch": 8.252779795496451, "grad_norm": 0.07028225064277649, "learning_rate": 3.882835732866465e-06, "step": 147700 }, { "embedding_loss": 0.0126, "epoch": 8.255573559814493, "grad_norm": 0.07205158472061157, "learning_rate": 3.87662736771526e-06, "step": 147750 }, { "embedding_loss": 0.0159, "epoch": 8.258367324132536, "grad_norm": 0.062027644366025925, "learning_rate": 3.870419002564056e-06, "step": 147800 }, { "embedding_loss": 0.0148, "epoch": 8.261161088450578, "grad_norm": 0.05245164781808853, "learning_rate": 3.86421063741285e-06, "step": 147850 }, { "embedding_loss": 0.0152, "epoch": 8.26395485276862, "grad_norm": 0.05164723098278046, "learning_rate": 3.858002272261646e-06, "step": 147900 }, { "embedding_loss": 0.0168, "epoch": 8.266748617086662, "grad_norm": 0.033159755170345306, "learning_rate": 3.851793907110441e-06, "step": 147950 }, { "embedding_loss": 0.0156, "epoch": 8.269542381404705, "grad_norm": 0.0814923495054245, "learning_rate": 3.845585541959236e-06, "step": 148000 }, { "embedding_loss": 0.0158, "epoch": 8.272336145722747, "grad_norm": 0.04957865923643112, "learning_rate": 3.8393771768080316e-06, "step": 148050 }, { "embedding_loss": 0.0178, "epoch": 8.27512991004079, "grad_norm": 0.05099678784608841, "learning_rate": 3.833168811656826e-06, "step": 148100 }, { "embedding_loss": 0.0159, "epoch": 8.277923674358831, "grad_norm": 0.06204412505030632, "learning_rate": 3.826960446505622e-06, "step": 148150 }, { "embedding_loss": 0.0168, "epoch": 8.280717438676874, "grad_norm": 0.07615069299936295, "learning_rate": 3.820752081354417e-06, "step": 148200 }, { "embedding_loss": 0.0165, "epoch": 8.283511202994916, "grad_norm": 0.04502413049340248, "learning_rate": 3.8145437162032124e-06, "step": 148250 }, { "embedding_loss": 0.0143, "epoch": 8.286304967312958, "grad_norm": 0.06529464572668076, "learning_rate": 3.808335351052008e-06, "step": 148300 }, { "embedding_loss": 0.0154, "epoch": 8.289098731631, "grad_norm": 0.032825078815221786, "learning_rate": 3.802126985900803e-06, "step": 148350 }, { "embedding_loss": 0.0148, "epoch": 8.291892495949043, "grad_norm": 0.05553850531578064, "learning_rate": 3.7959186207495985e-06, "step": 148400 }, { "embedding_loss": 0.0133, "epoch": 8.294686260267085, "grad_norm": 0.05747929960489273, "learning_rate": 3.789710255598393e-06, "step": 148450 }, { "embedding_loss": 0.0138, "epoch": 8.297480024585125, "grad_norm": 0.040748752653598785, "learning_rate": 3.7835018904471887e-06, "step": 148500 }, { "embedding_loss": 0.0147, "epoch": 8.300273788903167, "grad_norm": 0.054184116423130035, "learning_rate": 3.777293525295984e-06, "step": 148550 }, { "embedding_loss": 0.0156, "epoch": 8.30306755322121, "grad_norm": 0.05957194045186043, "learning_rate": 3.7710851601447797e-06, "step": 148600 }, { "embedding_loss": 0.0153, "epoch": 8.305861317539252, "grad_norm": 0.08380547910928726, "learning_rate": 3.7648767949935743e-06, "step": 148650 }, { "embedding_loss": 0.0157, "epoch": 8.308655081857294, "grad_norm": 0.04617278650403023, "learning_rate": 3.75866842984237e-06, "step": 148700 }, { "embedding_loss": 0.0153, "epoch": 8.311448846175336, "grad_norm": 0.09773170202970505, "learning_rate": 3.7524600646911654e-06, "step": 148750 }, { "embedding_loss": 0.0173, "epoch": 8.314242610493379, "grad_norm": 0.04130915552377701, "learning_rate": 3.7462516995399605e-06, "step": 148800 }, { "embedding_loss": 0.015, "epoch": 8.31703637481142, "grad_norm": 0.022649487480521202, "learning_rate": 3.7400433343887556e-06, "step": 148850 }, { "embedding_loss": 0.0154, "epoch": 8.319830139129463, "grad_norm": 0.13605131208896637, "learning_rate": 3.7338349692375507e-06, "step": 148900 }, { "embedding_loss": 0.0142, "epoch": 8.322623903447505, "grad_norm": 0.026791492477059364, "learning_rate": 3.727626604086346e-06, "step": 148950 }, { "embedding_loss": 0.0145, "epoch": 8.325417667765548, "grad_norm": 0.04632120579481125, "learning_rate": 3.7214182389351417e-06, "step": 149000 }, { "embedding_loss": 0.0151, "epoch": 8.32821143208359, "grad_norm": 0.09252113103866577, "learning_rate": 3.715209873783937e-06, "step": 149050 }, { "embedding_loss": 0.0165, "epoch": 8.331005196401632, "grad_norm": 0.06140727922320366, "learning_rate": 3.709001508632732e-06, "step": 149100 }, { "embedding_loss": 0.0146, "epoch": 8.333798960719674, "grad_norm": 0.05017589032649994, "learning_rate": 3.7027931434815274e-06, "step": 149150 }, { "embedding_loss": 0.0169, "epoch": 8.336592725037717, "grad_norm": 0.054387886077165604, "learning_rate": 3.696584778330323e-06, "step": 149200 }, { "embedding_loss": 0.015, "epoch": 8.339386489355759, "grad_norm": 0.02958703599870205, "learning_rate": 3.690376413179118e-06, "step": 149250 }, { "embedding_loss": 0.0167, "epoch": 8.3421802536738, "grad_norm": 0.08897984027862549, "learning_rate": 3.684168048027913e-06, "step": 149300 }, { "embedding_loss": 0.0149, "epoch": 8.344974017991841, "grad_norm": 0.04249068349599838, "learning_rate": 3.677959682876708e-06, "step": 149350 }, { "embedding_loss": 0.0163, "epoch": 8.347767782309884, "grad_norm": 0.09830516576766968, "learning_rate": 3.6717513177255037e-06, "step": 149400 }, { "embedding_loss": 0.0158, "epoch": 8.350561546627926, "grad_norm": 0.05530906096100807, "learning_rate": 3.665542952574299e-06, "step": 149450 }, { "embedding_loss": 0.0155, "epoch": 8.353355310945968, "grad_norm": 0.048273928463459015, "learning_rate": 3.659334587423094e-06, "step": 149500 }, { "embedding_loss": 0.014, "epoch": 8.35614907526401, "grad_norm": 0.04482319951057434, "learning_rate": 3.6531262222718894e-06, "step": 149550 }, { "embedding_loss": 0.0164, "epoch": 8.358942839582053, "grad_norm": 0.07107780873775482, "learning_rate": 3.646917857120685e-06, "step": 149600 }, { "embedding_loss": 0.0156, "epoch": 8.361736603900095, "grad_norm": 0.06994830816984177, "learning_rate": 3.6407094919694804e-06, "step": 149650 }, { "embedding_loss": 0.0149, "epoch": 8.364530368218137, "grad_norm": 0.03927583247423172, "learning_rate": 3.634501126818275e-06, "step": 149700 }, { "embedding_loss": 0.0149, "epoch": 8.36732413253618, "grad_norm": 0.022574856877326965, "learning_rate": 3.6282927616670706e-06, "step": 149750 }, { "embedding_loss": 0.0155, "epoch": 8.370117896854222, "grad_norm": 0.08981145173311234, "learning_rate": 3.6220843965158657e-06, "step": 149800 }, { "embedding_loss": 0.015, "epoch": 8.372911661172264, "grad_norm": 0.04297909140586853, "learning_rate": 3.615876031364661e-06, "step": 149850 }, { "embedding_loss": 0.0154, "epoch": 8.375705425490306, "grad_norm": 0.06013580039143562, "learning_rate": 3.6096676662134563e-06, "step": 149900 }, { "embedding_loss": 0.0142, "epoch": 8.378499189808348, "grad_norm": 0.09528236091136932, "learning_rate": 3.6034593010622514e-06, "step": 149950 }, { "embedding_loss": 0.0148, "epoch": 8.38129295412639, "grad_norm": 0.04537883773446083, "learning_rate": 3.597250935911047e-06, "step": 150000 }, { "embedding_loss": 0.0153, "epoch": 8.384086718444433, "grad_norm": 0.1078735813498497, "learning_rate": 3.5910425707598424e-06, "step": 150050 }, { "embedding_loss": 0.0149, "epoch": 8.386880482762475, "grad_norm": 0.04632721096277237, "learning_rate": 3.584834205608637e-06, "step": 150100 }, { "embedding_loss": 0.0154, "epoch": 8.389674247080515, "grad_norm": 0.07725604623556137, "learning_rate": 3.5786258404574326e-06, "step": 150150 }, { "embedding_loss": 0.016, "epoch": 8.392468011398558, "grad_norm": 0.05129144713282585, "learning_rate": 3.572417475306228e-06, "step": 150200 }, { "embedding_loss": 0.0149, "epoch": 8.3952617757166, "grad_norm": 0.06233081966638565, "learning_rate": 3.566209110155023e-06, "step": 150250 }, { "embedding_loss": 0.0145, "epoch": 8.398055540034642, "grad_norm": 0.06113690510392189, "learning_rate": 3.5600007450038183e-06, "step": 150300 }, { "embedding_loss": 0.015, "epoch": 8.400849304352684, "grad_norm": 0.07731983065605164, "learning_rate": 3.553792379852614e-06, "step": 150350 }, { "embedding_loss": 0.0148, "epoch": 8.403643068670727, "grad_norm": 0.05062863975763321, "learning_rate": 3.547584014701409e-06, "step": 150400 }, { "embedding_loss": 0.0159, "epoch": 8.406436832988769, "grad_norm": 0.045579079538583755, "learning_rate": 3.5413756495502044e-06, "step": 150450 }, { "embedding_loss": 0.0158, "epoch": 8.409230597306811, "grad_norm": 0.0494043193757534, "learning_rate": 3.535167284398999e-06, "step": 150500 }, { "embedding_loss": 0.0143, "epoch": 8.412024361624853, "grad_norm": 0.03793462738394737, "learning_rate": 3.5289589192477946e-06, "step": 150550 }, { "embedding_loss": 0.0141, "epoch": 8.414818125942896, "grad_norm": 0.06969532370567322, "learning_rate": 3.52275055409659e-06, "step": 150600 }, { "embedding_loss": 0.0154, "epoch": 8.417611890260938, "grad_norm": 0.05371808260679245, "learning_rate": 3.5165421889453856e-06, "step": 150650 }, { "embedding_loss": 0.0156, "epoch": 8.42040565457898, "grad_norm": 0.03521112725138664, "learning_rate": 3.5103338237941803e-06, "step": 150700 }, { "embedding_loss": 0.0165, "epoch": 8.423199418897022, "grad_norm": 0.06402125209569931, "learning_rate": 3.5041254586429758e-06, "step": 150750 }, { "embedding_loss": 0.0154, "epoch": 8.425993183215065, "grad_norm": 0.09679724276065826, "learning_rate": 3.4979170934917713e-06, "step": 150800 }, { "embedding_loss": 0.0158, "epoch": 8.428786947533107, "grad_norm": 0.0765095204114914, "learning_rate": 3.4917087283405664e-06, "step": 150850 }, { "embedding_loss": 0.0174, "epoch": 8.431580711851149, "grad_norm": 0.028773197904229164, "learning_rate": 3.4855003631893615e-06, "step": 150900 }, { "embedding_loss": 0.0156, "epoch": 8.434374476169191, "grad_norm": 0.04515661671757698, "learning_rate": 3.4792919980381566e-06, "step": 150950 }, { "embedding_loss": 0.0145, "epoch": 8.437168240487232, "grad_norm": 0.05907922610640526, "learning_rate": 3.473083632886952e-06, "step": 151000 }, { "embedding_loss": 0.0165, "epoch": 8.439962004805274, "grad_norm": 0.06323345750570297, "learning_rate": 3.4668752677357476e-06, "step": 151050 }, { "embedding_loss": 0.0165, "epoch": 8.442755769123316, "grad_norm": 0.051628027111291885, "learning_rate": 3.460666902584543e-06, "step": 151100 }, { "embedding_loss": 0.0153, "epoch": 8.445549533441358, "grad_norm": 0.0633857324719429, "learning_rate": 3.4544585374333378e-06, "step": 151150 }, { "embedding_loss": 0.0166, "epoch": 8.4483432977594, "grad_norm": 0.048903319984674454, "learning_rate": 3.4482501722821333e-06, "step": 151200 }, { "embedding_loss": 0.0171, "epoch": 8.451137062077443, "grad_norm": 0.05888624116778374, "learning_rate": 3.442041807130929e-06, "step": 151250 }, { "embedding_loss": 0.017, "epoch": 8.453930826395485, "grad_norm": 0.05064351484179497, "learning_rate": 3.435833441979724e-06, "step": 151300 }, { "embedding_loss": 0.0155, "epoch": 8.456724590713527, "grad_norm": 0.057990968227386475, "learning_rate": 3.429625076828519e-06, "step": 151350 }, { "embedding_loss": 0.0151, "epoch": 8.45951835503157, "grad_norm": 0.03225862979888916, "learning_rate": 3.423416711677314e-06, "step": 151400 }, { "embedding_loss": 0.0149, "epoch": 8.462312119349612, "grad_norm": 0.04345643147826195, "learning_rate": 3.4172083465261096e-06, "step": 151450 }, { "embedding_loss": 0.0148, "epoch": 8.465105883667654, "grad_norm": 0.060473524034023285, "learning_rate": 3.410999981374905e-06, "step": 151500 }, { "embedding_loss": 0.0168, "epoch": 8.467899647985696, "grad_norm": 0.08493508398532867, "learning_rate": 3.4047916162236998e-06, "step": 151550 }, { "embedding_loss": 0.0163, "epoch": 8.470693412303739, "grad_norm": 0.060536086559295654, "learning_rate": 3.3985832510724953e-06, "step": 151600 }, { "embedding_loss": 0.0157, "epoch": 8.47348717662178, "grad_norm": 0.030919358134269714, "learning_rate": 3.392374885921291e-06, "step": 151650 }, { "embedding_loss": 0.0145, "epoch": 8.476280940939823, "grad_norm": 0.07308661937713623, "learning_rate": 3.3861665207700863e-06, "step": 151700 }, { "embedding_loss": 0.0144, "epoch": 8.479074705257865, "grad_norm": 0.08535640686750412, "learning_rate": 3.379958155618881e-06, "step": 151750 }, { "embedding_loss": 0.0159, "epoch": 8.481868469575907, "grad_norm": 0.07428670674562454, "learning_rate": 3.3737497904676765e-06, "step": 151800 }, { "embedding_loss": 0.0147, "epoch": 8.484662233893948, "grad_norm": 0.040890783071517944, "learning_rate": 3.3675414253164716e-06, "step": 151850 }, { "embedding_loss": 0.0142, "epoch": 8.48745599821199, "grad_norm": 0.07627901434898376, "learning_rate": 3.361333060165267e-06, "step": 151900 }, { "embedding_loss": 0.0168, "epoch": 8.490249762530032, "grad_norm": 0.024004917591810226, "learning_rate": 3.355124695014062e-06, "step": 151950 }, { "embedding_loss": 0.0145, "epoch": 8.493043526848075, "grad_norm": 0.05516546592116356, "learning_rate": 3.3489163298628573e-06, "step": 152000 }, { "embedding_loss": 0.0156, "epoch": 8.495837291166117, "grad_norm": 0.0653652474284172, "learning_rate": 3.342707964711653e-06, "step": 152050 }, { "embedding_loss": 0.0172, "epoch": 8.498631055484159, "grad_norm": 0.03151296451687813, "learning_rate": 3.3364995995604483e-06, "step": 152100 }, { "embedding_loss": 0.0138, "epoch": 8.501424819802201, "grad_norm": 0.04163474217057228, "learning_rate": 3.330291234409243e-06, "step": 152150 }, { "embedding_loss": 0.015, "epoch": 8.504218584120244, "grad_norm": 0.0720534697175026, "learning_rate": 3.3240828692580385e-06, "step": 152200 }, { "embedding_loss": 0.0164, "epoch": 8.507012348438286, "grad_norm": 0.06276772171258926, "learning_rate": 3.317874504106834e-06, "step": 152250 }, { "embedding_loss": 0.0148, "epoch": 8.509806112756328, "grad_norm": 0.06099456176161766, "learning_rate": 3.311666138955629e-06, "step": 152300 }, { "embedding_loss": 0.0156, "epoch": 8.51259987707437, "grad_norm": 0.05836554616689682, "learning_rate": 3.305457773804424e-06, "step": 152350 }, { "embedding_loss": 0.0179, "epoch": 8.515393641392413, "grad_norm": 0.04615028202533722, "learning_rate": 3.2992494086532197e-06, "step": 152400 }, { "embedding_loss": 0.0159, "epoch": 8.518187405710455, "grad_norm": 0.05056123808026314, "learning_rate": 3.293041043502015e-06, "step": 152450 }, { "embedding_loss": 0.0151, "epoch": 8.520981170028497, "grad_norm": 0.07040714472532272, "learning_rate": 3.2868326783508103e-06, "step": 152500 }, { "embedding_loss": 0.0154, "epoch": 8.52377493434654, "grad_norm": 0.03974661976099014, "learning_rate": 3.280624313199605e-06, "step": 152550 }, { "embedding_loss": 0.016, "epoch": 8.526568698664581, "grad_norm": 0.07807382196187973, "learning_rate": 3.2744159480484005e-06, "step": 152600 }, { "embedding_loss": 0.0151, "epoch": 8.529362462982622, "grad_norm": 0.048375170677900314, "learning_rate": 3.268207582897196e-06, "step": 152650 }, { "embedding_loss": 0.0152, "epoch": 8.532156227300664, "grad_norm": 0.08035442978143692, "learning_rate": 3.2619992177459915e-06, "step": 152700 }, { "embedding_loss": 0.0157, "epoch": 8.534949991618706, "grad_norm": 0.10738883167505264, "learning_rate": 3.255790852594786e-06, "step": 152750 }, { "embedding_loss": 0.0138, "epoch": 8.537743755936749, "grad_norm": 0.038910217583179474, "learning_rate": 3.2495824874435817e-06, "step": 152800 }, { "embedding_loss": 0.0154, "epoch": 8.54053752025479, "grad_norm": 0.03990960121154785, "learning_rate": 3.2433741222923772e-06, "step": 152850 }, { "embedding_loss": 0.0132, "epoch": 8.543331284572833, "grad_norm": 0.07080736756324768, "learning_rate": 3.2371657571411723e-06, "step": 152900 }, { "embedding_loss": 0.0164, "epoch": 8.546125048890875, "grad_norm": 0.060322992503643036, "learning_rate": 3.2309573919899674e-06, "step": 152950 }, { "embedding_loss": 0.0147, "epoch": 8.548918813208918, "grad_norm": 0.050156768411397934, "learning_rate": 3.2247490268387625e-06, "step": 153000 }, { "embedding_loss": 0.0167, "epoch": 8.55171257752696, "grad_norm": 0.07222689688205719, "learning_rate": 3.218540661687558e-06, "step": 153050 }, { "embedding_loss": 0.0153, "epoch": 8.554506341845002, "grad_norm": 0.05776450037956238, "learning_rate": 3.2123322965363535e-06, "step": 153100 }, { "embedding_loss": 0.0144, "epoch": 8.557300106163044, "grad_norm": 0.037613797932863235, "learning_rate": 3.206123931385149e-06, "step": 153150 }, { "embedding_loss": 0.0166, "epoch": 8.560093870481086, "grad_norm": 0.04089590534567833, "learning_rate": 3.1999155662339437e-06, "step": 153200 }, { "embedding_loss": 0.014, "epoch": 8.562887634799129, "grad_norm": 0.07364202290773392, "learning_rate": 3.1937072010827392e-06, "step": 153250 }, { "embedding_loss": 0.015, "epoch": 8.565681399117171, "grad_norm": 0.04349654167890549, "learning_rate": 3.1874988359315347e-06, "step": 153300 }, { "embedding_loss": 0.014, "epoch": 8.568475163435213, "grad_norm": 0.038592297583818436, "learning_rate": 3.18129047078033e-06, "step": 153350 }, { "embedding_loss": 0.0151, "epoch": 8.571268927753255, "grad_norm": 0.07288108766078949, "learning_rate": 3.175082105629125e-06, "step": 153400 }, { "embedding_loss": 0.0157, "epoch": 8.574062692071298, "grad_norm": 0.053858883678913116, "learning_rate": 3.16887374047792e-06, "step": 153450 }, { "embedding_loss": 0.0162, "epoch": 8.57685645638934, "grad_norm": 0.04754741117358208, "learning_rate": 3.1626653753267155e-06, "step": 153500 }, { "embedding_loss": 0.0155, "epoch": 8.57965022070738, "grad_norm": 0.12883758544921875, "learning_rate": 3.156457010175511e-06, "step": 153550 }, { "embedding_loss": 0.0145, "epoch": 8.582443985025423, "grad_norm": 0.04562751576304436, "learning_rate": 3.1502486450243057e-06, "step": 153600 }, { "embedding_loss": 0.0155, "epoch": 8.585237749343465, "grad_norm": 0.04847174882888794, "learning_rate": 3.1440402798731012e-06, "step": 153650 }, { "embedding_loss": 0.0171, "epoch": 8.588031513661507, "grad_norm": 0.03747905418276787, "learning_rate": 3.1378319147218967e-06, "step": 153700 }, { "embedding_loss": 0.0156, "epoch": 8.59082527797955, "grad_norm": 0.04249433055520058, "learning_rate": 3.131623549570692e-06, "step": 153750 }, { "embedding_loss": 0.0146, "epoch": 8.593619042297592, "grad_norm": 0.0521470345556736, "learning_rate": 3.125415184419487e-06, "step": 153800 }, { "embedding_loss": 0.0156, "epoch": 8.596412806615634, "grad_norm": 0.04739159345626831, "learning_rate": 3.1192068192682824e-06, "step": 153850 }, { "embedding_loss": 0.0136, "epoch": 8.599206570933676, "grad_norm": 0.04161981865763664, "learning_rate": 3.1129984541170775e-06, "step": 153900 }, { "embedding_loss": 0.0158, "epoch": 8.602000335251718, "grad_norm": 0.07080359756946564, "learning_rate": 3.106790088965873e-06, "step": 153950 }, { "embedding_loss": 0.0151, "epoch": 8.60479409956976, "grad_norm": 0.05372055247426033, "learning_rate": 3.100581723814668e-06, "step": 154000 }, { "embedding_loss": 0.0165, "epoch": 8.607587863887803, "grad_norm": 0.051005084067583084, "learning_rate": 3.0943733586634632e-06, "step": 154050 }, { "embedding_loss": 0.0166, "epoch": 8.610381628205845, "grad_norm": 0.057950712740421295, "learning_rate": 3.0881649935122587e-06, "step": 154100 }, { "embedding_loss": 0.0156, "epoch": 8.613175392523887, "grad_norm": 0.04430091381072998, "learning_rate": 3.0819566283610542e-06, "step": 154150 }, { "embedding_loss": 0.0146, "epoch": 8.61596915684193, "grad_norm": 0.0727304145693779, "learning_rate": 3.075748263209849e-06, "step": 154200 }, { "embedding_loss": 0.0154, "epoch": 8.618762921159972, "grad_norm": 0.05588168650865555, "learning_rate": 3.0695398980586444e-06, "step": 154250 }, { "embedding_loss": 0.0164, "epoch": 8.621556685478014, "grad_norm": 0.04937589913606644, "learning_rate": 3.06333153290744e-06, "step": 154300 }, { "embedding_loss": 0.0155, "epoch": 8.624350449796054, "grad_norm": 0.08637061715126038, "learning_rate": 3.057123167756235e-06, "step": 154350 }, { "embedding_loss": 0.0158, "epoch": 8.627144214114097, "grad_norm": 0.05110703036189079, "learning_rate": 3.05091480260503e-06, "step": 154400 }, { "embedding_loss": 0.0142, "epoch": 8.629937978432139, "grad_norm": 0.030705546960234642, "learning_rate": 3.0447064374538256e-06, "step": 154450 }, { "embedding_loss": 0.0164, "epoch": 8.632731742750181, "grad_norm": 0.032925866544246674, "learning_rate": 3.0384980723026207e-06, "step": 154500 }, { "embedding_loss": 0.0152, "epoch": 8.635525507068223, "grad_norm": 0.07655947655439377, "learning_rate": 3.0322897071514162e-06, "step": 154550 }, { "embedding_loss": 0.0182, "epoch": 8.638319271386266, "grad_norm": 0.042550019919872284, "learning_rate": 3.026081342000211e-06, "step": 154600 }, { "embedding_loss": 0.0168, "epoch": 8.641113035704308, "grad_norm": 0.0905623584985733, "learning_rate": 3.0198729768490064e-06, "step": 154650 }, { "embedding_loss": 0.0158, "epoch": 8.64390680002235, "grad_norm": 0.0687936469912529, "learning_rate": 3.013664611697802e-06, "step": 154700 }, { "embedding_loss": 0.0165, "epoch": 8.646700564340392, "grad_norm": 0.07117894291877747, "learning_rate": 3.0074562465465975e-06, "step": 154750 }, { "embedding_loss": 0.0169, "epoch": 8.649494328658434, "grad_norm": 0.09120851010084152, "learning_rate": 3.001247881395392e-06, "step": 154800 }, { "embedding_loss": 0.017, "epoch": 8.652288092976477, "grad_norm": 0.08104082196950912, "learning_rate": 2.9950395162441876e-06, "step": 154850 }, { "embedding_loss": 0.0159, "epoch": 8.655081857294519, "grad_norm": 0.03330042585730553, "learning_rate": 2.988831151092983e-06, "step": 154900 }, { "embedding_loss": 0.0162, "epoch": 8.657875621612561, "grad_norm": 0.047238606959581375, "learning_rate": 2.9826227859417782e-06, "step": 154950 }, { "embedding_loss": 0.0151, "epoch": 8.660669385930603, "grad_norm": 0.057230737060308456, "learning_rate": 2.9764144207905733e-06, "step": 155000 }, { "embedding_loss": 0.0164, "epoch": 8.663463150248646, "grad_norm": 0.09840896725654602, "learning_rate": 2.9702060556393684e-06, "step": 155050 }, { "embedding_loss": 0.0157, "epoch": 8.666256914566688, "grad_norm": 0.044191062450408936, "learning_rate": 2.963997690488164e-06, "step": 155100 }, { "embedding_loss": 0.0144, "epoch": 8.66905067888473, "grad_norm": 0.037563640624284744, "learning_rate": 2.9577893253369595e-06, "step": 155150 }, { "embedding_loss": 0.0157, "epoch": 8.67184444320277, "grad_norm": 0.0646868422627449, "learning_rate": 2.951580960185755e-06, "step": 155200 }, { "embedding_loss": 0.0174, "epoch": 8.674638207520813, "grad_norm": 0.04759542644023895, "learning_rate": 2.9453725950345496e-06, "step": 155250 }, { "embedding_loss": 0.0162, "epoch": 8.677431971838855, "grad_norm": 0.05421661585569382, "learning_rate": 2.939164229883345e-06, "step": 155300 }, { "embedding_loss": 0.0144, "epoch": 8.680225736156897, "grad_norm": 0.04806475341320038, "learning_rate": 2.9329558647321402e-06, "step": 155350 }, { "embedding_loss": 0.014, "epoch": 8.68301950047494, "grad_norm": 0.08625195920467377, "learning_rate": 2.9267474995809358e-06, "step": 155400 }, { "embedding_loss": 0.0133, "epoch": 8.685813264792982, "grad_norm": 0.054260220378637314, "learning_rate": 2.920539134429731e-06, "step": 155450 }, { "embedding_loss": 0.0149, "epoch": 8.688607029111024, "grad_norm": 0.04675925523042679, "learning_rate": 2.914330769278526e-06, "step": 155500 }, { "embedding_loss": 0.0165, "epoch": 8.691400793429066, "grad_norm": 0.07507915049791336, "learning_rate": 2.9081224041273214e-06, "step": 155550 }, { "embedding_loss": 0.0138, "epoch": 8.694194557747108, "grad_norm": 0.04354666545987129, "learning_rate": 2.901914038976117e-06, "step": 155600 }, { "embedding_loss": 0.0173, "epoch": 8.69698832206515, "grad_norm": 0.06759773194789886, "learning_rate": 2.8957056738249116e-06, "step": 155650 }, { "embedding_loss": 0.015, "epoch": 8.699782086383193, "grad_norm": 0.0751531720161438, "learning_rate": 2.889497308673707e-06, "step": 155700 }, { "embedding_loss": 0.0161, "epoch": 8.702575850701235, "grad_norm": 0.049919724464416504, "learning_rate": 2.8832889435225027e-06, "step": 155750 }, { "embedding_loss": 0.0153, "epoch": 8.705369615019277, "grad_norm": 0.04018599912524223, "learning_rate": 2.8770805783712978e-06, "step": 155800 }, { "embedding_loss": 0.0171, "epoch": 8.70816337933732, "grad_norm": 0.0699193999171257, "learning_rate": 2.870872213220093e-06, "step": 155850 }, { "embedding_loss": 0.0152, "epoch": 8.710957143655362, "grad_norm": 0.10491643100976944, "learning_rate": 2.8646638480688884e-06, "step": 155900 }, { "embedding_loss": 0.0156, "epoch": 8.713750907973404, "grad_norm": 0.051087960600852966, "learning_rate": 2.8584554829176834e-06, "step": 155950 }, { "embedding_loss": 0.0167, "epoch": 8.716544672291445, "grad_norm": 0.03479992598295212, "learning_rate": 2.852247117766479e-06, "step": 156000 }, { "embedding_loss": 0.0151, "epoch": 8.719338436609487, "grad_norm": 0.052964504808187485, "learning_rate": 2.846038752615274e-06, "step": 156050 }, { "embedding_loss": 0.0146, "epoch": 8.722132200927529, "grad_norm": 0.10387752205133438, "learning_rate": 2.839830387464069e-06, "step": 156100 }, { "embedding_loss": 0.0153, "epoch": 8.724925965245571, "grad_norm": 0.05466112121939659, "learning_rate": 2.8336220223128647e-06, "step": 156150 }, { "embedding_loss": 0.0156, "epoch": 8.727719729563614, "grad_norm": 0.03902602568268776, "learning_rate": 2.82741365716166e-06, "step": 156200 }, { "embedding_loss": 0.0156, "epoch": 8.730513493881656, "grad_norm": 0.03803364932537079, "learning_rate": 2.821205292010455e-06, "step": 156250 }, { "embedding_loss": 0.0157, "epoch": 8.733307258199698, "grad_norm": 0.0631370022892952, "learning_rate": 2.8149969268592504e-06, "step": 156300 }, { "embedding_loss": 0.0158, "epoch": 8.73610102251774, "grad_norm": 0.06471937149763107, "learning_rate": 2.808788561708046e-06, "step": 156350 }, { "embedding_loss": 0.0155, "epoch": 8.738894786835782, "grad_norm": 0.08012683689594269, "learning_rate": 2.802580196556841e-06, "step": 156400 }, { "embedding_loss": 0.0161, "epoch": 8.741688551153825, "grad_norm": 0.07575327157974243, "learning_rate": 2.796371831405636e-06, "step": 156450 }, { "embedding_loss": 0.0156, "epoch": 8.744482315471867, "grad_norm": 0.04413817077875137, "learning_rate": 2.7901634662544316e-06, "step": 156500 }, { "embedding_loss": 0.0157, "epoch": 8.74727607978991, "grad_norm": 0.061855413019657135, "learning_rate": 2.7839551011032267e-06, "step": 156550 }, { "embedding_loss": 0.0149, "epoch": 8.750069844107951, "grad_norm": 0.06529432535171509, "learning_rate": 2.777746735952022e-06, "step": 156600 }, { "embedding_loss": 0.0164, "epoch": 8.752863608425994, "grad_norm": 0.04072681814432144, "learning_rate": 2.771538370800817e-06, "step": 156650 }, { "embedding_loss": 0.0141, "epoch": 8.755657372744036, "grad_norm": 0.043379828333854675, "learning_rate": 2.7653300056496123e-06, "step": 156700 }, { "embedding_loss": 0.0168, "epoch": 8.758451137062078, "grad_norm": 0.07363314181566238, "learning_rate": 2.759121640498408e-06, "step": 156750 }, { "embedding_loss": 0.0146, "epoch": 8.76124490138012, "grad_norm": 0.03521423041820526, "learning_rate": 2.7529132753472034e-06, "step": 156800 }, { "embedding_loss": 0.0159, "epoch": 8.764038665698163, "grad_norm": 0.0744566097855568, "learning_rate": 2.746704910195998e-06, "step": 156850 }, { "embedding_loss": 0.0149, "epoch": 8.766832430016203, "grad_norm": 0.04117882251739502, "learning_rate": 2.7404965450447936e-06, "step": 156900 }, { "embedding_loss": 0.0145, "epoch": 8.769626194334245, "grad_norm": 0.05817362293601036, "learning_rate": 2.7342881798935887e-06, "step": 156950 }, { "embedding_loss": 0.0162, "epoch": 8.772419958652288, "grad_norm": 0.04985356703400612, "learning_rate": 2.728079814742384e-06, "step": 157000 }, { "embedding_loss": 0.0143, "epoch": 8.77521372297033, "grad_norm": 0.0700426697731018, "learning_rate": 2.7218714495911793e-06, "step": 157050 }, { "embedding_loss": 0.0135, "epoch": 8.778007487288372, "grad_norm": 0.06707290560007095, "learning_rate": 2.7156630844399743e-06, "step": 157100 }, { "embedding_loss": 0.0162, "epoch": 8.780801251606414, "grad_norm": 0.05153552070260048, "learning_rate": 2.70945471928877e-06, "step": 157150 }, { "embedding_loss": 0.0167, "epoch": 8.783595015924456, "grad_norm": 0.057787440717220306, "learning_rate": 2.7032463541375654e-06, "step": 157200 }, { "embedding_loss": 0.0157, "epoch": 8.786388780242499, "grad_norm": 0.05983661115169525, "learning_rate": 2.697037988986361e-06, "step": 157250 }, { "embedding_loss": 0.0148, "epoch": 8.789182544560541, "grad_norm": 0.05793158337473869, "learning_rate": 2.6908296238351556e-06, "step": 157300 }, { "embedding_loss": 0.0151, "epoch": 8.791976308878583, "grad_norm": 0.044759102165699005, "learning_rate": 2.684621258683951e-06, "step": 157350 }, { "embedding_loss": 0.0145, "epoch": 8.794770073196625, "grad_norm": 0.0341864712536335, "learning_rate": 2.678412893532746e-06, "step": 157400 }, { "embedding_loss": 0.0155, "epoch": 8.797563837514668, "grad_norm": 0.07804260402917862, "learning_rate": 2.6722045283815417e-06, "step": 157450 }, { "embedding_loss": 0.0153, "epoch": 8.80035760183271, "grad_norm": 0.05785113945603371, "learning_rate": 2.6659961632303368e-06, "step": 157500 }, { "embedding_loss": 0.0158, "epoch": 8.803151366150752, "grad_norm": 0.060273148119449615, "learning_rate": 2.659787798079132e-06, "step": 157550 }, { "embedding_loss": 0.0146, "epoch": 8.805945130468794, "grad_norm": 0.04177701473236084, "learning_rate": 2.6535794329279274e-06, "step": 157600 }, { "embedding_loss": 0.0175, "epoch": 8.808738894786837, "grad_norm": 0.048635657876729965, "learning_rate": 2.647371067776723e-06, "step": 157650 }, { "embedding_loss": 0.0149, "epoch": 8.811532659104877, "grad_norm": 0.04698022082448006, "learning_rate": 2.6411627026255176e-06, "step": 157700 }, { "embedding_loss": 0.0147, "epoch": 8.81432642342292, "grad_norm": 0.07695703953504562, "learning_rate": 2.634954337474313e-06, "step": 157750 }, { "embedding_loss": 0.0148, "epoch": 8.817120187740962, "grad_norm": 0.035691265016794205, "learning_rate": 2.6287459723231086e-06, "step": 157800 }, { "embedding_loss": 0.0123, "epoch": 8.819913952059004, "grad_norm": 0.06887619197368622, "learning_rate": 2.6225376071719037e-06, "step": 157850 }, { "embedding_loss": 0.0164, "epoch": 8.822707716377046, "grad_norm": 0.10739543288946152, "learning_rate": 2.6163292420206988e-06, "step": 157900 }, { "embedding_loss": 0.0152, "epoch": 8.825501480695088, "grad_norm": 0.08229203522205353, "learning_rate": 2.6101208768694943e-06, "step": 157950 }, { "embedding_loss": 0.0158, "epoch": 8.82829524501313, "grad_norm": 0.037708062678575516, "learning_rate": 2.6039125117182894e-06, "step": 158000 }, { "embedding_loss": 0.0152, "epoch": 8.831089009331173, "grad_norm": 0.0681888535618782, "learning_rate": 2.597704146567085e-06, "step": 158050 }, { "embedding_loss": 0.0152, "epoch": 8.833882773649215, "grad_norm": 0.06411877274513245, "learning_rate": 2.5914957814158796e-06, "step": 158100 }, { "embedding_loss": 0.015, "epoch": 8.836676537967257, "grad_norm": 0.0720384493470192, "learning_rate": 2.585287416264675e-06, "step": 158150 }, { "embedding_loss": 0.0164, "epoch": 8.8394703022853, "grad_norm": 0.05243666470050812, "learning_rate": 2.5790790511134706e-06, "step": 158200 }, { "embedding_loss": 0.0128, "epoch": 8.842264066603342, "grad_norm": 0.04803541302680969, "learning_rate": 2.572870685962266e-06, "step": 158250 }, { "embedding_loss": 0.0138, "epoch": 8.845057830921384, "grad_norm": 0.04684024676680565, "learning_rate": 2.5666623208110608e-06, "step": 158300 }, { "embedding_loss": 0.017, "epoch": 8.847851595239426, "grad_norm": 0.029565557837486267, "learning_rate": 2.5604539556598563e-06, "step": 158350 }, { "embedding_loss": 0.0153, "epoch": 8.850645359557468, "grad_norm": 0.030923837795853615, "learning_rate": 2.554245590508652e-06, "step": 158400 }, { "embedding_loss": 0.0166, "epoch": 8.85343912387551, "grad_norm": 0.04406418651342392, "learning_rate": 2.548037225357447e-06, "step": 158450 }, { "embedding_loss": 0.0157, "epoch": 8.856232888193553, "grad_norm": 0.06384923309087753, "learning_rate": 2.541828860206242e-06, "step": 158500 }, { "embedding_loss": 0.0155, "epoch": 8.859026652511595, "grad_norm": 0.0671994611620903, "learning_rate": 2.535620495055037e-06, "step": 158550 }, { "embedding_loss": 0.0156, "epoch": 8.861820416829636, "grad_norm": 0.05191962420940399, "learning_rate": 2.5294121299038326e-06, "step": 158600 }, { "embedding_loss": 0.0152, "epoch": 8.864614181147678, "grad_norm": 0.06976179778575897, "learning_rate": 2.523203764752628e-06, "step": 158650 }, { "embedding_loss": 0.017, "epoch": 8.86740794546572, "grad_norm": 0.04912690073251724, "learning_rate": 2.5169953996014228e-06, "step": 158700 }, { "embedding_loss": 0.0154, "epoch": 8.870201709783762, "grad_norm": 0.03306075930595398, "learning_rate": 2.5107870344502183e-06, "step": 158750 }, { "embedding_loss": 0.0153, "epoch": 8.872995474101804, "grad_norm": 0.07040365040302277, "learning_rate": 2.5045786692990138e-06, "step": 158800 }, { "embedding_loss": 0.0149, "epoch": 8.875789238419847, "grad_norm": 0.05947064608335495, "learning_rate": 2.498370304147809e-06, "step": 158850 }, { "embedding_loss": 0.0154, "epoch": 8.878583002737889, "grad_norm": 0.05439265817403793, "learning_rate": 2.4921619389966044e-06, "step": 158900 }, { "embedding_loss": 0.0152, "epoch": 8.881376767055931, "grad_norm": 0.0645432099699974, "learning_rate": 2.4859535738453995e-06, "step": 158950 }, { "embedding_loss": 0.0151, "epoch": 8.884170531373973, "grad_norm": 0.05618186667561531, "learning_rate": 2.4797452086941946e-06, "step": 159000 }, { "embedding_loss": 0.0153, "epoch": 8.886964295692016, "grad_norm": 0.06403154134750366, "learning_rate": 2.47353684354299e-06, "step": 159050 }, { "embedding_loss": 0.0153, "epoch": 8.889758060010058, "grad_norm": 0.07830258458852768, "learning_rate": 2.467328478391785e-06, "step": 159100 }, { "embedding_loss": 0.015, "epoch": 8.8925518243281, "grad_norm": 0.06559592485427856, "learning_rate": 2.4611201132405803e-06, "step": 159150 }, { "embedding_loss": 0.0146, "epoch": 8.895345588646142, "grad_norm": 0.0676599070429802, "learning_rate": 2.4549117480893758e-06, "step": 159200 }, { "embedding_loss": 0.0155, "epoch": 8.898139352964185, "grad_norm": 0.05193488672375679, "learning_rate": 2.4487033829381713e-06, "step": 159250 }, { "embedding_loss": 0.0168, "epoch": 8.900933117282227, "grad_norm": 0.07110825181007385, "learning_rate": 2.4424950177869664e-06, "step": 159300 }, { "embedding_loss": 0.0158, "epoch": 8.903726881600267, "grad_norm": 0.04808557406067848, "learning_rate": 2.436286652635762e-06, "step": 159350 }, { "embedding_loss": 0.0145, "epoch": 8.90652064591831, "grad_norm": 0.05145444720983505, "learning_rate": 2.430078287484557e-06, "step": 159400 }, { "embedding_loss": 0.0136, "epoch": 8.909314410236352, "grad_norm": 0.08466923981904984, "learning_rate": 2.423869922333352e-06, "step": 159450 }, { "embedding_loss": 0.0158, "epoch": 8.912108174554394, "grad_norm": 0.06032668054103851, "learning_rate": 2.4176615571821476e-06, "step": 159500 }, { "embedding_loss": 0.0148, "epoch": 8.914901938872436, "grad_norm": 0.10141456127166748, "learning_rate": 2.4114531920309427e-06, "step": 159550 }, { "embedding_loss": 0.0157, "epoch": 8.917695703190478, "grad_norm": 0.05911766737699509, "learning_rate": 2.4052448268797378e-06, "step": 159600 }, { "embedding_loss": 0.0142, "epoch": 8.92048946750852, "grad_norm": 0.08014023303985596, "learning_rate": 2.3990364617285333e-06, "step": 159650 }, { "embedding_loss": 0.0147, "epoch": 8.923283231826563, "grad_norm": 0.02860698103904724, "learning_rate": 2.3928280965773284e-06, "step": 159700 }, { "embedding_loss": 0.0164, "epoch": 8.926076996144605, "grad_norm": 0.0860762670636177, "learning_rate": 2.386619731426124e-06, "step": 159750 }, { "embedding_loss": 0.016, "epoch": 8.928870760462647, "grad_norm": 0.03438321128487587, "learning_rate": 2.380411366274919e-06, "step": 159800 }, { "embedding_loss": 0.015, "epoch": 8.93166452478069, "grad_norm": 0.07097504287958145, "learning_rate": 2.3742030011237145e-06, "step": 159850 }, { "embedding_loss": 0.016, "epoch": 8.934458289098732, "grad_norm": 0.05858347937464714, "learning_rate": 2.3679946359725096e-06, "step": 159900 }, { "embedding_loss": 0.0146, "epoch": 8.937252053416774, "grad_norm": 0.049422550946474075, "learning_rate": 2.361786270821305e-06, "step": 159950 }, { "embedding_loss": 0.0161, "epoch": 8.940045817734816, "grad_norm": 0.041975632309913635, "learning_rate": 2.3555779056701e-06, "step": 160000 }, { "embedding_loss": 0.0168, "epoch": 8.942839582052859, "grad_norm": 0.04720095917582512, "learning_rate": 2.3493695405188953e-06, "step": 160050 }, { "embedding_loss": 0.016, "epoch": 8.9456333463709, "grad_norm": 0.06787000596523285, "learning_rate": 2.3431611753676904e-06, "step": 160100 }, { "embedding_loss": 0.0153, "epoch": 8.948427110688943, "grad_norm": 0.06329987943172455, "learning_rate": 2.336952810216486e-06, "step": 160150 }, { "embedding_loss": 0.0163, "epoch": 8.951220875006985, "grad_norm": 0.050353601574897766, "learning_rate": 2.330744445065281e-06, "step": 160200 }, { "embedding_loss": 0.0155, "epoch": 8.954014639325026, "grad_norm": 0.09457340091466904, "learning_rate": 2.3245360799140765e-06, "step": 160250 }, { "embedding_loss": 0.0143, "epoch": 8.956808403643068, "grad_norm": 0.03126998618245125, "learning_rate": 2.3183277147628716e-06, "step": 160300 }, { "embedding_loss": 0.0158, "epoch": 8.95960216796111, "grad_norm": 0.07323930412530899, "learning_rate": 2.312119349611667e-06, "step": 160350 }, { "embedding_loss": 0.0171, "epoch": 8.962395932279152, "grad_norm": 0.03784079849720001, "learning_rate": 2.305910984460462e-06, "step": 160400 }, { "embedding_loss": 0.0159, "epoch": 8.965189696597195, "grad_norm": 0.07286468148231506, "learning_rate": 2.2997026193092577e-06, "step": 160450 }, { "embedding_loss": 0.0146, "epoch": 8.967983460915237, "grad_norm": 0.04767627641558647, "learning_rate": 2.293494254158053e-06, "step": 160500 }, { "embedding_loss": 0.0173, "epoch": 8.97077722523328, "grad_norm": 0.06112785264849663, "learning_rate": 2.287285889006848e-06, "step": 160550 }, { "embedding_loss": 0.0145, "epoch": 8.973570989551321, "grad_norm": 0.06336301565170288, "learning_rate": 2.281077523855643e-06, "step": 160600 }, { "embedding_loss": 0.0149, "epoch": 8.976364753869364, "grad_norm": 0.06365911662578583, "learning_rate": 2.2748691587044385e-06, "step": 160650 }, { "embedding_loss": 0.0144, "epoch": 8.979158518187406, "grad_norm": 0.05691937357187271, "learning_rate": 2.2686607935532336e-06, "step": 160700 }, { "embedding_loss": 0.0156, "epoch": 8.981952282505448, "grad_norm": 0.07120310515165329, "learning_rate": 2.262452428402029e-06, "step": 160750 }, { "embedding_loss": 0.0153, "epoch": 8.98474604682349, "grad_norm": 0.05211897939443588, "learning_rate": 2.256244063250824e-06, "step": 160800 }, { "embedding_loss": 0.017, "epoch": 8.987539811141533, "grad_norm": 0.09098859876394272, "learning_rate": 2.2500356980996197e-06, "step": 160850 }, { "embedding_loss": 0.0158, "epoch": 8.990333575459575, "grad_norm": 0.058238960802555084, "learning_rate": 2.243827332948415e-06, "step": 160900 }, { "embedding_loss": 0.0161, "epoch": 8.993127339777617, "grad_norm": 0.019644148647785187, "learning_rate": 2.2376189677972103e-06, "step": 160950 }, { "embedding_loss": 0.0153, "epoch": 8.99592110409566, "grad_norm": 0.06604664772748947, "learning_rate": 2.2314106026460054e-06, "step": 161000 }, { "embedding_loss": 0.0162, "epoch": 8.9987148684137, "grad_norm": 0.05808446556329727, "learning_rate": 2.2252022374948005e-06, "step": 161050 }, { "embedding_loss": 0.0169, "epoch": 9.001508632731742, "grad_norm": 0.03752119466662407, "learning_rate": 2.218993872343596e-06, "step": 161100 }, { "embedding_loss": 0.0142, "epoch": 9.004302397049784, "grad_norm": 0.06256867945194244, "learning_rate": 2.212785507192391e-06, "step": 161150 }, { "embedding_loss": 0.0141, "epoch": 9.007096161367826, "grad_norm": 0.05849900469183922, "learning_rate": 2.206577142041186e-06, "step": 161200 }, { "embedding_loss": 0.0149, "epoch": 9.009889925685869, "grad_norm": 0.09350146353244781, "learning_rate": 2.2003687768899817e-06, "step": 161250 }, { "embedding_loss": 0.0149, "epoch": 9.012683690003911, "grad_norm": 0.14924278855323792, "learning_rate": 2.1941604117387772e-06, "step": 161300 }, { "embedding_loss": 0.0159, "epoch": 9.015477454321953, "grad_norm": 0.05956177040934563, "learning_rate": 2.1879520465875723e-06, "step": 161350 }, { "embedding_loss": 0.0154, "epoch": 9.018271218639995, "grad_norm": 0.07845552265644073, "learning_rate": 2.181743681436368e-06, "step": 161400 }, { "embedding_loss": 0.0145, "epoch": 9.021064982958038, "grad_norm": 0.0789293423295021, "learning_rate": 2.175535316285163e-06, "step": 161450 }, { "embedding_loss": 0.0158, "epoch": 9.02385874727608, "grad_norm": 0.05742744356393814, "learning_rate": 2.169326951133958e-06, "step": 161500 }, { "embedding_loss": 0.0156, "epoch": 9.026652511594122, "grad_norm": 0.04877501353621483, "learning_rate": 2.1631185859827535e-06, "step": 161550 }, { "embedding_loss": 0.0162, "epoch": 9.029446275912164, "grad_norm": 0.05105751007795334, "learning_rate": 2.1569102208315486e-06, "step": 161600 }, { "embedding_loss": 0.0168, "epoch": 9.032240040230207, "grad_norm": 0.09983652085065842, "learning_rate": 2.1507018556803437e-06, "step": 161650 }, { "embedding_loss": 0.0143, "epoch": 9.035033804548249, "grad_norm": 0.05604614317417145, "learning_rate": 2.1444934905291392e-06, "step": 161700 }, { "embedding_loss": 0.0155, "epoch": 9.037827568866291, "grad_norm": 0.048837967216968536, "learning_rate": 2.1382851253779343e-06, "step": 161750 }, { "embedding_loss": 0.0144, "epoch": 9.040621333184333, "grad_norm": 0.03551775962114334, "learning_rate": 2.13207676022673e-06, "step": 161800 }, { "embedding_loss": 0.0154, "epoch": 9.043415097502375, "grad_norm": 0.035209622234106064, "learning_rate": 2.125868395075525e-06, "step": 161850 }, { "embedding_loss": 0.015, "epoch": 9.046208861820416, "grad_norm": 0.09386445581912994, "learning_rate": 2.1196600299243204e-06, "step": 161900 }, { "embedding_loss": 0.0153, "epoch": 9.049002626138458, "grad_norm": 0.04908418655395508, "learning_rate": 2.1134516647731155e-06, "step": 161950 }, { "embedding_loss": 0.0157, "epoch": 9.0517963904565, "grad_norm": 0.0675104558467865, "learning_rate": 2.1072432996219106e-06, "step": 162000 }, { "embedding_loss": 0.0153, "epoch": 9.054590154774543, "grad_norm": 0.08371925354003906, "learning_rate": 2.101034934470706e-06, "step": 162050 }, { "embedding_loss": 0.0158, "epoch": 9.057383919092585, "grad_norm": 0.06252847611904144, "learning_rate": 2.0948265693195012e-06, "step": 162100 }, { "embedding_loss": 0.0162, "epoch": 9.060177683410627, "grad_norm": 0.0433754101395607, "learning_rate": 2.0886182041682963e-06, "step": 162150 }, { "embedding_loss": 0.0178, "epoch": 9.06297144772867, "grad_norm": 0.09109321981668472, "learning_rate": 2.082409839017092e-06, "step": 162200 }, { "embedding_loss": 0.014, "epoch": 9.065765212046712, "grad_norm": 0.0367901511490345, "learning_rate": 2.076201473865887e-06, "step": 162250 }, { "embedding_loss": 0.0165, "epoch": 9.068558976364754, "grad_norm": 0.10956259071826935, "learning_rate": 2.0699931087146824e-06, "step": 162300 }, { "embedding_loss": 0.0153, "epoch": 9.071352740682796, "grad_norm": 0.07856793701648712, "learning_rate": 2.0637847435634775e-06, "step": 162350 }, { "embedding_loss": 0.0168, "epoch": 9.074146505000838, "grad_norm": 0.06888768076896667, "learning_rate": 2.057576378412273e-06, "step": 162400 }, { "embedding_loss": 0.0152, "epoch": 9.07694026931888, "grad_norm": 0.07215029001235962, "learning_rate": 2.051368013261068e-06, "step": 162450 }, { "embedding_loss": 0.0137, "epoch": 9.079734033636923, "grad_norm": 0.05398083105683327, "learning_rate": 2.0451596481098636e-06, "step": 162500 }, { "embedding_loss": 0.0145, "epoch": 9.082527797954965, "grad_norm": 0.053923483937978745, "learning_rate": 2.0389512829586587e-06, "step": 162550 }, { "embedding_loss": 0.0165, "epoch": 9.085321562273007, "grad_norm": 0.05497181415557861, "learning_rate": 2.032742917807454e-06, "step": 162600 }, { "embedding_loss": 0.0176, "epoch": 9.08811532659105, "grad_norm": 0.06226390600204468, "learning_rate": 2.026534552656249e-06, "step": 162650 }, { "embedding_loss": 0.0136, "epoch": 9.090909090909092, "grad_norm": 0.047523606568574905, "learning_rate": 2.0203261875050444e-06, "step": 162700 }, { "embedding_loss": 0.0156, "epoch": 9.093702855227132, "grad_norm": 0.08582614362239838, "learning_rate": 2.0141178223538395e-06, "step": 162750 }, { "embedding_loss": 0.0155, "epoch": 9.096496619545174, "grad_norm": 0.04434392973780632, "learning_rate": 2.007909457202635e-06, "step": 162800 }, { "embedding_loss": 0.0141, "epoch": 9.099290383863217, "grad_norm": 0.040966130793094635, "learning_rate": 2.00170109205143e-06, "step": 162850 }, { "embedding_loss": 0.017, "epoch": 9.102084148181259, "grad_norm": 0.05902962014079094, "learning_rate": 1.9954927269002256e-06, "step": 162900 }, { "embedding_loss": 0.0144, "epoch": 9.104877912499301, "grad_norm": 0.04357817769050598, "learning_rate": 1.9892843617490207e-06, "step": 162950 }, { "embedding_loss": 0.015, "epoch": 9.107671676817343, "grad_norm": 0.026144901290535927, "learning_rate": 1.9830759965978162e-06, "step": 163000 }, { "embedding_loss": 0.0153, "epoch": 9.110465441135386, "grad_norm": 0.09898866713047028, "learning_rate": 1.9768676314466113e-06, "step": 163050 }, { "embedding_loss": 0.0161, "epoch": 9.113259205453428, "grad_norm": 0.060047294944524765, "learning_rate": 1.9706592662954064e-06, "step": 163100 }, { "embedding_loss": 0.0153, "epoch": 9.11605296977147, "grad_norm": 0.07735827565193176, "learning_rate": 1.964450901144202e-06, "step": 163150 }, { "embedding_loss": 0.0148, "epoch": 9.118846734089512, "grad_norm": 0.07199098914861679, "learning_rate": 1.958242535992997e-06, "step": 163200 }, { "embedding_loss": 0.0147, "epoch": 9.121640498407555, "grad_norm": 0.06076597049832344, "learning_rate": 1.952034170841792e-06, "step": 163250 }, { "embedding_loss": 0.0155, "epoch": 9.124434262725597, "grad_norm": 0.037279162555933, "learning_rate": 1.9458258056905876e-06, "step": 163300 }, { "embedding_loss": 0.016, "epoch": 9.127228027043639, "grad_norm": 0.03761284798383713, "learning_rate": 1.939617440539383e-06, "step": 163350 }, { "embedding_loss": 0.0136, "epoch": 9.130021791361681, "grad_norm": 0.06350681185722351, "learning_rate": 1.9334090753881782e-06, "step": 163400 }, { "embedding_loss": 0.0152, "epoch": 9.132815555679723, "grad_norm": 0.08755284547805786, "learning_rate": 1.9272007102369738e-06, "step": 163450 }, { "embedding_loss": 0.0161, "epoch": 9.135609319997766, "grad_norm": 0.3854496479034424, "learning_rate": 1.920992345085769e-06, "step": 163500 }, { "embedding_loss": 0.0147, "epoch": 9.138403084315808, "grad_norm": 0.04789258539676666, "learning_rate": 1.914783979934564e-06, "step": 163550 }, { "embedding_loss": 0.0151, "epoch": 9.141196848633848, "grad_norm": 0.07978746294975281, "learning_rate": 1.908575614783359e-06, "step": 163600 }, { "embedding_loss": 0.0148, "epoch": 9.14399061295189, "grad_norm": 0.08482625335454941, "learning_rate": 1.9023672496321545e-06, "step": 163650 }, { "embedding_loss": 0.0162, "epoch": 9.146784377269933, "grad_norm": 0.060257989913225174, "learning_rate": 1.8961588844809498e-06, "step": 163700 }, { "embedding_loss": 0.0139, "epoch": 9.149578141587975, "grad_norm": 0.06018097698688507, "learning_rate": 1.8899505193297451e-06, "step": 163750 }, { "embedding_loss": 0.0138, "epoch": 9.152371905906017, "grad_norm": 0.06549549102783203, "learning_rate": 1.8837421541785402e-06, "step": 163800 }, { "embedding_loss": 0.0138, "epoch": 9.15516567022406, "grad_norm": 0.09264976531267166, "learning_rate": 1.8775337890273357e-06, "step": 163850 }, { "embedding_loss": 0.0168, "epoch": 9.157959434542102, "grad_norm": 0.05171775072813034, "learning_rate": 1.8713254238761308e-06, "step": 163900 }, { "embedding_loss": 0.0156, "epoch": 9.160753198860144, "grad_norm": 0.06426435708999634, "learning_rate": 1.8651170587249261e-06, "step": 163950 }, { "embedding_loss": 0.0146, "epoch": 9.163546963178186, "grad_norm": 0.07341618835926056, "learning_rate": 1.8589086935737212e-06, "step": 164000 }, { "embedding_loss": 0.0154, "epoch": 9.166340727496229, "grad_norm": 0.06872574239969254, "learning_rate": 1.8527003284225167e-06, "step": 164050 }, { "embedding_loss": 0.0151, "epoch": 9.16913449181427, "grad_norm": 0.11310024559497833, "learning_rate": 1.8464919632713118e-06, "step": 164100 }, { "embedding_loss": 0.0146, "epoch": 9.171928256132313, "grad_norm": 0.10081680864095688, "learning_rate": 1.8402835981201074e-06, "step": 164150 }, { "embedding_loss": 0.0148, "epoch": 9.174722020450355, "grad_norm": 0.06937085092067719, "learning_rate": 1.8340752329689024e-06, "step": 164200 }, { "embedding_loss": 0.0159, "epoch": 9.177515784768397, "grad_norm": 0.04203878715634346, "learning_rate": 1.8278668678176977e-06, "step": 164250 }, { "embedding_loss": 0.0176, "epoch": 9.18030954908644, "grad_norm": 0.11971700191497803, "learning_rate": 1.8216585026664928e-06, "step": 164300 }, { "embedding_loss": 0.0142, "epoch": 9.183103313404482, "grad_norm": 0.04504963010549545, "learning_rate": 1.8154501375152884e-06, "step": 164350 }, { "embedding_loss": 0.0148, "epoch": 9.185897077722524, "grad_norm": 0.12954439222812653, "learning_rate": 1.8092417723640834e-06, "step": 164400 }, { "embedding_loss": 0.0133, "epoch": 9.188690842040565, "grad_norm": 0.07178053259849548, "learning_rate": 1.8030334072128787e-06, "step": 164450 }, { "embedding_loss": 0.0154, "epoch": 9.191484606358607, "grad_norm": 0.0964643582701683, "learning_rate": 1.796825042061674e-06, "step": 164500 }, { "embedding_loss": 0.0153, "epoch": 9.194278370676649, "grad_norm": 0.04136199876666069, "learning_rate": 1.7906166769104694e-06, "step": 164550 }, { "embedding_loss": 0.0154, "epoch": 9.197072134994691, "grad_norm": 0.09591491520404816, "learning_rate": 1.7844083117592644e-06, "step": 164600 }, { "embedding_loss": 0.0149, "epoch": 9.199865899312734, "grad_norm": 0.04187457263469696, "learning_rate": 1.77819994660806e-06, "step": 164650 }, { "embedding_loss": 0.015, "epoch": 9.202659663630776, "grad_norm": 0.041926346719264984, "learning_rate": 1.771991581456855e-06, "step": 164700 }, { "embedding_loss": 0.015, "epoch": 9.205453427948818, "grad_norm": 0.0790950134396553, "learning_rate": 1.7657832163056503e-06, "step": 164750 }, { "embedding_loss": 0.0149, "epoch": 9.20824719226686, "grad_norm": 0.038461022078990936, "learning_rate": 1.7595748511544454e-06, "step": 164800 }, { "embedding_loss": 0.0157, "epoch": 9.211040956584903, "grad_norm": 0.05523988604545593, "learning_rate": 1.753366486003241e-06, "step": 164850 }, { "embedding_loss": 0.0163, "epoch": 9.213834720902945, "grad_norm": 0.050267063081264496, "learning_rate": 1.747158120852036e-06, "step": 164900 }, { "embedding_loss": 0.0152, "epoch": 9.216628485220987, "grad_norm": 0.04718694090843201, "learning_rate": 1.7409497557008316e-06, "step": 164950 }, { "embedding_loss": 0.0144, "epoch": 9.21942224953903, "grad_norm": 0.049239736050367355, "learning_rate": 1.7347413905496267e-06, "step": 165000 }, { "embedding_loss": 0.0142, "epoch": 9.222216013857071, "grad_norm": 0.05572182312607765, "learning_rate": 1.728533025398422e-06, "step": 165050 }, { "embedding_loss": 0.0142, "epoch": 9.225009778175114, "grad_norm": 0.05549968034029007, "learning_rate": 1.722324660247217e-06, "step": 165100 }, { "embedding_loss": 0.0158, "epoch": 9.227803542493156, "grad_norm": 0.04809502512216568, "learning_rate": 1.7161162950960126e-06, "step": 165150 }, { "embedding_loss": 0.0153, "epoch": 9.230597306811198, "grad_norm": 0.041069213300943375, "learning_rate": 1.7099079299448076e-06, "step": 165200 }, { "embedding_loss": 0.0154, "epoch": 9.23339107112924, "grad_norm": 0.0993303507566452, "learning_rate": 1.703699564793603e-06, "step": 165250 }, { "embedding_loss": 0.0152, "epoch": 9.23618483544728, "grad_norm": 0.05832532420754433, "learning_rate": 1.6974911996423983e-06, "step": 165300 }, { "embedding_loss": 0.0154, "epoch": 9.238978599765323, "grad_norm": 0.07033936679363251, "learning_rate": 1.6912828344911936e-06, "step": 165350 }, { "embedding_loss": 0.0162, "epoch": 9.241772364083365, "grad_norm": 0.04890349134802818, "learning_rate": 1.685074469339989e-06, "step": 165400 }, { "embedding_loss": 0.0144, "epoch": 9.244566128401408, "grad_norm": 0.035638175904750824, "learning_rate": 1.6788661041887842e-06, "step": 165450 }, { "embedding_loss": 0.0138, "epoch": 9.24735989271945, "grad_norm": 0.11260495334863663, "learning_rate": 1.6726577390375795e-06, "step": 165500 }, { "embedding_loss": 0.0166, "epoch": 9.250153657037492, "grad_norm": 0.057898588478565216, "learning_rate": 1.6664493738863746e-06, "step": 165550 }, { "embedding_loss": 0.0152, "epoch": 9.252947421355534, "grad_norm": 0.0392274372279644, "learning_rate": 1.66024100873517e-06, "step": 165600 }, { "embedding_loss": 0.0151, "epoch": 9.255741185673577, "grad_norm": 0.06095728650689125, "learning_rate": 1.6540326435839652e-06, "step": 165650 }, { "embedding_loss": 0.0157, "epoch": 9.258534949991619, "grad_norm": 0.0661395937204361, "learning_rate": 1.6478242784327605e-06, "step": 165700 }, { "embedding_loss": 0.0166, "epoch": 9.261328714309661, "grad_norm": 0.084013931453228, "learning_rate": 1.6416159132815558e-06, "step": 165750 }, { "embedding_loss": 0.0166, "epoch": 9.264122478627703, "grad_norm": 0.060134291648864746, "learning_rate": 1.635407548130351e-06, "step": 165800 }, { "embedding_loss": 0.0156, "epoch": 9.266916242945745, "grad_norm": 0.049006711691617966, "learning_rate": 1.6291991829791462e-06, "step": 165850 }, { "embedding_loss": 0.0155, "epoch": 9.269710007263788, "grad_norm": 0.053068093955516815, "learning_rate": 1.6229908178279417e-06, "step": 165900 }, { "embedding_loss": 0.0152, "epoch": 9.27250377158183, "grad_norm": 0.06202986091375351, "learning_rate": 1.6167824526767368e-06, "step": 165950 }, { "embedding_loss": 0.0151, "epoch": 9.275297535899872, "grad_norm": 0.045536793768405914, "learning_rate": 1.610574087525532e-06, "step": 166000 }, { "embedding_loss": 0.0162, "epoch": 9.278091300217914, "grad_norm": 0.12845316529273987, "learning_rate": 1.6043657223743272e-06, "step": 166050 }, { "embedding_loss": 0.0167, "epoch": 9.280885064535955, "grad_norm": 0.08535853773355484, "learning_rate": 1.5981573572231227e-06, "step": 166100 }, { "embedding_loss": 0.0157, "epoch": 9.283678828853997, "grad_norm": 0.03720531612634659, "learning_rate": 1.5919489920719178e-06, "step": 166150 }, { "embedding_loss": 0.0155, "epoch": 9.28647259317204, "grad_norm": 0.05126653239130974, "learning_rate": 1.5857406269207133e-06, "step": 166200 }, { "embedding_loss": 0.0167, "epoch": 9.289266357490082, "grad_norm": 0.058326032012701035, "learning_rate": 1.5795322617695084e-06, "step": 166250 }, { "embedding_loss": 0.0151, "epoch": 9.292060121808124, "grad_norm": 0.0486987940967083, "learning_rate": 1.5733238966183037e-06, "step": 166300 }, { "embedding_loss": 0.0156, "epoch": 9.294853886126166, "grad_norm": 0.045983318239450455, "learning_rate": 1.5671155314670988e-06, "step": 166350 }, { "embedding_loss": 0.0132, "epoch": 9.297647650444208, "grad_norm": 0.024560825899243355, "learning_rate": 1.5609071663158943e-06, "step": 166400 }, { "embedding_loss": 0.0149, "epoch": 9.30044141476225, "grad_norm": 0.0722869262099266, "learning_rate": 1.5546988011646894e-06, "step": 166450 }, { "embedding_loss": 0.0162, "epoch": 9.303235179080293, "grad_norm": 0.08157627284526825, "learning_rate": 1.5484904360134847e-06, "step": 166500 }, { "embedding_loss": 0.0145, "epoch": 9.306028943398335, "grad_norm": 0.026283349841833115, "learning_rate": 1.54228207086228e-06, "step": 166550 }, { "embedding_loss": 0.0154, "epoch": 9.308822707716377, "grad_norm": 0.05732161924242973, "learning_rate": 1.5360737057110753e-06, "step": 166600 }, { "embedding_loss": 0.0148, "epoch": 9.31161647203442, "grad_norm": 0.07738050818443298, "learning_rate": 1.5298653405598704e-06, "step": 166650 }, { "embedding_loss": 0.017, "epoch": 9.314410236352462, "grad_norm": 0.09288697689771652, "learning_rate": 1.5236569754086659e-06, "step": 166700 }, { "embedding_loss": 0.0175, "epoch": 9.317204000670504, "grad_norm": 0.05944402888417244, "learning_rate": 1.517448610257461e-06, "step": 166750 }, { "embedding_loss": 0.0154, "epoch": 9.319997764988546, "grad_norm": 0.044444091618061066, "learning_rate": 1.5112402451062563e-06, "step": 166800 }, { "embedding_loss": 0.0159, "epoch": 9.322791529306588, "grad_norm": 0.06004597246646881, "learning_rate": 1.5050318799550514e-06, "step": 166850 }, { "embedding_loss": 0.0168, "epoch": 9.32558529362463, "grad_norm": 0.05414561927318573, "learning_rate": 1.4988235148038469e-06, "step": 166900 }, { "embedding_loss": 0.015, "epoch": 9.328379057942673, "grad_norm": 0.06842868775129318, "learning_rate": 1.492615149652642e-06, "step": 166950 }, { "embedding_loss": 0.0155, "epoch": 9.331172822260713, "grad_norm": 0.04556742683053017, "learning_rate": 1.4864067845014375e-06, "step": 167000 }, { "embedding_loss": 0.0156, "epoch": 9.333966586578756, "grad_norm": 0.05623120814561844, "learning_rate": 1.4801984193502326e-06, "step": 167050 }, { "embedding_loss": 0.0151, "epoch": 9.336760350896798, "grad_norm": 0.026222139596939087, "learning_rate": 1.4739900541990279e-06, "step": 167100 }, { "embedding_loss": 0.0146, "epoch": 9.33955411521484, "grad_norm": 0.03921249508857727, "learning_rate": 1.467781689047823e-06, "step": 167150 }, { "embedding_loss": 0.015, "epoch": 9.342347879532882, "grad_norm": 0.06639277935028076, "learning_rate": 1.4615733238966185e-06, "step": 167200 }, { "embedding_loss": 0.0155, "epoch": 9.345141643850924, "grad_norm": 0.04248809069395065, "learning_rate": 1.4553649587454136e-06, "step": 167250 }, { "embedding_loss": 0.0149, "epoch": 9.347935408168967, "grad_norm": 0.041203051805496216, "learning_rate": 1.4491565935942089e-06, "step": 167300 }, { "embedding_loss": 0.0173, "epoch": 9.350729172487009, "grad_norm": 0.06416883319616318, "learning_rate": 1.4429482284430042e-06, "step": 167350 }, { "embedding_loss": 0.0154, "epoch": 9.353522936805051, "grad_norm": 0.0616183765232563, "learning_rate": 1.4367398632917995e-06, "step": 167400 }, { "embedding_loss": 0.0161, "epoch": 9.356316701123093, "grad_norm": 0.05286962911486626, "learning_rate": 1.4305314981405948e-06, "step": 167450 }, { "embedding_loss": 0.0164, "epoch": 9.359110465441136, "grad_norm": 0.05711276829242706, "learning_rate": 1.42432313298939e-06, "step": 167500 }, { "embedding_loss": 0.0144, "epoch": 9.361904229759178, "grad_norm": 0.040416523814201355, "learning_rate": 1.4181147678381854e-06, "step": 167550 }, { "embedding_loss": 0.0159, "epoch": 9.36469799407722, "grad_norm": 0.08377740532159805, "learning_rate": 1.4119064026869805e-06, "step": 167600 }, { "embedding_loss": 0.015, "epoch": 9.367491758395262, "grad_norm": 0.05509188771247864, "learning_rate": 1.405698037535776e-06, "step": 167650 }, { "embedding_loss": 0.0167, "epoch": 9.370285522713305, "grad_norm": 0.03316820412874222, "learning_rate": 1.399489672384571e-06, "step": 167700 }, { "embedding_loss": 0.0163, "epoch": 9.373079287031347, "grad_norm": 0.03886551037430763, "learning_rate": 1.3932813072333664e-06, "step": 167750 }, { "embedding_loss": 0.0158, "epoch": 9.375873051349387, "grad_norm": 0.10700199007987976, "learning_rate": 1.3870729420821617e-06, "step": 167800 }, { "embedding_loss": 0.016, "epoch": 9.37866681566743, "grad_norm": 0.045231953263282776, "learning_rate": 1.380864576930957e-06, "step": 167850 }, { "embedding_loss": 0.0155, "epoch": 9.381460579985472, "grad_norm": 0.06922867149114609, "learning_rate": 1.374656211779752e-06, "step": 167900 }, { "embedding_loss": 0.0174, "epoch": 9.384254344303514, "grad_norm": 0.03936642035841942, "learning_rate": 1.3684478466285476e-06, "step": 167950 }, { "embedding_loss": 0.0162, "epoch": 9.387048108621556, "grad_norm": 0.09735462069511414, "learning_rate": 1.3622394814773427e-06, "step": 168000 }, { "embedding_loss": 0.0163, "epoch": 9.389841872939598, "grad_norm": 0.05434006452560425, "learning_rate": 1.356031116326138e-06, "step": 168050 }, { "embedding_loss": 0.0146, "epoch": 9.39263563725764, "grad_norm": 0.07327135652303696, "learning_rate": 1.349822751174933e-06, "step": 168100 }, { "embedding_loss": 0.0162, "epoch": 9.395429401575683, "grad_norm": 0.07481716573238373, "learning_rate": 1.3436143860237286e-06, "step": 168150 }, { "embedding_loss": 0.0164, "epoch": 9.398223165893725, "grad_norm": 0.06422816962003708, "learning_rate": 1.3374060208725237e-06, "step": 168200 }, { "embedding_loss": 0.0159, "epoch": 9.401016930211767, "grad_norm": 0.07814431935548782, "learning_rate": 1.331197655721319e-06, "step": 168250 }, { "embedding_loss": 0.0163, "epoch": 9.40381069452981, "grad_norm": 0.1068698912858963, "learning_rate": 1.3249892905701143e-06, "step": 168300 }, { "embedding_loss": 0.0161, "epoch": 9.406604458847852, "grad_norm": 0.026954196393489838, "learning_rate": 1.3187809254189096e-06, "step": 168350 }, { "embedding_loss": 0.0153, "epoch": 9.409398223165894, "grad_norm": 0.06634292751550674, "learning_rate": 1.3125725602677047e-06, "step": 168400 }, { "embedding_loss": 0.0149, "epoch": 9.412191987483936, "grad_norm": 0.030197761952877045, "learning_rate": 1.3063641951165002e-06, "step": 168450 }, { "embedding_loss": 0.0181, "epoch": 9.414985751801979, "grad_norm": 0.05532224848866463, "learning_rate": 1.3001558299652953e-06, "step": 168500 }, { "embedding_loss": 0.0152, "epoch": 9.41777951612002, "grad_norm": 0.06693398952484131, "learning_rate": 1.2939474648140906e-06, "step": 168550 }, { "embedding_loss": 0.0165, "epoch": 9.420573280438063, "grad_norm": 0.0787341520190239, "learning_rate": 1.287739099662886e-06, "step": 168600 }, { "embedding_loss": 0.0169, "epoch": 9.423367044756104, "grad_norm": 0.1035710871219635, "learning_rate": 1.2815307345116812e-06, "step": 168650 }, { "embedding_loss": 0.0149, "epoch": 9.426160809074146, "grad_norm": 0.05119935795664787, "learning_rate": 1.2753223693604763e-06, "step": 168700 }, { "embedding_loss": 0.0155, "epoch": 9.428954573392188, "grad_norm": 0.07760409265756607, "learning_rate": 1.2691140042092718e-06, "step": 168750 }, { "embedding_loss": 0.0145, "epoch": 9.43174833771023, "grad_norm": 0.041758064180612564, "learning_rate": 1.262905639058067e-06, "step": 168800 }, { "embedding_loss": 0.0151, "epoch": 9.434542102028272, "grad_norm": 0.07390302419662476, "learning_rate": 1.2566972739068622e-06, "step": 168850 }, { "embedding_loss": 0.0147, "epoch": 9.437335866346315, "grad_norm": 0.060873761773109436, "learning_rate": 1.2504889087556573e-06, "step": 168900 }, { "embedding_loss": 0.0157, "epoch": 9.440129630664357, "grad_norm": 0.057726603001356125, "learning_rate": 1.2442805436044528e-06, "step": 168950 }, { "embedding_loss": 0.0166, "epoch": 9.4429233949824, "grad_norm": 0.05388617888092995, "learning_rate": 1.238072178453248e-06, "step": 169000 }, { "embedding_loss": 0.0139, "epoch": 9.445717159300441, "grad_norm": 0.04536530748009682, "learning_rate": 1.2318638133020432e-06, "step": 169050 }, { "embedding_loss": 0.0149, "epoch": 9.448510923618484, "grad_norm": 0.06704512238502502, "learning_rate": 1.2256554481508385e-06, "step": 169100 }, { "embedding_loss": 0.0151, "epoch": 9.451304687936526, "grad_norm": 0.10795387625694275, "learning_rate": 1.2194470829996338e-06, "step": 169150 }, { "embedding_loss": 0.0151, "epoch": 9.454098452254568, "grad_norm": 0.05211743339896202, "learning_rate": 1.213238717848429e-06, "step": 169200 }, { "embedding_loss": 0.0149, "epoch": 9.45689221657261, "grad_norm": 0.1304827332496643, "learning_rate": 1.2070303526972244e-06, "step": 169250 }, { "embedding_loss": 0.0157, "epoch": 9.459685980890653, "grad_norm": 0.05449260026216507, "learning_rate": 1.2008219875460197e-06, "step": 169300 }, { "embedding_loss": 0.0163, "epoch": 9.462479745208695, "grad_norm": 0.06524305045604706, "learning_rate": 1.1946136223948148e-06, "step": 169350 }, { "embedding_loss": 0.0155, "epoch": 9.465273509526737, "grad_norm": 0.0559932217001915, "learning_rate": 1.18840525724361e-06, "step": 169400 }, { "embedding_loss": 0.0145, "epoch": 9.46806727384478, "grad_norm": 0.0782332494854927, "learning_rate": 1.1821968920924054e-06, "step": 169450 }, { "embedding_loss": 0.0148, "epoch": 9.47086103816282, "grad_norm": 0.05917515605688095, "learning_rate": 1.1759885269412007e-06, "step": 169500 }, { "embedding_loss": 0.0144, "epoch": 9.473654802480862, "grad_norm": 0.11685115844011307, "learning_rate": 1.169780161789996e-06, "step": 169550 }, { "embedding_loss": 0.0151, "epoch": 9.476448566798904, "grad_norm": 0.04868616536259651, "learning_rate": 1.163571796638791e-06, "step": 169600 }, { "embedding_loss": 0.0159, "epoch": 9.479242331116946, "grad_norm": 0.03021172434091568, "learning_rate": 1.1573634314875864e-06, "step": 169650 }, { "embedding_loss": 0.0153, "epoch": 9.482036095434989, "grad_norm": 0.057947319000959396, "learning_rate": 1.1511550663363817e-06, "step": 169700 }, { "embedding_loss": 0.0145, "epoch": 9.484829859753031, "grad_norm": 0.049395643174648285, "learning_rate": 1.144946701185177e-06, "step": 169750 }, { "embedding_loss": 0.0161, "epoch": 9.487623624071073, "grad_norm": 0.02916993573307991, "learning_rate": 1.1387383360339723e-06, "step": 169800 }, { "embedding_loss": 0.0162, "epoch": 9.490417388389115, "grad_norm": 0.07885372638702393, "learning_rate": 1.1325299708827674e-06, "step": 169850 }, { "embedding_loss": 0.0159, "epoch": 9.493211152707158, "grad_norm": 0.055373597890138626, "learning_rate": 1.1263216057315627e-06, "step": 169900 }, { "embedding_loss": 0.0153, "epoch": 9.4960049170252, "grad_norm": 0.04432480409741402, "learning_rate": 1.120113240580358e-06, "step": 169950 }, { "embedding_loss": 0.0151, "epoch": 9.498798681343242, "grad_norm": 0.03774315491318703, "learning_rate": 1.1139048754291533e-06, "step": 170000 }, { "embedding_loss": 0.016, "epoch": 9.501592445661284, "grad_norm": 0.054884810000658035, "learning_rate": 1.1076965102779486e-06, "step": 170050 }, { "embedding_loss": 0.0147, "epoch": 9.504386209979327, "grad_norm": 0.0441933274269104, "learning_rate": 1.101488145126744e-06, "step": 170100 }, { "embedding_loss": 0.0177, "epoch": 9.507179974297369, "grad_norm": 0.06400329619646072, "learning_rate": 1.095279779975539e-06, "step": 170150 }, { "embedding_loss": 0.0146, "epoch": 9.509973738615411, "grad_norm": 0.11967655271291733, "learning_rate": 1.0890714148243343e-06, "step": 170200 }, { "embedding_loss": 0.0166, "epoch": 9.512767502933453, "grad_norm": 0.0738925039768219, "learning_rate": 1.0828630496731296e-06, "step": 170250 }, { "embedding_loss": 0.0157, "epoch": 9.515561267251496, "grad_norm": 0.0929449275135994, "learning_rate": 1.076654684521925e-06, "step": 170300 }, { "embedding_loss": 0.0142, "epoch": 9.518355031569536, "grad_norm": 0.060912493616342545, "learning_rate": 1.0704463193707202e-06, "step": 170350 }, { "embedding_loss": 0.0164, "epoch": 9.521148795887578, "grad_norm": 0.05678689107298851, "learning_rate": 1.0642379542195153e-06, "step": 170400 }, { "embedding_loss": 0.0148, "epoch": 9.52394256020562, "grad_norm": 0.0876108705997467, "learning_rate": 1.0580295890683108e-06, "step": 170450 }, { "embedding_loss": 0.0155, "epoch": 9.526736324523663, "grad_norm": 0.1202625259757042, "learning_rate": 1.0518212239171061e-06, "step": 170500 }, { "embedding_loss": 0.0144, "epoch": 9.529530088841705, "grad_norm": 0.03313468396663666, "learning_rate": 1.0456128587659014e-06, "step": 170550 }, { "embedding_loss": 0.0145, "epoch": 9.532323853159747, "grad_norm": 0.0683431401848793, "learning_rate": 1.0394044936146965e-06, "step": 170600 }, { "embedding_loss": 0.0171, "epoch": 9.53511761747779, "grad_norm": 0.1346394568681717, "learning_rate": 1.0331961284634918e-06, "step": 170650 }, { "embedding_loss": 0.0158, "epoch": 9.537911381795832, "grad_norm": 0.06161856651306152, "learning_rate": 1.0269877633122871e-06, "step": 170700 }, { "embedding_loss": 0.0147, "epoch": 9.540705146113874, "grad_norm": 0.043467454612255096, "learning_rate": 1.0207793981610824e-06, "step": 170750 }, { "embedding_loss": 0.014, "epoch": 9.543498910431916, "grad_norm": 0.07706349343061447, "learning_rate": 1.0145710330098777e-06, "step": 170800 }, { "embedding_loss": 0.0166, "epoch": 9.546292674749958, "grad_norm": 0.09053578227758408, "learning_rate": 1.0083626678586728e-06, "step": 170850 }, { "embedding_loss": 0.0138, "epoch": 9.549086439068, "grad_norm": 0.061312235891819, "learning_rate": 1.0021543027074681e-06, "step": 170900 }, { "embedding_loss": 0.015, "epoch": 9.551880203386043, "grad_norm": 0.06020141765475273, "learning_rate": 9.959459375562634e-07, "step": 170950 }, { "embedding_loss": 0.0155, "epoch": 9.554673967704085, "grad_norm": 0.056792739778757095, "learning_rate": 9.897375724050587e-07, "step": 171000 }, { "embedding_loss": 0.0159, "epoch": 9.557467732022127, "grad_norm": 0.03778080269694328, "learning_rate": 9.83529207253854e-07, "step": 171050 }, { "embedding_loss": 0.0139, "epoch": 9.56026149634017, "grad_norm": 0.05465861037373543, "learning_rate": 9.773208421026491e-07, "step": 171100 }, { "embedding_loss": 0.0136, "epoch": 9.56305526065821, "grad_norm": 0.06093434989452362, "learning_rate": 9.711124769514444e-07, "step": 171150 }, { "embedding_loss": 0.0147, "epoch": 9.565849024976252, "grad_norm": 0.09028757363557816, "learning_rate": 9.649041118002397e-07, "step": 171200 }, { "embedding_loss": 0.0157, "epoch": 9.568642789294294, "grad_norm": 0.0430658683180809, "learning_rate": 9.58695746649035e-07, "step": 171250 }, { "embedding_loss": 0.0135, "epoch": 9.571436553612337, "grad_norm": 0.06472203880548477, "learning_rate": 9.524873814978302e-07, "step": 171300 }, { "embedding_loss": 0.0151, "epoch": 9.574230317930379, "grad_norm": 0.030231958255171776, "learning_rate": 9.462790163466255e-07, "step": 171350 }, { "embedding_loss": 0.0166, "epoch": 9.577024082248421, "grad_norm": 0.03969379886984825, "learning_rate": 9.400706511954208e-07, "step": 171400 }, { "embedding_loss": 0.0153, "epoch": 9.579817846566463, "grad_norm": 0.06954512745141983, "learning_rate": 9.33862286044216e-07, "step": 171450 }, { "embedding_loss": 0.0153, "epoch": 9.582611610884506, "grad_norm": 0.06431099772453308, "learning_rate": 9.276539208930113e-07, "step": 171500 }, { "embedding_loss": 0.0164, "epoch": 9.585405375202548, "grad_norm": 0.03209669142961502, "learning_rate": 9.214455557418066e-07, "step": 171550 }, { "embedding_loss": 0.0138, "epoch": 9.58819913952059, "grad_norm": 0.04812703654170036, "learning_rate": 9.152371905906018e-07, "step": 171600 }, { "embedding_loss": 0.0139, "epoch": 9.590992903838632, "grad_norm": 0.05922723561525345, "learning_rate": 9.090288254393971e-07, "step": 171650 }, { "embedding_loss": 0.015, "epoch": 9.593786668156675, "grad_norm": 0.0755658745765686, "learning_rate": 9.028204602881923e-07, "step": 171700 }, { "embedding_loss": 0.0153, "epoch": 9.596580432474717, "grad_norm": 0.047429751604795456, "learning_rate": 8.966120951369876e-07, "step": 171750 }, { "embedding_loss": 0.016, "epoch": 9.599374196792759, "grad_norm": 0.039230313152074814, "learning_rate": 8.904037299857829e-07, "step": 171800 }, { "embedding_loss": 0.0158, "epoch": 9.602167961110801, "grad_norm": 0.0499185174703598, "learning_rate": 8.841953648345781e-07, "step": 171850 }, { "embedding_loss": 0.015, "epoch": 9.604961725428844, "grad_norm": 0.056891318410634995, "learning_rate": 8.779869996833734e-07, "step": 171900 }, { "embedding_loss": 0.0159, "epoch": 9.607755489746886, "grad_norm": 0.06430607289075851, "learning_rate": 8.717786345321687e-07, "step": 171950 }, { "embedding_loss": 0.0171, "epoch": 9.610549254064928, "grad_norm": 0.055571068078279495, "learning_rate": 8.655702693809639e-07, "step": 172000 }, { "embedding_loss": 0.0166, "epoch": 9.613343018382968, "grad_norm": 0.04518261179327965, "learning_rate": 8.593619042297592e-07, "step": 172050 }, { "embedding_loss": 0.0141, "epoch": 9.61613678270101, "grad_norm": 0.10038986057043076, "learning_rate": 8.531535390785544e-07, "step": 172100 }, { "embedding_loss": 0.0157, "epoch": 9.618930547019053, "grad_norm": 0.060151729732751846, "learning_rate": 8.469451739273497e-07, "step": 172150 }, { "embedding_loss": 0.0161, "epoch": 9.621724311337095, "grad_norm": 0.08050819486379623, "learning_rate": 8.40736808776145e-07, "step": 172200 }, { "embedding_loss": 0.015, "epoch": 9.624518075655137, "grad_norm": 0.13005058467388153, "learning_rate": 8.345284436249402e-07, "step": 172250 }, { "embedding_loss": 0.0169, "epoch": 9.62731183997318, "grad_norm": 0.04868805408477783, "learning_rate": 8.283200784737355e-07, "step": 172300 }, { "embedding_loss": 0.0168, "epoch": 9.630105604291222, "grad_norm": 0.05622735247015953, "learning_rate": 8.221117133225308e-07, "step": 172350 }, { "embedding_loss": 0.0143, "epoch": 9.632899368609264, "grad_norm": 0.04232439398765564, "learning_rate": 8.15903348171326e-07, "step": 172400 }, { "embedding_loss": 0.0161, "epoch": 9.635693132927306, "grad_norm": 0.07719841599464417, "learning_rate": 8.096949830201213e-07, "step": 172450 }, { "embedding_loss": 0.0158, "epoch": 9.638486897245349, "grad_norm": 0.05674806237220764, "learning_rate": 8.034866178689167e-07, "step": 172500 }, { "embedding_loss": 0.0158, "epoch": 9.64128066156339, "grad_norm": 0.0408230721950531, "learning_rate": 7.972782527177119e-07, "step": 172550 }, { "embedding_loss": 0.0138, "epoch": 9.644074425881433, "grad_norm": 0.07332329452037811, "learning_rate": 7.910698875665072e-07, "step": 172600 }, { "embedding_loss": 0.0138, "epoch": 9.646868190199475, "grad_norm": 0.04048895090818405, "learning_rate": 7.848615224153025e-07, "step": 172650 }, { "embedding_loss": 0.0134, "epoch": 9.649661954517518, "grad_norm": 0.050506412982940674, "learning_rate": 7.786531572640977e-07, "step": 172700 }, { "embedding_loss": 0.0149, "epoch": 9.65245571883556, "grad_norm": 0.04874931275844574, "learning_rate": 7.72444792112893e-07, "step": 172750 }, { "embedding_loss": 0.0146, "epoch": 9.655249483153602, "grad_norm": 0.06552176177501678, "learning_rate": 7.662364269616882e-07, "step": 172800 }, { "embedding_loss": 0.0159, "epoch": 9.658043247471642, "grad_norm": 0.07190211862325668, "learning_rate": 7.600280618104835e-07, "step": 172850 }, { "embedding_loss": 0.0162, "epoch": 9.660837011789685, "grad_norm": 0.05614541098475456, "learning_rate": 7.538196966592788e-07, "step": 172900 }, { "embedding_loss": 0.0172, "epoch": 9.663630776107727, "grad_norm": 0.03671416640281677, "learning_rate": 7.47611331508074e-07, "step": 172950 }, { "embedding_loss": 0.0156, "epoch": 9.66642454042577, "grad_norm": 0.030775878578424454, "learning_rate": 7.414029663568693e-07, "step": 173000 }, { "embedding_loss": 0.0132, "epoch": 9.669218304743811, "grad_norm": 0.06186706945300102, "learning_rate": 7.351946012056647e-07, "step": 173050 }, { "embedding_loss": 0.0156, "epoch": 9.672012069061854, "grad_norm": 0.08452620357275009, "learning_rate": 7.289862360544598e-07, "step": 173100 }, { "embedding_loss": 0.0161, "epoch": 9.674805833379896, "grad_norm": 0.05132485181093216, "learning_rate": 7.227778709032551e-07, "step": 173150 }, { "embedding_loss": 0.0148, "epoch": 9.677599597697938, "grad_norm": 0.06832301616668701, "learning_rate": 7.165695057520503e-07, "step": 173200 }, { "embedding_loss": 0.0148, "epoch": 9.68039336201598, "grad_norm": 0.03394049406051636, "learning_rate": 7.103611406008456e-07, "step": 173250 }, { "embedding_loss": 0.0146, "epoch": 9.683187126334023, "grad_norm": 0.05936712399125099, "learning_rate": 7.04152775449641e-07, "step": 173300 }, { "embedding_loss": 0.0169, "epoch": 9.685980890652065, "grad_norm": 0.057083893567323685, "learning_rate": 6.979444102984361e-07, "step": 173350 }, { "embedding_loss": 0.0164, "epoch": 9.688774654970107, "grad_norm": 0.08525379747152328, "learning_rate": 6.917360451472315e-07, "step": 173400 }, { "embedding_loss": 0.0149, "epoch": 9.69156841928815, "grad_norm": 0.04801617190241814, "learning_rate": 6.855276799960268e-07, "step": 173450 }, { "embedding_loss": 0.0147, "epoch": 9.694362183606192, "grad_norm": 0.06048130616545677, "learning_rate": 6.79319314844822e-07, "step": 173500 }, { "embedding_loss": 0.0162, "epoch": 9.697155947924234, "grad_norm": 0.057179972529411316, "learning_rate": 6.731109496936173e-07, "step": 173550 }, { "embedding_loss": 0.0156, "epoch": 9.699949712242276, "grad_norm": 0.08637413382530212, "learning_rate": 6.669025845424124e-07, "step": 173600 }, { "embedding_loss": 0.0153, "epoch": 9.702743476560318, "grad_norm": 0.0480232909321785, "learning_rate": 6.606942193912078e-07, "step": 173650 }, { "embedding_loss": 0.016, "epoch": 9.705537240878359, "grad_norm": 0.03687522932887077, "learning_rate": 6.544858542400031e-07, "step": 173700 }, { "embedding_loss": 0.0166, "epoch": 9.708331005196401, "grad_norm": 0.06541021913290024, "learning_rate": 6.482774890887983e-07, "step": 173750 }, { "embedding_loss": 0.014, "epoch": 9.711124769514443, "grad_norm": 0.05307541415095329, "learning_rate": 6.420691239375936e-07, "step": 173800 }, { "embedding_loss": 0.016, "epoch": 9.713918533832485, "grad_norm": 0.05334112048149109, "learning_rate": 6.358607587863889e-07, "step": 173850 }, { "embedding_loss": 0.0157, "epoch": 9.716712298150528, "grad_norm": 0.0737810805439949, "learning_rate": 6.296523936351841e-07, "step": 173900 }, { "embedding_loss": 0.0155, "epoch": 9.71950606246857, "grad_norm": 0.05218750238418579, "learning_rate": 6.234440284839794e-07, "step": 173950 }, { "embedding_loss": 0.0165, "epoch": 9.722299826786612, "grad_norm": 0.05030455440282822, "learning_rate": 6.172356633327746e-07, "step": 174000 }, { "embedding_loss": 0.0148, "epoch": 9.725093591104654, "grad_norm": 0.0475066676735878, "learning_rate": 6.1102729818157e-07, "step": 174050 }, { "embedding_loss": 0.0145, "epoch": 9.727887355422697, "grad_norm": 0.08541229367256165, "learning_rate": 6.048189330303652e-07, "step": 174100 }, { "embedding_loss": 0.0152, "epoch": 9.730681119740739, "grad_norm": 0.050043195486068726, "learning_rate": 5.986105678791605e-07, "step": 174150 }, { "embedding_loss": 0.016, "epoch": 9.733474884058781, "grad_norm": 0.061092082411050797, "learning_rate": 5.924022027279558e-07, "step": 174200 }, { "embedding_loss": 0.0157, "epoch": 9.736268648376823, "grad_norm": 0.05706522986292839, "learning_rate": 5.86193837576751e-07, "step": 174250 }, { "embedding_loss": 0.0158, "epoch": 9.739062412694866, "grad_norm": 0.03432847559452057, "learning_rate": 5.799854724255463e-07, "step": 174300 }, { "embedding_loss": 0.0153, "epoch": 9.741856177012908, "grad_norm": 0.05427205190062523, "learning_rate": 5.737771072743415e-07, "step": 174350 }, { "embedding_loss": 0.0178, "epoch": 9.74464994133095, "grad_norm": 0.09904669225215912, "learning_rate": 5.675687421231368e-07, "step": 174400 }, { "embedding_loss": 0.0149, "epoch": 9.747443705648992, "grad_norm": 0.04487961530685425, "learning_rate": 5.613603769719321e-07, "step": 174450 }, { "embedding_loss": 0.0145, "epoch": 9.750237469967033, "grad_norm": 0.04429694265127182, "learning_rate": 5.551520118207273e-07, "step": 174500 }, { "embedding_loss": 0.0162, "epoch": 9.753031234285075, "grad_norm": 0.05616743117570877, "learning_rate": 5.489436466695226e-07, "step": 174550 }, { "embedding_loss": 0.0165, "epoch": 9.755824998603117, "grad_norm": 0.09186428785324097, "learning_rate": 5.427352815183179e-07, "step": 174600 }, { "embedding_loss": 0.0162, "epoch": 9.75861876292116, "grad_norm": 0.043210938572883606, "learning_rate": 5.365269163671131e-07, "step": 174650 }, { "embedding_loss": 0.0174, "epoch": 9.761412527239202, "grad_norm": 0.04739660024642944, "learning_rate": 5.303185512159084e-07, "step": 174700 }, { "embedding_loss": 0.0156, "epoch": 9.764206291557244, "grad_norm": 0.05904650315642357, "learning_rate": 5.241101860647036e-07, "step": 174750 }, { "embedding_loss": 0.0142, "epoch": 9.767000055875286, "grad_norm": 0.03177081421017647, "learning_rate": 5.179018209134989e-07, "step": 174800 }, { "embedding_loss": 0.0151, "epoch": 9.769793820193328, "grad_norm": 0.06185535341501236, "learning_rate": 5.116934557622942e-07, "step": 174850 }, { "embedding_loss": 0.0151, "epoch": 9.77258758451137, "grad_norm": 0.04885827377438545, "learning_rate": 5.054850906110894e-07, "step": 174900 }, { "embedding_loss": 0.0165, "epoch": 9.775381348829413, "grad_norm": 0.041246477514505386, "learning_rate": 4.992767254598847e-07, "step": 174950 }, { "embedding_loss": 0.0157, "epoch": 9.778175113147455, "grad_norm": 0.06119820848107338, "learning_rate": 4.9306836030868e-07, "step": 175000 }, { "embedding_loss": 0.0137, "epoch": 9.780968877465497, "grad_norm": 0.07492721825838089, "learning_rate": 4.868599951574752e-07, "step": 175050 }, { "embedding_loss": 0.0177, "epoch": 9.78376264178354, "grad_norm": 0.03374669700860977, "learning_rate": 4.806516300062705e-07, "step": 175100 }, { "embedding_loss": 0.014, "epoch": 9.786556406101582, "grad_norm": 0.04329746961593628, "learning_rate": 4.7444326485506577e-07, "step": 175150 }, { "embedding_loss": 0.0146, "epoch": 9.789350170419624, "grad_norm": 0.07072066515684128, "learning_rate": 4.68234899703861e-07, "step": 175200 }, { "embedding_loss": 0.0148, "epoch": 9.792143934737666, "grad_norm": 0.07216149568557739, "learning_rate": 4.620265345526563e-07, "step": 175250 }, { "embedding_loss": 0.0143, "epoch": 9.794937699055708, "grad_norm": 0.042551737278699875, "learning_rate": 4.5581816940145157e-07, "step": 175300 }, { "embedding_loss": 0.015, "epoch": 9.79773146337375, "grad_norm": 0.026629043743014336, "learning_rate": 4.496098042502468e-07, "step": 175350 }, { "embedding_loss": 0.0158, "epoch": 9.800525227691791, "grad_norm": 0.04261905327439308, "learning_rate": 4.4340143909904207e-07, "step": 175400 }, { "embedding_loss": 0.0161, "epoch": 9.803318992009833, "grad_norm": 0.06466835737228394, "learning_rate": 4.371930739478374e-07, "step": 175450 }, { "embedding_loss": 0.0149, "epoch": 9.806112756327876, "grad_norm": 0.06306499242782593, "learning_rate": 4.309847087966326e-07, "step": 175500 }, { "embedding_loss": 0.0162, "epoch": 9.808906520645918, "grad_norm": 0.1226004958152771, "learning_rate": 4.247763436454279e-07, "step": 175550 }, { "embedding_loss": 0.0152, "epoch": 9.81170028496396, "grad_norm": 0.056616250425577164, "learning_rate": 4.185679784942231e-07, "step": 175600 }, { "embedding_loss": 0.016, "epoch": 9.814494049282002, "grad_norm": 0.09914407134056091, "learning_rate": 4.1235961334301843e-07, "step": 175650 }, { "embedding_loss": 0.0144, "epoch": 9.817287813600045, "grad_norm": 0.0610104575753212, "learning_rate": 4.061512481918137e-07, "step": 175700 }, { "embedding_loss": 0.0153, "epoch": 9.820081577918087, "grad_norm": 0.061603669077157974, "learning_rate": 3.999428830406089e-07, "step": 175750 }, { "embedding_loss": 0.0164, "epoch": 9.822875342236129, "grad_norm": 0.055407728999853134, "learning_rate": 3.937345178894042e-07, "step": 175800 }, { "embedding_loss": 0.0154, "epoch": 9.825669106554171, "grad_norm": 0.059301335364580154, "learning_rate": 3.875261527381995e-07, "step": 175850 }, { "embedding_loss": 0.0145, "epoch": 9.828462870872213, "grad_norm": 0.02248278073966503, "learning_rate": 3.8131778758699473e-07, "step": 175900 }, { "embedding_loss": 0.0179, "epoch": 9.831256635190256, "grad_norm": 0.06880957633256912, "learning_rate": 3.7510942243579e-07, "step": 175950 }, { "embedding_loss": 0.0164, "epoch": 9.834050399508298, "grad_norm": 0.05740993842482567, "learning_rate": 3.6890105728458523e-07, "step": 176000 }, { "embedding_loss": 0.0167, "epoch": 9.83684416382634, "grad_norm": 0.09515996277332306, "learning_rate": 3.6269269213338053e-07, "step": 176050 }, { "embedding_loss": 0.0163, "epoch": 9.839637928144382, "grad_norm": 0.038796231150627136, "learning_rate": 3.5648432698217583e-07, "step": 176100 }, { "embedding_loss": 0.015, "epoch": 9.842431692462425, "grad_norm": 0.08688762038946152, "learning_rate": 3.5027596183097113e-07, "step": 176150 }, { "embedding_loss": 0.0142, "epoch": 9.845225456780465, "grad_norm": 0.06452853232622147, "learning_rate": 3.440675966797664e-07, "step": 176200 }, { "embedding_loss": 0.0149, "epoch": 9.848019221098507, "grad_norm": 0.06985889375209808, "learning_rate": 3.3785923152856163e-07, "step": 176250 }, { "embedding_loss": 0.0154, "epoch": 9.85081298541655, "grad_norm": 0.030967745929956436, "learning_rate": 3.316508663773569e-07, "step": 176300 }, { "embedding_loss": 0.0157, "epoch": 9.853606749734592, "grad_norm": 0.0648987889289856, "learning_rate": 3.254425012261522e-07, "step": 176350 }, { "embedding_loss": 0.0152, "epoch": 9.856400514052634, "grad_norm": 0.07367976009845734, "learning_rate": 3.1923413607494744e-07, "step": 176400 }, { "embedding_loss": 0.0162, "epoch": 9.859194278370676, "grad_norm": 0.029789457097649574, "learning_rate": 3.130257709237427e-07, "step": 176450 }, { "embedding_loss": 0.0157, "epoch": 9.861988042688719, "grad_norm": 0.06485350430011749, "learning_rate": 3.0681740577253794e-07, "step": 176500 }, { "embedding_loss": 0.0146, "epoch": 9.86478180700676, "grad_norm": 0.05797067657113075, "learning_rate": 3.0060904062133324e-07, "step": 176550 }, { "embedding_loss": 0.0168, "epoch": 9.867575571324803, "grad_norm": 0.055623218417167664, "learning_rate": 2.944006754701285e-07, "step": 176600 }, { "embedding_loss": 0.0153, "epoch": 9.870369335642845, "grad_norm": 0.053547751158475876, "learning_rate": 2.8819231031892374e-07, "step": 176650 }, { "embedding_loss": 0.0152, "epoch": 9.873163099960887, "grad_norm": 0.07015665620565414, "learning_rate": 2.81983945167719e-07, "step": 176700 }, { "embedding_loss": 0.0154, "epoch": 9.87595686427893, "grad_norm": 0.14789582788944244, "learning_rate": 2.757755800165143e-07, "step": 176750 }, { "embedding_loss": 0.0142, "epoch": 9.878750628596972, "grad_norm": 0.04666084796190262, "learning_rate": 2.6956721486530954e-07, "step": 176800 }, { "embedding_loss": 0.0147, "epoch": 9.881544392915014, "grad_norm": 0.04909820109605789, "learning_rate": 2.6335884971410484e-07, "step": 176850 }, { "embedding_loss": 0.015, "epoch": 9.884338157233056, "grad_norm": 0.038890670984983444, "learning_rate": 2.571504845629001e-07, "step": 176900 }, { "embedding_loss": 0.0158, "epoch": 9.887131921551099, "grad_norm": 0.10520564764738083, "learning_rate": 2.5094211941169534e-07, "step": 176950 }, { "embedding_loss": 0.0152, "epoch": 9.889925685869141, "grad_norm": 0.04814615845680237, "learning_rate": 2.447337542604906e-07, "step": 177000 }, { "embedding_loss": 0.0162, "epoch": 9.892719450187183, "grad_norm": 0.03262645751237869, "learning_rate": 2.385253891092859e-07, "step": 177050 }, { "embedding_loss": 0.0145, "epoch": 9.895513214505224, "grad_norm": 0.05738615617156029, "learning_rate": 2.3231702395808114e-07, "step": 177100 }, { "embedding_loss": 0.0144, "epoch": 9.898306978823266, "grad_norm": 0.048036884516477585, "learning_rate": 2.261086588068764e-07, "step": 177150 }, { "embedding_loss": 0.015, "epoch": 9.901100743141308, "grad_norm": 0.055350162088871, "learning_rate": 2.1990029365567167e-07, "step": 177200 }, { "embedding_loss": 0.0176, "epoch": 9.90389450745935, "grad_norm": 0.05247173458337784, "learning_rate": 2.1369192850446692e-07, "step": 177250 }, { "embedding_loss": 0.0145, "epoch": 9.906688271777393, "grad_norm": 0.06052635237574577, "learning_rate": 2.074835633532622e-07, "step": 177300 }, { "embedding_loss": 0.0167, "epoch": 9.909482036095435, "grad_norm": 0.11479943990707397, "learning_rate": 2.0127519820205744e-07, "step": 177350 }, { "embedding_loss": 0.0154, "epoch": 9.912275800413477, "grad_norm": 0.06444258987903595, "learning_rate": 1.9506683305085275e-07, "step": 177400 }, { "embedding_loss": 0.0148, "epoch": 9.91506956473152, "grad_norm": 0.07587450742721558, "learning_rate": 1.8885846789964802e-07, "step": 177450 }, { "embedding_loss": 0.0157, "epoch": 9.917863329049561, "grad_norm": 0.04983720928430557, "learning_rate": 1.8265010274844327e-07, "step": 177500 }, { "embedding_loss": 0.0137, "epoch": 9.920657093367604, "grad_norm": 0.04526406526565552, "learning_rate": 1.7644173759723855e-07, "step": 177550 }, { "embedding_loss": 0.0162, "epoch": 9.923450857685646, "grad_norm": 0.04360136762261391, "learning_rate": 1.702333724460338e-07, "step": 177600 }, { "embedding_loss": 0.0163, "epoch": 9.926244622003688, "grad_norm": 0.03790513426065445, "learning_rate": 1.6402500729482907e-07, "step": 177650 }, { "embedding_loss": 0.0168, "epoch": 9.92903838632173, "grad_norm": 0.08325926959514618, "learning_rate": 1.5781664214362432e-07, "step": 177700 }, { "embedding_loss": 0.0142, "epoch": 9.931832150639773, "grad_norm": 0.035275012254714966, "learning_rate": 1.516082769924196e-07, "step": 177750 }, { "embedding_loss": 0.016, "epoch": 9.934625914957815, "grad_norm": 0.06301518529653549, "learning_rate": 1.4539991184121488e-07, "step": 177800 }, { "embedding_loss": 0.0158, "epoch": 9.937419679275857, "grad_norm": 0.05105563998222351, "learning_rate": 1.3919154669001013e-07, "step": 177850 }, { "embedding_loss": 0.0154, "epoch": 9.940213443593898, "grad_norm": 0.05071847513318062, "learning_rate": 1.329831815388054e-07, "step": 177900 }, { "embedding_loss": 0.0163, "epoch": 9.94300720791194, "grad_norm": 0.028682498261332512, "learning_rate": 1.2677481638760065e-07, "step": 177950 }, { "embedding_loss": 0.016, "epoch": 9.945800972229982, "grad_norm": 0.041561830788850784, "learning_rate": 1.2056645123639593e-07, "step": 178000 }, { "embedding_loss": 0.0142, "epoch": 9.948594736548024, "grad_norm": 0.07538501173257828, "learning_rate": 1.143580860851912e-07, "step": 178050 }, { "embedding_loss": 0.0164, "epoch": 9.951388500866067, "grad_norm": 0.054089102894067764, "learning_rate": 1.0814972093398647e-07, "step": 178100 }, { "embedding_loss": 0.0165, "epoch": 9.954182265184109, "grad_norm": 0.043543897569179535, "learning_rate": 1.0194135578278173e-07, "step": 178150 }, { "embedding_loss": 0.015, "epoch": 9.956976029502151, "grad_norm": 0.041517991572618484, "learning_rate": 9.573299063157699e-08, "step": 178200 }, { "embedding_loss": 0.0148, "epoch": 9.959769793820193, "grad_norm": 0.06052238866686821, "learning_rate": 8.952462548037226e-08, "step": 178250 }, { "embedding_loss": 0.0158, "epoch": 9.962563558138235, "grad_norm": 0.05422796681523323, "learning_rate": 8.331626032916753e-08, "step": 178300 }, { "embedding_loss": 0.0151, "epoch": 9.965357322456278, "grad_norm": 0.039732396602630615, "learning_rate": 7.71078951779628e-08, "step": 178350 }, { "embedding_loss": 0.0163, "epoch": 9.96815108677432, "grad_norm": 0.09346306324005127, "learning_rate": 7.089953002675806e-08, "step": 178400 }, { "embedding_loss": 0.0156, "epoch": 9.970944851092362, "grad_norm": 0.05334315821528435, "learning_rate": 6.469116487555332e-08, "step": 178450 }, { "embedding_loss": 0.0161, "epoch": 9.973738615410404, "grad_norm": 0.053550608456134796, "learning_rate": 5.8482799724348596e-08, "step": 178500 }, { "embedding_loss": 0.0163, "epoch": 9.976532379728447, "grad_norm": 0.04945642501115799, "learning_rate": 5.227443457314386e-08, "step": 178550 }, { "embedding_loss": 0.0143, "epoch": 9.979326144046489, "grad_norm": 0.0427851602435112, "learning_rate": 4.606606942193913e-08, "step": 178600 }, { "embedding_loss": 0.0158, "epoch": 9.982119908364531, "grad_norm": 0.05253991484642029, "learning_rate": 3.985770427073439e-08, "step": 178650 }, { "embedding_loss": 0.0161, "epoch": 9.984913672682573, "grad_norm": 0.09952722489833832, "learning_rate": 3.3649339119529655e-08, "step": 178700 }, { "embedding_loss": 0.0156, "epoch": 9.987707437000614, "grad_norm": 0.048213981091976166, "learning_rate": 2.7440973968324924e-08, "step": 178750 }, { "embedding_loss": 0.0156, "epoch": 9.990501201318656, "grad_norm": 0.07185114175081253, "learning_rate": 2.1232608817120187e-08, "step": 178800 }, { "embedding_loss": 0.0165, "epoch": 9.993294965636698, "grad_norm": 0.042073339223861694, "learning_rate": 1.5024243665915453e-08, "step": 178850 }, { "embedding_loss": 0.015, "epoch": 9.99608872995474, "grad_norm": 0.05939392000436783, "learning_rate": 8.815878514710723e-09, "step": 178900 }, { "embedding_loss": 0.0185, "epoch": 9.998882494272783, "grad_norm": 0.042130157351493835, "learning_rate": 2.607513363505988e-09, "step": 178950 } ], "logging_steps": 50, "max_steps": 178970, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }