{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 33850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "embedding_loss": 0.2247, "epoch": 0.00014771048744460856, "grad_norm": 1.4463119506835938, "learning_rate": 0.0, "step": 1 }, { "embedding_loss": 0.2914, "epoch": 0.007385524372230428, "grad_norm": 1.524917721748352, "learning_rate": 2.8951255539143283e-07, "step": 50 }, { "embedding_loss": 0.2746, "epoch": 0.014771048744460856, "grad_norm": 1.1199702024459839, "learning_rate": 5.849335302806499e-07, "step": 100 }, { "embedding_loss": 0.2579, "epoch": 0.022156573116691284, "grad_norm": 1.1694086790084839, "learning_rate": 8.803545051698672e-07, "step": 150 }, { "embedding_loss": 0.2499, "epoch": 0.029542097488921712, "grad_norm": 0.9039924144744873, "learning_rate": 1.1757754800590842e-06, "step": 200 }, { "embedding_loss": 0.2386, "epoch": 0.03692762186115214, "grad_norm": 1.048995018005371, "learning_rate": 1.4711964549483015e-06, "step": 250 }, { "embedding_loss": 0.2269, "epoch": 0.04431314623338257, "grad_norm": 1.1124966144561768, "learning_rate": 1.7666174298375186e-06, "step": 300 }, { "embedding_loss": 0.2171, "epoch": 0.051698670605613, "grad_norm": 1.0527081489562988, "learning_rate": 2.062038404726736e-06, "step": 350 }, { "embedding_loss": 0.1999, "epoch": 0.059084194977843424, "grad_norm": 1.5473452806472778, "learning_rate": 2.3574593796159526e-06, "step": 400 }, { "embedding_loss": 0.1787, "epoch": 0.06646971935007386, "grad_norm": 1.0279266834259033, "learning_rate": 2.65288035450517e-06, "step": 450 }, { "embedding_loss": 0.1647, "epoch": 0.07385524372230429, "grad_norm": 1.260780930519104, "learning_rate": 2.9483013293943873e-06, "step": 500 }, { "embedding_loss": 0.1581, "epoch": 0.08124076809453472, "grad_norm": 0.8986091017723083, "learning_rate": 3.243722304283604e-06, "step": 550 }, { "embedding_loss": 0.1531, "epoch": 0.08862629246676514, "grad_norm": 1.3418588638305664, "learning_rate": 3.5391432791728215e-06, "step": 600 }, { "embedding_loss": 0.1475, "epoch": 0.09601181683899557, "grad_norm": 0.9869722723960876, "learning_rate": 3.834564254062039e-06, "step": 650 }, { "embedding_loss": 0.1375, "epoch": 0.103397341211226, "grad_norm": 1.1203314065933228, "learning_rate": 4.129985228951256e-06, "step": 700 }, { "embedding_loss": 0.1274, "epoch": 0.11078286558345643, "grad_norm": 0.8669000267982483, "learning_rate": 4.425406203840473e-06, "step": 750 }, { "embedding_loss": 0.1312, "epoch": 0.11816838995568685, "grad_norm": 1.493843913078308, "learning_rate": 4.72082717872969e-06, "step": 800 }, { "embedding_loss": 0.1228, "epoch": 0.1255539143279173, "grad_norm": 0.9798605442047119, "learning_rate": 5.0162481536189075e-06, "step": 850 }, { "embedding_loss": 0.118, "epoch": 0.1329394387001477, "grad_norm": 1.3890421390533447, "learning_rate": 5.311669128508124e-06, "step": 900 }, { "embedding_loss": 0.1117, "epoch": 0.14032496307237813, "grad_norm": 1.3494268655776978, "learning_rate": 5.607090103397341e-06, "step": 950 }, { "embedding_loss": 0.1108, "epoch": 0.14771048744460857, "grad_norm": 0.9772982001304626, "learning_rate": 5.902511078286559e-06, "step": 1000 }, { "embedding_loss": 0.0941, "epoch": 0.155096011816839, "grad_norm": 0.8814387917518616, "learning_rate": 6.197932053175776e-06, "step": 1050 }, { "embedding_loss": 0.0917, "epoch": 0.16248153618906944, "grad_norm": 1.0253106355667114, "learning_rate": 6.4933530280649935e-06, "step": 1100 }, { "embedding_loss": 0.0961, "epoch": 0.16986706056129985, "grad_norm": 0.9646548628807068, "learning_rate": 6.78877400295421e-06, "step": 1150 }, { "embedding_loss": 0.0896, "epoch": 0.17725258493353027, "grad_norm": 0.9323801398277283, "learning_rate": 7.084194977843427e-06, "step": 1200 }, { "embedding_loss": 0.092, "epoch": 0.18463810930576072, "grad_norm": 1.0876870155334473, "learning_rate": 7.379615952732645e-06, "step": 1250 }, { "embedding_loss": 0.0895, "epoch": 0.19202363367799113, "grad_norm": 1.2848527431488037, "learning_rate": 7.675036927621861e-06, "step": 1300 }, { "embedding_loss": 0.0823, "epoch": 0.19940915805022155, "grad_norm": 1.4480323791503906, "learning_rate": 7.970457902511078e-06, "step": 1350 }, { "embedding_loss": 0.0809, "epoch": 0.206794682422452, "grad_norm": 1.5744627714157104, "learning_rate": 8.265878877400296e-06, "step": 1400 }, { "embedding_loss": 0.0766, "epoch": 0.21418020679468242, "grad_norm": 1.5040708780288696, "learning_rate": 8.561299852289513e-06, "step": 1450 }, { "embedding_loss": 0.0733, "epoch": 0.22156573116691286, "grad_norm": 1.0043632984161377, "learning_rate": 8.856720827178731e-06, "step": 1500 }, { "embedding_loss": 0.0778, "epoch": 0.22895125553914328, "grad_norm": 1.5973531007766724, "learning_rate": 9.152141802067948e-06, "step": 1550 }, { "embedding_loss": 0.0715, "epoch": 0.2363367799113737, "grad_norm": 1.5385518074035645, "learning_rate": 9.447562776957165e-06, "step": 1600 }, { "embedding_loss": 0.0701, "epoch": 0.24372230428360414, "grad_norm": 1.069740653038025, "learning_rate": 9.742983751846381e-06, "step": 1650 }, { "embedding_loss": 0.0664, "epoch": 0.2511078286558346, "grad_norm": 0.7774745225906372, "learning_rate": 1.00384047267356e-05, "step": 1700 }, { "embedding_loss": 0.0645, "epoch": 0.258493353028065, "grad_norm": 0.9332543015480042, "learning_rate": 1.0333825701624816e-05, "step": 1750 }, { "embedding_loss": 0.061, "epoch": 0.2658788774002954, "grad_norm": 1.161795973777771, "learning_rate": 1.0629246676514033e-05, "step": 1800 }, { "embedding_loss": 0.0625, "epoch": 0.27326440177252587, "grad_norm": 0.773992657661438, "learning_rate": 1.0924667651403251e-05, "step": 1850 }, { "embedding_loss": 0.054, "epoch": 0.28064992614475626, "grad_norm": 0.8391284346580505, "learning_rate": 1.1220088626292466e-05, "step": 1900 }, { "embedding_loss": 0.0612, "epoch": 0.2880354505169867, "grad_norm": 0.6210933923721313, "learning_rate": 1.1515509601181685e-05, "step": 1950 }, { "embedding_loss": 0.0579, "epoch": 0.29542097488921715, "grad_norm": 0.8674732446670532, "learning_rate": 1.1810930576070903e-05, "step": 2000 }, { "embedding_loss": 0.0566, "epoch": 0.30280649926144754, "grad_norm": 1.716627597808838, "learning_rate": 1.2106351550960118e-05, "step": 2050 }, { "embedding_loss": 0.0495, "epoch": 0.310192023633678, "grad_norm": 1.162758231163025, "learning_rate": 1.2401772525849337e-05, "step": 2100 }, { "embedding_loss": 0.0514, "epoch": 0.3175775480059084, "grad_norm": 1.5488636493682861, "learning_rate": 1.2697193500738553e-05, "step": 2150 }, { "embedding_loss": 0.0478, "epoch": 0.3249630723781389, "grad_norm": 1.5231482982635498, "learning_rate": 1.2992614475627772e-05, "step": 2200 }, { "embedding_loss": 0.0484, "epoch": 0.33234859675036926, "grad_norm": 1.966753602027893, "learning_rate": 1.3288035450516987e-05, "step": 2250 }, { "embedding_loss": 0.0547, "epoch": 0.3397341211225997, "grad_norm": 2.125790596008301, "learning_rate": 1.3583456425406205e-05, "step": 2300 }, { "embedding_loss": 0.0466, "epoch": 0.34711964549483015, "grad_norm": 1.8197243213653564, "learning_rate": 1.3878877400295423e-05, "step": 2350 }, { "embedding_loss": 0.0454, "epoch": 0.35450516986706054, "grad_norm": 0.8179060816764832, "learning_rate": 1.4174298375184638e-05, "step": 2400 }, { "embedding_loss": 0.041, "epoch": 0.361890694239291, "grad_norm": 0.3561592400074005, "learning_rate": 1.4469719350073857e-05, "step": 2450 }, { "embedding_loss": 0.0395, "epoch": 0.36927621861152143, "grad_norm": 0.40876850485801697, "learning_rate": 1.4765140324963074e-05, "step": 2500 }, { "embedding_loss": 0.0398, "epoch": 0.3766617429837518, "grad_norm": 1.050619125366211, "learning_rate": 1.506056129985229e-05, "step": 2550 }, { "embedding_loss": 0.0415, "epoch": 0.38404726735598227, "grad_norm": 0.24330730736255646, "learning_rate": 1.5355982274741507e-05, "step": 2600 }, { "embedding_loss": 0.0367, "epoch": 0.3914327917282127, "grad_norm": 2.6866581439971924, "learning_rate": 1.5651403249630725e-05, "step": 2650 }, { "embedding_loss": 0.0331, "epoch": 0.3988183161004431, "grad_norm": 0.7530401945114136, "learning_rate": 1.594682422451994e-05, "step": 2700 }, { "embedding_loss": 0.0399, "epoch": 0.40620384047267355, "grad_norm": 0.4778743386268616, "learning_rate": 1.624224519940916e-05, "step": 2750 }, { "embedding_loss": 0.0342, "epoch": 0.413589364844904, "grad_norm": 0.8823883533477783, "learning_rate": 1.6537666174298377e-05, "step": 2800 }, { "embedding_loss": 0.0356, "epoch": 0.42097488921713444, "grad_norm": 2.0318665504455566, "learning_rate": 1.6833087149187595e-05, "step": 2850 }, { "embedding_loss": 0.0346, "epoch": 0.42836041358936483, "grad_norm": 0.46766990423202515, "learning_rate": 1.712850812407681e-05, "step": 2900 }, { "embedding_loss": 0.0326, "epoch": 0.4357459379615953, "grad_norm": 1.899274468421936, "learning_rate": 1.742392909896603e-05, "step": 2950 }, { "embedding_loss": 0.0301, "epoch": 0.4431314623338257, "grad_norm": 0.4528331458568573, "learning_rate": 1.7719350073855247e-05, "step": 3000 }, { "embedding_loss": 0.0297, "epoch": 0.4505169867060561, "grad_norm": 1.74443519115448, "learning_rate": 1.8014771048744462e-05, "step": 3050 }, { "embedding_loss": 0.0318, "epoch": 0.45790251107828656, "grad_norm": 0.41255202889442444, "learning_rate": 1.831019202363368e-05, "step": 3100 }, { "embedding_loss": 0.0288, "epoch": 0.465288035450517, "grad_norm": 0.7493127584457397, "learning_rate": 1.8605612998522896e-05, "step": 3150 }, { "embedding_loss": 0.0324, "epoch": 0.4726735598227474, "grad_norm": 0.12168914079666138, "learning_rate": 1.8901033973412114e-05, "step": 3200 }, { "embedding_loss": 0.024, "epoch": 0.48005908419497784, "grad_norm": 1.5052778720855713, "learning_rate": 1.9196454948301332e-05, "step": 3250 }, { "embedding_loss": 0.0299, "epoch": 0.4874446085672083, "grad_norm": 0.22781763970851898, "learning_rate": 1.9491875923190547e-05, "step": 3300 }, { "embedding_loss": 0.0315, "epoch": 0.4948301329394387, "grad_norm": 0.7878602147102356, "learning_rate": 1.9787296898079766e-05, "step": 3350 }, { "embedding_loss": 0.0267, "epoch": 0.5022156573116692, "grad_norm": 0.823674738407135, "learning_rate": 1.999080912522567e-05, "step": 3400 }, { "embedding_loss": 0.0268, "epoch": 0.5096011816838996, "grad_norm": 0.6394932866096497, "learning_rate": 1.99579845724602e-05, "step": 3450 }, { "embedding_loss": 0.0231, "epoch": 0.51698670605613, "grad_norm": 0.6224627494812012, "learning_rate": 1.9925160019694733e-05, "step": 3500 }, { "embedding_loss": 0.0257, "epoch": 0.5243722304283605, "grad_norm": 2.495439291000366, "learning_rate": 1.9892335466929265e-05, "step": 3550 }, { "embedding_loss": 0.023, "epoch": 0.5317577548005908, "grad_norm": 2.96049165725708, "learning_rate": 1.9859510914163794e-05, "step": 3600 }, { "embedding_loss": 0.0222, "epoch": 0.5391432791728212, "grad_norm": 0.24069173634052277, "learning_rate": 1.982668636139833e-05, "step": 3650 }, { "embedding_loss": 0.0244, "epoch": 0.5465288035450517, "grad_norm": 2.0120162963867188, "learning_rate": 1.9793861808632858e-05, "step": 3700 }, { "embedding_loss": 0.0218, "epoch": 0.5539143279172821, "grad_norm": 0.8145495653152466, "learning_rate": 1.976103725586739e-05, "step": 3750 }, { "embedding_loss": 0.0267, "epoch": 0.5612998522895125, "grad_norm": 0.40902212262153625, "learning_rate": 1.9728212703101922e-05, "step": 3800 }, { "embedding_loss": 0.0221, "epoch": 0.568685376661743, "grad_norm": 0.1750016063451767, "learning_rate": 1.9695388150336454e-05, "step": 3850 }, { "embedding_loss": 0.0169, "epoch": 0.5760709010339734, "grad_norm": 0.4834084212779999, "learning_rate": 1.9662563597570986e-05, "step": 3900 }, { "embedding_loss": 0.0203, "epoch": 0.5834564254062038, "grad_norm": 0.18973205983638763, "learning_rate": 1.9629739044805515e-05, "step": 3950 }, { "embedding_loss": 0.0184, "epoch": 0.5908419497784343, "grad_norm": 0.4886401295661926, "learning_rate": 1.9596914492040047e-05, "step": 4000 }, { "embedding_loss": 0.0175, "epoch": 0.5982274741506647, "grad_norm": 0.8744384050369263, "learning_rate": 1.956408993927458e-05, "step": 4050 }, { "embedding_loss": 0.0219, "epoch": 0.6056129985228951, "grad_norm": 1.2341519594192505, "learning_rate": 1.953126538650911e-05, "step": 4100 }, { "embedding_loss": 0.0175, "epoch": 0.6129985228951256, "grad_norm": 0.4706520140171051, "learning_rate": 1.9498440833743643e-05, "step": 4150 }, { "embedding_loss": 0.017, "epoch": 0.620384047267356, "grad_norm": 0.12396424263715744, "learning_rate": 1.946561628097817e-05, "step": 4200 }, { "embedding_loss": 0.0181, "epoch": 0.6277695716395865, "grad_norm": 1.962485909461975, "learning_rate": 1.9432791728212707e-05, "step": 4250 }, { "embedding_loss": 0.0164, "epoch": 0.6351550960118169, "grad_norm": 2.613374948501587, "learning_rate": 1.9399967175447236e-05, "step": 4300 }, { "embedding_loss": 0.0129, "epoch": 0.6425406203840472, "grad_norm": 0.3567068874835968, "learning_rate": 1.9367142622681768e-05, "step": 4350 }, { "embedding_loss": 0.0136, "epoch": 0.6499261447562777, "grad_norm": 1.5771572589874268, "learning_rate": 1.9334318069916296e-05, "step": 4400 }, { "embedding_loss": 0.0169, "epoch": 0.6573116691285081, "grad_norm": 0.31481969356536865, "learning_rate": 1.9301493517150832e-05, "step": 4450 }, { "embedding_loss": 0.0154, "epoch": 0.6646971935007385, "grad_norm": 0.16484883427619934, "learning_rate": 1.926866896438536e-05, "step": 4500 }, { "embedding_loss": 0.0168, "epoch": 0.672082717872969, "grad_norm": 0.279256671667099, "learning_rate": 1.9235844411619893e-05, "step": 4550 }, { "embedding_loss": 0.0158, "epoch": 0.6794682422451994, "grad_norm": 0.2343069612979889, "learning_rate": 1.9203019858854425e-05, "step": 4600 }, { "embedding_loss": 0.0157, "epoch": 0.6868537666174298, "grad_norm": 0.17091761529445648, "learning_rate": 1.9170195306088957e-05, "step": 4650 }, { "embedding_loss": 0.0127, "epoch": 0.6942392909896603, "grad_norm": 1.3237155675888062, "learning_rate": 1.913737075332349e-05, "step": 4700 }, { "embedding_loss": 0.0116, "epoch": 0.7016248153618907, "grad_norm": 0.7258033752441406, "learning_rate": 1.9104546200558017e-05, "step": 4750 }, { "embedding_loss": 0.0134, "epoch": 0.7090103397341211, "grad_norm": 3.0486900806427, "learning_rate": 1.907172164779255e-05, "step": 4800 }, { "embedding_loss": 0.012, "epoch": 0.7163958641063516, "grad_norm": 0.10283143818378448, "learning_rate": 1.903889709502708e-05, "step": 4850 }, { "embedding_loss": 0.0134, "epoch": 0.723781388478582, "grad_norm": 0.3316308259963989, "learning_rate": 1.9006072542261613e-05, "step": 4900 }, { "embedding_loss": 0.0157, "epoch": 0.7311669128508124, "grad_norm": 0.421657919883728, "learning_rate": 1.8973247989496146e-05, "step": 4950 }, { "embedding_loss": 0.0121, "epoch": 0.7385524372230429, "grad_norm": 0.4950125813484192, "learning_rate": 1.8940423436730678e-05, "step": 5000 }, { "embedding_loss": 0.0134, "epoch": 0.7459379615952733, "grad_norm": 0.5293028950691223, "learning_rate": 1.890759888396521e-05, "step": 5050 }, { "embedding_loss": 0.0083, "epoch": 0.7533234859675036, "grad_norm": 2.0652644634246826, "learning_rate": 1.8874774331199738e-05, "step": 5100 }, { "embedding_loss": 0.0122, "epoch": 0.7607090103397341, "grad_norm": 1.9949322938919067, "learning_rate": 1.884194977843427e-05, "step": 5150 }, { "embedding_loss": 0.0104, "epoch": 0.7680945347119645, "grad_norm": 0.07039645314216614, "learning_rate": 1.8809125225668802e-05, "step": 5200 }, { "embedding_loss": 0.0061, "epoch": 0.7754800590841949, "grad_norm": 0.07697559893131256, "learning_rate": 1.8776300672903334e-05, "step": 5250 }, { "embedding_loss": 0.0107, "epoch": 0.7828655834564254, "grad_norm": 0.05644530802965164, "learning_rate": 1.8743476120137863e-05, "step": 5300 }, { "embedding_loss": 0.0093, "epoch": 0.7902511078286558, "grad_norm": 0.34979447722435, "learning_rate": 1.8710651567372395e-05, "step": 5350 }, { "embedding_loss": 0.012, "epoch": 0.7976366322008862, "grad_norm": 0.06782261282205582, "learning_rate": 1.8677827014606927e-05, "step": 5400 }, { "embedding_loss": 0.0119, "epoch": 0.8050221565731167, "grad_norm": 0.11144471168518066, "learning_rate": 1.864500246184146e-05, "step": 5450 }, { "embedding_loss": 0.0114, "epoch": 0.8124076809453471, "grad_norm": 0.2110595852136612, "learning_rate": 1.861217790907599e-05, "step": 5500 }, { "embedding_loss": 0.0133, "epoch": 0.8197932053175776, "grad_norm": 0.49429744482040405, "learning_rate": 1.857935335631052e-05, "step": 5550 }, { "embedding_loss": 0.0087, "epoch": 0.827178729689808, "grad_norm": 0.07333461195230484, "learning_rate": 1.8546528803545055e-05, "step": 5600 }, { "embedding_loss": 0.008, "epoch": 0.8345642540620384, "grad_norm": 0.11741068214178085, "learning_rate": 1.8513704250779584e-05, "step": 5650 }, { "embedding_loss": 0.0058, "epoch": 0.8419497784342689, "grad_norm": 0.12451150268316269, "learning_rate": 1.8480879698014116e-05, "step": 5700 }, { "embedding_loss": 0.0098, "epoch": 0.8493353028064993, "grad_norm": 0.04639327526092529, "learning_rate": 1.8448055145248648e-05, "step": 5750 }, { "embedding_loss": 0.0083, "epoch": 0.8567208271787297, "grad_norm": 0.06967220455408096, "learning_rate": 1.841523059248318e-05, "step": 5800 }, { "embedding_loss": 0.0127, "epoch": 0.8641063515509602, "grad_norm": 1.032842755317688, "learning_rate": 1.8382406039717712e-05, "step": 5850 }, { "embedding_loss": 0.0119, "epoch": 0.8714918759231906, "grad_norm": 0.0814921110868454, "learning_rate": 1.834958148695224e-05, "step": 5900 }, { "embedding_loss": 0.0117, "epoch": 0.8788774002954209, "grad_norm": 3.7965452671051025, "learning_rate": 1.8316756934186773e-05, "step": 5950 }, { "embedding_loss": 0.0107, "epoch": 0.8862629246676514, "grad_norm": 0.13023847341537476, "learning_rate": 1.8283932381421305e-05, "step": 6000 }, { "embedding_loss": 0.0099, "epoch": 0.8936484490398818, "grad_norm": 0.15792806446552277, "learning_rate": 1.8251107828655837e-05, "step": 6050 }, { "embedding_loss": 0.0129, "epoch": 0.9010339734121122, "grad_norm": 0.06038963794708252, "learning_rate": 1.8218283275890366e-05, "step": 6100 }, { "embedding_loss": 0.0111, "epoch": 0.9084194977843427, "grad_norm": 0.26467612385749817, "learning_rate": 1.8185458723124898e-05, "step": 6150 }, { "embedding_loss": 0.0099, "epoch": 0.9158050221565731, "grad_norm": 0.354390025138855, "learning_rate": 1.815263417035943e-05, "step": 6200 }, { "embedding_loss": 0.0101, "epoch": 0.9231905465288035, "grad_norm": 1.5564332008361816, "learning_rate": 1.8119809617593962e-05, "step": 6250 }, { "embedding_loss": 0.0123, "epoch": 0.930576070901034, "grad_norm": 0.12284110486507416, "learning_rate": 1.8086985064828494e-05, "step": 6300 }, { "embedding_loss": 0.0055, "epoch": 0.9379615952732644, "grad_norm": 0.15565811097621918, "learning_rate": 1.8054160512063022e-05, "step": 6350 }, { "embedding_loss": 0.0105, "epoch": 0.9453471196454948, "grad_norm": 0.12946315109729767, "learning_rate": 1.8021335959297558e-05, "step": 6400 }, { "embedding_loss": 0.0071, "epoch": 0.9527326440177253, "grad_norm": 0.4842424690723419, "learning_rate": 1.7988511406532087e-05, "step": 6450 }, { "embedding_loss": 0.0074, "epoch": 0.9601181683899557, "grad_norm": 0.36668410897254944, "learning_rate": 1.795568685376662e-05, "step": 6500 }, { "embedding_loss": 0.007, "epoch": 0.9675036927621861, "grad_norm": 0.1203831359744072, "learning_rate": 1.792286230100115e-05, "step": 6550 }, { "embedding_loss": 0.0095, "epoch": 0.9748892171344166, "grad_norm": 0.046238359063863754, "learning_rate": 1.7890037748235683e-05, "step": 6600 }, { "embedding_loss": 0.0088, "epoch": 0.982274741506647, "grad_norm": 0.1258874386548996, "learning_rate": 1.7857213195470215e-05, "step": 6650 }, { "embedding_loss": 0.0052, "epoch": 0.9896602658788775, "grad_norm": 0.02032575197517872, "learning_rate": 1.7824388642704743e-05, "step": 6700 }, { "embedding_loss": 0.0079, "epoch": 0.9970457902511078, "grad_norm": 0.03921140730381012, "learning_rate": 1.7791564089939275e-05, "step": 6750 }, { "embedding_loss": 0.0069, "epoch": 1.0044313146233383, "grad_norm": 0.012766249477863312, "learning_rate": 1.7758739537173807e-05, "step": 6800 }, { "embedding_loss": 0.0058, "epoch": 1.0118168389955686, "grad_norm": 0.20952889323234558, "learning_rate": 1.772591498440834e-05, "step": 6850 }, { "embedding_loss": 0.0102, "epoch": 1.0192023633677991, "grad_norm": 0.04774490371346474, "learning_rate": 1.7693090431642868e-05, "step": 6900 }, { "embedding_loss": 0.0097, "epoch": 1.0265878877400296, "grad_norm": 0.15566791594028473, "learning_rate": 1.7660265878877404e-05, "step": 6950 }, { "embedding_loss": 0.0095, "epoch": 1.03397341211226, "grad_norm": 0.6467046141624451, "learning_rate": 1.7627441326111932e-05, "step": 7000 }, { "embedding_loss": 0.0082, "epoch": 1.0413589364844904, "grad_norm": 0.35328537225723267, "learning_rate": 1.7594616773346464e-05, "step": 7050 }, { "embedding_loss": 0.0066, "epoch": 1.048744460856721, "grad_norm": 0.3548614978790283, "learning_rate": 1.7561792220580996e-05, "step": 7100 }, { "embedding_loss": 0.009, "epoch": 1.0561299852289512, "grad_norm": 0.6114194393157959, "learning_rate": 1.752896766781553e-05, "step": 7150 }, { "embedding_loss": 0.0062, "epoch": 1.0635155096011817, "grad_norm": 0.7183836698532104, "learning_rate": 1.749614311505006e-05, "step": 7200 }, { "embedding_loss": 0.0082, "epoch": 1.0709010339734122, "grad_norm": 0.41628143191337585, "learning_rate": 1.746331856228459e-05, "step": 7250 }, { "embedding_loss": 0.0083, "epoch": 1.0782865583456425, "grad_norm": 0.22927437722682953, "learning_rate": 1.743049400951912e-05, "step": 7300 }, { "embedding_loss": 0.0089, "epoch": 1.085672082717873, "grad_norm": 0.11581069976091385, "learning_rate": 1.7397669456753653e-05, "step": 7350 }, { "embedding_loss": 0.0088, "epoch": 1.0930576070901035, "grad_norm": 1.374656081199646, "learning_rate": 1.7364844903988185e-05, "step": 7400 }, { "embedding_loss": 0.0075, "epoch": 1.1004431314623337, "grad_norm": 0.25289225578308105, "learning_rate": 1.7332020351222717e-05, "step": 7450 }, { "embedding_loss": 0.005, "epoch": 1.1078286558345642, "grad_norm": 0.034826990216970444, "learning_rate": 1.7299195798457246e-05, "step": 7500 }, { "embedding_loss": 0.0074, "epoch": 1.1152141802067947, "grad_norm": 0.20261834561824799, "learning_rate": 1.726637124569178e-05, "step": 7550 }, { "embedding_loss": 0.0062, "epoch": 1.122599704579025, "grad_norm": 0.01275601889938116, "learning_rate": 1.723354669292631e-05, "step": 7600 }, { "embedding_loss": 0.0062, "epoch": 1.1299852289512555, "grad_norm": 0.036308519542217255, "learning_rate": 1.7200722140160842e-05, "step": 7650 }, { "embedding_loss": 0.0079, "epoch": 1.137370753323486, "grad_norm": 0.05968335270881653, "learning_rate": 1.7167897587395374e-05, "step": 7700 }, { "embedding_loss": 0.0108, "epoch": 1.1447562776957163, "grad_norm": 1.3406931161880493, "learning_rate": 1.7135073034629906e-05, "step": 7750 }, { "embedding_loss": 0.0079, "epoch": 1.1521418020679468, "grad_norm": 0.07372719049453735, "learning_rate": 1.7102248481864435e-05, "step": 7800 }, { "embedding_loss": 0.0083, "epoch": 1.1595273264401773, "grad_norm": 0.38173583149909973, "learning_rate": 1.7069423929098967e-05, "step": 7850 }, { "embedding_loss": 0.0074, "epoch": 1.1669128508124076, "grad_norm": 0.1348145604133606, "learning_rate": 1.70365993763335e-05, "step": 7900 }, { "embedding_loss": 0.0078, "epoch": 1.174298375184638, "grad_norm": 0.0659070536494255, "learning_rate": 1.700377482356803e-05, "step": 7950 }, { "embedding_loss": 0.0057, "epoch": 1.1816838995568686, "grad_norm": 0.017487822100520134, "learning_rate": 1.6970950270802563e-05, "step": 8000 }, { "embedding_loss": 0.0057, "epoch": 1.1890694239290989, "grad_norm": 3.8321328163146973, "learning_rate": 1.693812571803709e-05, "step": 8050 }, { "embedding_loss": 0.005, "epoch": 1.1964549483013294, "grad_norm": 0.04197081923484802, "learning_rate": 1.6905301165271624e-05, "step": 8100 }, { "embedding_loss": 0.0099, "epoch": 1.2038404726735599, "grad_norm": 0.05384385213255882, "learning_rate": 1.6872476612506156e-05, "step": 8150 }, { "embedding_loss": 0.0041, "epoch": 1.2112259970457901, "grad_norm": 0.027099648490548134, "learning_rate": 1.6839652059740688e-05, "step": 8200 }, { "embedding_loss": 0.0095, "epoch": 1.2186115214180206, "grad_norm": 0.02560454048216343, "learning_rate": 1.680682750697522e-05, "step": 8250 }, { "embedding_loss": 0.0076, "epoch": 1.2259970457902511, "grad_norm": 0.0267130509018898, "learning_rate": 1.677400295420975e-05, "step": 8300 }, { "embedding_loss": 0.0065, "epoch": 1.2333825701624814, "grad_norm": 0.14713996648788452, "learning_rate": 1.6741178401444284e-05, "step": 8350 }, { "embedding_loss": 0.0044, "epoch": 1.240768094534712, "grad_norm": 0.0488862581551075, "learning_rate": 1.6708353848678812e-05, "step": 8400 }, { "embedding_loss": 0.0059, "epoch": 1.2481536189069424, "grad_norm": 0.03769877180457115, "learning_rate": 1.6675529295913345e-05, "step": 8450 }, { "embedding_loss": 0.0083, "epoch": 1.2555391432791727, "grad_norm": 0.04677336663007736, "learning_rate": 1.6642704743147877e-05, "step": 8500 }, { "embedding_loss": 0.0069, "epoch": 1.2629246676514032, "grad_norm": 1.583303689956665, "learning_rate": 1.660988019038241e-05, "step": 8550 }, { "embedding_loss": 0.0059, "epoch": 1.2703101920236337, "grad_norm": 0.057745561003685, "learning_rate": 1.6577055637616937e-05, "step": 8600 }, { "embedding_loss": 0.0048, "epoch": 1.277695716395864, "grad_norm": 0.05651646852493286, "learning_rate": 1.654423108485147e-05, "step": 8650 }, { "embedding_loss": 0.0081, "epoch": 1.2850812407680945, "grad_norm": 0.5371580719947815, "learning_rate": 1.6511406532086e-05, "step": 8700 }, { "embedding_loss": 0.0056, "epoch": 1.292466765140325, "grad_norm": 0.01594601757824421, "learning_rate": 1.6478581979320533e-05, "step": 8750 }, { "embedding_loss": 0.0069, "epoch": 1.2998522895125553, "grad_norm": 0.22201408445835114, "learning_rate": 1.6445757426555065e-05, "step": 8800 }, { "embedding_loss": 0.005, "epoch": 1.3072378138847858, "grad_norm": 0.0434761643409729, "learning_rate": 1.6412932873789594e-05, "step": 8850 }, { "embedding_loss": 0.0057, "epoch": 1.3146233382570163, "grad_norm": 0.20662403106689453, "learning_rate": 1.6380108321024126e-05, "step": 8900 }, { "embedding_loss": 0.0059, "epoch": 1.3220088626292466, "grad_norm": 0.49766138195991516, "learning_rate": 1.6347283768258658e-05, "step": 8950 }, { "embedding_loss": 0.0036, "epoch": 1.329394387001477, "grad_norm": 3.3815248012542725, "learning_rate": 1.631445921549319e-05, "step": 9000 }, { "embedding_loss": 0.0072, "epoch": 1.3367799113737076, "grad_norm": 0.03580164164304733, "learning_rate": 1.6281634662727722e-05, "step": 9050 }, { "embedding_loss": 0.0053, "epoch": 1.3441654357459378, "grad_norm": 1.0942792892456055, "learning_rate": 1.6248810109962254e-05, "step": 9100 }, { "embedding_loss": 0.0035, "epoch": 1.3515509601181683, "grad_norm": 0.05680214613676071, "learning_rate": 1.6215985557196786e-05, "step": 9150 }, { "embedding_loss": 0.0073, "epoch": 1.3589364844903988, "grad_norm": 0.377883642911911, "learning_rate": 1.6183161004431315e-05, "step": 9200 }, { "embedding_loss": 0.0028, "epoch": 1.3663220088626291, "grad_norm": 0.019608836621046066, "learning_rate": 1.6150336451665847e-05, "step": 9250 }, { "embedding_loss": 0.0055, "epoch": 1.3737075332348596, "grad_norm": 0.5401307344436646, "learning_rate": 1.611751189890038e-05, "step": 9300 }, { "embedding_loss": 0.0071, "epoch": 1.3810930576070901, "grad_norm": 0.4266299605369568, "learning_rate": 1.608468734613491e-05, "step": 9350 }, { "embedding_loss": 0.0057, "epoch": 1.3884785819793206, "grad_norm": 0.16506928205490112, "learning_rate": 1.6051862793369443e-05, "step": 9400 }, { "embedding_loss": 0.0107, "epoch": 1.395864106351551, "grad_norm": 0.021154019981622696, "learning_rate": 1.6019038240603972e-05, "step": 9450 }, { "embedding_loss": 0.0054, "epoch": 1.4032496307237814, "grad_norm": 0.13461002707481384, "learning_rate": 1.5986213687838504e-05, "step": 9500 }, { "embedding_loss": 0.0045, "epoch": 1.410635155096012, "grad_norm": 0.0639062374830246, "learning_rate": 1.5953389135073036e-05, "step": 9550 }, { "embedding_loss": 0.0067, "epoch": 1.4180206794682422, "grad_norm": 0.13972270488739014, "learning_rate": 1.5920564582307568e-05, "step": 9600 }, { "embedding_loss": 0.0038, "epoch": 1.4254062038404727, "grad_norm": 0.024820247665047646, "learning_rate": 1.5887740029542097e-05, "step": 9650 }, { "embedding_loss": 0.0079, "epoch": 1.4327917282127032, "grad_norm": 0.5452784299850464, "learning_rate": 1.5854915476776632e-05, "step": 9700 }, { "embedding_loss": 0.0078, "epoch": 1.4401772525849335, "grad_norm": 0.2737050950527191, "learning_rate": 1.582209092401116e-05, "step": 9750 }, { "embedding_loss": 0.005, "epoch": 1.447562776957164, "grad_norm": 0.024434711784124374, "learning_rate": 1.5789266371245693e-05, "step": 9800 }, { "embedding_loss": 0.0032, "epoch": 1.4549483013293945, "grad_norm": 0.10400200635194778, "learning_rate": 1.5756441818480225e-05, "step": 9850 }, { "embedding_loss": 0.0043, "epoch": 1.4623338257016247, "grad_norm": 0.048794183880090714, "learning_rate": 1.5723617265714757e-05, "step": 9900 }, { "embedding_loss": 0.0079, "epoch": 1.4697193500738552, "grad_norm": 0.06030944362282753, "learning_rate": 1.569079271294929e-05, "step": 9950 }, { "embedding_loss": 0.0044, "epoch": 1.4771048744460857, "grad_norm": 0.007165232207626104, "learning_rate": 1.5657968160183818e-05, "step": 10000 }, { "embedding_loss": 0.0056, "epoch": 1.4844903988183162, "grad_norm": 0.02217938005924225, "learning_rate": 1.562514360741835e-05, "step": 10050 }, { "embedding_loss": 0.004, "epoch": 1.4918759231905465, "grad_norm": 2.4009013175964355, "learning_rate": 1.559231905465288e-05, "step": 10100 }, { "embedding_loss": 0.0065, "epoch": 1.499261447562777, "grad_norm": 0.20312148332595825, "learning_rate": 1.5559494501887414e-05, "step": 10150 }, { "embedding_loss": 0.0056, "epoch": 1.5066469719350075, "grad_norm": 0.07194498181343079, "learning_rate": 1.5526669949121946e-05, "step": 10200 }, { "embedding_loss": 0.0044, "epoch": 1.5140324963072378, "grad_norm": 0.01895447075366974, "learning_rate": 1.5493845396356474e-05, "step": 10250 }, { "embedding_loss": 0.0065, "epoch": 1.5214180206794683, "grad_norm": 0.06340127438306808, "learning_rate": 1.5461020843591006e-05, "step": 10300 }, { "embedding_loss": 0.0043, "epoch": 1.5288035450516988, "grad_norm": 0.08964123576879501, "learning_rate": 1.542819629082554e-05, "step": 10350 }, { "embedding_loss": 0.0041, "epoch": 1.536189069423929, "grad_norm": 0.010926262475550175, "learning_rate": 1.539537173806007e-05, "step": 10400 }, { "embedding_loss": 0.0043, "epoch": 1.5435745937961596, "grad_norm": 0.021898791193962097, "learning_rate": 1.53625471852946e-05, "step": 10450 }, { "embedding_loss": 0.0065, "epoch": 1.55096011816839, "grad_norm": 0.03210087865591049, "learning_rate": 1.5329722632529135e-05, "step": 10500 }, { "embedding_loss": 0.005, "epoch": 1.5583456425406204, "grad_norm": 0.061248380690813065, "learning_rate": 1.5296898079763663e-05, "step": 10550 }, { "embedding_loss": 0.003, "epoch": 1.5657311669128509, "grad_norm": 0.016059886664152145, "learning_rate": 1.5264073526998195e-05, "step": 10600 }, { "embedding_loss": 0.0031, "epoch": 1.5731166912850814, "grad_norm": 0.30576014518737793, "learning_rate": 1.5231248974232726e-05, "step": 10650 }, { "embedding_loss": 0.0057, "epoch": 1.5805022156573116, "grad_norm": 0.40583568811416626, "learning_rate": 1.519842442146726e-05, "step": 10700 }, { "embedding_loss": 0.0028, "epoch": 1.5878877400295421, "grad_norm": 0.022348936647176743, "learning_rate": 1.516559986870179e-05, "step": 10750 }, { "embedding_loss": 0.0065, "epoch": 1.5952732644017726, "grad_norm": 0.012712485156953335, "learning_rate": 1.5132775315936322e-05, "step": 10800 }, { "embedding_loss": 0.0024, "epoch": 1.602658788774003, "grad_norm": 0.02547537162899971, "learning_rate": 1.5099950763170852e-05, "step": 10850 }, { "embedding_loss": 0.0037, "epoch": 1.6100443131462334, "grad_norm": 0.011224956251680851, "learning_rate": 1.5067126210405386e-05, "step": 10900 }, { "embedding_loss": 0.0046, "epoch": 1.617429837518464, "grad_norm": 0.7301647067070007, "learning_rate": 1.5034301657639916e-05, "step": 10950 }, { "embedding_loss": 0.0048, "epoch": 1.6248153618906942, "grad_norm": 1.1569029092788696, "learning_rate": 1.5001477104874447e-05, "step": 11000 }, { "embedding_loss": 0.0042, "epoch": 1.6322008862629247, "grad_norm": 0.03382499888539314, "learning_rate": 1.496865255210898e-05, "step": 11050 }, { "embedding_loss": 0.0029, "epoch": 1.6395864106351552, "grad_norm": 0.19339300692081451, "learning_rate": 1.493582799934351e-05, "step": 11100 }, { "embedding_loss": 0.005, "epoch": 1.6469719350073855, "grad_norm": 0.03563707694411278, "learning_rate": 1.4903003446578041e-05, "step": 11150 }, { "embedding_loss": 0.0059, "epoch": 1.654357459379616, "grad_norm": 0.046909235417842865, "learning_rate": 1.4870178893812573e-05, "step": 11200 }, { "embedding_loss": 0.0061, "epoch": 1.6617429837518465, "grad_norm": 0.24560566246509552, "learning_rate": 1.4837354341047105e-05, "step": 11250 }, { "embedding_loss": 0.0037, "epoch": 1.6691285081240768, "grad_norm": 0.08229757100343704, "learning_rate": 1.4804529788281637e-05, "step": 11300 }, { "embedding_loss": 0.0034, "epoch": 1.6765140324963073, "grad_norm": 0.187529519200325, "learning_rate": 1.4771705235516167e-05, "step": 11350 }, { "embedding_loss": 0.0058, "epoch": 1.6838995568685378, "grad_norm": 0.21818560361862183, "learning_rate": 1.4738880682750698e-05, "step": 11400 }, { "embedding_loss": 0.0057, "epoch": 1.691285081240768, "grad_norm": 0.017579764127731323, "learning_rate": 1.4706056129985232e-05, "step": 11450 }, { "embedding_loss": 0.0053, "epoch": 1.6986706056129985, "grad_norm": 0.005298899486660957, "learning_rate": 1.4673231577219762e-05, "step": 11500 }, { "embedding_loss": 0.0038, "epoch": 1.706056129985229, "grad_norm": 0.21702563762664795, "learning_rate": 1.4640407024454292e-05, "step": 11550 }, { "embedding_loss": 0.0055, "epoch": 1.7134416543574593, "grad_norm": 0.028038183227181435, "learning_rate": 1.4607582471688824e-05, "step": 11600 }, { "embedding_loss": 0.0053, "epoch": 1.7208271787296898, "grad_norm": 0.008879674598574638, "learning_rate": 1.4574757918923356e-05, "step": 11650 }, { "embedding_loss": 0.0046, "epoch": 1.7282127031019203, "grad_norm": 0.41037923097610474, "learning_rate": 1.4541933366157888e-05, "step": 11700 }, { "embedding_loss": 0.0038, "epoch": 1.7355982274741506, "grad_norm": 0.0060186549089848995, "learning_rate": 1.4509108813392419e-05, "step": 11750 }, { "embedding_loss": 0.006, "epoch": 1.742983751846381, "grad_norm": 0.02156016044318676, "learning_rate": 1.4476284260626949e-05, "step": 11800 }, { "embedding_loss": 0.0063, "epoch": 1.7503692762186116, "grad_norm": 0.024685271084308624, "learning_rate": 1.4443459707861483e-05, "step": 11850 }, { "embedding_loss": 0.0044, "epoch": 1.7577548005908419, "grad_norm": 0.1743912249803543, "learning_rate": 1.4410635155096013e-05, "step": 11900 }, { "embedding_loss": 0.0044, "epoch": 1.7651403249630724, "grad_norm": 0.5131327509880066, "learning_rate": 1.4377810602330544e-05, "step": 11950 }, { "embedding_loss": 0.0038, "epoch": 1.7725258493353029, "grad_norm": 0.1307702213525772, "learning_rate": 1.4344986049565076e-05, "step": 12000 }, { "embedding_loss": 0.0063, "epoch": 1.7799113737075332, "grad_norm": 0.04227305203676224, "learning_rate": 1.4312161496799608e-05, "step": 12050 }, { "embedding_loss": 0.0022, "epoch": 1.7872968980797637, "grad_norm": 0.01793646812438965, "learning_rate": 1.427933694403414e-05, "step": 12100 }, { "embedding_loss": 0.0043, "epoch": 1.7946824224519942, "grad_norm": 0.369022011756897, "learning_rate": 1.424651239126867e-05, "step": 12150 }, { "embedding_loss": 0.0035, "epoch": 1.8020679468242244, "grad_norm": 0.024383598938584328, "learning_rate": 1.42136878385032e-05, "step": 12200 }, { "embedding_loss": 0.0044, "epoch": 1.809453471196455, "grad_norm": 0.034295763820409775, "learning_rate": 1.4180863285737734e-05, "step": 12250 }, { "embedding_loss": 0.0034, "epoch": 1.8168389955686854, "grad_norm": 0.011509880423545837, "learning_rate": 1.4148038732972264e-05, "step": 12300 }, { "embedding_loss": 0.0045, "epoch": 1.8242245199409157, "grad_norm": 0.048171836882829666, "learning_rate": 1.4115214180206795e-05, "step": 12350 }, { "embedding_loss": 0.0035, "epoch": 1.8316100443131462, "grad_norm": 0.5833490490913391, "learning_rate": 1.4082389627441327e-05, "step": 12400 }, { "embedding_loss": 0.0037, "epoch": 1.8389955686853767, "grad_norm": 0.057985421270132065, "learning_rate": 1.4049565074675859e-05, "step": 12450 }, { "embedding_loss": 0.0043, "epoch": 1.846381093057607, "grad_norm": 0.22399385273456573, "learning_rate": 1.4016740521910391e-05, "step": 12500 }, { "embedding_loss": 0.0046, "epoch": 1.8537666174298375, "grad_norm": 0.0484611876308918, "learning_rate": 1.3983915969144921e-05, "step": 12550 }, { "embedding_loss": 0.0062, "epoch": 1.861152141802068, "grad_norm": 0.03510669618844986, "learning_rate": 1.3951091416379452e-05, "step": 12600 }, { "embedding_loss": 0.0023, "epoch": 1.8685376661742983, "grad_norm": 0.0480966791510582, "learning_rate": 1.3918266863613985e-05, "step": 12650 }, { "embedding_loss": 0.0033, "epoch": 1.8759231905465288, "grad_norm": 0.06846830993890762, "learning_rate": 1.3885442310848516e-05, "step": 12700 }, { "embedding_loss": 0.0043, "epoch": 1.8833087149187593, "grad_norm": 0.18425996601581573, "learning_rate": 1.3852617758083046e-05, "step": 12750 }, { "embedding_loss": 0.004, "epoch": 1.8906942392909896, "grad_norm": 0.024371977895498276, "learning_rate": 1.3819793205317578e-05, "step": 12800 }, { "embedding_loss": 0.0025, "epoch": 1.89807976366322, "grad_norm": 0.05540316924452782, "learning_rate": 1.378696865255211e-05, "step": 12850 }, { "embedding_loss": 0.0062, "epoch": 1.9054652880354506, "grad_norm": 0.2133670598268509, "learning_rate": 1.3754144099786642e-05, "step": 12900 }, { "embedding_loss": 0.0037, "epoch": 1.9128508124076808, "grad_norm": 0.007817639969289303, "learning_rate": 1.3721319547021173e-05, "step": 12950 }, { "embedding_loss": 0.0038, "epoch": 1.9202363367799113, "grad_norm": 0.06182079762220383, "learning_rate": 1.3688494994255706e-05, "step": 13000 }, { "embedding_loss": 0.0044, "epoch": 1.9276218611521418, "grad_norm": 0.010844537056982517, "learning_rate": 1.3655670441490237e-05, "step": 13050 }, { "embedding_loss": 0.003, "epoch": 1.9350073855243721, "grad_norm": 0.008412591181695461, "learning_rate": 1.3622845888724767e-05, "step": 13100 }, { "embedding_loss": 0.0037, "epoch": 1.9423929098966026, "grad_norm": 0.3621113896369934, "learning_rate": 1.3590021335959297e-05, "step": 13150 }, { "embedding_loss": 0.0034, "epoch": 1.9497784342688331, "grad_norm": 0.09569013118743896, "learning_rate": 1.3557196783193831e-05, "step": 13200 }, { "embedding_loss": 0.0029, "epoch": 1.9571639586410634, "grad_norm": 0.022653287276625633, "learning_rate": 1.3524372230428361e-05, "step": 13250 }, { "embedding_loss": 0.0019, "epoch": 1.964549483013294, "grad_norm": 0.013618898577988148, "learning_rate": 1.3491547677662893e-05, "step": 13300 }, { "embedding_loss": 0.003, "epoch": 1.9719350073855244, "grad_norm": 0.009312042035162449, "learning_rate": 1.3458723124897424e-05, "step": 13350 }, { "embedding_loss": 0.0041, "epoch": 1.9793205317577547, "grad_norm": 0.26061955094337463, "learning_rate": 1.3425898572131958e-05, "step": 13400 }, { "embedding_loss": 0.0033, "epoch": 1.9867060561299852, "grad_norm": 0.0065947119146585464, "learning_rate": 1.3393074019366488e-05, "step": 13450 }, { "embedding_loss": 0.0032, "epoch": 1.9940915805022157, "grad_norm": 0.016747118905186653, "learning_rate": 1.3360249466601018e-05, "step": 13500 }, { "embedding_loss": 0.0029, "epoch": 2.001477104874446, "grad_norm": 0.01635347120463848, "learning_rate": 1.3327424913835549e-05, "step": 13550 }, { "embedding_loss": 0.0058, "epoch": 2.0088626292466767, "grad_norm": 0.045433904975652695, "learning_rate": 1.3294600361070082e-05, "step": 13600 }, { "embedding_loss": 0.0019, "epoch": 2.016248153618907, "grad_norm": 0.06570059806108475, "learning_rate": 1.3261775808304613e-05, "step": 13650 }, { "embedding_loss": 0.0027, "epoch": 2.0236336779911372, "grad_norm": 0.026954207569360733, "learning_rate": 1.3228951255539145e-05, "step": 13700 }, { "embedding_loss": 0.0015, "epoch": 2.031019202363368, "grad_norm": 0.013637225143611431, "learning_rate": 1.3196126702773675e-05, "step": 13750 }, { "embedding_loss": 0.0029, "epoch": 2.0384047267355982, "grad_norm": 0.01706545241177082, "learning_rate": 1.3163302150008209e-05, "step": 13800 }, { "embedding_loss": 0.0043, "epoch": 2.0457902511078285, "grad_norm": 0.008318389765918255, "learning_rate": 1.313047759724274e-05, "step": 13850 }, { "embedding_loss": 0.0016, "epoch": 2.0531757754800593, "grad_norm": 0.010482273995876312, "learning_rate": 1.309765304447727e-05, "step": 13900 }, { "embedding_loss": 0.0022, "epoch": 2.0605612998522895, "grad_norm": 0.10514198988676071, "learning_rate": 1.30648284917118e-05, "step": 13950 }, { "embedding_loss": 0.0035, "epoch": 2.06794682422452, "grad_norm": 0.265434592962265, "learning_rate": 1.3032003938946334e-05, "step": 14000 }, { "embedding_loss": 0.0033, "epoch": 2.0753323485967505, "grad_norm": 0.10725241899490356, "learning_rate": 1.2999179386180864e-05, "step": 14050 }, { "embedding_loss": 0.0019, "epoch": 2.082717872968981, "grad_norm": 0.03083561733365059, "learning_rate": 1.2966354833415396e-05, "step": 14100 }, { "embedding_loss": 0.0039, "epoch": 2.090103397341211, "grad_norm": 0.4700145721435547, "learning_rate": 1.2933530280649926e-05, "step": 14150 }, { "embedding_loss": 0.0022, "epoch": 2.097488921713442, "grad_norm": 0.007506008259952068, "learning_rate": 1.290070572788446e-05, "step": 14200 }, { "embedding_loss": 0.0042, "epoch": 2.104874446085672, "grad_norm": 0.08826395869255066, "learning_rate": 1.286788117511899e-05, "step": 14250 }, { "embedding_loss": 0.0023, "epoch": 2.1122599704579024, "grad_norm": 0.0911986455321312, "learning_rate": 1.283505662235352e-05, "step": 14300 }, { "embedding_loss": 0.0022, "epoch": 2.119645494830133, "grad_norm": 0.03140464425086975, "learning_rate": 1.2802232069588053e-05, "step": 14350 }, { "embedding_loss": 0.0016, "epoch": 2.1270310192023634, "grad_norm": 0.017707446590065956, "learning_rate": 1.2769407516822585e-05, "step": 14400 }, { "embedding_loss": 0.0023, "epoch": 2.1344165435745936, "grad_norm": 0.17360664904117584, "learning_rate": 1.2736582964057115e-05, "step": 14450 }, { "embedding_loss": 0.0034, "epoch": 2.1418020679468244, "grad_norm": 0.006408170331269503, "learning_rate": 1.2703758411291647e-05, "step": 14500 }, { "embedding_loss": 0.0019, "epoch": 2.1491875923190547, "grad_norm": 0.00851589534431696, "learning_rate": 1.2670933858526178e-05, "step": 14550 }, { "embedding_loss": 0.0027, "epoch": 2.156573116691285, "grad_norm": 0.03338400647044182, "learning_rate": 1.2638109305760711e-05, "step": 14600 }, { "embedding_loss": 0.0025, "epoch": 2.1639586410635157, "grad_norm": 0.009356162510812283, "learning_rate": 1.2605284752995242e-05, "step": 14650 }, { "embedding_loss": 0.0025, "epoch": 2.171344165435746, "grad_norm": 0.028701895847916603, "learning_rate": 1.2572460200229772e-05, "step": 14700 }, { "embedding_loss": 0.0024, "epoch": 2.178729689807976, "grad_norm": 0.7400600910186768, "learning_rate": 1.2539635647464304e-05, "step": 14750 }, { "embedding_loss": 0.004, "epoch": 2.186115214180207, "grad_norm": 0.011697505600750446, "learning_rate": 1.2506811094698836e-05, "step": 14800 }, { "embedding_loss": 0.0013, "epoch": 2.193500738552437, "grad_norm": 0.0038999137468636036, "learning_rate": 1.2473986541933366e-05, "step": 14850 }, { "embedding_loss": 0.0018, "epoch": 2.2008862629246675, "grad_norm": 0.013158189132809639, "learning_rate": 1.2441161989167899e-05, "step": 14900 }, { "embedding_loss": 0.0025, "epoch": 2.208271787296898, "grad_norm": 0.019193725660443306, "learning_rate": 1.2408337436402429e-05, "step": 14950 }, { "embedding_loss": 0.0052, "epoch": 2.2156573116691285, "grad_norm": 0.07765129953622818, "learning_rate": 1.2375512883636963e-05, "step": 15000 }, { "embedding_loss": 0.0027, "epoch": 2.2230428360413588, "grad_norm": 0.16390322148799896, "learning_rate": 1.2342688330871493e-05, "step": 15050 }, { "embedding_loss": 0.0011, "epoch": 2.2304283604135895, "grad_norm": 0.019845524802803993, "learning_rate": 1.2309863778106023e-05, "step": 15100 }, { "embedding_loss": 0.0019, "epoch": 2.2378138847858198, "grad_norm": 0.0020033265464007854, "learning_rate": 1.2277039225340557e-05, "step": 15150 }, { "embedding_loss": 0.0012, "epoch": 2.24519940915805, "grad_norm": 0.008046300150454044, "learning_rate": 1.2244214672575087e-05, "step": 15200 }, { "embedding_loss": 0.0045, "epoch": 2.2525849335302808, "grad_norm": 0.16893664002418518, "learning_rate": 1.2211390119809618e-05, "step": 15250 }, { "embedding_loss": 0.0031, "epoch": 2.259970457902511, "grad_norm": 0.012031147256493568, "learning_rate": 1.217856556704415e-05, "step": 15300 }, { "embedding_loss": 0.0029, "epoch": 2.2673559822747413, "grad_norm": 0.007804942317306995, "learning_rate": 1.2145741014278682e-05, "step": 15350 }, { "embedding_loss": 0.0048, "epoch": 2.274741506646972, "grad_norm": 0.003408796386793256, "learning_rate": 1.2112916461513214e-05, "step": 15400 }, { "embedding_loss": 0.0024, "epoch": 2.2821270310192023, "grad_norm": 0.0196861382573843, "learning_rate": 1.2080091908747744e-05, "step": 15450 }, { "embedding_loss": 0.0032, "epoch": 2.2895125553914326, "grad_norm": 0.10261236131191254, "learning_rate": 1.2047267355982275e-05, "step": 15500 }, { "embedding_loss": 0.0017, "epoch": 2.2968980797636633, "grad_norm": 0.008358814753592014, "learning_rate": 1.2014442803216808e-05, "step": 15550 }, { "embedding_loss": 0.0018, "epoch": 2.3042836041358936, "grad_norm": 0.03527391329407692, "learning_rate": 1.1981618250451339e-05, "step": 15600 }, { "embedding_loss": 0.0035, "epoch": 2.311669128508124, "grad_norm": 0.011962966993451118, "learning_rate": 1.1948793697685869e-05, "step": 15650 }, { "embedding_loss": 0.0041, "epoch": 2.3190546528803546, "grad_norm": 0.005154829006642103, "learning_rate": 1.1915969144920401e-05, "step": 15700 }, { "embedding_loss": 0.0015, "epoch": 2.326440177252585, "grad_norm": 0.007693074177950621, "learning_rate": 1.1883144592154933e-05, "step": 15750 }, { "embedding_loss": 0.003, "epoch": 2.333825701624815, "grad_norm": 0.02695990726351738, "learning_rate": 1.1850320039389465e-05, "step": 15800 }, { "embedding_loss": 0.0016, "epoch": 2.341211225997046, "grad_norm": 0.19833894073963165, "learning_rate": 1.1817495486623995e-05, "step": 15850 }, { "embedding_loss": 0.0027, "epoch": 2.348596750369276, "grad_norm": 0.153117373585701, "learning_rate": 1.1784670933858526e-05, "step": 15900 }, { "embedding_loss": 0.0024, "epoch": 2.3559822747415065, "grad_norm": 0.5938816666603088, "learning_rate": 1.175184638109306e-05, "step": 15950 }, { "embedding_loss": 0.002, "epoch": 2.363367799113737, "grad_norm": 0.01386656891554594, "learning_rate": 1.171902182832759e-05, "step": 16000 }, { "embedding_loss": 0.0014, "epoch": 2.3707533234859675, "grad_norm": 0.010158052667975426, "learning_rate": 1.1686197275562122e-05, "step": 16050 }, { "embedding_loss": 0.001, "epoch": 2.3781388478581977, "grad_norm": 0.008198092691600323, "learning_rate": 1.1653372722796652e-05, "step": 16100 }, { "embedding_loss": 0.0005, "epoch": 2.3855243722304285, "grad_norm": 0.010181965306401253, "learning_rate": 1.1620548170031184e-05, "step": 16150 }, { "embedding_loss": 0.0015, "epoch": 2.3929098966026587, "grad_norm": 0.008307389914989471, "learning_rate": 1.1587723617265716e-05, "step": 16200 }, { "embedding_loss": 0.0045, "epoch": 2.4002954209748895, "grad_norm": 0.018941566348075867, "learning_rate": 1.1554899064500247e-05, "step": 16250 }, { "embedding_loss": 0.0015, "epoch": 2.4076809453471197, "grad_norm": 0.005391134414821863, "learning_rate": 1.1522074511734777e-05, "step": 16300 }, { "embedding_loss": 0.0011, "epoch": 2.41506646971935, "grad_norm": 0.019267791882157326, "learning_rate": 1.148924995896931e-05, "step": 16350 }, { "embedding_loss": 0.0019, "epoch": 2.4224519940915803, "grad_norm": 0.2630805969238281, "learning_rate": 1.1456425406203841e-05, "step": 16400 }, { "embedding_loss": 0.0024, "epoch": 2.429837518463811, "grad_norm": 0.010556219145655632, "learning_rate": 1.1423600853438373e-05, "step": 16450 }, { "embedding_loss": 0.002, "epoch": 2.4372230428360413, "grad_norm": 0.03994214907288551, "learning_rate": 1.1390776300672904e-05, "step": 16500 }, { "embedding_loss": 0.0016, "epoch": 2.444608567208272, "grad_norm": 3.779356002807617, "learning_rate": 1.1357951747907436e-05, "step": 16550 }, { "embedding_loss": 0.0015, "epoch": 2.4519940915805023, "grad_norm": 0.030276980251073837, "learning_rate": 1.1325127195141968e-05, "step": 16600 }, { "embedding_loss": 0.0021, "epoch": 2.4593796159527326, "grad_norm": 0.010462663136422634, "learning_rate": 1.1292302642376498e-05, "step": 16650 }, { "embedding_loss": 0.0025, "epoch": 2.466765140324963, "grad_norm": 0.04659969359636307, "learning_rate": 1.1259478089611028e-05, "step": 16700 }, { "embedding_loss": 0.0021, "epoch": 2.4741506646971936, "grad_norm": 0.34690728783607483, "learning_rate": 1.1226653536845562e-05, "step": 16750 }, { "embedding_loss": 0.0029, "epoch": 2.481536189069424, "grad_norm": 0.019812889397144318, "learning_rate": 1.1193828984080092e-05, "step": 16800 }, { "embedding_loss": 0.0014, "epoch": 2.4889217134416546, "grad_norm": 0.004909256473183632, "learning_rate": 1.1161004431314624e-05, "step": 16850 }, { "embedding_loss": 0.0029, "epoch": 2.496307237813885, "grad_norm": 0.016758764162659645, "learning_rate": 1.1128179878549155e-05, "step": 16900 }, { "embedding_loss": 0.004, "epoch": 2.503692762186115, "grad_norm": 0.07048258185386658, "learning_rate": 1.1095355325783687e-05, "step": 16950 }, { "embedding_loss": 0.0028, "epoch": 2.5110782865583454, "grad_norm": 0.010511302389204502, "learning_rate": 1.1062530773018219e-05, "step": 17000 }, { "embedding_loss": 0.0027, "epoch": 2.518463810930576, "grad_norm": 0.0332963727414608, "learning_rate": 1.102970622025275e-05, "step": 17050 }, { "embedding_loss": 0.0011, "epoch": 2.5258493353028064, "grad_norm": 0.02814817987382412, "learning_rate": 1.0996881667487283e-05, "step": 17100 }, { "embedding_loss": 0.0036, "epoch": 2.533234859675037, "grad_norm": 0.01899763010442257, "learning_rate": 1.0964057114721813e-05, "step": 17150 }, { "embedding_loss": 0.0031, "epoch": 2.5406203840472674, "grad_norm": 0.004354503005743027, "learning_rate": 1.0931232561956344e-05, "step": 17200 }, { "embedding_loss": 0.0021, "epoch": 2.5480059084194977, "grad_norm": 0.0331052802503109, "learning_rate": 1.0898408009190876e-05, "step": 17250 }, { "embedding_loss": 0.0018, "epoch": 2.555391432791728, "grad_norm": 0.0316183939576149, "learning_rate": 1.0865583456425408e-05, "step": 17300 }, { "embedding_loss": 0.0015, "epoch": 2.5627769571639587, "grad_norm": 0.009719472378492355, "learning_rate": 1.0832758903659938e-05, "step": 17350 }, { "embedding_loss": 0.0031, "epoch": 2.570162481536189, "grad_norm": 0.035008445382118225, "learning_rate": 1.079993435089447e-05, "step": 17400 }, { "embedding_loss": 0.0031, "epoch": 2.5775480059084197, "grad_norm": 0.008490943349897861, "learning_rate": 1.0767109798129e-05, "step": 17450 }, { "embedding_loss": 0.0011, "epoch": 2.58493353028065, "grad_norm": 0.01083299983292818, "learning_rate": 1.0734285245363534e-05, "step": 17500 }, { "embedding_loss": 0.0044, "epoch": 2.5923190546528803, "grad_norm": 0.01600501500070095, "learning_rate": 1.0701460692598065e-05, "step": 17550 }, { "embedding_loss": 0.0013, "epoch": 2.5997045790251105, "grad_norm": 0.07744074612855911, "learning_rate": 1.0668636139832595e-05, "step": 17600 }, { "embedding_loss": 0.0015, "epoch": 2.6070901033973413, "grad_norm": 0.036319248378276825, "learning_rate": 1.0635811587067127e-05, "step": 17650 }, { "embedding_loss": 0.0013, "epoch": 2.6144756277695715, "grad_norm": 0.01792324334383011, "learning_rate": 1.0602987034301659e-05, "step": 17700 }, { "embedding_loss": 0.0018, "epoch": 2.6218611521418023, "grad_norm": 0.07195013016462326, "learning_rate": 1.0570162481536191e-05, "step": 17750 }, { "embedding_loss": 0.0023, "epoch": 2.6292466765140325, "grad_norm": 3.590275526046753, "learning_rate": 1.0537337928770721e-05, "step": 17800 }, { "embedding_loss": 0.0043, "epoch": 2.636632200886263, "grad_norm": 0.009701603092253208, "learning_rate": 1.0504513376005252e-05, "step": 17850 }, { "embedding_loss": 0.0049, "epoch": 2.644017725258493, "grad_norm": 0.09409826993942261, "learning_rate": 1.0471688823239786e-05, "step": 17900 }, { "embedding_loss": 0.0045, "epoch": 2.651403249630724, "grad_norm": 0.012147662229835987, "learning_rate": 1.0438864270474316e-05, "step": 17950 }, { "embedding_loss": 0.0017, "epoch": 2.658788774002954, "grad_norm": 0.008824297226965427, "learning_rate": 1.0406039717708846e-05, "step": 18000 }, { "embedding_loss": 0.002, "epoch": 2.666174298375185, "grad_norm": 0.018408598378300667, "learning_rate": 1.0373215164943378e-05, "step": 18050 }, { "embedding_loss": 0.0021, "epoch": 2.673559822747415, "grad_norm": 0.01662319526076317, "learning_rate": 1.034039061217791e-05, "step": 18100 }, { "embedding_loss": 0.0014, "epoch": 2.6809453471196454, "grad_norm": 0.020505361258983612, "learning_rate": 1.0307566059412442e-05, "step": 18150 }, { "embedding_loss": 0.0025, "epoch": 2.6883308714918757, "grad_norm": 0.08292482793331146, "learning_rate": 1.0274741506646973e-05, "step": 18200 }, { "embedding_loss": 0.0032, "epoch": 2.6957163958641064, "grad_norm": 0.023084105923771858, "learning_rate": 1.0241916953881503e-05, "step": 18250 }, { "embedding_loss": 0.0038, "epoch": 2.7031019202363367, "grad_norm": 0.030171602964401245, "learning_rate": 1.0209092401116037e-05, "step": 18300 }, { "embedding_loss": 0.0016, "epoch": 2.7104874446085674, "grad_norm": 0.004670475609600544, "learning_rate": 1.0176267848350567e-05, "step": 18350 }, { "embedding_loss": 0.0014, "epoch": 2.7178729689807977, "grad_norm": 0.003984387032687664, "learning_rate": 1.0143443295585098e-05, "step": 18400 }, { "embedding_loss": 0.0013, "epoch": 2.725258493353028, "grad_norm": 0.03194098919630051, "learning_rate": 1.011061874281963e-05, "step": 18450 }, { "embedding_loss": 0.0013, "epoch": 2.7326440177252582, "grad_norm": 0.007552579510957003, "learning_rate": 1.0077794190054162e-05, "step": 18500 }, { "embedding_loss": 0.0024, "epoch": 2.740029542097489, "grad_norm": 0.02968655154109001, "learning_rate": 1.0044969637288694e-05, "step": 18550 }, { "embedding_loss": 0.0024, "epoch": 2.7474150664697192, "grad_norm": 0.2573186159133911, "learning_rate": 1.0012145084523224e-05, "step": 18600 }, { "embedding_loss": 0.0026, "epoch": 2.75480059084195, "grad_norm": 0.036742523312568665, "learning_rate": 9.979320531757756e-06, "step": 18650 }, { "embedding_loss": 0.0032, "epoch": 2.7621861152141802, "grad_norm": 0.20845580101013184, "learning_rate": 9.946495978992286e-06, "step": 18700 }, { "embedding_loss": 0.0024, "epoch": 2.7695716395864105, "grad_norm": 0.051792044192552567, "learning_rate": 9.913671426226818e-06, "step": 18750 }, { "embedding_loss": 0.0019, "epoch": 2.7769571639586412, "grad_norm": 0.015146799385547638, "learning_rate": 9.880846873461349e-06, "step": 18800 }, { "embedding_loss": 0.0015, "epoch": 2.7843426883308715, "grad_norm": 0.01486288197338581, "learning_rate": 9.84802232069588e-06, "step": 18850 }, { "embedding_loss": 0.0028, "epoch": 2.791728212703102, "grad_norm": 0.018719913437962532, "learning_rate": 9.815197767930413e-06, "step": 18900 }, { "embedding_loss": 0.0021, "epoch": 2.7991137370753325, "grad_norm": 0.005828204099088907, "learning_rate": 9.782373215164945e-06, "step": 18950 }, { "embedding_loss": 0.0018, "epoch": 2.806499261447563, "grad_norm": 0.03715846315026283, "learning_rate": 9.749548662399475e-06, "step": 19000 }, { "embedding_loss": 0.0009, "epoch": 2.813884785819793, "grad_norm": 0.019518226385116577, "learning_rate": 9.716724109634007e-06, "step": 19050 }, { "embedding_loss": 0.0024, "epoch": 2.821270310192024, "grad_norm": 0.020911335945129395, "learning_rate": 9.68389955686854e-06, "step": 19100 }, { "embedding_loss": 0.0016, "epoch": 2.828655834564254, "grad_norm": 0.010648909956216812, "learning_rate": 9.65107500410307e-06, "step": 19150 }, { "embedding_loss": 0.001, "epoch": 2.8360413589364843, "grad_norm": 0.007687574252486229, "learning_rate": 9.618250451337602e-06, "step": 19200 }, { "embedding_loss": 0.0016, "epoch": 2.843426883308715, "grad_norm": 0.016183407977223396, "learning_rate": 9.585425898572132e-06, "step": 19250 }, { "embedding_loss": 0.0009, "epoch": 2.8508124076809453, "grad_norm": 0.032978300005197525, "learning_rate": 9.552601345806664e-06, "step": 19300 }, { "embedding_loss": 0.0025, "epoch": 2.8581979320531756, "grad_norm": 0.00619637593626976, "learning_rate": 9.519776793041196e-06, "step": 19350 }, { "embedding_loss": 0.0026, "epoch": 2.8655834564254064, "grad_norm": 0.0032677731942385435, "learning_rate": 9.486952240275728e-06, "step": 19400 }, { "embedding_loss": 0.0018, "epoch": 2.8729689807976366, "grad_norm": 0.0064840479753911495, "learning_rate": 9.454127687510259e-06, "step": 19450 }, { "embedding_loss": 0.0012, "epoch": 2.880354505169867, "grad_norm": 0.01070446241647005, "learning_rate": 9.42130313474479e-06, "step": 19500 }, { "embedding_loss": 0.0012, "epoch": 2.8877400295420976, "grad_norm": 0.07543105632066727, "learning_rate": 9.388478581979321e-06, "step": 19550 }, { "embedding_loss": 0.0018, "epoch": 2.895125553914328, "grad_norm": 0.025806330144405365, "learning_rate": 9.355654029213853e-06, "step": 19600 }, { "embedding_loss": 0.003, "epoch": 2.902511078286558, "grad_norm": 0.026599083095788956, "learning_rate": 9.322829476448383e-06, "step": 19650 }, { "embedding_loss": 0.0026, "epoch": 2.909896602658789, "grad_norm": 0.37029746174812317, "learning_rate": 9.290004923682915e-06, "step": 19700 }, { "embedding_loss": 0.001, "epoch": 2.917282127031019, "grad_norm": 0.07045801728963852, "learning_rate": 9.257180370917447e-06, "step": 19750 }, { "embedding_loss": 0.0031, "epoch": 2.9246676514032495, "grad_norm": 0.020875511690974236, "learning_rate": 9.22435581815198e-06, "step": 19800 }, { "embedding_loss": 0.0019, "epoch": 2.93205317577548, "grad_norm": 0.013287228532135487, "learning_rate": 9.19153126538651e-06, "step": 19850 }, { "embedding_loss": 0.0027, "epoch": 2.9394387001477105, "grad_norm": 0.006682571489363909, "learning_rate": 9.158706712621042e-06, "step": 19900 }, { "embedding_loss": 0.001, "epoch": 2.9468242245199407, "grad_norm": 0.016461633145809174, "learning_rate": 9.125882159855572e-06, "step": 19950 }, { "embedding_loss": 0.0025, "epoch": 2.9542097488921715, "grad_norm": 0.0292360782623291, "learning_rate": 9.093057607090104e-06, "step": 20000 }, { "embedding_loss": 0.0017, "epoch": 2.9615952732644018, "grad_norm": 0.007479995954781771, "learning_rate": 9.060233054324635e-06, "step": 20050 }, { "embedding_loss": 0.0033, "epoch": 2.9689807976366325, "grad_norm": 0.19220024347305298, "learning_rate": 9.027408501559167e-06, "step": 20100 }, { "embedding_loss": 0.0006, "epoch": 2.9763663220088628, "grad_norm": 0.018404290080070496, "learning_rate": 8.994583948793699e-06, "step": 20150 }, { "embedding_loss": 0.0026, "epoch": 2.983751846381093, "grad_norm": 0.012631416320800781, "learning_rate": 8.96175939602823e-06, "step": 20200 }, { "embedding_loss": 0.0011, "epoch": 2.9911373707533233, "grad_norm": 0.04417691379785538, "learning_rate": 8.928934843262761e-06, "step": 20250 }, { "embedding_loss": 0.0021, "epoch": 2.998522895125554, "grad_norm": 0.0054418547078967094, "learning_rate": 8.896110290497293e-06, "step": 20300 }, { "embedding_loss": 0.0039, "epoch": 3.0059084194977843, "grad_norm": 0.005223344080150127, "learning_rate": 8.863285737731823e-06, "step": 20350 }, { "embedding_loss": 0.0003, "epoch": 3.0132939438700146, "grad_norm": 0.0242659542709589, "learning_rate": 8.830461184966356e-06, "step": 20400 }, { "embedding_loss": 0.001, "epoch": 3.0206794682422453, "grad_norm": 0.0049690124578773975, "learning_rate": 8.797636632200886e-06, "step": 20450 }, { "embedding_loss": 0.0008, "epoch": 3.0280649926144756, "grad_norm": 0.0040290821343660355, "learning_rate": 8.764812079435418e-06, "step": 20500 }, { "embedding_loss": 0.0009, "epoch": 3.035450516986706, "grad_norm": 0.019365187734365463, "learning_rate": 8.73198752666995e-06, "step": 20550 }, { "embedding_loss": 0.0023, "epoch": 3.0428360413589366, "grad_norm": 0.10174138844013214, "learning_rate": 8.699162973904482e-06, "step": 20600 }, { "embedding_loss": 0.0008, "epoch": 3.050221565731167, "grad_norm": 0.2679438889026642, "learning_rate": 8.666338421139012e-06, "step": 20650 }, { "embedding_loss": 0.0009, "epoch": 3.057607090103397, "grad_norm": 0.010431923903524876, "learning_rate": 8.633513868373544e-06, "step": 20700 }, { "embedding_loss": 0.0015, "epoch": 3.064992614475628, "grad_norm": 0.034736406058073044, "learning_rate": 8.600689315608075e-06, "step": 20750 }, { "embedding_loss": 0.0019, "epoch": 3.072378138847858, "grad_norm": 0.012600087560713291, "learning_rate": 8.567864762842607e-06, "step": 20800 }, { "embedding_loss": 0.0027, "epoch": 3.0797636632200884, "grad_norm": 0.017327722162008286, "learning_rate": 8.535040210077137e-06, "step": 20850 }, { "embedding_loss": 0.0009, "epoch": 3.087149187592319, "grad_norm": 0.08267229795455933, "learning_rate": 8.50221565731167e-06, "step": 20900 }, { "embedding_loss": 0.0006, "epoch": 3.0945347119645494, "grad_norm": 0.1653100550174713, "learning_rate": 8.469391104546201e-06, "step": 20950 }, { "embedding_loss": 0.0011, "epoch": 3.1019202363367797, "grad_norm": 0.004222211427986622, "learning_rate": 8.436566551780733e-06, "step": 21000 }, { "embedding_loss": 0.0014, "epoch": 3.1093057607090104, "grad_norm": 0.005486358422785997, "learning_rate": 8.403741999015264e-06, "step": 21050 }, { "embedding_loss": 0.0009, "epoch": 3.1166912850812407, "grad_norm": 0.007771783974021673, "learning_rate": 8.370917446249796e-06, "step": 21100 }, { "embedding_loss": 0.0011, "epoch": 3.124076809453471, "grad_norm": 0.009825172834098339, "learning_rate": 8.338092893484328e-06, "step": 21150 }, { "embedding_loss": 0.0021, "epoch": 3.1314623338257017, "grad_norm": 0.009619227610528469, "learning_rate": 8.305268340718858e-06, "step": 21200 }, { "embedding_loss": 0.0023, "epoch": 3.138847858197932, "grad_norm": 0.04705429822206497, "learning_rate": 8.27244378795339e-06, "step": 21250 }, { "embedding_loss": 0.0022, "epoch": 3.1462333825701623, "grad_norm": 0.03510194644331932, "learning_rate": 8.23961923518792e-06, "step": 21300 }, { "embedding_loss": 0.001, "epoch": 3.153618906942393, "grad_norm": 0.007620047312229872, "learning_rate": 8.206794682422453e-06, "step": 21350 }, { "embedding_loss": 0.0017, "epoch": 3.1610044313146233, "grad_norm": 0.006676162593066692, "learning_rate": 8.173970129656985e-06, "step": 21400 }, { "embedding_loss": 0.0022, "epoch": 3.1683899556868536, "grad_norm": 0.006805592216551304, "learning_rate": 8.141145576891517e-06, "step": 21450 }, { "embedding_loss": 0.0007, "epoch": 3.1757754800590843, "grad_norm": 0.02653045393526554, "learning_rate": 8.108321024126047e-06, "step": 21500 }, { "embedding_loss": 0.0022, "epoch": 3.1831610044313146, "grad_norm": 0.055775534361600876, "learning_rate": 8.075496471360579e-06, "step": 21550 }, { "embedding_loss": 0.0002, "epoch": 3.1905465288035453, "grad_norm": 0.004594122059643269, "learning_rate": 8.04267191859511e-06, "step": 21600 }, { "embedding_loss": 0.0004, "epoch": 3.1979320531757756, "grad_norm": 0.057899340987205505, "learning_rate": 8.009847365829641e-06, "step": 21650 }, { "embedding_loss": 0.001, "epoch": 3.205317577548006, "grad_norm": 0.003396780928596854, "learning_rate": 7.977022813064172e-06, "step": 21700 }, { "embedding_loss": 0.0033, "epoch": 3.212703101920236, "grad_norm": 0.09521088004112244, "learning_rate": 7.944198260298704e-06, "step": 21750 }, { "embedding_loss": 0.0011, "epoch": 3.220088626292467, "grad_norm": 0.004245746415108442, "learning_rate": 7.911373707533236e-06, "step": 21800 }, { "embedding_loss": 0.0003, "epoch": 3.227474150664697, "grad_norm": 0.005718466360121965, "learning_rate": 7.878549154767768e-06, "step": 21850 }, { "embedding_loss": 0.0004, "epoch": 3.234859675036928, "grad_norm": 0.007956516928970814, "learning_rate": 7.845724602002298e-06, "step": 21900 }, { "embedding_loss": 0.0004, "epoch": 3.242245199409158, "grad_norm": 0.06620016694068909, "learning_rate": 7.81290004923683e-06, "step": 21950 }, { "embedding_loss": 0.0014, "epoch": 3.2496307237813884, "grad_norm": 0.04369127005338669, "learning_rate": 7.78007549647136e-06, "step": 22000 }, { "embedding_loss": 0.0012, "epoch": 3.2570162481536187, "grad_norm": 0.029797792434692383, "learning_rate": 7.747250943705893e-06, "step": 22050 }, { "embedding_loss": 0.002, "epoch": 3.2644017725258494, "grad_norm": 0.014197341166436672, "learning_rate": 7.714426390940423e-06, "step": 22100 }, { "embedding_loss": 0.001, "epoch": 3.2717872968980797, "grad_norm": 0.011921238154172897, "learning_rate": 7.681601838174955e-06, "step": 22150 }, { "embedding_loss": 0.0026, "epoch": 3.2791728212703104, "grad_norm": 0.022078925743699074, "learning_rate": 7.648777285409487e-06, "step": 22200 }, { "embedding_loss": 0.0017, "epoch": 3.2865583456425407, "grad_norm": 0.011514640413224697, "learning_rate": 7.615952732644018e-06, "step": 22250 }, { "embedding_loss": 0.0009, "epoch": 3.293943870014771, "grad_norm": 0.2203167974948883, "learning_rate": 7.5831281798785495e-06, "step": 22300 }, { "embedding_loss": 0.0018, "epoch": 3.3013293943870012, "grad_norm": 0.0317855142056942, "learning_rate": 7.5503036271130815e-06, "step": 22350 }, { "embedding_loss": 0.0025, "epoch": 3.308714918759232, "grad_norm": 0.005245373118668795, "learning_rate": 7.517479074347612e-06, "step": 22400 }, { "embedding_loss": 0.0016, "epoch": 3.3161004431314622, "grad_norm": 0.007596870884299278, "learning_rate": 7.484654521582144e-06, "step": 22450 }, { "embedding_loss": 0.0035, "epoch": 3.323485967503693, "grad_norm": 0.020896727219223976, "learning_rate": 7.451829968816675e-06, "step": 22500 }, { "embedding_loss": 0.0002, "epoch": 3.3308714918759232, "grad_norm": 0.07300405204296112, "learning_rate": 7.419005416051207e-06, "step": 22550 }, { "embedding_loss": 0.0015, "epoch": 3.3382570162481535, "grad_norm": 0.005366707220673561, "learning_rate": 7.3861808632857375e-06, "step": 22600 }, { "embedding_loss": 0.002, "epoch": 3.345642540620384, "grad_norm": 0.011347993277013302, "learning_rate": 7.3533563105202695e-06, "step": 22650 }, { "embedding_loss": 0.0021, "epoch": 3.3530280649926145, "grad_norm": 0.006689651869237423, "learning_rate": 7.320531757754801e-06, "step": 22700 }, { "embedding_loss": 0.0024, "epoch": 3.360413589364845, "grad_norm": 0.006886324379593134, "learning_rate": 7.287707204989333e-06, "step": 22750 }, { "embedding_loss": 0.0015, "epoch": 3.3677991137370755, "grad_norm": 0.03965551033616066, "learning_rate": 7.254882652223864e-06, "step": 22800 }, { "embedding_loss": 0.0021, "epoch": 3.375184638109306, "grad_norm": 0.03409096226096153, "learning_rate": 7.222058099458395e-06, "step": 22850 }, { "embedding_loss": 0.0022, "epoch": 3.382570162481536, "grad_norm": 0.00742725282907486, "learning_rate": 7.189233546692926e-06, "step": 22900 }, { "embedding_loss": 0.0015, "epoch": 3.389955686853767, "grad_norm": 0.008300753310322762, "learning_rate": 7.156408993927458e-06, "step": 22950 }, { "embedding_loss": 0.0015, "epoch": 3.397341211225997, "grad_norm": 0.03697545453906059, "learning_rate": 7.12358444116199e-06, "step": 23000 }, { "embedding_loss": 0.0016, "epoch": 3.4047267355982274, "grad_norm": 0.021186918020248413, "learning_rate": 7.090759888396521e-06, "step": 23050 }, { "embedding_loss": 0.0008, "epoch": 3.412112259970458, "grad_norm": 0.021211344748735428, "learning_rate": 7.057935335631053e-06, "step": 23100 }, { "embedding_loss": 0.0021, "epoch": 3.4194977843426884, "grad_norm": 0.015593543648719788, "learning_rate": 7.025110782865584e-06, "step": 23150 }, { "embedding_loss": 0.0021, "epoch": 3.4268833087149186, "grad_norm": 0.12304917722940445, "learning_rate": 6.992286230100116e-06, "step": 23200 }, { "embedding_loss": 0.0017, "epoch": 3.4342688330871494, "grad_norm": 0.030567510053515434, "learning_rate": 6.9594616773346464e-06, "step": 23250 }, { "embedding_loss": 0.0015, "epoch": 3.4416543574593796, "grad_norm": 0.029271570965647697, "learning_rate": 6.9266371245691785e-06, "step": 23300 }, { "embedding_loss": 0.0004, "epoch": 3.44903988183161, "grad_norm": 0.007037239149212837, "learning_rate": 6.89381257180371e-06, "step": 23350 }, { "embedding_loss": 0.0007, "epoch": 3.4564254062038406, "grad_norm": 0.004125585313886404, "learning_rate": 6.860988019038242e-06, "step": 23400 }, { "embedding_loss": 0.0004, "epoch": 3.463810930576071, "grad_norm": 0.012953881174325943, "learning_rate": 6.828163466272772e-06, "step": 23450 }, { "embedding_loss": 0.0014, "epoch": 3.471196454948301, "grad_norm": 0.0054145329631865025, "learning_rate": 6.795338913507304e-06, "step": 23500 }, { "embedding_loss": 0.0003, "epoch": 3.478581979320532, "grad_norm": 0.00575551250949502, "learning_rate": 6.762514360741835e-06, "step": 23550 }, { "embedding_loss": 0.002, "epoch": 3.485967503692762, "grad_norm": 0.0046454742550849915, "learning_rate": 6.729689807976367e-06, "step": 23600 }, { "embedding_loss": 0.0003, "epoch": 3.4933530280649925, "grad_norm": 0.01675521954894066, "learning_rate": 6.6968652552108986e-06, "step": 23650 }, { "embedding_loss": 0.0002, "epoch": 3.500738552437223, "grad_norm": 0.5372416973114014, "learning_rate": 6.66404070244543e-06, "step": 23700 }, { "embedding_loss": 0.0009, "epoch": 3.5081240768094535, "grad_norm": 0.010617181658744812, "learning_rate": 6.631216149679961e-06, "step": 23750 }, { "embedding_loss": 0.0036, "epoch": 3.5155096011816838, "grad_norm": 0.009620044380426407, "learning_rate": 6.598391596914493e-06, "step": 23800 }, { "embedding_loss": 0.0022, "epoch": 3.5228951255539145, "grad_norm": 0.0031557646580040455, "learning_rate": 6.565567044149024e-06, "step": 23850 }, { "embedding_loss": 0.0014, "epoch": 3.5302806499261448, "grad_norm": 0.006240217015147209, "learning_rate": 6.532742491383555e-06, "step": 23900 }, { "embedding_loss": 0.0015, "epoch": 3.537666174298375, "grad_norm": 0.054248787462711334, "learning_rate": 6.499917938618087e-06, "step": 23950 }, { "embedding_loss": 0.0009, "epoch": 3.5450516986706058, "grad_norm": 0.004816859494894743, "learning_rate": 6.467093385852619e-06, "step": 24000 }, { "embedding_loss": 0.0007, "epoch": 3.552437223042836, "grad_norm": 0.014538111165165901, "learning_rate": 6.43426883308715e-06, "step": 24050 }, { "embedding_loss": 0.0024, "epoch": 3.5598227474150663, "grad_norm": 0.0056640529073774815, "learning_rate": 6.401444280321681e-06, "step": 24100 }, { "embedding_loss": 0.0011, "epoch": 3.567208271787297, "grad_norm": 0.005168286617845297, "learning_rate": 6.368619727556212e-06, "step": 24150 }, { "embedding_loss": 0.0018, "epoch": 3.5745937961595273, "grad_norm": 0.006222166121006012, "learning_rate": 6.335795174790744e-06, "step": 24200 }, { "embedding_loss": 0.0018, "epoch": 3.5819793205317576, "grad_norm": 0.1087515652179718, "learning_rate": 6.3029706220252755e-06, "step": 24250 }, { "embedding_loss": 0.0029, "epoch": 3.5893648449039883, "grad_norm": 0.002414864953607321, "learning_rate": 6.270146069259807e-06, "step": 24300 }, { "embedding_loss": 0.0009, "epoch": 3.5967503692762186, "grad_norm": 0.003966380376368761, "learning_rate": 6.237321516494338e-06, "step": 24350 }, { "embedding_loss": 0.0015, "epoch": 3.604135893648449, "grad_norm": 0.0019502595532685518, "learning_rate": 6.20449696372887e-06, "step": 24400 }, { "embedding_loss": 0.0015, "epoch": 3.6115214180206796, "grad_norm": 0.04951006919145584, "learning_rate": 6.171672410963401e-06, "step": 24450 }, { "embedding_loss": 0.0009, "epoch": 3.61890694239291, "grad_norm": 0.05455106496810913, "learning_rate": 6.138847858197933e-06, "step": 24500 }, { "embedding_loss": 0.0002, "epoch": 3.62629246676514, "grad_norm": 0.006709383800625801, "learning_rate": 6.1060233054324635e-06, "step": 24550 }, { "embedding_loss": 0.0021, "epoch": 3.633677991137371, "grad_norm": 0.006298394873738289, "learning_rate": 6.0731987526669955e-06, "step": 24600 }, { "embedding_loss": 0.0002, "epoch": 3.641063515509601, "grad_norm": 0.007862403988838196, "learning_rate": 6.040374199901527e-06, "step": 24650 }, { "embedding_loss": 0.0014, "epoch": 3.6484490398818314, "grad_norm": 0.014156641438603401, "learning_rate": 6.007549647136059e-06, "step": 24700 }, { "embedding_loss": 0.0008, "epoch": 3.655834564254062, "grad_norm": 0.05146721005439758, "learning_rate": 5.974725094370589e-06, "step": 24750 }, { "embedding_loss": 0.0013, "epoch": 3.6632200886262924, "grad_norm": 0.0289792250841856, "learning_rate": 5.941900541605121e-06, "step": 24800 }, { "embedding_loss": 0.0023, "epoch": 3.670605612998523, "grad_norm": 0.001623387448489666, "learning_rate": 5.909075988839652e-06, "step": 24850 }, { "embedding_loss": 0.0004, "epoch": 3.6779911373707534, "grad_norm": 0.013176560401916504, "learning_rate": 5.876251436074184e-06, "step": 24900 }, { "embedding_loss": 0.0007, "epoch": 3.6853766617429837, "grad_norm": 0.004344393033534288, "learning_rate": 5.843426883308715e-06, "step": 24950 }, { "embedding_loss": 0.0015, "epoch": 3.692762186115214, "grad_norm": 0.00743023632094264, "learning_rate": 5.810602330543247e-06, "step": 25000 }, { "embedding_loss": 0.0008, "epoch": 3.7001477104874447, "grad_norm": 0.01551518589258194, "learning_rate": 5.777777777777778e-06, "step": 25050 }, { "embedding_loss": 0.0014, "epoch": 3.707533234859675, "grad_norm": 0.0053128432482481, "learning_rate": 5.74495322501231e-06, "step": 25100 }, { "embedding_loss": 0.0005, "epoch": 3.7149187592319057, "grad_norm": 0.005089200101792812, "learning_rate": 5.712128672246841e-06, "step": 25150 }, { "embedding_loss": 0.0018, "epoch": 3.722304283604136, "grad_norm": 0.007938201539218426, "learning_rate": 5.679304119481372e-06, "step": 25200 }, { "embedding_loss": 0.0012, "epoch": 3.7296898079763663, "grad_norm": 0.031416155397892, "learning_rate": 5.6464795667159045e-06, "step": 25250 }, { "embedding_loss": 0.0002, "epoch": 3.7370753323485966, "grad_norm": 0.01789075881242752, "learning_rate": 5.613655013950436e-06, "step": 25300 }, { "embedding_loss": 0.0005, "epoch": 3.7444608567208273, "grad_norm": 0.0897989496588707, "learning_rate": 5.580830461184968e-06, "step": 25350 }, { "embedding_loss": 0.0016, "epoch": 3.7518463810930576, "grad_norm": 0.0033649958204478025, "learning_rate": 5.548005908419498e-06, "step": 25400 }, { "embedding_loss": 0.0015, "epoch": 3.7592319054652883, "grad_norm": 0.007789059076458216, "learning_rate": 5.51518135565403e-06, "step": 25450 }, { "embedding_loss": 0.0014, "epoch": 3.7666174298375186, "grad_norm": 0.0069976383820176125, "learning_rate": 5.482356802888561e-06, "step": 25500 }, { "embedding_loss": 0.0008, "epoch": 3.774002954209749, "grad_norm": 0.028319302946329117, "learning_rate": 5.449532250123093e-06, "step": 25550 }, { "embedding_loss": 0.0004, "epoch": 3.781388478581979, "grad_norm": 0.13736043870449066, "learning_rate": 5.416707697357624e-06, "step": 25600 }, { "embedding_loss": 0.0014, "epoch": 3.78877400295421, "grad_norm": 0.0662890300154686, "learning_rate": 5.383883144592156e-06, "step": 25650 }, { "embedding_loss": 0.0018, "epoch": 3.79615952732644, "grad_norm": 0.07620090991258621, "learning_rate": 5.351058591826687e-06, "step": 25700 }, { "embedding_loss": 0.0008, "epoch": 3.803545051698671, "grad_norm": 0.0053595914505422115, "learning_rate": 5.318234039061219e-06, "step": 25750 }, { "embedding_loss": 0.0008, "epoch": 3.810930576070901, "grad_norm": 0.03874294087290764, "learning_rate": 5.285409486295749e-06, "step": 25800 }, { "embedding_loss": 0.0002, "epoch": 3.8183161004431314, "grad_norm": 0.0377751886844635, "learning_rate": 5.252584933530281e-06, "step": 25850 }, { "embedding_loss": 0.0003, "epoch": 3.8257016248153617, "grad_norm": 0.052115991711616516, "learning_rate": 5.2197603807648126e-06, "step": 25900 }, { "embedding_loss": 0.0009, "epoch": 3.8330871491875924, "grad_norm": 0.004992119502276182, "learning_rate": 5.186935827999345e-06, "step": 25950 }, { "embedding_loss": 0.002, "epoch": 3.8404726735598227, "grad_norm": 0.010746672749519348, "learning_rate": 5.154111275233875e-06, "step": 26000 }, { "embedding_loss": 0.0016, "epoch": 3.8478581979320534, "grad_norm": 0.0036030395422130823, "learning_rate": 5.121286722468407e-06, "step": 26050 }, { "embedding_loss": 0.0013, "epoch": 3.8552437223042837, "grad_norm": 0.004183268640190363, "learning_rate": 5.088462169702938e-06, "step": 26100 }, { "embedding_loss": 0.0021, "epoch": 3.862629246676514, "grad_norm": 0.03056243434548378, "learning_rate": 5.05563761693747e-06, "step": 26150 }, { "embedding_loss": 0.0006, "epoch": 3.8700147710487443, "grad_norm": 0.020458584651350975, "learning_rate": 5.022813064172001e-06, "step": 26200 }, { "embedding_loss": 0.0005, "epoch": 3.877400295420975, "grad_norm": 0.014284319244325161, "learning_rate": 4.989988511406533e-06, "step": 26250 }, { "embedding_loss": 0.0019, "epoch": 3.8847858197932053, "grad_norm": 0.004101385362446308, "learning_rate": 4.957163958641064e-06, "step": 26300 }, { "embedding_loss": 0.0017, "epoch": 3.892171344165436, "grad_norm": 0.003685436677187681, "learning_rate": 4.924339405875596e-06, "step": 26350 }, { "embedding_loss": 0.0002, "epoch": 3.8995568685376663, "grad_norm": 0.01675995998084545, "learning_rate": 4.891514853110127e-06, "step": 26400 }, { "embedding_loss": 0.0014, "epoch": 3.9069423929098965, "grad_norm": 0.003458675229921937, "learning_rate": 4.858690300344658e-06, "step": 26450 }, { "embedding_loss": 0.0003, "epoch": 3.914327917282127, "grad_norm": 0.004388707224279642, "learning_rate": 4.8258657475791895e-06, "step": 26500 }, { "embedding_loss": 0.0015, "epoch": 3.9217134416543575, "grad_norm": 0.006350350566208363, "learning_rate": 4.7930411948137215e-06, "step": 26550 }, { "embedding_loss": 0.001, "epoch": 3.929098966026588, "grad_norm": 0.005695797968655825, "learning_rate": 4.760216642048253e-06, "step": 26600 }, { "embedding_loss": 0.0002, "epoch": 3.9364844903988185, "grad_norm": 0.004757806193083525, "learning_rate": 4.727392089282784e-06, "step": 26650 }, { "embedding_loss": 0.0002, "epoch": 3.943870014771049, "grad_norm": 0.0033138145226985216, "learning_rate": 4.694567536517315e-06, "step": 26700 }, { "embedding_loss": 0.0002, "epoch": 3.951255539143279, "grad_norm": 0.003561320947483182, "learning_rate": 4.661742983751847e-06, "step": 26750 }, { "embedding_loss": 0.0006, "epoch": 3.9586410635155094, "grad_norm": 0.01382633950561285, "learning_rate": 4.628918430986378e-06, "step": 26800 }, { "embedding_loss": 0.0003, "epoch": 3.96602658788774, "grad_norm": 0.005202912725508213, "learning_rate": 4.5960938782209095e-06, "step": 26850 }, { "embedding_loss": 0.0016, "epoch": 3.9734121122599704, "grad_norm": 0.012079019099473953, "learning_rate": 4.563269325455441e-06, "step": 26900 }, { "embedding_loss": 0.0008, "epoch": 3.980797636632201, "grad_norm": 0.014114444144070148, "learning_rate": 4.530444772689973e-06, "step": 26950 }, { "embedding_loss": 0.002, "epoch": 3.9881831610044314, "grad_norm": 0.02415064163506031, "learning_rate": 4.497620219924504e-06, "step": 27000 }, { "embedding_loss": 0.0017, "epoch": 3.9955686853766617, "grad_norm": 0.021551288664340973, "learning_rate": 4.464795667159035e-06, "step": 27050 }, { "embedding_loss": 0.0003, "epoch": 4.002954209748892, "grad_norm": 0.005066817160695791, "learning_rate": 4.431971114393566e-06, "step": 27100 }, { "embedding_loss": 0.0012, "epoch": 4.010339734121122, "grad_norm": 0.007141259498894215, "learning_rate": 4.399146561628098e-06, "step": 27150 }, { "embedding_loss": 0.0004, "epoch": 4.017725258493353, "grad_norm": 0.12070070952177048, "learning_rate": 4.36632200886263e-06, "step": 27200 }, { "embedding_loss": 0.0022, "epoch": 4.025110782865584, "grad_norm": 0.0378386452794075, "learning_rate": 4.333497456097161e-06, "step": 27250 }, { "embedding_loss": 0.0015, "epoch": 4.032496307237814, "grad_norm": 0.0050777471624314785, "learning_rate": 4.300672903331692e-06, "step": 27300 }, { "embedding_loss": 0.0004, "epoch": 4.039881831610044, "grad_norm": 0.01844395510852337, "learning_rate": 4.267848350566224e-06, "step": 27350 }, { "embedding_loss": 0.001, "epoch": 4.0472673559822745, "grad_norm": 0.012001128867268562, "learning_rate": 4.235023797800755e-06, "step": 27400 }, { "embedding_loss": 0.0002, "epoch": 4.054652880354505, "grad_norm": 0.0065623316913843155, "learning_rate": 4.2021992450352864e-06, "step": 27450 }, { "embedding_loss": 0.0002, "epoch": 4.062038404726736, "grad_norm": 0.004251908976584673, "learning_rate": 4.1693746922698185e-06, "step": 27500 }, { "embedding_loss": 0.0002, "epoch": 4.069423929098966, "grad_norm": 0.010989518836140633, "learning_rate": 4.13655013950435e-06, "step": 27550 }, { "embedding_loss": 0.0002, "epoch": 4.0768094534711965, "grad_norm": 0.0056010037660598755, "learning_rate": 4.103725586738881e-06, "step": 27600 }, { "embedding_loss": 0.0009, "epoch": 4.084194977843427, "grad_norm": 0.010540075600147247, "learning_rate": 4.070901033973412e-06, "step": 27650 }, { "embedding_loss": 0.0016, "epoch": 4.091580502215657, "grad_norm": 0.01558383833616972, "learning_rate": 4.038076481207944e-06, "step": 27700 }, { "embedding_loss": 0.0017, "epoch": 4.098966026587887, "grad_norm": 0.0061827609315514565, "learning_rate": 4.005251928442475e-06, "step": 27750 }, { "embedding_loss": 0.0016, "epoch": 4.1063515509601185, "grad_norm": 0.016165059059858322, "learning_rate": 3.9724273756770065e-06, "step": 27800 }, { "embedding_loss": 0.0008, "epoch": 4.113737075332349, "grad_norm": 0.013678347691893578, "learning_rate": 3.939602822911538e-06, "step": 27850 }, { "embedding_loss": 0.0007, "epoch": 4.121122599704579, "grad_norm": 0.002744109369814396, "learning_rate": 3.90677827014607e-06, "step": 27900 }, { "embedding_loss": 0.0002, "epoch": 4.128508124076809, "grad_norm": 0.002654843032360077, "learning_rate": 3.873953717380601e-06, "step": 27950 }, { "embedding_loss": 0.0003, "epoch": 4.13589364844904, "grad_norm": 0.031312067061662674, "learning_rate": 3.841129164615132e-06, "step": 28000 }, { "embedding_loss": 0.0007, "epoch": 4.14327917282127, "grad_norm": 0.002234194427728653, "learning_rate": 3.8083046118496638e-06, "step": 28050 }, { "embedding_loss": 0.002, "epoch": 4.150664697193501, "grad_norm": 0.005080920644104481, "learning_rate": 3.775480059084195e-06, "step": 28100 }, { "embedding_loss": 0.0013, "epoch": 4.158050221565731, "grad_norm": 0.00728574488312006, "learning_rate": 3.742655506318727e-06, "step": 28150 }, { "embedding_loss": 0.0003, "epoch": 4.165435745937962, "grad_norm": 0.0030798488296568394, "learning_rate": 3.709830953553258e-06, "step": 28200 }, { "embedding_loss": 0.0001, "epoch": 4.172821270310192, "grad_norm": 0.0029522618278861046, "learning_rate": 3.67700640078779e-06, "step": 28250 }, { "embedding_loss": 0.0008, "epoch": 4.180206794682422, "grad_norm": 0.04743621125817299, "learning_rate": 3.6441818480223214e-06, "step": 28300 }, { "embedding_loss": 0.0019, "epoch": 4.1875923190546525, "grad_norm": 0.021583393216133118, "learning_rate": 3.6113572952568526e-06, "step": 28350 }, { "embedding_loss": 0.0017, "epoch": 4.194977843426884, "grad_norm": 0.004344303160905838, "learning_rate": 3.5785327424913842e-06, "step": 28400 }, { "embedding_loss": 0.0007, "epoch": 4.202363367799114, "grad_norm": 0.05794514715671539, "learning_rate": 3.5457081897259154e-06, "step": 28450 }, { "embedding_loss": 0.0021, "epoch": 4.209748892171344, "grad_norm": 0.03878411650657654, "learning_rate": 3.512883636960447e-06, "step": 28500 }, { "embedding_loss": 0.0005, "epoch": 4.2171344165435745, "grad_norm": 0.024656204506754875, "learning_rate": 3.4800590841949783e-06, "step": 28550 }, { "embedding_loss": 0.0009, "epoch": 4.224519940915805, "grad_norm": 0.07271004468202591, "learning_rate": 3.44723453142951e-06, "step": 28600 }, { "embedding_loss": 0.0019, "epoch": 4.231905465288035, "grad_norm": 0.028899872675538063, "learning_rate": 3.414409978664041e-06, "step": 28650 }, { "embedding_loss": 0.0006, "epoch": 4.239290989660266, "grad_norm": 0.05092883110046387, "learning_rate": 3.3815854258985727e-06, "step": 28700 }, { "embedding_loss": 0.0011, "epoch": 4.2466765140324965, "grad_norm": 0.0032700442243367434, "learning_rate": 3.348760873133104e-06, "step": 28750 }, { "embedding_loss": 0.0005, "epoch": 4.254062038404727, "grad_norm": 0.008138212375342846, "learning_rate": 3.3159363203676355e-06, "step": 28800 }, { "embedding_loss": 0.0008, "epoch": 4.261447562776957, "grad_norm": 0.004710312932729721, "learning_rate": 3.2831117676021667e-06, "step": 28850 }, { "embedding_loss": 0.0006, "epoch": 4.268833087149187, "grad_norm": 0.006768395192921162, "learning_rate": 3.2502872148366983e-06, "step": 28900 }, { "embedding_loss": 0.0006, "epoch": 4.2762186115214185, "grad_norm": 3.97033429145813, "learning_rate": 3.2174626620712295e-06, "step": 28950 }, { "embedding_loss": 0.0008, "epoch": 4.283604135893649, "grad_norm": 0.0024879788979887962, "learning_rate": 3.184638109305761e-06, "step": 29000 }, { "embedding_loss": 0.0014, "epoch": 4.290989660265879, "grad_norm": 0.004705480299890041, "learning_rate": 3.1518135565402923e-06, "step": 29050 }, { "embedding_loss": 0.0003, "epoch": 4.298375184638109, "grad_norm": 0.0038461387157440186, "learning_rate": 3.118989003774824e-06, "step": 29100 }, { "embedding_loss": 0.0002, "epoch": 4.30576070901034, "grad_norm": 0.0027929339557886124, "learning_rate": 3.086164451009355e-06, "step": 29150 }, { "embedding_loss": 0.0009, "epoch": 4.31314623338257, "grad_norm": 0.022274106740951538, "learning_rate": 3.0533398982438868e-06, "step": 29200 }, { "embedding_loss": 0.0001, "epoch": 4.3205317577548, "grad_norm": 0.001826609717682004, "learning_rate": 3.020515345478418e-06, "step": 29250 }, { "embedding_loss": 0.0002, "epoch": 4.327917282127031, "grad_norm": 0.026185447350144386, "learning_rate": 2.9876907927129496e-06, "step": 29300 }, { "embedding_loss": 0.0008, "epoch": 4.335302806499262, "grad_norm": 0.8461505770683289, "learning_rate": 2.954866239947481e-06, "step": 29350 }, { "embedding_loss": 0.0003, "epoch": 4.342688330871492, "grad_norm": 0.0049928221851587296, "learning_rate": 2.9220416871820124e-06, "step": 29400 }, { "embedding_loss": 0.0009, "epoch": 4.350073855243722, "grad_norm": 0.0039035649970173836, "learning_rate": 2.8892171344165436e-06, "step": 29450 }, { "embedding_loss": 0.0008, "epoch": 4.357459379615952, "grad_norm": 0.003410862758755684, "learning_rate": 2.8563925816510752e-06, "step": 29500 }, { "embedding_loss": 0.0009, "epoch": 4.364844903988184, "grad_norm": 0.007184627000242472, "learning_rate": 2.8235680288856064e-06, "step": 29550 }, { "embedding_loss": 0.0012, "epoch": 4.372230428360414, "grad_norm": 0.005408278200775385, "learning_rate": 2.790743476120138e-06, "step": 29600 }, { "embedding_loss": 0.0004, "epoch": 4.379615952732644, "grad_norm": 0.005216268356889486, "learning_rate": 2.7579189233546692e-06, "step": 29650 }, { "embedding_loss": 0.0015, "epoch": 4.387001477104874, "grad_norm": 0.12092409282922745, "learning_rate": 2.725094370589201e-06, "step": 29700 }, { "embedding_loss": 0.0011, "epoch": 4.394387001477105, "grad_norm": 2.360546112060547, "learning_rate": 2.692269817823732e-06, "step": 29750 }, { "embedding_loss": 0.0003, "epoch": 4.401772525849335, "grad_norm": 0.04117804393172264, "learning_rate": 2.6594452650582637e-06, "step": 29800 }, { "embedding_loss": 0.0014, "epoch": 4.409158050221565, "grad_norm": 0.31080320477485657, "learning_rate": 2.626620712292795e-06, "step": 29850 }, { "embedding_loss": 0.0001, "epoch": 4.416543574593796, "grad_norm": 0.0030854118522256613, "learning_rate": 2.5937961595273265e-06, "step": 29900 }, { "embedding_loss": 0.001, "epoch": 4.423929098966027, "grad_norm": 0.0051147108897566795, "learning_rate": 2.5609716067618577e-06, "step": 29950 }, { "embedding_loss": 0.0003, "epoch": 4.431314623338257, "grad_norm": 0.002233837265521288, "learning_rate": 2.5281470539963893e-06, "step": 30000 }, { "embedding_loss": 0.0003, "epoch": 4.438700147710487, "grad_norm": 0.002284417860209942, "learning_rate": 2.495322501230921e-06, "step": 30050 }, { "embedding_loss": 0.0008, "epoch": 4.4460856720827175, "grad_norm": 0.03488105162978172, "learning_rate": 2.4624979484654525e-06, "step": 30100 }, { "embedding_loss": 0.0008, "epoch": 4.453471196454949, "grad_norm": 0.011509645730257034, "learning_rate": 2.4296733956999837e-06, "step": 30150 }, { "embedding_loss": 0.0002, "epoch": 4.460856720827179, "grad_norm": 0.0027210384141653776, "learning_rate": 2.3968488429345154e-06, "step": 30200 }, { "embedding_loss": 0.0002, "epoch": 4.468242245199409, "grad_norm": 0.013157092034816742, "learning_rate": 2.3640242901690466e-06, "step": 30250 }, { "embedding_loss": 0.0001, "epoch": 4.4756277695716395, "grad_norm": 0.004714665934443474, "learning_rate": 2.331199737403578e-06, "step": 30300 }, { "embedding_loss": 0.0007, "epoch": 4.48301329394387, "grad_norm": 0.011190270073711872, "learning_rate": 2.2983751846381094e-06, "step": 30350 }, { "embedding_loss": 0.0001, "epoch": 4.4903988183161, "grad_norm": 0.00462683429941535, "learning_rate": 2.265550631872641e-06, "step": 30400 }, { "embedding_loss": 0.0001, "epoch": 4.497784342688331, "grad_norm": 0.003459771629422903, "learning_rate": 2.232726079107172e-06, "step": 30450 }, { "embedding_loss": 0.0003, "epoch": 4.5051698670605616, "grad_norm": 0.04073004424571991, "learning_rate": 2.199901526341704e-06, "step": 30500 }, { "embedding_loss": 0.0002, "epoch": 4.512555391432792, "grad_norm": 0.005062537267804146, "learning_rate": 2.167076973576235e-06, "step": 30550 }, { "embedding_loss": 0.0001, "epoch": 4.519940915805022, "grad_norm": 0.007456169463694096, "learning_rate": 2.1342524208107666e-06, "step": 30600 }, { "embedding_loss": 0.0014, "epoch": 4.527326440177252, "grad_norm": 0.021971579641103745, "learning_rate": 2.1014278680452982e-06, "step": 30650 }, { "embedding_loss": 0.0003, "epoch": 4.534711964549483, "grad_norm": 0.006037925370037556, "learning_rate": 2.0686033152798294e-06, "step": 30700 }, { "embedding_loss": 0.0007, "epoch": 4.542097488921714, "grad_norm": 0.020268207415938377, "learning_rate": 2.035778762514361e-06, "step": 30750 }, { "embedding_loss": 0.0009, "epoch": 4.549483013293944, "grad_norm": 0.003740853862836957, "learning_rate": 2.0029542097488923e-06, "step": 30800 }, { "embedding_loss": 0.0001, "epoch": 4.556868537666174, "grad_norm": 0.006321648135781288, "learning_rate": 1.970129656983424e-06, "step": 30850 }, { "embedding_loss": 0.001, "epoch": 4.564254062038405, "grad_norm": 0.049009013921022415, "learning_rate": 1.937305104217955e-06, "step": 30900 }, { "embedding_loss": 0.0001, "epoch": 4.571639586410635, "grad_norm": 0.006071667652577162, "learning_rate": 1.9044805514524867e-06, "step": 30950 }, { "embedding_loss": 0.0005, "epoch": 4.579025110782865, "grad_norm": 0.025013990700244904, "learning_rate": 1.8716559986870181e-06, "step": 31000 }, { "embedding_loss": 0.0003, "epoch": 4.586410635155096, "grad_norm": 0.0030903525184839964, "learning_rate": 1.8388314459215495e-06, "step": 31050 }, { "embedding_loss": 0.0001, "epoch": 4.593796159527327, "grad_norm": 0.006547342520207167, "learning_rate": 1.806006893156081e-06, "step": 31100 }, { "embedding_loss": 0.0007, "epoch": 4.601181683899557, "grad_norm": 0.0045944456942379475, "learning_rate": 1.7731823403906123e-06, "step": 31150 }, { "embedding_loss": 0.0003, "epoch": 4.608567208271787, "grad_norm": 0.006300546228885651, "learning_rate": 1.7403577876251437e-06, "step": 31200 }, { "embedding_loss": 0.0009, "epoch": 4.6159527326440175, "grad_norm": 0.017787380144000053, "learning_rate": 1.7075332348596751e-06, "step": 31250 }, { "embedding_loss": 0.0003, "epoch": 4.623338257016248, "grad_norm": 0.006018889602273703, "learning_rate": 1.6747086820942066e-06, "step": 31300 }, { "embedding_loss": 0.0003, "epoch": 4.630723781388479, "grad_norm": 0.03421813249588013, "learning_rate": 1.641884129328738e-06, "step": 31350 }, { "embedding_loss": 0.0001, "epoch": 4.638109305760709, "grad_norm": 0.002777187153697014, "learning_rate": 1.6090595765632694e-06, "step": 31400 }, { "embedding_loss": 0.0007, "epoch": 4.6454948301329395, "grad_norm": 0.013371906243264675, "learning_rate": 1.5762350237978008e-06, "step": 31450 }, { "embedding_loss": 0.0014, "epoch": 4.65288035450517, "grad_norm": 0.01614244654774666, "learning_rate": 1.5434104710323322e-06, "step": 31500 }, { "embedding_loss": 0.0008, "epoch": 4.6602658788774, "grad_norm": 0.014399105682969093, "learning_rate": 1.5105859182668636e-06, "step": 31550 }, { "embedding_loss": 0.0003, "epoch": 4.66765140324963, "grad_norm": 0.007542195729911327, "learning_rate": 1.477761365501395e-06, "step": 31600 }, { "embedding_loss": 0.001, "epoch": 4.6750369276218615, "grad_norm": 0.0031674772035330534, "learning_rate": 1.4449368127359264e-06, "step": 31650 }, { "embedding_loss": 0.0013, "epoch": 4.682422451994092, "grad_norm": 0.012060785666108131, "learning_rate": 1.4121122599704582e-06, "step": 31700 }, { "embedding_loss": 0.0015, "epoch": 4.689807976366322, "grad_norm": 0.04494306072592735, "learning_rate": 1.3792877072049897e-06, "step": 31750 }, { "embedding_loss": 0.0017, "epoch": 4.697193500738552, "grad_norm": 0.009011705406010151, "learning_rate": 1.346463154439521e-06, "step": 31800 }, { "embedding_loss": 0.0002, "epoch": 4.704579025110783, "grad_norm": 0.002131384564563632, "learning_rate": 1.3136386016740525e-06, "step": 31850 }, { "embedding_loss": 0.0014, "epoch": 4.711964549483013, "grad_norm": 0.003283638972789049, "learning_rate": 1.2808140489085839e-06, "step": 31900 }, { "embedding_loss": 0.0003, "epoch": 4.719350073855244, "grad_norm": 0.0024919663555920124, "learning_rate": 1.2479894961431153e-06, "step": 31950 }, { "embedding_loss": 0.0002, "epoch": 4.726735598227474, "grad_norm": 0.14705249667167664, "learning_rate": 1.2151649433776467e-06, "step": 32000 }, { "embedding_loss": 0.0009, "epoch": 4.734121122599705, "grad_norm": 0.0017327765235677361, "learning_rate": 1.182340390612178e-06, "step": 32050 }, { "embedding_loss": 0.0008, "epoch": 4.741506646971935, "grad_norm": 0.8608806729316711, "learning_rate": 1.1495158378467095e-06, "step": 32100 }, { "embedding_loss": 0.0011, "epoch": 4.748892171344165, "grad_norm": 0.019515294581651688, "learning_rate": 1.116691285081241e-06, "step": 32150 }, { "embedding_loss": 0.0002, "epoch": 4.7562776957163955, "grad_norm": 0.010132347233593464, "learning_rate": 1.0838667323157723e-06, "step": 32200 }, { "embedding_loss": 0.0002, "epoch": 4.763663220088627, "grad_norm": 0.010688789188861847, "learning_rate": 1.0510421795503037e-06, "step": 32250 }, { "embedding_loss": 0.0004, "epoch": 4.771048744460857, "grad_norm": 0.004127997439354658, "learning_rate": 1.0182176267848351e-06, "step": 32300 }, { "embedding_loss": 0.0011, "epoch": 4.778434268833087, "grad_norm": 0.0045281765051186085, "learning_rate": 9.853930740193666e-07, "step": 32350 }, { "embedding_loss": 0.0009, "epoch": 4.7858197932053175, "grad_norm": 0.03462732210755348, "learning_rate": 9.52568521253898e-07, "step": 32400 }, { "embedding_loss": 0.0002, "epoch": 4.793205317577548, "grad_norm": 0.011012708768248558, "learning_rate": 9.197439684884295e-07, "step": 32450 }, { "embedding_loss": 0.0017, "epoch": 4.800590841949779, "grad_norm": 0.002184939570724964, "learning_rate": 8.869194157229609e-07, "step": 32500 }, { "embedding_loss": 0.0003, "epoch": 4.807976366322009, "grad_norm": 0.0029540294781327248, "learning_rate": 8.540948629574923e-07, "step": 32550 }, { "embedding_loss": 0.0009, "epoch": 4.8153618906942395, "grad_norm": 0.0017814520979300141, "learning_rate": 8.212703101920237e-07, "step": 32600 }, { "embedding_loss": 0.0007, "epoch": 4.82274741506647, "grad_norm": 0.008224143646657467, "learning_rate": 7.884457574265551e-07, "step": 32650 }, { "embedding_loss": 0.0013, "epoch": 4.8301329394387, "grad_norm": 0.0046016438864171505, "learning_rate": 7.556212046610865e-07, "step": 32700 }, { "embedding_loss": 0.0007, "epoch": 4.83751846381093, "grad_norm": 0.011043339967727661, "learning_rate": 7.227966518956179e-07, "step": 32750 }, { "embedding_loss": 0.0002, "epoch": 4.844903988183161, "grad_norm": 0.010177507996559143, "learning_rate": 6.899720991301493e-07, "step": 32800 }, { "embedding_loss": 0.0004, "epoch": 4.852289512555392, "grad_norm": 0.006154090631753206, "learning_rate": 6.571475463646807e-07, "step": 32850 }, { "embedding_loss": 0.0002, "epoch": 4.859675036927622, "grad_norm": 0.0051424214616417885, "learning_rate": 6.243229935992123e-07, "step": 32900 }, { "embedding_loss": 0.0002, "epoch": 4.867060561299852, "grad_norm": 0.045284390449523926, "learning_rate": 5.914984408337437e-07, "step": 32950 }, { "embedding_loss": 0.0001, "epoch": 4.874446085672083, "grad_norm": 0.006224027369171381, "learning_rate": 5.586738880682751e-07, "step": 33000 }, { "embedding_loss": 0.0005, "epoch": 4.881831610044313, "grad_norm": 0.06909705698490143, "learning_rate": 5.258493353028066e-07, "step": 33050 }, { "embedding_loss": 0.0011, "epoch": 4.889217134416544, "grad_norm": 0.007023925427347422, "learning_rate": 4.93024782537338e-07, "step": 33100 }, { "embedding_loss": 0.0008, "epoch": 4.896602658788774, "grad_norm": 0.005172157660126686, "learning_rate": 4.602002297718694e-07, "step": 33150 }, { "embedding_loss": 0.001, "epoch": 4.903988183161005, "grad_norm": 0.012793969362974167, "learning_rate": 4.273756770064008e-07, "step": 33200 }, { "embedding_loss": 0.001, "epoch": 4.911373707533235, "grad_norm": 0.0392751581966877, "learning_rate": 3.945511242409322e-07, "step": 33250 }, { "embedding_loss": 0.0012, "epoch": 4.918759231905465, "grad_norm": 0.007948558777570724, "learning_rate": 3.6172657147546373e-07, "step": 33300 }, { "embedding_loss": 0.0003, "epoch": 4.926144756277695, "grad_norm": 0.0075578768737614155, "learning_rate": 3.2890201870999514e-07, "step": 33350 }, { "embedding_loss": 0.0002, "epoch": 4.933530280649926, "grad_norm": 0.007234030868858099, "learning_rate": 2.9607746594452655e-07, "step": 33400 }, { "embedding_loss": 0.0014, "epoch": 4.940915805022157, "grad_norm": 0.01585334725677967, "learning_rate": 2.6325291317905796e-07, "step": 33450 }, { "embedding_loss": 0.0001, "epoch": 4.948301329394387, "grad_norm": 0.005034049041569233, "learning_rate": 2.304283604135894e-07, "step": 33500 }, { "embedding_loss": 0.0007, "epoch": 4.955686853766617, "grad_norm": 0.0041028158739209175, "learning_rate": 1.976038076481208e-07, "step": 33550 }, { "embedding_loss": 0.0007, "epoch": 4.963072378138848, "grad_norm": 0.006748030427843332, "learning_rate": 1.647792548826522e-07, "step": 33600 }, { "embedding_loss": 0.0014, "epoch": 4.970457902511078, "grad_norm": 0.004400690086185932, "learning_rate": 1.3195470211718367e-07, "step": 33650 }, { "embedding_loss": 0.0003, "epoch": 4.977843426883309, "grad_norm": 0.2393534779548645, "learning_rate": 9.913014935171508e-08, "step": 33700 }, { "embedding_loss": 0.0002, "epoch": 4.985228951255539, "grad_norm": 0.017467621713876724, "learning_rate": 6.630559658624653e-08, "step": 33750 }, { "embedding_loss": 0.002, "epoch": 4.99261447562777, "grad_norm": 0.006581551861017942, "learning_rate": 3.348104382077794e-08, "step": 33800 }, { "embedding_loss": 0.0007, "epoch": 5.0, "grad_norm": 0.007424783427268267, "learning_rate": 6.564910553093716e-10, "step": 33850 } ], "logging_steps": 50, "max_steps": 33850, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }