NLBSE2026-java / checkpoint-33850 /trainer_state.json
ThomBors's picture
Upload folder using huggingface_hub
fbdfb21 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 33850,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"embedding_loss": 0.2247,
"epoch": 0.00014771048744460856,
"grad_norm": 1.4463119506835938,
"learning_rate": 0.0,
"step": 1
},
{
"embedding_loss": 0.2914,
"epoch": 0.007385524372230428,
"grad_norm": 1.524917721748352,
"learning_rate": 2.8951255539143283e-07,
"step": 50
},
{
"embedding_loss": 0.2746,
"epoch": 0.014771048744460856,
"grad_norm": 1.1199702024459839,
"learning_rate": 5.849335302806499e-07,
"step": 100
},
{
"embedding_loss": 0.2579,
"epoch": 0.022156573116691284,
"grad_norm": 1.1694086790084839,
"learning_rate": 8.803545051698672e-07,
"step": 150
},
{
"embedding_loss": 0.2499,
"epoch": 0.029542097488921712,
"grad_norm": 0.9039924144744873,
"learning_rate": 1.1757754800590842e-06,
"step": 200
},
{
"embedding_loss": 0.2386,
"epoch": 0.03692762186115214,
"grad_norm": 1.048995018005371,
"learning_rate": 1.4711964549483015e-06,
"step": 250
},
{
"embedding_loss": 0.2269,
"epoch": 0.04431314623338257,
"grad_norm": 1.1124966144561768,
"learning_rate": 1.7666174298375186e-06,
"step": 300
},
{
"embedding_loss": 0.2171,
"epoch": 0.051698670605613,
"grad_norm": 1.0527081489562988,
"learning_rate": 2.062038404726736e-06,
"step": 350
},
{
"embedding_loss": 0.1999,
"epoch": 0.059084194977843424,
"grad_norm": 1.5473452806472778,
"learning_rate": 2.3574593796159526e-06,
"step": 400
},
{
"embedding_loss": 0.1787,
"epoch": 0.06646971935007386,
"grad_norm": 1.0279266834259033,
"learning_rate": 2.65288035450517e-06,
"step": 450
},
{
"embedding_loss": 0.1647,
"epoch": 0.07385524372230429,
"grad_norm": 1.260780930519104,
"learning_rate": 2.9483013293943873e-06,
"step": 500
},
{
"embedding_loss": 0.1581,
"epoch": 0.08124076809453472,
"grad_norm": 0.8986091017723083,
"learning_rate": 3.243722304283604e-06,
"step": 550
},
{
"embedding_loss": 0.1531,
"epoch": 0.08862629246676514,
"grad_norm": 1.3418588638305664,
"learning_rate": 3.5391432791728215e-06,
"step": 600
},
{
"embedding_loss": 0.1475,
"epoch": 0.09601181683899557,
"grad_norm": 0.9869722723960876,
"learning_rate": 3.834564254062039e-06,
"step": 650
},
{
"embedding_loss": 0.1375,
"epoch": 0.103397341211226,
"grad_norm": 1.1203314065933228,
"learning_rate": 4.129985228951256e-06,
"step": 700
},
{
"embedding_loss": 0.1274,
"epoch": 0.11078286558345643,
"grad_norm": 0.8669000267982483,
"learning_rate": 4.425406203840473e-06,
"step": 750
},
{
"embedding_loss": 0.1312,
"epoch": 0.11816838995568685,
"grad_norm": 1.493843913078308,
"learning_rate": 4.72082717872969e-06,
"step": 800
},
{
"embedding_loss": 0.1228,
"epoch": 0.1255539143279173,
"grad_norm": 0.9798605442047119,
"learning_rate": 5.0162481536189075e-06,
"step": 850
},
{
"embedding_loss": 0.118,
"epoch": 0.1329394387001477,
"grad_norm": 1.3890421390533447,
"learning_rate": 5.311669128508124e-06,
"step": 900
},
{
"embedding_loss": 0.1117,
"epoch": 0.14032496307237813,
"grad_norm": 1.3494268655776978,
"learning_rate": 5.607090103397341e-06,
"step": 950
},
{
"embedding_loss": 0.1108,
"epoch": 0.14771048744460857,
"grad_norm": 0.9772982001304626,
"learning_rate": 5.902511078286559e-06,
"step": 1000
},
{
"embedding_loss": 0.0941,
"epoch": 0.155096011816839,
"grad_norm": 0.8814387917518616,
"learning_rate": 6.197932053175776e-06,
"step": 1050
},
{
"embedding_loss": 0.0917,
"epoch": 0.16248153618906944,
"grad_norm": 1.0253106355667114,
"learning_rate": 6.4933530280649935e-06,
"step": 1100
},
{
"embedding_loss": 0.0961,
"epoch": 0.16986706056129985,
"grad_norm": 0.9646548628807068,
"learning_rate": 6.78877400295421e-06,
"step": 1150
},
{
"embedding_loss": 0.0896,
"epoch": 0.17725258493353027,
"grad_norm": 0.9323801398277283,
"learning_rate": 7.084194977843427e-06,
"step": 1200
},
{
"embedding_loss": 0.092,
"epoch": 0.18463810930576072,
"grad_norm": 1.0876870155334473,
"learning_rate": 7.379615952732645e-06,
"step": 1250
},
{
"embedding_loss": 0.0895,
"epoch": 0.19202363367799113,
"grad_norm": 1.2848527431488037,
"learning_rate": 7.675036927621861e-06,
"step": 1300
},
{
"embedding_loss": 0.0823,
"epoch": 0.19940915805022155,
"grad_norm": 1.4480323791503906,
"learning_rate": 7.970457902511078e-06,
"step": 1350
},
{
"embedding_loss": 0.0809,
"epoch": 0.206794682422452,
"grad_norm": 1.5744627714157104,
"learning_rate": 8.265878877400296e-06,
"step": 1400
},
{
"embedding_loss": 0.0766,
"epoch": 0.21418020679468242,
"grad_norm": 1.5040708780288696,
"learning_rate": 8.561299852289513e-06,
"step": 1450
},
{
"embedding_loss": 0.0733,
"epoch": 0.22156573116691286,
"grad_norm": 1.0043632984161377,
"learning_rate": 8.856720827178731e-06,
"step": 1500
},
{
"embedding_loss": 0.0778,
"epoch": 0.22895125553914328,
"grad_norm": 1.5973531007766724,
"learning_rate": 9.152141802067948e-06,
"step": 1550
},
{
"embedding_loss": 0.0715,
"epoch": 0.2363367799113737,
"grad_norm": 1.5385518074035645,
"learning_rate": 9.447562776957165e-06,
"step": 1600
},
{
"embedding_loss": 0.0701,
"epoch": 0.24372230428360414,
"grad_norm": 1.069740653038025,
"learning_rate": 9.742983751846381e-06,
"step": 1650
},
{
"embedding_loss": 0.0664,
"epoch": 0.2511078286558346,
"grad_norm": 0.7774745225906372,
"learning_rate": 1.00384047267356e-05,
"step": 1700
},
{
"embedding_loss": 0.0645,
"epoch": 0.258493353028065,
"grad_norm": 0.9332543015480042,
"learning_rate": 1.0333825701624816e-05,
"step": 1750
},
{
"embedding_loss": 0.061,
"epoch": 0.2658788774002954,
"grad_norm": 1.161795973777771,
"learning_rate": 1.0629246676514033e-05,
"step": 1800
},
{
"embedding_loss": 0.0625,
"epoch": 0.27326440177252587,
"grad_norm": 0.773992657661438,
"learning_rate": 1.0924667651403251e-05,
"step": 1850
},
{
"embedding_loss": 0.054,
"epoch": 0.28064992614475626,
"grad_norm": 0.8391284346580505,
"learning_rate": 1.1220088626292466e-05,
"step": 1900
},
{
"embedding_loss": 0.0612,
"epoch": 0.2880354505169867,
"grad_norm": 0.6210933923721313,
"learning_rate": 1.1515509601181685e-05,
"step": 1950
},
{
"embedding_loss": 0.0579,
"epoch": 0.29542097488921715,
"grad_norm": 0.8674732446670532,
"learning_rate": 1.1810930576070903e-05,
"step": 2000
},
{
"embedding_loss": 0.0566,
"epoch": 0.30280649926144754,
"grad_norm": 1.716627597808838,
"learning_rate": 1.2106351550960118e-05,
"step": 2050
},
{
"embedding_loss": 0.0495,
"epoch": 0.310192023633678,
"grad_norm": 1.162758231163025,
"learning_rate": 1.2401772525849337e-05,
"step": 2100
},
{
"embedding_loss": 0.0514,
"epoch": 0.3175775480059084,
"grad_norm": 1.5488636493682861,
"learning_rate": 1.2697193500738553e-05,
"step": 2150
},
{
"embedding_loss": 0.0478,
"epoch": 0.3249630723781389,
"grad_norm": 1.5231482982635498,
"learning_rate": 1.2992614475627772e-05,
"step": 2200
},
{
"embedding_loss": 0.0484,
"epoch": 0.33234859675036926,
"grad_norm": 1.966753602027893,
"learning_rate": 1.3288035450516987e-05,
"step": 2250
},
{
"embedding_loss": 0.0547,
"epoch": 0.3397341211225997,
"grad_norm": 2.125790596008301,
"learning_rate": 1.3583456425406205e-05,
"step": 2300
},
{
"embedding_loss": 0.0466,
"epoch": 0.34711964549483015,
"grad_norm": 1.8197243213653564,
"learning_rate": 1.3878877400295423e-05,
"step": 2350
},
{
"embedding_loss": 0.0454,
"epoch": 0.35450516986706054,
"grad_norm": 0.8179060816764832,
"learning_rate": 1.4174298375184638e-05,
"step": 2400
},
{
"embedding_loss": 0.041,
"epoch": 0.361890694239291,
"grad_norm": 0.3561592400074005,
"learning_rate": 1.4469719350073857e-05,
"step": 2450
},
{
"embedding_loss": 0.0395,
"epoch": 0.36927621861152143,
"grad_norm": 0.40876850485801697,
"learning_rate": 1.4765140324963074e-05,
"step": 2500
},
{
"embedding_loss": 0.0398,
"epoch": 0.3766617429837518,
"grad_norm": 1.050619125366211,
"learning_rate": 1.506056129985229e-05,
"step": 2550
},
{
"embedding_loss": 0.0415,
"epoch": 0.38404726735598227,
"grad_norm": 0.24330730736255646,
"learning_rate": 1.5355982274741507e-05,
"step": 2600
},
{
"embedding_loss": 0.0367,
"epoch": 0.3914327917282127,
"grad_norm": 2.6866581439971924,
"learning_rate": 1.5651403249630725e-05,
"step": 2650
},
{
"embedding_loss": 0.0331,
"epoch": 0.3988183161004431,
"grad_norm": 0.7530401945114136,
"learning_rate": 1.594682422451994e-05,
"step": 2700
},
{
"embedding_loss": 0.0399,
"epoch": 0.40620384047267355,
"grad_norm": 0.4778743386268616,
"learning_rate": 1.624224519940916e-05,
"step": 2750
},
{
"embedding_loss": 0.0342,
"epoch": 0.413589364844904,
"grad_norm": 0.8823883533477783,
"learning_rate": 1.6537666174298377e-05,
"step": 2800
},
{
"embedding_loss": 0.0356,
"epoch": 0.42097488921713444,
"grad_norm": 2.0318665504455566,
"learning_rate": 1.6833087149187595e-05,
"step": 2850
},
{
"embedding_loss": 0.0346,
"epoch": 0.42836041358936483,
"grad_norm": 0.46766990423202515,
"learning_rate": 1.712850812407681e-05,
"step": 2900
},
{
"embedding_loss": 0.0326,
"epoch": 0.4357459379615953,
"grad_norm": 1.899274468421936,
"learning_rate": 1.742392909896603e-05,
"step": 2950
},
{
"embedding_loss": 0.0301,
"epoch": 0.4431314623338257,
"grad_norm": 0.4528331458568573,
"learning_rate": 1.7719350073855247e-05,
"step": 3000
},
{
"embedding_loss": 0.0297,
"epoch": 0.4505169867060561,
"grad_norm": 1.74443519115448,
"learning_rate": 1.8014771048744462e-05,
"step": 3050
},
{
"embedding_loss": 0.0318,
"epoch": 0.45790251107828656,
"grad_norm": 0.41255202889442444,
"learning_rate": 1.831019202363368e-05,
"step": 3100
},
{
"embedding_loss": 0.0288,
"epoch": 0.465288035450517,
"grad_norm": 0.7493127584457397,
"learning_rate": 1.8605612998522896e-05,
"step": 3150
},
{
"embedding_loss": 0.0324,
"epoch": 0.4726735598227474,
"grad_norm": 0.12168914079666138,
"learning_rate": 1.8901033973412114e-05,
"step": 3200
},
{
"embedding_loss": 0.024,
"epoch": 0.48005908419497784,
"grad_norm": 1.5052778720855713,
"learning_rate": 1.9196454948301332e-05,
"step": 3250
},
{
"embedding_loss": 0.0299,
"epoch": 0.4874446085672083,
"grad_norm": 0.22781763970851898,
"learning_rate": 1.9491875923190547e-05,
"step": 3300
},
{
"embedding_loss": 0.0315,
"epoch": 0.4948301329394387,
"grad_norm": 0.7878602147102356,
"learning_rate": 1.9787296898079766e-05,
"step": 3350
},
{
"embedding_loss": 0.0267,
"epoch": 0.5022156573116692,
"grad_norm": 0.823674738407135,
"learning_rate": 1.999080912522567e-05,
"step": 3400
},
{
"embedding_loss": 0.0268,
"epoch": 0.5096011816838996,
"grad_norm": 0.6394932866096497,
"learning_rate": 1.99579845724602e-05,
"step": 3450
},
{
"embedding_loss": 0.0231,
"epoch": 0.51698670605613,
"grad_norm": 0.6224627494812012,
"learning_rate": 1.9925160019694733e-05,
"step": 3500
},
{
"embedding_loss": 0.0257,
"epoch": 0.5243722304283605,
"grad_norm": 2.495439291000366,
"learning_rate": 1.9892335466929265e-05,
"step": 3550
},
{
"embedding_loss": 0.023,
"epoch": 0.5317577548005908,
"grad_norm": 2.96049165725708,
"learning_rate": 1.9859510914163794e-05,
"step": 3600
},
{
"embedding_loss": 0.0222,
"epoch": 0.5391432791728212,
"grad_norm": 0.24069173634052277,
"learning_rate": 1.982668636139833e-05,
"step": 3650
},
{
"embedding_loss": 0.0244,
"epoch": 0.5465288035450517,
"grad_norm": 2.0120162963867188,
"learning_rate": 1.9793861808632858e-05,
"step": 3700
},
{
"embedding_loss": 0.0218,
"epoch": 0.5539143279172821,
"grad_norm": 0.8145495653152466,
"learning_rate": 1.976103725586739e-05,
"step": 3750
},
{
"embedding_loss": 0.0267,
"epoch": 0.5612998522895125,
"grad_norm": 0.40902212262153625,
"learning_rate": 1.9728212703101922e-05,
"step": 3800
},
{
"embedding_loss": 0.0221,
"epoch": 0.568685376661743,
"grad_norm": 0.1750016063451767,
"learning_rate": 1.9695388150336454e-05,
"step": 3850
},
{
"embedding_loss": 0.0169,
"epoch": 0.5760709010339734,
"grad_norm": 0.4834084212779999,
"learning_rate": 1.9662563597570986e-05,
"step": 3900
},
{
"embedding_loss": 0.0203,
"epoch": 0.5834564254062038,
"grad_norm": 0.18973205983638763,
"learning_rate": 1.9629739044805515e-05,
"step": 3950
},
{
"embedding_loss": 0.0184,
"epoch": 0.5908419497784343,
"grad_norm": 0.4886401295661926,
"learning_rate": 1.9596914492040047e-05,
"step": 4000
},
{
"embedding_loss": 0.0175,
"epoch": 0.5982274741506647,
"grad_norm": 0.8744384050369263,
"learning_rate": 1.956408993927458e-05,
"step": 4050
},
{
"embedding_loss": 0.0219,
"epoch": 0.6056129985228951,
"grad_norm": 1.2341519594192505,
"learning_rate": 1.953126538650911e-05,
"step": 4100
},
{
"embedding_loss": 0.0175,
"epoch": 0.6129985228951256,
"grad_norm": 0.4706520140171051,
"learning_rate": 1.9498440833743643e-05,
"step": 4150
},
{
"embedding_loss": 0.017,
"epoch": 0.620384047267356,
"grad_norm": 0.12396424263715744,
"learning_rate": 1.946561628097817e-05,
"step": 4200
},
{
"embedding_loss": 0.0181,
"epoch": 0.6277695716395865,
"grad_norm": 1.962485909461975,
"learning_rate": 1.9432791728212707e-05,
"step": 4250
},
{
"embedding_loss": 0.0164,
"epoch": 0.6351550960118169,
"grad_norm": 2.613374948501587,
"learning_rate": 1.9399967175447236e-05,
"step": 4300
},
{
"embedding_loss": 0.0129,
"epoch": 0.6425406203840472,
"grad_norm": 0.3567068874835968,
"learning_rate": 1.9367142622681768e-05,
"step": 4350
},
{
"embedding_loss": 0.0136,
"epoch": 0.6499261447562777,
"grad_norm": 1.5771572589874268,
"learning_rate": 1.9334318069916296e-05,
"step": 4400
},
{
"embedding_loss": 0.0169,
"epoch": 0.6573116691285081,
"grad_norm": 0.31481969356536865,
"learning_rate": 1.9301493517150832e-05,
"step": 4450
},
{
"embedding_loss": 0.0154,
"epoch": 0.6646971935007385,
"grad_norm": 0.16484883427619934,
"learning_rate": 1.926866896438536e-05,
"step": 4500
},
{
"embedding_loss": 0.0168,
"epoch": 0.672082717872969,
"grad_norm": 0.279256671667099,
"learning_rate": 1.9235844411619893e-05,
"step": 4550
},
{
"embedding_loss": 0.0158,
"epoch": 0.6794682422451994,
"grad_norm": 0.2343069612979889,
"learning_rate": 1.9203019858854425e-05,
"step": 4600
},
{
"embedding_loss": 0.0157,
"epoch": 0.6868537666174298,
"grad_norm": 0.17091761529445648,
"learning_rate": 1.9170195306088957e-05,
"step": 4650
},
{
"embedding_loss": 0.0127,
"epoch": 0.6942392909896603,
"grad_norm": 1.3237155675888062,
"learning_rate": 1.913737075332349e-05,
"step": 4700
},
{
"embedding_loss": 0.0116,
"epoch": 0.7016248153618907,
"grad_norm": 0.7258033752441406,
"learning_rate": 1.9104546200558017e-05,
"step": 4750
},
{
"embedding_loss": 0.0134,
"epoch": 0.7090103397341211,
"grad_norm": 3.0486900806427,
"learning_rate": 1.907172164779255e-05,
"step": 4800
},
{
"embedding_loss": 0.012,
"epoch": 0.7163958641063516,
"grad_norm": 0.10283143818378448,
"learning_rate": 1.903889709502708e-05,
"step": 4850
},
{
"embedding_loss": 0.0134,
"epoch": 0.723781388478582,
"grad_norm": 0.3316308259963989,
"learning_rate": 1.9006072542261613e-05,
"step": 4900
},
{
"embedding_loss": 0.0157,
"epoch": 0.7311669128508124,
"grad_norm": 0.421657919883728,
"learning_rate": 1.8973247989496146e-05,
"step": 4950
},
{
"embedding_loss": 0.0121,
"epoch": 0.7385524372230429,
"grad_norm": 0.4950125813484192,
"learning_rate": 1.8940423436730678e-05,
"step": 5000
},
{
"embedding_loss": 0.0134,
"epoch": 0.7459379615952733,
"grad_norm": 0.5293028950691223,
"learning_rate": 1.890759888396521e-05,
"step": 5050
},
{
"embedding_loss": 0.0083,
"epoch": 0.7533234859675036,
"grad_norm": 2.0652644634246826,
"learning_rate": 1.8874774331199738e-05,
"step": 5100
},
{
"embedding_loss": 0.0122,
"epoch": 0.7607090103397341,
"grad_norm": 1.9949322938919067,
"learning_rate": 1.884194977843427e-05,
"step": 5150
},
{
"embedding_loss": 0.0104,
"epoch": 0.7680945347119645,
"grad_norm": 0.07039645314216614,
"learning_rate": 1.8809125225668802e-05,
"step": 5200
},
{
"embedding_loss": 0.0061,
"epoch": 0.7754800590841949,
"grad_norm": 0.07697559893131256,
"learning_rate": 1.8776300672903334e-05,
"step": 5250
},
{
"embedding_loss": 0.0107,
"epoch": 0.7828655834564254,
"grad_norm": 0.05644530802965164,
"learning_rate": 1.8743476120137863e-05,
"step": 5300
},
{
"embedding_loss": 0.0093,
"epoch": 0.7902511078286558,
"grad_norm": 0.34979447722435,
"learning_rate": 1.8710651567372395e-05,
"step": 5350
},
{
"embedding_loss": 0.012,
"epoch": 0.7976366322008862,
"grad_norm": 0.06782261282205582,
"learning_rate": 1.8677827014606927e-05,
"step": 5400
},
{
"embedding_loss": 0.0119,
"epoch": 0.8050221565731167,
"grad_norm": 0.11144471168518066,
"learning_rate": 1.864500246184146e-05,
"step": 5450
},
{
"embedding_loss": 0.0114,
"epoch": 0.8124076809453471,
"grad_norm": 0.2110595852136612,
"learning_rate": 1.861217790907599e-05,
"step": 5500
},
{
"embedding_loss": 0.0133,
"epoch": 0.8197932053175776,
"grad_norm": 0.49429744482040405,
"learning_rate": 1.857935335631052e-05,
"step": 5550
},
{
"embedding_loss": 0.0087,
"epoch": 0.827178729689808,
"grad_norm": 0.07333461195230484,
"learning_rate": 1.8546528803545055e-05,
"step": 5600
},
{
"embedding_loss": 0.008,
"epoch": 0.8345642540620384,
"grad_norm": 0.11741068214178085,
"learning_rate": 1.8513704250779584e-05,
"step": 5650
},
{
"embedding_loss": 0.0058,
"epoch": 0.8419497784342689,
"grad_norm": 0.12451150268316269,
"learning_rate": 1.8480879698014116e-05,
"step": 5700
},
{
"embedding_loss": 0.0098,
"epoch": 0.8493353028064993,
"grad_norm": 0.04639327526092529,
"learning_rate": 1.8448055145248648e-05,
"step": 5750
},
{
"embedding_loss": 0.0083,
"epoch": 0.8567208271787297,
"grad_norm": 0.06967220455408096,
"learning_rate": 1.841523059248318e-05,
"step": 5800
},
{
"embedding_loss": 0.0127,
"epoch": 0.8641063515509602,
"grad_norm": 1.032842755317688,
"learning_rate": 1.8382406039717712e-05,
"step": 5850
},
{
"embedding_loss": 0.0119,
"epoch": 0.8714918759231906,
"grad_norm": 0.0814921110868454,
"learning_rate": 1.834958148695224e-05,
"step": 5900
},
{
"embedding_loss": 0.0117,
"epoch": 0.8788774002954209,
"grad_norm": 3.7965452671051025,
"learning_rate": 1.8316756934186773e-05,
"step": 5950
},
{
"embedding_loss": 0.0107,
"epoch": 0.8862629246676514,
"grad_norm": 0.13023847341537476,
"learning_rate": 1.8283932381421305e-05,
"step": 6000
},
{
"embedding_loss": 0.0099,
"epoch": 0.8936484490398818,
"grad_norm": 0.15792806446552277,
"learning_rate": 1.8251107828655837e-05,
"step": 6050
},
{
"embedding_loss": 0.0129,
"epoch": 0.9010339734121122,
"grad_norm": 0.06038963794708252,
"learning_rate": 1.8218283275890366e-05,
"step": 6100
},
{
"embedding_loss": 0.0111,
"epoch": 0.9084194977843427,
"grad_norm": 0.26467612385749817,
"learning_rate": 1.8185458723124898e-05,
"step": 6150
},
{
"embedding_loss": 0.0099,
"epoch": 0.9158050221565731,
"grad_norm": 0.354390025138855,
"learning_rate": 1.815263417035943e-05,
"step": 6200
},
{
"embedding_loss": 0.0101,
"epoch": 0.9231905465288035,
"grad_norm": 1.5564332008361816,
"learning_rate": 1.8119809617593962e-05,
"step": 6250
},
{
"embedding_loss": 0.0123,
"epoch": 0.930576070901034,
"grad_norm": 0.12284110486507416,
"learning_rate": 1.8086985064828494e-05,
"step": 6300
},
{
"embedding_loss": 0.0055,
"epoch": 0.9379615952732644,
"grad_norm": 0.15565811097621918,
"learning_rate": 1.8054160512063022e-05,
"step": 6350
},
{
"embedding_loss": 0.0105,
"epoch": 0.9453471196454948,
"grad_norm": 0.12946315109729767,
"learning_rate": 1.8021335959297558e-05,
"step": 6400
},
{
"embedding_loss": 0.0071,
"epoch": 0.9527326440177253,
"grad_norm": 0.4842424690723419,
"learning_rate": 1.7988511406532087e-05,
"step": 6450
},
{
"embedding_loss": 0.0074,
"epoch": 0.9601181683899557,
"grad_norm": 0.36668410897254944,
"learning_rate": 1.795568685376662e-05,
"step": 6500
},
{
"embedding_loss": 0.007,
"epoch": 0.9675036927621861,
"grad_norm": 0.1203831359744072,
"learning_rate": 1.792286230100115e-05,
"step": 6550
},
{
"embedding_loss": 0.0095,
"epoch": 0.9748892171344166,
"grad_norm": 0.046238359063863754,
"learning_rate": 1.7890037748235683e-05,
"step": 6600
},
{
"embedding_loss": 0.0088,
"epoch": 0.982274741506647,
"grad_norm": 0.1258874386548996,
"learning_rate": 1.7857213195470215e-05,
"step": 6650
},
{
"embedding_loss": 0.0052,
"epoch": 0.9896602658788775,
"grad_norm": 0.02032575197517872,
"learning_rate": 1.7824388642704743e-05,
"step": 6700
},
{
"embedding_loss": 0.0079,
"epoch": 0.9970457902511078,
"grad_norm": 0.03921140730381012,
"learning_rate": 1.7791564089939275e-05,
"step": 6750
},
{
"embedding_loss": 0.0069,
"epoch": 1.0044313146233383,
"grad_norm": 0.012766249477863312,
"learning_rate": 1.7758739537173807e-05,
"step": 6800
},
{
"embedding_loss": 0.0058,
"epoch": 1.0118168389955686,
"grad_norm": 0.20952889323234558,
"learning_rate": 1.772591498440834e-05,
"step": 6850
},
{
"embedding_loss": 0.0102,
"epoch": 1.0192023633677991,
"grad_norm": 0.04774490371346474,
"learning_rate": 1.7693090431642868e-05,
"step": 6900
},
{
"embedding_loss": 0.0097,
"epoch": 1.0265878877400296,
"grad_norm": 0.15566791594028473,
"learning_rate": 1.7660265878877404e-05,
"step": 6950
},
{
"embedding_loss": 0.0095,
"epoch": 1.03397341211226,
"grad_norm": 0.6467046141624451,
"learning_rate": 1.7627441326111932e-05,
"step": 7000
},
{
"embedding_loss": 0.0082,
"epoch": 1.0413589364844904,
"grad_norm": 0.35328537225723267,
"learning_rate": 1.7594616773346464e-05,
"step": 7050
},
{
"embedding_loss": 0.0066,
"epoch": 1.048744460856721,
"grad_norm": 0.3548614978790283,
"learning_rate": 1.7561792220580996e-05,
"step": 7100
},
{
"embedding_loss": 0.009,
"epoch": 1.0561299852289512,
"grad_norm": 0.6114194393157959,
"learning_rate": 1.752896766781553e-05,
"step": 7150
},
{
"embedding_loss": 0.0062,
"epoch": 1.0635155096011817,
"grad_norm": 0.7183836698532104,
"learning_rate": 1.749614311505006e-05,
"step": 7200
},
{
"embedding_loss": 0.0082,
"epoch": 1.0709010339734122,
"grad_norm": 0.41628143191337585,
"learning_rate": 1.746331856228459e-05,
"step": 7250
},
{
"embedding_loss": 0.0083,
"epoch": 1.0782865583456425,
"grad_norm": 0.22927437722682953,
"learning_rate": 1.743049400951912e-05,
"step": 7300
},
{
"embedding_loss": 0.0089,
"epoch": 1.085672082717873,
"grad_norm": 0.11581069976091385,
"learning_rate": 1.7397669456753653e-05,
"step": 7350
},
{
"embedding_loss": 0.0088,
"epoch": 1.0930576070901035,
"grad_norm": 1.374656081199646,
"learning_rate": 1.7364844903988185e-05,
"step": 7400
},
{
"embedding_loss": 0.0075,
"epoch": 1.1004431314623337,
"grad_norm": 0.25289225578308105,
"learning_rate": 1.7332020351222717e-05,
"step": 7450
},
{
"embedding_loss": 0.005,
"epoch": 1.1078286558345642,
"grad_norm": 0.034826990216970444,
"learning_rate": 1.7299195798457246e-05,
"step": 7500
},
{
"embedding_loss": 0.0074,
"epoch": 1.1152141802067947,
"grad_norm": 0.20261834561824799,
"learning_rate": 1.726637124569178e-05,
"step": 7550
},
{
"embedding_loss": 0.0062,
"epoch": 1.122599704579025,
"grad_norm": 0.01275601889938116,
"learning_rate": 1.723354669292631e-05,
"step": 7600
},
{
"embedding_loss": 0.0062,
"epoch": 1.1299852289512555,
"grad_norm": 0.036308519542217255,
"learning_rate": 1.7200722140160842e-05,
"step": 7650
},
{
"embedding_loss": 0.0079,
"epoch": 1.137370753323486,
"grad_norm": 0.05968335270881653,
"learning_rate": 1.7167897587395374e-05,
"step": 7700
},
{
"embedding_loss": 0.0108,
"epoch": 1.1447562776957163,
"grad_norm": 1.3406931161880493,
"learning_rate": 1.7135073034629906e-05,
"step": 7750
},
{
"embedding_loss": 0.0079,
"epoch": 1.1521418020679468,
"grad_norm": 0.07372719049453735,
"learning_rate": 1.7102248481864435e-05,
"step": 7800
},
{
"embedding_loss": 0.0083,
"epoch": 1.1595273264401773,
"grad_norm": 0.38173583149909973,
"learning_rate": 1.7069423929098967e-05,
"step": 7850
},
{
"embedding_loss": 0.0074,
"epoch": 1.1669128508124076,
"grad_norm": 0.1348145604133606,
"learning_rate": 1.70365993763335e-05,
"step": 7900
},
{
"embedding_loss": 0.0078,
"epoch": 1.174298375184638,
"grad_norm": 0.0659070536494255,
"learning_rate": 1.700377482356803e-05,
"step": 7950
},
{
"embedding_loss": 0.0057,
"epoch": 1.1816838995568686,
"grad_norm": 0.017487822100520134,
"learning_rate": 1.6970950270802563e-05,
"step": 8000
},
{
"embedding_loss": 0.0057,
"epoch": 1.1890694239290989,
"grad_norm": 3.8321328163146973,
"learning_rate": 1.693812571803709e-05,
"step": 8050
},
{
"embedding_loss": 0.005,
"epoch": 1.1964549483013294,
"grad_norm": 0.04197081923484802,
"learning_rate": 1.6905301165271624e-05,
"step": 8100
},
{
"embedding_loss": 0.0099,
"epoch": 1.2038404726735599,
"grad_norm": 0.05384385213255882,
"learning_rate": 1.6872476612506156e-05,
"step": 8150
},
{
"embedding_loss": 0.0041,
"epoch": 1.2112259970457901,
"grad_norm": 0.027099648490548134,
"learning_rate": 1.6839652059740688e-05,
"step": 8200
},
{
"embedding_loss": 0.0095,
"epoch": 1.2186115214180206,
"grad_norm": 0.02560454048216343,
"learning_rate": 1.680682750697522e-05,
"step": 8250
},
{
"embedding_loss": 0.0076,
"epoch": 1.2259970457902511,
"grad_norm": 0.0267130509018898,
"learning_rate": 1.677400295420975e-05,
"step": 8300
},
{
"embedding_loss": 0.0065,
"epoch": 1.2333825701624814,
"grad_norm": 0.14713996648788452,
"learning_rate": 1.6741178401444284e-05,
"step": 8350
},
{
"embedding_loss": 0.0044,
"epoch": 1.240768094534712,
"grad_norm": 0.0488862581551075,
"learning_rate": 1.6708353848678812e-05,
"step": 8400
},
{
"embedding_loss": 0.0059,
"epoch": 1.2481536189069424,
"grad_norm": 0.03769877180457115,
"learning_rate": 1.6675529295913345e-05,
"step": 8450
},
{
"embedding_loss": 0.0083,
"epoch": 1.2555391432791727,
"grad_norm": 0.04677336663007736,
"learning_rate": 1.6642704743147877e-05,
"step": 8500
},
{
"embedding_loss": 0.0069,
"epoch": 1.2629246676514032,
"grad_norm": 1.583303689956665,
"learning_rate": 1.660988019038241e-05,
"step": 8550
},
{
"embedding_loss": 0.0059,
"epoch": 1.2703101920236337,
"grad_norm": 0.057745561003685,
"learning_rate": 1.6577055637616937e-05,
"step": 8600
},
{
"embedding_loss": 0.0048,
"epoch": 1.277695716395864,
"grad_norm": 0.05651646852493286,
"learning_rate": 1.654423108485147e-05,
"step": 8650
},
{
"embedding_loss": 0.0081,
"epoch": 1.2850812407680945,
"grad_norm": 0.5371580719947815,
"learning_rate": 1.6511406532086e-05,
"step": 8700
},
{
"embedding_loss": 0.0056,
"epoch": 1.292466765140325,
"grad_norm": 0.01594601757824421,
"learning_rate": 1.6478581979320533e-05,
"step": 8750
},
{
"embedding_loss": 0.0069,
"epoch": 1.2998522895125553,
"grad_norm": 0.22201408445835114,
"learning_rate": 1.6445757426555065e-05,
"step": 8800
},
{
"embedding_loss": 0.005,
"epoch": 1.3072378138847858,
"grad_norm": 0.0434761643409729,
"learning_rate": 1.6412932873789594e-05,
"step": 8850
},
{
"embedding_loss": 0.0057,
"epoch": 1.3146233382570163,
"grad_norm": 0.20662403106689453,
"learning_rate": 1.6380108321024126e-05,
"step": 8900
},
{
"embedding_loss": 0.0059,
"epoch": 1.3220088626292466,
"grad_norm": 0.49766138195991516,
"learning_rate": 1.6347283768258658e-05,
"step": 8950
},
{
"embedding_loss": 0.0036,
"epoch": 1.329394387001477,
"grad_norm": 3.3815248012542725,
"learning_rate": 1.631445921549319e-05,
"step": 9000
},
{
"embedding_loss": 0.0072,
"epoch": 1.3367799113737076,
"grad_norm": 0.03580164164304733,
"learning_rate": 1.6281634662727722e-05,
"step": 9050
},
{
"embedding_loss": 0.0053,
"epoch": 1.3441654357459378,
"grad_norm": 1.0942792892456055,
"learning_rate": 1.6248810109962254e-05,
"step": 9100
},
{
"embedding_loss": 0.0035,
"epoch": 1.3515509601181683,
"grad_norm": 0.05680214613676071,
"learning_rate": 1.6215985557196786e-05,
"step": 9150
},
{
"embedding_loss": 0.0073,
"epoch": 1.3589364844903988,
"grad_norm": 0.377883642911911,
"learning_rate": 1.6183161004431315e-05,
"step": 9200
},
{
"embedding_loss": 0.0028,
"epoch": 1.3663220088626291,
"grad_norm": 0.019608836621046066,
"learning_rate": 1.6150336451665847e-05,
"step": 9250
},
{
"embedding_loss": 0.0055,
"epoch": 1.3737075332348596,
"grad_norm": 0.5401307344436646,
"learning_rate": 1.611751189890038e-05,
"step": 9300
},
{
"embedding_loss": 0.0071,
"epoch": 1.3810930576070901,
"grad_norm": 0.4266299605369568,
"learning_rate": 1.608468734613491e-05,
"step": 9350
},
{
"embedding_loss": 0.0057,
"epoch": 1.3884785819793206,
"grad_norm": 0.16506928205490112,
"learning_rate": 1.6051862793369443e-05,
"step": 9400
},
{
"embedding_loss": 0.0107,
"epoch": 1.395864106351551,
"grad_norm": 0.021154019981622696,
"learning_rate": 1.6019038240603972e-05,
"step": 9450
},
{
"embedding_loss": 0.0054,
"epoch": 1.4032496307237814,
"grad_norm": 0.13461002707481384,
"learning_rate": 1.5986213687838504e-05,
"step": 9500
},
{
"embedding_loss": 0.0045,
"epoch": 1.410635155096012,
"grad_norm": 0.0639062374830246,
"learning_rate": 1.5953389135073036e-05,
"step": 9550
},
{
"embedding_loss": 0.0067,
"epoch": 1.4180206794682422,
"grad_norm": 0.13972270488739014,
"learning_rate": 1.5920564582307568e-05,
"step": 9600
},
{
"embedding_loss": 0.0038,
"epoch": 1.4254062038404727,
"grad_norm": 0.024820247665047646,
"learning_rate": 1.5887740029542097e-05,
"step": 9650
},
{
"embedding_loss": 0.0079,
"epoch": 1.4327917282127032,
"grad_norm": 0.5452784299850464,
"learning_rate": 1.5854915476776632e-05,
"step": 9700
},
{
"embedding_loss": 0.0078,
"epoch": 1.4401772525849335,
"grad_norm": 0.2737050950527191,
"learning_rate": 1.582209092401116e-05,
"step": 9750
},
{
"embedding_loss": 0.005,
"epoch": 1.447562776957164,
"grad_norm": 0.024434711784124374,
"learning_rate": 1.5789266371245693e-05,
"step": 9800
},
{
"embedding_loss": 0.0032,
"epoch": 1.4549483013293945,
"grad_norm": 0.10400200635194778,
"learning_rate": 1.5756441818480225e-05,
"step": 9850
},
{
"embedding_loss": 0.0043,
"epoch": 1.4623338257016247,
"grad_norm": 0.048794183880090714,
"learning_rate": 1.5723617265714757e-05,
"step": 9900
},
{
"embedding_loss": 0.0079,
"epoch": 1.4697193500738552,
"grad_norm": 0.06030944362282753,
"learning_rate": 1.569079271294929e-05,
"step": 9950
},
{
"embedding_loss": 0.0044,
"epoch": 1.4771048744460857,
"grad_norm": 0.007165232207626104,
"learning_rate": 1.5657968160183818e-05,
"step": 10000
},
{
"embedding_loss": 0.0056,
"epoch": 1.4844903988183162,
"grad_norm": 0.02217938005924225,
"learning_rate": 1.562514360741835e-05,
"step": 10050
},
{
"embedding_loss": 0.004,
"epoch": 1.4918759231905465,
"grad_norm": 2.4009013175964355,
"learning_rate": 1.559231905465288e-05,
"step": 10100
},
{
"embedding_loss": 0.0065,
"epoch": 1.499261447562777,
"grad_norm": 0.20312148332595825,
"learning_rate": 1.5559494501887414e-05,
"step": 10150
},
{
"embedding_loss": 0.0056,
"epoch": 1.5066469719350075,
"grad_norm": 0.07194498181343079,
"learning_rate": 1.5526669949121946e-05,
"step": 10200
},
{
"embedding_loss": 0.0044,
"epoch": 1.5140324963072378,
"grad_norm": 0.01895447075366974,
"learning_rate": 1.5493845396356474e-05,
"step": 10250
},
{
"embedding_loss": 0.0065,
"epoch": 1.5214180206794683,
"grad_norm": 0.06340127438306808,
"learning_rate": 1.5461020843591006e-05,
"step": 10300
},
{
"embedding_loss": 0.0043,
"epoch": 1.5288035450516988,
"grad_norm": 0.08964123576879501,
"learning_rate": 1.542819629082554e-05,
"step": 10350
},
{
"embedding_loss": 0.0041,
"epoch": 1.536189069423929,
"grad_norm": 0.010926262475550175,
"learning_rate": 1.539537173806007e-05,
"step": 10400
},
{
"embedding_loss": 0.0043,
"epoch": 1.5435745937961596,
"grad_norm": 0.021898791193962097,
"learning_rate": 1.53625471852946e-05,
"step": 10450
},
{
"embedding_loss": 0.0065,
"epoch": 1.55096011816839,
"grad_norm": 0.03210087865591049,
"learning_rate": 1.5329722632529135e-05,
"step": 10500
},
{
"embedding_loss": 0.005,
"epoch": 1.5583456425406204,
"grad_norm": 0.061248380690813065,
"learning_rate": 1.5296898079763663e-05,
"step": 10550
},
{
"embedding_loss": 0.003,
"epoch": 1.5657311669128509,
"grad_norm": 0.016059886664152145,
"learning_rate": 1.5264073526998195e-05,
"step": 10600
},
{
"embedding_loss": 0.0031,
"epoch": 1.5731166912850814,
"grad_norm": 0.30576014518737793,
"learning_rate": 1.5231248974232726e-05,
"step": 10650
},
{
"embedding_loss": 0.0057,
"epoch": 1.5805022156573116,
"grad_norm": 0.40583568811416626,
"learning_rate": 1.519842442146726e-05,
"step": 10700
},
{
"embedding_loss": 0.0028,
"epoch": 1.5878877400295421,
"grad_norm": 0.022348936647176743,
"learning_rate": 1.516559986870179e-05,
"step": 10750
},
{
"embedding_loss": 0.0065,
"epoch": 1.5952732644017726,
"grad_norm": 0.012712485156953335,
"learning_rate": 1.5132775315936322e-05,
"step": 10800
},
{
"embedding_loss": 0.0024,
"epoch": 1.602658788774003,
"grad_norm": 0.02547537162899971,
"learning_rate": 1.5099950763170852e-05,
"step": 10850
},
{
"embedding_loss": 0.0037,
"epoch": 1.6100443131462334,
"grad_norm": 0.011224956251680851,
"learning_rate": 1.5067126210405386e-05,
"step": 10900
},
{
"embedding_loss": 0.0046,
"epoch": 1.617429837518464,
"grad_norm": 0.7301647067070007,
"learning_rate": 1.5034301657639916e-05,
"step": 10950
},
{
"embedding_loss": 0.0048,
"epoch": 1.6248153618906942,
"grad_norm": 1.1569029092788696,
"learning_rate": 1.5001477104874447e-05,
"step": 11000
},
{
"embedding_loss": 0.0042,
"epoch": 1.6322008862629247,
"grad_norm": 0.03382499888539314,
"learning_rate": 1.496865255210898e-05,
"step": 11050
},
{
"embedding_loss": 0.0029,
"epoch": 1.6395864106351552,
"grad_norm": 0.19339300692081451,
"learning_rate": 1.493582799934351e-05,
"step": 11100
},
{
"embedding_loss": 0.005,
"epoch": 1.6469719350073855,
"grad_norm": 0.03563707694411278,
"learning_rate": 1.4903003446578041e-05,
"step": 11150
},
{
"embedding_loss": 0.0059,
"epoch": 1.654357459379616,
"grad_norm": 0.046909235417842865,
"learning_rate": 1.4870178893812573e-05,
"step": 11200
},
{
"embedding_loss": 0.0061,
"epoch": 1.6617429837518465,
"grad_norm": 0.24560566246509552,
"learning_rate": 1.4837354341047105e-05,
"step": 11250
},
{
"embedding_loss": 0.0037,
"epoch": 1.6691285081240768,
"grad_norm": 0.08229757100343704,
"learning_rate": 1.4804529788281637e-05,
"step": 11300
},
{
"embedding_loss": 0.0034,
"epoch": 1.6765140324963073,
"grad_norm": 0.187529519200325,
"learning_rate": 1.4771705235516167e-05,
"step": 11350
},
{
"embedding_loss": 0.0058,
"epoch": 1.6838995568685378,
"grad_norm": 0.21818560361862183,
"learning_rate": 1.4738880682750698e-05,
"step": 11400
},
{
"embedding_loss": 0.0057,
"epoch": 1.691285081240768,
"grad_norm": 0.017579764127731323,
"learning_rate": 1.4706056129985232e-05,
"step": 11450
},
{
"embedding_loss": 0.0053,
"epoch": 1.6986706056129985,
"grad_norm": 0.005298899486660957,
"learning_rate": 1.4673231577219762e-05,
"step": 11500
},
{
"embedding_loss": 0.0038,
"epoch": 1.706056129985229,
"grad_norm": 0.21702563762664795,
"learning_rate": 1.4640407024454292e-05,
"step": 11550
},
{
"embedding_loss": 0.0055,
"epoch": 1.7134416543574593,
"grad_norm": 0.028038183227181435,
"learning_rate": 1.4607582471688824e-05,
"step": 11600
},
{
"embedding_loss": 0.0053,
"epoch": 1.7208271787296898,
"grad_norm": 0.008879674598574638,
"learning_rate": 1.4574757918923356e-05,
"step": 11650
},
{
"embedding_loss": 0.0046,
"epoch": 1.7282127031019203,
"grad_norm": 0.41037923097610474,
"learning_rate": 1.4541933366157888e-05,
"step": 11700
},
{
"embedding_loss": 0.0038,
"epoch": 1.7355982274741506,
"grad_norm": 0.0060186549089848995,
"learning_rate": 1.4509108813392419e-05,
"step": 11750
},
{
"embedding_loss": 0.006,
"epoch": 1.742983751846381,
"grad_norm": 0.02156016044318676,
"learning_rate": 1.4476284260626949e-05,
"step": 11800
},
{
"embedding_loss": 0.0063,
"epoch": 1.7503692762186116,
"grad_norm": 0.024685271084308624,
"learning_rate": 1.4443459707861483e-05,
"step": 11850
},
{
"embedding_loss": 0.0044,
"epoch": 1.7577548005908419,
"grad_norm": 0.1743912249803543,
"learning_rate": 1.4410635155096013e-05,
"step": 11900
},
{
"embedding_loss": 0.0044,
"epoch": 1.7651403249630724,
"grad_norm": 0.5131327509880066,
"learning_rate": 1.4377810602330544e-05,
"step": 11950
},
{
"embedding_loss": 0.0038,
"epoch": 1.7725258493353029,
"grad_norm": 0.1307702213525772,
"learning_rate": 1.4344986049565076e-05,
"step": 12000
},
{
"embedding_loss": 0.0063,
"epoch": 1.7799113737075332,
"grad_norm": 0.04227305203676224,
"learning_rate": 1.4312161496799608e-05,
"step": 12050
},
{
"embedding_loss": 0.0022,
"epoch": 1.7872968980797637,
"grad_norm": 0.01793646812438965,
"learning_rate": 1.427933694403414e-05,
"step": 12100
},
{
"embedding_loss": 0.0043,
"epoch": 1.7946824224519942,
"grad_norm": 0.369022011756897,
"learning_rate": 1.424651239126867e-05,
"step": 12150
},
{
"embedding_loss": 0.0035,
"epoch": 1.8020679468242244,
"grad_norm": 0.024383598938584328,
"learning_rate": 1.42136878385032e-05,
"step": 12200
},
{
"embedding_loss": 0.0044,
"epoch": 1.809453471196455,
"grad_norm": 0.034295763820409775,
"learning_rate": 1.4180863285737734e-05,
"step": 12250
},
{
"embedding_loss": 0.0034,
"epoch": 1.8168389955686854,
"grad_norm": 0.011509880423545837,
"learning_rate": 1.4148038732972264e-05,
"step": 12300
},
{
"embedding_loss": 0.0045,
"epoch": 1.8242245199409157,
"grad_norm": 0.048171836882829666,
"learning_rate": 1.4115214180206795e-05,
"step": 12350
},
{
"embedding_loss": 0.0035,
"epoch": 1.8316100443131462,
"grad_norm": 0.5833490490913391,
"learning_rate": 1.4082389627441327e-05,
"step": 12400
},
{
"embedding_loss": 0.0037,
"epoch": 1.8389955686853767,
"grad_norm": 0.057985421270132065,
"learning_rate": 1.4049565074675859e-05,
"step": 12450
},
{
"embedding_loss": 0.0043,
"epoch": 1.846381093057607,
"grad_norm": 0.22399385273456573,
"learning_rate": 1.4016740521910391e-05,
"step": 12500
},
{
"embedding_loss": 0.0046,
"epoch": 1.8537666174298375,
"grad_norm": 0.0484611876308918,
"learning_rate": 1.3983915969144921e-05,
"step": 12550
},
{
"embedding_loss": 0.0062,
"epoch": 1.861152141802068,
"grad_norm": 0.03510669618844986,
"learning_rate": 1.3951091416379452e-05,
"step": 12600
},
{
"embedding_loss": 0.0023,
"epoch": 1.8685376661742983,
"grad_norm": 0.0480966791510582,
"learning_rate": 1.3918266863613985e-05,
"step": 12650
},
{
"embedding_loss": 0.0033,
"epoch": 1.8759231905465288,
"grad_norm": 0.06846830993890762,
"learning_rate": 1.3885442310848516e-05,
"step": 12700
},
{
"embedding_loss": 0.0043,
"epoch": 1.8833087149187593,
"grad_norm": 0.18425996601581573,
"learning_rate": 1.3852617758083046e-05,
"step": 12750
},
{
"embedding_loss": 0.004,
"epoch": 1.8906942392909896,
"grad_norm": 0.024371977895498276,
"learning_rate": 1.3819793205317578e-05,
"step": 12800
},
{
"embedding_loss": 0.0025,
"epoch": 1.89807976366322,
"grad_norm": 0.05540316924452782,
"learning_rate": 1.378696865255211e-05,
"step": 12850
},
{
"embedding_loss": 0.0062,
"epoch": 1.9054652880354506,
"grad_norm": 0.2133670598268509,
"learning_rate": 1.3754144099786642e-05,
"step": 12900
},
{
"embedding_loss": 0.0037,
"epoch": 1.9128508124076808,
"grad_norm": 0.007817639969289303,
"learning_rate": 1.3721319547021173e-05,
"step": 12950
},
{
"embedding_loss": 0.0038,
"epoch": 1.9202363367799113,
"grad_norm": 0.06182079762220383,
"learning_rate": 1.3688494994255706e-05,
"step": 13000
},
{
"embedding_loss": 0.0044,
"epoch": 1.9276218611521418,
"grad_norm": 0.010844537056982517,
"learning_rate": 1.3655670441490237e-05,
"step": 13050
},
{
"embedding_loss": 0.003,
"epoch": 1.9350073855243721,
"grad_norm": 0.008412591181695461,
"learning_rate": 1.3622845888724767e-05,
"step": 13100
},
{
"embedding_loss": 0.0037,
"epoch": 1.9423929098966026,
"grad_norm": 0.3621113896369934,
"learning_rate": 1.3590021335959297e-05,
"step": 13150
},
{
"embedding_loss": 0.0034,
"epoch": 1.9497784342688331,
"grad_norm": 0.09569013118743896,
"learning_rate": 1.3557196783193831e-05,
"step": 13200
},
{
"embedding_loss": 0.0029,
"epoch": 1.9571639586410634,
"grad_norm": 0.022653287276625633,
"learning_rate": 1.3524372230428361e-05,
"step": 13250
},
{
"embedding_loss": 0.0019,
"epoch": 1.964549483013294,
"grad_norm": 0.013618898577988148,
"learning_rate": 1.3491547677662893e-05,
"step": 13300
},
{
"embedding_loss": 0.003,
"epoch": 1.9719350073855244,
"grad_norm": 0.009312042035162449,
"learning_rate": 1.3458723124897424e-05,
"step": 13350
},
{
"embedding_loss": 0.0041,
"epoch": 1.9793205317577547,
"grad_norm": 0.26061955094337463,
"learning_rate": 1.3425898572131958e-05,
"step": 13400
},
{
"embedding_loss": 0.0033,
"epoch": 1.9867060561299852,
"grad_norm": 0.0065947119146585464,
"learning_rate": 1.3393074019366488e-05,
"step": 13450
},
{
"embedding_loss": 0.0032,
"epoch": 1.9940915805022157,
"grad_norm": 0.016747118905186653,
"learning_rate": 1.3360249466601018e-05,
"step": 13500
},
{
"embedding_loss": 0.0029,
"epoch": 2.001477104874446,
"grad_norm": 0.01635347120463848,
"learning_rate": 1.3327424913835549e-05,
"step": 13550
},
{
"embedding_loss": 0.0058,
"epoch": 2.0088626292466767,
"grad_norm": 0.045433904975652695,
"learning_rate": 1.3294600361070082e-05,
"step": 13600
},
{
"embedding_loss": 0.0019,
"epoch": 2.016248153618907,
"grad_norm": 0.06570059806108475,
"learning_rate": 1.3261775808304613e-05,
"step": 13650
},
{
"embedding_loss": 0.0027,
"epoch": 2.0236336779911372,
"grad_norm": 0.026954207569360733,
"learning_rate": 1.3228951255539145e-05,
"step": 13700
},
{
"embedding_loss": 0.0015,
"epoch": 2.031019202363368,
"grad_norm": 0.013637225143611431,
"learning_rate": 1.3196126702773675e-05,
"step": 13750
},
{
"embedding_loss": 0.0029,
"epoch": 2.0384047267355982,
"grad_norm": 0.01706545241177082,
"learning_rate": 1.3163302150008209e-05,
"step": 13800
},
{
"embedding_loss": 0.0043,
"epoch": 2.0457902511078285,
"grad_norm": 0.008318389765918255,
"learning_rate": 1.313047759724274e-05,
"step": 13850
},
{
"embedding_loss": 0.0016,
"epoch": 2.0531757754800593,
"grad_norm": 0.010482273995876312,
"learning_rate": 1.309765304447727e-05,
"step": 13900
},
{
"embedding_loss": 0.0022,
"epoch": 2.0605612998522895,
"grad_norm": 0.10514198988676071,
"learning_rate": 1.30648284917118e-05,
"step": 13950
},
{
"embedding_loss": 0.0035,
"epoch": 2.06794682422452,
"grad_norm": 0.265434592962265,
"learning_rate": 1.3032003938946334e-05,
"step": 14000
},
{
"embedding_loss": 0.0033,
"epoch": 2.0753323485967505,
"grad_norm": 0.10725241899490356,
"learning_rate": 1.2999179386180864e-05,
"step": 14050
},
{
"embedding_loss": 0.0019,
"epoch": 2.082717872968981,
"grad_norm": 0.03083561733365059,
"learning_rate": 1.2966354833415396e-05,
"step": 14100
},
{
"embedding_loss": 0.0039,
"epoch": 2.090103397341211,
"grad_norm": 0.4700145721435547,
"learning_rate": 1.2933530280649926e-05,
"step": 14150
},
{
"embedding_loss": 0.0022,
"epoch": 2.097488921713442,
"grad_norm": 0.007506008259952068,
"learning_rate": 1.290070572788446e-05,
"step": 14200
},
{
"embedding_loss": 0.0042,
"epoch": 2.104874446085672,
"grad_norm": 0.08826395869255066,
"learning_rate": 1.286788117511899e-05,
"step": 14250
},
{
"embedding_loss": 0.0023,
"epoch": 2.1122599704579024,
"grad_norm": 0.0911986455321312,
"learning_rate": 1.283505662235352e-05,
"step": 14300
},
{
"embedding_loss": 0.0022,
"epoch": 2.119645494830133,
"grad_norm": 0.03140464425086975,
"learning_rate": 1.2802232069588053e-05,
"step": 14350
},
{
"embedding_loss": 0.0016,
"epoch": 2.1270310192023634,
"grad_norm": 0.017707446590065956,
"learning_rate": 1.2769407516822585e-05,
"step": 14400
},
{
"embedding_loss": 0.0023,
"epoch": 2.1344165435745936,
"grad_norm": 0.17360664904117584,
"learning_rate": 1.2736582964057115e-05,
"step": 14450
},
{
"embedding_loss": 0.0034,
"epoch": 2.1418020679468244,
"grad_norm": 0.006408170331269503,
"learning_rate": 1.2703758411291647e-05,
"step": 14500
},
{
"embedding_loss": 0.0019,
"epoch": 2.1491875923190547,
"grad_norm": 0.00851589534431696,
"learning_rate": 1.2670933858526178e-05,
"step": 14550
},
{
"embedding_loss": 0.0027,
"epoch": 2.156573116691285,
"grad_norm": 0.03338400647044182,
"learning_rate": 1.2638109305760711e-05,
"step": 14600
},
{
"embedding_loss": 0.0025,
"epoch": 2.1639586410635157,
"grad_norm": 0.009356162510812283,
"learning_rate": 1.2605284752995242e-05,
"step": 14650
},
{
"embedding_loss": 0.0025,
"epoch": 2.171344165435746,
"grad_norm": 0.028701895847916603,
"learning_rate": 1.2572460200229772e-05,
"step": 14700
},
{
"embedding_loss": 0.0024,
"epoch": 2.178729689807976,
"grad_norm": 0.7400600910186768,
"learning_rate": 1.2539635647464304e-05,
"step": 14750
},
{
"embedding_loss": 0.004,
"epoch": 2.186115214180207,
"grad_norm": 0.011697505600750446,
"learning_rate": 1.2506811094698836e-05,
"step": 14800
},
{
"embedding_loss": 0.0013,
"epoch": 2.193500738552437,
"grad_norm": 0.0038999137468636036,
"learning_rate": 1.2473986541933366e-05,
"step": 14850
},
{
"embedding_loss": 0.0018,
"epoch": 2.2008862629246675,
"grad_norm": 0.013158189132809639,
"learning_rate": 1.2441161989167899e-05,
"step": 14900
},
{
"embedding_loss": 0.0025,
"epoch": 2.208271787296898,
"grad_norm": 0.019193725660443306,
"learning_rate": 1.2408337436402429e-05,
"step": 14950
},
{
"embedding_loss": 0.0052,
"epoch": 2.2156573116691285,
"grad_norm": 0.07765129953622818,
"learning_rate": 1.2375512883636963e-05,
"step": 15000
},
{
"embedding_loss": 0.0027,
"epoch": 2.2230428360413588,
"grad_norm": 0.16390322148799896,
"learning_rate": 1.2342688330871493e-05,
"step": 15050
},
{
"embedding_loss": 0.0011,
"epoch": 2.2304283604135895,
"grad_norm": 0.019845524802803993,
"learning_rate": 1.2309863778106023e-05,
"step": 15100
},
{
"embedding_loss": 0.0019,
"epoch": 2.2378138847858198,
"grad_norm": 0.0020033265464007854,
"learning_rate": 1.2277039225340557e-05,
"step": 15150
},
{
"embedding_loss": 0.0012,
"epoch": 2.24519940915805,
"grad_norm": 0.008046300150454044,
"learning_rate": 1.2244214672575087e-05,
"step": 15200
},
{
"embedding_loss": 0.0045,
"epoch": 2.2525849335302808,
"grad_norm": 0.16893664002418518,
"learning_rate": 1.2211390119809618e-05,
"step": 15250
},
{
"embedding_loss": 0.0031,
"epoch": 2.259970457902511,
"grad_norm": 0.012031147256493568,
"learning_rate": 1.217856556704415e-05,
"step": 15300
},
{
"embedding_loss": 0.0029,
"epoch": 2.2673559822747413,
"grad_norm": 0.007804942317306995,
"learning_rate": 1.2145741014278682e-05,
"step": 15350
},
{
"embedding_loss": 0.0048,
"epoch": 2.274741506646972,
"grad_norm": 0.003408796386793256,
"learning_rate": 1.2112916461513214e-05,
"step": 15400
},
{
"embedding_loss": 0.0024,
"epoch": 2.2821270310192023,
"grad_norm": 0.0196861382573843,
"learning_rate": 1.2080091908747744e-05,
"step": 15450
},
{
"embedding_loss": 0.0032,
"epoch": 2.2895125553914326,
"grad_norm": 0.10261236131191254,
"learning_rate": 1.2047267355982275e-05,
"step": 15500
},
{
"embedding_loss": 0.0017,
"epoch": 2.2968980797636633,
"grad_norm": 0.008358814753592014,
"learning_rate": 1.2014442803216808e-05,
"step": 15550
},
{
"embedding_loss": 0.0018,
"epoch": 2.3042836041358936,
"grad_norm": 0.03527391329407692,
"learning_rate": 1.1981618250451339e-05,
"step": 15600
},
{
"embedding_loss": 0.0035,
"epoch": 2.311669128508124,
"grad_norm": 0.011962966993451118,
"learning_rate": 1.1948793697685869e-05,
"step": 15650
},
{
"embedding_loss": 0.0041,
"epoch": 2.3190546528803546,
"grad_norm": 0.005154829006642103,
"learning_rate": 1.1915969144920401e-05,
"step": 15700
},
{
"embedding_loss": 0.0015,
"epoch": 2.326440177252585,
"grad_norm": 0.007693074177950621,
"learning_rate": 1.1883144592154933e-05,
"step": 15750
},
{
"embedding_loss": 0.003,
"epoch": 2.333825701624815,
"grad_norm": 0.02695990726351738,
"learning_rate": 1.1850320039389465e-05,
"step": 15800
},
{
"embedding_loss": 0.0016,
"epoch": 2.341211225997046,
"grad_norm": 0.19833894073963165,
"learning_rate": 1.1817495486623995e-05,
"step": 15850
},
{
"embedding_loss": 0.0027,
"epoch": 2.348596750369276,
"grad_norm": 0.153117373585701,
"learning_rate": 1.1784670933858526e-05,
"step": 15900
},
{
"embedding_loss": 0.0024,
"epoch": 2.3559822747415065,
"grad_norm": 0.5938816666603088,
"learning_rate": 1.175184638109306e-05,
"step": 15950
},
{
"embedding_loss": 0.002,
"epoch": 2.363367799113737,
"grad_norm": 0.01386656891554594,
"learning_rate": 1.171902182832759e-05,
"step": 16000
},
{
"embedding_loss": 0.0014,
"epoch": 2.3707533234859675,
"grad_norm": 0.010158052667975426,
"learning_rate": 1.1686197275562122e-05,
"step": 16050
},
{
"embedding_loss": 0.001,
"epoch": 2.3781388478581977,
"grad_norm": 0.008198092691600323,
"learning_rate": 1.1653372722796652e-05,
"step": 16100
},
{
"embedding_loss": 0.0005,
"epoch": 2.3855243722304285,
"grad_norm": 0.010181965306401253,
"learning_rate": 1.1620548170031184e-05,
"step": 16150
},
{
"embedding_loss": 0.0015,
"epoch": 2.3929098966026587,
"grad_norm": 0.008307389914989471,
"learning_rate": 1.1587723617265716e-05,
"step": 16200
},
{
"embedding_loss": 0.0045,
"epoch": 2.4002954209748895,
"grad_norm": 0.018941566348075867,
"learning_rate": 1.1554899064500247e-05,
"step": 16250
},
{
"embedding_loss": 0.0015,
"epoch": 2.4076809453471197,
"grad_norm": 0.005391134414821863,
"learning_rate": 1.1522074511734777e-05,
"step": 16300
},
{
"embedding_loss": 0.0011,
"epoch": 2.41506646971935,
"grad_norm": 0.019267791882157326,
"learning_rate": 1.148924995896931e-05,
"step": 16350
},
{
"embedding_loss": 0.0019,
"epoch": 2.4224519940915803,
"grad_norm": 0.2630805969238281,
"learning_rate": 1.1456425406203841e-05,
"step": 16400
},
{
"embedding_loss": 0.0024,
"epoch": 2.429837518463811,
"grad_norm": 0.010556219145655632,
"learning_rate": 1.1423600853438373e-05,
"step": 16450
},
{
"embedding_loss": 0.002,
"epoch": 2.4372230428360413,
"grad_norm": 0.03994214907288551,
"learning_rate": 1.1390776300672904e-05,
"step": 16500
},
{
"embedding_loss": 0.0016,
"epoch": 2.444608567208272,
"grad_norm": 3.779356002807617,
"learning_rate": 1.1357951747907436e-05,
"step": 16550
},
{
"embedding_loss": 0.0015,
"epoch": 2.4519940915805023,
"grad_norm": 0.030276980251073837,
"learning_rate": 1.1325127195141968e-05,
"step": 16600
},
{
"embedding_loss": 0.0021,
"epoch": 2.4593796159527326,
"grad_norm": 0.010462663136422634,
"learning_rate": 1.1292302642376498e-05,
"step": 16650
},
{
"embedding_loss": 0.0025,
"epoch": 2.466765140324963,
"grad_norm": 0.04659969359636307,
"learning_rate": 1.1259478089611028e-05,
"step": 16700
},
{
"embedding_loss": 0.0021,
"epoch": 2.4741506646971936,
"grad_norm": 0.34690728783607483,
"learning_rate": 1.1226653536845562e-05,
"step": 16750
},
{
"embedding_loss": 0.0029,
"epoch": 2.481536189069424,
"grad_norm": 0.019812889397144318,
"learning_rate": 1.1193828984080092e-05,
"step": 16800
},
{
"embedding_loss": 0.0014,
"epoch": 2.4889217134416546,
"grad_norm": 0.004909256473183632,
"learning_rate": 1.1161004431314624e-05,
"step": 16850
},
{
"embedding_loss": 0.0029,
"epoch": 2.496307237813885,
"grad_norm": 0.016758764162659645,
"learning_rate": 1.1128179878549155e-05,
"step": 16900
},
{
"embedding_loss": 0.004,
"epoch": 2.503692762186115,
"grad_norm": 0.07048258185386658,
"learning_rate": 1.1095355325783687e-05,
"step": 16950
},
{
"embedding_loss": 0.0028,
"epoch": 2.5110782865583454,
"grad_norm": 0.010511302389204502,
"learning_rate": 1.1062530773018219e-05,
"step": 17000
},
{
"embedding_loss": 0.0027,
"epoch": 2.518463810930576,
"grad_norm": 0.0332963727414608,
"learning_rate": 1.102970622025275e-05,
"step": 17050
},
{
"embedding_loss": 0.0011,
"epoch": 2.5258493353028064,
"grad_norm": 0.02814817987382412,
"learning_rate": 1.0996881667487283e-05,
"step": 17100
},
{
"embedding_loss": 0.0036,
"epoch": 2.533234859675037,
"grad_norm": 0.01899763010442257,
"learning_rate": 1.0964057114721813e-05,
"step": 17150
},
{
"embedding_loss": 0.0031,
"epoch": 2.5406203840472674,
"grad_norm": 0.004354503005743027,
"learning_rate": 1.0931232561956344e-05,
"step": 17200
},
{
"embedding_loss": 0.0021,
"epoch": 2.5480059084194977,
"grad_norm": 0.0331052802503109,
"learning_rate": 1.0898408009190876e-05,
"step": 17250
},
{
"embedding_loss": 0.0018,
"epoch": 2.555391432791728,
"grad_norm": 0.0316183939576149,
"learning_rate": 1.0865583456425408e-05,
"step": 17300
},
{
"embedding_loss": 0.0015,
"epoch": 2.5627769571639587,
"grad_norm": 0.009719472378492355,
"learning_rate": 1.0832758903659938e-05,
"step": 17350
},
{
"embedding_loss": 0.0031,
"epoch": 2.570162481536189,
"grad_norm": 0.035008445382118225,
"learning_rate": 1.079993435089447e-05,
"step": 17400
},
{
"embedding_loss": 0.0031,
"epoch": 2.5775480059084197,
"grad_norm": 0.008490943349897861,
"learning_rate": 1.0767109798129e-05,
"step": 17450
},
{
"embedding_loss": 0.0011,
"epoch": 2.58493353028065,
"grad_norm": 0.01083299983292818,
"learning_rate": 1.0734285245363534e-05,
"step": 17500
},
{
"embedding_loss": 0.0044,
"epoch": 2.5923190546528803,
"grad_norm": 0.01600501500070095,
"learning_rate": 1.0701460692598065e-05,
"step": 17550
},
{
"embedding_loss": 0.0013,
"epoch": 2.5997045790251105,
"grad_norm": 0.07744074612855911,
"learning_rate": 1.0668636139832595e-05,
"step": 17600
},
{
"embedding_loss": 0.0015,
"epoch": 2.6070901033973413,
"grad_norm": 0.036319248378276825,
"learning_rate": 1.0635811587067127e-05,
"step": 17650
},
{
"embedding_loss": 0.0013,
"epoch": 2.6144756277695715,
"grad_norm": 0.01792324334383011,
"learning_rate": 1.0602987034301659e-05,
"step": 17700
},
{
"embedding_loss": 0.0018,
"epoch": 2.6218611521418023,
"grad_norm": 0.07195013016462326,
"learning_rate": 1.0570162481536191e-05,
"step": 17750
},
{
"embedding_loss": 0.0023,
"epoch": 2.6292466765140325,
"grad_norm": 3.590275526046753,
"learning_rate": 1.0537337928770721e-05,
"step": 17800
},
{
"embedding_loss": 0.0043,
"epoch": 2.636632200886263,
"grad_norm": 0.009701603092253208,
"learning_rate": 1.0504513376005252e-05,
"step": 17850
},
{
"embedding_loss": 0.0049,
"epoch": 2.644017725258493,
"grad_norm": 0.09409826993942261,
"learning_rate": 1.0471688823239786e-05,
"step": 17900
},
{
"embedding_loss": 0.0045,
"epoch": 2.651403249630724,
"grad_norm": 0.012147662229835987,
"learning_rate": 1.0438864270474316e-05,
"step": 17950
},
{
"embedding_loss": 0.0017,
"epoch": 2.658788774002954,
"grad_norm": 0.008824297226965427,
"learning_rate": 1.0406039717708846e-05,
"step": 18000
},
{
"embedding_loss": 0.002,
"epoch": 2.666174298375185,
"grad_norm": 0.018408598378300667,
"learning_rate": 1.0373215164943378e-05,
"step": 18050
},
{
"embedding_loss": 0.0021,
"epoch": 2.673559822747415,
"grad_norm": 0.01662319526076317,
"learning_rate": 1.034039061217791e-05,
"step": 18100
},
{
"embedding_loss": 0.0014,
"epoch": 2.6809453471196454,
"grad_norm": 0.020505361258983612,
"learning_rate": 1.0307566059412442e-05,
"step": 18150
},
{
"embedding_loss": 0.0025,
"epoch": 2.6883308714918757,
"grad_norm": 0.08292482793331146,
"learning_rate": 1.0274741506646973e-05,
"step": 18200
},
{
"embedding_loss": 0.0032,
"epoch": 2.6957163958641064,
"grad_norm": 0.023084105923771858,
"learning_rate": 1.0241916953881503e-05,
"step": 18250
},
{
"embedding_loss": 0.0038,
"epoch": 2.7031019202363367,
"grad_norm": 0.030171602964401245,
"learning_rate": 1.0209092401116037e-05,
"step": 18300
},
{
"embedding_loss": 0.0016,
"epoch": 2.7104874446085674,
"grad_norm": 0.004670475609600544,
"learning_rate": 1.0176267848350567e-05,
"step": 18350
},
{
"embedding_loss": 0.0014,
"epoch": 2.7178729689807977,
"grad_norm": 0.003984387032687664,
"learning_rate": 1.0143443295585098e-05,
"step": 18400
},
{
"embedding_loss": 0.0013,
"epoch": 2.725258493353028,
"grad_norm": 0.03194098919630051,
"learning_rate": 1.011061874281963e-05,
"step": 18450
},
{
"embedding_loss": 0.0013,
"epoch": 2.7326440177252582,
"grad_norm": 0.007552579510957003,
"learning_rate": 1.0077794190054162e-05,
"step": 18500
},
{
"embedding_loss": 0.0024,
"epoch": 2.740029542097489,
"grad_norm": 0.02968655154109001,
"learning_rate": 1.0044969637288694e-05,
"step": 18550
},
{
"embedding_loss": 0.0024,
"epoch": 2.7474150664697192,
"grad_norm": 0.2573186159133911,
"learning_rate": 1.0012145084523224e-05,
"step": 18600
},
{
"embedding_loss": 0.0026,
"epoch": 2.75480059084195,
"grad_norm": 0.036742523312568665,
"learning_rate": 9.979320531757756e-06,
"step": 18650
},
{
"embedding_loss": 0.0032,
"epoch": 2.7621861152141802,
"grad_norm": 0.20845580101013184,
"learning_rate": 9.946495978992286e-06,
"step": 18700
},
{
"embedding_loss": 0.0024,
"epoch": 2.7695716395864105,
"grad_norm": 0.051792044192552567,
"learning_rate": 9.913671426226818e-06,
"step": 18750
},
{
"embedding_loss": 0.0019,
"epoch": 2.7769571639586412,
"grad_norm": 0.015146799385547638,
"learning_rate": 9.880846873461349e-06,
"step": 18800
},
{
"embedding_loss": 0.0015,
"epoch": 2.7843426883308715,
"grad_norm": 0.01486288197338581,
"learning_rate": 9.84802232069588e-06,
"step": 18850
},
{
"embedding_loss": 0.0028,
"epoch": 2.791728212703102,
"grad_norm": 0.018719913437962532,
"learning_rate": 9.815197767930413e-06,
"step": 18900
},
{
"embedding_loss": 0.0021,
"epoch": 2.7991137370753325,
"grad_norm": 0.005828204099088907,
"learning_rate": 9.782373215164945e-06,
"step": 18950
},
{
"embedding_loss": 0.0018,
"epoch": 2.806499261447563,
"grad_norm": 0.03715846315026283,
"learning_rate": 9.749548662399475e-06,
"step": 19000
},
{
"embedding_loss": 0.0009,
"epoch": 2.813884785819793,
"grad_norm": 0.019518226385116577,
"learning_rate": 9.716724109634007e-06,
"step": 19050
},
{
"embedding_loss": 0.0024,
"epoch": 2.821270310192024,
"grad_norm": 0.020911335945129395,
"learning_rate": 9.68389955686854e-06,
"step": 19100
},
{
"embedding_loss": 0.0016,
"epoch": 2.828655834564254,
"grad_norm": 0.010648909956216812,
"learning_rate": 9.65107500410307e-06,
"step": 19150
},
{
"embedding_loss": 0.001,
"epoch": 2.8360413589364843,
"grad_norm": 0.007687574252486229,
"learning_rate": 9.618250451337602e-06,
"step": 19200
},
{
"embedding_loss": 0.0016,
"epoch": 2.843426883308715,
"grad_norm": 0.016183407977223396,
"learning_rate": 9.585425898572132e-06,
"step": 19250
},
{
"embedding_loss": 0.0009,
"epoch": 2.8508124076809453,
"grad_norm": 0.032978300005197525,
"learning_rate": 9.552601345806664e-06,
"step": 19300
},
{
"embedding_loss": 0.0025,
"epoch": 2.8581979320531756,
"grad_norm": 0.00619637593626976,
"learning_rate": 9.519776793041196e-06,
"step": 19350
},
{
"embedding_loss": 0.0026,
"epoch": 2.8655834564254064,
"grad_norm": 0.0032677731942385435,
"learning_rate": 9.486952240275728e-06,
"step": 19400
},
{
"embedding_loss": 0.0018,
"epoch": 2.8729689807976366,
"grad_norm": 0.0064840479753911495,
"learning_rate": 9.454127687510259e-06,
"step": 19450
},
{
"embedding_loss": 0.0012,
"epoch": 2.880354505169867,
"grad_norm": 0.01070446241647005,
"learning_rate": 9.42130313474479e-06,
"step": 19500
},
{
"embedding_loss": 0.0012,
"epoch": 2.8877400295420976,
"grad_norm": 0.07543105632066727,
"learning_rate": 9.388478581979321e-06,
"step": 19550
},
{
"embedding_loss": 0.0018,
"epoch": 2.895125553914328,
"grad_norm": 0.025806330144405365,
"learning_rate": 9.355654029213853e-06,
"step": 19600
},
{
"embedding_loss": 0.003,
"epoch": 2.902511078286558,
"grad_norm": 0.026599083095788956,
"learning_rate": 9.322829476448383e-06,
"step": 19650
},
{
"embedding_loss": 0.0026,
"epoch": 2.909896602658789,
"grad_norm": 0.37029746174812317,
"learning_rate": 9.290004923682915e-06,
"step": 19700
},
{
"embedding_loss": 0.001,
"epoch": 2.917282127031019,
"grad_norm": 0.07045801728963852,
"learning_rate": 9.257180370917447e-06,
"step": 19750
},
{
"embedding_loss": 0.0031,
"epoch": 2.9246676514032495,
"grad_norm": 0.020875511690974236,
"learning_rate": 9.22435581815198e-06,
"step": 19800
},
{
"embedding_loss": 0.0019,
"epoch": 2.93205317577548,
"grad_norm": 0.013287228532135487,
"learning_rate": 9.19153126538651e-06,
"step": 19850
},
{
"embedding_loss": 0.0027,
"epoch": 2.9394387001477105,
"grad_norm": 0.006682571489363909,
"learning_rate": 9.158706712621042e-06,
"step": 19900
},
{
"embedding_loss": 0.001,
"epoch": 2.9468242245199407,
"grad_norm": 0.016461633145809174,
"learning_rate": 9.125882159855572e-06,
"step": 19950
},
{
"embedding_loss": 0.0025,
"epoch": 2.9542097488921715,
"grad_norm": 0.0292360782623291,
"learning_rate": 9.093057607090104e-06,
"step": 20000
},
{
"embedding_loss": 0.0017,
"epoch": 2.9615952732644018,
"grad_norm": 0.007479995954781771,
"learning_rate": 9.060233054324635e-06,
"step": 20050
},
{
"embedding_loss": 0.0033,
"epoch": 2.9689807976366325,
"grad_norm": 0.19220024347305298,
"learning_rate": 9.027408501559167e-06,
"step": 20100
},
{
"embedding_loss": 0.0006,
"epoch": 2.9763663220088628,
"grad_norm": 0.018404290080070496,
"learning_rate": 8.994583948793699e-06,
"step": 20150
},
{
"embedding_loss": 0.0026,
"epoch": 2.983751846381093,
"grad_norm": 0.012631416320800781,
"learning_rate": 8.96175939602823e-06,
"step": 20200
},
{
"embedding_loss": 0.0011,
"epoch": 2.9911373707533233,
"grad_norm": 0.04417691379785538,
"learning_rate": 8.928934843262761e-06,
"step": 20250
},
{
"embedding_loss": 0.0021,
"epoch": 2.998522895125554,
"grad_norm": 0.0054418547078967094,
"learning_rate": 8.896110290497293e-06,
"step": 20300
},
{
"embedding_loss": 0.0039,
"epoch": 3.0059084194977843,
"grad_norm": 0.005223344080150127,
"learning_rate": 8.863285737731823e-06,
"step": 20350
},
{
"embedding_loss": 0.0003,
"epoch": 3.0132939438700146,
"grad_norm": 0.0242659542709589,
"learning_rate": 8.830461184966356e-06,
"step": 20400
},
{
"embedding_loss": 0.001,
"epoch": 3.0206794682422453,
"grad_norm": 0.0049690124578773975,
"learning_rate": 8.797636632200886e-06,
"step": 20450
},
{
"embedding_loss": 0.0008,
"epoch": 3.0280649926144756,
"grad_norm": 0.0040290821343660355,
"learning_rate": 8.764812079435418e-06,
"step": 20500
},
{
"embedding_loss": 0.0009,
"epoch": 3.035450516986706,
"grad_norm": 0.019365187734365463,
"learning_rate": 8.73198752666995e-06,
"step": 20550
},
{
"embedding_loss": 0.0023,
"epoch": 3.0428360413589366,
"grad_norm": 0.10174138844013214,
"learning_rate": 8.699162973904482e-06,
"step": 20600
},
{
"embedding_loss": 0.0008,
"epoch": 3.050221565731167,
"grad_norm": 0.2679438889026642,
"learning_rate": 8.666338421139012e-06,
"step": 20650
},
{
"embedding_loss": 0.0009,
"epoch": 3.057607090103397,
"grad_norm": 0.010431923903524876,
"learning_rate": 8.633513868373544e-06,
"step": 20700
},
{
"embedding_loss": 0.0015,
"epoch": 3.064992614475628,
"grad_norm": 0.034736406058073044,
"learning_rate": 8.600689315608075e-06,
"step": 20750
},
{
"embedding_loss": 0.0019,
"epoch": 3.072378138847858,
"grad_norm": 0.012600087560713291,
"learning_rate": 8.567864762842607e-06,
"step": 20800
},
{
"embedding_loss": 0.0027,
"epoch": 3.0797636632200884,
"grad_norm": 0.017327722162008286,
"learning_rate": 8.535040210077137e-06,
"step": 20850
},
{
"embedding_loss": 0.0009,
"epoch": 3.087149187592319,
"grad_norm": 0.08267229795455933,
"learning_rate": 8.50221565731167e-06,
"step": 20900
},
{
"embedding_loss": 0.0006,
"epoch": 3.0945347119645494,
"grad_norm": 0.1653100550174713,
"learning_rate": 8.469391104546201e-06,
"step": 20950
},
{
"embedding_loss": 0.0011,
"epoch": 3.1019202363367797,
"grad_norm": 0.004222211427986622,
"learning_rate": 8.436566551780733e-06,
"step": 21000
},
{
"embedding_loss": 0.0014,
"epoch": 3.1093057607090104,
"grad_norm": 0.005486358422785997,
"learning_rate": 8.403741999015264e-06,
"step": 21050
},
{
"embedding_loss": 0.0009,
"epoch": 3.1166912850812407,
"grad_norm": 0.007771783974021673,
"learning_rate": 8.370917446249796e-06,
"step": 21100
},
{
"embedding_loss": 0.0011,
"epoch": 3.124076809453471,
"grad_norm": 0.009825172834098339,
"learning_rate": 8.338092893484328e-06,
"step": 21150
},
{
"embedding_loss": 0.0021,
"epoch": 3.1314623338257017,
"grad_norm": 0.009619227610528469,
"learning_rate": 8.305268340718858e-06,
"step": 21200
},
{
"embedding_loss": 0.0023,
"epoch": 3.138847858197932,
"grad_norm": 0.04705429822206497,
"learning_rate": 8.27244378795339e-06,
"step": 21250
},
{
"embedding_loss": 0.0022,
"epoch": 3.1462333825701623,
"grad_norm": 0.03510194644331932,
"learning_rate": 8.23961923518792e-06,
"step": 21300
},
{
"embedding_loss": 0.001,
"epoch": 3.153618906942393,
"grad_norm": 0.007620047312229872,
"learning_rate": 8.206794682422453e-06,
"step": 21350
},
{
"embedding_loss": 0.0017,
"epoch": 3.1610044313146233,
"grad_norm": 0.006676162593066692,
"learning_rate": 8.173970129656985e-06,
"step": 21400
},
{
"embedding_loss": 0.0022,
"epoch": 3.1683899556868536,
"grad_norm": 0.006805592216551304,
"learning_rate": 8.141145576891517e-06,
"step": 21450
},
{
"embedding_loss": 0.0007,
"epoch": 3.1757754800590843,
"grad_norm": 0.02653045393526554,
"learning_rate": 8.108321024126047e-06,
"step": 21500
},
{
"embedding_loss": 0.0022,
"epoch": 3.1831610044313146,
"grad_norm": 0.055775534361600876,
"learning_rate": 8.075496471360579e-06,
"step": 21550
},
{
"embedding_loss": 0.0002,
"epoch": 3.1905465288035453,
"grad_norm": 0.004594122059643269,
"learning_rate": 8.04267191859511e-06,
"step": 21600
},
{
"embedding_loss": 0.0004,
"epoch": 3.1979320531757756,
"grad_norm": 0.057899340987205505,
"learning_rate": 8.009847365829641e-06,
"step": 21650
},
{
"embedding_loss": 0.001,
"epoch": 3.205317577548006,
"grad_norm": 0.003396780928596854,
"learning_rate": 7.977022813064172e-06,
"step": 21700
},
{
"embedding_loss": 0.0033,
"epoch": 3.212703101920236,
"grad_norm": 0.09521088004112244,
"learning_rate": 7.944198260298704e-06,
"step": 21750
},
{
"embedding_loss": 0.0011,
"epoch": 3.220088626292467,
"grad_norm": 0.004245746415108442,
"learning_rate": 7.911373707533236e-06,
"step": 21800
},
{
"embedding_loss": 0.0003,
"epoch": 3.227474150664697,
"grad_norm": 0.005718466360121965,
"learning_rate": 7.878549154767768e-06,
"step": 21850
},
{
"embedding_loss": 0.0004,
"epoch": 3.234859675036928,
"grad_norm": 0.007956516928970814,
"learning_rate": 7.845724602002298e-06,
"step": 21900
},
{
"embedding_loss": 0.0004,
"epoch": 3.242245199409158,
"grad_norm": 0.06620016694068909,
"learning_rate": 7.81290004923683e-06,
"step": 21950
},
{
"embedding_loss": 0.0014,
"epoch": 3.2496307237813884,
"grad_norm": 0.04369127005338669,
"learning_rate": 7.78007549647136e-06,
"step": 22000
},
{
"embedding_loss": 0.0012,
"epoch": 3.2570162481536187,
"grad_norm": 0.029797792434692383,
"learning_rate": 7.747250943705893e-06,
"step": 22050
},
{
"embedding_loss": 0.002,
"epoch": 3.2644017725258494,
"grad_norm": 0.014197341166436672,
"learning_rate": 7.714426390940423e-06,
"step": 22100
},
{
"embedding_loss": 0.001,
"epoch": 3.2717872968980797,
"grad_norm": 0.011921238154172897,
"learning_rate": 7.681601838174955e-06,
"step": 22150
},
{
"embedding_loss": 0.0026,
"epoch": 3.2791728212703104,
"grad_norm": 0.022078925743699074,
"learning_rate": 7.648777285409487e-06,
"step": 22200
},
{
"embedding_loss": 0.0017,
"epoch": 3.2865583456425407,
"grad_norm": 0.011514640413224697,
"learning_rate": 7.615952732644018e-06,
"step": 22250
},
{
"embedding_loss": 0.0009,
"epoch": 3.293943870014771,
"grad_norm": 0.2203167974948883,
"learning_rate": 7.5831281798785495e-06,
"step": 22300
},
{
"embedding_loss": 0.0018,
"epoch": 3.3013293943870012,
"grad_norm": 0.0317855142056942,
"learning_rate": 7.5503036271130815e-06,
"step": 22350
},
{
"embedding_loss": 0.0025,
"epoch": 3.308714918759232,
"grad_norm": 0.005245373118668795,
"learning_rate": 7.517479074347612e-06,
"step": 22400
},
{
"embedding_loss": 0.0016,
"epoch": 3.3161004431314622,
"grad_norm": 0.007596870884299278,
"learning_rate": 7.484654521582144e-06,
"step": 22450
},
{
"embedding_loss": 0.0035,
"epoch": 3.323485967503693,
"grad_norm": 0.020896727219223976,
"learning_rate": 7.451829968816675e-06,
"step": 22500
},
{
"embedding_loss": 0.0002,
"epoch": 3.3308714918759232,
"grad_norm": 0.07300405204296112,
"learning_rate": 7.419005416051207e-06,
"step": 22550
},
{
"embedding_loss": 0.0015,
"epoch": 3.3382570162481535,
"grad_norm": 0.005366707220673561,
"learning_rate": 7.3861808632857375e-06,
"step": 22600
},
{
"embedding_loss": 0.002,
"epoch": 3.345642540620384,
"grad_norm": 0.011347993277013302,
"learning_rate": 7.3533563105202695e-06,
"step": 22650
},
{
"embedding_loss": 0.0021,
"epoch": 3.3530280649926145,
"grad_norm": 0.006689651869237423,
"learning_rate": 7.320531757754801e-06,
"step": 22700
},
{
"embedding_loss": 0.0024,
"epoch": 3.360413589364845,
"grad_norm": 0.006886324379593134,
"learning_rate": 7.287707204989333e-06,
"step": 22750
},
{
"embedding_loss": 0.0015,
"epoch": 3.3677991137370755,
"grad_norm": 0.03965551033616066,
"learning_rate": 7.254882652223864e-06,
"step": 22800
},
{
"embedding_loss": 0.0021,
"epoch": 3.375184638109306,
"grad_norm": 0.03409096226096153,
"learning_rate": 7.222058099458395e-06,
"step": 22850
},
{
"embedding_loss": 0.0022,
"epoch": 3.382570162481536,
"grad_norm": 0.00742725282907486,
"learning_rate": 7.189233546692926e-06,
"step": 22900
},
{
"embedding_loss": 0.0015,
"epoch": 3.389955686853767,
"grad_norm": 0.008300753310322762,
"learning_rate": 7.156408993927458e-06,
"step": 22950
},
{
"embedding_loss": 0.0015,
"epoch": 3.397341211225997,
"grad_norm": 0.03697545453906059,
"learning_rate": 7.12358444116199e-06,
"step": 23000
},
{
"embedding_loss": 0.0016,
"epoch": 3.4047267355982274,
"grad_norm": 0.021186918020248413,
"learning_rate": 7.090759888396521e-06,
"step": 23050
},
{
"embedding_loss": 0.0008,
"epoch": 3.412112259970458,
"grad_norm": 0.021211344748735428,
"learning_rate": 7.057935335631053e-06,
"step": 23100
},
{
"embedding_loss": 0.0021,
"epoch": 3.4194977843426884,
"grad_norm": 0.015593543648719788,
"learning_rate": 7.025110782865584e-06,
"step": 23150
},
{
"embedding_loss": 0.0021,
"epoch": 3.4268833087149186,
"grad_norm": 0.12304917722940445,
"learning_rate": 6.992286230100116e-06,
"step": 23200
},
{
"embedding_loss": 0.0017,
"epoch": 3.4342688330871494,
"grad_norm": 0.030567510053515434,
"learning_rate": 6.9594616773346464e-06,
"step": 23250
},
{
"embedding_loss": 0.0015,
"epoch": 3.4416543574593796,
"grad_norm": 0.029271570965647697,
"learning_rate": 6.9266371245691785e-06,
"step": 23300
},
{
"embedding_loss": 0.0004,
"epoch": 3.44903988183161,
"grad_norm": 0.007037239149212837,
"learning_rate": 6.89381257180371e-06,
"step": 23350
},
{
"embedding_loss": 0.0007,
"epoch": 3.4564254062038406,
"grad_norm": 0.004125585313886404,
"learning_rate": 6.860988019038242e-06,
"step": 23400
},
{
"embedding_loss": 0.0004,
"epoch": 3.463810930576071,
"grad_norm": 0.012953881174325943,
"learning_rate": 6.828163466272772e-06,
"step": 23450
},
{
"embedding_loss": 0.0014,
"epoch": 3.471196454948301,
"grad_norm": 0.0054145329631865025,
"learning_rate": 6.795338913507304e-06,
"step": 23500
},
{
"embedding_loss": 0.0003,
"epoch": 3.478581979320532,
"grad_norm": 0.00575551250949502,
"learning_rate": 6.762514360741835e-06,
"step": 23550
},
{
"embedding_loss": 0.002,
"epoch": 3.485967503692762,
"grad_norm": 0.0046454742550849915,
"learning_rate": 6.729689807976367e-06,
"step": 23600
},
{
"embedding_loss": 0.0003,
"epoch": 3.4933530280649925,
"grad_norm": 0.01675521954894066,
"learning_rate": 6.6968652552108986e-06,
"step": 23650
},
{
"embedding_loss": 0.0002,
"epoch": 3.500738552437223,
"grad_norm": 0.5372416973114014,
"learning_rate": 6.66404070244543e-06,
"step": 23700
},
{
"embedding_loss": 0.0009,
"epoch": 3.5081240768094535,
"grad_norm": 0.010617181658744812,
"learning_rate": 6.631216149679961e-06,
"step": 23750
},
{
"embedding_loss": 0.0036,
"epoch": 3.5155096011816838,
"grad_norm": 0.009620044380426407,
"learning_rate": 6.598391596914493e-06,
"step": 23800
},
{
"embedding_loss": 0.0022,
"epoch": 3.5228951255539145,
"grad_norm": 0.0031557646580040455,
"learning_rate": 6.565567044149024e-06,
"step": 23850
},
{
"embedding_loss": 0.0014,
"epoch": 3.5302806499261448,
"grad_norm": 0.006240217015147209,
"learning_rate": 6.532742491383555e-06,
"step": 23900
},
{
"embedding_loss": 0.0015,
"epoch": 3.537666174298375,
"grad_norm": 0.054248787462711334,
"learning_rate": 6.499917938618087e-06,
"step": 23950
},
{
"embedding_loss": 0.0009,
"epoch": 3.5450516986706058,
"grad_norm": 0.004816859494894743,
"learning_rate": 6.467093385852619e-06,
"step": 24000
},
{
"embedding_loss": 0.0007,
"epoch": 3.552437223042836,
"grad_norm": 0.014538111165165901,
"learning_rate": 6.43426883308715e-06,
"step": 24050
},
{
"embedding_loss": 0.0024,
"epoch": 3.5598227474150663,
"grad_norm": 0.0056640529073774815,
"learning_rate": 6.401444280321681e-06,
"step": 24100
},
{
"embedding_loss": 0.0011,
"epoch": 3.567208271787297,
"grad_norm": 0.005168286617845297,
"learning_rate": 6.368619727556212e-06,
"step": 24150
},
{
"embedding_loss": 0.0018,
"epoch": 3.5745937961595273,
"grad_norm": 0.006222166121006012,
"learning_rate": 6.335795174790744e-06,
"step": 24200
},
{
"embedding_loss": 0.0018,
"epoch": 3.5819793205317576,
"grad_norm": 0.1087515652179718,
"learning_rate": 6.3029706220252755e-06,
"step": 24250
},
{
"embedding_loss": 0.0029,
"epoch": 3.5893648449039883,
"grad_norm": 0.002414864953607321,
"learning_rate": 6.270146069259807e-06,
"step": 24300
},
{
"embedding_loss": 0.0009,
"epoch": 3.5967503692762186,
"grad_norm": 0.003966380376368761,
"learning_rate": 6.237321516494338e-06,
"step": 24350
},
{
"embedding_loss": 0.0015,
"epoch": 3.604135893648449,
"grad_norm": 0.0019502595532685518,
"learning_rate": 6.20449696372887e-06,
"step": 24400
},
{
"embedding_loss": 0.0015,
"epoch": 3.6115214180206796,
"grad_norm": 0.04951006919145584,
"learning_rate": 6.171672410963401e-06,
"step": 24450
},
{
"embedding_loss": 0.0009,
"epoch": 3.61890694239291,
"grad_norm": 0.05455106496810913,
"learning_rate": 6.138847858197933e-06,
"step": 24500
},
{
"embedding_loss": 0.0002,
"epoch": 3.62629246676514,
"grad_norm": 0.006709383800625801,
"learning_rate": 6.1060233054324635e-06,
"step": 24550
},
{
"embedding_loss": 0.0021,
"epoch": 3.633677991137371,
"grad_norm": 0.006298394873738289,
"learning_rate": 6.0731987526669955e-06,
"step": 24600
},
{
"embedding_loss": 0.0002,
"epoch": 3.641063515509601,
"grad_norm": 0.007862403988838196,
"learning_rate": 6.040374199901527e-06,
"step": 24650
},
{
"embedding_loss": 0.0014,
"epoch": 3.6484490398818314,
"grad_norm": 0.014156641438603401,
"learning_rate": 6.007549647136059e-06,
"step": 24700
},
{
"embedding_loss": 0.0008,
"epoch": 3.655834564254062,
"grad_norm": 0.05146721005439758,
"learning_rate": 5.974725094370589e-06,
"step": 24750
},
{
"embedding_loss": 0.0013,
"epoch": 3.6632200886262924,
"grad_norm": 0.0289792250841856,
"learning_rate": 5.941900541605121e-06,
"step": 24800
},
{
"embedding_loss": 0.0023,
"epoch": 3.670605612998523,
"grad_norm": 0.001623387448489666,
"learning_rate": 5.909075988839652e-06,
"step": 24850
},
{
"embedding_loss": 0.0004,
"epoch": 3.6779911373707534,
"grad_norm": 0.013176560401916504,
"learning_rate": 5.876251436074184e-06,
"step": 24900
},
{
"embedding_loss": 0.0007,
"epoch": 3.6853766617429837,
"grad_norm": 0.004344393033534288,
"learning_rate": 5.843426883308715e-06,
"step": 24950
},
{
"embedding_loss": 0.0015,
"epoch": 3.692762186115214,
"grad_norm": 0.00743023632094264,
"learning_rate": 5.810602330543247e-06,
"step": 25000
},
{
"embedding_loss": 0.0008,
"epoch": 3.7001477104874447,
"grad_norm": 0.01551518589258194,
"learning_rate": 5.777777777777778e-06,
"step": 25050
},
{
"embedding_loss": 0.0014,
"epoch": 3.707533234859675,
"grad_norm": 0.0053128432482481,
"learning_rate": 5.74495322501231e-06,
"step": 25100
},
{
"embedding_loss": 0.0005,
"epoch": 3.7149187592319057,
"grad_norm": 0.005089200101792812,
"learning_rate": 5.712128672246841e-06,
"step": 25150
},
{
"embedding_loss": 0.0018,
"epoch": 3.722304283604136,
"grad_norm": 0.007938201539218426,
"learning_rate": 5.679304119481372e-06,
"step": 25200
},
{
"embedding_loss": 0.0012,
"epoch": 3.7296898079763663,
"grad_norm": 0.031416155397892,
"learning_rate": 5.6464795667159045e-06,
"step": 25250
},
{
"embedding_loss": 0.0002,
"epoch": 3.7370753323485966,
"grad_norm": 0.01789075881242752,
"learning_rate": 5.613655013950436e-06,
"step": 25300
},
{
"embedding_loss": 0.0005,
"epoch": 3.7444608567208273,
"grad_norm": 0.0897989496588707,
"learning_rate": 5.580830461184968e-06,
"step": 25350
},
{
"embedding_loss": 0.0016,
"epoch": 3.7518463810930576,
"grad_norm": 0.0033649958204478025,
"learning_rate": 5.548005908419498e-06,
"step": 25400
},
{
"embedding_loss": 0.0015,
"epoch": 3.7592319054652883,
"grad_norm": 0.007789059076458216,
"learning_rate": 5.51518135565403e-06,
"step": 25450
},
{
"embedding_loss": 0.0014,
"epoch": 3.7666174298375186,
"grad_norm": 0.0069976383820176125,
"learning_rate": 5.482356802888561e-06,
"step": 25500
},
{
"embedding_loss": 0.0008,
"epoch": 3.774002954209749,
"grad_norm": 0.028319302946329117,
"learning_rate": 5.449532250123093e-06,
"step": 25550
},
{
"embedding_loss": 0.0004,
"epoch": 3.781388478581979,
"grad_norm": 0.13736043870449066,
"learning_rate": 5.416707697357624e-06,
"step": 25600
},
{
"embedding_loss": 0.0014,
"epoch": 3.78877400295421,
"grad_norm": 0.0662890300154686,
"learning_rate": 5.383883144592156e-06,
"step": 25650
},
{
"embedding_loss": 0.0018,
"epoch": 3.79615952732644,
"grad_norm": 0.07620090991258621,
"learning_rate": 5.351058591826687e-06,
"step": 25700
},
{
"embedding_loss": 0.0008,
"epoch": 3.803545051698671,
"grad_norm": 0.0053595914505422115,
"learning_rate": 5.318234039061219e-06,
"step": 25750
},
{
"embedding_loss": 0.0008,
"epoch": 3.810930576070901,
"grad_norm": 0.03874294087290764,
"learning_rate": 5.285409486295749e-06,
"step": 25800
},
{
"embedding_loss": 0.0002,
"epoch": 3.8183161004431314,
"grad_norm": 0.0377751886844635,
"learning_rate": 5.252584933530281e-06,
"step": 25850
},
{
"embedding_loss": 0.0003,
"epoch": 3.8257016248153617,
"grad_norm": 0.052115991711616516,
"learning_rate": 5.2197603807648126e-06,
"step": 25900
},
{
"embedding_loss": 0.0009,
"epoch": 3.8330871491875924,
"grad_norm": 0.004992119502276182,
"learning_rate": 5.186935827999345e-06,
"step": 25950
},
{
"embedding_loss": 0.002,
"epoch": 3.8404726735598227,
"grad_norm": 0.010746672749519348,
"learning_rate": 5.154111275233875e-06,
"step": 26000
},
{
"embedding_loss": 0.0016,
"epoch": 3.8478581979320534,
"grad_norm": 0.0036030395422130823,
"learning_rate": 5.121286722468407e-06,
"step": 26050
},
{
"embedding_loss": 0.0013,
"epoch": 3.8552437223042837,
"grad_norm": 0.004183268640190363,
"learning_rate": 5.088462169702938e-06,
"step": 26100
},
{
"embedding_loss": 0.0021,
"epoch": 3.862629246676514,
"grad_norm": 0.03056243434548378,
"learning_rate": 5.05563761693747e-06,
"step": 26150
},
{
"embedding_loss": 0.0006,
"epoch": 3.8700147710487443,
"grad_norm": 0.020458584651350975,
"learning_rate": 5.022813064172001e-06,
"step": 26200
},
{
"embedding_loss": 0.0005,
"epoch": 3.877400295420975,
"grad_norm": 0.014284319244325161,
"learning_rate": 4.989988511406533e-06,
"step": 26250
},
{
"embedding_loss": 0.0019,
"epoch": 3.8847858197932053,
"grad_norm": 0.004101385362446308,
"learning_rate": 4.957163958641064e-06,
"step": 26300
},
{
"embedding_loss": 0.0017,
"epoch": 3.892171344165436,
"grad_norm": 0.003685436677187681,
"learning_rate": 4.924339405875596e-06,
"step": 26350
},
{
"embedding_loss": 0.0002,
"epoch": 3.8995568685376663,
"grad_norm": 0.01675995998084545,
"learning_rate": 4.891514853110127e-06,
"step": 26400
},
{
"embedding_loss": 0.0014,
"epoch": 3.9069423929098965,
"grad_norm": 0.003458675229921937,
"learning_rate": 4.858690300344658e-06,
"step": 26450
},
{
"embedding_loss": 0.0003,
"epoch": 3.914327917282127,
"grad_norm": 0.004388707224279642,
"learning_rate": 4.8258657475791895e-06,
"step": 26500
},
{
"embedding_loss": 0.0015,
"epoch": 3.9217134416543575,
"grad_norm": 0.006350350566208363,
"learning_rate": 4.7930411948137215e-06,
"step": 26550
},
{
"embedding_loss": 0.001,
"epoch": 3.929098966026588,
"grad_norm": 0.005695797968655825,
"learning_rate": 4.760216642048253e-06,
"step": 26600
},
{
"embedding_loss": 0.0002,
"epoch": 3.9364844903988185,
"grad_norm": 0.004757806193083525,
"learning_rate": 4.727392089282784e-06,
"step": 26650
},
{
"embedding_loss": 0.0002,
"epoch": 3.943870014771049,
"grad_norm": 0.0033138145226985216,
"learning_rate": 4.694567536517315e-06,
"step": 26700
},
{
"embedding_loss": 0.0002,
"epoch": 3.951255539143279,
"grad_norm": 0.003561320947483182,
"learning_rate": 4.661742983751847e-06,
"step": 26750
},
{
"embedding_loss": 0.0006,
"epoch": 3.9586410635155094,
"grad_norm": 0.01382633950561285,
"learning_rate": 4.628918430986378e-06,
"step": 26800
},
{
"embedding_loss": 0.0003,
"epoch": 3.96602658788774,
"grad_norm": 0.005202912725508213,
"learning_rate": 4.5960938782209095e-06,
"step": 26850
},
{
"embedding_loss": 0.0016,
"epoch": 3.9734121122599704,
"grad_norm": 0.012079019099473953,
"learning_rate": 4.563269325455441e-06,
"step": 26900
},
{
"embedding_loss": 0.0008,
"epoch": 3.980797636632201,
"grad_norm": 0.014114444144070148,
"learning_rate": 4.530444772689973e-06,
"step": 26950
},
{
"embedding_loss": 0.002,
"epoch": 3.9881831610044314,
"grad_norm": 0.02415064163506031,
"learning_rate": 4.497620219924504e-06,
"step": 27000
},
{
"embedding_loss": 0.0017,
"epoch": 3.9955686853766617,
"grad_norm": 0.021551288664340973,
"learning_rate": 4.464795667159035e-06,
"step": 27050
},
{
"embedding_loss": 0.0003,
"epoch": 4.002954209748892,
"grad_norm": 0.005066817160695791,
"learning_rate": 4.431971114393566e-06,
"step": 27100
},
{
"embedding_loss": 0.0012,
"epoch": 4.010339734121122,
"grad_norm": 0.007141259498894215,
"learning_rate": 4.399146561628098e-06,
"step": 27150
},
{
"embedding_loss": 0.0004,
"epoch": 4.017725258493353,
"grad_norm": 0.12070070952177048,
"learning_rate": 4.36632200886263e-06,
"step": 27200
},
{
"embedding_loss": 0.0022,
"epoch": 4.025110782865584,
"grad_norm": 0.0378386452794075,
"learning_rate": 4.333497456097161e-06,
"step": 27250
},
{
"embedding_loss": 0.0015,
"epoch": 4.032496307237814,
"grad_norm": 0.0050777471624314785,
"learning_rate": 4.300672903331692e-06,
"step": 27300
},
{
"embedding_loss": 0.0004,
"epoch": 4.039881831610044,
"grad_norm": 0.01844395510852337,
"learning_rate": 4.267848350566224e-06,
"step": 27350
},
{
"embedding_loss": 0.001,
"epoch": 4.0472673559822745,
"grad_norm": 0.012001128867268562,
"learning_rate": 4.235023797800755e-06,
"step": 27400
},
{
"embedding_loss": 0.0002,
"epoch": 4.054652880354505,
"grad_norm": 0.0065623316913843155,
"learning_rate": 4.2021992450352864e-06,
"step": 27450
},
{
"embedding_loss": 0.0002,
"epoch": 4.062038404726736,
"grad_norm": 0.004251908976584673,
"learning_rate": 4.1693746922698185e-06,
"step": 27500
},
{
"embedding_loss": 0.0002,
"epoch": 4.069423929098966,
"grad_norm": 0.010989518836140633,
"learning_rate": 4.13655013950435e-06,
"step": 27550
},
{
"embedding_loss": 0.0002,
"epoch": 4.0768094534711965,
"grad_norm": 0.0056010037660598755,
"learning_rate": 4.103725586738881e-06,
"step": 27600
},
{
"embedding_loss": 0.0009,
"epoch": 4.084194977843427,
"grad_norm": 0.010540075600147247,
"learning_rate": 4.070901033973412e-06,
"step": 27650
},
{
"embedding_loss": 0.0016,
"epoch": 4.091580502215657,
"grad_norm": 0.01558383833616972,
"learning_rate": 4.038076481207944e-06,
"step": 27700
},
{
"embedding_loss": 0.0017,
"epoch": 4.098966026587887,
"grad_norm": 0.0061827609315514565,
"learning_rate": 4.005251928442475e-06,
"step": 27750
},
{
"embedding_loss": 0.0016,
"epoch": 4.1063515509601185,
"grad_norm": 0.016165059059858322,
"learning_rate": 3.9724273756770065e-06,
"step": 27800
},
{
"embedding_loss": 0.0008,
"epoch": 4.113737075332349,
"grad_norm": 0.013678347691893578,
"learning_rate": 3.939602822911538e-06,
"step": 27850
},
{
"embedding_loss": 0.0007,
"epoch": 4.121122599704579,
"grad_norm": 0.002744109369814396,
"learning_rate": 3.90677827014607e-06,
"step": 27900
},
{
"embedding_loss": 0.0002,
"epoch": 4.128508124076809,
"grad_norm": 0.002654843032360077,
"learning_rate": 3.873953717380601e-06,
"step": 27950
},
{
"embedding_loss": 0.0003,
"epoch": 4.13589364844904,
"grad_norm": 0.031312067061662674,
"learning_rate": 3.841129164615132e-06,
"step": 28000
},
{
"embedding_loss": 0.0007,
"epoch": 4.14327917282127,
"grad_norm": 0.002234194427728653,
"learning_rate": 3.8083046118496638e-06,
"step": 28050
},
{
"embedding_loss": 0.002,
"epoch": 4.150664697193501,
"grad_norm": 0.005080920644104481,
"learning_rate": 3.775480059084195e-06,
"step": 28100
},
{
"embedding_loss": 0.0013,
"epoch": 4.158050221565731,
"grad_norm": 0.00728574488312006,
"learning_rate": 3.742655506318727e-06,
"step": 28150
},
{
"embedding_loss": 0.0003,
"epoch": 4.165435745937962,
"grad_norm": 0.0030798488296568394,
"learning_rate": 3.709830953553258e-06,
"step": 28200
},
{
"embedding_loss": 0.0001,
"epoch": 4.172821270310192,
"grad_norm": 0.0029522618278861046,
"learning_rate": 3.67700640078779e-06,
"step": 28250
},
{
"embedding_loss": 0.0008,
"epoch": 4.180206794682422,
"grad_norm": 0.04743621125817299,
"learning_rate": 3.6441818480223214e-06,
"step": 28300
},
{
"embedding_loss": 0.0019,
"epoch": 4.1875923190546525,
"grad_norm": 0.021583393216133118,
"learning_rate": 3.6113572952568526e-06,
"step": 28350
},
{
"embedding_loss": 0.0017,
"epoch": 4.194977843426884,
"grad_norm": 0.004344303160905838,
"learning_rate": 3.5785327424913842e-06,
"step": 28400
},
{
"embedding_loss": 0.0007,
"epoch": 4.202363367799114,
"grad_norm": 0.05794514715671539,
"learning_rate": 3.5457081897259154e-06,
"step": 28450
},
{
"embedding_loss": 0.0021,
"epoch": 4.209748892171344,
"grad_norm": 0.03878411650657654,
"learning_rate": 3.512883636960447e-06,
"step": 28500
},
{
"embedding_loss": 0.0005,
"epoch": 4.2171344165435745,
"grad_norm": 0.024656204506754875,
"learning_rate": 3.4800590841949783e-06,
"step": 28550
},
{
"embedding_loss": 0.0009,
"epoch": 4.224519940915805,
"grad_norm": 0.07271004468202591,
"learning_rate": 3.44723453142951e-06,
"step": 28600
},
{
"embedding_loss": 0.0019,
"epoch": 4.231905465288035,
"grad_norm": 0.028899872675538063,
"learning_rate": 3.414409978664041e-06,
"step": 28650
},
{
"embedding_loss": 0.0006,
"epoch": 4.239290989660266,
"grad_norm": 0.05092883110046387,
"learning_rate": 3.3815854258985727e-06,
"step": 28700
},
{
"embedding_loss": 0.0011,
"epoch": 4.2466765140324965,
"grad_norm": 0.0032700442243367434,
"learning_rate": 3.348760873133104e-06,
"step": 28750
},
{
"embedding_loss": 0.0005,
"epoch": 4.254062038404727,
"grad_norm": 0.008138212375342846,
"learning_rate": 3.3159363203676355e-06,
"step": 28800
},
{
"embedding_loss": 0.0008,
"epoch": 4.261447562776957,
"grad_norm": 0.004710312932729721,
"learning_rate": 3.2831117676021667e-06,
"step": 28850
},
{
"embedding_loss": 0.0006,
"epoch": 4.268833087149187,
"grad_norm": 0.006768395192921162,
"learning_rate": 3.2502872148366983e-06,
"step": 28900
},
{
"embedding_loss": 0.0006,
"epoch": 4.2762186115214185,
"grad_norm": 3.97033429145813,
"learning_rate": 3.2174626620712295e-06,
"step": 28950
},
{
"embedding_loss": 0.0008,
"epoch": 4.283604135893649,
"grad_norm": 0.0024879788979887962,
"learning_rate": 3.184638109305761e-06,
"step": 29000
},
{
"embedding_loss": 0.0014,
"epoch": 4.290989660265879,
"grad_norm": 0.004705480299890041,
"learning_rate": 3.1518135565402923e-06,
"step": 29050
},
{
"embedding_loss": 0.0003,
"epoch": 4.298375184638109,
"grad_norm": 0.0038461387157440186,
"learning_rate": 3.118989003774824e-06,
"step": 29100
},
{
"embedding_loss": 0.0002,
"epoch": 4.30576070901034,
"grad_norm": 0.0027929339557886124,
"learning_rate": 3.086164451009355e-06,
"step": 29150
},
{
"embedding_loss": 0.0009,
"epoch": 4.31314623338257,
"grad_norm": 0.022274106740951538,
"learning_rate": 3.0533398982438868e-06,
"step": 29200
},
{
"embedding_loss": 0.0001,
"epoch": 4.3205317577548,
"grad_norm": 0.001826609717682004,
"learning_rate": 3.020515345478418e-06,
"step": 29250
},
{
"embedding_loss": 0.0002,
"epoch": 4.327917282127031,
"grad_norm": 0.026185447350144386,
"learning_rate": 2.9876907927129496e-06,
"step": 29300
},
{
"embedding_loss": 0.0008,
"epoch": 4.335302806499262,
"grad_norm": 0.8461505770683289,
"learning_rate": 2.954866239947481e-06,
"step": 29350
},
{
"embedding_loss": 0.0003,
"epoch": 4.342688330871492,
"grad_norm": 0.0049928221851587296,
"learning_rate": 2.9220416871820124e-06,
"step": 29400
},
{
"embedding_loss": 0.0009,
"epoch": 4.350073855243722,
"grad_norm": 0.0039035649970173836,
"learning_rate": 2.8892171344165436e-06,
"step": 29450
},
{
"embedding_loss": 0.0008,
"epoch": 4.357459379615952,
"grad_norm": 0.003410862758755684,
"learning_rate": 2.8563925816510752e-06,
"step": 29500
},
{
"embedding_loss": 0.0009,
"epoch": 4.364844903988184,
"grad_norm": 0.007184627000242472,
"learning_rate": 2.8235680288856064e-06,
"step": 29550
},
{
"embedding_loss": 0.0012,
"epoch": 4.372230428360414,
"grad_norm": 0.005408278200775385,
"learning_rate": 2.790743476120138e-06,
"step": 29600
},
{
"embedding_loss": 0.0004,
"epoch": 4.379615952732644,
"grad_norm": 0.005216268356889486,
"learning_rate": 2.7579189233546692e-06,
"step": 29650
},
{
"embedding_loss": 0.0015,
"epoch": 4.387001477104874,
"grad_norm": 0.12092409282922745,
"learning_rate": 2.725094370589201e-06,
"step": 29700
},
{
"embedding_loss": 0.0011,
"epoch": 4.394387001477105,
"grad_norm": 2.360546112060547,
"learning_rate": 2.692269817823732e-06,
"step": 29750
},
{
"embedding_loss": 0.0003,
"epoch": 4.401772525849335,
"grad_norm": 0.04117804393172264,
"learning_rate": 2.6594452650582637e-06,
"step": 29800
},
{
"embedding_loss": 0.0014,
"epoch": 4.409158050221565,
"grad_norm": 0.31080320477485657,
"learning_rate": 2.626620712292795e-06,
"step": 29850
},
{
"embedding_loss": 0.0001,
"epoch": 4.416543574593796,
"grad_norm": 0.0030854118522256613,
"learning_rate": 2.5937961595273265e-06,
"step": 29900
},
{
"embedding_loss": 0.001,
"epoch": 4.423929098966027,
"grad_norm": 0.0051147108897566795,
"learning_rate": 2.5609716067618577e-06,
"step": 29950
},
{
"embedding_loss": 0.0003,
"epoch": 4.431314623338257,
"grad_norm": 0.002233837265521288,
"learning_rate": 2.5281470539963893e-06,
"step": 30000
},
{
"embedding_loss": 0.0003,
"epoch": 4.438700147710487,
"grad_norm": 0.002284417860209942,
"learning_rate": 2.495322501230921e-06,
"step": 30050
},
{
"embedding_loss": 0.0008,
"epoch": 4.4460856720827175,
"grad_norm": 0.03488105162978172,
"learning_rate": 2.4624979484654525e-06,
"step": 30100
},
{
"embedding_loss": 0.0008,
"epoch": 4.453471196454949,
"grad_norm": 0.011509645730257034,
"learning_rate": 2.4296733956999837e-06,
"step": 30150
},
{
"embedding_loss": 0.0002,
"epoch": 4.460856720827179,
"grad_norm": 0.0027210384141653776,
"learning_rate": 2.3968488429345154e-06,
"step": 30200
},
{
"embedding_loss": 0.0002,
"epoch": 4.468242245199409,
"grad_norm": 0.013157092034816742,
"learning_rate": 2.3640242901690466e-06,
"step": 30250
},
{
"embedding_loss": 0.0001,
"epoch": 4.4756277695716395,
"grad_norm": 0.004714665934443474,
"learning_rate": 2.331199737403578e-06,
"step": 30300
},
{
"embedding_loss": 0.0007,
"epoch": 4.48301329394387,
"grad_norm": 0.011190270073711872,
"learning_rate": 2.2983751846381094e-06,
"step": 30350
},
{
"embedding_loss": 0.0001,
"epoch": 4.4903988183161,
"grad_norm": 0.00462683429941535,
"learning_rate": 2.265550631872641e-06,
"step": 30400
},
{
"embedding_loss": 0.0001,
"epoch": 4.497784342688331,
"grad_norm": 0.003459771629422903,
"learning_rate": 2.232726079107172e-06,
"step": 30450
},
{
"embedding_loss": 0.0003,
"epoch": 4.5051698670605616,
"grad_norm": 0.04073004424571991,
"learning_rate": 2.199901526341704e-06,
"step": 30500
},
{
"embedding_loss": 0.0002,
"epoch": 4.512555391432792,
"grad_norm": 0.005062537267804146,
"learning_rate": 2.167076973576235e-06,
"step": 30550
},
{
"embedding_loss": 0.0001,
"epoch": 4.519940915805022,
"grad_norm": 0.007456169463694096,
"learning_rate": 2.1342524208107666e-06,
"step": 30600
},
{
"embedding_loss": 0.0014,
"epoch": 4.527326440177252,
"grad_norm": 0.021971579641103745,
"learning_rate": 2.1014278680452982e-06,
"step": 30650
},
{
"embedding_loss": 0.0003,
"epoch": 4.534711964549483,
"grad_norm": 0.006037925370037556,
"learning_rate": 2.0686033152798294e-06,
"step": 30700
},
{
"embedding_loss": 0.0007,
"epoch": 4.542097488921714,
"grad_norm": 0.020268207415938377,
"learning_rate": 2.035778762514361e-06,
"step": 30750
},
{
"embedding_loss": 0.0009,
"epoch": 4.549483013293944,
"grad_norm": 0.003740853862836957,
"learning_rate": 2.0029542097488923e-06,
"step": 30800
},
{
"embedding_loss": 0.0001,
"epoch": 4.556868537666174,
"grad_norm": 0.006321648135781288,
"learning_rate": 1.970129656983424e-06,
"step": 30850
},
{
"embedding_loss": 0.001,
"epoch": 4.564254062038405,
"grad_norm": 0.049009013921022415,
"learning_rate": 1.937305104217955e-06,
"step": 30900
},
{
"embedding_loss": 0.0001,
"epoch": 4.571639586410635,
"grad_norm": 0.006071667652577162,
"learning_rate": 1.9044805514524867e-06,
"step": 30950
},
{
"embedding_loss": 0.0005,
"epoch": 4.579025110782865,
"grad_norm": 0.025013990700244904,
"learning_rate": 1.8716559986870181e-06,
"step": 31000
},
{
"embedding_loss": 0.0003,
"epoch": 4.586410635155096,
"grad_norm": 0.0030903525184839964,
"learning_rate": 1.8388314459215495e-06,
"step": 31050
},
{
"embedding_loss": 0.0001,
"epoch": 4.593796159527327,
"grad_norm": 0.006547342520207167,
"learning_rate": 1.806006893156081e-06,
"step": 31100
},
{
"embedding_loss": 0.0007,
"epoch": 4.601181683899557,
"grad_norm": 0.0045944456942379475,
"learning_rate": 1.7731823403906123e-06,
"step": 31150
},
{
"embedding_loss": 0.0003,
"epoch": 4.608567208271787,
"grad_norm": 0.006300546228885651,
"learning_rate": 1.7403577876251437e-06,
"step": 31200
},
{
"embedding_loss": 0.0009,
"epoch": 4.6159527326440175,
"grad_norm": 0.017787380144000053,
"learning_rate": 1.7075332348596751e-06,
"step": 31250
},
{
"embedding_loss": 0.0003,
"epoch": 4.623338257016248,
"grad_norm": 0.006018889602273703,
"learning_rate": 1.6747086820942066e-06,
"step": 31300
},
{
"embedding_loss": 0.0003,
"epoch": 4.630723781388479,
"grad_norm": 0.03421813249588013,
"learning_rate": 1.641884129328738e-06,
"step": 31350
},
{
"embedding_loss": 0.0001,
"epoch": 4.638109305760709,
"grad_norm": 0.002777187153697014,
"learning_rate": 1.6090595765632694e-06,
"step": 31400
},
{
"embedding_loss": 0.0007,
"epoch": 4.6454948301329395,
"grad_norm": 0.013371906243264675,
"learning_rate": 1.5762350237978008e-06,
"step": 31450
},
{
"embedding_loss": 0.0014,
"epoch": 4.65288035450517,
"grad_norm": 0.01614244654774666,
"learning_rate": 1.5434104710323322e-06,
"step": 31500
},
{
"embedding_loss": 0.0008,
"epoch": 4.6602658788774,
"grad_norm": 0.014399105682969093,
"learning_rate": 1.5105859182668636e-06,
"step": 31550
},
{
"embedding_loss": 0.0003,
"epoch": 4.66765140324963,
"grad_norm": 0.007542195729911327,
"learning_rate": 1.477761365501395e-06,
"step": 31600
},
{
"embedding_loss": 0.001,
"epoch": 4.6750369276218615,
"grad_norm": 0.0031674772035330534,
"learning_rate": 1.4449368127359264e-06,
"step": 31650
},
{
"embedding_loss": 0.0013,
"epoch": 4.682422451994092,
"grad_norm": 0.012060785666108131,
"learning_rate": 1.4121122599704582e-06,
"step": 31700
},
{
"embedding_loss": 0.0015,
"epoch": 4.689807976366322,
"grad_norm": 0.04494306072592735,
"learning_rate": 1.3792877072049897e-06,
"step": 31750
},
{
"embedding_loss": 0.0017,
"epoch": 4.697193500738552,
"grad_norm": 0.009011705406010151,
"learning_rate": 1.346463154439521e-06,
"step": 31800
},
{
"embedding_loss": 0.0002,
"epoch": 4.704579025110783,
"grad_norm": 0.002131384564563632,
"learning_rate": 1.3136386016740525e-06,
"step": 31850
},
{
"embedding_loss": 0.0014,
"epoch": 4.711964549483013,
"grad_norm": 0.003283638972789049,
"learning_rate": 1.2808140489085839e-06,
"step": 31900
},
{
"embedding_loss": 0.0003,
"epoch": 4.719350073855244,
"grad_norm": 0.0024919663555920124,
"learning_rate": 1.2479894961431153e-06,
"step": 31950
},
{
"embedding_loss": 0.0002,
"epoch": 4.726735598227474,
"grad_norm": 0.14705249667167664,
"learning_rate": 1.2151649433776467e-06,
"step": 32000
},
{
"embedding_loss": 0.0009,
"epoch": 4.734121122599705,
"grad_norm": 0.0017327765235677361,
"learning_rate": 1.182340390612178e-06,
"step": 32050
},
{
"embedding_loss": 0.0008,
"epoch": 4.741506646971935,
"grad_norm": 0.8608806729316711,
"learning_rate": 1.1495158378467095e-06,
"step": 32100
},
{
"embedding_loss": 0.0011,
"epoch": 4.748892171344165,
"grad_norm": 0.019515294581651688,
"learning_rate": 1.116691285081241e-06,
"step": 32150
},
{
"embedding_loss": 0.0002,
"epoch": 4.7562776957163955,
"grad_norm": 0.010132347233593464,
"learning_rate": 1.0838667323157723e-06,
"step": 32200
},
{
"embedding_loss": 0.0002,
"epoch": 4.763663220088627,
"grad_norm": 0.010688789188861847,
"learning_rate": 1.0510421795503037e-06,
"step": 32250
},
{
"embedding_loss": 0.0004,
"epoch": 4.771048744460857,
"grad_norm": 0.004127997439354658,
"learning_rate": 1.0182176267848351e-06,
"step": 32300
},
{
"embedding_loss": 0.0011,
"epoch": 4.778434268833087,
"grad_norm": 0.0045281765051186085,
"learning_rate": 9.853930740193666e-07,
"step": 32350
},
{
"embedding_loss": 0.0009,
"epoch": 4.7858197932053175,
"grad_norm": 0.03462732210755348,
"learning_rate": 9.52568521253898e-07,
"step": 32400
},
{
"embedding_loss": 0.0002,
"epoch": 4.793205317577548,
"grad_norm": 0.011012708768248558,
"learning_rate": 9.197439684884295e-07,
"step": 32450
},
{
"embedding_loss": 0.0017,
"epoch": 4.800590841949779,
"grad_norm": 0.002184939570724964,
"learning_rate": 8.869194157229609e-07,
"step": 32500
},
{
"embedding_loss": 0.0003,
"epoch": 4.807976366322009,
"grad_norm": 0.0029540294781327248,
"learning_rate": 8.540948629574923e-07,
"step": 32550
},
{
"embedding_loss": 0.0009,
"epoch": 4.8153618906942395,
"grad_norm": 0.0017814520979300141,
"learning_rate": 8.212703101920237e-07,
"step": 32600
},
{
"embedding_loss": 0.0007,
"epoch": 4.82274741506647,
"grad_norm": 0.008224143646657467,
"learning_rate": 7.884457574265551e-07,
"step": 32650
},
{
"embedding_loss": 0.0013,
"epoch": 4.8301329394387,
"grad_norm": 0.0046016438864171505,
"learning_rate": 7.556212046610865e-07,
"step": 32700
},
{
"embedding_loss": 0.0007,
"epoch": 4.83751846381093,
"grad_norm": 0.011043339967727661,
"learning_rate": 7.227966518956179e-07,
"step": 32750
},
{
"embedding_loss": 0.0002,
"epoch": 4.844903988183161,
"grad_norm": 0.010177507996559143,
"learning_rate": 6.899720991301493e-07,
"step": 32800
},
{
"embedding_loss": 0.0004,
"epoch": 4.852289512555392,
"grad_norm": 0.006154090631753206,
"learning_rate": 6.571475463646807e-07,
"step": 32850
},
{
"embedding_loss": 0.0002,
"epoch": 4.859675036927622,
"grad_norm": 0.0051424214616417885,
"learning_rate": 6.243229935992123e-07,
"step": 32900
},
{
"embedding_loss": 0.0002,
"epoch": 4.867060561299852,
"grad_norm": 0.045284390449523926,
"learning_rate": 5.914984408337437e-07,
"step": 32950
},
{
"embedding_loss": 0.0001,
"epoch": 4.874446085672083,
"grad_norm": 0.006224027369171381,
"learning_rate": 5.586738880682751e-07,
"step": 33000
},
{
"embedding_loss": 0.0005,
"epoch": 4.881831610044313,
"grad_norm": 0.06909705698490143,
"learning_rate": 5.258493353028066e-07,
"step": 33050
},
{
"embedding_loss": 0.0011,
"epoch": 4.889217134416544,
"grad_norm": 0.007023925427347422,
"learning_rate": 4.93024782537338e-07,
"step": 33100
},
{
"embedding_loss": 0.0008,
"epoch": 4.896602658788774,
"grad_norm": 0.005172157660126686,
"learning_rate": 4.602002297718694e-07,
"step": 33150
},
{
"embedding_loss": 0.001,
"epoch": 4.903988183161005,
"grad_norm": 0.012793969362974167,
"learning_rate": 4.273756770064008e-07,
"step": 33200
},
{
"embedding_loss": 0.001,
"epoch": 4.911373707533235,
"grad_norm": 0.0392751581966877,
"learning_rate": 3.945511242409322e-07,
"step": 33250
},
{
"embedding_loss": 0.0012,
"epoch": 4.918759231905465,
"grad_norm": 0.007948558777570724,
"learning_rate": 3.6172657147546373e-07,
"step": 33300
},
{
"embedding_loss": 0.0003,
"epoch": 4.926144756277695,
"grad_norm": 0.0075578768737614155,
"learning_rate": 3.2890201870999514e-07,
"step": 33350
},
{
"embedding_loss": 0.0002,
"epoch": 4.933530280649926,
"grad_norm": 0.007234030868858099,
"learning_rate": 2.9607746594452655e-07,
"step": 33400
},
{
"embedding_loss": 0.0014,
"epoch": 4.940915805022157,
"grad_norm": 0.01585334725677967,
"learning_rate": 2.6325291317905796e-07,
"step": 33450
},
{
"embedding_loss": 0.0001,
"epoch": 4.948301329394387,
"grad_norm": 0.005034049041569233,
"learning_rate": 2.304283604135894e-07,
"step": 33500
},
{
"embedding_loss": 0.0007,
"epoch": 4.955686853766617,
"grad_norm": 0.0041028158739209175,
"learning_rate": 1.976038076481208e-07,
"step": 33550
},
{
"embedding_loss": 0.0007,
"epoch": 4.963072378138848,
"grad_norm": 0.006748030427843332,
"learning_rate": 1.647792548826522e-07,
"step": 33600
},
{
"embedding_loss": 0.0014,
"epoch": 4.970457902511078,
"grad_norm": 0.004400690086185932,
"learning_rate": 1.3195470211718367e-07,
"step": 33650
},
{
"embedding_loss": 0.0003,
"epoch": 4.977843426883309,
"grad_norm": 0.2393534779548645,
"learning_rate": 9.913014935171508e-08,
"step": 33700
},
{
"embedding_loss": 0.0002,
"epoch": 4.985228951255539,
"grad_norm": 0.017467621713876724,
"learning_rate": 6.630559658624653e-08,
"step": 33750
},
{
"embedding_loss": 0.002,
"epoch": 4.99261447562777,
"grad_norm": 0.006581551861017942,
"learning_rate": 3.348104382077794e-08,
"step": 33800
},
{
"embedding_loss": 0.0007,
"epoch": 5.0,
"grad_norm": 0.007424783427268267,
"learning_rate": 6.564910553093716e-10,
"step": 33850
}
],
"logging_steps": 50,
"max_steps": 33850,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}