{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 33850,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "embedding_loss": 0.2247,
      "epoch": 0.00014771048744460856,
      "grad_norm": 1.4463119506835938,
      "learning_rate": 0.0,
      "step": 1
    },
    {
      "embedding_loss": 0.2914,
      "epoch": 0.007385524372230428,
      "grad_norm": 1.524917721748352,
      "learning_rate": 2.8951255539143283e-07,
      "step": 50
    },
    {
      "embedding_loss": 0.2746,
      "epoch": 0.014771048744460856,
      "grad_norm": 1.1199702024459839,
      "learning_rate": 5.849335302806499e-07,
      "step": 100
    },
    {
      "embedding_loss": 0.2579,
      "epoch": 0.022156573116691284,
      "grad_norm": 1.1694086790084839,
      "learning_rate": 8.803545051698672e-07,
      "step": 150
    },
    {
      "embedding_loss": 0.2499,
      "epoch": 0.029542097488921712,
      "grad_norm": 0.9039924144744873,
      "learning_rate": 1.1757754800590842e-06,
      "step": 200
    },
    {
      "embedding_loss": 0.2386,
      "epoch": 0.03692762186115214,
      "grad_norm": 1.048995018005371,
      "learning_rate": 1.4711964549483015e-06,
      "step": 250
    },
    {
      "embedding_loss": 0.2269,
      "epoch": 0.04431314623338257,
      "grad_norm": 1.1124966144561768,
      "learning_rate": 1.7666174298375186e-06,
      "step": 300
    },
    {
      "embedding_loss": 0.2171,
      "epoch": 0.051698670605613,
      "grad_norm": 1.0527081489562988,
      "learning_rate": 2.062038404726736e-06,
      "step": 350
    },
    {
      "embedding_loss": 0.1999,
      "epoch": 0.059084194977843424,
      "grad_norm": 1.5473452806472778,
      "learning_rate": 2.3574593796159526e-06,
      "step": 400
    },
    {
      "embedding_loss": 0.1787,
      "epoch": 0.06646971935007386,
      "grad_norm": 1.0279266834259033,
      "learning_rate": 2.65288035450517e-06,
      "step": 450
    },
    {
      "embedding_loss": 0.1647,
      "epoch": 0.07385524372230429,
      "grad_norm": 1.260780930519104,
      "learning_rate": 2.9483013293943873e-06,
      "step": 500
    },
    {
      "embedding_loss": 0.1581,
      "epoch": 0.08124076809453472,
      "grad_norm": 0.8986091017723083,
      "learning_rate": 3.243722304283604e-06,
      "step": 550
    },
    {
      "embedding_loss": 0.1531,
      "epoch": 0.08862629246676514,
      "grad_norm": 1.3418588638305664,
      "learning_rate": 3.5391432791728215e-06,
      "step": 600
    },
    {
      "embedding_loss": 0.1475,
      "epoch": 0.09601181683899557,
      "grad_norm": 0.9869722723960876,
      "learning_rate": 3.834564254062039e-06,
      "step": 650
    },
    {
      "embedding_loss": 0.1375,
      "epoch": 0.103397341211226,
      "grad_norm": 1.1203314065933228,
      "learning_rate": 4.129985228951256e-06,
      "step": 700
    },
    {
      "embedding_loss": 0.1274,
      "epoch": 0.11078286558345643,
      "grad_norm": 0.8669000267982483,
      "learning_rate": 4.425406203840473e-06,
      "step": 750
    },
    {
      "embedding_loss": 0.1312,
      "epoch": 0.11816838995568685,
      "grad_norm": 1.493843913078308,
      "learning_rate": 4.72082717872969e-06,
      "step": 800
    },
    {
      "embedding_loss": 0.1228,
      "epoch": 0.1255539143279173,
      "grad_norm": 0.9798605442047119,
      "learning_rate": 5.0162481536189075e-06,
      "step": 850
    },
    {
      "embedding_loss": 0.118,
      "epoch": 0.1329394387001477,
      "grad_norm": 1.3890421390533447,
      "learning_rate": 5.311669128508124e-06,
      "step": 900
    },
    {
      "embedding_loss": 0.1117,
      "epoch": 0.14032496307237813,
      "grad_norm": 1.3494268655776978,
      "learning_rate": 5.607090103397341e-06,
      "step": 950
    },
    {
      "embedding_loss": 0.1108,
      "epoch": 0.14771048744460857,
      "grad_norm": 0.9772982001304626,
      "learning_rate": 5.902511078286559e-06,
      "step": 1000
    },
    {
      "embedding_loss": 0.0941,
      "epoch": 0.155096011816839,
      "grad_norm": 0.8814387917518616,
      "learning_rate": 6.197932053175776e-06,
      "step": 1050
    },
    {
      "embedding_loss": 0.0917,
      "epoch": 0.16248153618906944,
      "grad_norm": 1.0253106355667114,
      "learning_rate": 6.4933530280649935e-06,
      "step": 1100
    },
    {
      "embedding_loss": 0.0961,
      "epoch": 0.16986706056129985,
      "grad_norm": 0.9646548628807068,
      "learning_rate": 6.78877400295421e-06,
      "step": 1150
    },
    {
      "embedding_loss": 0.0896,
      "epoch": 0.17725258493353027,
      "grad_norm": 0.9323801398277283,
      "learning_rate": 7.084194977843427e-06,
      "step": 1200
    },
    {
      "embedding_loss": 0.092,
      "epoch": 0.18463810930576072,
      "grad_norm": 1.0876870155334473,
      "learning_rate": 7.379615952732645e-06,
      "step": 1250
    },
    {
      "embedding_loss": 0.0895,
      "epoch": 0.19202363367799113,
      "grad_norm": 1.2848527431488037,
      "learning_rate": 7.675036927621861e-06,
      "step": 1300
    },
    {
      "embedding_loss": 0.0823,
      "epoch": 0.19940915805022155,
      "grad_norm": 1.4480323791503906,
      "learning_rate": 7.970457902511078e-06,
      "step": 1350
    },
    {
      "embedding_loss": 0.0809,
      "epoch": 0.206794682422452,
      "grad_norm": 1.5744627714157104,
      "learning_rate": 8.265878877400296e-06,
      "step": 1400
    },
    {
      "embedding_loss": 0.0766,
      "epoch": 0.21418020679468242,
      "grad_norm": 1.5040708780288696,
      "learning_rate": 8.561299852289513e-06,
      "step": 1450
    },
    {
      "embedding_loss": 0.0733,
      "epoch": 0.22156573116691286,
      "grad_norm": 1.0043632984161377,
      "learning_rate": 8.856720827178731e-06,
      "step": 1500
    },
    {
      "embedding_loss": 0.0778,
      "epoch": 0.22895125553914328,
      "grad_norm": 1.5973531007766724,
      "learning_rate": 9.152141802067948e-06,
      "step": 1550
    },
    {
      "embedding_loss": 0.0715,
      "epoch": 0.2363367799113737,
      "grad_norm": 1.5385518074035645,
      "learning_rate": 9.447562776957165e-06,
      "step": 1600
    },
    {
      "embedding_loss": 0.0701,
      "epoch": 0.24372230428360414,
      "grad_norm": 1.069740653038025,
      "learning_rate": 9.742983751846381e-06,
      "step": 1650
    },
    {
      "embedding_loss": 0.0664,
      "epoch": 0.2511078286558346,
      "grad_norm": 0.7774745225906372,
      "learning_rate": 1.00384047267356e-05,
      "step": 1700
    },
    {
      "embedding_loss": 0.0645,
      "epoch": 0.258493353028065,
      "grad_norm": 0.9332543015480042,
      "learning_rate": 1.0333825701624816e-05,
      "step": 1750
    },
    {
      "embedding_loss": 0.061,
      "epoch": 0.2658788774002954,
      "grad_norm": 1.161795973777771,
      "learning_rate": 1.0629246676514033e-05,
      "step": 1800
    },
    {
      "embedding_loss": 0.0625,
      "epoch": 0.27326440177252587,
      "grad_norm": 0.773992657661438,
      "learning_rate": 1.0924667651403251e-05,
      "step": 1850
    },
    {
      "embedding_loss": 0.054,
      "epoch": 0.28064992614475626,
      "grad_norm": 0.8391284346580505,
      "learning_rate": 1.1220088626292466e-05,
      "step": 1900
    },
    {
      "embedding_loss": 0.0612,
      "epoch": 0.2880354505169867,
      "grad_norm": 0.6210933923721313,
      "learning_rate": 1.1515509601181685e-05,
      "step": 1950
    },
    {
      "embedding_loss": 0.0579,
      "epoch": 0.29542097488921715,
      "grad_norm": 0.8674732446670532,
      "learning_rate": 1.1810930576070903e-05,
      "step": 2000
    },
    {
      "embedding_loss": 0.0566,
      "epoch": 0.30280649926144754,
      "grad_norm": 1.716627597808838,
      "learning_rate": 1.2106351550960118e-05,
      "step": 2050
    },
    {
      "embedding_loss": 0.0495,
      "epoch": 0.310192023633678,
      "grad_norm": 1.162758231163025,
      "learning_rate": 1.2401772525849337e-05,
      "step": 2100
    },
    {
      "embedding_loss": 0.0514,
      "epoch": 0.3175775480059084,
      "grad_norm": 1.5488636493682861,
      "learning_rate": 1.2697193500738553e-05,
      "step": 2150
    },
    {
      "embedding_loss": 0.0478,
      "epoch": 0.3249630723781389,
      "grad_norm": 1.5231482982635498,
      "learning_rate": 1.2992614475627772e-05,
      "step": 2200
    },
    {
      "embedding_loss": 0.0484,
      "epoch": 0.33234859675036926,
      "grad_norm": 1.966753602027893,
      "learning_rate": 1.3288035450516987e-05,
      "step": 2250
    },
    {
      "embedding_loss": 0.0547,
      "epoch": 0.3397341211225997,
      "grad_norm": 2.125790596008301,
      "learning_rate": 1.3583456425406205e-05,
      "step": 2300
    },
    {
      "embedding_loss": 0.0466,
      "epoch": 0.34711964549483015,
      "grad_norm": 1.8197243213653564,
      "learning_rate": 1.3878877400295423e-05,
      "step": 2350
    },
    {
      "embedding_loss": 0.0454,
      "epoch": 0.35450516986706054,
      "grad_norm": 0.8179060816764832,
      "learning_rate": 1.4174298375184638e-05,
      "step": 2400
    },
    {
      "embedding_loss": 0.041,
      "epoch": 0.361890694239291,
      "grad_norm": 0.3561592400074005,
      "learning_rate": 1.4469719350073857e-05,
      "step": 2450
    },
    {
      "embedding_loss": 0.0395,
      "epoch": 0.36927621861152143,
      "grad_norm": 0.40876850485801697,
      "learning_rate": 1.4765140324963074e-05,
      "step": 2500
    },
    {
      "embedding_loss": 0.0398,
      "epoch": 0.3766617429837518,
      "grad_norm": 1.050619125366211,
      "learning_rate": 1.506056129985229e-05,
      "step": 2550
    },
    {
      "embedding_loss": 0.0415,
      "epoch": 0.38404726735598227,
      "grad_norm": 0.24330730736255646,
      "learning_rate": 1.5355982274741507e-05,
      "step": 2600
    },
    {
      "embedding_loss": 0.0367,
      "epoch": 0.3914327917282127,
      "grad_norm": 2.6866581439971924,
      "learning_rate": 1.5651403249630725e-05,
      "step": 2650
    },
    {
      "embedding_loss": 0.0331,
      "epoch": 0.3988183161004431,
      "grad_norm": 0.7530401945114136,
      "learning_rate": 1.594682422451994e-05,
      "step": 2700
    },
    {
      "embedding_loss": 0.0399,
      "epoch": 0.40620384047267355,
      "grad_norm": 0.4778743386268616,
      "learning_rate": 1.624224519940916e-05,
      "step": 2750
    },
    {
      "embedding_loss": 0.0342,
      "epoch": 0.413589364844904,
      "grad_norm": 0.8823883533477783,
      "learning_rate": 1.6537666174298377e-05,
      "step": 2800
    },
    {
      "embedding_loss": 0.0356,
      "epoch": 0.42097488921713444,
      "grad_norm": 2.0318665504455566,
      "learning_rate": 1.6833087149187595e-05,
      "step": 2850
    },
    {
      "embedding_loss": 0.0346,
      "epoch": 0.42836041358936483,
      "grad_norm": 0.46766990423202515,
      "learning_rate": 1.712850812407681e-05,
      "step": 2900
    },
    {
      "embedding_loss": 0.0326,
      "epoch": 0.4357459379615953,
      "grad_norm": 1.899274468421936,
      "learning_rate": 1.742392909896603e-05,
      "step": 2950
    },
    {
      "embedding_loss": 0.0301,
      "epoch": 0.4431314623338257,
      "grad_norm": 0.4528331458568573,
      "learning_rate": 1.7719350073855247e-05,
      "step": 3000
    },
    {
      "embedding_loss": 0.0297,
      "epoch": 0.4505169867060561,
      "grad_norm": 1.74443519115448,
      "learning_rate": 1.8014771048744462e-05,
      "step": 3050
    },
    {
      "embedding_loss": 0.0318,
      "epoch": 0.45790251107828656,
      "grad_norm": 0.41255202889442444,
      "learning_rate": 1.831019202363368e-05,
      "step": 3100
    },
    {
      "embedding_loss": 0.0288,
      "epoch": 0.465288035450517,
      "grad_norm": 0.7493127584457397,
      "learning_rate": 1.8605612998522896e-05,
      "step": 3150
    },
    {
      "embedding_loss": 0.0324,
      "epoch": 0.4726735598227474,
      "grad_norm": 0.12168914079666138,
      "learning_rate": 1.8901033973412114e-05,
      "step": 3200
    },
    {
      "embedding_loss": 0.024,
      "epoch": 0.48005908419497784,
      "grad_norm": 1.5052778720855713,
      "learning_rate": 1.9196454948301332e-05,
      "step": 3250
    },
    {
      "embedding_loss": 0.0299,
      "epoch": 0.4874446085672083,
      "grad_norm": 0.22781763970851898,
      "learning_rate": 1.9491875923190547e-05,
      "step": 3300
    },
    {
      "embedding_loss": 0.0315,
      "epoch": 0.4948301329394387,
      "grad_norm": 0.7878602147102356,
      "learning_rate": 1.9787296898079766e-05,
      "step": 3350
    },
    {
      "embedding_loss": 0.0267,
      "epoch": 0.5022156573116692,
      "grad_norm": 0.823674738407135,
      "learning_rate": 1.999080912522567e-05,
      "step": 3400
    },
    {
      "embedding_loss": 0.0268,
      "epoch": 0.5096011816838996,
      "grad_norm": 0.6394932866096497,
      "learning_rate": 1.99579845724602e-05,
      "step": 3450
    },
    {
      "embedding_loss": 0.0231,
      "epoch": 0.51698670605613,
      "grad_norm": 0.6224627494812012,
      "learning_rate": 1.9925160019694733e-05,
      "step": 3500
    },
    {
      "embedding_loss": 0.0257,
      "epoch": 0.5243722304283605,
      "grad_norm": 2.495439291000366,
      "learning_rate": 1.9892335466929265e-05,
      "step": 3550
    },
    {
      "embedding_loss": 0.023,
      "epoch": 0.5317577548005908,
      "grad_norm": 2.96049165725708,
      "learning_rate": 1.9859510914163794e-05,
      "step": 3600
    },
    {
      "embedding_loss": 0.0222,
      "epoch": 0.5391432791728212,
      "grad_norm": 0.24069173634052277,
      "learning_rate": 1.982668636139833e-05,
      "step": 3650
    },
    {
      "embedding_loss": 0.0244,
      "epoch": 0.5465288035450517,
      "grad_norm": 2.0120162963867188,
      "learning_rate": 1.9793861808632858e-05,
      "step": 3700
    },
    {
      "embedding_loss": 0.0218,
      "epoch": 0.5539143279172821,
      "grad_norm": 0.8145495653152466,
      "learning_rate": 1.976103725586739e-05,
      "step": 3750
    },
    {
      "embedding_loss": 0.0267,
      "epoch": 0.5612998522895125,
      "grad_norm": 0.40902212262153625,
      "learning_rate": 1.9728212703101922e-05,
      "step": 3800
    },
    {
      "embedding_loss": 0.0221,
      "epoch": 0.568685376661743,
      "grad_norm": 0.1750016063451767,
      "learning_rate": 1.9695388150336454e-05,
      "step": 3850
    },
    {
      "embedding_loss": 0.0169,
      "epoch": 0.5760709010339734,
      "grad_norm": 0.4834084212779999,
      "learning_rate": 1.9662563597570986e-05,
      "step": 3900
    },
    {
      "embedding_loss": 0.0203,
      "epoch": 0.5834564254062038,
      "grad_norm": 0.18973205983638763,
      "learning_rate": 1.9629739044805515e-05,
      "step": 3950
    },
    {
      "embedding_loss": 0.0184,
      "epoch": 0.5908419497784343,
      "grad_norm": 0.4886401295661926,
      "learning_rate": 1.9596914492040047e-05,
      "step": 4000
    },
    {
      "embedding_loss": 0.0175,
      "epoch": 0.5982274741506647,
      "grad_norm": 0.8744384050369263,
      "learning_rate": 1.956408993927458e-05,
      "step": 4050
    },
    {
      "embedding_loss": 0.0219,
      "epoch": 0.6056129985228951,
      "grad_norm": 1.2341519594192505,
      "learning_rate": 1.953126538650911e-05,
      "step": 4100
    },
    {
      "embedding_loss": 0.0175,
      "epoch": 0.6129985228951256,
      "grad_norm": 0.4706520140171051,
      "learning_rate": 1.9498440833743643e-05,
      "step": 4150
    },
    {
      "embedding_loss": 0.017,
      "epoch": 0.620384047267356,
      "grad_norm": 0.12396424263715744,
      "learning_rate": 1.946561628097817e-05,
      "step": 4200
    },
    {
      "embedding_loss": 0.0181,
      "epoch": 0.6277695716395865,
      "grad_norm": 1.962485909461975,
      "learning_rate": 1.9432791728212707e-05,
      "step": 4250
    },
    {
      "embedding_loss": 0.0164,
      "epoch": 0.6351550960118169,
      "grad_norm": 2.613374948501587,
      "learning_rate": 1.9399967175447236e-05,
      "step": 4300
    },
    {
      "embedding_loss": 0.0129,
      "epoch": 0.6425406203840472,
      "grad_norm": 0.3567068874835968,
      "learning_rate": 1.9367142622681768e-05,
      "step": 4350
    },
    {
      "embedding_loss": 0.0136,
      "epoch": 0.6499261447562777,
      "grad_norm": 1.5771572589874268,
      "learning_rate": 1.9334318069916296e-05,
      "step": 4400
    },
    {
      "embedding_loss": 0.0169,
      "epoch": 0.6573116691285081,
      "grad_norm": 0.31481969356536865,
      "learning_rate": 1.9301493517150832e-05,
      "step": 4450
    },
    {
      "embedding_loss": 0.0154,
      "epoch": 0.6646971935007385,
      "grad_norm": 0.16484883427619934,
      "learning_rate": 1.926866896438536e-05,
      "step": 4500
    },
    {
      "embedding_loss": 0.0168,
      "epoch": 0.672082717872969,
      "grad_norm": 0.279256671667099,
      "learning_rate": 1.9235844411619893e-05,
      "step": 4550
    },
    {
      "embedding_loss": 0.0158,
      "epoch": 0.6794682422451994,
      "grad_norm": 0.2343069612979889,
      "learning_rate": 1.9203019858854425e-05,
      "step": 4600
    },
    {
      "embedding_loss": 0.0157,
      "epoch": 0.6868537666174298,
      "grad_norm": 0.17091761529445648,
      "learning_rate": 1.9170195306088957e-05,
      "step": 4650
    },
    {
      "embedding_loss": 0.0127,
      "epoch": 0.6942392909896603,
      "grad_norm": 1.3237155675888062,
      "learning_rate": 1.913737075332349e-05,
      "step": 4700
    },
    {
      "embedding_loss": 0.0116,
      "epoch": 0.7016248153618907,
      "grad_norm": 0.7258033752441406,
      "learning_rate": 1.9104546200558017e-05,
      "step": 4750
    },
    {
      "embedding_loss": 0.0134,
      "epoch": 0.7090103397341211,
      "grad_norm": 3.0486900806427,
      "learning_rate": 1.907172164779255e-05,
      "step": 4800
    },
    {
      "embedding_loss": 0.012,
      "epoch": 0.7163958641063516,
      "grad_norm": 0.10283143818378448,
      "learning_rate": 1.903889709502708e-05,
      "step": 4850
    },
    {
      "embedding_loss": 0.0134,
      "epoch": 0.723781388478582,
      "grad_norm": 0.3316308259963989,
      "learning_rate": 1.9006072542261613e-05,
      "step": 4900
    },
    {
      "embedding_loss": 0.0157,
      "epoch": 0.7311669128508124,
      "grad_norm": 0.421657919883728,
      "learning_rate": 1.8973247989496146e-05,
      "step": 4950
    },
    {
      "embedding_loss": 0.0121,
      "epoch": 0.7385524372230429,
      "grad_norm": 0.4950125813484192,
      "learning_rate": 1.8940423436730678e-05,
      "step": 5000
    },
    {
      "embedding_loss": 0.0134,
      "epoch": 0.7459379615952733,
      "grad_norm": 0.5293028950691223,
      "learning_rate": 1.890759888396521e-05,
      "step": 5050
    },
    {
      "embedding_loss": 0.0083,
      "epoch": 0.7533234859675036,
      "grad_norm": 2.0652644634246826,
      "learning_rate": 1.8874774331199738e-05,
      "step": 5100
    },
    {
      "embedding_loss": 0.0122,
      "epoch": 0.7607090103397341,
      "grad_norm": 1.9949322938919067,
      "learning_rate": 1.884194977843427e-05,
      "step": 5150
    },
    {
      "embedding_loss": 0.0104,
      "epoch": 0.7680945347119645,
      "grad_norm": 0.07039645314216614,
      "learning_rate": 1.8809125225668802e-05,
      "step": 5200
    },
    {
      "embedding_loss": 0.0061,
      "epoch": 0.7754800590841949,
      "grad_norm": 0.07697559893131256,
      "learning_rate": 1.8776300672903334e-05,
      "step": 5250
    },
    {
      "embedding_loss": 0.0107,
      "epoch": 0.7828655834564254,
      "grad_norm": 0.05644530802965164,
      "learning_rate": 1.8743476120137863e-05,
      "step": 5300
    },
    {
      "embedding_loss": 0.0093,
      "epoch": 0.7902511078286558,
      "grad_norm": 0.34979447722435,
      "learning_rate": 1.8710651567372395e-05,
      "step": 5350
    },
    {
      "embedding_loss": 0.012,
      "epoch": 0.7976366322008862,
      "grad_norm": 0.06782261282205582,
      "learning_rate": 1.8677827014606927e-05,
      "step": 5400
    },
    {
      "embedding_loss": 0.0119,
      "epoch": 0.8050221565731167,
      "grad_norm": 0.11144471168518066,
      "learning_rate": 1.864500246184146e-05,
      "step": 5450
    },
    {
      "embedding_loss": 0.0114,
      "epoch": 0.8124076809453471,
      "grad_norm": 0.2110595852136612,
      "learning_rate": 1.861217790907599e-05,
      "step": 5500
    },
    {
      "embedding_loss": 0.0133,
      "epoch": 0.8197932053175776,
      "grad_norm": 0.49429744482040405,
      "learning_rate": 1.857935335631052e-05,
      "step": 5550
    },
    {
      "embedding_loss": 0.0087,
      "epoch": 0.827178729689808,
      "grad_norm": 0.07333461195230484,
      "learning_rate": 1.8546528803545055e-05,
      "step": 5600
    },
    {
      "embedding_loss": 0.008,
      "epoch": 0.8345642540620384,
      "grad_norm": 0.11741068214178085,
      "learning_rate": 1.8513704250779584e-05,
      "step": 5650
    },
    {
      "embedding_loss": 0.0058,
      "epoch": 0.8419497784342689,
      "grad_norm": 0.12451150268316269,
      "learning_rate": 1.8480879698014116e-05,
      "step": 5700
    },
    {
      "embedding_loss": 0.0098,
      "epoch": 0.8493353028064993,
      "grad_norm": 0.04639327526092529,
      "learning_rate": 1.8448055145248648e-05,
      "step": 5750
    },
    {
      "embedding_loss": 0.0083,
      "epoch": 0.8567208271787297,
      "grad_norm": 0.06967220455408096,
      "learning_rate": 1.841523059248318e-05,
      "step": 5800
    },
    {
      "embedding_loss": 0.0127,
      "epoch": 0.8641063515509602,
      "grad_norm": 1.032842755317688,
      "learning_rate": 1.8382406039717712e-05,
      "step": 5850
    },
    {
      "embedding_loss": 0.0119,
      "epoch": 0.8714918759231906,
      "grad_norm": 0.0814921110868454,
      "learning_rate": 1.834958148695224e-05,
      "step": 5900
    },
    {
      "embedding_loss": 0.0117,
      "epoch": 0.8788774002954209,
      "grad_norm": 3.7965452671051025,
      "learning_rate": 1.8316756934186773e-05,
      "step": 5950
    },
    {
      "embedding_loss": 0.0107,
      "epoch": 0.8862629246676514,
      "grad_norm": 0.13023847341537476,
      "learning_rate": 1.8283932381421305e-05,
      "step": 6000
    },
    {
      "embedding_loss": 0.0099,
      "epoch": 0.8936484490398818,
      "grad_norm": 0.15792806446552277,
      "learning_rate": 1.8251107828655837e-05,
      "step": 6050
    },
    {
      "embedding_loss": 0.0129,
      "epoch": 0.9010339734121122,
      "grad_norm": 0.06038963794708252,
      "learning_rate": 1.8218283275890366e-05,
      "step": 6100
    },
    {
      "embedding_loss": 0.0111,
      "epoch": 0.9084194977843427,
      "grad_norm": 0.26467612385749817,
      "learning_rate": 1.8185458723124898e-05,
      "step": 6150
    },
    {
      "embedding_loss": 0.0099,
      "epoch": 0.9158050221565731,
      "grad_norm": 0.354390025138855,
      "learning_rate": 1.815263417035943e-05,
      "step": 6200
    },
    {
      "embedding_loss": 0.0101,
      "epoch": 0.9231905465288035,
      "grad_norm": 1.5564332008361816,
      "learning_rate": 1.8119809617593962e-05,
      "step": 6250
    },
    {
      "embedding_loss": 0.0123,
      "epoch": 0.930576070901034,
      "grad_norm": 0.12284110486507416,
      "learning_rate": 1.8086985064828494e-05,
      "step": 6300
    },
    {
      "embedding_loss": 0.0055,
      "epoch": 0.9379615952732644,
      "grad_norm": 0.15565811097621918,
      "learning_rate": 1.8054160512063022e-05,
      "step": 6350
    },
    {
      "embedding_loss": 0.0105,
      "epoch": 0.9453471196454948,
      "grad_norm": 0.12946315109729767,
      "learning_rate": 1.8021335959297558e-05,
      "step": 6400
    },
    {
      "embedding_loss": 0.0071,
      "epoch": 0.9527326440177253,
      "grad_norm": 0.4842424690723419,
      "learning_rate": 1.7988511406532087e-05,
      "step": 6450
    },
    {
      "embedding_loss": 0.0074,
      "epoch": 0.9601181683899557,
      "grad_norm": 0.36668410897254944,
      "learning_rate": 1.795568685376662e-05,
      "step": 6500
    },
    {
      "embedding_loss": 0.007,
      "epoch": 0.9675036927621861,
      "grad_norm": 0.1203831359744072,
      "learning_rate": 1.792286230100115e-05,
      "step": 6550
    },
    {
      "embedding_loss": 0.0095,
      "epoch": 0.9748892171344166,
      "grad_norm": 0.046238359063863754,
      "learning_rate": 1.7890037748235683e-05,
      "step": 6600
    },
    {
      "embedding_loss": 0.0088,
      "epoch": 0.982274741506647,
      "grad_norm": 0.1258874386548996,
      "learning_rate": 1.7857213195470215e-05,
      "step": 6650
    },
    {
      "embedding_loss": 0.0052,
      "epoch": 0.9896602658788775,
      "grad_norm": 0.02032575197517872,
      "learning_rate": 1.7824388642704743e-05,
      "step": 6700
    },
    {
      "embedding_loss": 0.0079,
      "epoch": 0.9970457902511078,
      "grad_norm": 0.03921140730381012,
      "learning_rate": 1.7791564089939275e-05,
      "step": 6750
    },
    {
      "embedding_loss": 0.0069,
      "epoch": 1.0044313146233383,
      "grad_norm": 0.012766249477863312,
      "learning_rate": 1.7758739537173807e-05,
      "step": 6800
    },
    {
      "embedding_loss": 0.0058,
      "epoch": 1.0118168389955686,
      "grad_norm": 0.20952889323234558,
      "learning_rate": 1.772591498440834e-05,
      "step": 6850
    },
    {
      "embedding_loss": 0.0102,
      "epoch": 1.0192023633677991,
      "grad_norm": 0.04774490371346474,
      "learning_rate": 1.7693090431642868e-05,
      "step": 6900
    },
    {
      "embedding_loss": 0.0097,
      "epoch": 1.0265878877400296,
      "grad_norm": 0.15566791594028473,
      "learning_rate": 1.7660265878877404e-05,
      "step": 6950
    },
    {
      "embedding_loss": 0.0095,
      "epoch": 1.03397341211226,
      "grad_norm": 0.6467046141624451,
      "learning_rate": 1.7627441326111932e-05,
      "step": 7000
    },
    {
      "embedding_loss": 0.0082,
      "epoch": 1.0413589364844904,
      "grad_norm": 0.35328537225723267,
      "learning_rate": 1.7594616773346464e-05,
      "step": 7050
    },
    {
      "embedding_loss": 0.0066,
      "epoch": 1.048744460856721,
      "grad_norm": 0.3548614978790283,
      "learning_rate": 1.7561792220580996e-05,
      "step": 7100
    },
    {
      "embedding_loss": 0.009,
      "epoch": 1.0561299852289512,
      "grad_norm": 0.6114194393157959,
      "learning_rate": 1.752896766781553e-05,
      "step": 7150
    },
    {
      "embedding_loss": 0.0062,
      "epoch": 1.0635155096011817,
      "grad_norm": 0.7183836698532104,
      "learning_rate": 1.749614311505006e-05,
      "step": 7200
    },
    {
      "embedding_loss": 0.0082,
      "epoch": 1.0709010339734122,
      "grad_norm": 0.41628143191337585,
      "learning_rate": 1.746331856228459e-05,
      "step": 7250
    },
    {
      "embedding_loss": 0.0083,
      "epoch": 1.0782865583456425,
      "grad_norm": 0.22927437722682953,
      "learning_rate": 1.743049400951912e-05,
      "step": 7300
    },
    {
      "embedding_loss": 0.0089,
      "epoch": 1.085672082717873,
      "grad_norm": 0.11581069976091385,
      "learning_rate": 1.7397669456753653e-05,
      "step": 7350
    },
    {
      "embedding_loss": 0.0088,
      "epoch": 1.0930576070901035,
      "grad_norm": 1.374656081199646,
      "learning_rate": 1.7364844903988185e-05,
      "step": 7400
    },
    {
      "embedding_loss": 0.0075,
      "epoch": 1.1004431314623337,
      "grad_norm": 0.25289225578308105,
      "learning_rate": 1.7332020351222717e-05,
      "step": 7450
    },
    {
      "embedding_loss": 0.005,
      "epoch": 1.1078286558345642,
      "grad_norm": 0.034826990216970444,
      "learning_rate": 1.7299195798457246e-05,
      "step": 7500
    },
    {
      "embedding_loss": 0.0074,
      "epoch": 1.1152141802067947,
      "grad_norm": 0.20261834561824799,
      "learning_rate": 1.726637124569178e-05,
      "step": 7550
    },
    {
      "embedding_loss": 0.0062,
      "epoch": 1.122599704579025,
      "grad_norm": 0.01275601889938116,
      "learning_rate": 1.723354669292631e-05,
      "step": 7600
    },
    {
      "embedding_loss": 0.0062,
      "epoch": 1.1299852289512555,
      "grad_norm": 0.036308519542217255,
      "learning_rate": 1.7200722140160842e-05,
      "step": 7650
    },
    {
      "embedding_loss": 0.0079,
      "epoch": 1.137370753323486,
      "grad_norm": 0.05968335270881653,
      "learning_rate": 1.7167897587395374e-05,
      "step": 7700
    },
    {
      "embedding_loss": 0.0108,
      "epoch": 1.1447562776957163,
      "grad_norm": 1.3406931161880493,
      "learning_rate": 1.7135073034629906e-05,
      "step": 7750
    },
    {
      "embedding_loss": 0.0079,
      "epoch": 1.1521418020679468,
      "grad_norm": 0.07372719049453735,
      "learning_rate": 1.7102248481864435e-05,
      "step": 7800
    },
    {
      "embedding_loss": 0.0083,
      "epoch": 1.1595273264401773,
      "grad_norm": 0.38173583149909973,
      "learning_rate": 1.7069423929098967e-05,
      "step": 7850
    },
    {
      "embedding_loss": 0.0074,
      "epoch": 1.1669128508124076,
      "grad_norm": 0.1348145604133606,
      "learning_rate": 1.70365993763335e-05,
      "step": 7900
    },
    {
      "embedding_loss": 0.0078,
      "epoch": 1.174298375184638,
      "grad_norm": 0.0659070536494255,
      "learning_rate": 1.700377482356803e-05,
      "step": 7950
    },
    {
      "embedding_loss": 0.0057,
      "epoch": 1.1816838995568686,
      "grad_norm": 0.017487822100520134,
      "learning_rate": 1.6970950270802563e-05,
      "step": 8000
    },
    {
      "embedding_loss": 0.0057,
      "epoch": 1.1890694239290989,
      "grad_norm": 3.8321328163146973,
      "learning_rate": 1.693812571803709e-05,
      "step": 8050
    },
    {
      "embedding_loss": 0.005,
      "epoch": 1.1964549483013294,
      "grad_norm": 0.04197081923484802,
      "learning_rate": 1.6905301165271624e-05,
      "step": 8100
    },
    {
      "embedding_loss": 0.0099,
      "epoch": 1.2038404726735599,
      "grad_norm": 0.05384385213255882,
      "learning_rate": 1.6872476612506156e-05,
      "step": 8150
    },
    {
      "embedding_loss": 0.0041,
      "epoch": 1.2112259970457901,
      "grad_norm": 0.027099648490548134,
      "learning_rate": 1.6839652059740688e-05,
      "step": 8200
    },
    {
      "embedding_loss": 0.0095,
      "epoch": 1.2186115214180206,
      "grad_norm": 0.02560454048216343,
      "learning_rate": 1.680682750697522e-05,
      "step": 8250
    },
    {
      "embedding_loss": 0.0076,
      "epoch": 1.2259970457902511,
      "grad_norm": 0.0267130509018898,
      "learning_rate": 1.677400295420975e-05,
      "step": 8300
    },
    {
      "embedding_loss": 0.0065,
      "epoch": 1.2333825701624814,
      "grad_norm": 0.14713996648788452,
      "learning_rate": 1.6741178401444284e-05,
      "step": 8350
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.240768094534712,
      "grad_norm": 0.0488862581551075,
      "learning_rate": 1.6708353848678812e-05,
      "step": 8400
    },
    {
      "embedding_loss": 0.0059,
      "epoch": 1.2481536189069424,
      "grad_norm": 0.03769877180457115,
      "learning_rate": 1.6675529295913345e-05,
      "step": 8450
    },
    {
      "embedding_loss": 0.0083,
      "epoch": 1.2555391432791727,
      "grad_norm": 0.04677336663007736,
      "learning_rate": 1.6642704743147877e-05,
      "step": 8500
    },
    {
      "embedding_loss": 0.0069,
      "epoch": 1.2629246676514032,
      "grad_norm": 1.583303689956665,
      "learning_rate": 1.660988019038241e-05,
      "step": 8550
    },
    {
      "embedding_loss": 0.0059,
      "epoch": 1.2703101920236337,
      "grad_norm": 0.057745561003685,
      "learning_rate": 1.6577055637616937e-05,
      "step": 8600
    },
    {
      "embedding_loss": 0.0048,
      "epoch": 1.277695716395864,
      "grad_norm": 0.05651646852493286,
      "learning_rate": 1.654423108485147e-05,
      "step": 8650
    },
    {
      "embedding_loss": 0.0081,
      "epoch": 1.2850812407680945,
      "grad_norm": 0.5371580719947815,
      "learning_rate": 1.6511406532086e-05,
      "step": 8700
    },
    {
      "embedding_loss": 0.0056,
      "epoch": 1.292466765140325,
      "grad_norm": 0.01594601757824421,
      "learning_rate": 1.6478581979320533e-05,
      "step": 8750
    },
    {
      "embedding_loss": 0.0069,
      "epoch": 1.2998522895125553,
      "grad_norm": 0.22201408445835114,
      "learning_rate": 1.6445757426555065e-05,
      "step": 8800
    },
    {
      "embedding_loss": 0.005,
      "epoch": 1.3072378138847858,
      "grad_norm": 0.0434761643409729,
      "learning_rate": 1.6412932873789594e-05,
      "step": 8850
    },
    {
      "embedding_loss": 0.0057,
      "epoch": 1.3146233382570163,
      "grad_norm": 0.20662403106689453,
      "learning_rate": 1.6380108321024126e-05,
      "step": 8900
    },
    {
      "embedding_loss": 0.0059,
      "epoch": 1.3220088626292466,
      "grad_norm": 0.49766138195991516,
      "learning_rate": 1.6347283768258658e-05,
      "step": 8950
    },
    {
      "embedding_loss": 0.0036,
      "epoch": 1.329394387001477,
      "grad_norm": 3.3815248012542725,
      "learning_rate": 1.631445921549319e-05,
      "step": 9000
    },
    {
      "embedding_loss": 0.0072,
      "epoch": 1.3367799113737076,
      "grad_norm": 0.03580164164304733,
      "learning_rate": 1.6281634662727722e-05,
      "step": 9050
    },
    {
      "embedding_loss": 0.0053,
      "epoch": 1.3441654357459378,
      "grad_norm": 1.0942792892456055,
      "learning_rate": 1.6248810109962254e-05,
      "step": 9100
    },
    {
      "embedding_loss": 0.0035,
      "epoch": 1.3515509601181683,
      "grad_norm": 0.05680214613676071,
      "learning_rate": 1.6215985557196786e-05,
      "step": 9150
    },
    {
      "embedding_loss": 0.0073,
      "epoch": 1.3589364844903988,
      "grad_norm": 0.377883642911911,
      "learning_rate": 1.6183161004431315e-05,
      "step": 9200
    },
    {
      "embedding_loss": 0.0028,
      "epoch": 1.3663220088626291,
      "grad_norm": 0.019608836621046066,
      "learning_rate": 1.6150336451665847e-05,
      "step": 9250
    },
    {
      "embedding_loss": 0.0055,
      "epoch": 1.3737075332348596,
      "grad_norm": 0.5401307344436646,
      "learning_rate": 1.611751189890038e-05,
      "step": 9300
    },
    {
      "embedding_loss": 0.0071,
      "epoch": 1.3810930576070901,
      "grad_norm": 0.4266299605369568,
      "learning_rate": 1.608468734613491e-05,
      "step": 9350
    },
    {
      "embedding_loss": 0.0057,
      "epoch": 1.3884785819793206,
      "grad_norm": 0.16506928205490112,
      "learning_rate": 1.6051862793369443e-05,
      "step": 9400
    },
    {
      "embedding_loss": 0.0107,
      "epoch": 1.395864106351551,
      "grad_norm": 0.021154019981622696,
      "learning_rate": 1.6019038240603972e-05,
      "step": 9450
    },
    {
      "embedding_loss": 0.0054,
      "epoch": 1.4032496307237814,
      "grad_norm": 0.13461002707481384,
      "learning_rate": 1.5986213687838504e-05,
      "step": 9500
    },
    {
      "embedding_loss": 0.0045,
      "epoch": 1.410635155096012,
      "grad_norm": 0.0639062374830246,
      "learning_rate": 1.5953389135073036e-05,
      "step": 9550
    },
    {
      "embedding_loss": 0.0067,
      "epoch": 1.4180206794682422,
      "grad_norm": 0.13972270488739014,
      "learning_rate": 1.5920564582307568e-05,
      "step": 9600
    },
    {
      "embedding_loss": 0.0038,
      "epoch": 1.4254062038404727,
      "grad_norm": 0.024820247665047646,
      "learning_rate": 1.5887740029542097e-05,
      "step": 9650
    },
    {
      "embedding_loss": 0.0079,
      "epoch": 1.4327917282127032,
      "grad_norm": 0.5452784299850464,
      "learning_rate": 1.5854915476776632e-05,
      "step": 9700
    },
    {
      "embedding_loss": 0.0078,
      "epoch": 1.4401772525849335,
      "grad_norm": 0.2737050950527191,
      "learning_rate": 1.582209092401116e-05,
      "step": 9750
    },
    {
      "embedding_loss": 0.005,
      "epoch": 1.447562776957164,
      "grad_norm": 0.024434711784124374,
      "learning_rate": 1.5789266371245693e-05,
      "step": 9800
    },
    {
      "embedding_loss": 0.0032,
      "epoch": 1.4549483013293945,
      "grad_norm": 0.10400200635194778,
      "learning_rate": 1.5756441818480225e-05,
      "step": 9850
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 1.4623338257016247,
      "grad_norm": 0.048794183880090714,
      "learning_rate": 1.5723617265714757e-05,
      "step": 9900
    },
    {
      "embedding_loss": 0.0079,
      "epoch": 1.4697193500738552,
      "grad_norm": 0.06030944362282753,
      "learning_rate": 1.569079271294929e-05,
      "step": 9950
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.4771048744460857,
      "grad_norm": 0.007165232207626104,
      "learning_rate": 1.5657968160183818e-05,
      "step": 10000
    },
    {
      "embedding_loss": 0.0056,
      "epoch": 1.4844903988183162,
      "grad_norm": 0.02217938005924225,
      "learning_rate": 1.562514360741835e-05,
      "step": 10050
    },
    {
      "embedding_loss": 0.004,
      "epoch": 1.4918759231905465,
      "grad_norm": 2.4009013175964355,
      "learning_rate": 1.559231905465288e-05,
      "step": 10100
    },
    {
      "embedding_loss": 0.0065,
      "epoch": 1.499261447562777,
      "grad_norm": 0.20312148332595825,
      "learning_rate": 1.5559494501887414e-05,
      "step": 10150
    },
    {
      "embedding_loss": 0.0056,
      "epoch": 1.5066469719350075,
      "grad_norm": 0.07194498181343079,
      "learning_rate": 1.5526669949121946e-05,
      "step": 10200
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.5140324963072378,
      "grad_norm": 0.01895447075366974,
      "learning_rate": 1.5493845396356474e-05,
      "step": 10250
    },
    {
      "embedding_loss": 0.0065,
      "epoch": 1.5214180206794683,
      "grad_norm": 0.06340127438306808,
      "learning_rate": 1.5461020843591006e-05,
      "step": 10300
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 1.5288035450516988,
      "grad_norm": 0.08964123576879501,
      "learning_rate": 1.542819629082554e-05,
      "step": 10350
    },
    {
      "embedding_loss": 0.0041,
      "epoch": 1.536189069423929,
      "grad_norm": 0.010926262475550175,
      "learning_rate": 1.539537173806007e-05,
      "step": 10400
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 1.5435745937961596,
      "grad_norm": 0.021898791193962097,
      "learning_rate": 1.53625471852946e-05,
      "step": 10450
    },
    {
      "embedding_loss": 0.0065,
      "epoch": 1.55096011816839,
      "grad_norm": 0.03210087865591049,
      "learning_rate": 1.5329722632529135e-05,
      "step": 10500
    },
    {
      "embedding_loss": 0.005,
      "epoch": 1.5583456425406204,
      "grad_norm": 0.061248380690813065,
      "learning_rate": 1.5296898079763663e-05,
      "step": 10550
    },
    {
      "embedding_loss": 0.003,
      "epoch": 1.5657311669128509,
      "grad_norm": 0.016059886664152145,
      "learning_rate": 1.5264073526998195e-05,
      "step": 10600
    },
    {
      "embedding_loss": 0.0031,
      "epoch": 1.5731166912850814,
      "grad_norm": 0.30576014518737793,
      "learning_rate": 1.5231248974232726e-05,
      "step": 10650
    },
    {
      "embedding_loss": 0.0057,
      "epoch": 1.5805022156573116,
      "grad_norm": 0.40583568811416626,
      "learning_rate": 1.519842442146726e-05,
      "step": 10700
    },
    {
      "embedding_loss": 0.0028,
      "epoch": 1.5878877400295421,
      "grad_norm": 0.022348936647176743,
      "learning_rate": 1.516559986870179e-05,
      "step": 10750
    },
    {
      "embedding_loss": 0.0065,
      "epoch": 1.5952732644017726,
      "grad_norm": 0.012712485156953335,
      "learning_rate": 1.5132775315936322e-05,
      "step": 10800
    },
    {
      "embedding_loss": 0.0024,
      "epoch": 1.602658788774003,
      "grad_norm": 0.02547537162899971,
      "learning_rate": 1.5099950763170852e-05,
      "step": 10850
    },
    {
      "embedding_loss": 0.0037,
      "epoch": 1.6100443131462334,
      "grad_norm": 0.011224956251680851,
      "learning_rate": 1.5067126210405386e-05,
      "step": 10900
    },
    {
      "embedding_loss": 0.0046,
      "epoch": 1.617429837518464,
      "grad_norm": 0.7301647067070007,
      "learning_rate": 1.5034301657639916e-05,
      "step": 10950
    },
    {
      "embedding_loss": 0.0048,
      "epoch": 1.6248153618906942,
      "grad_norm": 1.1569029092788696,
      "learning_rate": 1.5001477104874447e-05,
      "step": 11000
    },
    {
      "embedding_loss": 0.0042,
      "epoch": 1.6322008862629247,
      "grad_norm": 0.03382499888539314,
      "learning_rate": 1.496865255210898e-05,
      "step": 11050
    },
    {
      "embedding_loss": 0.0029,
      "epoch": 1.6395864106351552,
      "grad_norm": 0.19339300692081451,
      "learning_rate": 1.493582799934351e-05,
      "step": 11100
    },
    {
      "embedding_loss": 0.005,
      "epoch": 1.6469719350073855,
      "grad_norm": 0.03563707694411278,
      "learning_rate": 1.4903003446578041e-05,
      "step": 11150
    },
    {
      "embedding_loss": 0.0059,
      "epoch": 1.654357459379616,
      "grad_norm": 0.046909235417842865,
      "learning_rate": 1.4870178893812573e-05,
      "step": 11200
    },
    {
      "embedding_loss": 0.0061,
      "epoch": 1.6617429837518465,
      "grad_norm": 0.24560566246509552,
      "learning_rate": 1.4837354341047105e-05,
      "step": 11250
    },
    {
      "embedding_loss": 0.0037,
      "epoch": 1.6691285081240768,
      "grad_norm": 0.08229757100343704,
      "learning_rate": 1.4804529788281637e-05,
      "step": 11300
    },
    {
      "embedding_loss": 0.0034,
      "epoch": 1.6765140324963073,
      "grad_norm": 0.187529519200325,
      "learning_rate": 1.4771705235516167e-05,
      "step": 11350
    },
    {
      "embedding_loss": 0.0058,
      "epoch": 1.6838995568685378,
      "grad_norm": 0.21818560361862183,
      "learning_rate": 1.4738880682750698e-05,
      "step": 11400
    },
    {
      "embedding_loss": 0.0057,
      "epoch": 1.691285081240768,
      "grad_norm": 0.017579764127731323,
      "learning_rate": 1.4706056129985232e-05,
      "step": 11450
    },
    {
      "embedding_loss": 0.0053,
      "epoch": 1.6986706056129985,
      "grad_norm": 0.005298899486660957,
      "learning_rate": 1.4673231577219762e-05,
      "step": 11500
    },
    {
      "embedding_loss": 0.0038,
      "epoch": 1.706056129985229,
      "grad_norm": 0.21702563762664795,
      "learning_rate": 1.4640407024454292e-05,
      "step": 11550
    },
    {
      "embedding_loss": 0.0055,
      "epoch": 1.7134416543574593,
      "grad_norm": 0.028038183227181435,
      "learning_rate": 1.4607582471688824e-05,
      "step": 11600
    },
    {
      "embedding_loss": 0.0053,
      "epoch": 1.7208271787296898,
      "grad_norm": 0.008879674598574638,
      "learning_rate": 1.4574757918923356e-05,
      "step": 11650
    },
    {
      "embedding_loss": 0.0046,
      "epoch": 1.7282127031019203,
      "grad_norm": 0.41037923097610474,
      "learning_rate": 1.4541933366157888e-05,
      "step": 11700
    },
    {
      "embedding_loss": 0.0038,
      "epoch": 1.7355982274741506,
      "grad_norm": 0.0060186549089848995,
      "learning_rate": 1.4509108813392419e-05,
      "step": 11750
    },
    {
      "embedding_loss": 0.006,
      "epoch": 1.742983751846381,
      "grad_norm": 0.02156016044318676,
      "learning_rate": 1.4476284260626949e-05,
      "step": 11800
    },
    {
      "embedding_loss": 0.0063,
      "epoch": 1.7503692762186116,
      "grad_norm": 0.024685271084308624,
      "learning_rate": 1.4443459707861483e-05,
      "step": 11850
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.7577548005908419,
      "grad_norm": 0.1743912249803543,
      "learning_rate": 1.4410635155096013e-05,
      "step": 11900
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.7651403249630724,
      "grad_norm": 0.5131327509880066,
      "learning_rate": 1.4377810602330544e-05,
      "step": 11950
    },
    {
      "embedding_loss": 0.0038,
      "epoch": 1.7725258493353029,
      "grad_norm": 0.1307702213525772,
      "learning_rate": 1.4344986049565076e-05,
      "step": 12000
    },
    {
      "embedding_loss": 0.0063,
      "epoch": 1.7799113737075332,
      "grad_norm": 0.04227305203676224,
      "learning_rate": 1.4312161496799608e-05,
      "step": 12050
    },
    {
      "embedding_loss": 0.0022,
      "epoch": 1.7872968980797637,
      "grad_norm": 0.01793646812438965,
      "learning_rate": 1.427933694403414e-05,
      "step": 12100
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 1.7946824224519942,
      "grad_norm": 0.369022011756897,
      "learning_rate": 1.424651239126867e-05,
      "step": 12150
    },
    {
      "embedding_loss": 0.0035,
      "epoch": 1.8020679468242244,
      "grad_norm": 0.024383598938584328,
      "learning_rate": 1.42136878385032e-05,
      "step": 12200
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.809453471196455,
      "grad_norm": 0.034295763820409775,
      "learning_rate": 1.4180863285737734e-05,
      "step": 12250
    },
    {
      "embedding_loss": 0.0034,
      "epoch": 1.8168389955686854,
      "grad_norm": 0.011509880423545837,
      "learning_rate": 1.4148038732972264e-05,
      "step": 12300
    },
    {
      "embedding_loss": 0.0045,
      "epoch": 1.8242245199409157,
      "grad_norm": 0.048171836882829666,
      "learning_rate": 1.4115214180206795e-05,
      "step": 12350
    },
    {
      "embedding_loss": 0.0035,
      "epoch": 1.8316100443131462,
      "grad_norm": 0.5833490490913391,
      "learning_rate": 1.4082389627441327e-05,
      "step": 12400
    },
    {
      "embedding_loss": 0.0037,
      "epoch": 1.8389955686853767,
      "grad_norm": 0.057985421270132065,
      "learning_rate": 1.4049565074675859e-05,
      "step": 12450
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 1.846381093057607,
      "grad_norm": 0.22399385273456573,
      "learning_rate": 1.4016740521910391e-05,
      "step": 12500
    },
    {
      "embedding_loss": 0.0046,
      "epoch": 1.8537666174298375,
      "grad_norm": 0.0484611876308918,
      "learning_rate": 1.3983915969144921e-05,
      "step": 12550
    },
    {
      "embedding_loss": 0.0062,
      "epoch": 1.861152141802068,
      "grad_norm": 0.03510669618844986,
      "learning_rate": 1.3951091416379452e-05,
      "step": 12600
    },
    {
      "embedding_loss": 0.0023,
      "epoch": 1.8685376661742983,
      "grad_norm": 0.0480966791510582,
      "learning_rate": 1.3918266863613985e-05,
      "step": 12650
    },
    {
      "embedding_loss": 0.0033,
      "epoch": 1.8759231905465288,
      "grad_norm": 0.06846830993890762,
      "learning_rate": 1.3885442310848516e-05,
      "step": 12700
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 1.8833087149187593,
      "grad_norm": 0.18425996601581573,
      "learning_rate": 1.3852617758083046e-05,
      "step": 12750
    },
    {
      "embedding_loss": 0.004,
      "epoch": 1.8906942392909896,
      "grad_norm": 0.024371977895498276,
      "learning_rate": 1.3819793205317578e-05,
      "step": 12800
    },
    {
      "embedding_loss": 0.0025,
      "epoch": 1.89807976366322,
      "grad_norm": 0.05540316924452782,
      "learning_rate": 1.378696865255211e-05,
      "step": 12850
    },
    {
      "embedding_loss": 0.0062,
      "epoch": 1.9054652880354506,
      "grad_norm": 0.2133670598268509,
      "learning_rate": 1.3754144099786642e-05,
      "step": 12900
    },
    {
      "embedding_loss": 0.0037,
      "epoch": 1.9128508124076808,
      "grad_norm": 0.007817639969289303,
      "learning_rate": 1.3721319547021173e-05,
      "step": 12950
    },
    {
      "embedding_loss": 0.0038,
      "epoch": 1.9202363367799113,
      "grad_norm": 0.06182079762220383,
      "learning_rate": 1.3688494994255706e-05,
      "step": 13000
    },
    {
      "embedding_loss": 0.0044,
      "epoch": 1.9276218611521418,
      "grad_norm": 0.010844537056982517,
      "learning_rate": 1.3655670441490237e-05,
      "step": 13050
    },
    {
      "embedding_loss": 0.003,
      "epoch": 1.9350073855243721,
      "grad_norm": 0.008412591181695461,
      "learning_rate": 1.3622845888724767e-05,
      "step": 13100
    },
    {
      "embedding_loss": 0.0037,
      "epoch": 1.9423929098966026,
      "grad_norm": 0.3621113896369934,
      "learning_rate": 1.3590021335959297e-05,
      "step": 13150
    },
    {
      "embedding_loss": 0.0034,
      "epoch": 1.9497784342688331,
      "grad_norm": 0.09569013118743896,
      "learning_rate": 1.3557196783193831e-05,
      "step": 13200
    },
    {
      "embedding_loss": 0.0029,
      "epoch": 1.9571639586410634,
      "grad_norm": 0.022653287276625633,
      "learning_rate": 1.3524372230428361e-05,
      "step": 13250
    },
    {
      "embedding_loss": 0.0019,
      "epoch": 1.964549483013294,
      "grad_norm": 0.013618898577988148,
      "learning_rate": 1.3491547677662893e-05,
      "step": 13300
    },
    {
      "embedding_loss": 0.003,
      "epoch": 1.9719350073855244,
      "grad_norm": 0.009312042035162449,
      "learning_rate": 1.3458723124897424e-05,
      "step": 13350
    },
    {
      "embedding_loss": 0.0041,
      "epoch": 1.9793205317577547,
      "grad_norm": 0.26061955094337463,
      "learning_rate": 1.3425898572131958e-05,
      "step": 13400
    },
    {
      "embedding_loss": 0.0033,
      "epoch": 1.9867060561299852,
      "grad_norm": 0.0065947119146585464,
      "learning_rate": 1.3393074019366488e-05,
      "step": 13450
    },
    {
      "embedding_loss": 0.0032,
      "epoch": 1.9940915805022157,
      "grad_norm": 0.016747118905186653,
      "learning_rate": 1.3360249466601018e-05,
      "step": 13500
    },
    {
      "embedding_loss": 0.0029,
      "epoch": 2.001477104874446,
      "grad_norm": 0.01635347120463848,
      "learning_rate": 1.3327424913835549e-05,
      "step": 13550
    },
    {
      "embedding_loss": 0.0058,
      "epoch": 2.0088626292466767,
      "grad_norm": 0.045433904975652695,
      "learning_rate": 1.3294600361070082e-05,
      "step": 13600
    },
    {
      "embedding_loss": 0.0019,
      "epoch": 2.016248153618907,
      "grad_norm": 0.06570059806108475,
      "learning_rate": 1.3261775808304613e-05,
      "step": 13650
    },
    {
      "embedding_loss": 0.0027,
      "epoch": 2.0236336779911372,
      "grad_norm": 0.026954207569360733,
      "learning_rate": 1.3228951255539145e-05,
      "step": 13700
    },
    {
      "embedding_loss": 0.0015,
      "epoch": 2.031019202363368,
      "grad_norm": 0.013637225143611431,
      "learning_rate": 1.3196126702773675e-05,
      "step": 13750
    },
    {
      "embedding_loss": 0.0029,
      "epoch": 2.0384047267355982,
      "grad_norm": 0.01706545241177082,
      "learning_rate": 1.3163302150008209e-05,
      "step": 13800
    },
    {
      "embedding_loss": 0.0043,
      "epoch": 2.0457902511078285,
      "grad_norm": 0.008318389765918255,
      "learning_rate": 1.313047759724274e-05,
      "step": 13850
    },
    {
      "embedding_loss": 0.0016,
      "epoch": 2.0531757754800593,
      "grad_norm": 0.010482273995876312,
      "learning_rate": 1.309765304447727e-05,
      "step": 13900
    },
    {
      "embedding_loss": 0.0022,
      "epoch": 2.0605612998522895,
      "grad_norm": 0.10514198988676071,
      "learning_rate": 1.30648284917118e-05,
      "step": 13950
    },
    {
      "embedding_loss": 0.0035,
      "epoch": 2.06794682422452,
      "grad_norm": 0.265434592962265,
      "learning_rate": 1.3032003938946334e-05,
      "step": 14000
    },
    {
      "embedding_loss": 0.0033,
      "epoch": 2.0753323485967505,
      "grad_norm": 0.10725241899490356,
      "learning_rate": 1.2999179386180864e-05,
      "step": 14050
    },
    {
      "embedding_loss": 0.0019,
      "epoch": 2.082717872968981,
      "grad_norm": 0.03083561733365059,
      "learning_rate": 1.2966354833415396e-05,
      "step": 14100
    },
    {
      "embedding_loss": 0.0039,
      "epoch": 2.090103397341211,
      "grad_norm": 0.4700145721435547,
      "learning_rate": 1.2933530280649926e-05,
      "step": 14150
    },
    {
      "embedding_loss": 0.0022,
      "epoch": 2.097488921713442,
      "grad_norm": 0.007506008259952068,
      "learning_rate": 1.290070572788446e-05,
      "step": 14200
    },
    {
      "embedding_loss": 0.0042,
      "epoch": 2.104874446085672,
      "grad_norm": 0.08826395869255066,
      "learning_rate": 1.286788117511899e-05,
      "step": 14250
    },
    {
      "embedding_loss": 0.0023,
      "epoch": 2.1122599704579024,
      "grad_norm": 0.0911986455321312,
      "learning_rate": 1.283505662235352e-05,
      "step": 14300
    },
    {
      "embedding_loss": 0.0022,
      "epoch": 2.119645494830133,
      "grad_norm": 0.03140464425086975,
      "learning_rate": 1.2802232069588053e-05,
      "step": 14350
    },
    {
      "embedding_loss": 0.0016,
      "epoch": 2.1270310192023634,
      "grad_norm": 0.017707446590065956,
      "learning_rate": 1.2769407516822585e-05,
      "step": 14400
    },
    {
      "embedding_loss": 0.0023,
      "epoch": 2.1344165435745936,
      "grad_norm": 0.17360664904117584,
      "learning_rate": 1.2736582964057115e-05,
      "step": 14450
    },
    {
      "embedding_loss": 0.0034,
      "epoch": 2.1418020679468244,
      "grad_norm": 0.006408170331269503,
      "learning_rate": 1.2703758411291647e-05,
      "step": 14500
    },
    {
      "embedding_loss": 0.0019,
      "epoch": 2.1491875923190547,
      "grad_norm": 0.00851589534431696,
      "learning_rate": 1.2670933858526178e-05,
      "step": 14550
    },
    {
      "embedding_loss": 0.0027,
      "epoch": 2.156573116691285,
      "grad_norm": 0.03338400647044182,
      "learning_rate": 1.2638109305760711e-05,
      "step": 14600
    },
    {
      "embedding_loss": 0.0025,
      "epoch": 2.1639586410635157,
      "grad_norm": 0.009356162510812283,
      "learning_rate": 1.2605284752995242e-05,
      "step": 14650
    },
    {
      "embedding_loss": 0.0025,
      "epoch": 2.171344165435746,
      "grad_norm": 0.028701895847916603,
      "learning_rate": 1.2572460200229772e-05,
      "step": 14700
    },
    {
      "embedding_loss": 0.0024,
      "epoch": 2.178729689807976,
      "grad_norm": 0.7400600910186768,
      "learning_rate": 1.2539635647464304e-05,
      "step": 14750
    },
    {
      "embedding_loss": 0.004,
      "epoch": 2.186115214180207,
      "grad_norm": 0.011697505600750446,
      "learning_rate": 1.2506811094698836e-05,
      "step": 14800
    },
    {
      "embedding_loss": 0.0013,
      "epoch": 2.193500738552437,
      "grad_norm": 0.0038999137468636036,
      "learning_rate": 1.2473986541933366e-05,
      "step": 14850
    },
    {
      "embedding_loss": 0.0018,
      "epoch": 2.2008862629246675,
      "grad_norm": 0.013158189132809639,
      "learning_rate": 1.2441161989167899e-05,
      "step": 14900
    },
    {
      "embedding_loss": 0.0025,
      "epoch": 2.208271787296898,
      "grad_norm": 0.019193725660443306,
      "learning_rate": 1.2408337436402429e-05,
      "step": 14950
    },
    {
      "embedding_loss": 0.0052,
      "epoch": 2.2156573116691285,
      "grad_norm": 0.07765129953622818,
      "learning_rate": 1.2375512883636963e-05,
      "step": 15000
    },
    {
      "embedding_loss": 0.0027,
      "epoch": 2.2230428360413588,
      "grad_norm": 0.16390322148799896,
      "learning_rate": 1.2342688330871493e-05,
      "step": 15050
    },
    {
      "embedding_loss": 0.0011,
      "epoch": 2.2304283604135895,
      "grad_norm": 0.019845524802803993,
      "learning_rate": 1.2309863778106023e-05,
      "step": 15100
    },
    {
      "embedding_loss": 0.0019,
      "epoch": 2.2378138847858198,
      "grad_norm": 0.0020033265464007854,
      "learning_rate": 1.2277039225340557e-05,
      "step": 15150
    },
    {
      "embedding_loss": 0.0012,
      "epoch": 2.24519940915805,
      "grad_norm": 0.008046300150454044,
      "learning_rate": 1.2244214672575087e-05,
      "step": 15200
    },
    {
      "embedding_loss": 0.0045,
      "epoch": 2.2525849335302808,
      "grad_norm": 0.16893664002418518,
      "learning_rate": 1.2211390119809618e-05,
      "step": 15250
    },
    {
      "embedding_loss": 0.0031,
      "epoch": 2.259970457902511,
      "grad_norm": 0.012031147256493568,
      "learning_rate": 1.217856556704415e-05,
      "step": 15300
    },
    {
      "embedding_loss": 0.0029,
      "epoch": 2.2673559822747413,
      "grad_norm": 0.007804942317306995,
      "learning_rate": 1.2145741014278682e-05,
      "step": 15350
    },
    {
      "embedding_loss": 0.0048,
      "epoch": 2.274741506646972,
      "grad_norm": 0.003408796386793256,
      "learning_rate": 1.2112916461513214e-05,
      "step": 15400
    },
    {
      "embedding_loss": 0.0024,
      "epoch": 2.2821270310192023,
      "grad_norm": 0.0196861382573843,
      "learning_rate": 1.2080091908747744e-05,
      "step": 15450
    },
    {
      "embedding_loss": 0.0032,
      "epoch": 2.2895125553914326,
      "grad_norm": 0.10261236131191254,
      "learning_rate": 1.2047267355982275e-05,
      "step": 15500
    },
    {
      "embedding_loss": 0.0017,
      "epoch": 2.2968980797636633,
      "grad_norm": 0.008358814753592014,
      "learning_rate": 1.2014442803216808e-05,
      "step": 15550
    },
    {
      "embedding_loss": 0.0018,
      "epoch": 2.3042836041358936,
      "grad_norm": 0.03527391329407692,
      "learning_rate": 1.1981618250451339e-05,
      "step": 15600
    },
    {
      "embedding_loss": 0.0035,
      "epoch": 2.311669128508124,
      "grad_norm": 0.011962966993451118,
      "learning_rate": 1.1948793697685869e-05,
      "step": 15650
    },
    {
      "embedding_loss": 0.0041,
      "epoch": 2.3190546528803546,
      "grad_norm": 0.005154829006642103,
      "learning_rate": 1.1915969144920401e-05,
      "step": 15700
    },
    {
      "embedding_loss": 0.0015,
      "epoch": 2.326440177252585,
      "grad_norm": 0.007693074177950621,
      "learning_rate": 1.1883144592154933e-05,
      "step": 15750
    },
    {
      "embedding_loss": 0.003,
      "epoch": 2.333825701624815,
      "grad_norm": 0.02695990726351738,
      "learning_rate": 1.1850320039389465e-05,
      "step": 15800
    },
    {
      "embedding_loss": 0.0016,
      "epoch": 2.341211225997046,
      "grad_norm": 0.19833894073963165,
| "learning_rate": 1.1817495486623995e-05, | |
| "step": 15850 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 2.348596750369276, | |
| "grad_norm": 0.153117373585701, | |
| "learning_rate": 1.1784670933858526e-05, | |
| "step": 15900 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.3559822747415065, | |
| "grad_norm": 0.5938816666603088, | |
| "learning_rate": 1.175184638109306e-05, | |
| "step": 15950 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.363367799113737, | |
| "grad_norm": 0.01386656891554594, | |
| "learning_rate": 1.171902182832759e-05, | |
| "step": 16000 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.3707533234859675, | |
| "grad_norm": 0.010158052667975426, | |
| "learning_rate": 1.1686197275562122e-05, | |
| "step": 16050 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.3781388478581977, | |
| "grad_norm": 0.008198092691600323, | |
| "learning_rate": 1.1653372722796652e-05, | |
| "step": 16100 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.3855243722304285, | |
| "grad_norm": 0.010181965306401253, | |
| "learning_rate": 1.1620548170031184e-05, | |
| "step": 16150 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.3929098966026587, | |
| "grad_norm": 0.008307389914989471, | |
| "learning_rate": 1.1587723617265716e-05, | |
| "step": 16200 | |
| }, | |
| { | |
| "embedding_loss": 0.0045, | |
| "epoch": 2.4002954209748895, | |
| "grad_norm": 0.018941566348075867, | |
| "learning_rate": 1.1554899064500247e-05, | |
| "step": 16250 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.4076809453471197, | |
| "grad_norm": 0.005391134414821863, | |
| "learning_rate": 1.1522074511734777e-05, | |
| "step": 16300 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.41506646971935, | |
| "grad_norm": 0.019267791882157326, | |
| "learning_rate": 1.148924995896931e-05, | |
| "step": 16350 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.4224519940915803, | |
| "grad_norm": 0.2630805969238281, | |
| "learning_rate": 1.1456425406203841e-05, | |
| "step": 16400 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.429837518463811, | |
| "grad_norm": 0.010556219145655632, | |
| "learning_rate": 1.1423600853438373e-05, | |
| "step": 16450 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.4372230428360413, | |
| "grad_norm": 0.03994214907288551, | |
| "learning_rate": 1.1390776300672904e-05, | |
| "step": 16500 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.444608567208272, | |
| "grad_norm": 3.779356002807617, | |
| "learning_rate": 1.1357951747907436e-05, | |
| "step": 16550 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.4519940915805023, | |
| "grad_norm": 0.030276980251073837, | |
| "learning_rate": 1.1325127195141968e-05, | |
| "step": 16600 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.4593796159527326, | |
| "grad_norm": 0.010462663136422634, | |
| "learning_rate": 1.1292302642376498e-05, | |
| "step": 16650 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 2.466765140324963, | |
| "grad_norm": 0.04659969359636307, | |
| "learning_rate": 1.1259478089611028e-05, | |
| "step": 16700 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.4741506646971936, | |
| "grad_norm": 0.34690728783607483, | |
| "learning_rate": 1.1226653536845562e-05, | |
| "step": 16750 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 2.481536189069424, | |
| "grad_norm": 0.019812889397144318, | |
| "learning_rate": 1.1193828984080092e-05, | |
| "step": 16800 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.4889217134416546, | |
| "grad_norm": 0.004909256473183632, | |
| "learning_rate": 1.1161004431314624e-05, | |
| "step": 16850 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 2.496307237813885, | |
| "grad_norm": 0.016758764162659645, | |
| "learning_rate": 1.1128179878549155e-05, | |
| "step": 16900 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 2.503692762186115, | |
| "grad_norm": 0.07048258185386658, | |
| "learning_rate": 1.1095355325783687e-05, | |
| "step": 16950 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 2.5110782865583454, | |
| "grad_norm": 0.010511302389204502, | |
| "learning_rate": 1.1062530773018219e-05, | |
| "step": 17000 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 2.518463810930576, | |
| "grad_norm": 0.0332963727414608, | |
| "learning_rate": 1.102970622025275e-05, | |
| "step": 17050 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.5258493353028064, | |
| "grad_norm": 0.02814817987382412, | |
| "learning_rate": 1.0996881667487283e-05, | |
| "step": 17100 | |
| }, | |
| { | |
| "embedding_loss": 0.0036, | |
| "epoch": 2.533234859675037, | |
| "grad_norm": 0.01899763010442257, | |
| "learning_rate": 1.0964057114721813e-05, | |
| "step": 17150 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 2.5406203840472674, | |
| "grad_norm": 0.004354503005743027, | |
| "learning_rate": 1.0931232561956344e-05, | |
| "step": 17200 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.5480059084194977, | |
| "grad_norm": 0.0331052802503109, | |
| "learning_rate": 1.0898408009190876e-05, | |
| "step": 17250 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.555391432791728, | |
| "grad_norm": 0.0316183939576149, | |
| "learning_rate": 1.0865583456425408e-05, | |
| "step": 17300 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.5627769571639587, | |
| "grad_norm": 0.009719472378492355, | |
| "learning_rate": 1.0832758903659938e-05, | |
| "step": 17350 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 2.570162481536189, | |
| "grad_norm": 0.035008445382118225, | |
| "learning_rate": 1.079993435089447e-05, | |
| "step": 17400 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 2.5775480059084197, | |
| "grad_norm": 0.008490943349897861, | |
| "learning_rate": 1.0767109798129e-05, | |
| "step": 17450 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.58493353028065, | |
| "grad_norm": 0.01083299983292818, | |
| "learning_rate": 1.0734285245363534e-05, | |
| "step": 17500 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 2.5923190546528803, | |
| "grad_norm": 0.01600501500070095, | |
| "learning_rate": 1.0701460692598065e-05, | |
| "step": 17550 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.5997045790251105, | |
| "grad_norm": 0.07744074612855911, | |
| "learning_rate": 1.0668636139832595e-05, | |
| "step": 17600 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.6070901033973413, | |
| "grad_norm": 0.036319248378276825, | |
| "learning_rate": 1.0635811587067127e-05, | |
| "step": 17650 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.6144756277695715, | |
| "grad_norm": 0.01792324334383011, | |
| "learning_rate": 1.0602987034301659e-05, | |
| "step": 17700 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.6218611521418023, | |
| "grad_norm": 0.07195013016462326, | |
| "learning_rate": 1.0570162481536191e-05, | |
| "step": 17750 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 2.6292466765140325, | |
| "grad_norm": 3.590275526046753, | |
| "learning_rate": 1.0537337928770721e-05, | |
| "step": 17800 | |
| }, | |
| { | |
| "embedding_loss": 0.0043, | |
| "epoch": 2.636632200886263, | |
| "grad_norm": 0.009701603092253208, | |
| "learning_rate": 1.0504513376005252e-05, | |
| "step": 17850 | |
| }, | |
| { | |
| "embedding_loss": 0.0049, | |
| "epoch": 2.644017725258493, | |
| "grad_norm": 0.09409826993942261, | |
| "learning_rate": 1.0471688823239786e-05, | |
| "step": 17900 | |
| }, | |
| { | |
| "embedding_loss": 0.0045, | |
| "epoch": 2.651403249630724, | |
| "grad_norm": 0.012147662229835987, | |
| "learning_rate": 1.0438864270474316e-05, | |
| "step": 17950 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.658788774002954, | |
| "grad_norm": 0.008824297226965427, | |
| "learning_rate": 1.0406039717708846e-05, | |
| "step": 18000 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.666174298375185, | |
| "grad_norm": 0.018408598378300667, | |
| "learning_rate": 1.0373215164943378e-05, | |
| "step": 18050 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.673559822747415, | |
| "grad_norm": 0.01662319526076317, | |
| "learning_rate": 1.034039061217791e-05, | |
| "step": 18100 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.6809453471196454, | |
| "grad_norm": 0.020505361258983612, | |
| "learning_rate": 1.0307566059412442e-05, | |
| "step": 18150 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 2.6883308714918757, | |
| "grad_norm": 0.08292482793331146, | |
| "learning_rate": 1.0274741506646973e-05, | |
| "step": 18200 | |
| }, | |
| { | |
| "embedding_loss": 0.0032, | |
| "epoch": 2.6957163958641064, | |
| "grad_norm": 0.023084105923771858, | |
| "learning_rate": 1.0241916953881503e-05, | |
| "step": 18250 | |
| }, | |
| { | |
| "embedding_loss": 0.0038, | |
| "epoch": 2.7031019202363367, | |
| "grad_norm": 0.030171602964401245, | |
| "learning_rate": 1.0209092401116037e-05, | |
| "step": 18300 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.7104874446085674, | |
| "grad_norm": 0.004670475609600544, | |
| "learning_rate": 1.0176267848350567e-05, | |
| "step": 18350 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.7178729689807977, | |
| "grad_norm": 0.003984387032687664, | |
| "learning_rate": 1.0143443295585098e-05, | |
| "step": 18400 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.725258493353028, | |
| "grad_norm": 0.03194098919630051, | |
| "learning_rate": 1.011061874281963e-05, | |
| "step": 18450 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.7326440177252582, | |
| "grad_norm": 0.007552579510957003, | |
| "learning_rate": 1.0077794190054162e-05, | |
| "step": 18500 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.740029542097489, | |
| "grad_norm": 0.02968655154109001, | |
| "learning_rate": 1.0044969637288694e-05, | |
| "step": 18550 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.7474150664697192, | |
| "grad_norm": 0.2573186159133911, | |
| "learning_rate": 1.0012145084523224e-05, | |
| "step": 18600 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 2.75480059084195, | |
| "grad_norm": 0.036742523312568665, | |
| "learning_rate": 9.979320531757756e-06, | |
| "step": 18650 | |
| }, | |
| { | |
| "embedding_loss": 0.0032, | |
| "epoch": 2.7621861152141802, | |
| "grad_norm": 0.20845580101013184, | |
| "learning_rate": 9.946495978992286e-06, | |
| "step": 18700 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.7695716395864105, | |
| "grad_norm": 0.051792044192552567, | |
| "learning_rate": 9.913671426226818e-06, | |
| "step": 18750 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.7769571639586412, | |
| "grad_norm": 0.015146799385547638, | |
| "learning_rate": 9.880846873461349e-06, | |
| "step": 18800 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.7843426883308715, | |
| "grad_norm": 0.01486288197338581, | |
| "learning_rate": 9.84802232069588e-06, | |
| "step": 18850 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 2.791728212703102, | |
| "grad_norm": 0.018719913437962532, | |
| "learning_rate": 9.815197767930413e-06, | |
| "step": 18900 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.7991137370753325, | |
| "grad_norm": 0.005828204099088907, | |
| "learning_rate": 9.782373215164945e-06, | |
| "step": 18950 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.806499261447563, | |
| "grad_norm": 0.03715846315026283, | |
| "learning_rate": 9.749548662399475e-06, | |
| "step": 19000 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.813884785819793, | |
| "grad_norm": 0.019518226385116577, | |
| "learning_rate": 9.716724109634007e-06, | |
| "step": 19050 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.821270310192024, | |
| "grad_norm": 0.020911335945129395, | |
| "learning_rate": 9.68389955686854e-06, | |
| "step": 19100 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.828655834564254, | |
| "grad_norm": 0.010648909956216812, | |
| "learning_rate": 9.65107500410307e-06, | |
| "step": 19150 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.8360413589364843, | |
| "grad_norm": 0.007687574252486229, | |
| "learning_rate": 9.618250451337602e-06, | |
| "step": 19200 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.843426883308715, | |
| "grad_norm": 0.016183407977223396, | |
| "learning_rate": 9.585425898572132e-06, | |
| "step": 19250 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.8508124076809453, | |
| "grad_norm": 0.032978300005197525, | |
| "learning_rate": 9.552601345806664e-06, | |
| "step": 19300 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 2.8581979320531756, | |
| "grad_norm": 0.00619637593626976, | |
| "learning_rate": 9.519776793041196e-06, | |
| "step": 19350 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 2.8655834564254064, | |
| "grad_norm": 0.0032677731942385435, | |
| "learning_rate": 9.486952240275728e-06, | |
| "step": 19400 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.8729689807976366, | |
| "grad_norm": 0.0064840479753911495, | |
| "learning_rate": 9.454127687510259e-06, | |
| "step": 19450 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.880354505169867, | |
| "grad_norm": 0.01070446241647005, | |
| "learning_rate": 9.42130313474479e-06, | |
| "step": 19500 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.8877400295420976, | |
| "grad_norm": 0.07543105632066727, | |
| "learning_rate": 9.388478581979321e-06, | |
| "step": 19550 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.895125553914328, | |
| "grad_norm": 0.025806330144405365, | |
| "learning_rate": 9.355654029213853e-06, | |
| "step": 19600 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 2.902511078286558, | |
| "grad_norm": 0.026599083095788956, | |
| "learning_rate": 9.322829476448383e-06, | |
| "step": 19650 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 2.909896602658789, | |
| "grad_norm": 0.37029746174812317, | |
| "learning_rate": 9.290004923682915e-06, | |
| "step": 19700 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.917282127031019, | |
| "grad_norm": 0.07045801728963852, | |
| "learning_rate": 9.257180370917447e-06, | |
| "step": 19750 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 2.9246676514032495, | |
| "grad_norm": 0.020875511690974236, | |
| "learning_rate": 9.22435581815198e-06, | |
| "step": 19800 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.93205317577548, | |
| "grad_norm": 0.013287228532135487, | |
| "learning_rate": 9.19153126538651e-06, | |
| "step": 19850 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 2.9394387001477105, | |
| "grad_norm": 0.006682571489363909, | |
| "learning_rate": 9.158706712621042e-06, | |
| "step": 19900 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.9468242245199407, | |
| "grad_norm": 0.016461633145809174, | |
| "learning_rate": 9.125882159855572e-06, | |
| "step": 19950 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 2.9542097488921715, | |
| "grad_norm": 0.0292360782623291, | |
| "learning_rate": 9.093057607090104e-06, | |
| "step": 20000 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.9615952732644018, | |
| "grad_norm": 0.007479995954781771, | |
| "learning_rate": 9.060233054324635e-06, | |
| "step": 20050 | |
| }, | |
| { | |
| "embedding_loss": 0.0033, | |
| "epoch": 2.9689807976366325, | |
| "grad_norm": 0.19220024347305298, | |
| "learning_rate": 9.027408501559167e-06, | |
| "step": 20100 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 2.9763663220088628, | |
| "grad_norm": 0.018404290080070496, | |
| "learning_rate": 8.994583948793699e-06, | |
| "step": 20150 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 2.983751846381093, | |
| "grad_norm": 0.012631416320800781, | |
| "learning_rate": 8.96175939602823e-06, | |
| "step": 20200 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.9911373707533233, | |
| "grad_norm": 0.04417691379785538, | |
| "learning_rate": 8.928934843262761e-06, | |
| "step": 20250 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.998522895125554, | |
| "grad_norm": 0.0054418547078967094, | |
| "learning_rate": 8.896110290497293e-06, | |
| "step": 20300 | |
| }, | |
| { | |
| "embedding_loss": 0.0039, | |
| "epoch": 3.0059084194977843, | |
| "grad_norm": 0.005223344080150127, | |
| "learning_rate": 8.863285737731823e-06, | |
| "step": 20350 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.0132939438700146, | |
| "grad_norm": 0.0242659542709589, | |
| "learning_rate": 8.830461184966356e-06, | |
| "step": 20400 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.0206794682422453, | |
| "grad_norm": 0.0049690124578773975, | |
| "learning_rate": 8.797636632200886e-06, | |
| "step": 20450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.0280649926144756, | |
| "grad_norm": 0.0040290821343660355, | |
| "learning_rate": 8.764812079435418e-06, | |
| "step": 20500 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.035450516986706, | |
| "grad_norm": 0.019365187734365463, | |
| "learning_rate": 8.73198752666995e-06, | |
| "step": 20550 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 3.0428360413589366, | |
| "grad_norm": 0.10174138844013214, | |
| "learning_rate": 8.699162973904482e-06, | |
| "step": 20600 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.050221565731167, | |
| "grad_norm": 0.2679438889026642, | |
| "learning_rate": 8.666338421139012e-06, | |
| "step": 20650 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.057607090103397, | |
| "grad_norm": 0.010431923903524876, | |
| "learning_rate": 8.633513868373544e-06, | |
| "step": 20700 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.064992614475628, | |
| "grad_norm": 0.034736406058073044, | |
| "learning_rate": 8.600689315608075e-06, | |
| "step": 20750 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 3.072378138847858, | |
| "grad_norm": 0.012600087560713291, | |
| "learning_rate": 8.567864762842607e-06, | |
| "step": 20800 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 3.0797636632200884, | |
| "grad_norm": 0.017327722162008286, | |
| "learning_rate": 8.535040210077137e-06, | |
| "step": 20850 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.087149187592319, | |
| "grad_norm": 0.08267229795455933, | |
| "learning_rate": 8.50221565731167e-06, | |
| "step": 20900 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.0945347119645494, | |
| "grad_norm": 0.1653100550174713, | |
| "learning_rate": 8.469391104546201e-06, | |
| "step": 20950 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.1019202363367797, | |
| "grad_norm": 0.004222211427986622, | |
| "learning_rate": 8.436566551780733e-06, | |
| "step": 21000 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.1093057607090104, | |
| "grad_norm": 0.005486358422785997, | |
| "learning_rate": 8.403741999015264e-06, | |
| "step": 21050 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.1166912850812407, | |
| "grad_norm": 0.007771783974021673, | |
| "learning_rate": 8.370917446249796e-06, | |
| "step": 21100 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.124076809453471, | |
| "grad_norm": 0.009825172834098339, | |
| "learning_rate": 8.338092893484328e-06, | |
| "step": 21150 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.1314623338257017, | |
| "grad_norm": 0.009619227610528469, | |
| "learning_rate": 8.305268340718858e-06, | |
| "step": 21200 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 3.138847858197932, | |
| "grad_norm": 0.04705429822206497, | |
| "learning_rate": 8.27244378795339e-06, | |
| "step": 21250 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 3.1462333825701623, | |
| "grad_norm": 0.03510194644331932, | |
| "learning_rate": 8.23961923518792e-06, | |
| "step": 21300 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.153618906942393, | |
| "grad_norm": 0.007620047312229872, | |
| "learning_rate": 8.206794682422453e-06, | |
| "step": 21350 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.1610044313146233, | |
| "grad_norm": 0.006676162593066692, | |
| "learning_rate": 8.173970129656985e-06, | |
| "step": 21400 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 3.1683899556868536, | |
| "grad_norm": 0.006805592216551304, | |
| "learning_rate": 8.141145576891517e-06, | |
| "step": 21450 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.1757754800590843, | |
| "grad_norm": 0.02653045393526554, | |
| "learning_rate": 8.108321024126047e-06, | |
| "step": 21500 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 3.1831610044313146, | |
| "grad_norm": 0.055775534361600876, | |
| "learning_rate": 8.075496471360579e-06, | |
| "step": 21550 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.1905465288035453, | |
| "grad_norm": 0.004594122059643269, | |
| "learning_rate": 8.04267191859511e-06, | |
| "step": 21600 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.1979320531757756, | |
| "grad_norm": 0.057899340987205505, | |
| "learning_rate": 8.009847365829641e-06, | |
| "step": 21650 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.205317577548006, | |
| "grad_norm": 0.003396780928596854, | |
| "learning_rate": 7.977022813064172e-06, | |
| "step": 21700 | |
| }, | |
| { | |
| "embedding_loss": 0.0033, | |
| "epoch": 3.212703101920236, | |
| "grad_norm": 0.09521088004112244, | |
| "learning_rate": 7.944198260298704e-06, | |
| "step": 21750 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.220088626292467, | |
| "grad_norm": 0.004245746415108442, | |
| "learning_rate": 7.911373707533236e-06, | |
| "step": 21800 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.227474150664697, | |
| "grad_norm": 0.005718466360121965, | |
| "learning_rate": 7.878549154767768e-06, | |
| "step": 21850 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.234859675036928, | |
| "grad_norm": 0.007956516928970814, | |
| "learning_rate": 7.845724602002298e-06, | |
| "step": 21900 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.242245199409158, | |
| "grad_norm": 0.06620016694068909, | |
| "learning_rate": 7.81290004923683e-06, | |
| "step": 21950 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.2496307237813884, | |
| "grad_norm": 0.04369127005338669, | |
| "learning_rate": 7.78007549647136e-06, | |
| "step": 22000 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.2570162481536187, | |
| "grad_norm": 0.029797792434692383, | |
| "learning_rate": 7.747250943705893e-06, | |
| "step": 22050 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.2644017725258494, | |
| "grad_norm": 0.014197341166436672, | |
| "learning_rate": 7.714426390940423e-06, | |
| "step": 22100 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.2717872968980797, | |
| "grad_norm": 0.011921238154172897, | |
| "learning_rate": 7.681601838174955e-06, | |
| "step": 22150 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 3.2791728212703104, | |
| "grad_norm": 0.022078925743699074, | |
| "learning_rate": 7.648777285409487e-06, | |
| "step": 22200 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.2865583456425407, | |
| "grad_norm": 0.011514640413224697, | |
| "learning_rate": 7.615952732644018e-06, | |
| "step": 22250 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.293943870014771, | |
| "grad_norm": 0.2203167974948883, | |
| "learning_rate": 7.5831281798785495e-06, | |
| "step": 22300 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.3013293943870012, | |
| "grad_norm": 0.0317855142056942, | |
| "learning_rate": 7.5503036271130815e-06, | |
| "step": 22350 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 3.308714918759232, | |
| "grad_norm": 0.005245373118668795, | |
| "learning_rate": 7.517479074347612e-06, | |
| "step": 22400 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.3161004431314622, | |
| "grad_norm": 0.007596870884299278, | |
| "learning_rate": 7.484654521582144e-06, | |
| "step": 22450 | |
| }, | |
| { | |
| "embedding_loss": 0.0035, | |
| "epoch": 3.323485967503693, | |
| "grad_norm": 0.020896727219223976, | |
| "learning_rate": 7.451829968816675e-06, | |
| "step": 22500 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.3308714918759232, | |
| "grad_norm": 0.07300405204296112, | |
| "learning_rate": 7.419005416051207e-06, | |
| "step": 22550 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.3382570162481535, | |
| "grad_norm": 0.005366707220673561, | |
| "learning_rate": 7.3861808632857375e-06, | |
| "step": 22600 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.345642540620384, | |
| "grad_norm": 0.011347993277013302, | |
| "learning_rate": 7.3533563105202695e-06, | |
| "step": 22650 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.3530280649926145, | |
| "grad_norm": 0.006689651869237423, | |
| "learning_rate": 7.320531757754801e-06, | |
| "step": 22700 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 3.360413589364845, | |
| "grad_norm": 0.006886324379593134, | |
| "learning_rate": 7.287707204989333e-06, | |
| "step": 22750 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.3677991137370755, | |
| "grad_norm": 0.03965551033616066, | |
| "learning_rate": 7.254882652223864e-06, | |
| "step": 22800 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.375184638109306, | |
| "grad_norm": 0.03409096226096153, | |
| "learning_rate": 7.222058099458395e-06, | |
| "step": 22850 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 3.382570162481536, | |
| "grad_norm": 0.00742725282907486, | |
| "learning_rate": 7.189233546692926e-06, | |
| "step": 22900 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.389955686853767, | |
| "grad_norm": 0.008300753310322762, | |
| "learning_rate": 7.156408993927458e-06, | |
| "step": 22950 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.397341211225997, | |
| "grad_norm": 0.03697545453906059, | |
| "learning_rate": 7.12358444116199e-06, | |
| "step": 23000 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.4047267355982274, | |
| "grad_norm": 0.021186918020248413, | |
| "learning_rate": 7.090759888396521e-06, | |
| "step": 23050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.412112259970458, | |
| "grad_norm": 0.021211344748735428, | |
| "learning_rate": 7.057935335631053e-06, | |
| "step": 23100 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.4194977843426884, | |
| "grad_norm": 0.015593543648719788, | |
| "learning_rate": 7.025110782865584e-06, | |
| "step": 23150 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.4268833087149186, | |
| "grad_norm": 0.12304917722940445, | |
| "learning_rate": 6.992286230100116e-06, | |
| "step": 23200 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.4342688330871494, | |
| "grad_norm": 0.030567510053515434, | |
| "learning_rate": 6.9594616773346464e-06, | |
| "step": 23250 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.4416543574593796, | |
| "grad_norm": 0.029271570965647697, | |
| "learning_rate": 6.9266371245691785e-06, | |
| "step": 23300 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.44903988183161, | |
| "grad_norm": 0.007037239149212837, | |
| "learning_rate": 6.89381257180371e-06, | |
| "step": 23350 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.4564254062038406, | |
| "grad_norm": 0.004125585313886404, | |
| "learning_rate": 6.860988019038242e-06, | |
| "step": 23400 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.463810930576071, | |
| "grad_norm": 0.012953881174325943, | |
| "learning_rate": 6.828163466272772e-06, | |
| "step": 23450 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.471196454948301, | |
| "grad_norm": 0.0054145329631865025, | |
| "learning_rate": 6.795338913507304e-06, | |
| "step": 23500 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.478581979320532, | |
| "grad_norm": 0.00575551250949502, | |
| "learning_rate": 6.762514360741835e-06, | |
| "step": 23550 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.485967503692762, | |
| "grad_norm": 0.0046454742550849915, | |
| "learning_rate": 6.729689807976367e-06, | |
| "step": 23600 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.4933530280649925, | |
| "grad_norm": 0.01675521954894066, | |
| "learning_rate": 6.6968652552108986e-06, | |
| "step": 23650 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.500738552437223, | |
| "grad_norm": 0.5372416973114014, | |
| "learning_rate": 6.66404070244543e-06, | |
| "step": 23700 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.5081240768094535, | |
| "grad_norm": 0.010617181658744812, | |
| "learning_rate": 6.631216149679961e-06, | |
| "step": 23750 | |
| }, | |
| { | |
| "embedding_loss": 0.0036, | |
| "epoch": 3.5155096011816838, | |
| "grad_norm": 0.009620044380426407, | |
| "learning_rate": 6.598391596914493e-06, | |
| "step": 23800 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 3.5228951255539145, | |
| "grad_norm": 0.0031557646580040455, | |
| "learning_rate": 6.565567044149024e-06, | |
| "step": 23850 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.5302806499261448, | |
| "grad_norm": 0.006240217015147209, | |
| "learning_rate": 6.532742491383555e-06, | |
| "step": 23900 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.537666174298375, | |
| "grad_norm": 0.054248787462711334, | |
| "learning_rate": 6.499917938618087e-06, | |
| "step": 23950 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.5450516986706058, | |
| "grad_norm": 0.004816859494894743, | |
| "learning_rate": 6.467093385852619e-06, | |
| "step": 24000 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.552437223042836, | |
| "grad_norm": 0.014538111165165901, | |
| "learning_rate": 6.43426883308715e-06, | |
| "step": 24050 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 3.5598227474150663, | |
| "grad_norm": 0.0056640529073774815, | |
| "learning_rate": 6.401444280321681e-06, | |
| "step": 24100 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.567208271787297, | |
| "grad_norm": 0.005168286617845297, | |
| "learning_rate": 6.368619727556212e-06, | |
| "step": 24150 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.5745937961595273, | |
| "grad_norm": 0.006222166121006012, | |
| "learning_rate": 6.335795174790744e-06, | |
| "step": 24200 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.5819793205317576, | |
| "grad_norm": 0.1087515652179718, | |
| "learning_rate": 6.3029706220252755e-06, | |
| "step": 24250 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 3.5893648449039883, | |
| "grad_norm": 0.002414864953607321, | |
| "learning_rate": 6.270146069259807e-06, | |
| "step": 24300 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.5967503692762186, | |
| "grad_norm": 0.003966380376368761, | |
| "learning_rate": 6.237321516494338e-06, | |
| "step": 24350 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.604135893648449, | |
| "grad_norm": 0.0019502595532685518, | |
| "learning_rate": 6.20449696372887e-06, | |
| "step": 24400 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.6115214180206796, | |
| "grad_norm": 0.04951006919145584, | |
| "learning_rate": 6.171672410963401e-06, | |
| "step": 24450 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.61890694239291, | |
| "grad_norm": 0.05455106496810913, | |
| "learning_rate": 6.138847858197933e-06, | |
| "step": 24500 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.62629246676514, | |
| "grad_norm": 0.006709383800625801, | |
| "learning_rate": 6.1060233054324635e-06, | |
| "step": 24550 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.633677991137371, | |
| "grad_norm": 0.006298394873738289, | |
| "learning_rate": 6.0731987526669955e-06, | |
| "step": 24600 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.641063515509601, | |
| "grad_norm": 0.007862403988838196, | |
| "learning_rate": 6.040374199901527e-06, | |
| "step": 24650 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.6484490398818314, | |
| "grad_norm": 0.014156641438603401, | |
| "learning_rate": 6.007549647136059e-06, | |
| "step": 24700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.655834564254062, | |
| "grad_norm": 0.05146721005439758, | |
| "learning_rate": 5.974725094370589e-06, | |
| "step": 24750 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.6632200886262924, | |
| "grad_norm": 0.0289792250841856, | |
| "learning_rate": 5.941900541605121e-06, | |
| "step": 24800 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 3.670605612998523, | |
| "grad_norm": 0.001623387448489666, | |
| "learning_rate": 5.909075988839652e-06, | |
| "step": 24850 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.6779911373707534, | |
| "grad_norm": 0.013176560401916504, | |
| "learning_rate": 5.876251436074184e-06, | |
| "step": 24900 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.6853766617429837, | |
| "grad_norm": 0.004344393033534288, | |
| "learning_rate": 5.843426883308715e-06, | |
| "step": 24950 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.692762186115214, | |
| "grad_norm": 0.00743023632094264, | |
| "learning_rate": 5.810602330543247e-06, | |
| "step": 25000 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.7001477104874447, | |
| "grad_norm": 0.01551518589258194, | |
| "learning_rate": 5.777777777777778e-06, | |
| "step": 25050 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.707533234859675, | |
| "grad_norm": 0.0053128432482481, | |
| "learning_rate": 5.74495322501231e-06, | |
| "step": 25100 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.7149187592319057, | |
| "grad_norm": 0.005089200101792812, | |
| "learning_rate": 5.712128672246841e-06, | |
| "step": 25150 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.722304283604136, | |
| "grad_norm": 0.007938201539218426, | |
| "learning_rate": 5.679304119481372e-06, | |
| "step": 25200 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.7296898079763663, | |
| "grad_norm": 0.031416155397892, | |
| "learning_rate": 5.6464795667159045e-06, | |
| "step": 25250 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.7370753323485966, | |
| "grad_norm": 0.01789075881242752, | |
| "learning_rate": 5.613655013950436e-06, | |
| "step": 25300 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.7444608567208273, | |
| "grad_norm": 0.0897989496588707, | |
| "learning_rate": 5.580830461184968e-06, | |
| "step": 25350 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.7518463810930576, | |
| "grad_norm": 0.0033649958204478025, | |
| "learning_rate": 5.548005908419498e-06, | |
| "step": 25400 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.7592319054652883, | |
| "grad_norm": 0.007789059076458216, | |
| "learning_rate": 5.51518135565403e-06, | |
| "step": 25450 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.7666174298375186, | |
| "grad_norm": 0.0069976383820176125, | |
| "learning_rate": 5.482356802888561e-06, | |
| "step": 25500 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.774002954209749, | |
| "grad_norm": 0.028319302946329117, | |
| "learning_rate": 5.449532250123093e-06, | |
| "step": 25550 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.781388478581979, | |
| "grad_norm": 0.13736043870449066, | |
| "learning_rate": 5.416707697357624e-06, | |
| "step": 25600 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.78877400295421, | |
| "grad_norm": 0.0662890300154686, | |
| "learning_rate": 5.383883144592156e-06, | |
| "step": 25650 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.79615952732644, | |
| "grad_norm": 0.07620090991258621, | |
| "learning_rate": 5.351058591826687e-06, | |
| "step": 25700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.803545051698671, | |
| "grad_norm": 0.0053595914505422115, | |
| "learning_rate": 5.318234039061219e-06, | |
| "step": 25750 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.810930576070901, | |
| "grad_norm": 0.03874294087290764, | |
| "learning_rate": 5.285409486295749e-06, | |
| "step": 25800 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.8183161004431314, | |
| "grad_norm": 0.0377751886844635, | |
| "learning_rate": 5.252584933530281e-06, | |
| "step": 25850 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.8257016248153617, | |
| "grad_norm": 0.052115991711616516, | |
| "learning_rate": 5.2197603807648126e-06, | |
| "step": 25900 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.8330871491875924, | |
| "grad_norm": 0.004992119502276182, | |
| "learning_rate": 5.186935827999345e-06, | |
| "step": 25950 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.8404726735598227, | |
| "grad_norm": 0.010746672749519348, | |
| "learning_rate": 5.154111275233875e-06, | |
| "step": 26000 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.8478581979320534, | |
| "grad_norm": 0.0036030395422130823, | |
| "learning_rate": 5.121286722468407e-06, | |
| "step": 26050 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.8552437223042837, | |
| "grad_norm": 0.004183268640190363, | |
| "learning_rate": 5.088462169702938e-06, | |
| "step": 26100 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.862629246676514, | |
| "grad_norm": 0.03056243434548378, | |
| "learning_rate": 5.05563761693747e-06, | |
| "step": 26150 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.8700147710487443, | |
| "grad_norm": 0.020458584651350975, | |
| "learning_rate": 5.022813064172001e-06, | |
| "step": 26200 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.877400295420975, | |
| "grad_norm": 0.014284319244325161, | |
| "learning_rate": 4.989988511406533e-06, | |
| "step": 26250 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 3.8847858197932053, | |
| "grad_norm": 0.004101385362446308, | |
| "learning_rate": 4.957163958641064e-06, | |
| "step": 26300 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.892171344165436, | |
| "grad_norm": 0.003685436677187681, | |
| "learning_rate": 4.924339405875596e-06, | |
| "step": 26350 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.8995568685376663, | |
| "grad_norm": 0.01675995998084545, | |
| "learning_rate": 4.891514853110127e-06, | |
| "step": 26400 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.9069423929098965, | |
| "grad_norm": 0.003458675229921937, | |
| "learning_rate": 4.858690300344658e-06, | |
| "step": 26450 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.914327917282127, | |
| "grad_norm": 0.004388707224279642, | |
| "learning_rate": 4.8258657475791895e-06, | |
| "step": 26500 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.9217134416543575, | |
| "grad_norm": 0.006350350566208363, | |
| "learning_rate": 4.7930411948137215e-06, | |
| "step": 26550 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.929098966026588, | |
| "grad_norm": 0.005695797968655825, | |
| "learning_rate": 4.760216642048253e-06, | |
| "step": 26600 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.9364844903988185, | |
| "grad_norm": 0.004757806193083525, | |
| "learning_rate": 4.727392089282784e-06, | |
| "step": 26650 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.943870014771049, | |
| "grad_norm": 0.0033138145226985216, | |
| "learning_rate": 4.694567536517315e-06, | |
| "step": 26700 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.951255539143279, | |
| "grad_norm": 0.003561320947483182, | |
| "learning_rate": 4.661742983751847e-06, | |
| "step": 26750 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.9586410635155094, | |
| "grad_norm": 0.01382633950561285, | |
| "learning_rate": 4.628918430986378e-06, | |
| "step": 26800 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.96602658788774, | |
| "grad_norm": 0.005202912725508213, | |
| "learning_rate": 4.5960938782209095e-06, | |
| "step": 26850 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.9734121122599704, | |
| "grad_norm": 0.012079019099473953, | |
| "learning_rate": 4.563269325455441e-06, | |
| "step": 26900 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.980797636632201, | |
| "grad_norm": 0.014114444144070148, | |
| "learning_rate": 4.530444772689973e-06, | |
| "step": 26950 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.9881831610044314, | |
| "grad_norm": 0.02415064163506031, | |
| "learning_rate": 4.497620219924504e-06, | |
| "step": 27000 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.9955686853766617, | |
| "grad_norm": 0.021551288664340973, | |
| "learning_rate": 4.464795667159035e-06, | |
| "step": 27050 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.002954209748892, | |
| "grad_norm": 0.005066817160695791, | |
| "learning_rate": 4.431971114393566e-06, | |
| "step": 27100 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 4.010339734121122, | |
| "grad_norm": 0.007141259498894215, | |
| "learning_rate": 4.399146561628098e-06, | |
| "step": 27150 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.017725258493353, | |
| "grad_norm": 0.12070070952177048, | |
| "learning_rate": 4.36632200886263e-06, | |
| "step": 27200 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 4.025110782865584, | |
| "grad_norm": 0.0378386452794075, | |
| "learning_rate": 4.333497456097161e-06, | |
| "step": 27250 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.032496307237814, | |
| "grad_norm": 0.0050777471624314785, | |
| "learning_rate": 4.300672903331692e-06, | |
| "step": 27300 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.039881831610044, | |
| "grad_norm": 0.01844395510852337, | |
| "learning_rate": 4.267848350566224e-06, | |
| "step": 27350 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.0472673559822745, | |
| "grad_norm": 0.012001128867268562, | |
| "learning_rate": 4.235023797800755e-06, | |
| "step": 27400 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.054652880354505, | |
| "grad_norm": 0.0065623316913843155, | |
| "learning_rate": 4.2021992450352864e-06, | |
| "step": 27450 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.062038404726736, | |
| "grad_norm": 0.004251908976584673, | |
| "learning_rate": 4.1693746922698185e-06, | |
| "step": 27500 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.069423929098966, | |
| "grad_norm": 0.010989518836140633, | |
| "learning_rate": 4.13655013950435e-06, | |
| "step": 27550 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.0768094534711965, | |
| "grad_norm": 0.0056010037660598755, | |
| "learning_rate": 4.103725586738881e-06, | |
| "step": 27600 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.084194977843427, | |
| "grad_norm": 0.010540075600147247, | |
| "learning_rate": 4.070901033973412e-06, | |
| "step": 27650 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 4.091580502215657, | |
| "grad_norm": 0.01558383833616972, | |
| "learning_rate": 4.038076481207944e-06, | |
| "step": 27700 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 4.098966026587887, | |
| "grad_norm": 0.0061827609315514565, | |
| "learning_rate": 4.005251928442475e-06, | |
| "step": 27750 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 4.1063515509601185, | |
| "grad_norm": 0.016165059059858322, | |
| "learning_rate": 3.9724273756770065e-06, | |
| "step": 27800 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.113737075332349, | |
| "grad_norm": 0.013678347691893578, | |
| "learning_rate": 3.939602822911538e-06, | |
| "step": 27850 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.121122599704579, | |
| "grad_norm": 0.002744109369814396, | |
| "learning_rate": 3.90677827014607e-06, | |
| "step": 27900 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.128508124076809, | |
| "grad_norm": 0.002654843032360077, | |
| "learning_rate": 3.873953717380601e-06, | |
| "step": 27950 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.13589364844904, | |
| "grad_norm": 0.031312067061662674, | |
| "learning_rate": 3.841129164615132e-06, | |
| "step": 28000 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.14327917282127, | |
| "grad_norm": 0.002234194427728653, | |
| "learning_rate": 3.8083046118496638e-06, | |
| "step": 28050 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 4.150664697193501, | |
| "grad_norm": 0.005080920644104481, | |
| "learning_rate": 3.775480059084195e-06, | |
| "step": 28100 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.158050221565731, | |
| "grad_norm": 0.00728574488312006, | |
| "learning_rate": 3.742655506318727e-06, | |
| "step": 28150 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.165435745937962, | |
| "grad_norm": 0.0030798488296568394, | |
| "learning_rate": 3.709830953553258e-06, | |
| "step": 28200 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.172821270310192, | |
| "grad_norm": 0.0029522618278861046, | |
| "learning_rate": 3.67700640078779e-06, | |
| "step": 28250 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.180206794682422, | |
| "grad_norm": 0.04743621125817299, | |
| "learning_rate": 3.6441818480223214e-06, | |
| "step": 28300 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 4.1875923190546525, | |
| "grad_norm": 0.021583393216133118, | |
| "learning_rate": 3.6113572952568526e-06, | |
| "step": 28350 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 4.194977843426884, | |
| "grad_norm": 0.004344303160905838, | |
| "learning_rate": 3.5785327424913842e-06, | |
| "step": 28400 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.202363367799114, | |
| "grad_norm": 0.05794514715671539, | |
| "learning_rate": 3.5457081897259154e-06, | |
| "step": 28450 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 4.209748892171344, | |
| "grad_norm": 0.03878411650657654, | |
| "learning_rate": 3.512883636960447e-06, | |
| "step": 28500 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.2171344165435745, | |
| "grad_norm": 0.024656204506754875, | |
| "learning_rate": 3.4800590841949783e-06, | |
| "step": 28550 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.224519940915805, | |
| "grad_norm": 0.07271004468202591, | |
| "learning_rate": 3.44723453142951e-06, | |
| "step": 28600 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 4.231905465288035, | |
| "grad_norm": 0.028899872675538063, | |
| "learning_rate": 3.414409978664041e-06, | |
| "step": 28650 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.239290989660266, | |
| "grad_norm": 0.05092883110046387, | |
| "learning_rate": 3.3815854258985727e-06, | |
| "step": 28700 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.2466765140324965, | |
| "grad_norm": 0.0032700442243367434, | |
| "learning_rate": 3.348760873133104e-06, | |
| "step": 28750 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.254062038404727, | |
| "grad_norm": 0.008138212375342846, | |
| "learning_rate": 3.3159363203676355e-06, | |
| "step": 28800 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.261447562776957, | |
| "grad_norm": 0.004710312932729721, | |
| "learning_rate": 3.2831117676021667e-06, | |
| "step": 28850 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.268833087149187, | |
| "grad_norm": 0.006768395192921162, | |
| "learning_rate": 3.2502872148366983e-06, | |
| "step": 28900 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.2762186115214185, | |
| "grad_norm": 3.97033429145813, | |
| "learning_rate": 3.2174626620712295e-06, | |
| "step": 28950 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.283604135893649, | |
| "grad_norm": 0.0024879788979887962, | |
| "learning_rate": 3.184638109305761e-06, | |
| "step": 29000 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.290989660265879, | |
| "grad_norm": 0.004705480299890041, | |
| "learning_rate": 3.1518135565402923e-06, | |
| "step": 29050 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.298375184638109, | |
| "grad_norm": 0.0038461387157440186, | |
| "learning_rate": 3.118989003774824e-06, | |
| "step": 29100 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.30576070901034, | |
| "grad_norm": 0.0027929339557886124, | |
| "learning_rate": 3.086164451009355e-06, | |
| "step": 29150 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.31314623338257, | |
| "grad_norm": 0.022274106740951538, | |
| "learning_rate": 3.0533398982438868e-06, | |
| "step": 29200 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.3205317577548, | |
| "grad_norm": 0.001826609717682004, | |
| "learning_rate": 3.020515345478418e-06, | |
| "step": 29250 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.327917282127031, | |
| "grad_norm": 0.026185447350144386, | |
| "learning_rate": 2.9876907927129496e-06, | |
| "step": 29300 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.335302806499262, | |
| "grad_norm": 0.8461505770683289, | |
| "learning_rate": 2.954866239947481e-06, | |
| "step": 29350 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.342688330871492, | |
| "grad_norm": 0.0049928221851587296, | |
| "learning_rate": 2.9220416871820124e-06, | |
| "step": 29400 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.350073855243722, | |
| "grad_norm": 0.0039035649970173836, | |
| "learning_rate": 2.8892171344165436e-06, | |
| "step": 29450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.357459379615952, | |
| "grad_norm": 0.003410862758755684, | |
| "learning_rate": 2.8563925816510752e-06, | |
| "step": 29500 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.364844903988184, | |
| "grad_norm": 0.007184627000242472, | |
| "learning_rate": 2.8235680288856064e-06, | |
| "step": 29550 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 4.372230428360414, | |
| "grad_norm": 0.005408278200775385, | |
| "learning_rate": 2.790743476120138e-06, | |
| "step": 29600 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.379615952732644, | |
| "grad_norm": 0.005216268356889486, | |
| "learning_rate": 2.7579189233546692e-06, | |
| "step": 29650 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.387001477104874, | |
| "grad_norm": 0.12092409282922745, | |
| "learning_rate": 2.725094370589201e-06, | |
| "step": 29700 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.394387001477105, | |
| "grad_norm": 2.360546112060547, | |
| "learning_rate": 2.692269817823732e-06, | |
| "step": 29750 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.401772525849335, | |
| "grad_norm": 0.04117804393172264, | |
| "learning_rate": 2.6594452650582637e-06, | |
| "step": 29800 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.409158050221565, | |
| "grad_norm": 0.31080320477485657, | |
| "learning_rate": 2.626620712292795e-06, | |
| "step": 29850 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.416543574593796, | |
| "grad_norm": 0.0030854118522256613, | |
| "learning_rate": 2.5937961595273265e-06, | |
| "step": 29900 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.423929098966027, | |
| "grad_norm": 0.0051147108897566795, | |
| "learning_rate": 2.5609716067618577e-06, | |
| "step": 29950 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.431314623338257, | |
| "grad_norm": 0.002233837265521288, | |
| "learning_rate": 2.5281470539963893e-06, | |
| "step": 30000 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.438700147710487, | |
| "grad_norm": 0.002284417860209942, | |
| "learning_rate": 2.495322501230921e-06, | |
| "step": 30050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.4460856720827175, | |
| "grad_norm": 0.03488105162978172, | |
| "learning_rate": 2.4624979484654525e-06, | |
| "step": 30100 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.453471196454949, | |
| "grad_norm": 0.011509645730257034, | |
| "learning_rate": 2.4296733956999837e-06, | |
| "step": 30150 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.460856720827179, | |
| "grad_norm": 0.0027210384141653776, | |
| "learning_rate": 2.3968488429345154e-06, | |
| "step": 30200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.468242245199409, | |
| "grad_norm": 0.013157092034816742, | |
| "learning_rate": 2.3640242901690466e-06, | |
| "step": 30250 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.4756277695716395, | |
| "grad_norm": 0.004714665934443474, | |
| "learning_rate": 2.331199737403578e-06, | |
| "step": 30300 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.48301329394387, | |
| "grad_norm": 0.011190270073711872, | |
| "learning_rate": 2.2983751846381094e-06, | |
| "step": 30350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.4903988183161, | |
| "grad_norm": 0.00462683429941535, | |
| "learning_rate": 2.265550631872641e-06, | |
| "step": 30400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.497784342688331, | |
| "grad_norm": 0.003459771629422903, | |
| "learning_rate": 2.232726079107172e-06, | |
| "step": 30450 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.5051698670605616, | |
| "grad_norm": 0.04073004424571991, | |
| "learning_rate": 2.199901526341704e-06, | |
| "step": 30500 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.512555391432792, | |
| "grad_norm": 0.005062537267804146, | |
| "learning_rate": 2.167076973576235e-06, | |
| "step": 30550 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.519940915805022, | |
| "grad_norm": 0.007456169463694096, | |
| "learning_rate": 2.1342524208107666e-06, | |
| "step": 30600 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.527326440177252, | |
| "grad_norm": 0.021971579641103745, | |
| "learning_rate": 2.1014278680452982e-06, | |
| "step": 30650 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.534711964549483, | |
| "grad_norm": 0.006037925370037556, | |
| "learning_rate": 2.0686033152798294e-06, | |
| "step": 30700 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.542097488921714, | |
| "grad_norm": 0.020268207415938377, | |
| "learning_rate": 2.035778762514361e-06, | |
| "step": 30750 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.549483013293944, | |
| "grad_norm": 0.003740853862836957, | |
| "learning_rate": 2.0029542097488923e-06, | |
| "step": 30800 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.556868537666174, | |
| "grad_norm": 0.006321648135781288, | |
| "learning_rate": 1.970129656983424e-06, | |
| "step": 30850 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.564254062038405, | |
| "grad_norm": 0.049009013921022415, | |
| "learning_rate": 1.937305104217955e-06, | |
| "step": 30900 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.571639586410635, | |
| "grad_norm": 0.006071667652577162, | |
| "learning_rate": 1.9044805514524867e-06, | |
| "step": 30950 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.579025110782865, | |
| "grad_norm": 0.025013990700244904, | |
| "learning_rate": 1.8716559986870181e-06, | |
| "step": 31000 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.586410635155096, | |
| "grad_norm": 0.0030903525184839964, | |
| "learning_rate": 1.8388314459215495e-06, | |
| "step": 31050 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.593796159527327, | |
| "grad_norm": 0.006547342520207167, | |
| "learning_rate": 1.806006893156081e-06, | |
| "step": 31100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.601181683899557, | |
| "grad_norm": 0.0045944456942379475, | |
| "learning_rate": 1.7731823403906123e-06, | |
| "step": 31150 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.608567208271787, | |
| "grad_norm": 0.006300546228885651, | |
| "learning_rate": 1.7403577876251437e-06, | |
| "step": 31200 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.6159527326440175, | |
| "grad_norm": 0.017787380144000053, | |
| "learning_rate": 1.7075332348596751e-06, | |
| "step": 31250 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.623338257016248, | |
| "grad_norm": 0.006018889602273703, | |
| "learning_rate": 1.6747086820942066e-06, | |
| "step": 31300 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.630723781388479, | |
| "grad_norm": 0.03421813249588013, | |
| "learning_rate": 1.641884129328738e-06, | |
| "step": 31350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.638109305760709, | |
| "grad_norm": 0.002777187153697014, | |
| "learning_rate": 1.6090595765632694e-06, | |
| "step": 31400 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.6454948301329395, | |
| "grad_norm": 0.013371906243264675, | |
| "learning_rate": 1.5762350237978008e-06, | |
| "step": 31450 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.65288035450517, | |
| "grad_norm": 0.01614244654774666, | |
| "learning_rate": 1.5434104710323322e-06, | |
| "step": 31500 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.6602658788774, | |
| "grad_norm": 0.014399105682969093, | |
| "learning_rate": 1.5105859182668636e-06, | |
| "step": 31550 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.66765140324963, | |
| "grad_norm": 0.007542195729911327, | |
| "learning_rate": 1.477761365501395e-06, | |
| "step": 31600 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.6750369276218615, | |
| "grad_norm": 0.0031674772035330534, | |
| "learning_rate": 1.4449368127359264e-06, | |
| "step": 31650 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.682422451994092, | |
| "grad_norm": 0.012060785666108131, | |
| "learning_rate": 1.4121122599704582e-06, | |
| "step": 31700 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.689807976366322, | |
| "grad_norm": 0.04494306072592735, | |
| "learning_rate": 1.3792877072049897e-06, | |
| "step": 31750 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 4.697193500738552, | |
| "grad_norm": 0.009011705406010151, | |
| "learning_rate": 1.346463154439521e-06, | |
| "step": 31800 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.704579025110783, | |
| "grad_norm": 0.002131384564563632, | |
| "learning_rate": 1.3136386016740525e-06, | |
| "step": 31850 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.711964549483013, | |
| "grad_norm": 0.003283638972789049, | |
| "learning_rate": 1.2808140489085839e-06, | |
| "step": 31900 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.719350073855244, | |
| "grad_norm": 0.0024919663555920124, | |
| "learning_rate": 1.2479894961431153e-06, | |
| "step": 31950 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.726735598227474, | |
| "grad_norm": 0.14705249667167664, | |
| "learning_rate": 1.2151649433776467e-06, | |
| "step": 32000 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.734121122599705, | |
| "grad_norm": 0.0017327765235677361, | |
| "learning_rate": 1.182340390612178e-06, | |
| "step": 32050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.741506646971935, | |
| "grad_norm": 0.8608806729316711, | |
| "learning_rate": 1.1495158378467095e-06, | |
| "step": 32100 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.748892171344165, | |
| "grad_norm": 0.019515294581651688, | |
| "learning_rate": 1.116691285081241e-06, | |
| "step": 32150 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.7562776957163955, | |
| "grad_norm": 0.010132347233593464, | |
| "learning_rate": 1.0838667323157723e-06, | |
| "step": 32200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.763663220088627, | |
| "grad_norm": 0.010688789188861847, | |
| "learning_rate": 1.0510421795503037e-06, | |
| "step": 32250 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.771048744460857, | |
| "grad_norm": 0.004127997439354658, | |
| "learning_rate": 1.0182176267848351e-06, | |
| "step": 32300 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.778434268833087, | |
| "grad_norm": 0.0045281765051186085, | |
| "learning_rate": 9.853930740193666e-07, | |
| "step": 32350 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.7858197932053175, | |
| "grad_norm": 0.03462732210755348, | |
| "learning_rate": 9.52568521253898e-07, | |
| "step": 32400 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.793205317577548, | |
| "grad_norm": 0.011012708768248558, | |
| "learning_rate": 9.197439684884295e-07, | |
| "step": 32450 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 4.800590841949779, | |
| "grad_norm": 0.002184939570724964, | |
| "learning_rate": 8.869194157229609e-07, | |
| "step": 32500 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.807976366322009, | |
| "grad_norm": 0.0029540294781327248, | |
| "learning_rate": 8.540948629574923e-07, | |
| "step": 32550 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.8153618906942395, | |
| "grad_norm": 0.0017814520979300141, | |
| "learning_rate": 8.212703101920237e-07, | |
| "step": 32600 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.82274741506647, | |
| "grad_norm": 0.008224143646657467, | |
| "learning_rate": 7.884457574265551e-07, | |
| "step": 32650 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.8301329394387, | |
| "grad_norm": 0.0046016438864171505, | |
| "learning_rate": 7.556212046610865e-07, | |
| "step": 32700 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.83751846381093, | |
| "grad_norm": 0.011043339967727661, | |
| "learning_rate": 7.227966518956179e-07, | |
| "step": 32750 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.844903988183161, | |
| "grad_norm": 0.010177507996559143, | |
| "learning_rate": 6.899720991301493e-07, | |
| "step": 32800 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.852289512555392, | |
| "grad_norm": 0.006154090631753206, | |
| "learning_rate": 6.571475463646807e-07, | |
| "step": 32850 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.859675036927622, | |
| "grad_norm": 0.0051424214616417885, | |
| "learning_rate": 6.243229935992123e-07, | |
| "step": 32900 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.867060561299852, | |
| "grad_norm": 0.045284390449523926, | |
| "learning_rate": 5.914984408337437e-07, | |
| "step": 32950 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.874446085672083, | |
| "grad_norm": 0.006224027369171381, | |
| "learning_rate": 5.586738880682751e-07, | |
| "step": 33000 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.881831610044313, | |
| "grad_norm": 0.06909705698490143, | |
| "learning_rate": 5.258493353028066e-07, | |
| "step": 33050 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.889217134416544, | |
| "grad_norm": 0.007023925427347422, | |
| "learning_rate": 4.93024782537338e-07, | |
| "step": 33100 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.896602658788774, | |
| "grad_norm": 0.005172157660126686, | |
| "learning_rate": 4.602002297718694e-07, | |
| "step": 33150 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.903988183161005, | |
| "grad_norm": 0.012793969362974167, | |
| "learning_rate": 4.273756770064008e-07, | |
| "step": 33200 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.911373707533235, | |
| "grad_norm": 0.0392751581966877, | |
| "learning_rate": 3.945511242409322e-07, | |
| "step": 33250 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 4.918759231905465, | |
| "grad_norm": 0.007948558777570724, | |
| "learning_rate": 3.6172657147546373e-07, | |
| "step": 33300 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.926144756277695, | |
| "grad_norm": 0.0075578768737614155, | |
| "learning_rate": 3.2890201870999514e-07, | |
| "step": 33350 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.933530280649926, | |
| "grad_norm": 0.007234030868858099, | |
| "learning_rate": 2.9607746594452655e-07, | |
| "step": 33400 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.940915805022157, | |
| "grad_norm": 0.01585334725677967, | |
| "learning_rate": 2.6325291317905796e-07, | |
| "step": 33450 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.948301329394387, | |
| "grad_norm": 0.005034049041569233, | |
| "learning_rate": 2.304283604135894e-07, | |
| "step": 33500 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.955686853766617, | |
| "grad_norm": 0.0041028158739209175, | |
| "learning_rate": 1.976038076481208e-07, | |
| "step": 33550 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.963072378138848, | |
| "grad_norm": 0.006748030427843332, | |
| "learning_rate": 1.647792548826522e-07, | |
| "step": 33600 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.970457902511078, | |
| "grad_norm": 0.004400690086185932, | |
| "learning_rate": 1.3195470211718367e-07, | |
| "step": 33650 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.977843426883309, | |
| "grad_norm": 0.2393534779548645, | |
| "learning_rate": 9.913014935171508e-08, | |
| "step": 33700 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.985228951255539, | |
| "grad_norm": 0.017467621713876724, | |
| "learning_rate": 6.630559658624653e-08, | |
| "step": 33750 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 4.99261447562777, | |
| "grad_norm": 0.006581551861017942, | |
| "learning_rate": 3.348104382077794e-08, | |
| "step": 33800 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 5.0, | |
| "grad_norm": 0.007424783427268267, | |
| "learning_rate": 6.564910553093716e-10, | |
| "step": 33850 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 33850, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
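
The state above is the standard Hugging Face Trainer checkpoint format, so it can be inspected programmatically. Below is a minimal sketch that summarizes the `embedding_loss` curve from this file; it assumes the JSON is saved under the usual name `trainer_state.json` (the path is an assumption — point it at your checkpoint directory), and derives the steps-per-epoch figure from the `max_steps` and `num_train_epochs` fields shown above.

```python
import json

# Load the trainer state shown above. The filename is an assumption:
# the Trainer writes "trainer_state.json" inside each checkpoint dir.
with open("trainer_state.json") as f:
    state = json.load(f)

# "log_history" holds one entry per logging event; loss entries carry
# "embedding_loss" and "step" (logged every `logging_steps` = 50 steps).
history = state["log_history"]
steps = [e["step"] for e in history if "embedding_loss" in e]
losses = [e["embedding_loss"] for e in history if "embedding_loss" in e]

print(f"logged points : {len(steps)}")
print(f"first loss    : {losses[0]:.4f} at step {steps[0]}")
print(f"final loss    : {losses[-1]:.4f} at step {steps[-1]}")

# Rough convergence check: mean loss over the final epoch.
# 33850 max_steps / 5 epochs -> 6770 steps per epoch.
steps_per_epoch = state["max_steps"] // state["num_train_epochs"]
last_epoch = [l for s, l in zip(steps, losses)
              if s > state["max_steps"] - steps_per_epoch]
print(f"mean loss, last epoch: {sum(last_epoch) / len(last_epoch):.4f}")
```

Run against this log, the summary would reflect what the raw entries already show: the loss falls from ~0.22 at step 1 to the 1e-4..2e-3 range by the fifth epoch, with the learning rate decayed nearly to zero at the final step.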