| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 14.86035710599649, |
| "eval_steps": 500, |
| "global_step": 18000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008256786046031582, |
| "grad_norm": 1.7616229057312012, |
| "learning_rate": 1.6515276630883566e-06, |
| "loss": 10.4208, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.016513572092063163, |
| "grad_norm": 1.7366944551467896, |
| "learning_rate": 3.3030553261767132e-06, |
| "loss": 10.4105, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.024770358138094747, |
| "grad_norm": 1.7757558822631836, |
| "learning_rate": 4.95458298926507e-06, |
| "loss": 10.387, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03302714418412633, |
| "grad_norm": 1.747761845588684, |
| "learning_rate": 6.6061106523534265e-06, |
| "loss": 10.3514, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04128393023015791, |
| "grad_norm": 1.6940944194793701, |
| "learning_rate": 8.257638315441784e-06, |
| "loss": 10.3033, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04954071627618949, |
| "grad_norm": 1.608394980430603, |
| "learning_rate": 9.90916597853014e-06, |
| "loss": 10.2448, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05779750232222108, |
| "grad_norm": 1.5383769273757935, |
| "learning_rate": 1.1560693641618496e-05, |
| "loss": 10.1765, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06605428836825265, |
| "grad_norm": 1.4542272090911865, |
| "learning_rate": 1.3212221304706853e-05, |
| "loss": 10.1003, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.07431107441428424, |
| "grad_norm": 1.4005941152572632, |
| "learning_rate": 1.486374896779521e-05, |
| "loss": 10.0172, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08256786046031582, |
| "grad_norm": 1.372097134590149, |
| "learning_rate": 1.6515276630883568e-05, |
| "loss": 9.9285, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0908246465063474, |
| "grad_norm": 1.3433704376220703, |
| "learning_rate": 1.8166804293971927e-05, |
| "loss": 9.8394, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09908143255237899, |
| "grad_norm": 1.3353278636932373, |
| "learning_rate": 1.981833195706028e-05, |
| "loss": 9.7529, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.10733821859841057, |
| "grad_norm": 1.3279035091400146, |
| "learning_rate": 2.1469859620148637e-05, |
| "loss": 9.6614, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.11559500464444215, |
| "grad_norm": 1.3572771549224854, |
| "learning_rate": 2.3121387283236992e-05, |
| "loss": 9.5723, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.12385179069047374, |
| "grad_norm": 1.3397237062454224, |
| "learning_rate": 2.477291494632535e-05, |
| "loss": 9.4781, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1321085767365053, |
| "grad_norm": 1.327772617340088, |
| "learning_rate": 2.6424442609413706e-05, |
| "loss": 9.3866, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1403653627825369, |
| "grad_norm": 1.3253681659698486, |
| "learning_rate": 2.8075970272502064e-05, |
| "loss": 9.2926, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.14862214882856847, |
| "grad_norm": 1.3415825366973877, |
| "learning_rate": 2.972749793559042e-05, |
| "loss": 9.1932, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.15687893487460006, |
| "grad_norm": 1.3262524604797363, |
| "learning_rate": 3.137902559867878e-05, |
| "loss": 9.0972, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16513572092063164, |
| "grad_norm": 1.3293501138687134, |
| "learning_rate": 3.3030553261767136e-05, |
| "loss": 8.996, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.17339250696666322, |
| "grad_norm": 1.3101015090942383, |
| "learning_rate": 3.468208092485549e-05, |
| "loss": 8.8972, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1816492930126948, |
| "grad_norm": 1.3407046794891357, |
| "learning_rate": 3.6333608587943854e-05, |
| "loss": 8.795, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1899060790587264, |
| "grad_norm": 1.310673475265503, |
| "learning_rate": 3.798513625103221e-05, |
| "loss": 8.6952, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.19816286510475797, |
| "grad_norm": 1.3073076009750366, |
| "learning_rate": 3.963666391412056e-05, |
| "loss": 8.5946, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.20641965115078956, |
| "grad_norm": 1.2669146060943604, |
| "learning_rate": 4.128819157720892e-05, |
| "loss": 8.4994, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.21467643719682114, |
| "grad_norm": 1.257196307182312, |
| "learning_rate": 4.2939719240297274e-05, |
| "loss": 8.3993, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.22293322324285272, |
| "grad_norm": 1.258946418762207, |
| "learning_rate": 4.459124690338563e-05, |
| "loss": 8.2995, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2311900092888843, |
| "grad_norm": 1.2212146520614624, |
| "learning_rate": 4.6242774566473984e-05, |
| "loss": 8.2009, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2394467953349159, |
| "grad_norm": 1.1783366203308105, |
| "learning_rate": 4.7894302229562346e-05, |
| "loss": 8.1136, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.24770358138094747, |
| "grad_norm": 1.1963677406311035, |
| "learning_rate": 4.95458298926507e-05, |
| "loss": 8.0248, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.25596036742697903, |
| "grad_norm": 1.112446665763855, |
| "learning_rate": 5.1197357555739056e-05, |
| "loss": 7.9439, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2642171534730106, |
| "grad_norm": 1.0816154479980469, |
| "learning_rate": 5.284888521882741e-05, |
| "loss": 7.8579, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2724739395190422, |
| "grad_norm": 1.0233012437820435, |
| "learning_rate": 5.4500412881915774e-05, |
| "loss": 7.7776, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2807307255650738, |
| "grad_norm": 0.9429032206535339, |
| "learning_rate": 5.615194054500413e-05, |
| "loss": 7.7092, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28898751161110536, |
| "grad_norm": 0.8611400127410889, |
| "learning_rate": 5.7803468208092484e-05, |
| "loss": 7.6461, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.29724429765713695, |
| "grad_norm": 0.8037594556808472, |
| "learning_rate": 5.945499587118084e-05, |
| "loss": 7.5908, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.30550108370316853, |
| "grad_norm": 0.7045464515686035, |
| "learning_rate": 6.110652353426921e-05, |
| "loss": 7.5412, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3137578697492001, |
| "grad_norm": 0.6216972470283508, |
| "learning_rate": 6.275805119735756e-05, |
| "loss": 7.4982, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3220146557952317, |
| "grad_norm": 0.5564426183700562, |
| "learning_rate": 6.440957886044592e-05, |
| "loss": 7.4692, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3302714418412633, |
| "grad_norm": 0.5008960962295532, |
| "learning_rate": 6.606110652353427e-05, |
| "loss": 7.4429, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.33852822788729486, |
| "grad_norm": 0.38887500762939453, |
| "learning_rate": 6.771263418662263e-05, |
| "loss": 7.4219, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.34678501393332645, |
| "grad_norm": 0.34142008423805237, |
| "learning_rate": 6.936416184971098e-05, |
| "loss": 7.3995, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.35504179997935803, |
| "grad_norm": 0.2729811668395996, |
| "learning_rate": 7.101568951279934e-05, |
| "loss": 7.3893, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3632985860253896, |
| "grad_norm": 0.2536115348339081, |
| "learning_rate": 7.266721717588771e-05, |
| "loss": 7.3651, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3715553720714212, |
| "grad_norm": 0.2442627251148224, |
| "learning_rate": 7.431874483897605e-05, |
| "loss": 7.3566, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3798121581174528, |
| "grad_norm": 0.2364257276058197, |
| "learning_rate": 7.597027250206442e-05, |
| "loss": 7.3426, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.38806894416348436, |
| "grad_norm": 0.17855331301689148, |
| "learning_rate": 7.762180016515277e-05, |
| "loss": 7.3399, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.39632573020951595, |
| "grad_norm": 0.19668430089950562, |
| "learning_rate": 7.927332782824111e-05, |
| "loss": 7.3281, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.40458251625554753, |
| "grad_norm": 0.19964857399463654, |
| "learning_rate": 8.092485549132948e-05, |
| "loss": 7.3147, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4128393023015791, |
| "grad_norm": 0.1922236531972885, |
| "learning_rate": 8.257638315441784e-05, |
| "loss": 7.307, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4210960883476107, |
| "grad_norm": 0.18698124587535858, |
| "learning_rate": 8.422791081750619e-05, |
| "loss": 7.2961, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4293528743936423, |
| "grad_norm": 0.22864067554473877, |
| "learning_rate": 8.587943848059455e-05, |
| "loss": 7.2856, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.43760966043967386, |
| "grad_norm": 0.21611692011356354, |
| "learning_rate": 8.75309661436829e-05, |
| "loss": 7.2777, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.44586644648570545, |
| "grad_norm": 0.24379616975784302, |
| "learning_rate": 8.918249380677126e-05, |
| "loss": 7.2656, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.45412323253173703, |
| "grad_norm": 0.2078498750925064, |
| "learning_rate": 9.083402146985963e-05, |
| "loss": 7.2623, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4623800185777686, |
| "grad_norm": 0.21156761050224304, |
| "learning_rate": 9.248554913294797e-05, |
| "loss": 7.2523, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4706368046238002, |
| "grad_norm": 0.2728644609451294, |
| "learning_rate": 9.413707679603634e-05, |
| "loss": 7.2481, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4788935906698318, |
| "grad_norm": 0.2564805746078491, |
| "learning_rate": 9.578860445912469e-05, |
| "loss": 7.2328, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.48715037671586336, |
| "grad_norm": 0.239385187625885, |
| "learning_rate": 9.744013212221305e-05, |
| "loss": 7.228, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.49540716276189495, |
| "grad_norm": 0.29688534140586853, |
| "learning_rate": 9.90916597853014e-05, |
| "loss": 7.2167, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5036639488079265, |
| "grad_norm": 0.3255228102207184, |
| "learning_rate": 0.00010074318744838976, |
| "loss": 7.2115, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5119207348539581, |
| "grad_norm": 0.26421359181404114, |
| "learning_rate": 0.00010239471511147811, |
| "loss": 7.2046, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5201775208999897, |
| "grad_norm": 0.22493909299373627, |
| "learning_rate": 0.00010404624277456648, |
| "loss": 7.1852, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5284343069460212, |
| "grad_norm": 0.23427563905715942, |
| "learning_rate": 0.00010569777043765482, |
| "loss": 7.188, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5366910929920529, |
| "grad_norm": 0.2601110637187958, |
| "learning_rate": 0.00010734929810074319, |
| "loss": 7.1702, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5449478790380844, |
| "grad_norm": 0.2562381625175476, |
| "learning_rate": 0.00010900082576383155, |
| "loss": 7.1727, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.553204665084116, |
| "grad_norm": 0.3765369653701782, |
| "learning_rate": 0.0001106523534269199, |
| "loss": 7.1649, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5614614511301476, |
| "grad_norm": 0.33350440859794617, |
| "learning_rate": 0.00011230388109000826, |
| "loss": 7.1527, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5697182371761792, |
| "grad_norm": 0.27716100215911865, |
| "learning_rate": 0.00011395540875309663, |
| "loss": 7.139, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5779750232222107, |
| "grad_norm": 0.30098262429237366, |
| "learning_rate": 0.00011560693641618497, |
| "loss": 7.1353, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5862318092682424, |
| "grad_norm": 0.32388460636138916, |
| "learning_rate": 0.00011725846407927334, |
| "loss": 7.1255, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5944885953142739, |
| "grad_norm": 0.37705451250076294, |
| "learning_rate": 0.00011890999174236168, |
| "loss": 7.1116, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6027453813603055, |
| "grad_norm": 0.23738576471805573, |
| "learning_rate": 0.00012056151940545005, |
| "loss": 7.0997, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6110021674063371, |
| "grad_norm": 0.6089703440666199, |
| "learning_rate": 0.00012221304706853842, |
| "loss": 7.1003, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6192589534523687, |
| "grad_norm": 0.5266053080558777, |
| "learning_rate": 0.00012386457473162674, |
| "loss": 7.085, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6275157394984002, |
| "grad_norm": 0.2401445358991623, |
| "learning_rate": 0.00012551610239471513, |
| "loss": 7.0832, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6357725255444319, |
| "grad_norm": 0.30341988801956177, |
| "learning_rate": 0.00012716763005780345, |
| "loss": 7.0664, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6440293115904634, |
| "grad_norm": 0.2831466495990753, |
| "learning_rate": 0.00012881915772089184, |
| "loss": 7.0738, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.652286097636495, |
| "grad_norm": 0.43197304010391235, |
| "learning_rate": 0.0001304706853839802, |
| "loss": 7.0623, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6605428836825266, |
| "grad_norm": 0.266729474067688, |
| "learning_rate": 0.00013212221304706855, |
| "loss": 7.0505, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6687996697285582, |
| "grad_norm": 0.26868125796318054, |
| "learning_rate": 0.0001337737407101569, |
| "loss": 7.036, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6770564557745897, |
| "grad_norm": 0.33523380756378174, |
| "learning_rate": 0.00013542526837324526, |
| "loss": 7.0277, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6853132418206214, |
| "grad_norm": 0.44178086519241333, |
| "learning_rate": 0.0001370767960363336, |
| "loss": 7.0232, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6935700278666529, |
| "grad_norm": 0.23396629095077515, |
| "learning_rate": 0.00013872832369942197, |
| "loss": 7.017, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7018268139126845, |
| "grad_norm": 0.2513016164302826, |
| "learning_rate": 0.00014037985136251032, |
| "loss": 7.0152, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7100835999587161, |
| "grad_norm": 0.22369173169136047, |
| "learning_rate": 0.00014203137902559868, |
| "loss": 7.0076, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7183403860047477, |
| "grad_norm": 0.31220072507858276, |
| "learning_rate": 0.00014368290668868703, |
| "loss": 6.9968, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7265971720507792, |
| "grad_norm": 0.26952481269836426, |
| "learning_rate": 0.00014533443435177541, |
| "loss": 6.9924, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7348539580968109, |
| "grad_norm": 0.33689916133880615, |
| "learning_rate": 0.00014698596201486374, |
| "loss": 6.977, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7431107441428424, |
| "grad_norm": 0.27384230494499207, |
| "learning_rate": 0.0001486374896779521, |
| "loss": 6.9776, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.751367530188874, |
| "grad_norm": 0.3221207559108734, |
| "learning_rate": 0.00015028901734104048, |
| "loss": 6.9716, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7596243162349056, |
| "grad_norm": 0.3615335524082184, |
| "learning_rate": 0.00015194054500412883, |
| "loss": 6.9636, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7678811022809372, |
| "grad_norm": 0.23735912144184113, |
| "learning_rate": 0.00015359207266721716, |
| "loss": 6.9528, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7761378883269687, |
| "grad_norm": 0.3608275353908539, |
| "learning_rate": 0.00015524360033030554, |
| "loss": 6.9488, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7843946743730003, |
| "grad_norm": 0.32908084988594055, |
| "learning_rate": 0.0001568951279933939, |
| "loss": 6.9367, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7926514604190319, |
| "grad_norm": 0.27347663044929504, |
| "learning_rate": 0.00015854665565648223, |
| "loss": 6.9458, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8009082464650634, |
| "grad_norm": 0.2819118797779083, |
| "learning_rate": 0.0001601981833195706, |
| "loss": 6.9314, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8091650325110951, |
| "grad_norm": 0.32597044110298157, |
| "learning_rate": 0.00016184971098265897, |
| "loss": 6.9304, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8174218185571266, |
| "grad_norm": 0.2454291731119156, |
| "learning_rate": 0.00016350123864574732, |
| "loss": 6.9015, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8256786046031582, |
| "grad_norm": 0.3510182499885559, |
| "learning_rate": 0.00016515276630883568, |
| "loss": 6.902, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8339353906491898, |
| "grad_norm": 0.3421700894832611, |
| "learning_rate": 0.00016680429397192403, |
| "loss": 6.9061, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8421921766952214, |
| "grad_norm": 0.2990358769893646, |
| "learning_rate": 0.00016845582163501239, |
| "loss": 6.896, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.8504489627412529, |
| "grad_norm": 0.2679702937602997, |
| "learning_rate": 0.00017010734929810074, |
| "loss": 6.8927, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8587057487872846, |
| "grad_norm": 0.2782333195209503, |
| "learning_rate": 0.0001717588769611891, |
| "loss": 6.8892, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8669625348333161, |
| "grad_norm": 0.25567376613616943, |
| "learning_rate": 0.00017341040462427745, |
| "loss": 6.8829, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8752193208793477, |
| "grad_norm": 0.25939440727233887, |
| "learning_rate": 0.0001750619322873658, |
| "loss": 6.8573, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8834761069253793, |
| "grad_norm": 0.5158926844596863, |
| "learning_rate": 0.0001767134599504542, |
| "loss": 6.8639, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8917328929714109, |
| "grad_norm": 0.3435886800289154, |
| "learning_rate": 0.00017836498761354252, |
| "loss": 6.8615, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8999896790174424, |
| "grad_norm": 0.34237366914749146, |
| "learning_rate": 0.00018001651527663087, |
| "loss": 6.856, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9082464650634741, |
| "grad_norm": 0.5136009454727173, |
| "learning_rate": 0.00018166804293971925, |
| "loss": 6.8604, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9165032511095056, |
| "grad_norm": 0.42529767751693726, |
| "learning_rate": 0.0001833195706028076, |
| "loss": 6.8544, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9247600371555372, |
| "grad_norm": 0.29155978560447693, |
| "learning_rate": 0.00018497109826589594, |
| "loss": 6.8404, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9330168232015688, |
| "grad_norm": 0.5477098226547241, |
| "learning_rate": 0.00018662262592898432, |
| "loss": 6.8273, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9412736092476004, |
| "grad_norm": 0.6319007873535156, |
| "learning_rate": 0.00018827415359207267, |
| "loss": 6.8354, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9495303952936319, |
| "grad_norm": 0.5107876062393188, |
| "learning_rate": 0.00018992568125516103, |
| "loss": 6.8176, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9577871813396636, |
| "grad_norm": 0.3837167024612427, |
| "learning_rate": 0.00019157720891824938, |
| "loss": 6.8291, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9660439673856951, |
| "grad_norm": 0.4074363708496094, |
| "learning_rate": 0.00019322873658133774, |
| "loss": 6.808, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9743007534317267, |
| "grad_norm": 0.3952867388725281, |
| "learning_rate": 0.0001948802642444261, |
| "loss": 6.8046, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9825575394777583, |
| "grad_norm": 0.3207480311393738, |
| "learning_rate": 0.00019653179190751448, |
| "loss": 6.8099, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9908143255237899, |
| "grad_norm": 0.46632879972457886, |
| "learning_rate": 0.0001981833195706028, |
| "loss": 6.7924, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9990711115698214, |
| "grad_norm": 0.4463304877281189, |
| "learning_rate": 0.00019983484723369116, |
| "loss": 6.8048, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.0066054288368254, |
| "grad_norm": 0.481222927570343, |
| "learning_rate": 0.00020148637489677952, |
| "loss": 6.2066, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.0148622148828568, |
| "grad_norm": 0.35238635540008545, |
| "learning_rate": 0.0002031379025598679, |
| "loss": 6.7881, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.0231190009288884, |
| "grad_norm": 0.3762964904308319, |
| "learning_rate": 0.00020478943022295623, |
| "loss": 6.7819, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.03137578697492, |
| "grad_norm": 0.40854519605636597, |
| "learning_rate": 0.00020644095788604458, |
| "loss": 6.7868, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.0396325730209517, |
| "grad_norm": 0.7023956179618835, |
| "learning_rate": 0.00020809248554913296, |
| "loss": 6.7674, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.047889359066983, |
| "grad_norm": 0.43909308314323425, |
| "learning_rate": 0.00020974401321222132, |
| "loss": 6.7735, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.0561461451130147, |
| "grad_norm": 0.5223090648651123, |
| "learning_rate": 0.00021139554087530965, |
| "loss": 6.7506, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.0644029311590464, |
| "grad_norm": 0.41518449783325195, |
| "learning_rate": 0.00021304706853839803, |
| "loss": 6.7645, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.072659717205078, |
| "grad_norm": 0.346605509519577, |
| "learning_rate": 0.00021469859620148638, |
| "loss": 6.7545, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.0809165032511094, |
| "grad_norm": 0.50466388463974, |
| "learning_rate": 0.00021635012386457474, |
| "loss": 6.7577, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.089173289297141, |
| "grad_norm": 0.2985013723373413, |
| "learning_rate": 0.0002180016515276631, |
| "loss": 6.7553, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.0974300753431727, |
| "grad_norm": 0.3818993866443634, |
| "learning_rate": 0.00021965317919075145, |
| "loss": 6.7531, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.1056868613892044, |
| "grad_norm": 0.39540722966194153, |
| "learning_rate": 0.0002213047068538398, |
| "loss": 6.7366, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.1139436474352358, |
| "grad_norm": 0.4633065462112427, |
| "learning_rate": 0.0002229562345169282, |
| "loss": 6.7335, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.1222004334812674, |
| "grad_norm": 0.395951509475708, |
| "learning_rate": 0.00022460776218001651, |
| "loss": 6.7419, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.130457219527299, |
| "grad_norm": 0.3049313724040985, |
| "learning_rate": 0.00022625928984310487, |
| "loss": 6.7304, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.1387140055733305, |
| "grad_norm": 0.3902861475944519, |
| "learning_rate": 0.00022791081750619325, |
| "loss": 6.73, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.146970791619362, |
| "grad_norm": 0.336494117975235, |
| "learning_rate": 0.0002295623451692816, |
| "loss": 6.7183, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.1552275776653937, |
| "grad_norm": 0.4714094400405884, |
| "learning_rate": 0.00023121387283236994, |
| "loss": 6.7258, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.1634843637114254, |
| "grad_norm": 0.5000776052474976, |
| "learning_rate": 0.0002328654004954583, |
| "loss": 6.7238, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.171741149757457, |
| "grad_norm": 0.39674362540245056, |
| "learning_rate": 0.00023451692815854667, |
| "loss": 6.7216, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.1799979358034884, |
| "grad_norm": 0.4412456452846527, |
| "learning_rate": 0.00023616845582163503, |
| "loss": 6.7075, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.18825472184952, |
| "grad_norm": 0.5480419397354126, |
| "learning_rate": 0.00023781998348472336, |
| "loss": 6.7109, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.1965115078955517, |
| "grad_norm": 0.5100497007369995, |
| "learning_rate": 0.00023947151114781174, |
| "loss": 6.7063, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.2047682939415831, |
| "grad_norm": 0.48953869938850403, |
| "learning_rate": 0.0002411230388109001, |
| "loss": 6.7087, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.2130250799876148, |
| "grad_norm": 0.6405659317970276, |
| "learning_rate": 0.00024277456647398842, |
| "loss": 6.7031, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.2212818660336464, |
| "grad_norm": 0.6138748526573181, |
| "learning_rate": 0.00024442609413707683, |
| "loss": 6.6993, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.229538652079678, |
| "grad_norm": 0.5157073736190796, |
| "learning_rate": 0.00024607762180016516, |
| "loss": 6.6926, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.2377954381257097, |
| "grad_norm": 0.46693915128707886, |
| "learning_rate": 0.0002477291494632535, |
| "loss": 6.6984, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.246052224171741, |
| "grad_norm": 0.3783016502857208, |
| "learning_rate": 0.00024938067712634187, |
| "loss": 6.7014, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.2543090102177727, |
| "grad_norm": 0.657699704170227, |
| "learning_rate": 0.00025103220478943025, |
| "loss": 6.7012, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.2625657962638044, |
| "grad_norm": 0.28327375650405884, |
| "learning_rate": 0.0002526837324525186, |
| "loss": 6.689, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.2708225823098358, |
| "grad_norm": 0.419355571269989, |
| "learning_rate": 0.0002543352601156069, |
| "loss": 6.6892, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.2790793683558674, |
| "grad_norm": 0.6164056658744812, |
| "learning_rate": 0.0002559867877786953, |
| "loss": 6.6925, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.287336154401899, |
| "grad_norm": 0.5302212238311768, |
| "learning_rate": 0.00025763831544178367, |
| "loss": 6.685, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.2955929404479307, |
| "grad_norm": 0.4732874929904938, |
| "learning_rate": 0.000259289843104872, |
| "loss": 6.6831, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.3038497264939624, |
| "grad_norm": 0.6624964475631714, |
| "learning_rate": 0.0002609413707679604, |
| "loss": 6.6712, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.3121065125399938, |
| "grad_norm": 0.31143829226493835, |
| "learning_rate": 0.0002625928984310487, |
| "loss": 6.6742, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.3203632985860254, |
| "grad_norm": 0.6145860552787781, |
| "learning_rate": 0.0002642444260941371, |
| "loss": 6.6835, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.328620084632057, |
| "grad_norm": 0.3368646204471588, |
| "learning_rate": 0.0002658959537572254, |
| "loss": 6.6695, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.3368768706780885, |
| "grad_norm": 0.412112295627594, |
| "learning_rate": 0.0002675474814203138, |
| "loss": 6.6827, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.34513365672412, |
| "grad_norm": 0.3918752372264862, |
| "learning_rate": 0.0002691990090834022, |
| "loss": 6.6718, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.3533904427701517, |
| "grad_norm": 0.3757690489292145, |
| "learning_rate": 0.0002708505367464905, |
| "loss": 6.6744, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.3616472288161834, |
| "grad_norm": 0.5821499228477478, |
| "learning_rate": 0.00027250206440957884, |
| "loss": 6.6673, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.3699040148622148, |
| "grad_norm": 0.5082701444625854, |
| "learning_rate": 0.0002741535920726672, |
| "loss": 6.6474, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.3781608009082464, |
| "grad_norm": 0.3153243362903595, |
| "learning_rate": 0.00027580511973575555, |
| "loss": 6.6526, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.386417586954278, |
| "grad_norm": 0.5374317169189453, |
| "learning_rate": 0.00027745664739884393, |
| "loss": 6.6552, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.3946743730003095, |
| "grad_norm": 0.3755870759487152, |
| "learning_rate": 0.0002791081750619323, |
| "loss": 6.6457, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.4029311590463411, |
| "grad_norm": 0.39434754848480225, |
| "learning_rate": 0.00028075970272502064, |
| "loss": 6.6485, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.4111879450923728, |
| "grad_norm": 0.6054360270500183, |
| "learning_rate": 0.000282411230388109, |
| "loss": 6.6515, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.4194447311384044, |
| "grad_norm": 0.4798774719238281, |
| "learning_rate": 0.00028406275805119735, |
| "loss": 6.6451, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.427701517184436, |
| "grad_norm": 0.4491690695285797, |
| "learning_rate": 0.0002857142857142857, |
| "loss": 6.6524, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.4359583032304675, |
| "grad_norm": 0.5428591370582581, |
| "learning_rate": 0.00028736581337737406, |
| "loss": 6.6488, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.444215089276499, |
| "grad_norm": 0.5588364601135254, |
| "learning_rate": 0.00028901734104046245, |
| "loss": 6.6417, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.4524718753225307, |
| "grad_norm": 0.2916945517063141, |
| "learning_rate": 0.00029066886870355083, |
| "loss": 6.6449, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.4607286613685622, |
| "grad_norm": 0.40670228004455566, |
| "learning_rate": 0.00029232039636663916, |
| "loss": 6.6301, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.4689854474145938, |
| "grad_norm": 0.3062325119972229, |
| "learning_rate": 0.0002939719240297275, |
| "loss": 6.6322, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.4772422334606254, |
| "grad_norm": 0.46464964747428894, |
| "learning_rate": 0.00029562345169281587, |
| "loss": 6.6401, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.485499019506657, |
| "grad_norm": 0.45769989490509033, |
| "learning_rate": 0.0002972749793559042, |
| "loss": 6.6398, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.4937558055526887, |
| "grad_norm": 0.45375707745552063, |
| "learning_rate": 0.0002989265070189926, |
| "loss": 6.6404, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.5020125915987204, |
| "grad_norm": 0.47998979687690735, |
| "learning_rate": 0.00030057803468208096, |
| "loss": 6.6322, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.5102693776447518, |
| "grad_norm": 0.5479523539543152, |
| "learning_rate": 0.0003022295623451693, |
| "loss": 6.623, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.5185261636907834, |
| "grad_norm": 0.6078441143035889, |
| "learning_rate": 0.00030388109000825767, |
| "loss": 6.6307, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.5267829497368148, |
| "grad_norm": 0.580402672290802, |
| "learning_rate": 0.000305532617671346, |
| "loss": 6.6283, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.5350397357828465, |
| "grad_norm": 0.4065966010093689, |
| "learning_rate": 0.0003071841453344343, |
| "loss": 6.6217, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.543296521828878, |
| "grad_norm": 0.46066299080848694, |
| "learning_rate": 0.0003088356729975227, |
| "loss": 6.6222, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.5515533078749097, |
| "grad_norm": 0.5753124356269836, |
| "learning_rate": 0.0003104872006606111, |
| "loss": 6.6148, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.5598100939209414, |
| "grad_norm": 0.4921572506427765, |
| "learning_rate": 0.0003121387283236994, |
| "loss": 6.6167, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.5680668799669728, |
| "grad_norm": 0.5580713748931885, |
| "learning_rate": 0.0003137902559867878, |
| "loss": 6.6081, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.5763236660130044, |
| "grad_norm": 0.5664856433868408, |
| "learning_rate": 0.00031544178364987613, |
| "loss": 6.6037, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.5845804520590359, |
| "grad_norm": 0.466069757938385, |
| "learning_rate": 0.00031709331131296446, |
| "loss": 6.6023, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.5928372381050675, |
| "grad_norm": 0.594886302947998, |
| "learning_rate": 0.00031874483897605284, |
| "loss": 6.606, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.6010940241510991, |
| "grad_norm": 0.6219709515571594, |
| "learning_rate": 0.0003203963666391412, |
| "loss": 6.6031, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.6093508101971308, |
| "grad_norm": 0.419695109128952, |
| "learning_rate": 0.0003220478943022296, |
| "loss": 6.6258, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.6176075962431624, |
| "grad_norm": 0.6769319772720337, |
| "learning_rate": 0.00032369942196531793, |
| "loss": 6.6192, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.625864382289194, |
| "grad_norm": 0.4297437369823456, |
| "learning_rate": 0.00032535094962840626, |
| "loss": 6.5908, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.6341211683352255, |
| "grad_norm": 0.5890651941299438, |
| "learning_rate": 0.00032700247729149464, |
| "loss": 6.598, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.642377954381257, |
| "grad_norm": 0.42893460392951965, |
| "learning_rate": 0.00032865400495458297, |
| "loss": 6.5953, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.6506347404272885, |
| "grad_norm": 0.6383348107337952, |
| "learning_rate": 0.00033030553261767135, |
| "loss": 6.5885, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.6588915264733202, |
| "grad_norm": 0.4384669363498688, |
| "learning_rate": 0.00033195706028075973, |
| "loss": 6.5994, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.6671483125193518, |
| "grad_norm": 0.6523682475090027, |
| "learning_rate": 0.00033360858794384806, |
| "loss": 6.5988, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.6754050985653834, |
| "grad_norm": 0.48499011993408203, |
| "learning_rate": 0.00033526011560693644, |
| "loss": 6.5926, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.683661884611415, |
| "grad_norm": 0.6244016885757446, |
| "learning_rate": 0.00033691164327002477, |
| "loss": 6.5968, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.6919186706574467, |
| "grad_norm": 0.4214613139629364, |
| "learning_rate": 0.0003385631709331131, |
| "loss": 6.5831, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.7001754567034781, |
| "grad_norm": 0.6971030235290527, |
| "learning_rate": 0.0003402146985962015, |
| "loss": 6.5887, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.7084322427495098, |
| "grad_norm": 0.6625652313232422, |
| "learning_rate": 0.00034186622625928986, |
| "loss": 6.594, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.7166890287955412, |
| "grad_norm": 0.33810535073280334, |
| "learning_rate": 0.0003435177539223782, |
| "loss": 6.5831, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.7249458148415728, |
| "grad_norm": 0.3338748812675476, |
| "learning_rate": 0.0003451692815854666, |
| "loss": 6.5915, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.7332026008876045, |
| "grad_norm": 0.31808751821517944, |
| "learning_rate": 0.0003468208092485549, |
| "loss": 6.5816, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.741459386933636, |
| "grad_norm": 0.403300404548645, |
| "learning_rate": 0.0003484723369116433, |
| "loss": 6.5827, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.7497161729796677, |
| "grad_norm": 0.48524588346481323, |
| "learning_rate": 0.0003501238645747316, |
| "loss": 6.5864, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.7579729590256994, |
| "grad_norm": 0.3188144266605377, |
| "learning_rate": 0.00035177539223782, |
| "loss": 6.5745, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.7662297450717308, |
| "grad_norm": 0.499127596616745, |
| "learning_rate": 0.0003534269199009084, |
| "loss": 6.5814, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.7744865311177624, |
| "grad_norm": 0.3072030544281006, |
| "learning_rate": 0.0003550784475639967, |
| "loss": 6.5764, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.7827433171637939, |
| "grad_norm": 0.45411720871925354, |
| "learning_rate": 0.00035672997522708503, |
| "loss": 6.5791, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.7910001032098255, |
| "grad_norm": 0.5847482085227966, |
| "learning_rate": 0.0003583815028901734, |
| "loss": 6.5736, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.7992568892558571, |
| "grad_norm": 0.7848684787750244, |
| "learning_rate": 0.00036003303055326174, |
| "loss": 6.5646, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.8075136753018888, |
| "grad_norm": 0.49951326847076416, |
| "learning_rate": 0.0003616845582163502, |
| "loss": 6.5686, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.8157704613479204, |
| "grad_norm": 0.3567127287387848, |
| "learning_rate": 0.0003633360858794385, |
| "loss": 6.5613, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.824027247393952, |
| "grad_norm": 0.4178829491138458, |
| "learning_rate": 0.00036498761354252684, |
| "loss": 6.5557, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.8322840334399835, |
| "grad_norm": 0.5776088237762451, |
| "learning_rate": 0.0003666391412056152, |
| "loss": 6.5701, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.8405408194860151, |
| "grad_norm": 0.44506144523620605, |
| "learning_rate": 0.00036829066886870355, |
| "loss": 6.5653, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.8487976055320465, |
| "grad_norm": 0.4636722803115845, |
| "learning_rate": 0.0003699421965317919, |
| "loss": 6.5795, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.8570543915780782, |
| "grad_norm": 0.7135621309280396, |
| "learning_rate": 0.00037159372419488026, |
| "loss": 6.5533, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.8653111776241098, |
| "grad_norm": 0.6034865379333496, |
| "learning_rate": 0.00037324525185796864, |
| "loss": 6.5739, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.8735679636701414, |
| "grad_norm": 0.40661871433258057, |
| "learning_rate": 0.000374896779521057, |
| "loss": 6.5669, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.881824749716173, |
| "grad_norm": 0.43377530574798584, |
| "learning_rate": 0.00037654830718414535, |
| "loss": 6.5516, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.8900815357622047, |
| "grad_norm": 0.4028869867324829, |
| "learning_rate": 0.0003781998348472337, |
| "loss": 6.5637, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.8983383218082361, |
| "grad_norm": 0.45178523659706116, |
| "learning_rate": 0.00037985136251032206, |
| "loss": 6.5599, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.9065951078542678, |
| "grad_norm": 0.4831728935241699, |
| "learning_rate": 0.0003815028901734104, |
| "loss": 6.5574, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.9148518939002992, |
| "grad_norm": 0.7209576368331909, |
| "learning_rate": 0.00038315441783649877, |
| "loss": 6.5532, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.9231086799463308, |
| "grad_norm": 0.8340161442756653, |
| "learning_rate": 0.00038480594549958715, |
| "loss": 6.5647, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.9313654659923625, |
| "grad_norm": 0.42559853196144104, |
| "learning_rate": 0.0003864574731626755, |
| "loss": 6.5493, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.9396222520383941, |
| "grad_norm": 0.44831937551498413, |
| "learning_rate": 0.00038810900082576386, |
| "loss": 6.5543, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.9478790380844258, |
| "grad_norm": 0.4785579442977905, |
| "learning_rate": 0.0003897605284888522, |
| "loss": 6.5594, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.9561358241304572, |
| "grad_norm": 0.5370727181434631, |
| "learning_rate": 0.0003914120561519405, |
| "loss": 6.5567, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.9643926101764888, |
| "grad_norm": 0.7570891976356506, |
| "learning_rate": 0.00039306358381502895, |
| "loss": 6.5502, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.9726493962225202, |
| "grad_norm": 0.3707781136035919, |
| "learning_rate": 0.0003947151114781173, |
| "loss": 6.5606, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.9809061822685519, |
| "grad_norm": 0.733705461025238, |
| "learning_rate": 0.0003963666391412056, |
| "loss": 6.539, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.9891629683145835, |
| "grad_norm": 0.6355495452880859, |
| "learning_rate": 0.000398018166804294, |
| "loss": 6.5604, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.9974197543606151, |
| "grad_norm": 0.7003040313720703, |
| "learning_rate": 0.0003996696944673823, |
| "loss": 6.5358, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.004954071627619, |
| "grad_norm": 0.7953137159347534, |
| "learning_rate": 0.00040132122213047065, |
| "loss": 5.9742, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.0132108576736507, |
| "grad_norm": 0.642260730266571, |
| "learning_rate": 0.00040297274979355903, |
| "loss": 6.5496, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.021467643719682, |
| "grad_norm": 0.534957230091095, |
| "learning_rate": 0.0004046242774566474, |
| "loss": 6.5353, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.0297244297657135, |
| "grad_norm": 0.7051903605461121, |
| "learning_rate": 0.0004062758051197358, |
| "loss": 6.5398, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.037981215811745, |
| "grad_norm": 0.6130774617195129, |
| "learning_rate": 0.0004079273327828241, |
| "loss": 6.5238, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.046238001857777, |
| "grad_norm": 0.5148051977157593, |
| "learning_rate": 0.00040957886044591245, |
| "loss": 6.5378, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.0544947879038085, |
| "grad_norm": 0.47121939063072205, |
| "learning_rate": 0.00041123038810900083, |
| "loss": 6.5309, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.06275157394984, |
| "grad_norm": 0.49403122067451477, |
| "learning_rate": 0.00041288191577208916, |
| "loss": 6.5399, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.0710083599958717, |
| "grad_norm": 0.6388276815414429, |
| "learning_rate": 0.00041453344343517754, |
| "loss": 6.5237, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.0792651460419034, |
| "grad_norm": 0.714484691619873, |
| "learning_rate": 0.0004161849710982659, |
| "loss": 6.5343, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.0875219320879346, |
| "grad_norm": 0.7904228568077087, |
| "learning_rate": 0.00041783649876135425, |
| "loss": 6.534, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.095778718133966, |
| "grad_norm": 0.8133807182312012, |
| "learning_rate": 0.00041948802642444264, |
| "loss": 6.5307, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.104035504179998, |
| "grad_norm": 0.6666577458381653, |
| "learning_rate": 0.00042113955408753096, |
| "loss": 6.5222, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.1122922902260295, |
| "grad_norm": 0.5386178493499756, |
| "learning_rate": 0.0004227910817506193, |
| "loss": 6.5311, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.120549076272061, |
| "grad_norm": 0.7694815397262573, |
| "learning_rate": 0.00042444260941370773, |
| "loss": 6.5248, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.1288058623180928, |
| "grad_norm": 0.5060898065567017, |
| "learning_rate": 0.00042609413707679606, |
| "loss": 6.5294, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.1370626483641244, |
| "grad_norm": 0.6927939057350159, |
| "learning_rate": 0.0004277456647398844, |
| "loss": 6.5161, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.145319434410156, |
| "grad_norm": 0.5611531734466553, |
| "learning_rate": 0.00042939719240297277, |
| "loss": 6.5093, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.1535762204561872, |
| "grad_norm": 0.5405219197273254, |
| "learning_rate": 0.0004310487200660611, |
| "loss": 6.5176, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.161833006502219, |
| "grad_norm": 0.6646769046783447, |
| "learning_rate": 0.0004327002477291495, |
| "loss": 6.5141, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.1700897925482505, |
| "grad_norm": 0.4237206280231476, |
| "learning_rate": 0.0004343517753922378, |
| "loss": 6.5156, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.178346578594282, |
| "grad_norm": 0.5155819654464722, |
| "learning_rate": 0.0004360033030553262, |
| "loss": 6.5128, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.186603364640314, |
| "grad_norm": 0.5071853399276733, |
| "learning_rate": 0.00043765483071841457, |
| "loss": 6.5145, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.1948601506863454, |
| "grad_norm": 0.4715791642665863, |
| "learning_rate": 0.0004393063583815029, |
| "loss": 6.4999, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.203116936732377, |
| "grad_norm": 0.7622984647750854, |
| "learning_rate": 0.0004409578860445912, |
| "loss": 6.5098, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.2113737227784087, |
| "grad_norm": 0.7513878345489502, |
| "learning_rate": 0.0004426094137076796, |
| "loss": 6.5038, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.21963050882444, |
| "grad_norm": 0.5908055305480957, |
| "learning_rate": 0.00044426094137076794, |
| "loss": 6.4992, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.2278872948704715, |
| "grad_norm": 0.6824406981468201, |
| "learning_rate": 0.0004459124690338564, |
| "loss": 6.4816, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.236144080916503, |
| "grad_norm": 0.6593980193138123, |
| "learning_rate": 0.0004475639966969447, |
| "loss": 6.4793, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.244400866962535, |
| "grad_norm": 0.6030164361000061, |
| "learning_rate": 0.00044921552436003303, |
| "loss": 6.4714, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.2526576530085665, |
| "grad_norm": 0.5835041999816895, |
| "learning_rate": 0.0004508670520231214, |
| "loss": 6.4707, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.260914439054598, |
| "grad_norm": 0.7592746615409851, |
| "learning_rate": 0.00045251857968620974, |
| "loss": 6.46, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.2691712251006297, |
| "grad_norm": 0.596171498298645, |
| "learning_rate": 0.00045417010734929807, |
| "loss": 6.4632, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.277428011146661, |
| "grad_norm": 0.6832927465438843, |
| "learning_rate": 0.0004558216350123865, |
| "loss": 6.4563, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.2856847971926926, |
| "grad_norm": 0.6881881952285767, |
| "learning_rate": 0.00045747316267547483, |
| "loss": 6.4462, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.293941583238724, |
| "grad_norm": 0.6391133069992065, |
| "learning_rate": 0.0004591246903385632, |
| "loss": 6.4408, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.302198369284756, |
| "grad_norm": 0.55253666639328, |
| "learning_rate": 0.00046077621800165154, |
| "loss": 6.4379, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.3104551553307875, |
| "grad_norm": 0.7399817109107971, |
| "learning_rate": 0.00046242774566473987, |
| "loss": 6.427, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.318711941376819, |
| "grad_norm": 0.8651242256164551, |
| "learning_rate": 0.00046407927332782825, |
| "loss": 6.4348, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.3269687274228508, |
| "grad_norm": 0.7616157531738281, |
| "learning_rate": 0.0004657308009909166, |
| "loss": 6.4292, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.3352255134688824, |
| "grad_norm": 0.8812574148178101, |
| "learning_rate": 0.00046738232865400496, |
| "loss": 6.4302, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.343482299514914, |
| "grad_norm": 0.5720260143280029, |
| "learning_rate": 0.00046903385631709334, |
| "loss": 6.4159, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.3517390855609452, |
| "grad_norm": 0.6884876489639282, |
| "learning_rate": 0.0004706853839801817, |
| "loss": 6.4149, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.359995871606977, |
| "grad_norm": 0.8449527621269226, |
| "learning_rate": 0.00047233691164327006, |
| "loss": 6.4116, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.3682526576530085, |
| "grad_norm": 0.652077853679657, |
| "learning_rate": 0.0004739884393063584, |
| "loss": 6.4141, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.37650944369904, |
| "grad_norm": 0.7905910015106201, |
| "learning_rate": 0.0004756399669694467, |
| "loss": 6.3859, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.384766229745072, |
| "grad_norm": 0.6471400260925293, |
| "learning_rate": 0.00047729149463253515, |
| "loss": 6.3831, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.3930230157911034, |
| "grad_norm": 0.7152949571609497, |
| "learning_rate": 0.0004789430222956235, |
| "loss": 6.3814, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.401279801837135, |
| "grad_norm": 0.7103463411331177, |
| "learning_rate": 0.0004805945499587118, |
| "loss": 6.3843, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.4095365878831663, |
| "grad_norm": 0.8913406133651733, |
| "learning_rate": 0.0004822460776218002, |
| "loss": 6.3788, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.417793373929198, |
| "grad_norm": 0.7296728491783142, |
| "learning_rate": 0.0004838976052848885, |
| "loss": 6.3887, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.4260501599752295, |
| "grad_norm": 0.809280514717102, |
| "learning_rate": 0.00048554913294797684, |
| "loss": 6.3556, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.434306946021261, |
| "grad_norm": 0.7426701188087463, |
| "learning_rate": 0.0004872006606110653, |
| "loss": 6.3556, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.442563732067293, |
| "grad_norm": 0.6244359612464905, |
| "learning_rate": 0.0004888521882741537, |
| "loss": 6.3484, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.4508205181133245, |
| "grad_norm": 0.7139498591423035, |
| "learning_rate": 0.000490503715937242, |
| "loss": 6.344, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.459077304159356, |
| "grad_norm": 0.811773419380188, |
| "learning_rate": 0.0004921552436003303, |
| "loss": 6.3231, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.4673340902053877, |
| "grad_norm": 0.6412104368209839, |
| "learning_rate": 0.0004938067712634186, |
| "loss": 6.3251, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.4755908762514194, |
| "grad_norm": 1.169380784034729, |
| "learning_rate": 0.000495458298926507, |
| "loss": 6.3118, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.4838476622974506, |
| "grad_norm": 1.2334198951721191, |
| "learning_rate": 0.0004971098265895954, |
| "loss": 6.2942, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.492104448343482, |
| "grad_norm": 0.967470645904541, |
| "learning_rate": 0.0004987613542526837, |
| "loss": 6.2815, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.500361234389514, |
| "grad_norm": 0.8269219398498535, |
| "learning_rate": 0.0005004128819157721, |
| "loss": 6.2641, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.5086180204355455, |
| "grad_norm": 1.445989727973938, |
| "learning_rate": 0.0005020644095788605, |
| "loss": 6.2498, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.516874806481577, |
| "grad_norm": 1.0903043746948242, |
| "learning_rate": 0.0005037159372419488, |
| "loss": 6.233, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.5251315925276088, |
| "grad_norm": 1.0649830102920532, |
| "learning_rate": 0.0005053674649050372, |
| "loss": 6.2011, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.5333883785736404, |
| "grad_norm": 1.4002567529678345, |
| "learning_rate": 0.0005070189925681256, |
| "loss": 6.1885, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.5416451646196716, |
| "grad_norm": 1.6574758291244507, |
| "learning_rate": 0.0005086705202312138, |
| "loss": 6.1617, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.5499019506657032, |
| "grad_norm": 1.6543638706207275, |
| "learning_rate": 0.0005103220478943023, |
| "loss": 6.1348, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.558158736711735, |
| "grad_norm": 1.585507869720459, |
| "learning_rate": 0.0005119735755573906, |
| "loss": 6.1079, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.5664155227577665, |
| "grad_norm": 1.7207673788070679, |
| "learning_rate": 0.0005136251032204789, |
| "loss": 6.0665, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.574672308803798, |
| "grad_norm": 0.8735297918319702, |
| "learning_rate": 0.0005152766308835673, |
| "loss": 6.0345, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.58292909484983, |
| "grad_norm": 1.352499008178711, |
| "learning_rate": 0.0005169281585466557, |
| "loss": 6.0208, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.5911858808958614, |
| "grad_norm": 1.4594937562942505, |
| "learning_rate": 0.000518579686209744, |
| "loss": 5.9824, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.5994426669418926, |
| "grad_norm": 1.2508600950241089, |
| "learning_rate": 0.0005202312138728323, |
| "loss": 5.9507, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.6076994529879247, |
| "grad_norm": 1.80980384349823, |
| "learning_rate": 0.0005218827415359208, |
| "loss": 5.9357, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.615956239033956, |
| "grad_norm": 1.3844646215438843, |
| "learning_rate": 0.0005235342691990091, |
| "loss": 5.8944, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.6242130250799875, |
| "grad_norm": 1.927004098892212, |
| "learning_rate": 0.0005251857968620974, |
| "loss": 5.8612, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.632469811126019, |
| "grad_norm": 1.4003227949142456, |
| "learning_rate": 0.0005268373245251859, |
| "loss": 5.8304, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.640726597172051, |
| "grad_norm": 1.8084081411361694, |
| "learning_rate": 0.0005284888521882742, |
| "loss": 5.8157, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.6489833832180825, |
| "grad_norm": 1.6965852975845337, |
| "learning_rate": 0.0005301403798513625, |
| "loss": 5.7719, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.657240169264114, |
| "grad_norm": 1.2121639251708984, |
| "learning_rate": 0.0005317919075144508, |
| "loss": 5.7462, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.6654969553101457, |
| "grad_norm": 1.6979306936264038, |
| "learning_rate": 0.0005334434351775393, |
| "loss": 5.7426, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.673753741356177, |
| "grad_norm": 1.974894642829895, |
| "learning_rate": 0.0005350949628406276, |
| "loss": 5.704, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.6820105274022086, |
| "grad_norm": 1.8848650455474854, |
| "learning_rate": 0.0005367464905037159, |
| "loss": 5.6652, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.69026731344824, |
| "grad_norm": 1.4253407716751099, |
| "learning_rate": 0.0005383980181668044, |
| "loss": 5.652, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.698524099494272, |
| "grad_norm": 1.3455779552459717, |
| "learning_rate": 0.0005400495458298926, |
| "loss": 5.635, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.7067808855403035, |
| "grad_norm": 1.4485799074172974, |
| "learning_rate": 0.000541701073492981, |
| "loss": 5.6146, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.715037671586335, |
| "grad_norm": 2.0756478309631348, |
| "learning_rate": 0.0005433526011560694, |
| "loss": 5.6048, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.7232944576323668, |
| "grad_norm": 1.5905245542526245, |
| "learning_rate": 0.0005450041288191577, |
| "loss": 5.5905, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.731551243678398, |
| "grad_norm": 1.2772367000579834, |
| "learning_rate": 0.0005466556564822461, |
| "loss": 5.5564, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.7398080297244296, |
| "grad_norm": 1.7051196098327637, |
| "learning_rate": 0.0005483071841453344, |
| "loss": 5.5448, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.7480648157704612, |
| "grad_norm": 1.9072637557983398, |
| "learning_rate": 0.0005499587118084229, |
| "loss": 5.5277, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.756321601816493, |
| "grad_norm": 1.741525411605835, |
| "learning_rate": 0.0005516102394715111, |
| "loss": 5.5004, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.7645783878625245, |
| "grad_norm": 1.8459067344665527, |
| "learning_rate": 0.0005532617671345995, |
| "loss": 5.4817, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.772835173908556, |
| "grad_norm": 1.4545385837554932, |
| "learning_rate": 0.0005549132947976879, |
| "loss": 5.452, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.781091959954588, |
| "grad_norm": 1.9043197631835938, |
| "learning_rate": 0.0005565648224607762, |
| "loss": 5.4375, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.789348746000619, |
| "grad_norm": 1.4310576915740967, |
| "learning_rate": 0.0005582163501238646, |
| "loss": 5.4358, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.797605532046651, |
| "grad_norm": 2.0747084617614746, |
| "learning_rate": 0.000559867877786953, |
| "loss": 5.4096, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.8058623180926823, |
| "grad_norm": 1.9496272802352905, |
| "learning_rate": 0.0005615194054500413, |
| "loss": 5.4057, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.814119104138714, |
| "grad_norm": 1.805301547050476, |
| "learning_rate": 0.0005631709331131296, |
| "loss": 5.3882, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.8223758901847456, |
| "grad_norm": 1.5036094188690186, |
| "learning_rate": 0.000564822460776218, |
| "loss": 5.3605, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.830632676230777, |
| "grad_norm": 1.7904752492904663, |
| "learning_rate": 0.0005664739884393064, |
| "loss": 5.3395, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.838889462276809, |
| "grad_norm": 1.3491814136505127, |
| "learning_rate": 0.0005681255161023947, |
| "loss": 5.3045, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.8471462483228405, |
| "grad_norm": 1.4046028852462769, |
| "learning_rate": 0.0005697770437654831, |
| "loss": 5.2824, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.855403034368872, |
| "grad_norm": 1.8236334323883057, |
| "learning_rate": 0.0005714285714285714, |
| "loss": 5.2604, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.8636598204149033, |
| "grad_norm": 1.4702653884887695, |
| "learning_rate": 0.0005730800990916598, |
| "loss": 5.2299, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.871916606460935, |
| "grad_norm": 1.4314968585968018, |
| "learning_rate": 0.0005747316267547481, |
| "loss": 5.2004, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.8801733925069666, |
| "grad_norm": 2.0397439002990723, |
| "learning_rate": 0.0005763831544178365, |
| "loss": 5.1926, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.888430178552998, |
| "grad_norm": 1.335970163345337, |
| "learning_rate": 0.0005780346820809249, |
| "loss": 5.1421, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.89668696459903, |
| "grad_norm": 1.551160216331482, |
| "learning_rate": 0.0005796862097440132, |
| "loss": 5.1393, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.9049437506450615, |
| "grad_norm": 1.6374657154083252, |
| "learning_rate": 0.0005813377374071017, |
| "loss": 5.1064, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.913200536691093, |
| "grad_norm": 1.8045247793197632, |
| "learning_rate": 0.0005829892650701899, |
| "loss": 5.0742, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.9214573227371243, |
| "grad_norm": 1.6787662506103516, |
| "learning_rate": 0.0005846407927332783, |
| "loss": 5.0685, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.9297141087831564, |
| "grad_norm": 2.1116514205932617, |
| "learning_rate": 0.0005862923203963666, |
| "loss": 5.0505, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.9379708948291876, |
| "grad_norm": 1.637165904045105, |
| "learning_rate": 0.000587943848059455, |
| "loss": 5.0354, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.9462276808752192, |
| "grad_norm": 1.6193593740463257, |
| "learning_rate": 0.0005895953757225434, |
| "loss": 5.003, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.954484466921251, |
| "grad_norm": 1.4651744365692139, |
| "learning_rate": 0.0005912469033856317, |
| "loss": 4.9779, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.9627412529672825, |
| "grad_norm": 1.4064579010009766, |
| "learning_rate": 0.0005928984310487201, |
| "loss": 4.9413, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.970998039013314, |
| "grad_norm": 1.6661295890808105, |
| "learning_rate": 0.0005945499587118084, |
| "loss": 4.9438, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.979254825059346, |
| "grad_norm": 1.8025574684143066, |
| "learning_rate": 0.0005962014863748968, |
| "loss": 4.9145, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.9875116111053774, |
| "grad_norm": 2.135185718536377, |
| "learning_rate": 0.0005978530140379852, |
| "loss": 4.8906, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.9957683971514086, |
| "grad_norm": 1.839839220046997, |
| "learning_rate": 0.0005995045417010735, |
| "loss": 4.8848, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.0033027144184126, |
| "grad_norm": 1.8142980337142944, |
| "learning_rate": 0.0006011560693641619, |
| "loss": 4.4459, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.011559500464444, |
| "grad_norm": 1.6817476749420166, |
| "learning_rate": 0.0006028075970272501, |
| "loss": 4.8488, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.019816286510476, |
| "grad_norm": 1.583275318145752, |
| "learning_rate": 0.0006044591246903386, |
| "loss": 4.8085, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.0280730725565075, |
| "grad_norm": 1.5430843830108643, |
| "learning_rate": 0.0006061106523534269, |
| "loss": 4.7979, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.036329858602539, |
| "grad_norm": 1.5976840257644653, |
| "learning_rate": 0.0006077621800165153, |
| "loss": 4.7792, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.0445866446485708, |
| "grad_norm": 1.5955281257629395, |
| "learning_rate": 0.0006094137076796037, |
| "loss": 4.7536, |
| "step": 3690 |
| }, |
| { |
| "epoch": 3.052843430694602, |
| "grad_norm": 1.276328444480896, |
| "learning_rate": 0.000611065235342692, |
| "loss": 4.7247, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.0611002167406336, |
| "grad_norm": 1.3994117975234985, |
| "learning_rate": 0.0006127167630057804, |
| "loss": 4.726, |
| "step": 3710 |
| }, |
| { |
| "epoch": 3.0693570027866652, |
| "grad_norm": 1.333193063735962, |
| "learning_rate": 0.0006143682906688687, |
| "loss": 4.6887, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.077613788832697, |
| "grad_norm": 1.2973535060882568, |
| "learning_rate": 0.0006160198183319571, |
| "loss": 4.6584, |
| "step": 3730 |
| }, |
| { |
| "epoch": 3.0858705748787285, |
| "grad_norm": 1.3581771850585938, |
| "learning_rate": 0.0006176713459950454, |
| "loss": 4.6377, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.09412736092476, |
| "grad_norm": 1.2961112260818481, |
| "learning_rate": 0.0006193228736581337, |
| "loss": 4.6226, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.102384146970792, |
| "grad_norm": 1.2253503799438477, |
| "learning_rate": 0.0006209744013212222, |
| "loss": 4.6013, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.110640933016823, |
| "grad_norm": 1.4154301881790161, |
| "learning_rate": 0.0006226259289843105, |
| "loss": 4.5738, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.1188977190628546, |
| "grad_norm": 1.083807349205017, |
| "learning_rate": 0.0006242774566473988, |
| "loss": 4.5646, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.1271545051088863, |
| "grad_norm": 1.0674443244934082, |
| "learning_rate": 0.0006259289843104872, |
| "loss": 4.5446, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.135411291154918, |
| "grad_norm": 1.1654369831085205, |
| "learning_rate": 0.0006275805119735756, |
| "loss": 4.5398, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.1436680772009495, |
| "grad_norm": 1.1597775220870972, |
| "learning_rate": 0.0006292320396366639, |
| "loss": 4.5162, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.151924863246981, |
| "grad_norm": 1.133058786392212, |
| "learning_rate": 0.0006308835672997523, |
| "loss": 4.4877, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.160181649293013, |
| "grad_norm": 1.0915247201919556, |
| "learning_rate": 0.0006325350949628407, |
| "loss": 4.4808, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.1684384353390445, |
| "grad_norm": 1.137772560119629, |
| "learning_rate": 0.0006341866226259289, |
| "loss": 4.4562, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.1766952213850757, |
| "grad_norm": 1.1915597915649414, |
| "learning_rate": 0.0006358381502890173, |
| "loss": 4.4448, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.1849520074311073, |
| "grad_norm": 1.2536723613739014, |
| "learning_rate": 0.0006374896779521057, |
| "loss": 4.4146, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.193208793477139, |
| "grad_norm": 1.0724738836288452, |
| "learning_rate": 0.0006391412056151941, |
| "loss": 4.4128, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.2014655795231706, |
| "grad_norm": 1.0709177255630493, |
| "learning_rate": 0.0006407927332782824, |
| "loss": 4.3958, |
| "step": 3880 |
| }, |
| { |
| "epoch": 3.209722365569202, |
| "grad_norm": 1.0337883234024048, |
| "learning_rate": 0.0006424442609413708, |
| "loss": 4.379, |
| "step": 3890 |
| }, |
| { |
| "epoch": 3.217979151615234, |
| "grad_norm": 0.9574352502822876, |
| "learning_rate": 0.0006440957886044592, |
| "loss": 4.3577, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.2262359376612655, |
| "grad_norm": 0.9089698791503906, |
| "learning_rate": 0.0006457473162675474, |
| "loss": 4.3324, |
| "step": 3910 |
| }, |
| { |
| "epoch": 3.234492723707297, |
| "grad_norm": 1.1630803346633911, |
| "learning_rate": 0.0006473988439306359, |
| "loss": 4.3419, |
| "step": 3920 |
| }, |
| { |
| "epoch": 3.2427495097533283, |
| "grad_norm": 1.068848967552185, |
| "learning_rate": 0.0006490503715937242, |
| "loss": 4.3209, |
| "step": 3930 |
| }, |
| { |
| "epoch": 3.25100629579936, |
| "grad_norm": 0.9788937568664551, |
| "learning_rate": 0.0006507018992568125, |
| "loss": 4.2982, |
| "step": 3940 |
| }, |
| { |
| "epoch": 3.2592630818453916, |
| "grad_norm": 0.9281165599822998, |
| "learning_rate": 0.000652353426919901, |
| "loss": 4.2847, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.2675198678914232, |
| "grad_norm": 0.9365155696868896, |
| "learning_rate": 0.0006540049545829893, |
| "loss": 4.2756, |
| "step": 3960 |
| }, |
| { |
| "epoch": 3.275776653937455, |
| "grad_norm": 0.8847097158432007, |
| "learning_rate": 0.0006556564822460776, |
| "loss": 4.2646, |
| "step": 3970 |
| }, |
| { |
| "epoch": 3.2840334399834865, |
| "grad_norm": 0.8232343196868896, |
| "learning_rate": 0.0006573080099091659, |
| "loss": 4.2546, |
| "step": 3980 |
| }, |
| { |
| "epoch": 3.292290226029518, |
| "grad_norm": 0.870100736618042, |
| "learning_rate": 0.0006589595375722544, |
| "loss": 4.2119, |
| "step": 3990 |
| }, |
| { |
| "epoch": 3.3005470120755493, |
| "grad_norm": 0.8268401622772217, |
| "learning_rate": 0.0006606110652353427, |
| "loss": 4.2242, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.308803798121581, |
| "grad_norm": 0.9999098777770996, |
| "learning_rate": 0.000662262592898431, |
| "loss": 4.1925, |
| "step": 4010 |
| }, |
| { |
| "epoch": 3.3170605841676126, |
| "grad_norm": 0.8971749544143677, |
| "learning_rate": 0.0006639141205615195, |
| "loss": 4.1838, |
| "step": 4020 |
| }, |
| { |
| "epoch": 3.3253173702136443, |
| "grad_norm": 1.0182358026504517, |
| "learning_rate": 0.0006655656482246078, |
| "loss": 4.1679, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.333574156259676, |
| "grad_norm": 0.9021536707878113, |
| "learning_rate": 0.0006672171758876961, |
| "loss": 4.1573, |
| "step": 4040 |
| }, |
| { |
| "epoch": 3.3418309423057075, |
| "grad_norm": 0.8236122131347656, |
| "learning_rate": 0.0006688687035507845, |
| "loss": 4.1271, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.350087728351739, |
| "grad_norm": 0.9135034084320068, |
| "learning_rate": 0.0006705202312138729, |
| "loss": 4.1383, |
| "step": 4060 |
| }, |
| { |
| "epoch": 3.358344514397771, |
| "grad_norm": 0.9369881749153137, |
| "learning_rate": 0.0006721717588769612, |
| "loss": 4.1254, |
| "step": 4070 |
| }, |
| { |
| "epoch": 3.3666013004438025, |
| "grad_norm": 0.8571922183036804, |
| "learning_rate": 0.0006738232865400495, |
| "loss": 4.1155, |
| "step": 4080 |
| }, |
| { |
| "epoch": 3.3748580864898337, |
| "grad_norm": 0.7268726825714111, |
| "learning_rate": 0.000675474814203138, |
| "loss": 4.1042, |
| "step": 4090 |
| }, |
| { |
| "epoch": 3.3831148725358653, |
| "grad_norm": 0.7929525375366211, |
| "learning_rate": 0.0006771263418662262, |
| "loss": 4.0846, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.391371658581897, |
| "grad_norm": 0.9496756792068481, |
| "learning_rate": 0.0006787778695293146, |
| "loss": 4.0856, |
| "step": 4110 |
| }, |
| { |
| "epoch": 3.3996284446279286, |
| "grad_norm": 0.9352908730506897, |
| "learning_rate": 0.000680429397192403, |
| "loss": 4.0632, |
| "step": 4120 |
| }, |
| { |
| "epoch": 3.40788523067396, |
| "grad_norm": 0.7630198001861572, |
| "learning_rate": 0.0006820809248554913, |
| "loss": 4.0605, |
| "step": 4130 |
| }, |
| { |
| "epoch": 3.416142016719992, |
| "grad_norm": 0.8446700572967529, |
| "learning_rate": 0.0006837324525185797, |
| "loss": 4.0639, |
| "step": 4140 |
| }, |
| { |
| "epoch": 3.4243988027660235, |
| "grad_norm": 0.8067805767059326, |
| "learning_rate": 0.0006853839801816681, |
| "loss": 4.0482, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.4326555888120547, |
| "grad_norm": 0.9675975441932678, |
| "learning_rate": 0.0006870355078447564, |
| "loss": 4.0384, |
| "step": 4160 |
| }, |
| { |
| "epoch": 3.4409123748580863, |
| "grad_norm": 0.847297191619873, |
| "learning_rate": 0.0006886870355078447, |
| "loss": 4.0303, |
| "step": 4170 |
| }, |
| { |
| "epoch": 3.449169160904118, |
| "grad_norm": 0.8416860699653625, |
| "learning_rate": 0.0006903385631709331, |
| "loss": 4.0099, |
| "step": 4180 |
| }, |
| { |
| "epoch": 3.4574259469501496, |
| "grad_norm": 0.8956720232963562, |
| "learning_rate": 0.0006919900908340216, |
| "loss": 4.0159, |
| "step": 4190 |
| }, |
| { |
| "epoch": 3.4656827329961812, |
| "grad_norm": 0.8465309739112854, |
| "learning_rate": 0.0006936416184971098, |
| "loss": 3.997, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.473939519042213, |
| "grad_norm": 0.8289422988891602, |
| "learning_rate": 0.0006952931461601982, |
| "loss": 3.9781, |
| "step": 4210 |
| }, |
| { |
| "epoch": 3.4821963050882445, |
| "grad_norm": 0.8076528906822205, |
| "learning_rate": 0.0006969446738232866, |
| "loss": 3.9801, |
| "step": 4220 |
| }, |
| { |
| "epoch": 3.490453091134276, |
| "grad_norm": 0.8516877293586731, |
| "learning_rate": 0.0006985962014863749, |
| "loss": 3.9584, |
| "step": 4230 |
| }, |
| { |
| "epoch": 3.498709877180308, |
| "grad_norm": 0.8170517683029175, |
| "learning_rate": 0.0007002477291494632, |
| "loss": 3.9598, |
| "step": 4240 |
| }, |
| { |
| "epoch": 3.506966663226339, |
| "grad_norm": 0.8019408583641052, |
| "learning_rate": 0.0007018992568125517, |
| "loss": 3.947, |
| "step": 4250 |
| }, |
| { |
| "epoch": 3.5152234492723706, |
| "grad_norm": 0.8144872188568115, |
| "learning_rate": 0.00070355078447564, |
| "loss": 3.9539, |
| "step": 4260 |
| }, |
| { |
| "epoch": 3.5234802353184023, |
| "grad_norm": 0.7871933579444885, |
| "learning_rate": 0.0007052023121387283, |
| "loss": 3.9532, |
| "step": 4270 |
| }, |
| { |
| "epoch": 3.531737021364434, |
| "grad_norm": 0.8461719751358032, |
| "learning_rate": 0.0007068538398018168, |
| "loss": 3.9195, |
| "step": 4280 |
| }, |
| { |
| "epoch": 3.5399938074104655, |
| "grad_norm": 0.8719236850738525, |
| "learning_rate": 0.000708505367464905, |
| "loss": 3.9151, |
| "step": 4290 |
| }, |
| { |
| "epoch": 3.548250593456497, |
| "grad_norm": 0.8670084476470947, |
| "learning_rate": 0.0007101568951279934, |
| "loss": 3.9101, |
| "step": 4300 |
| }, |
| { |
| "epoch": 3.556507379502529, |
| "grad_norm": 0.7470918893814087, |
| "learning_rate": 0.0007118084227910817, |
| "loss": 3.9066, |
| "step": 4310 |
| }, |
| { |
| "epoch": 3.56476416554856, |
| "grad_norm": 0.7734981775283813, |
| "learning_rate": 0.0007134599504541701, |
| "loss": 3.9026, |
| "step": 4320 |
| }, |
| { |
| "epoch": 3.5730209515945917, |
| "grad_norm": 0.8196832537651062, |
| "learning_rate": 0.0007151114781172585, |
| "loss": 3.8973, |
| "step": 4330 |
| }, |
| { |
| "epoch": 3.5812777376406233, |
| "grad_norm": 0.796661376953125, |
| "learning_rate": 0.0007167630057803468, |
| "loss": 3.8841, |
| "step": 4340 |
| }, |
| { |
| "epoch": 3.589534523686655, |
| "grad_norm": 0.7249051928520203, |
| "learning_rate": 0.0007184145334434353, |
| "loss": 3.8769, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.5977913097326866, |
| "grad_norm": 0.8851024508476257, |
| "learning_rate": 0.0007200660611065235, |
| "loss": 3.8695, |
| "step": 4360 |
| }, |
| { |
| "epoch": 3.606048095778718, |
| "grad_norm": 0.8773587942123413, |
| "learning_rate": 0.0007217175887696119, |
| "loss": 3.8665, |
| "step": 4370 |
| }, |
| { |
| "epoch": 3.61430488182475, |
| "grad_norm": 0.7050719857215881, |
| "learning_rate": 0.0007233691164327004, |
| "loss": 3.8632, |
| "step": 4380 |
| }, |
| { |
| "epoch": 3.622561667870781, |
| "grad_norm": 0.8079128861427307, |
| "learning_rate": 0.0007250206440957886, |
| "loss": 3.8456, |
| "step": 4390 |
| }, |
| { |
| "epoch": 3.630818453916813, |
| "grad_norm": 0.8955399394035339, |
| "learning_rate": 0.000726672171758877, |
| "loss": 3.8456, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.6390752399628443, |
| "grad_norm": 0.8179429769515991, |
| "learning_rate": 0.0007283236994219653, |
| "loss": 3.8317, |
| "step": 4410 |
| }, |
| { |
| "epoch": 3.647332026008876, |
| "grad_norm": 0.8095247149467468, |
| "learning_rate": 0.0007299752270850537, |
| "loss": 3.8363, |
| "step": 4420 |
| }, |
| { |
| "epoch": 3.6555888120549076, |
| "grad_norm": 0.7325819730758667, |
| "learning_rate": 0.000731626754748142, |
| "loss": 3.8162, |
| "step": 4430 |
| }, |
| { |
| "epoch": 3.6638455981009392, |
| "grad_norm": 0.8401527404785156, |
| "learning_rate": 0.0007332782824112304, |
| "loss": 3.8204, |
| "step": 4440 |
| }, |
| { |
| "epoch": 3.672102384146971, |
| "grad_norm": 0.9044252634048462, |
| "learning_rate": 0.0007349298100743188, |
| "loss": 3.8106, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.6803591701930025, |
| "grad_norm": 0.8086848258972168, |
| "learning_rate": 0.0007365813377374071, |
| "loss": 3.7821, |
| "step": 4460 |
| }, |
| { |
| "epoch": 3.688615956239034, |
| "grad_norm": 0.718523383140564, |
| "learning_rate": 0.0007382328654004955, |
| "loss": 3.7964, |
| "step": 4470 |
| }, |
| { |
| "epoch": 3.6968727422850653, |
| "grad_norm": 0.84502112865448, |
| "learning_rate": 0.0007398843930635837, |
| "loss": 3.7868, |
| "step": 4480 |
| }, |
| { |
| "epoch": 3.705129528331097, |
| "grad_norm": 0.8375003337860107, |
| "learning_rate": 0.0007415359207266722, |
| "loss": 3.7848, |
| "step": 4490 |
| }, |
| { |
| "epoch": 3.7133863143771286, |
| "grad_norm": 0.8933425545692444, |
| "learning_rate": 0.0007431874483897605, |
| "loss": 3.776, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.7216431004231603, |
| "grad_norm": 0.7778623104095459, |
| "learning_rate": 0.0007448389760528488, |
| "loss": 3.7683, |
| "step": 4510 |
| }, |
| { |
| "epoch": 3.729899886469192, |
| "grad_norm": 0.704544186592102, |
| "learning_rate": 0.0007464905037159373, |
| "loss": 3.7699, |
| "step": 4520 |
| }, |
| { |
| "epoch": 3.7381566725152235, |
| "grad_norm": 0.8176696300506592, |
| "learning_rate": 0.0007481420313790256, |
| "loss": 3.7589, |
| "step": 4530 |
| }, |
| { |
| "epoch": 3.746413458561255, |
| "grad_norm": 0.8019874095916748, |
| "learning_rate": 0.000749793559042114, |
| "loss": 3.7464, |
| "step": 4540 |
| }, |
| { |
| "epoch": 3.7546702446072864, |
| "grad_norm": 0.8178896307945251, |
| "learning_rate": 0.0007514450867052023, |
| "loss": 3.742, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.7629270306533185, |
| "grad_norm": 0.7082737684249878, |
| "learning_rate": 0.0007530966143682907, |
| "loss": 3.7345, |
| "step": 4560 |
| }, |
| { |
| "epoch": 3.7711838166993497, |
| "grad_norm": 0.7834277749061584, |
| "learning_rate": 0.0007547481420313791, |
| "loss": 3.7127, |
| "step": 4570 |
| }, |
| { |
| "epoch": 3.7794406027453813, |
| "grad_norm": 0.7585816383361816, |
| "learning_rate": 0.0007563996696944674, |
| "loss": 3.7122, |
| "step": 4580 |
| }, |
| { |
| "epoch": 3.787697388791413, |
| "grad_norm": 0.7101882696151733, |
| "learning_rate": 0.0007580511973575558, |
| "loss": 3.7167, |
| "step": 4590 |
| }, |
| { |
| "epoch": 3.7959541748374446, |
| "grad_norm": 0.7938413619995117, |
| "learning_rate": 0.0007597027250206441, |
| "loss": 3.7045, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.804210960883476, |
| "grad_norm": 0.694128155708313, |
| "learning_rate": 0.0007613542526837324, |
| "loss": 3.6961, |
| "step": 4610 |
| }, |
| { |
| "epoch": 3.812467746929508, |
| "grad_norm": 0.7648592591285706, |
| "learning_rate": 0.0007630057803468208, |
| "loss": 3.7112, |
| "step": 4620 |
| }, |
| { |
| "epoch": 3.8207245329755395, |
| "grad_norm": 0.7412601709365845, |
| "learning_rate": 0.0007646573080099092, |
| "loss": 3.6992, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.8289813190215707, |
| "grad_norm": 0.8250954747200012, |
| "learning_rate": 0.0007663088356729975, |
| "loss": 3.6947, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.8372381050676023, |
| "grad_norm": 1.020845890045166, |
| "learning_rate": 0.0007679603633360859, |
| "loss": 3.6912, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.845494891113634, |
| "grad_norm": 0.714709997177124, |
| "learning_rate": 0.0007696118909991743, |
| "loss": 3.6897, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.8537516771596656, |
| "grad_norm": 0.8402379155158997, |
| "learning_rate": 0.0007712634186622625, |
| "loss": 3.6728, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.8620084632056972, |
| "grad_norm": 0.8631258010864258, |
| "learning_rate": 0.000772914946325351, |
| "loss": 3.6656, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.870265249251729, |
| "grad_norm": 0.7027178406715393, |
| "learning_rate": 0.0007745664739884393, |
| "loss": 3.6668, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.8785220352977605, |
| "grad_norm": 0.8082193732261658, |
| "learning_rate": 0.0007762180016515277, |
| "loss": 3.6604, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.8867788213437917, |
| "grad_norm": 0.7741190791130066, |
| "learning_rate": 0.000777869529314616, |
| "loss": 3.6598, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.8950356073898234, |
| "grad_norm": 0.8848899006843567, |
| "learning_rate": 0.0007795210569777044, |
| "loss": 3.6496, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.903292393435855, |
| "grad_norm": 0.8357560634613037, |
| "learning_rate": 0.0007811725846407928, |
| "loss": 3.6361, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.9115491794818866, |
| "grad_norm": 0.7973350286483765, |
| "learning_rate": 0.000782824112303881, |
| "loss": 3.6356, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.9198059655279183, |
| "grad_norm": 0.8782063126564026, |
| "learning_rate": 0.0007844756399669695, |
| "loss": 3.6421, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.92806275157395, |
| "grad_norm": 0.7487813234329224, |
| "learning_rate": 0.0007861271676300579, |
| "loss": 3.6245, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.9363195376199815, |
| "grad_norm": 0.766007125377655, |
| "learning_rate": 0.0007877786952931461, |
| "loss": 3.6219, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.9445763236660127, |
| "grad_norm": 0.7262325882911682, |
| "learning_rate": 0.0007894302229562346, |
| "loss": 3.6152, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.952833109712045, |
| "grad_norm": 0.837656557559967, |
| "learning_rate": 0.0007910817506193229, |
| "loss": 3.6129, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.961089895758076, |
| "grad_norm": 0.7486396431922913, |
| "learning_rate": 0.0007927332782824112, |
| "loss": 3.6017, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.9693466818041077, |
| "grad_norm": 0.7907805442810059, |
| "learning_rate": 0.0007943848059454995, |
| "loss": 3.5954, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.9776034678501393, |
| "grad_norm": 0.8688389658927917, |
| "learning_rate": 0.000796036333608588, |
| "loss": 3.5994, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.985860253896171, |
| "grad_norm": 0.8377218842506409, |
| "learning_rate": 0.0007976878612716763, |
| "loss": 3.5908, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.9941170399422026, |
| "grad_norm": 0.7856019139289856, |
| "learning_rate": 0.0007993393889347646, |
| "loss": 3.5889, |
| "step": 4840 |
| }, |
| { |
| "epoch": 4.001651357209206, |
| "grad_norm": 0.7975202202796936, |
| "learning_rate": 0.0008009909165978531, |
| "loss": 3.2828, |
| "step": 4850 |
| }, |
| { |
| "epoch": 4.009908143255238, |
| "grad_norm": 0.8581557869911194, |
| "learning_rate": 0.0008026424442609413, |
| "loss": 3.5907, |
| "step": 4860 |
| }, |
| { |
| "epoch": 4.018164929301269, |
| "grad_norm": 0.8292841911315918, |
| "learning_rate": 0.0008042939719240297, |
| "loss": 3.5775, |
| "step": 4870 |
| }, |
| { |
| "epoch": 4.026421715347301, |
| "grad_norm": 0.7097908854484558, |
| "learning_rate": 0.0008059454995871181, |
| "loss": 3.5703, |
| "step": 4880 |
| }, |
| { |
| "epoch": 4.034678501393333, |
| "grad_norm": 0.8221850395202637, |
| "learning_rate": 0.0008075970272502065, |
| "loss": 3.5639, |
| "step": 4890 |
| }, |
| { |
| "epoch": 4.042935287439364, |
| "grad_norm": 0.7759121656417847, |
| "learning_rate": 0.0008092485549132948, |
| "loss": 3.5577, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.051192073485396, |
| "grad_norm": 0.8487065434455872, |
| "learning_rate": 0.0008109000825763832, |
| "loss": 3.5502, |
| "step": 4910 |
| }, |
| { |
| "epoch": 4.059448859531427, |
| "grad_norm": 0.7345426082611084, |
| "learning_rate": 0.0008125516102394716, |
| "loss": 3.5432, |
| "step": 4920 |
| }, |
| { |
| "epoch": 4.067705645577459, |
| "grad_norm": 0.738944411277771, |
| "learning_rate": 0.0008142031379025598, |
| "loss": 3.5464, |
| "step": 4930 |
| }, |
| { |
| "epoch": 4.07596243162349, |
| "grad_norm": 0.8091252446174622, |
| "learning_rate": 0.0008158546655656482, |
| "loss": 3.5477, |
| "step": 4940 |
| }, |
| { |
| "epoch": 4.0842192176695225, |
| "grad_norm": 0.7931963801383972, |
| "learning_rate": 0.0008175061932287367, |
| "loss": 3.5353, |
| "step": 4950 |
| }, |
| { |
| "epoch": 4.092476003715554, |
| "grad_norm": 0.886758029460907, |
| "learning_rate": 0.0008191577208918249, |
| "loss": 3.5303, |
| "step": 4960 |
| }, |
| { |
| "epoch": 4.100732789761586, |
| "grad_norm": 0.7010697722434998, |
| "learning_rate": 0.0008208092485549133, |
| "loss": 3.5222, |
| "step": 4970 |
| }, |
| { |
| "epoch": 4.108989575807617, |
| "grad_norm": 0.8633137941360474, |
| "learning_rate": 0.0008224607762180017, |
| "loss": 3.5354, |
| "step": 4980 |
| }, |
| { |
| "epoch": 4.117246361853648, |
| "grad_norm": 0.8236711025238037, |
| "learning_rate": 0.00082411230388109, |
| "loss": 3.5233, |
| "step": 4990 |
| }, |
| { |
| "epoch": 4.12550314789968, |
| "grad_norm": 0.7535457015037537, |
| "learning_rate": 0.0008257638315441783, |
| "loss": 3.5156, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.133759933945711, |
| "grad_norm": 0.7200325727462769, |
| "learning_rate": 0.0008274153592072668, |
| "loss": 3.5292, |
| "step": 5010 |
| }, |
| { |
| "epoch": 4.1420167199917435, |
| "grad_norm": 0.6595053672790527, |
| "learning_rate": 0.0008290668868703551, |
| "loss": 3.5097, |
| "step": 5020 |
| }, |
| { |
| "epoch": 4.150273506037775, |
| "grad_norm": 0.8142825961112976, |
| "learning_rate": 0.0008307184145334434, |
| "loss": 3.5125, |
| "step": 5030 |
| }, |
| { |
| "epoch": 4.158530292083807, |
| "grad_norm": 0.8555150628089905, |
| "learning_rate": 0.0008323699421965319, |
| "loss": 3.5013, |
| "step": 5040 |
| }, |
| { |
| "epoch": 4.166787078129838, |
| "grad_norm": 0.9074802994728088, |
| "learning_rate": 0.0008340214698596202, |
| "loss": 3.5021, |
| "step": 5050 |
| }, |
| { |
| "epoch": 4.175043864175869, |
| "grad_norm": 0.9074805974960327, |
| "learning_rate": 0.0008356729975227085, |
| "loss": 3.5052, |
| "step": 5060 |
| }, |
| { |
| "epoch": 4.183300650221901, |
| "grad_norm": 0.7932230234146118, |
| "learning_rate": 0.0008373245251857968, |
| "loss": 3.5048, |
| "step": 5070 |
| }, |
| { |
| "epoch": 4.191557436267932, |
| "grad_norm": 0.8132278323173523, |
| "learning_rate": 0.0008389760528488853, |
| "loss": 3.4994, |
| "step": 5080 |
| }, |
| { |
| "epoch": 4.1998142223139645, |
| "grad_norm": 0.7839681506156921, |
| "learning_rate": 0.0008406275805119736, |
| "loss": 3.4905, |
| "step": 5090 |
| }, |
| { |
| "epoch": 4.208071008359996, |
| "grad_norm": 0.7674237489700317, |
| "learning_rate": 0.0008422791081750619, |
| "loss": 3.4764, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.216327794406028, |
| "grad_norm": 0.945733904838562, |
| "learning_rate": 0.0008439306358381504, |
| "loss": 3.48, |
| "step": 5110 |
| }, |
| { |
| "epoch": 4.224584580452059, |
| "grad_norm": 0.8186456561088562, |
| "learning_rate": 0.0008455821635012386, |
| "loss": 3.4886, |
| "step": 5120 |
| }, |
| { |
| "epoch": 4.232841366498091, |
| "grad_norm": 0.7169471383094788, |
| "learning_rate": 0.000847233691164327, |
| "loss": 3.4863, |
| "step": 5130 |
| }, |
| { |
| "epoch": 4.241098152544122, |
| "grad_norm": 0.8962691426277161, |
| "learning_rate": 0.0008488852188274155, |
| "loss": 3.478, |
| "step": 5140 |
| }, |
| { |
| "epoch": 4.2493549385901535, |
| "grad_norm": 0.7380357980728149, |
| "learning_rate": 0.0008505367464905037, |
| "loss": 3.4759, |
| "step": 5150 |
| }, |
| { |
| "epoch": 4.2576117246361855, |
| "grad_norm": 0.7585932612419128, |
| "learning_rate": 0.0008521882741535921, |
| "loss": 3.4725, |
| "step": 5160 |
| }, |
| { |
| "epoch": 4.265868510682217, |
| "grad_norm": 0.8082647919654846, |
| "learning_rate": 0.0008538398018166804, |
| "loss": 3.4546, |
| "step": 5170 |
| }, |
| { |
| "epoch": 4.274125296728249, |
| "grad_norm": 0.8778128027915955, |
| "learning_rate": 0.0008554913294797688, |
| "loss": 3.4585, |
| "step": 5180 |
| }, |
| { |
| "epoch": 4.28238208277428, |
| "grad_norm": 0.7410449981689453, |
| "learning_rate": 0.0008571428571428571, |
| "loss": 3.4558, |
| "step": 5190 |
| }, |
| { |
| "epoch": 4.290638868820312, |
| "grad_norm": 0.9528789520263672, |
| "learning_rate": 0.0008587943848059455, |
| "loss": 3.4529, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.298895654866343, |
| "grad_norm": 0.8182398080825806, |
| "learning_rate": 0.000860445912469034, |
| "loss": 3.454, |
| "step": 5210 |
| }, |
| { |
| "epoch": 4.3071524409123745, |
| "grad_norm": 0.8230072855949402, |
| "learning_rate": 0.0008620974401321222, |
| "loss": 3.4561, |
| "step": 5220 |
| }, |
| { |
| "epoch": 4.315409226958407, |
| "grad_norm": 0.8617272973060608, |
| "learning_rate": 0.0008637489677952106, |
| "loss": 3.4471, |
| "step": 5230 |
| }, |
| { |
| "epoch": 4.323666013004438, |
| "grad_norm": 0.7993802428245544, |
| "learning_rate": 0.000865400495458299, |
| "loss": 3.4389, |
| "step": 5240 |
| }, |
| { |
| "epoch": 4.33192279905047, |
| "grad_norm": 0.9033696055412292, |
| "learning_rate": 0.0008670520231213873, |
| "loss": 3.4565, |
| "step": 5250 |
| }, |
| { |
| "epoch": 4.340179585096501, |
| "grad_norm": 0.7320334911346436, |
| "learning_rate": 0.0008687035507844756, |
| "loss": 3.448, |
| "step": 5260 |
| }, |
| { |
| "epoch": 4.348436371142533, |
| "grad_norm": 0.7799825072288513, |
| "learning_rate": 0.000870355078447564, |
| "loss": 3.4408, |
| "step": 5270 |
| }, |
| { |
| "epoch": 4.356693157188564, |
| "grad_norm": 0.7929351329803467, |
| "learning_rate": 0.0008720066061106524, |
| "loss": 3.4409, |
| "step": 5280 |
| }, |
| { |
| "epoch": 4.3649499432345955, |
| "grad_norm": 0.821667492389679, |
| "learning_rate": 0.0008736581337737407, |
| "loss": 3.4242, |
| "step": 5290 |
| }, |
| { |
| "epoch": 4.373206729280628, |
| "grad_norm": 0.7827187180519104, |
| "learning_rate": 0.0008753096614368291, |
| "loss": 3.4301, |
| "step": 5300 |
| }, |
| { |
| "epoch": 4.381463515326659, |
| "grad_norm": 0.7317821383476257, |
| "learning_rate": 0.0008769611890999174, |
| "loss": 3.4305, |
| "step": 5310 |
| }, |
| { |
| "epoch": 4.389720301372691, |
| "grad_norm": 0.7912768125534058, |
| "learning_rate": 0.0008786127167630058, |
| "loss": 3.4159, |
| "step": 5320 |
| }, |
| { |
| "epoch": 4.397977087418722, |
| "grad_norm": 0.8757966756820679, |
| "learning_rate": 0.0008802642444260942, |
| "loss": 3.4325, |
| "step": 5330 |
| }, |
| { |
| "epoch": 4.406233873464754, |
| "grad_norm": 0.7433684468269348, |
| "learning_rate": 0.0008819157720891825, |
| "loss": 3.4247, |
| "step": 5340 |
| }, |
| { |
| "epoch": 4.414490659510785, |
| "grad_norm": 0.8170937895774841, |
| "learning_rate": 0.0008835672997522709, |
| "loss": 3.422, |
| "step": 5350 |
| }, |
| { |
| "epoch": 4.422747445556817, |
| "grad_norm": 0.7130184769630432, |
| "learning_rate": 0.0008852188274153592, |
| "loss": 3.4137, |
| "step": 5360 |
| }, |
| { |
| "epoch": 4.431004231602849, |
| "grad_norm": 0.8633317947387695, |
| "learning_rate": 0.0008868703550784475, |
| "loss": 3.4101, |
| "step": 5370 |
| }, |
| { |
| "epoch": 4.43926101764888, |
| "grad_norm": 0.6866912841796875, |
| "learning_rate": 0.0008885218827415359, |
| "loss": 3.3982, |
| "step": 5380 |
| }, |
| { |
| "epoch": 4.447517803694912, |
| "grad_norm": 0.7277703285217285, |
| "learning_rate": 0.0008901734104046243, |
| "loss": 3.4016, |
| "step": 5390 |
| }, |
| { |
| "epoch": 4.455774589740943, |
| "grad_norm": 0.7942615151405334, |
| "learning_rate": 0.0008918249380677127, |
| "loss": 3.3961, |
| "step": 5400 |
| }, |
| { |
| "epoch": 4.464031375786975, |
| "grad_norm": 0.9028828144073486, |
| "learning_rate": 0.000893476465730801, |
| "loss": 3.3883, |
| "step": 5410 |
| }, |
| { |
| "epoch": 4.472288161833006, |
| "grad_norm": 0.6972488760948181, |
| "learning_rate": 0.0008951279933938894, |
| "loss": 3.3828, |
| "step": 5420 |
| }, |
| { |
| "epoch": 4.4805449478790385, |
| "grad_norm": 0.746987521648407, |
| "learning_rate": 0.0008967795210569777, |
| "loss": 3.4034, |
| "step": 5430 |
| }, |
| { |
| "epoch": 4.48880173392507, |
| "grad_norm": 0.8450544476509094, |
| "learning_rate": 0.0008984310487200661, |
| "loss": 3.3972, |
| "step": 5440 |
| }, |
| { |
| "epoch": 4.497058519971101, |
| "grad_norm": 0.7922062873840332, |
| "learning_rate": 0.0009000825763831544, |
| "loss": 3.3915, |
| "step": 5450 |
| }, |
| { |
| "epoch": 4.505315306017133, |
| "grad_norm": 0.6909300088882446, |
| "learning_rate": 0.0009017341040462428, |
| "loss": 3.3797, |
| "step": 5460 |
| }, |
| { |
| "epoch": 4.513572092063164, |
| "grad_norm": 0.7204782366752625, |
| "learning_rate": 0.0009033856317093312, |
| "loss": 3.3781, |
| "step": 5470 |
| }, |
| { |
| "epoch": 4.521828878109196, |
| "grad_norm": 0.8381190299987793, |
| "learning_rate": 0.0009050371593724195, |
| "loss": 3.37, |
| "step": 5480 |
| }, |
| { |
| "epoch": 4.530085664155227, |
| "grad_norm": 0.8983927965164185, |
| "learning_rate": 0.0009066886870355079, |
| "loss": 3.3779, |
| "step": 5490 |
| }, |
| { |
| "epoch": 4.5383424502012595, |
| "grad_norm": 0.7274337410926819, |
| "learning_rate": 0.0009083402146985961, |
| "loss": 3.3777, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.546599236247291, |
| "grad_norm": 0.7718445062637329, |
| "learning_rate": 0.0009099917423616846, |
| "loss": 3.3728, |
| "step": 5510 |
| }, |
| { |
| "epoch": 4.554856022293322, |
| "grad_norm": 0.7668145298957825, |
| "learning_rate": 0.000911643270024773, |
| "loss": 3.3756, |
| "step": 5520 |
| }, |
| { |
| "epoch": 4.563112808339354, |
| "grad_norm": 0.7342451810836792, |
| "learning_rate": 0.0009132947976878612, |
| "loss": 3.3628, |
| "step": 5530 |
| }, |
| { |
| "epoch": 4.571369594385385, |
| "grad_norm": 0.6988268494606018, |
| "learning_rate": 0.0009149463253509497, |
| "loss": 3.3683, |
| "step": 5540 |
| }, |
| { |
| "epoch": 4.579626380431417, |
| "grad_norm": 0.7202860116958618, |
| "learning_rate": 0.000916597853014038, |
| "loss": 3.3681, |
| "step": 5550 |
| }, |
| { |
| "epoch": 4.587883166477448, |
| "grad_norm": 0.8209216594696045, |
| "learning_rate": 0.0009182493806771264, |
| "loss": 3.3652, |
| "step": 5560 |
| }, |
| { |
| "epoch": 4.5961399525234805, |
| "grad_norm": 0.7195369601249695, |
| "learning_rate": 0.0009199009083402146, |
| "loss": 3.3596, |
| "step": 5570 |
| }, |
| { |
| "epoch": 4.604396738569512, |
| "grad_norm": 0.706985354423523, |
| "learning_rate": 0.0009215524360033031, |
| "loss": 3.357, |
| "step": 5580 |
| }, |
| { |
| "epoch": 4.612653524615544, |
| "grad_norm": 0.8509654402732849, |
| "learning_rate": 0.0009232039636663915, |
| "loss": 3.3697, |
| "step": 5590 |
| }, |
| { |
| "epoch": 4.620910310661575, |
| "grad_norm": 0.867950439453125, |
| "learning_rate": 0.0009248554913294797, |
| "loss": 3.3569, |
| "step": 5600 |
| }, |
| { |
| "epoch": 4.629167096707606, |
| "grad_norm": 0.7825170159339905, |
| "learning_rate": 0.0009265070189925682, |
| "loss": 3.3529, |
| "step": 5610 |
| }, |
| { |
| "epoch": 4.637423882753638, |
| "grad_norm": 0.7278405427932739, |
| "learning_rate": 0.0009281585466556565, |
| "loss": 3.3587, |
| "step": 5620 |
| }, |
| { |
| "epoch": 4.6456806687996695, |
| "grad_norm": 0.7527414560317993, |
| "learning_rate": 0.0009298100743187448, |
| "loss": 3.3555, |
| "step": 5630 |
| }, |
| { |
| "epoch": 4.6539374548457015, |
| "grad_norm": 0.6936579346656799, |
| "learning_rate": 0.0009314616019818332, |
| "loss": 3.3483, |
| "step": 5640 |
| }, |
| { |
| "epoch": 4.662194240891733, |
| "grad_norm": 0.7889197468757629, |
| "learning_rate": 0.0009331131296449216, |
| "loss": 3.3545, |
| "step": 5650 |
| }, |
| { |
| "epoch": 4.670451026937765, |
| "grad_norm": 0.7300989627838135, |
| "learning_rate": 0.0009347646573080099, |
| "loss": 3.3435, |
| "step": 5660 |
| }, |
| { |
| "epoch": 4.678707812983796, |
| "grad_norm": 0.7153423428535461, |
| "learning_rate": 0.0009364161849710983, |
| "loss": 3.3284, |
| "step": 5670 |
| }, |
| { |
| "epoch": 4.686964599029828, |
| "grad_norm": 0.716394305229187, |
| "learning_rate": 0.0009380677126341867, |
| "loss": 3.3285, |
| "step": 5680 |
| }, |
| { |
| "epoch": 4.695221385075859, |
| "grad_norm": 0.748479425907135, |
| "learning_rate": 0.0009397192402972749, |
| "loss": 3.3384, |
| "step": 5690 |
| }, |
| { |
| "epoch": 4.7034781711218905, |
| "grad_norm": 0.7145617604255676, |
| "learning_rate": 0.0009413707679603633, |
| "loss": 3.3297, |
| "step": 5700 |
| }, |
| { |
| "epoch": 4.711734957167923, |
| "grad_norm": 0.7331937551498413, |
| "learning_rate": 0.0009430222956234518, |
| "loss": 3.329, |
| "step": 5710 |
| }, |
| { |
| "epoch": 4.719991743213954, |
| "grad_norm": 0.8153555393218994, |
| "learning_rate": 0.0009446738232865401, |
| "loss": 3.326, |
| "step": 5720 |
| }, |
| { |
| "epoch": 4.728248529259986, |
| "grad_norm": 0.803225576877594, |
| "learning_rate": 0.0009463253509496284, |
| "loss": 3.3375, |
| "step": 5730 |
| }, |
| { |
| "epoch": 4.736505315306017, |
| "grad_norm": 0.7512196898460388, |
| "learning_rate": 0.0009479768786127168, |
| "loss": 3.3291, |
| "step": 5740 |
| }, |
| { |
| "epoch": 4.744762101352048, |
| "grad_norm": 0.7939392328262329, |
| "learning_rate": 0.0009496284062758052, |
| "loss": 3.3263, |
| "step": 5750 |
| }, |
| { |
| "epoch": 4.75301888739808, |
| "grad_norm": 0.6965025663375854, |
| "learning_rate": 0.0009512799339388934, |
| "loss": 3.3139, |
| "step": 5760 |
| }, |
| { |
| "epoch": 4.7612756734441115, |
| "grad_norm": 0.7877525687217712, |
| "learning_rate": 0.0009529314616019819, |
| "loss": 3.3244, |
| "step": 5770 |
| }, |
| { |
| "epoch": 4.769532459490144, |
| "grad_norm": 0.7251325249671936, |
| "learning_rate": 0.0009545829892650703, |
| "loss": 3.3212, |
| "step": 5780 |
| }, |
| { |
| "epoch": 4.777789245536175, |
| "grad_norm": 0.7695476412773132, |
| "learning_rate": 0.0009562345169281585, |
| "loss": 3.3155, |
| "step": 5790 |
| }, |
| { |
| "epoch": 4.786046031582207, |
| "grad_norm": 0.7189447283744812, |
| "learning_rate": 0.000957886044591247, |
| "loss": 3.3115, |
| "step": 5800 |
| }, |
| { |
| "epoch": 4.794302817628238, |
| "grad_norm": 0.762616753578186, |
| "learning_rate": 0.0009595375722543353, |
| "loss": 3.3193, |
| "step": 5810 |
| }, |
| { |
| "epoch": 4.80255960367427, |
| "grad_norm": 0.7273391485214233, |
| "learning_rate": 0.0009611890999174236, |
| "loss": 3.2929, |
| "step": 5820 |
| }, |
| { |
| "epoch": 4.810816389720301, |
| "grad_norm": 0.7077900171279907, |
| "learning_rate": 0.0009628406275805119, |
| "loss": 3.2998, |
| "step": 5830 |
| }, |
| { |
| "epoch": 4.8190731757663325, |
| "grad_norm": 0.7202854156494141, |
| "learning_rate": 0.0009644921552436004, |
| "loss": 3.3005, |
| "step": 5840 |
| }, |
| { |
| "epoch": 4.827329961812365, |
| "grad_norm": 0.758812427520752, |
| "learning_rate": 0.0009661436829066887, |
| "loss": 3.2916, |
| "step": 5850 |
| }, |
| { |
| "epoch": 4.835586747858396, |
| "grad_norm": 0.7209702730178833, |
| "learning_rate": 0.000967795210569777, |
| "loss": 3.3037, |
| "step": 5860 |
| }, |
| { |
| "epoch": 4.843843533904428, |
| "grad_norm": 0.7841807007789612, |
| "learning_rate": 0.0009694467382328655, |
| "loss": 3.305, |
| "step": 5870 |
| }, |
| { |
| "epoch": 4.852100319950459, |
| "grad_norm": 0.7753096222877502, |
| "learning_rate": 0.0009710982658959537, |
| "loss": 3.31, |
| "step": 5880 |
| }, |
| { |
| "epoch": 4.860357105996491, |
| "grad_norm": 0.7271151542663574, |
| "learning_rate": 0.0009727497935590421, |
| "loss": 3.2891, |
| "step": 5890 |
| }, |
| { |
| "epoch": 4.868613892042522, |
| "grad_norm": 0.7262945175170898, |
| "learning_rate": 0.0009744013212221306, |
| "loss": 3.3034, |
| "step": 5900 |
| }, |
| { |
| "epoch": 4.8768706780885545, |
| "grad_norm": 0.698553740978241, |
| "learning_rate": 0.0009760528488852189, |
| "loss": 3.2953, |
| "step": 5910 |
| }, |
| { |
| "epoch": 4.885127464134586, |
| "grad_norm": 0.8064056634902954, |
| "learning_rate": 0.0009777043765483073, |
| "loss": 3.287, |
| "step": 5920 |
| }, |
| { |
| "epoch": 4.893384250180617, |
| "grad_norm": 0.7037026286125183, |
| "learning_rate": 0.0009793559042113955, |
| "loss": 3.2919, |
| "step": 5930 |
| }, |
| { |
| "epoch": 4.901641036226649, |
| "grad_norm": 0.7652758359909058, |
| "learning_rate": 0.000981007431874484, |
| "loss": 3.2884, |
| "step": 5940 |
| }, |
| { |
| "epoch": 4.90989782227268, |
| "grad_norm": 0.7884798049926758, |
| "learning_rate": 0.0009826589595375722, |
| "loss": 3.2911, |
| "step": 5950 |
| }, |
| { |
| "epoch": 4.918154608318712, |
| "grad_norm": 0.6904022693634033, |
| "learning_rate": 0.0009843104872006606, |
| "loss": 3.2835, |
| "step": 5960 |
| }, |
| { |
| "epoch": 4.926411394364743, |
| "grad_norm": 0.724676251411438, |
| "learning_rate": 0.000985962014863749, |
| "loss": 3.2875, |
| "step": 5970 |
| }, |
| { |
| "epoch": 4.9346681804107755, |
| "grad_norm": 0.8747690916061401, |
| "learning_rate": 0.0009876135425268373, |
| "loss": 3.2847, |
| "step": 5980 |
| }, |
| { |
| "epoch": 4.942924966456807, |
| "grad_norm": 0.793563187122345, |
| "learning_rate": 0.0009892650701899257, |
| "loss": 3.2873, |
| "step": 5990 |
| }, |
| { |
| "epoch": 4.951181752502839, |
| "grad_norm": 0.7522445917129517, |
| "learning_rate": 0.000990916597853014, |
| "loss": 3.2833, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.95943853854887, |
| "grad_norm": 0.7040495276451111, |
| "learning_rate": 0.0009925681255161024, |
| "loss": 3.276, |
| "step": 6010 |
| }, |
| { |
| "epoch": 4.967695324594901, |
| "grad_norm": 0.7230294942855835, |
| "learning_rate": 0.0009942196531791908, |
| "loss": 3.255, |
| "step": 6020 |
| }, |
| { |
| "epoch": 4.975952110640933, |
| "grad_norm": 0.7457141280174255, |
| "learning_rate": 0.000995871180842279, |
| "loss": 3.2603, |
| "step": 6030 |
| }, |
| { |
| "epoch": 4.984208896686964, |
| "grad_norm": 0.6743199229240417, |
| "learning_rate": 0.0009975227085053675, |
| "loss": 3.2682, |
| "step": 6040 |
| }, |
| { |
| "epoch": 4.9924656827329965, |
| "grad_norm": 0.7625213861465454, |
| "learning_rate": 0.000999174236168456, |
| "loss": 3.2795, |
| "step": 6050 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.2576947510242462, |
| "learning_rate": 0.0009999082484631618, |
| "loss": 2.9799, |
| "step": 6060 |
| }, |
| { |
| "epoch": 5.008256786046031, |
| "grad_norm": 0.6936799883842468, |
| "learning_rate": 0.0009997247453894854, |
| "loss": 3.2597, |
| "step": 6070 |
| }, |
| { |
| "epoch": 5.016513572092063, |
| "grad_norm": 0.7428798675537109, |
| "learning_rate": 0.0009995412423158088, |
| "loss": 3.257, |
| "step": 6080 |
| }, |
| { |
| "epoch": 5.0247703581380945, |
| "grad_norm": 0.720032811164856, |
| "learning_rate": 0.0009993577392421323, |
| "loss": 3.2575, |
| "step": 6090 |
| }, |
| { |
| "epoch": 5.0330271441841266, |
| "grad_norm": 0.7074559330940247, |
| "learning_rate": 0.000999174236168456, |
| "loss": 3.2607, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.041283930230158, |
| "grad_norm": 0.713192880153656, |
| "learning_rate": 0.0009989907330947795, |
| "loss": 3.2604, |
| "step": 6110 |
| }, |
| { |
| "epoch": 5.04954071627619, |
| "grad_norm": 0.7576326727867126, |
| "learning_rate": 0.0009988072300211029, |
| "loss": 3.2604, |
| "step": 6120 |
| }, |
| { |
| "epoch": 5.057797502322221, |
| "grad_norm": 0.7588953375816345, |
| "learning_rate": 0.0009986237269474264, |
| "loss": 3.2452, |
| "step": 6130 |
| }, |
| { |
| "epoch": 5.066054288368252, |
| "grad_norm": 0.7049972414970398, |
| "learning_rate": 0.0009984402238737498, |
| "loss": 3.2655, |
| "step": 6140 |
| }, |
| { |
| "epoch": 5.074311074414284, |
| "grad_norm": 0.7510067820549011, |
| "learning_rate": 0.0009982567208000734, |
| "loss": 3.2359, |
| "step": 6150 |
| }, |
| { |
| "epoch": 5.0825678604603155, |
| "grad_norm": 0.7003161907196045, |
| "learning_rate": 0.000998073217726397, |
| "loss": 3.2391, |
| "step": 6160 |
| }, |
| { |
| "epoch": 5.090824646506348, |
| "grad_norm": 0.6871075630187988, |
| "learning_rate": 0.0009978897146527205, |
| "loss": 3.2435, |
| "step": 6170 |
| }, |
| { |
| "epoch": 5.099081432552379, |
| "grad_norm": 0.7129902243614197, |
| "learning_rate": 0.000997706211579044, |
| "loss": 3.2562, |
| "step": 6180 |
| }, |
| { |
| "epoch": 5.107338218598411, |
| "grad_norm": 0.6472665071487427, |
| "learning_rate": 0.0009975227085053675, |
| "loss": 3.235, |
| "step": 6190 |
| }, |
| { |
| "epoch": 5.115595004644442, |
| "grad_norm": 0.5904700756072998, |
| "learning_rate": 0.000997339205431691, |
| "loss": 3.2397, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.123851790690474, |
| "grad_norm": 0.6430502533912659, |
| "learning_rate": 0.0009971557023580146, |
| "loss": 3.2403, |
| "step": 6210 |
| }, |
| { |
| "epoch": 5.132108576736505, |
| "grad_norm": 0.7082479000091553, |
| "learning_rate": 0.000996972199284338, |
| "loss": 3.234, |
| "step": 6220 |
| }, |
| { |
| "epoch": 5.1403653627825365, |
| "grad_norm": 0.7464805245399475, |
| "learning_rate": 0.0009967886962106616, |
| "loss": 3.2178, |
| "step": 6230 |
| }, |
| { |
| "epoch": 5.148622148828569, |
| "grad_norm": 0.7043919563293457, |
| "learning_rate": 0.0009966051931369852, |
| "loss": 3.2287, |
| "step": 6240 |
| }, |
| { |
| "epoch": 5.1568789348746, |
| "grad_norm": 0.7245175838470459, |
| "learning_rate": 0.0009964216900633087, |
| "loss": 3.2352, |
| "step": 6250 |
| }, |
| { |
| "epoch": 5.165135720920632, |
| "grad_norm": 0.7505689859390259, |
| "learning_rate": 0.000996238186989632, |
| "loss": 3.2309, |
| "step": 6260 |
| }, |
| { |
| "epoch": 5.173392506966663, |
| "grad_norm": 0.7331697940826416, |
| "learning_rate": 0.0009960546839159557, |
| "loss": 3.2283, |
| "step": 6270 |
| }, |
| { |
| "epoch": 5.181649293012695, |
| "grad_norm": 0.6291115283966064, |
| "learning_rate": 0.000995871180842279, |
| "loss": 3.2195, |
| "step": 6280 |
| }, |
| { |
| "epoch": 5.189906079058726, |
| "grad_norm": 0.6958070397377014, |
| "learning_rate": 0.0009956876777686026, |
| "loss": 3.2255, |
| "step": 6290 |
| }, |
| { |
| "epoch": 5.198162865104758, |
| "grad_norm": 0.7305887937545776, |
| "learning_rate": 0.0009955041746949262, |
| "loss": 3.2089, |
| "step": 6300 |
| }, |
| { |
| "epoch": 5.20641965115079, |
| "grad_norm": 0.6707571744918823, |
| "learning_rate": 0.0009953206716212498, |
| "loss": 3.2188, |
| "step": 6310 |
| }, |
| { |
| "epoch": 5.214676437196821, |
| "grad_norm": 0.6920966506004333, |
| "learning_rate": 0.0009951371685475731, |
| "loss": 3.2288, |
| "step": 6320 |
| }, |
| { |
| "epoch": 5.222933223242853, |
| "grad_norm": 0.6794142127037048, |
| "learning_rate": 0.0009949536654738967, |
| "loss": 3.2277, |
| "step": 6330 |
| }, |
| { |
| "epoch": 5.231190009288884, |
| "grad_norm": 0.6484349966049194, |
| "learning_rate": 0.0009947701624002203, |
| "loss": 3.2327, |
| "step": 6340 |
| }, |
| { |
| "epoch": 5.239446795334916, |
| "grad_norm": 0.7271141409873962, |
| "learning_rate": 0.0009945866593265437, |
| "loss": 3.2253, |
| "step": 6350 |
| }, |
| { |
| "epoch": 5.247703581380947, |
| "grad_norm": 0.6956265568733215, |
| "learning_rate": 0.0009944031562528672, |
| "loss": 3.223, |
| "step": 6360 |
| }, |
| { |
| "epoch": 5.2559603674269795, |
| "grad_norm": 0.6692689061164856, |
| "learning_rate": 0.0009942196531791908, |
| "loss": 3.2136, |
| "step": 6370 |
| }, |
| { |
| "epoch": 5.264217153473011, |
| "grad_norm": 0.7369921803474426, |
| "learning_rate": 0.0009940361501055144, |
| "loss": 3.2078, |
| "step": 6380 |
| }, |
| { |
| "epoch": 5.272473939519042, |
| "grad_norm": 0.607624351978302, |
| "learning_rate": 0.0009938526470318378, |
| "loss": 3.21, |
| "step": 6390 |
| }, |
| { |
| "epoch": 5.280730725565074, |
| "grad_norm": 0.7406266331672668, |
| "learning_rate": 0.0009936691439581613, |
| "loss": 3.1989, |
| "step": 6400 |
| }, |
| { |
| "epoch": 5.288987511611105, |
| "grad_norm": 0.8179799318313599, |
| "learning_rate": 0.0009934856408844847, |
| "loss": 3.2034, |
| "step": 6410 |
| }, |
| { |
| "epoch": 5.297244297657137, |
| "grad_norm": 0.6973315477371216, |
| "learning_rate": 0.0009933021378108083, |
| "loss": 3.2205, |
| "step": 6420 |
| }, |
| { |
| "epoch": 5.305501083703168, |
| "grad_norm": 0.7340269088745117, |
| "learning_rate": 0.0009931186347371319, |
| "loss": 3.1984, |
| "step": 6430 |
| }, |
| { |
| "epoch": 5.3137578697492005, |
| "grad_norm": 0.6557930111885071, |
| "learning_rate": 0.0009929351316634554, |
| "loss": 3.1927, |
| "step": 6440 |
| }, |
| { |
| "epoch": 5.322014655795232, |
| "grad_norm": 0.7558073401451111, |
| "learning_rate": 0.0009927516285897788, |
| "loss": 3.1934, |
| "step": 6450 |
| }, |
| { |
| "epoch": 5.330271441841263, |
| "grad_norm": 0.7387466430664062, |
| "learning_rate": 0.0009925681255161024, |
| "loss": 3.1974, |
| "step": 6460 |
| }, |
| { |
| "epoch": 5.338528227887295, |
| "grad_norm": 0.6814390420913696, |
| "learning_rate": 0.000992384622442426, |
| "loss": 3.1997, |
| "step": 6470 |
| }, |
| { |
| "epoch": 5.346785013933326, |
| "grad_norm": 0.7269142866134644, |
| "learning_rate": 0.0009922011193687495, |
| "loss": 3.2081, |
| "step": 6480 |
| }, |
| { |
| "epoch": 5.355041799979358, |
| "grad_norm": 0.7551733255386353, |
| "learning_rate": 0.000992017616295073, |
| "loss": 3.2013, |
| "step": 6490 |
| }, |
| { |
| "epoch": 5.3632985860253894, |
| "grad_norm": 0.6852086186408997, |
| "learning_rate": 0.0009918341132213965, |
| "loss": 3.1996, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.3715553720714215, |
| "grad_norm": 0.7068336009979248, |
| "learning_rate": 0.00099165061014772, |
| "loss": 3.197, |
| "step": 6510 |
| }, |
| { |
| "epoch": 5.379812158117453, |
| "grad_norm": 0.6530427932739258, |
| "learning_rate": 0.0009914671070740434, |
| "loss": 3.1939, |
| "step": 6520 |
| }, |
| { |
| "epoch": 5.388068944163484, |
| "grad_norm": 0.7301046252250671, |
| "learning_rate": 0.000991283604000367, |
| "loss": 3.1973, |
| "step": 6530 |
| }, |
| { |
| "epoch": 5.396325730209516, |
| "grad_norm": 0.6607205271720886, |
| "learning_rate": 0.0009911001009266906, |
| "loss": 3.1844, |
| "step": 6540 |
| }, |
| { |
| "epoch": 5.404582516255547, |
| "grad_norm": 0.7713533043861389, |
| "learning_rate": 0.000990916597853014, |
| "loss": 3.1842, |
| "step": 6550 |
| }, |
| { |
| "epoch": 5.412839302301579, |
| "grad_norm": 0.6876000165939331, |
| "learning_rate": 0.0009907330947793375, |
| "loss": 3.1819, |
| "step": 6560 |
| }, |
| { |
| "epoch": 5.4210960883476105, |
| "grad_norm": 0.7219623327255249, |
| "learning_rate": 0.000990549591705661, |
| "loss": 3.1839, |
| "step": 6570 |
| }, |
| { |
| "epoch": 5.429352874393643, |
| "grad_norm": 0.5987829566001892, |
| "learning_rate": 0.0009903660886319847, |
| "loss": 3.1795, |
| "step": 6580 |
| }, |
| { |
| "epoch": 5.437609660439674, |
| "grad_norm": 0.6070224046707153, |
| "learning_rate": 0.000990182585558308, |
| "loss": 3.1921, |
| "step": 6590 |
| }, |
| { |
| "epoch": 5.445866446485706, |
| "grad_norm": 0.648897647857666, |
| "learning_rate": 0.0009899990824846316, |
| "loss": 3.1893, |
| "step": 6600 |
| }, |
| { |
| "epoch": 5.454123232531737, |
| "grad_norm": 0.6025215983390808, |
| "learning_rate": 0.0009898155794109552, |
| "loss": 3.1848, |
| "step": 6610 |
| }, |
| { |
| "epoch": 5.462380018577768, |
| "grad_norm": 0.6439123749732971, |
| "learning_rate": 0.0009896320763372788, |
| "loss": 3.1912, |
| "step": 6620 |
| }, |
| { |
| "epoch": 5.4706368046238, |
| "grad_norm": 0.6637933254241943, |
| "learning_rate": 0.0009894485732636021, |
| "loss": 3.1695, |
| "step": 6630 |
| }, |
| { |
| "epoch": 5.4788935906698315, |
| "grad_norm": 0.6922410726547241, |
| "learning_rate": 0.0009892650701899257, |
| "loss": 3.1787, |
| "step": 6640 |
| }, |
| { |
| "epoch": 5.487150376715864, |
| "grad_norm": 0.6986757516860962, |
| "learning_rate": 0.000989081567116249, |
| "loss": 3.1635, |
| "step": 6650 |
| }, |
| { |
| "epoch": 5.495407162761895, |
| "grad_norm": 0.7966019511222839, |
| "learning_rate": 0.0009888980640425727, |
| "loss": 3.1833, |
| "step": 6660 |
| }, |
| { |
| "epoch": 5.503663948807927, |
| "grad_norm": 0.6623300313949585, |
| "learning_rate": 0.0009887145609688962, |
| "loss": 3.1653, |
| "step": 6670 |
| }, |
| { |
| "epoch": 5.511920734853958, |
| "grad_norm": 0.6772647500038147, |
| "learning_rate": 0.0009885310578952198, |
| "loss": 3.185, |
| "step": 6680 |
| }, |
| { |
| "epoch": 5.52017752089999, |
| "grad_norm": 0.69997239112854, |
| "learning_rate": 0.0009883475548215432, |
| "loss": 3.1709, |
| "step": 6690 |
| }, |
| { |
| "epoch": 5.528434306946021, |
| "grad_norm": 0.6997058987617493, |
| "learning_rate": 0.0009881640517478668, |
| "loss": 3.1592, |
| "step": 6700 |
| }, |
| { |
| "epoch": 5.5366910929920525, |
| "grad_norm": 0.6697850227355957, |
| "learning_rate": 0.0009879805486741903, |
| "loss": 3.1748, |
| "step": 6710 |
| }, |
| { |
| "epoch": 5.544947879038085, |
| "grad_norm": 0.6371259093284607, |
| "learning_rate": 0.000987797045600514, |
| "loss": 3.1651, |
| "step": 6720 |
| }, |
| { |
| "epoch": 5.553204665084116, |
| "grad_norm": 0.6484488844871521, |
| "learning_rate": 0.0009876135425268373, |
| "loss": 3.1566, |
| "step": 6730 |
| }, |
| { |
| "epoch": 5.561461451130148, |
| "grad_norm": 0.6380677223205566, |
| "learning_rate": 0.0009874300394531609, |
| "loss": 3.1609, |
| "step": 6740 |
| }, |
| { |
| "epoch": 5.569718237176179, |
| "grad_norm": 0.7111419439315796, |
| "learning_rate": 0.0009872465363794844, |
| "loss": 3.1685, |
| "step": 6750 |
| }, |
| { |
| "epoch": 5.57797502322221, |
| "grad_norm": 0.7145205140113831, |
| "learning_rate": 0.000987063033305808, |
| "loss": 3.1482, |
| "step": 6760 |
| }, |
| { |
| "epoch": 5.586231809268242, |
| "grad_norm": 0.6077954769134521, |
| "learning_rate": 0.0009868795302321314, |
| "loss": 3.1577, |
| "step": 6770 |
| }, |
| { |
| "epoch": 5.594488595314274, |
| "grad_norm": 0.6183308959007263, |
| "learning_rate": 0.000986696027158455, |
| "loss": 3.159, |
| "step": 6780 |
| }, |
| { |
| "epoch": 5.602745381360306, |
| "grad_norm": 0.7077763080596924, |
| "learning_rate": 0.0009865125240847783, |
| "loss": 3.1583, |
| "step": 6790 |
| }, |
| { |
| "epoch": 5.611002167406337, |
| "grad_norm": 0.7214525938034058, |
| "learning_rate": 0.000986329021011102, |
| "loss": 3.1493, |
| "step": 6800 |
| }, |
| { |
| "epoch": 5.619258953452369, |
| "grad_norm": 0.6487968564033508, |
| "learning_rate": 0.0009861455179374255, |
| "loss": 3.142, |
| "step": 6810 |
| }, |
| { |
| "epoch": 5.6275157394984, |
| "grad_norm": 0.7745679020881653, |
| "learning_rate": 0.000985962014863749, |
| "loss": 3.1525, |
| "step": 6820 |
| }, |
| { |
| "epoch": 5.635772525544432, |
| "grad_norm": 0.7056599259376526, |
| "learning_rate": 0.0009857785117900724, |
| "loss": 3.144, |
| "step": 6830 |
| }, |
| { |
| "epoch": 5.644029311590463, |
| "grad_norm": 0.7179878354072571, |
| "learning_rate": 0.000985595008716396, |
| "loss": 3.1379, |
| "step": 6840 |
| }, |
| { |
| "epoch": 5.652286097636495, |
| "grad_norm": 0.6427177786827087, |
| "learning_rate": 0.0009854115056427196, |
| "loss": 3.1306, |
| "step": 6850 |
| }, |
| { |
| "epoch": 5.660542883682527, |
| "grad_norm": 0.6616296768188477, |
| "learning_rate": 0.0009852280025690432, |
| "loss": 3.1354, |
| "step": 6860 |
| }, |
| { |
| "epoch": 5.668799669728558, |
| "grad_norm": 0.6171796917915344, |
| "learning_rate": 0.0009850444994953665, |
| "loss": 3.1296, |
| "step": 6870 |
| }, |
| { |
| "epoch": 5.67705645577459, |
| "grad_norm": 0.7268235087394714, |
| "learning_rate": 0.0009848609964216901, |
| "loss": 3.1347, |
| "step": 6880 |
| }, |
| { |
| "epoch": 5.685313241820621, |
| "grad_norm": 0.7473070621490479, |
| "learning_rate": 0.0009846774933480137, |
| "loss": 3.14, |
| "step": 6890 |
| }, |
| { |
| "epoch": 5.693570027866653, |
| "grad_norm": 0.6529579162597656, |
| "learning_rate": 0.0009844939902743373, |
| "loss": 3.1364, |
| "step": 6900 |
| }, |
| { |
| "epoch": 5.701826813912684, |
| "grad_norm": 0.6876893043518066, |
| "learning_rate": 0.0009843104872006606, |
| "loss": 3.1376, |
| "step": 6910 |
| }, |
| { |
| "epoch": 5.7100835999587165, |
| "grad_norm": 0.7397525310516357, |
| "learning_rate": 0.000984126984126984, |
| "loss": 3.1271, |
| "step": 6920 |
| }, |
| { |
| "epoch": 5.718340386004748, |
| "grad_norm": 0.7049607634544373, |
| "learning_rate": 0.0009839434810533076, |
| "loss": 3.1335, |
| "step": 6930 |
| }, |
| { |
| "epoch": 5.726597172050779, |
| "grad_norm": 0.6563366651535034, |
| "learning_rate": 0.0009837599779796312, |
| "loss": 3.1174, |
| "step": 6940 |
| }, |
| { |
| "epoch": 5.734853958096811, |
| "grad_norm": 0.7188289761543274, |
| "learning_rate": 0.0009835764749059547, |
| "loss": 3.1344, |
| "step": 6950 |
| }, |
| { |
| "epoch": 5.743110744142842, |
| "grad_norm": 0.6964494585990906, |
| "learning_rate": 0.000983392971832278, |
| "loss": 3.1183, |
| "step": 6960 |
| }, |
| { |
| "epoch": 5.751367530188874, |
| "grad_norm": 0.6440771818161011, |
| "learning_rate": 0.0009832094687586017, |
| "loss": 3.125, |
| "step": 6970 |
| }, |
| { |
| "epoch": 5.7596243162349055, |
| "grad_norm": 0.6640235185623169, |
| "learning_rate": 0.0009830259656849253, |
| "loss": 3.1288, |
| "step": 6980 |
| }, |
| { |
| "epoch": 5.7678811022809375, |
| "grad_norm": 0.660474956035614, |
| "learning_rate": 0.0009828424626112488, |
| "loss": 3.1244, |
| "step": 6990 |
| }, |
| { |
| "epoch": 5.776137888326969, |
| "grad_norm": 0.6896589994430542, |
| "learning_rate": 0.0009826589595375722, |
| "loss": 3.1238, |
| "step": 7000 |
| }, |
| { |
| "epoch": 5.784394674373, |
| "grad_norm": 0.6928004026412964, |
| "learning_rate": 0.0009824754564638958, |
| "loss": 3.1281, |
| "step": 7010 |
| }, |
| { |
| "epoch": 5.792651460419032, |
| "grad_norm": 0.6702253222465515, |
| "learning_rate": 0.0009822919533902194, |
| "loss": 3.1199, |
| "step": 7020 |
| }, |
| { |
| "epoch": 5.800908246465063, |
| "grad_norm": 0.6199045777320862, |
| "learning_rate": 0.000982108450316543, |
| "loss": 3.1273, |
| "step": 7030 |
| }, |
| { |
| "epoch": 5.809165032511095, |
| "grad_norm": 0.6956904530525208, |
| "learning_rate": 0.0009819249472428663, |
| "loss": 3.1273, |
| "step": 7040 |
| }, |
| { |
| "epoch": 5.8174218185571265, |
| "grad_norm": 0.7308268547058105, |
| "learning_rate": 0.0009817414441691899, |
| "loss": 3.1214, |
| "step": 7050 |
| }, |
| { |
| "epoch": 5.825678604603159, |
| "grad_norm": 0.6409997940063477, |
| "learning_rate": 0.0009815579410955132, |
| "loss": 3.1102, |
| "step": 7060 |
| }, |
| { |
| "epoch": 5.83393539064919, |
| "grad_norm": 0.6429135203361511, |
| "learning_rate": 0.0009813744380218368, |
| "loss": 3.1109, |
| "step": 7070 |
| }, |
| { |
| "epoch": 5.842192176695221, |
| "grad_norm": 0.7045457363128662, |
| "learning_rate": 0.0009811909349481604, |
| "loss": 3.1168, |
| "step": 7080 |
| }, |
| { |
| "epoch": 5.850448962741253, |
| "grad_norm": 0.6149047613143921, |
| "learning_rate": 0.000981007431874484, |
| "loss": 3.0994, |
| "step": 7090 |
| }, |
| { |
| "epoch": 5.858705748787284, |
| "grad_norm": 0.6406427621841431, |
| "learning_rate": 0.0009808239288008073, |
| "loss": 3.1184, |
| "step": 7100 |
| }, |
| { |
| "epoch": 5.866962534833316, |
| "grad_norm": 0.6805707216262817, |
| "learning_rate": 0.000980640425727131, |
| "loss": 3.112, |
| "step": 7110 |
| }, |
| { |
| "epoch": 5.8752193208793475, |
| "grad_norm": 0.6262876987457275, |
| "learning_rate": 0.0009804569226534545, |
| "loss": 3.1104, |
| "step": 7120 |
| }, |
| { |
| "epoch": 5.88347610692538, |
| "grad_norm": 0.7171155214309692, |
| "learning_rate": 0.000980273419579778, |
| "loss": 3.1017, |
| "step": 7130 |
| }, |
| { |
| "epoch": 5.891732892971411, |
| "grad_norm": 0.6478092670440674, |
| "learning_rate": 0.0009800899165061014, |
| "loss": 3.0997, |
| "step": 7140 |
| }, |
| { |
| "epoch": 5.899989679017443, |
| "grad_norm": 0.6612927317619324, |
| "learning_rate": 0.000979906413432425, |
| "loss": 3.0963, |
| "step": 7150 |
| }, |
| { |
| "epoch": 5.908246465063474, |
| "grad_norm": 0.689495325088501, |
| "learning_rate": 0.0009797229103587486, |
| "loss": 3.1038, |
| "step": 7160 |
| }, |
| { |
| "epoch": 5.916503251109505, |
| "grad_norm": 0.6566335558891296, |
| "learning_rate": 0.0009795394072850722, |
| "loss": 3.1062, |
| "step": 7170 |
| }, |
| { |
| "epoch": 5.924760037155537, |
| "grad_norm": 0.7480162382125854, |
| "learning_rate": 0.0009793559042113955, |
| "loss": 3.0928, |
| "step": 7180 |
| }, |
| { |
| "epoch": 5.9330168232015685, |
| "grad_norm": 0.6011252403259277, |
| "learning_rate": 0.0009791724011377191, |
| "loss": 3.1066, |
| "step": 7190 |
| }, |
| { |
| "epoch": 5.941273609247601, |
| "grad_norm": 0.6580034494400024, |
| "learning_rate": 0.0009789888980640425, |
| "loss": 3.0919, |
| "step": 7200 |
| }, |
| { |
| "epoch": 5.949530395293632, |
| "grad_norm": 0.6794580817222595, |
| "learning_rate": 0.000978805394990366, |
| "loss": 3.0912, |
| "step": 7210 |
| }, |
| { |
| "epoch": 5.957787181339664, |
| "grad_norm": 0.6901310682296753, |
| "learning_rate": 0.0009786218919166896, |
| "loss": 3.0976, |
| "step": 7220 |
| }, |
| { |
| "epoch": 5.966043967385695, |
| "grad_norm": 0.7033196687698364, |
| "learning_rate": 0.0009784383888430132, |
| "loss": 3.0969, |
| "step": 7230 |
| }, |
| { |
| "epoch": 5.974300753431727, |
| "grad_norm": 0.5777009129524231, |
| "learning_rate": 0.0009782548857693366, |
| "loss": 3.0985, |
| "step": 7240 |
| }, |
| { |
| "epoch": 5.982557539477758, |
| "grad_norm": 0.6438208818435669, |
| "learning_rate": 0.0009780713826956602, |
| "loss": 3.0915, |
| "step": 7250 |
| }, |
| { |
| "epoch": 5.99081432552379, |
| "grad_norm": 0.6833881139755249, |
| "learning_rate": 0.0009778878796219837, |
| "loss": 3.1012, |
| "step": 7260 |
| }, |
| { |
| "epoch": 5.999071111569822, |
| "grad_norm": 0.5851444602012634, |
| "learning_rate": 0.0009777043765483073, |
| "loss": 3.0961, |
| "step": 7270 |
| }, |
| { |
| "epoch": 6.006605428836825, |
| "grad_norm": 0.6323698163032532, |
| "learning_rate": 0.0009775208734746307, |
| "loss": 2.8359, |
| "step": 7280 |
| }, |
| { |
| "epoch": 6.014862214882857, |
| "grad_norm": 0.6244434118270874, |
| "learning_rate": 0.0009773373704009543, |
| "loss": 3.0765, |
| "step": 7290 |
| }, |
| { |
| "epoch": 6.023119000928888, |
| "grad_norm": 0.6204081177711487, |
| "learning_rate": 0.0009771538673272778, |
| "loss": 3.0839, |
| "step": 7300 |
| }, |
| { |
| "epoch": 6.03137578697492, |
| "grad_norm": 0.6735767126083374, |
| "learning_rate": 0.0009769703642536012, |
| "loss": 3.0678, |
| "step": 7310 |
| }, |
| { |
| "epoch": 6.039632573020952, |
| "grad_norm": 0.6244058609008789, |
| "learning_rate": 0.0009767868611799248, |
| "loss": 3.0883, |
| "step": 7320 |
| }, |
| { |
| "epoch": 6.047889359066983, |
| "grad_norm": 0.581949770450592, |
| "learning_rate": 0.0009766033581062484, |
| "loss": 3.082, |
| "step": 7330 |
| }, |
| { |
| "epoch": 6.056146145113015, |
| "grad_norm": 0.6072763204574585, |
| "learning_rate": 0.0009764198550325718, |
| "loss": 3.0771, |
| "step": 7340 |
| }, |
| { |
| "epoch": 6.064402931159046, |
| "grad_norm": 0.5899455547332764, |
| "learning_rate": 0.0009762363519588954, |
| "loss": 3.0745, |
| "step": 7350 |
| }, |
| { |
| "epoch": 6.072659717205078, |
| "grad_norm": 0.636332094669342, |
| "learning_rate": 0.0009760528488852189, |
| "loss": 3.0777, |
| "step": 7360 |
| }, |
| { |
| "epoch": 6.080916503251109, |
| "grad_norm": 0.623324990272522, |
| "learning_rate": 0.0009758693458115424, |
| "loss": 3.0854, |
| "step": 7370 |
| }, |
| { |
| "epoch": 6.0891732892971415, |
| "grad_norm": 0.6235571503639221, |
| "learning_rate": 0.0009756858427378658, |
| "loss": 3.0639, |
| "step": 7380 |
| }, |
| { |
| "epoch": 6.097430075343173, |
| "grad_norm": 0.657364010810852, |
| "learning_rate": 0.0009755023396641894, |
| "loss": 3.0843, |
| "step": 7390 |
| }, |
| { |
| "epoch": 6.105686861389204, |
| "grad_norm": 0.678801953792572, |
| "learning_rate": 0.0009753188365905129, |
| "loss": 3.0809, |
| "step": 7400 |
| }, |
| { |
| "epoch": 6.113943647435236, |
| "grad_norm": 0.6138309836387634, |
| "learning_rate": 0.0009751353335168365, |
| "loss": 3.0832, |
| "step": 7410 |
| }, |
| { |
| "epoch": 6.122200433481267, |
| "grad_norm": 0.6104526519775391, |
| "learning_rate": 0.0009749518304431599, |
| "loss": 3.0667, |
| "step": 7420 |
| }, |
| { |
| "epoch": 6.130457219527299, |
| "grad_norm": 0.6059489250183105, |
| "learning_rate": 0.0009747683273694835, |
| "loss": 3.0592, |
| "step": 7430 |
| }, |
| { |
| "epoch": 6.1387140055733305, |
| "grad_norm": 0.640777587890625, |
| "learning_rate": 0.000974584824295807, |
| "loss": 3.0643, |
| "step": 7440 |
| }, |
| { |
| "epoch": 6.1469707916193626, |
| "grad_norm": 0.7542473077774048, |
| "learning_rate": 0.0009744013212221306, |
| "loss": 3.064, |
| "step": 7450 |
| }, |
| { |
| "epoch": 6.155227577665394, |
| "grad_norm": 0.6118050217628479, |
| "learning_rate": 0.000974217818148454, |
| "loss": 3.0737, |
| "step": 7460 |
| }, |
| { |
| "epoch": 6.163484363711425, |
| "grad_norm": 0.6154510378837585, |
| "learning_rate": 0.0009740343150747776, |
| "loss": 3.0759, |
| "step": 7470 |
| }, |
| { |
| "epoch": 6.171741149757457, |
| "grad_norm": 0.6432428359985352, |
| "learning_rate": 0.0009738508120011011, |
| "loss": 3.0799, |
| "step": 7480 |
| }, |
| { |
| "epoch": 6.179997935803488, |
| "grad_norm": 0.705723226070404, |
| "learning_rate": 0.0009736673089274245, |
| "loss": 3.0685, |
| "step": 7490 |
| }, |
| { |
| "epoch": 6.18825472184952, |
| "grad_norm": 0.6126253008842468, |
| "learning_rate": 0.000973483805853748, |
| "loss": 3.0565, |
| "step": 7500 |
| }, |
| { |
| "epoch": 6.1965115078955515, |
| "grad_norm": 0.6755325198173523, |
| "learning_rate": 0.0009733003027800715, |
| "loss": 3.0589, |
| "step": 7510 |
| }, |
| { |
| "epoch": 6.204768293941584, |
| "grad_norm": 0.5887700319290161, |
| "learning_rate": 0.0009731167997063951, |
| "loss": 3.0569, |
| "step": 7520 |
| }, |
| { |
| "epoch": 6.213025079987615, |
| "grad_norm": 0.627024233341217, |
| "learning_rate": 0.0009729332966327185, |
| "loss": 3.053, |
| "step": 7530 |
| }, |
| { |
| "epoch": 6.221281866033646, |
| "grad_norm": 0.7310320734977722, |
| "learning_rate": 0.0009727497935590421, |
| "loss": 3.0601, |
| "step": 7540 |
| }, |
| { |
| "epoch": 6.229538652079678, |
| "grad_norm": 0.5800510048866272, |
| "learning_rate": 0.0009725662904853656, |
| "loss": 3.0632, |
| "step": 7550 |
| }, |
| { |
| "epoch": 6.237795438125709, |
| "grad_norm": 0.6462418437004089, |
| "learning_rate": 0.0009723827874116892, |
| "loss": 3.0601, |
| "step": 7560 |
| }, |
| { |
| "epoch": 6.246052224171741, |
| "grad_norm": 0.5978024005889893, |
| "learning_rate": 0.0009721992843380126, |
| "loss": 3.0607, |
| "step": 7570 |
| }, |
| { |
| "epoch": 6.2543090102177725, |
| "grad_norm": 0.7113734483718872, |
| "learning_rate": 0.0009720157812643362, |
| "loss": 3.0441, |
| "step": 7580 |
| }, |
| { |
| "epoch": 6.262565796263805, |
| "grad_norm": 0.7136105298995972, |
| "learning_rate": 0.0009718322781906597, |
| "loss": 3.0627, |
| "step": 7590 |
| }, |
| { |
| "epoch": 6.270822582309836, |
| "grad_norm": 0.6299401521682739, |
| "learning_rate": 0.0009716487751169833, |
| "loss": 3.0632, |
| "step": 7600 |
| }, |
| { |
| "epoch": 6.279079368355868, |
| "grad_norm": 0.5755194425582886, |
| "learning_rate": 0.0009714652720433067, |
| "loss": 3.0655, |
| "step": 7610 |
| }, |
| { |
| "epoch": 6.287336154401899, |
| "grad_norm": 0.6883841753005981, |
| "learning_rate": 0.0009712817689696303, |
| "loss": 3.0555, |
| "step": 7620 |
| }, |
| { |
| "epoch": 6.29559294044793, |
| "grad_norm": 0.6997891664505005, |
| "learning_rate": 0.0009710982658959537, |
| "loss": 3.0537, |
| "step": 7630 |
| }, |
| { |
| "epoch": 6.303849726493962, |
| "grad_norm": 0.7656093835830688, |
| "learning_rate": 0.0009709147628222773, |
| "loss": 3.0597, |
| "step": 7640 |
| }, |
| { |
| "epoch": 6.3121065125399936, |
| "grad_norm": 0.6529414653778076, |
| "learning_rate": 0.0009707312597486007, |
| "loss": 3.0395, |
| "step": 7650 |
| }, |
| { |
| "epoch": 6.320363298586026, |
| "grad_norm": 0.6436627507209778, |
| "learning_rate": 0.0009705477566749243, |
| "loss": 3.0545, |
| "step": 7660 |
| }, |
| { |
| "epoch": 6.328620084632057, |
| "grad_norm": 0.6693470478057861, |
| "learning_rate": 0.0009703642536012478, |
| "loss": 3.0467, |
| "step": 7670 |
| }, |
| { |
| "epoch": 6.336876870678089, |
| "grad_norm": 0.6640053987503052, |
| "learning_rate": 0.0009701807505275714, |
| "loss": 3.0484, |
| "step": 7680 |
| }, |
| { |
| "epoch": 6.34513365672412, |
| "grad_norm": 0.615193784236908, |
| "learning_rate": 0.0009699972474538948, |
| "loss": 3.0456, |
| "step": 7690 |
| }, |
| { |
| "epoch": 6.353390442770151, |
| "grad_norm": 0.6501012444496155, |
| "learning_rate": 0.0009698137443802184, |
| "loss": 3.0536, |
| "step": 7700 |
| }, |
| { |
| "epoch": 6.361647228816183, |
| "grad_norm": 0.7172884941101074, |
| "learning_rate": 0.0009696302413065419, |
| "loss": 3.041, |
| "step": 7710 |
| }, |
| { |
| "epoch": 6.369904014862215, |
| "grad_norm": 0.6863964200019836, |
| "learning_rate": 0.0009694467382328655, |
| "loss": 3.0422, |
| "step": 7720 |
| }, |
| { |
| "epoch": 6.378160800908247, |
| "grad_norm": 0.6568806171417236, |
| "learning_rate": 0.0009692632351591889, |
| "loss": 3.0516, |
| "step": 7730 |
| }, |
| { |
| "epoch": 6.386417586954278, |
| "grad_norm": 0.7293218374252319, |
| "learning_rate": 0.0009690797320855125, |
| "loss": 3.0388, |
| "step": 7740 |
| }, |
| { |
| "epoch": 6.39467437300031, |
| "grad_norm": 0.651716947555542, |
| "learning_rate": 0.000968896229011836, |
| "loss": 3.0529, |
| "step": 7750 |
| }, |
| { |
| "epoch": 6.402931159046341, |
| "grad_norm": 0.6633101105690002, |
| "learning_rate": 0.0009687127259381596, |
| "loss": 3.0411, |
| "step": 7760 |
| }, |
| { |
| "epoch": 6.411187945092372, |
| "grad_norm": 0.685100793838501, |
| "learning_rate": 0.0009685292228644829, |
| "loss": 3.0472, |
| "step": 7770 |
| }, |
| { |
| "epoch": 6.419444731138404, |
| "grad_norm": 0.6207525730133057, |
| "learning_rate": 0.0009683457197908065, |
| "loss": 3.0368, |
| "step": 7780 |
| }, |
| { |
| "epoch": 6.427701517184436, |
| "grad_norm": 0.6622489094734192, |
| "learning_rate": 0.00096816221671713, |
| "loss": 3.0413, |
| "step": 7790 |
| }, |
| { |
| "epoch": 6.435958303230468, |
| "grad_norm": 0.640729546546936, |
| "learning_rate": 0.0009679787136434536, |
| "loss": 3.0358, |
| "step": 7800 |
| }, |
| { |
| "epoch": 6.444215089276499, |
| "grad_norm": 0.6243358254432678, |
| "learning_rate": 0.000967795210569777, |
| "loss": 3.0353, |
| "step": 7810 |
| }, |
| { |
| "epoch": 6.452471875322531, |
| "grad_norm": 0.7254058718681335, |
| "learning_rate": 0.0009676117074961006, |
| "loss": 3.0346, |
| "step": 7820 |
| }, |
| { |
| "epoch": 6.460728661368562, |
| "grad_norm": 0.6251292824745178, |
| "learning_rate": 0.0009674282044224241, |
| "loss": 3.0504, |
| "step": 7830 |
| }, |
| { |
| "epoch": 6.468985447414594, |
| "grad_norm": 0.6604384779930115, |
| "learning_rate": 0.0009672447013487477, |
| "loss": 3.037, |
| "step": 7840 |
| }, |
| { |
| "epoch": 6.477242233460625, |
| "grad_norm": 0.6694011092185974, |
| "learning_rate": 0.0009670611982750711, |
| "loss": 3.0383, |
| "step": 7850 |
| }, |
| { |
| "epoch": 6.485499019506657, |
| "grad_norm": 0.6231392025947571, |
| "learning_rate": 0.0009668776952013947, |
| "loss": 3.0465, |
| "step": 7860 |
| }, |
| { |
| "epoch": 6.493755805552689, |
| "grad_norm": 0.6012188792228699, |
| "learning_rate": 0.0009666941921277182, |
| "loss": 3.0319, |
| "step": 7870 |
| }, |
| { |
| "epoch": 6.50201259159872, |
| "grad_norm": 0.5632750988006592, |
| "learning_rate": 0.0009665106890540418, |
| "loss": 3.0311, |
| "step": 7880 |
| }, |
| { |
| "epoch": 6.510269377644752, |
| "grad_norm": 0.6662549376487732, |
| "learning_rate": 0.0009663271859803652, |
| "loss": 3.0278, |
| "step": 7890 |
| }, |
| { |
| "epoch": 6.518526163690783, |
| "grad_norm": 0.6620095372200012, |
| "learning_rate": 0.0009661436829066887, |
| "loss": 3.0341, |
| "step": 7900 |
| }, |
| { |
| "epoch": 6.526782949736815, |
| "grad_norm": 0.6526013612747192, |
| "learning_rate": 0.0009659601798330122, |
| "loss": 3.044, |
| "step": 7910 |
| }, |
| { |
| "epoch": 6.5350397357828465, |
| "grad_norm": 0.6725477576255798, |
| "learning_rate": 0.0009657766767593358, |
| "loss": 3.0221, |
| "step": 7920 |
| }, |
| { |
| "epoch": 6.5432965218288786, |
| "grad_norm": 0.5865882039070129, |
| "learning_rate": 0.0009655931736856592, |
| "loss": 3.0441, |
| "step": 7930 |
| }, |
| { |
| "epoch": 6.55155330787491, |
| "grad_norm": 0.6650230288505554, |
| "learning_rate": 0.0009654096706119828, |
| "loss": 3.0306, |
| "step": 7940 |
| }, |
| { |
| "epoch": 6.559810093920941, |
| "grad_norm": 0.7044249773025513, |
| "learning_rate": 0.0009652261675383063, |
| "loss": 3.0343, |
| "step": 7950 |
| }, |
| { |
| "epoch": 6.568066879966973, |
| "grad_norm": 0.6340664625167847, |
| "learning_rate": 0.0009650426644646299, |
| "loss": 3.0324, |
| "step": 7960 |
| }, |
| { |
| "epoch": 6.576323666013004, |
| "grad_norm": 0.6298174262046814, |
| "learning_rate": 0.0009648591613909533, |
| "loss": 3.0411, |
| "step": 7970 |
| }, |
| { |
| "epoch": 6.584580452059036, |
| "grad_norm": 0.6297299265861511, |
| "learning_rate": 0.0009646756583172769, |
| "loss": 3.0303, |
| "step": 7980 |
| }, |
| { |
| "epoch": 6.5928372381050675, |
| "grad_norm": 0.6586875915527344, |
| "learning_rate": 0.0009644921552436004, |
| "loss": 3.0271, |
| "step": 7990 |
| }, |
| { |
| "epoch": 6.601094024151099, |
| "grad_norm": 0.6087930798530579, |
| "learning_rate": 0.000964308652169924, |
| "loss": 3.0277, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.609350810197131, |
| "grad_norm": 0.6917185187339783, |
| "learning_rate": 0.0009641251490962474, |
| "loss": 3.0362, |
| "step": 8010 |
| }, |
| { |
| "epoch": 6.617607596243162, |
| "grad_norm": 0.6129333972930908, |
| "learning_rate": 0.000963941646022571, |
| "loss": 3.0206, |
| "step": 8020 |
| }, |
| { |
| "epoch": 6.625864382289194, |
| "grad_norm": 0.5826658606529236, |
| "learning_rate": 0.0009637581429488944, |
| "loss": 3.0132, |
| "step": 8030 |
| }, |
| { |
| "epoch": 6.634121168335225, |
| "grad_norm": 0.6190428733825684, |
| "learning_rate": 0.0009635746398752179, |
| "loss": 3.0196, |
| "step": 8040 |
| }, |
| { |
| "epoch": 6.642377954381257, |
| "grad_norm": 0.6231646537780762, |
| "learning_rate": 0.0009633911368015414, |
| "loss": 3.0279, |
| "step": 8050 |
| }, |
| { |
| "epoch": 6.6506347404272885, |
| "grad_norm": 0.7201693058013916, |
| "learning_rate": 0.0009632076337278649, |
| "loss": 3.0134, |
| "step": 8060 |
| }, |
| { |
| "epoch": 6.658891526473321, |
| "grad_norm": 0.616397500038147, |
| "learning_rate": 0.0009630241306541885, |
| "loss": 3.0182, |
| "step": 8070 |
| }, |
| { |
| "epoch": 6.667148312519352, |
| "grad_norm": 0.6851087212562561, |
| "learning_rate": 0.0009628406275805119, |
| "loss": 3.0179, |
| "step": 8080 |
| }, |
| { |
| "epoch": 6.675405098565383, |
| "grad_norm": 0.6185948252677917, |
| "learning_rate": 0.0009626571245068355, |
| "loss": 3.0191, |
| "step": 8090 |
| }, |
| { |
| "epoch": 6.683661884611415, |
| "grad_norm": 0.5413244962692261, |
| "learning_rate": 0.000962473621433159, |
| "loss": 3.0243, |
| "step": 8100 |
| }, |
| { |
| "epoch": 6.691918670657446, |
| "grad_norm": 0.7104983925819397, |
| "learning_rate": 0.0009622901183594826, |
| "loss": 3.0189, |
| "step": 8110 |
| }, |
| { |
| "epoch": 6.700175456703478, |
| "grad_norm": 0.5723142623901367, |
| "learning_rate": 0.000962106615285806, |
| "loss": 3.0015, |
| "step": 8120 |
| }, |
| { |
| "epoch": 6.7084322427495096, |
| "grad_norm": 0.6276829242706299, |
| "learning_rate": 0.0009619231122121296, |
| "loss": 3.0215, |
| "step": 8130 |
| }, |
| { |
| "epoch": 6.716689028795542, |
| "grad_norm": 0.6671704053878784, |
| "learning_rate": 0.0009617396091384531, |
| "loss": 3.0153, |
| "step": 8140 |
| }, |
| { |
| "epoch": 6.724945814841573, |
| "grad_norm": 0.7471591234207153, |
| "learning_rate": 0.0009615561060647767, |
| "loss": 3.0091, |
| "step": 8150 |
| }, |
| { |
| "epoch": 6.733202600887605, |
| "grad_norm": 0.6197100281715393, |
| "learning_rate": 0.0009613726029911, |
| "loss": 3.0037, |
| "step": 8160 |
| }, |
| { |
| "epoch": 6.741459386933636, |
| "grad_norm": 0.6177218556404114, |
| "learning_rate": 0.0009611890999174236, |
| "loss": 3.0122, |
| "step": 8170 |
| }, |
| { |
| "epoch": 6.749716172979667, |
| "grad_norm": 0.6349440813064575, |
| "learning_rate": 0.0009610055968437471, |
| "loss": 3.0155, |
| "step": 8180 |
| }, |
| { |
| "epoch": 6.757972959025699, |
| "grad_norm": 0.6462443470954895, |
| "learning_rate": 0.0009608220937700707, |
| "loss": 3.0141, |
| "step": 8190 |
| }, |
| { |
| "epoch": 6.766229745071731, |
| "grad_norm": 0.7159162163734436, |
| "learning_rate": 0.0009606385906963941, |
| "loss": 3.0119, |
| "step": 8200 |
| }, |
| { |
| "epoch": 6.774486531117763, |
| "grad_norm": 0.592444658279419, |
| "learning_rate": 0.0009604550876227177, |
| "loss": 3.0087, |
| "step": 8210 |
| }, |
| { |
| "epoch": 6.782743317163794, |
| "grad_norm": 0.6107344627380371, |
| "learning_rate": 0.0009602715845490412, |
| "loss": 3.0186, |
| "step": 8220 |
| }, |
| { |
| "epoch": 6.791000103209826, |
| "grad_norm": 0.6150995492935181, |
| "learning_rate": 0.0009600880814753648, |
| "loss": 3.0112, |
| "step": 8230 |
| }, |
| { |
| "epoch": 6.799256889255857, |
| "grad_norm": 0.6124362945556641, |
| "learning_rate": 0.0009599045784016882, |
| "loss": 2.9989, |
| "step": 8240 |
| }, |
| { |
| "epoch": 6.807513675301889, |
| "grad_norm": 0.6340455412864685, |
| "learning_rate": 0.0009597210753280118, |
| "loss": 3.0154, |
| "step": 8250 |
| }, |
| { |
| "epoch": 6.81577046134792, |
| "grad_norm": 0.5861290097236633, |
| "learning_rate": 0.0009595375722543353, |
| "loss": 3.0115, |
| "step": 8260 |
| }, |
| { |
| "epoch": 6.824027247393952, |
| "grad_norm": 0.5904505848884583, |
| "learning_rate": 0.0009593540691806589, |
| "loss": 3.0065, |
| "step": 8270 |
| }, |
| { |
| "epoch": 6.832284033439984, |
| "grad_norm": 0.6523525714874268, |
| "learning_rate": 0.0009591705661069823, |
| "loss": 3.0069, |
| "step": 8280 |
| }, |
| { |
| "epoch": 6.840540819486015, |
| "grad_norm": 0.6429992318153381, |
| "learning_rate": 0.0009589870630333058, |
| "loss": 3.0033, |
| "step": 8290 |
| }, |
| { |
| "epoch": 6.848797605532047, |
| "grad_norm": 0.6393450498580933, |
| "learning_rate": 0.0009588035599596293, |
| "loss": 3.0094, |
| "step": 8300 |
| }, |
| { |
| "epoch": 6.857054391578078, |
| "grad_norm": 0.6140925884246826, |
| "learning_rate": 0.0009586200568859529, |
| "loss": 3.0092, |
| "step": 8310 |
| }, |
| { |
| "epoch": 6.865311177624109, |
| "grad_norm": 0.5966553092002869, |
| "learning_rate": 0.0009584365538122763, |
| "loss": 3.005, |
| "step": 8320 |
| }, |
| { |
| "epoch": 6.8735679636701414, |
| "grad_norm": 0.5963024497032166, |
| "learning_rate": 0.0009582530507385999, |
| "loss": 3.009, |
| "step": 8330 |
| }, |
| { |
| "epoch": 6.881824749716173, |
| "grad_norm": 0.5785512924194336, |
| "learning_rate": 0.0009580695476649234, |
| "loss": 3.0075, |
| "step": 8340 |
| }, |
| { |
| "epoch": 6.890081535762205, |
| "grad_norm": 0.5979735851287842, |
| "learning_rate": 0.000957886044591247, |
| "loss": 2.9997, |
| "step": 8350 |
| }, |
| { |
| "epoch": 6.898338321808236, |
| "grad_norm": 0.6088021397590637, |
| "learning_rate": 0.0009577025415175704, |
| "loss": 3.0102, |
| "step": 8360 |
| }, |
| { |
| "epoch": 6.906595107854268, |
| "grad_norm": 0.6511215567588806, |
| "learning_rate": 0.000957519038443894, |
| "loss": 3.0058, |
| "step": 8370 |
| }, |
| { |
| "epoch": 6.914851893900299, |
| "grad_norm": 0.6001556515693665, |
| "learning_rate": 0.0009573355353702175, |
| "loss": 3.0005, |
| "step": 8380 |
| }, |
| { |
| "epoch": 6.923108679946331, |
| "grad_norm": 0.7033063173294067, |
| "learning_rate": 0.000957152032296541, |
| "loss": 2.9954, |
| "step": 8390 |
| }, |
| { |
| "epoch": 6.9313654659923625, |
| "grad_norm": 0.6751210689544678, |
| "learning_rate": 0.0009569685292228645, |
| "loss": 3.0032, |
| "step": 8400 |
| }, |
| { |
| "epoch": 6.939622252038394, |
| "grad_norm": 0.6629015207290649, |
| "learning_rate": 0.0009567850261491881, |
| "loss": 3.0148, |
| "step": 8410 |
| }, |
| { |
| "epoch": 6.947879038084426, |
| "grad_norm": 0.6272764801979065, |
| "learning_rate": 0.0009566015230755115, |
| "loss": 3.002, |
| "step": 8420 |
| }, |
| { |
| "epoch": 6.956135824130457, |
| "grad_norm": 0.6458156108856201, |
| "learning_rate": 0.000956418020001835, |
| "loss": 3.0066, |
| "step": 8430 |
| }, |
| { |
| "epoch": 6.964392610176489, |
| "grad_norm": 0.6023524403572083, |
| "learning_rate": 0.0009562345169281585, |
| "loss": 2.9992, |
| "step": 8440 |
| }, |
| { |
| "epoch": 6.97264939622252, |
| "grad_norm": 0.6430317759513855, |
| "learning_rate": 0.0009560510138544821, |
| "loss": 2.9976, |
| "step": 8450 |
| }, |
| { |
| "epoch": 6.980906182268552, |
| "grad_norm": 0.6168457269668579, |
| "learning_rate": 0.0009558675107808056, |
| "loss": 2.9931, |
| "step": 8460 |
| }, |
| { |
| "epoch": 6.9891629683145835, |
| "grad_norm": 0.6400942802429199, |
| "learning_rate": 0.0009556840077071291, |
| "loss": 2.9795, |
| "step": 8470 |
| }, |
| { |
| "epoch": 6.997419754360616, |
| "grad_norm": 0.5995707511901855, |
| "learning_rate": 0.0009555005046334526, |
| "loss": 3.0002, |
| "step": 8480 |
| }, |
| { |
| "epoch": 7.004954071627619, |
| "grad_norm": 0.7327253222465515, |
| "learning_rate": 0.0009553170015597762, |
| "loss": 2.7253, |
| "step": 8490 |
| }, |
| { |
| "epoch": 7.01321085767365, |
| "grad_norm": 0.6455899477005005, |
| "learning_rate": 0.0009551334984860997, |
| "loss": 2.9832, |
| "step": 8500 |
| }, |
| { |
| "epoch": 7.021467643719682, |
| "grad_norm": 0.6111765503883362, |
| "learning_rate": 0.0009549499954124232, |
| "loss": 2.9918, |
| "step": 8510 |
| }, |
| { |
| "epoch": 7.0297244297657135, |
| "grad_norm": 0.6223667860031128, |
| "learning_rate": 0.0009547664923387467, |
| "loss": 2.9752, |
| "step": 8520 |
| }, |
| { |
| "epoch": 7.037981215811746, |
| "grad_norm": 0.6821649074554443, |
| "learning_rate": 0.0009545829892650703, |
| "loss": 2.9938, |
| "step": 8530 |
| }, |
| { |
| "epoch": 7.046238001857777, |
| "grad_norm": 0.5645655989646912, |
| "learning_rate": 0.0009543994861913938, |
| "loss": 2.9932, |
| "step": 8540 |
| }, |
| { |
| "epoch": 7.054494787903808, |
| "grad_norm": 0.6132038235664368, |
| "learning_rate": 0.0009542159831177172, |
| "loss": 2.9796, |
| "step": 8550 |
| }, |
| { |
| "epoch": 7.06275157394984, |
| "grad_norm": 0.6503163576126099, |
| "learning_rate": 0.0009540324800440407, |
| "loss": 2.9844, |
| "step": 8560 |
| }, |
| { |
| "epoch": 7.071008359995871, |
| "grad_norm": 0.5986816883087158, |
| "learning_rate": 0.0009538489769703643, |
| "loss": 2.9817, |
| "step": 8570 |
| }, |
| { |
| "epoch": 7.079265146041903, |
| "grad_norm": 0.6171458959579468, |
| "learning_rate": 0.0009536654738966878, |
| "loss": 2.9802, |
| "step": 8580 |
| }, |
| { |
| "epoch": 7.087521932087935, |
| "grad_norm": 0.624758243560791, |
| "learning_rate": 0.0009534819708230113, |
| "loss": 2.9903, |
| "step": 8590 |
| }, |
| { |
| "epoch": 7.095778718133967, |
| "grad_norm": 0.6675239205360413, |
| "learning_rate": 0.0009532984677493348, |
| "loss": 2.9838, |
| "step": 8600 |
| }, |
| { |
| "epoch": 7.104035504179998, |
| "grad_norm": 0.6595028042793274, |
| "learning_rate": 0.0009531149646756584, |
| "loss": 2.9938, |
| "step": 8610 |
| }, |
| { |
| "epoch": 7.11229229022603, |
| "grad_norm": 0.7010105848312378, |
| "learning_rate": 0.0009529314616019819, |
| "loss": 2.9871, |
| "step": 8620 |
| }, |
| { |
| "epoch": 7.120549076272061, |
| "grad_norm": 0.6516680121421814, |
| "learning_rate": 0.0009527479585283053, |
| "loss": 2.9889, |
| "step": 8630 |
| }, |
| { |
| "epoch": 7.128805862318092, |
| "grad_norm": 0.6057817935943604, |
| "learning_rate": 0.0009525644554546289, |
| "loss": 2.9926, |
| "step": 8640 |
| }, |
| { |
| "epoch": 7.137062648364124, |
| "grad_norm": 0.6336268782615662, |
| "learning_rate": 0.0009523809523809524, |
| "loss": 2.9963, |
| "step": 8650 |
| }, |
| { |
| "epoch": 7.145319434410156, |
| "grad_norm": 0.5994205474853516, |
| "learning_rate": 0.000952197449307276, |
| "loss": 2.9847, |
| "step": 8660 |
| }, |
| { |
| "epoch": 7.153576220456188, |
| "grad_norm": 0.6255319118499756, |
| "learning_rate": 0.0009520139462335994, |
| "loss": 2.98, |
| "step": 8670 |
| }, |
| { |
| "epoch": 7.161833006502219, |
| "grad_norm": 0.5612902641296387, |
| "learning_rate": 0.0009518304431599229, |
| "loss": 2.9802, |
| "step": 8680 |
| }, |
| { |
| "epoch": 7.170089792548251, |
| "grad_norm": 0.6441757082939148, |
| "learning_rate": 0.0009516469400862464, |
| "loss": 2.9847, |
| "step": 8690 |
| }, |
| { |
| "epoch": 7.178346578594282, |
| "grad_norm": 0.6565569639205933, |
| "learning_rate": 0.00095146343701257, |
| "loss": 2.9879, |
| "step": 8700 |
| }, |
| { |
| "epoch": 7.186603364640313, |
| "grad_norm": 0.609322726726532, |
| "learning_rate": 0.0009512799339388934, |
| "loss": 2.9798, |
| "step": 8710 |
| }, |
| { |
| "epoch": 7.194860150686345, |
| "grad_norm": 0.6805379986763, |
| "learning_rate": 0.000951096430865217, |
| "loss": 2.9741, |
| "step": 8720 |
| }, |
| { |
| "epoch": 7.203116936732377, |
| "grad_norm": 0.674920380115509, |
| "learning_rate": 0.0009509129277915405, |
| "loss": 2.9833, |
| "step": 8730 |
| }, |
| { |
| "epoch": 7.211373722778409, |
| "grad_norm": 0.6178304553031921, |
| "learning_rate": 0.000950729424717864, |
| "loss": 2.9818, |
| "step": 8740 |
| }, |
| { |
| "epoch": 7.21963050882444, |
| "grad_norm": 0.5889567136764526, |
| "learning_rate": 0.0009505459216441875, |
| "loss": 2.9855, |
| "step": 8750 |
| }, |
| { |
| "epoch": 7.227887294870472, |
| "grad_norm": 0.5856685638427734, |
| "learning_rate": 0.0009503624185705111, |
| "loss": 2.97, |
| "step": 8760 |
| }, |
| { |
| "epoch": 7.236144080916503, |
| "grad_norm": 0.660362958908081, |
| "learning_rate": 0.0009501789154968346, |
| "loss": 2.9745, |
| "step": 8770 |
| }, |
| { |
| "epoch": 7.244400866962534, |
| "grad_norm": 0.7222636342048645, |
| "learning_rate": 0.0009499954124231582, |
| "loss": 2.9836, |
| "step": 8780 |
| }, |
| { |
| "epoch": 7.2526576530085665, |
| "grad_norm": 0.7483038306236267, |
| "learning_rate": 0.0009498119093494816, |
| "loss": 2.9743, |
| "step": 8790 |
| }, |
| { |
| "epoch": 7.260914439054598, |
| "grad_norm": 0.6627931594848633, |
| "learning_rate": 0.0009496284062758052, |
| "loss": 2.9716, |
| "step": 8800 |
| }, |
| { |
| "epoch": 7.26917122510063, |
| "grad_norm": 0.6666322350502014, |
| "learning_rate": 0.0009494449032021286, |
| "loss": 2.9693, |
| "step": 8810 |
| }, |
| { |
| "epoch": 7.277428011146661, |
| "grad_norm": 0.6174741387367249, |
| "learning_rate": 0.0009492614001284521, |
| "loss": 2.9638, |
| "step": 8820 |
| }, |
| { |
| "epoch": 7.285684797192693, |
| "grad_norm": 0.5936954617500305, |
| "learning_rate": 0.0009490778970547756, |
| "loss": 2.9701, |
| "step": 8830 |
| }, |
| { |
| "epoch": 7.293941583238724, |
| "grad_norm": 0.6383837461471558, |
| "learning_rate": 0.0009488943939810992, |
| "loss": 2.9706, |
| "step": 8840 |
| }, |
| { |
| "epoch": 7.302198369284756, |
| "grad_norm": 0.6035402417182922, |
| "learning_rate": 0.0009487108909074227, |
| "loss": 2.9639, |
| "step": 8850 |
| }, |
| { |
| "epoch": 7.3104551553307875, |
| "grad_norm": 0.6518993377685547, |
| "learning_rate": 0.0009485273878337462, |
| "loss": 2.9693, |
| "step": 8860 |
| }, |
| { |
| "epoch": 7.318711941376819, |
| "grad_norm": 0.5939560532569885, |
| "learning_rate": 0.0009483438847600697, |
| "loss": 2.9686, |
| "step": 8870 |
| }, |
| { |
| "epoch": 7.326968727422851, |
| "grad_norm": 0.6224295496940613, |
| "learning_rate": 0.0009481603816863933, |
| "loss": 2.9779, |
| "step": 8880 |
| }, |
| { |
| "epoch": 7.335225513468882, |
| "grad_norm": 0.6374024748802185, |
| "learning_rate": 0.0009479768786127168, |
| "loss": 2.969, |
| "step": 8890 |
| }, |
| { |
| "epoch": 7.343482299514914, |
| "grad_norm": 0.6577615141868591, |
| "learning_rate": 0.0009477933755390403, |
| "loss": 2.9645, |
| "step": 8900 |
| }, |
| { |
| "epoch": 7.351739085560945, |
| "grad_norm": 0.659116268157959, |
| "learning_rate": 0.0009476098724653638, |
| "loss": 2.9694, |
| "step": 8910 |
| }, |
| { |
| "epoch": 7.359995871606977, |
| "grad_norm": 0.618446946144104, |
| "learning_rate": 0.0009474263693916874, |
| "loss": 2.9615, |
| "step": 8920 |
| }, |
| { |
| "epoch": 7.3682526576530085, |
| "grad_norm": 0.6356460452079773, |
| "learning_rate": 0.0009472428663180109, |
| "loss": 2.9812, |
| "step": 8930 |
| }, |
| { |
| "epoch": 7.376509443699041, |
| "grad_norm": 0.5520789623260498, |
| "learning_rate": 0.0009470593632443344, |
| "loss": 2.9557, |
| "step": 8940 |
| }, |
| { |
| "epoch": 7.384766229745072, |
| "grad_norm": 0.6499543190002441, |
| "learning_rate": 0.0009468758601706578, |
| "loss": 2.9669, |
| "step": 8950 |
| }, |
| { |
| "epoch": 7.393023015791103, |
| "grad_norm": 0.6642090678215027, |
| "learning_rate": 0.0009466923570969814, |
| "loss": 2.956, |
| "step": 8960 |
| }, |
| { |
| "epoch": 7.401279801837135, |
| "grad_norm": 0.6019958257675171, |
| "learning_rate": 0.0009465088540233049, |
| "loss": 2.962, |
| "step": 8970 |
| }, |
| { |
| "epoch": 7.409536587883166, |
| "grad_norm": 0.6056467890739441, |
| "learning_rate": 0.0009463253509496284, |
| "loss": 2.9704, |
| "step": 8980 |
| }, |
| { |
| "epoch": 7.417793373929198, |
| "grad_norm": 0.5770221948623657, |
| "learning_rate": 0.0009461418478759519, |
| "loss": 2.9487, |
| "step": 8990 |
| }, |
| { |
| "epoch": 7.4260501599752295, |
| "grad_norm": 0.5907398462295532, |
| "learning_rate": 0.0009459583448022755, |
| "loss": 2.9609, |
| "step": 9000 |
| }, |
| { |
| "epoch": 7.434306946021262, |
| "grad_norm": 0.6140010952949524, |
| "learning_rate": 0.000945774841728599, |
| "loss": 2.9691, |
| "step": 9010 |
| }, |
| { |
| "epoch": 7.442563732067293, |
| "grad_norm": 0.5944181084632874, |
| "learning_rate": 0.0009455913386549225, |
| "loss": 2.957, |
| "step": 9020 |
| }, |
| { |
| "epoch": 7.450820518113324, |
| "grad_norm": 0.6197523474693298, |
| "learning_rate": 0.000945407835581246, |
| "loss": 2.9656, |
| "step": 9030 |
| }, |
| { |
| "epoch": 7.459077304159356, |
| "grad_norm": 0.6460192799568176, |
| "learning_rate": 0.0009452243325075696, |
| "loss": 2.9599, |
| "step": 9040 |
| }, |
| { |
| "epoch": 7.467334090205387, |
| "grad_norm": 0.6181427836418152, |
| "learning_rate": 0.0009450408294338931, |
| "loss": 2.9458, |
| "step": 9050 |
| }, |
| { |
| "epoch": 7.475590876251419, |
| "grad_norm": 0.6719056367874146, |
| "learning_rate": 0.0009448573263602166, |
| "loss": 2.9665, |
| "step": 9060 |
| }, |
| { |
| "epoch": 7.483847662297451, |
| "grad_norm": 0.6406500339508057, |
| "learning_rate": 0.0009446738232865401, |
| "loss": 2.9555, |
| "step": 9070 |
| }, |
| { |
| "epoch": 7.492104448343483, |
| "grad_norm": 0.6553565263748169, |
| "learning_rate": 0.0009444903202128636, |
| "loss": 2.9581, |
| "step": 9080 |
| }, |
| { |
| "epoch": 7.500361234389514, |
| "grad_norm": 0.5775774121284485, |
| "learning_rate": 0.000944306817139187, |
| "loss": 2.9582, |
| "step": 9090 |
| }, |
| { |
| "epoch": 7.508618020435545, |
| "grad_norm": 0.6064974665641785, |
| "learning_rate": 0.0009441233140655106, |
| "loss": 2.9567, |
| "step": 9100 |
| }, |
| { |
| "epoch": 7.516874806481577, |
| "grad_norm": 0.6577678322792053, |
| "learning_rate": 0.0009439398109918341, |
| "loss": 2.9727, |
| "step": 9110 |
| }, |
| { |
| "epoch": 7.525131592527608, |
| "grad_norm": 0.7013944387435913, |
| "learning_rate": 0.0009437563079181577, |
| "loss": 2.9625, |
| "step": 9120 |
| }, |
| { |
| "epoch": 7.53338837857364, |
| "grad_norm": 0.5832070112228394, |
| "learning_rate": 0.0009435728048444812, |
| "loss": 2.9544, |
| "step": 9130 |
| }, |
| { |
| "epoch": 7.541645164619672, |
| "grad_norm": 0.633455753326416, |
| "learning_rate": 0.0009433893017708047, |
| "loss": 2.966, |
| "step": 9140 |
| }, |
| { |
| "epoch": 7.549901950665704, |
| "grad_norm": 0.6928477883338928, |
| "learning_rate": 0.0009432057986971282, |
| "loss": 2.9606, |
| "step": 9150 |
| }, |
| { |
| "epoch": 7.558158736711735, |
| "grad_norm": 0.6043297052383423, |
| "learning_rate": 0.0009430222956234518, |
| "loss": 2.965, |
| "step": 9160 |
| }, |
| { |
| "epoch": 7.566415522757767, |
| "grad_norm": 0.6551850438117981, |
| "learning_rate": 0.0009428387925497753, |
| "loss": 2.9584, |
| "step": 9170 |
| }, |
| { |
| "epoch": 7.574672308803798, |
| "grad_norm": 0.5572656989097595, |
| "learning_rate": 0.0009426552894760988, |
| "loss": 2.9622, |
| "step": 9180 |
| }, |
| { |
| "epoch": 7.582929094849829, |
| "grad_norm": 0.5612010359764099, |
| "learning_rate": 0.0009424717864024223, |
| "loss": 2.9484, |
| "step": 9190 |
| }, |
| { |
| "epoch": 7.591185880895861, |
| "grad_norm": 0.6252767443656921, |
| "learning_rate": 0.0009422882833287458, |
| "loss": 2.9578, |
| "step": 9200 |
| }, |
| { |
| "epoch": 7.599442666941893, |
| "grad_norm": 0.569965124130249, |
| "learning_rate": 0.0009421047802550692, |
| "loss": 2.9554, |
| "step": 9210 |
| }, |
| { |
| "epoch": 7.607699452987925, |
| "grad_norm": 0.6037718057632446, |
| "learning_rate": 0.0009419212771813927, |
| "loss": 2.9417, |
| "step": 9220 |
| }, |
| { |
| "epoch": 7.615956239033956, |
| "grad_norm": 0.5498155355453491, |
| "learning_rate": 0.0009417377741077163, |
| "loss": 2.9473, |
| "step": 9230 |
| }, |
| { |
| "epoch": 7.624213025079988, |
| "grad_norm": 0.6004564166069031, |
| "learning_rate": 0.0009415542710340398, |
| "loss": 2.9404, |
| "step": 9240 |
| }, |
| { |
| "epoch": 7.632469811126019, |
| "grad_norm": 0.6017456650733948, |
| "learning_rate": 0.0009413707679603633, |
| "loss": 2.9513, |
| "step": 9250 |
| }, |
| { |
| "epoch": 7.640726597172051, |
| "grad_norm": 0.6328597068786621, |
| "learning_rate": 0.0009411872648866868, |
| "loss": 2.9446, |
| "step": 9260 |
| }, |
| { |
| "epoch": 7.6489833832180825, |
| "grad_norm": 0.5953946709632874, |
| "learning_rate": 0.0009410037618130104, |
| "loss": 2.9435, |
| "step": 9270 |
| }, |
| { |
| "epoch": 7.657240169264114, |
| "grad_norm": 0.6098210215568542, |
| "learning_rate": 0.0009408202587393339, |
| "loss": 2.9538, |
| "step": 9280 |
| }, |
| { |
| "epoch": 7.665496955310146, |
| "grad_norm": 0.592674732208252, |
| "learning_rate": 0.0009406367556656574, |
| "loss": 2.9527, |
| "step": 9290 |
| }, |
| { |
| "epoch": 7.673753741356177, |
| "grad_norm": 0.5980309247970581, |
| "learning_rate": 0.0009404532525919809, |
| "loss": 2.9348, |
| "step": 9300 |
| }, |
| { |
| "epoch": 7.682010527402209, |
| "grad_norm": 0.5754213333129883, |
| "learning_rate": 0.0009402697495183045, |
| "loss": 2.9523, |
| "step": 9310 |
| }, |
| { |
| "epoch": 7.69026731344824, |
| "grad_norm": 0.624748945236206, |
| "learning_rate": 0.000940086246444628, |
| "loss": 2.9538, |
| "step": 9320 |
| }, |
| { |
| "epoch": 7.698524099494271, |
| "grad_norm": 0.5637576580047607, |
| "learning_rate": 0.0009399027433709515, |
| "loss": 2.9414, |
| "step": 9330 |
| }, |
| { |
| "epoch": 7.7067808855403035, |
| "grad_norm": 0.6265804171562195, |
| "learning_rate": 0.0009397192402972749, |
| "loss": 2.9429, |
| "step": 9340 |
| }, |
| { |
| "epoch": 7.715037671586335, |
| "grad_norm": 0.6041392087936401, |
| "learning_rate": 0.0009395357372235985, |
| "loss": 2.9428, |
| "step": 9350 |
| }, |
| { |
| "epoch": 7.723294457632367, |
| "grad_norm": 0.5320299863815308, |
| "learning_rate": 0.000939352234149922, |
| "loss": 2.9391, |
| "step": 9360 |
| }, |
| { |
| "epoch": 7.731551243678398, |
| "grad_norm": 0.6173900365829468, |
| "learning_rate": 0.0009391687310762455, |
| "loss": 2.9374, |
| "step": 9370 |
| }, |
| { |
| "epoch": 7.73980802972443, |
| "grad_norm": 0.5725083351135254, |
| "learning_rate": 0.000938985228002569, |
| "loss": 2.9609, |
| "step": 9380 |
| }, |
| { |
| "epoch": 7.748064815770461, |
| "grad_norm": 0.5768330097198486, |
| "learning_rate": 0.0009388017249288926, |
| "loss": 2.9498, |
| "step": 9390 |
| }, |
| { |
| "epoch": 7.756321601816493, |
| "grad_norm": 0.6300333142280579, |
| "learning_rate": 0.0009386182218552161, |
| "loss": 2.9493, |
| "step": 9400 |
| }, |
| { |
| "epoch": 7.7645783878625245, |
| "grad_norm": 0.6431629061698914, |
| "learning_rate": 0.0009384347187815396, |
| "loss": 2.9324, |
| "step": 9410 |
| }, |
| { |
| "epoch": 7.772835173908556, |
| "grad_norm": 0.5805600881576538, |
| "learning_rate": 0.0009382512157078631, |
| "loss": 2.947, |
| "step": 9420 |
| }, |
| { |
| "epoch": 7.781091959954588, |
| "grad_norm": 0.6539075970649719, |
| "learning_rate": 0.0009380677126341867, |
| "loss": 2.9421, |
| "step": 9430 |
| }, |
| { |
| "epoch": 7.789348746000619, |
| "grad_norm": 0.6129085421562195, |
| "learning_rate": 0.0009378842095605102, |
| "loss": 2.9413, |
| "step": 9440 |
| }, |
| { |
| "epoch": 7.797605532046651, |
| "grad_norm": 0.6538434624671936, |
| "learning_rate": 0.0009377007064868337, |
| "loss": 2.935, |
| "step": 9450 |
| }, |
| { |
| "epoch": 7.805862318092682, |
| "grad_norm": 0.617875337600708, |
| "learning_rate": 0.0009375172034131572, |
| "loss": 2.9439, |
| "step": 9460 |
| }, |
| { |
| "epoch": 7.814119104138714, |
| "grad_norm": 0.6133493781089783, |
| "learning_rate": 0.0009373337003394807, |
| "loss": 2.9428, |
| "step": 9470 |
| }, |
| { |
| "epoch": 7.8223758901847456, |
| "grad_norm": 0.6544171571731567, |
| "learning_rate": 0.0009371501972658042, |
| "loss": 2.936, |
| "step": 9480 |
| }, |
| { |
| "epoch": 7.830632676230778, |
| "grad_norm": 0.6270118355751038, |
| "learning_rate": 0.0009369666941921277, |
| "loss": 2.9486, |
| "step": 9490 |
| }, |
| { |
| "epoch": 7.838889462276809, |
| "grad_norm": 0.6458065509796143, |
| "learning_rate": 0.0009367831911184512, |
| "loss": 2.9396, |
| "step": 9500 |
| }, |
| { |
| "epoch": 7.84714624832284, |
| "grad_norm": 0.6657986640930176, |
| "learning_rate": 0.0009365996880447748, |
| "loss": 2.9461, |
| "step": 9510 |
| }, |
| { |
| "epoch": 7.855403034368872, |
| "grad_norm": 0.6538524627685547, |
| "learning_rate": 0.0009364161849710983, |
| "loss": 2.9358, |
| "step": 9520 |
| }, |
| { |
| "epoch": 7.863659820414903, |
| "grad_norm": 0.6204900741577148, |
| "learning_rate": 0.0009362326818974218, |
| "loss": 2.9375, |
| "step": 9530 |
| }, |
| { |
| "epoch": 7.871916606460935, |
| "grad_norm": 0.5772661566734314, |
| "learning_rate": 0.0009360491788237453, |
| "loss": 2.9371, |
| "step": 9540 |
| }, |
| { |
| "epoch": 7.880173392506967, |
| "grad_norm": 0.7631484270095825, |
| "learning_rate": 0.0009358656757500689, |
| "loss": 2.9518, |
| "step": 9550 |
| }, |
| { |
| "epoch": 7.888430178552998, |
| "grad_norm": 0.5904896855354309, |
| "learning_rate": 0.0009356821726763924, |
| "loss": 2.9401, |
| "step": 9560 |
| }, |
| { |
| "epoch": 7.89668696459903, |
| "grad_norm": 0.6027041077613831, |
| "learning_rate": 0.0009354986696027159, |
| "loss": 2.935, |
| "step": 9570 |
| }, |
| { |
| "epoch": 7.904943750645061, |
| "grad_norm": 0.5784376859664917, |
| "learning_rate": 0.0009353151665290394, |
| "loss": 2.9314, |
| "step": 9580 |
| }, |
| { |
| "epoch": 7.913200536691093, |
| "grad_norm": 0.6234803795814514, |
| "learning_rate": 0.000935131663455363, |
| "loss": 2.9341, |
| "step": 9590 |
| }, |
| { |
| "epoch": 7.921457322737124, |
| "grad_norm": 0.5850915312767029, |
| "learning_rate": 0.0009349481603816863, |
| "loss": 2.9266, |
| "step": 9600 |
| }, |
| { |
| "epoch": 7.929714108783156, |
| "grad_norm": 0.6063703894615173, |
| "learning_rate": 0.0009347646573080099, |
| "loss": 2.9421, |
| "step": 9610 |
| }, |
| { |
| "epoch": 7.937970894829188, |
| "grad_norm": 0.5547103881835938, |
| "learning_rate": 0.0009345811542343334, |
| "loss": 2.9294, |
| "step": 9620 |
| }, |
| { |
| "epoch": 7.94622768087522, |
| "grad_norm": 0.5692980885505676, |
| "learning_rate": 0.000934397651160657, |
| "loss": 2.9347, |
| "step": 9630 |
| }, |
| { |
| "epoch": 7.954484466921251, |
| "grad_norm": 0.6392699480056763, |
| "learning_rate": 0.0009342141480869804, |
| "loss": 2.9386, |
| "step": 9640 |
| }, |
| { |
| "epoch": 7.962741252967282, |
| "grad_norm": 0.5906763076782227, |
| "learning_rate": 0.000934030645013304, |
| "loss": 2.9407, |
| "step": 9650 |
| }, |
| { |
| "epoch": 7.970998039013314, |
| "grad_norm": 0.5717517733573914, |
| "learning_rate": 0.0009338471419396275, |
| "loss": 2.93, |
| "step": 9660 |
| }, |
| { |
| "epoch": 7.979254825059345, |
| "grad_norm": 0.63603675365448, |
| "learning_rate": 0.0009336636388659511, |
| "loss": 2.9334, |
| "step": 9670 |
| }, |
| { |
| "epoch": 7.987511611105377, |
| "grad_norm": 0.6233087778091431, |
| "learning_rate": 0.0009334801357922745, |
| "loss": 2.9247, |
| "step": 9680 |
| }, |
| { |
| "epoch": 7.995768397151409, |
| "grad_norm": 0.6149667501449585, |
| "learning_rate": 0.0009332966327185981, |
| "loss": 2.9218, |
| "step": 9690 |
| }, |
| { |
| "epoch": 8.003302714418412, |
| "grad_norm": 0.6047292947769165, |
| "learning_rate": 0.0009331131296449216, |
| "loss": 2.6704, |
| "step": 9700 |
| }, |
| { |
| "epoch": 8.011559500464445, |
| "grad_norm": 0.6108692288398743, |
| "learning_rate": 0.0009329296265712452, |
| "loss": 2.9237, |
| "step": 9710 |
| }, |
| { |
| "epoch": 8.019816286510476, |
| "grad_norm": 0.5642316341400146, |
| "learning_rate": 0.0009327461234975686, |
| "loss": 2.9258, |
| "step": 9720 |
| }, |
| { |
| "epoch": 8.028073072556507, |
| "grad_norm": 0.6315813660621643, |
| "learning_rate": 0.0009325626204238921, |
| "loss": 2.9293, |
| "step": 9730 |
| }, |
| { |
| "epoch": 8.036329858602539, |
| "grad_norm": 0.6231210827827454, |
| "learning_rate": 0.0009323791173502156, |
| "loss": 2.9161, |
| "step": 9740 |
| }, |
| { |
| "epoch": 8.04458664464857, |
| "grad_norm": 0.5583593249320984, |
| "learning_rate": 0.0009321956142765392, |
| "loss": 2.923, |
| "step": 9750 |
| }, |
| { |
| "epoch": 8.052843430694603, |
| "grad_norm": 0.5963938236236572, |
| "learning_rate": 0.0009320121112028626, |
| "loss": 2.9282, |
| "step": 9760 |
| }, |
| { |
| "epoch": 8.061100216740634, |
| "grad_norm": 0.6553643941879272, |
| "learning_rate": 0.0009318286081291861, |
| "loss": 2.9218, |
| "step": 9770 |
| }, |
| { |
| "epoch": 8.069357002786665, |
| "grad_norm": 0.5880711674690247, |
| "learning_rate": 0.0009316451050555097, |
| "loss": 2.9278, |
| "step": 9780 |
| }, |
| { |
| "epoch": 8.077613788832696, |
| "grad_norm": 0.584306001663208, |
| "learning_rate": 0.0009314616019818332, |
| "loss": 2.9275, |
| "step": 9790 |
| }, |
| { |
| "epoch": 8.085870574878728, |
| "grad_norm": 0.655783474445343, |
| "learning_rate": 0.0009312780989081567, |
| "loss": 2.9148, |
| "step": 9800 |
| }, |
| { |
| "epoch": 8.09412736092476, |
| "grad_norm": 0.6076985001564026, |
| "learning_rate": 0.0009310945958344802, |
| "loss": 2.9243, |
| "step": 9810 |
| }, |
| { |
| "epoch": 8.102384146970792, |
| "grad_norm": 0.5802444815635681, |
| "learning_rate": 0.0009309110927608038, |
| "loss": 2.9269, |
| "step": 9820 |
| }, |
| { |
| "epoch": 8.110640933016823, |
| "grad_norm": 0.6020260453224182, |
| "learning_rate": 0.0009307275896871273, |
| "loss": 2.9156, |
| "step": 9830 |
| }, |
| { |
| "epoch": 8.118897719062854, |
| "grad_norm": 0.6201086044311523, |
| "learning_rate": 0.0009305440866134508, |
| "loss": 2.9187, |
| "step": 9840 |
| }, |
| { |
| "epoch": 8.127154505108887, |
| "grad_norm": 0.6539363861083984, |
| "learning_rate": 0.0009303605835397743, |
| "loss": 2.9242, |
| "step": 9850 |
| }, |
| { |
| "epoch": 8.135411291154918, |
| "grad_norm": 0.6557437777519226, |
| "learning_rate": 0.0009301770804660978, |
| "loss": 2.9149, |
| "step": 9860 |
| }, |
| { |
| "epoch": 8.14366807720095, |
| "grad_norm": 0.563693106174469, |
| "learning_rate": 0.0009299935773924213, |
| "loss": 2.9283, |
| "step": 9870 |
| }, |
| { |
| "epoch": 8.15192486324698, |
| "grad_norm": 0.610340416431427, |
| "learning_rate": 0.0009298100743187448, |
| "loss": 2.9233, |
| "step": 9880 |
| }, |
| { |
| "epoch": 8.160181649293012, |
| "grad_norm": 0.5527334809303284, |
| "learning_rate": 0.0009296265712450683, |
| "loss": 2.9088, |
| "step": 9890 |
| }, |
| { |
| "epoch": 8.168438435339045, |
| "grad_norm": 0.5965984463691711, |
| "learning_rate": 0.0009294430681713919, |
| "loss": 2.9233, |
| "step": 9900 |
| }, |
| { |
| "epoch": 8.176695221385076, |
| "grad_norm": 0.6083648204803467, |
| "learning_rate": 0.0009292595650977154, |
| "loss": 2.9182, |
| "step": 9910 |
| }, |
| { |
| "epoch": 8.184952007431107, |
| "grad_norm": 0.5621761083602905, |
| "learning_rate": 0.0009290760620240389, |
| "loss": 2.9146, |
| "step": 9920 |
| }, |
| { |
| "epoch": 8.193208793477138, |
| "grad_norm": 0.5425733923912048, |
| "learning_rate": 0.0009288925589503624, |
| "loss": 2.9133, |
| "step": 9930 |
| }, |
| { |
| "epoch": 8.201465579523171, |
| "grad_norm": 0.5596359372138977, |
| "learning_rate": 0.000928709055876686, |
| "loss": 2.9141, |
| "step": 9940 |
| }, |
| { |
| "epoch": 8.209722365569203, |
| "grad_norm": 0.5979769825935364, |
| "learning_rate": 0.0009285255528030095, |
| "loss": 2.9155, |
| "step": 9950 |
| }, |
| { |
| "epoch": 8.217979151615234, |
| "grad_norm": 0.6086379289627075, |
| "learning_rate": 0.000928342049729333, |
| "loss": 2.9253, |
| "step": 9960 |
| }, |
| { |
| "epoch": 8.226235937661265, |
| "grad_norm": 0.6083199381828308, |
| "learning_rate": 0.0009281585466556565, |
| "loss": 2.913, |
| "step": 9970 |
| }, |
| { |
| "epoch": 8.234492723707296, |
| "grad_norm": 0.6459252238273621, |
| "learning_rate": 0.0009279750435819801, |
| "loss": 2.914, |
| "step": 9980 |
| }, |
| { |
| "epoch": 8.24274950975333, |
| "grad_norm": 0.5913544297218323, |
| "learning_rate": 0.0009277915405083034, |
| "loss": 2.9142, |
| "step": 9990 |
| }, |
| { |
| "epoch": 8.25100629579936, |
| "grad_norm": 0.6325271129608154, |
| "learning_rate": 0.000927608037434627, |
| "loss": 2.9161, |
| "step": 10000 |
| }, |
| { |
| "epoch": 8.259263081845392, |
| "grad_norm": 0.5974222421646118, |
| "learning_rate": 0.0009274245343609505, |
| "loss": 2.907, |
| "step": 10010 |
| }, |
| { |
| "epoch": 8.267519867891423, |
| "grad_norm": 0.5887889862060547, |
| "learning_rate": 0.0009272410312872741, |
| "loss": 2.906, |
| "step": 10020 |
| }, |
| { |
| "epoch": 8.275776653937454, |
| "grad_norm": 0.6619329452514648, |
| "learning_rate": 0.0009270575282135975, |
| "loss": 2.9102, |
| "step": 10030 |
| }, |
| { |
| "epoch": 8.284033439983487, |
| "grad_norm": 0.5642185211181641, |
| "learning_rate": 0.0009268740251399211, |
| "loss": 2.9119, |
| "step": 10040 |
| }, |
| { |
| "epoch": 8.292290226029518, |
| "grad_norm": 0.6225172877311707, |
| "learning_rate": 0.0009266905220662446, |
| "loss": 2.9189, |
| "step": 10050 |
| }, |
| { |
| "epoch": 8.30054701207555, |
| "grad_norm": 0.6109263300895691, |
| "learning_rate": 0.0009265070189925682, |
| "loss": 2.9113, |
| "step": 10060 |
| }, |
| { |
| "epoch": 8.30880379812158, |
| "grad_norm": 0.6616942286491394, |
| "learning_rate": 0.0009263235159188916, |
| "loss": 2.9115, |
| "step": 10070 |
| }, |
| { |
| "epoch": 8.317060584167614, |
| "grad_norm": 0.5564186573028564, |
| "learning_rate": 0.0009261400128452152, |
| "loss": 2.9175, |
| "step": 10080 |
| }, |
| { |
| "epoch": 8.325317370213645, |
| "grad_norm": 0.5995142459869385, |
| "learning_rate": 0.0009259565097715387, |
| "loss": 2.9014, |
| "step": 10090 |
| }, |
| { |
| "epoch": 8.333574156259676, |
| "grad_norm": 0.599012553691864, |
| "learning_rate": 0.0009257730066978623, |
| "loss": 2.9076, |
| "step": 10100 |
| }, |
| { |
| "epoch": 8.341830942305707, |
| "grad_norm": 0.5985011458396912, |
| "learning_rate": 0.0009255895036241857, |
| "loss": 2.9071, |
| "step": 10110 |
| }, |
| { |
| "epoch": 8.350087728351738, |
| "grad_norm": 0.6194997429847717, |
| "learning_rate": 0.0009254060005505093, |
| "loss": 2.9066, |
| "step": 10120 |
| }, |
| { |
| "epoch": 8.358344514397771, |
| "grad_norm": 0.6201893091201782, |
| "learning_rate": 0.0009252224974768327, |
| "loss": 2.8995, |
| "step": 10130 |
| }, |
| { |
| "epoch": 8.366601300443802, |
| "grad_norm": 0.5880855321884155, |
| "learning_rate": 0.0009250389944031563, |
| "loss": 2.9174, |
| "step": 10140 |
| }, |
| { |
| "epoch": 8.374858086489834, |
| "grad_norm": 0.574177086353302, |
| "learning_rate": 0.0009248554913294797, |
| "loss": 2.9035, |
| "step": 10150 |
| }, |
| { |
| "epoch": 8.383114872535865, |
| "grad_norm": 0.6537944674491882, |
| "learning_rate": 0.0009246719882558033, |
| "loss": 2.9017, |
| "step": 10160 |
| }, |
| { |
| "epoch": 8.391371658581898, |
| "grad_norm": 0.5747184753417969, |
| "learning_rate": 0.0009244884851821268, |
| "loss": 2.9057, |
| "step": 10170 |
| }, |
| { |
| "epoch": 8.399628444627929, |
| "grad_norm": 0.6202713251113892, |
| "learning_rate": 0.0009243049821084504, |
| "loss": 2.9102, |
| "step": 10180 |
| }, |
| { |
| "epoch": 8.40788523067396, |
| "grad_norm": 0.5950630307197571, |
| "learning_rate": 0.0009241214790347738, |
| "loss": 2.9159, |
| "step": 10190 |
| }, |
| { |
| "epoch": 8.416142016719991, |
| "grad_norm": 0.6630895733833313, |
| "learning_rate": 0.0009239379759610974, |
| "loss": 2.9099, |
| "step": 10200 |
| }, |
| { |
| "epoch": 8.424398802766023, |
| "grad_norm": 0.6798600554466248, |
| "learning_rate": 0.0009237544728874209, |
| "loss": 2.9168, |
| "step": 10210 |
| }, |
| { |
| "epoch": 8.432655588812056, |
| "grad_norm": 0.6319479942321777, |
| "learning_rate": 0.0009235709698137445, |
| "loss": 2.9081, |
| "step": 10220 |
| }, |
| { |
| "epoch": 8.440912374858087, |
| "grad_norm": 0.6305397152900696, |
| "learning_rate": 0.0009233874667400679, |
| "loss": 2.9087, |
| "step": 10230 |
| }, |
| { |
| "epoch": 8.449169160904118, |
| "grad_norm": 0.5864200592041016, |
| "learning_rate": 0.0009232039636663915, |
| "loss": 2.8947, |
| "step": 10240 |
| }, |
| { |
| "epoch": 8.45742594695015, |
| "grad_norm": 0.5810872316360474, |
| "learning_rate": 0.000923020460592715, |
| "loss": 2.909, |
| "step": 10250 |
| }, |
| { |
| "epoch": 8.465682732996182, |
| "grad_norm": 0.6141155362129211, |
| "learning_rate": 0.0009228369575190385, |
| "loss": 2.9061, |
| "step": 10260 |
| }, |
| { |
| "epoch": 8.473939519042213, |
| "grad_norm": 0.6127697825431824, |
| "learning_rate": 0.0009226534544453619, |
| "loss": 2.9052, |
| "step": 10270 |
| }, |
| { |
| "epoch": 8.482196305088245, |
| "grad_norm": 0.6289766430854797, |
| "learning_rate": 0.0009224699513716855, |
| "loss": 2.8969, |
| "step": 10280 |
| }, |
| { |
| "epoch": 8.490453091134276, |
| "grad_norm": 0.6233021020889282, |
| "learning_rate": 0.000922286448298009, |
| "loss": 2.9047, |
| "step": 10290 |
| }, |
| { |
| "epoch": 8.498709877180307, |
| "grad_norm": 0.6213576197624207, |
| "learning_rate": 0.0009221029452243326, |
| "loss": 2.9005, |
| "step": 10300 |
| }, |
| { |
| "epoch": 8.50696666322634, |
| "grad_norm": 0.6397675275802612, |
| "learning_rate": 0.000921919442150656, |
| "loss": 2.9018, |
| "step": 10310 |
| }, |
| { |
| "epoch": 8.515223449272371, |
| "grad_norm": 0.6674553751945496, |
| "learning_rate": 0.0009217359390769796, |
| "loss": 2.9055, |
| "step": 10320 |
| }, |
| { |
| "epoch": 8.523480235318402, |
| "grad_norm": 0.636461615562439, |
| "learning_rate": 0.0009215524360033031, |
| "loss": 2.908, |
| "step": 10330 |
| }, |
| { |
| "epoch": 8.531737021364433, |
| "grad_norm": 0.593784511089325, |
| "learning_rate": 0.0009213689329296266, |
| "loss": 2.9028, |
| "step": 10340 |
| }, |
| { |
| "epoch": 8.539993807410465, |
| "grad_norm": 0.5959449410438538, |
| "learning_rate": 0.0009211854298559501, |
| "loss": 2.9042, |
| "step": 10350 |
| }, |
| { |
| "epoch": 8.548250593456498, |
| "grad_norm": 0.6200835704803467, |
| "learning_rate": 0.0009210019267822736, |
| "loss": 2.9108, |
| "step": 10360 |
| }, |
| { |
| "epoch": 8.556507379502529, |
| "grad_norm": 0.6081064939498901, |
| "learning_rate": 0.0009208184237085972, |
| "loss": 2.9078, |
| "step": 10370 |
| }, |
| { |
| "epoch": 8.56476416554856, |
| "grad_norm": 0.5773234963417053, |
| "learning_rate": 0.0009206349206349207, |
| "loss": 2.9061, |
| "step": 10380 |
| }, |
| { |
| "epoch": 8.573020951594591, |
| "grad_norm": 0.6200804710388184, |
| "learning_rate": 0.0009204514175612441, |
| "loss": 2.9125, |
| "step": 10390 |
| }, |
| { |
| "epoch": 8.581277737640624, |
| "grad_norm": 0.602094829082489, |
| "learning_rate": 0.0009202679144875676, |
| "loss": 2.8998, |
| "step": 10400 |
| }, |
| { |
| "epoch": 8.589534523686655, |
| "grad_norm": 0.6243281364440918, |
| "learning_rate": 0.0009200844114138912, |
| "loss": 2.899, |
| "step": 10410 |
| }, |
| { |
| "epoch": 8.597791309732687, |
| "grad_norm": 0.5654193758964539, |
| "learning_rate": 0.0009199009083402146, |
| "loss": 2.8988, |
| "step": 10420 |
| }, |
| { |
| "epoch": 8.606048095778718, |
| "grad_norm": 0.5849204063415527, |
| "learning_rate": 0.0009197174052665382, |
| "loss": 2.8968, |
| "step": 10430 |
| }, |
| { |
| "epoch": 8.614304881824749, |
| "grad_norm": 0.6373389363288879, |
| "learning_rate": 0.0009195339021928617, |
| "loss": 2.8907, |
| "step": 10440 |
| }, |
| { |
| "epoch": 8.622561667870782, |
| "grad_norm": 0.5677966475486755, |
| "learning_rate": 0.0009193503991191853, |
| "loss": 2.8966, |
| "step": 10450 |
| }, |
| { |
| "epoch": 8.630818453916813, |
| "grad_norm": 0.5700002908706665, |
| "learning_rate": 0.0009191668960455087, |
| "loss": 2.8932, |
| "step": 10460 |
| }, |
| { |
| "epoch": 8.639075239962844, |
| "grad_norm": 0.5689521431922913, |
| "learning_rate": 0.0009189833929718323, |
| "loss": 2.9058, |
| "step": 10470 |
| }, |
| { |
| "epoch": 8.647332026008876, |
| "grad_norm": 0.579234778881073, |
| "learning_rate": 0.0009187998898981558, |
| "loss": 2.8854, |
| "step": 10480 |
| }, |
| { |
| "epoch": 8.655588812054908, |
| "grad_norm": 0.5431221127510071, |
| "learning_rate": 0.0009186163868244794, |
| "loss": 2.8935, |
| "step": 10490 |
| }, |
| { |
| "epoch": 8.66384559810094, |
| "grad_norm": 0.5348896980285645, |
| "learning_rate": 0.0009184328837508028, |
| "loss": 2.9021, |
| "step": 10500 |
| }, |
| { |
| "epoch": 8.67210238414697, |
| "grad_norm": 0.5952715873718262, |
| "learning_rate": 0.0009182493806771264, |
| "loss": 2.902, |
| "step": 10510 |
| }, |
| { |
| "epoch": 8.680359170193002, |
| "grad_norm": 0.6143502593040466, |
| "learning_rate": 0.0009180658776034498, |
| "loss": 2.8996, |
| "step": 10520 |
| }, |
| { |
| "epoch": 8.688615956239033, |
| "grad_norm": 0.5976707339286804, |
| "learning_rate": 0.0009178823745297734, |
| "loss": 2.8936, |
| "step": 10530 |
| }, |
| { |
| "epoch": 8.696872742285066, |
| "grad_norm": 0.6755147576332092, |
| "learning_rate": 0.0009176988714560968, |
| "loss": 2.8962, |
| "step": 10540 |
| }, |
| { |
| "epoch": 8.705129528331097, |
| "grad_norm": 0.6825839281082153, |
| "learning_rate": 0.0009175153683824204, |
| "loss": 2.9054, |
| "step": 10550 |
| }, |
| { |
| "epoch": 8.713386314377129, |
| "grad_norm": 0.6553934812545776, |
| "learning_rate": 0.0009173318653087439, |
| "loss": 2.9012, |
| "step": 10560 |
| }, |
| { |
| "epoch": 8.72164310042316, |
| "grad_norm": 0.6154677867889404, |
| "learning_rate": 0.0009171483622350675, |
| "loss": 2.8977, |
| "step": 10570 |
| }, |
| { |
| "epoch": 8.729899886469191, |
| "grad_norm": 0.6081441044807434, |
| "learning_rate": 0.0009169648591613909, |
| "loss": 2.8851, |
| "step": 10580 |
| }, |
| { |
| "epoch": 8.738156672515224, |
| "grad_norm": 0.6328044533729553, |
| "learning_rate": 0.0009167813560877145, |
| "loss": 2.8979, |
| "step": 10590 |
| }, |
| { |
| "epoch": 8.746413458561255, |
| "grad_norm": 0.5969833731651306, |
| "learning_rate": 0.000916597853014038, |
| "loss": 2.8985, |
| "step": 10600 |
| }, |
| { |
| "epoch": 8.754670244607286, |
| "grad_norm": 0.5929258465766907, |
| "learning_rate": 0.0009164143499403616, |
| "loss": 2.8965, |
| "step": 10610 |
| }, |
| { |
| "epoch": 8.762927030653318, |
| "grad_norm": 0.5987407565116882, |
| "learning_rate": 0.000916230846866685, |
| "loss": 2.8874, |
| "step": 10620 |
| }, |
| { |
| "epoch": 8.77118381669935, |
| "grad_norm": 0.568051278591156, |
| "learning_rate": 0.0009160473437930086, |
| "loss": 2.9001, |
| "step": 10630 |
| }, |
| { |
| "epoch": 8.779440602745382, |
| "grad_norm": 0.6252589225769043, |
| "learning_rate": 0.0009158638407193321, |
| "loss": 2.8974, |
| "step": 10640 |
| }, |
| { |
| "epoch": 8.787697388791413, |
| "grad_norm": 0.5795060992240906, |
| "learning_rate": 0.0009156803376456556, |
| "loss": 2.8898, |
| "step": 10650 |
| }, |
| { |
| "epoch": 8.795954174837444, |
| "grad_norm": 0.5712361931800842, |
| "learning_rate": 0.000915496834571979, |
| "loss": 2.8999, |
| "step": 10660 |
| }, |
| { |
| "epoch": 8.804210960883475, |
| "grad_norm": 0.5985157489776611, |
| "learning_rate": 0.0009153133314983026, |
| "loss": 2.8827, |
| "step": 10670 |
| }, |
| { |
| "epoch": 8.812467746929508, |
| "grad_norm": 0.6716547608375549, |
| "learning_rate": 0.0009151298284246261, |
| "loss": 2.8915, |
| "step": 10680 |
| }, |
| { |
| "epoch": 8.82072453297554, |
| "grad_norm": 0.572161853313446, |
| "learning_rate": 0.0009149463253509497, |
| "loss": 2.8874, |
| "step": 10690 |
| }, |
| { |
| "epoch": 8.82898131902157, |
| "grad_norm": 0.6197661757469177, |
| "learning_rate": 0.0009147628222772731, |
| "loss": 2.8814, |
| "step": 10700 |
| }, |
| { |
| "epoch": 8.837238105067602, |
| "grad_norm": 0.5292848348617554, |
| "learning_rate": 0.0009145793192035967, |
| "loss": 2.8972, |
| "step": 10710 |
| }, |
| { |
| "epoch": 8.845494891113635, |
| "grad_norm": 0.6543566584587097, |
| "learning_rate": 0.0009143958161299202, |
| "loss": 2.8988, |
| "step": 10720 |
| }, |
| { |
| "epoch": 8.853751677159666, |
| "grad_norm": 0.5767044425010681, |
| "learning_rate": 0.0009142123130562438, |
| "loss": 2.8984, |
| "step": 10730 |
| }, |
| { |
| "epoch": 8.862008463205697, |
| "grad_norm": 0.6067584156990051, |
| "learning_rate": 0.0009140288099825672, |
| "loss": 2.8843, |
| "step": 10740 |
| }, |
| { |
| "epoch": 8.870265249251728, |
| "grad_norm": 0.7177631258964539, |
| "learning_rate": 0.0009138453069088908, |
| "loss": 2.8775, |
| "step": 10750 |
| }, |
| { |
| "epoch": 8.87852203529776, |
| "grad_norm": 0.5992334485054016, |
| "learning_rate": 0.0009136618038352143, |
| "loss": 2.8936, |
| "step": 10760 |
| }, |
| { |
| "epoch": 8.886778821343793, |
| "grad_norm": 0.5875272750854492, |
| "learning_rate": 0.0009134783007615379, |
| "loss": 2.892, |
| "step": 10770 |
| }, |
| { |
| "epoch": 8.895035607389824, |
| "grad_norm": 0.6319445967674255, |
| "learning_rate": 0.0009132947976878612, |
| "loss": 2.89, |
| "step": 10780 |
| }, |
| { |
| "epoch": 8.903292393435855, |
| "grad_norm": 0.6280015110969543, |
| "learning_rate": 0.0009131112946141848, |
| "loss": 2.892, |
| "step": 10790 |
| }, |
| { |
| "epoch": 8.911549179481886, |
| "grad_norm": 0.5766534805297852, |
| "learning_rate": 0.0009129277915405083, |
| "loss": 2.8984, |
| "step": 10800 |
| }, |
| { |
| "epoch": 8.919805965527917, |
| "grad_norm": 0.5661517381668091, |
| "learning_rate": 0.0009127442884668319, |
| "loss": 2.8816, |
| "step": 10810 |
| }, |
| { |
| "epoch": 8.92806275157395, |
| "grad_norm": 0.6289181709289551, |
| "learning_rate": 0.0009125607853931553, |
| "loss": 2.8935, |
| "step": 10820 |
| }, |
| { |
| "epoch": 8.936319537619982, |
| "grad_norm": 0.5980255603790283, |
| "learning_rate": 0.0009123772823194789, |
| "loss": 2.8961, |
| "step": 10830 |
| }, |
| { |
| "epoch": 8.944576323666013, |
| "grad_norm": 0.5405508279800415, |
| "learning_rate": 0.0009121937792458024, |
| "loss": 2.893, |
| "step": 10840 |
| }, |
| { |
| "epoch": 8.952833109712044, |
| "grad_norm": 0.5458992719650269, |
| "learning_rate": 0.000912010276172126, |
| "loss": 2.8879, |
| "step": 10850 |
| }, |
| { |
| "epoch": 8.961089895758077, |
| "grad_norm": 0.6285332441329956, |
| "learning_rate": 0.0009118267730984494, |
| "loss": 2.8697, |
| "step": 10860 |
| }, |
| { |
| "epoch": 8.969346681804108, |
| "grad_norm": 0.5860605239868164, |
| "learning_rate": 0.000911643270024773, |
| "loss": 2.8808, |
| "step": 10870 |
| }, |
| { |
| "epoch": 8.97760346785014, |
| "grad_norm": 0.6316761374473572, |
| "learning_rate": 0.0009114597669510965, |
| "loss": 2.8776, |
| "step": 10880 |
| }, |
| { |
| "epoch": 8.98586025389617, |
| "grad_norm": 0.6294664144515991, |
| "learning_rate": 0.0009112762638774201, |
| "loss": 2.8836, |
| "step": 10890 |
| }, |
| { |
| "epoch": 8.994117039942202, |
| "grad_norm": 0.5913059711456299, |
| "learning_rate": 0.0009110927608037435, |
| "loss": 2.8694, |
| "step": 10900 |
| }, |
| { |
| "epoch": 9.001651357209207, |
| "grad_norm": 0.5948079228401184, |
| "learning_rate": 0.0009109092577300669, |
| "loss": 2.6295, |
| "step": 10910 |
| }, |
| { |
| "epoch": 9.009908143255238, |
| "grad_norm": 0.6169693470001221, |
| "learning_rate": 0.0009107257546563905, |
| "loss": 2.8809, |
| "step": 10920 |
| }, |
| { |
| "epoch": 9.01816492930127, |
| "grad_norm": 0.5843782424926758, |
| "learning_rate": 0.0009105422515827139, |
| "loss": 2.8656, |
| "step": 10930 |
| }, |
| { |
| "epoch": 9.0264217153473, |
| "grad_norm": 0.5803791284561157, |
| "learning_rate": 0.0009103587485090375, |
| "loss": 2.8739, |
| "step": 10940 |
| }, |
| { |
| "epoch": 9.034678501393334, |
| "grad_norm": 0.5891194343566895, |
| "learning_rate": 0.000910175245435361, |
| "loss": 2.8722, |
| "step": 10950 |
| }, |
| { |
| "epoch": 9.042935287439365, |
| "grad_norm": 0.6038116216659546, |
| "learning_rate": 0.0009099917423616846, |
| "loss": 2.8822, |
| "step": 10960 |
| }, |
| { |
| "epoch": 9.051192073485396, |
| "grad_norm": 0.589483380317688, |
| "learning_rate": 0.000909808239288008, |
| "loss": 2.8651, |
| "step": 10970 |
| }, |
| { |
| "epoch": 9.059448859531427, |
| "grad_norm": 0.6521607041358948, |
| "learning_rate": 0.0009096247362143316, |
| "loss": 2.8731, |
| "step": 10980 |
| }, |
| { |
| "epoch": 9.067705645577458, |
| "grad_norm": 0.6631231307983398, |
| "learning_rate": 0.0009094412331406551, |
| "loss": 2.8701, |
| "step": 10990 |
| }, |
| { |
| "epoch": 9.075962431623491, |
| "grad_norm": 0.5744627714157104, |
| "learning_rate": 0.0009092577300669787, |
| "loss": 2.8745, |
| "step": 11000 |
| }, |
| { |
| "epoch": 9.084219217669522, |
| "grad_norm": 0.6196519732475281, |
| "learning_rate": 0.0009090742269933021, |
| "loss": 2.8726, |
| "step": 11010 |
| }, |
| { |
| "epoch": 9.092476003715554, |
| "grad_norm": 0.6212047934532166, |
| "learning_rate": 0.0009088907239196257, |
| "loss": 2.8806, |
| "step": 11020 |
| }, |
| { |
| "epoch": 9.100732789761585, |
| "grad_norm": 0.5632530450820923, |
| "learning_rate": 0.0009087072208459492, |
| "loss": 2.8707, |
| "step": 11030 |
| }, |
| { |
| "epoch": 9.108989575807616, |
| "grad_norm": 0.6230122447013855, |
| "learning_rate": 0.0009085237177722727, |
| "loss": 2.8752, |
| "step": 11040 |
| }, |
| { |
| "epoch": 9.117246361853649, |
| "grad_norm": 0.6368362307548523, |
| "learning_rate": 0.0009083402146985961, |
| "loss": 2.8752, |
| "step": 11050 |
| }, |
| { |
| "epoch": 9.12550314789968, |
| "grad_norm": 0.6354774236679077, |
| "learning_rate": 0.0009081567116249197, |
| "loss": 2.8783, |
| "step": 11060 |
| }, |
| { |
| "epoch": 9.133759933945711, |
| "grad_norm": 0.5921966433525085, |
| "learning_rate": 0.0009079732085512432, |
| "loss": 2.866, |
| "step": 11070 |
| }, |
| { |
| "epoch": 9.142016719991743, |
| "grad_norm": 0.6098789572715759, |
| "learning_rate": 0.0009077897054775668, |
| "loss": 2.8635, |
| "step": 11080 |
| }, |
| { |
| "epoch": 9.150273506037776, |
| "grad_norm": 0.6147322058677673, |
| "learning_rate": 0.0009076062024038902, |
| "loss": 2.879, |
| "step": 11090 |
| }, |
| { |
| "epoch": 9.158530292083807, |
| "grad_norm": 0.554958164691925, |
| "learning_rate": 0.0009074226993302138, |
| "loss": 2.8811, |
| "step": 11100 |
| }, |
| { |
| "epoch": 9.166787078129838, |
| "grad_norm": 0.5771721601486206, |
| "learning_rate": 0.0009072391962565373, |
| "loss": 2.8716, |
| "step": 11110 |
| }, |
| { |
| "epoch": 9.17504386417587, |
| "grad_norm": 0.5154232382774353, |
| "learning_rate": 0.0009070556931828609, |
| "loss": 2.8749, |
| "step": 11120 |
| }, |
| { |
| "epoch": 9.1833006502219, |
| "grad_norm": 0.6075816750526428, |
| "learning_rate": 0.0009068721901091843, |
| "loss": 2.8602, |
| "step": 11130 |
| }, |
| { |
| "epoch": 9.191557436267933, |
| "grad_norm": 0.6058173775672913, |
| "learning_rate": 0.0009066886870355079, |
| "loss": 2.8639, |
| "step": 11140 |
| }, |
| { |
| "epoch": 9.199814222313965, |
| "grad_norm": 0.6568463444709778, |
| "learning_rate": 0.0009065051839618314, |
| "loss": 2.8737, |
| "step": 11150 |
| }, |
| { |
| "epoch": 9.208071008359996, |
| "grad_norm": 0.6088699698448181, |
| "learning_rate": 0.000906321680888155, |
| "loss": 2.8674, |
| "step": 11160 |
| }, |
| { |
| "epoch": 9.216327794406027, |
| "grad_norm": 0.635866105556488, |
| "learning_rate": 0.0009061381778144783, |
| "loss": 2.8711, |
| "step": 11170 |
| }, |
| { |
| "epoch": 9.22458458045206, |
| "grad_norm": 0.6134654879570007, |
| "learning_rate": 0.0009059546747408019, |
| "loss": 2.8702, |
| "step": 11180 |
| }, |
| { |
| "epoch": 9.232841366498091, |
| "grad_norm": 0.6194204688072205, |
| "learning_rate": 0.0009057711716671254, |
| "loss": 2.8653, |
| "step": 11190 |
| }, |
| { |
| "epoch": 9.241098152544122, |
| "grad_norm": 0.5850259065628052, |
| "learning_rate": 0.000905587668593449, |
| "loss": 2.8682, |
| "step": 11200 |
| }, |
| { |
| "epoch": 9.249354938590153, |
| "grad_norm": 0.5745192766189575, |
| "learning_rate": 0.0009054041655197724, |
| "loss": 2.8726, |
| "step": 11210 |
| }, |
| { |
| "epoch": 9.257611724636185, |
| "grad_norm": 0.5950500965118408, |
| "learning_rate": 0.000905220662446096, |
| "loss": 2.8816, |
| "step": 11220 |
| }, |
| { |
| "epoch": 9.265868510682218, |
| "grad_norm": 0.5739644765853882, |
| "learning_rate": 0.0009050371593724195, |
| "loss": 2.8672, |
| "step": 11230 |
| }, |
| { |
| "epoch": 9.274125296728249, |
| "grad_norm": 0.632830798625946, |
| "learning_rate": 0.0009048536562987431, |
| "loss": 2.8733, |
| "step": 11240 |
| }, |
| { |
| "epoch": 9.28238208277428, |
| "grad_norm": 0.5722547769546509, |
| "learning_rate": 0.0009046701532250665, |
| "loss": 2.8772, |
| "step": 11250 |
| }, |
| { |
| "epoch": 9.290638868820311, |
| "grad_norm": 0.6459839344024658, |
| "learning_rate": 0.0009044866501513901, |
| "loss": 2.8625, |
| "step": 11260 |
| }, |
| { |
| "epoch": 9.298895654866342, |
| "grad_norm": 0.6177144050598145, |
| "learning_rate": 0.0009043031470777136, |
| "loss": 2.8658, |
| "step": 11270 |
| }, |
| { |
| "epoch": 9.307152440912375, |
| "grad_norm": 0.5970734357833862, |
| "learning_rate": 0.0009041196440040372, |
| "loss": 2.8572, |
| "step": 11280 |
| }, |
| { |
| "epoch": 9.315409226958407, |
| "grad_norm": 0.5540674924850464, |
| "learning_rate": 0.0009039361409303606, |
| "loss": 2.8578, |
| "step": 11290 |
| }, |
| { |
| "epoch": 9.323666013004438, |
| "grad_norm": 0.5886362791061401, |
| "learning_rate": 0.0009037526378566842, |
| "loss": 2.8622, |
| "step": 11300 |
| }, |
| { |
| "epoch": 9.331922799050469, |
| "grad_norm": 0.563347339630127, |
| "learning_rate": 0.0009035691347830076, |
| "loss": 2.8613, |
| "step": 11310 |
| }, |
| { |
| "epoch": 9.340179585096502, |
| "grad_norm": 0.6594980359077454, |
| "learning_rate": 0.0009033856317093312, |
| "loss": 2.8611, |
| "step": 11320 |
| }, |
| { |
| "epoch": 9.348436371142533, |
| "grad_norm": 0.6381516456604004, |
| "learning_rate": 0.0009032021286356546, |
| "loss": 2.8629, |
| "step": 11330 |
| }, |
| { |
| "epoch": 9.356693157188564, |
| "grad_norm": 0.5937607884407043, |
| "learning_rate": 0.0009030186255619782, |
| "loss": 2.8714, |
| "step": 11340 |
| }, |
| { |
| "epoch": 9.364949943234596, |
| "grad_norm": 0.6181517243385315, |
| "learning_rate": 0.0009028351224883017, |
| "loss": 2.8608, |
| "step": 11350 |
| }, |
| { |
| "epoch": 9.373206729280627, |
| "grad_norm": 0.601092517375946, |
| "learning_rate": 0.0009026516194146253, |
| "loss": 2.8648, |
| "step": 11360 |
| }, |
| { |
| "epoch": 9.38146351532666, |
| "grad_norm": 0.532781183719635, |
| "learning_rate": 0.0009024681163409487, |
| "loss": 2.8596, |
| "step": 11370 |
| }, |
| { |
| "epoch": 9.38972030137269, |
| "grad_norm": 0.6382347941398621, |
| "learning_rate": 0.0009022846132672723, |
| "loss": 2.8776, |
| "step": 11380 |
| }, |
| { |
| "epoch": 9.397977087418722, |
| "grad_norm": 0.617072343826294, |
| "learning_rate": 0.0009021011101935958, |
| "loss": 2.8641, |
| "step": 11390 |
| }, |
| { |
| "epoch": 9.406233873464753, |
| "grad_norm": 0.6701762676239014, |
| "learning_rate": 0.0009019176071199194, |
| "loss": 2.8749, |
| "step": 11400 |
| }, |
| { |
| "epoch": 9.414490659510786, |
| "grad_norm": 0.7021268010139465, |
| "learning_rate": 0.0009017341040462428, |
| "loss": 2.8707, |
| "step": 11410 |
| }, |
| { |
| "epoch": 9.422747445556817, |
| "grad_norm": 0.6214231848716736, |
| "learning_rate": 0.0009015506009725664, |
| "loss": 2.866, |
| "step": 11420 |
| }, |
| { |
| "epoch": 9.431004231602849, |
| "grad_norm": 0.5644016861915588, |
| "learning_rate": 0.0009013670978988899, |
| "loss": 2.869, |
| "step": 11430 |
| }, |
| { |
| "epoch": 9.43926101764888, |
| "grad_norm": 0.6352203488349915, |
| "learning_rate": 0.0009011835948252133, |
| "loss": 2.8674, |
| "step": 11440 |
| }, |
| { |
| "epoch": 9.447517803694911, |
| "grad_norm": 0.5540649890899658, |
| "learning_rate": 0.0009010000917515368, |
| "loss": 2.8574, |
| "step": 11450 |
| }, |
| { |
| "epoch": 9.455774589740944, |
| "grad_norm": 0.5691086649894714, |
| "learning_rate": 0.0009008165886778604, |
| "loss": 2.8652, |
| "step": 11460 |
| }, |
| { |
| "epoch": 9.464031375786975, |
| "grad_norm": 0.5646165013313293, |
| "learning_rate": 0.0009006330856041839, |
| "loss": 2.8704, |
| "step": 11470 |
| }, |
| { |
| "epoch": 9.472288161833006, |
| "grad_norm": 0.6189112067222595, |
| "learning_rate": 0.0009004495825305073, |
| "loss": 2.8685, |
| "step": 11480 |
| }, |
| { |
| "epoch": 9.480544947879038, |
| "grad_norm": 0.5498800873756409, |
| "learning_rate": 0.0009002660794568309, |
| "loss": 2.8595, |
| "step": 11490 |
| }, |
| { |
| "epoch": 9.488801733925069, |
| "grad_norm": 0.5840670466423035, |
| "learning_rate": 0.0009000825763831544, |
| "loss": 2.8675, |
| "step": 11500 |
| }, |
| { |
| "epoch": 9.497058519971102, |
| "grad_norm": 0.5607289671897888, |
| "learning_rate": 0.000899899073309478, |
| "loss": 2.8593, |
| "step": 11510 |
| }, |
| { |
| "epoch": 9.505315306017133, |
| "grad_norm": 0.6241579055786133, |
| "learning_rate": 0.0008997155702358014, |
| "loss": 2.8488, |
| "step": 11520 |
| }, |
| { |
| "epoch": 9.513572092063164, |
| "grad_norm": 0.6067299246788025, |
| "learning_rate": 0.000899532067162125, |
| "loss": 2.8573, |
| "step": 11530 |
| }, |
| { |
| "epoch": 9.521828878109195, |
| "grad_norm": 0.6034315824508667, |
| "learning_rate": 0.0008993485640884485, |
| "loss": 2.8672, |
| "step": 11540 |
| }, |
| { |
| "epoch": 9.530085664155228, |
| "grad_norm": 0.5804450511932373, |
| "learning_rate": 0.0008991650610147721, |
| "loss": 2.8576, |
| "step": 11550 |
| }, |
| { |
| "epoch": 9.53834245020126, |
| "grad_norm": 0.6092609167098999, |
| "learning_rate": 0.0008989815579410955, |
| "loss": 2.8654, |
| "step": 11560 |
| }, |
| { |
| "epoch": 9.54659923624729, |
| "grad_norm": 0.5359856486320496, |
| "learning_rate": 0.000898798054867419, |
| "loss": 2.8616, |
| "step": 11570 |
| }, |
| { |
| "epoch": 9.554856022293322, |
| "grad_norm": 0.6626849174499512, |
| "learning_rate": 0.0008986145517937425, |
| "loss": 2.8635, |
| "step": 11580 |
| }, |
| { |
| "epoch": 9.563112808339355, |
| "grad_norm": 0.6117586493492126, |
| "learning_rate": 0.0008984310487200661, |
| "loss": 2.8682, |
| "step": 11590 |
| }, |
| { |
| "epoch": 9.571369594385386, |
| "grad_norm": 0.6978448629379272, |
| "learning_rate": 0.0008982475456463895, |
| "loss": 2.8591, |
| "step": 11600 |
| }, |
| { |
| "epoch": 9.579626380431417, |
| "grad_norm": 0.569664478302002, |
| "learning_rate": 0.0008980640425727131, |
| "loss": 2.8576, |
| "step": 11610 |
| }, |
| { |
| "epoch": 9.587883166477448, |
| "grad_norm": 0.6535126566886902, |
| "learning_rate": 0.0008978805394990366, |
| "loss": 2.85, |
| "step": 11620 |
| }, |
| { |
| "epoch": 9.59613995252348, |
| "grad_norm": 0.5983597636222839, |
| "learning_rate": 0.0008976970364253602, |
| "loss": 2.8674, |
| "step": 11630 |
| }, |
| { |
| "epoch": 9.604396738569513, |
| "grad_norm": 0.5989744067192078, |
| "learning_rate": 0.0008975135333516836, |
| "loss": 2.8606, |
| "step": 11640 |
| }, |
| { |
| "epoch": 9.612653524615544, |
| "grad_norm": 0.6094872355461121, |
| "learning_rate": 0.0008973300302780072, |
| "loss": 2.8586, |
| "step": 11650 |
| }, |
| { |
| "epoch": 9.620910310661575, |
| "grad_norm": 0.5862686038017273, |
| "learning_rate": 0.0008971465272043307, |
| "loss": 2.8566, |
| "step": 11660 |
| }, |
| { |
| "epoch": 9.629167096707606, |
| "grad_norm": 0.6004934310913086, |
| "learning_rate": 0.0008969630241306543, |
| "loss": 2.854, |
| "step": 11670 |
| }, |
| { |
| "epoch": 9.637423882753637, |
| "grad_norm": 0.6094337105751038, |
| "learning_rate": 0.0008967795210569777, |
| "loss": 2.8599, |
| "step": 11680 |
| }, |
| { |
| "epoch": 9.64568066879967, |
| "grad_norm": 0.5388069748878479, |
| "learning_rate": 0.0008965960179833013, |
| "loss": 2.8595, |
| "step": 11690 |
| }, |
| { |
| "epoch": 9.653937454845702, |
| "grad_norm": 0.5832782983779907, |
| "learning_rate": 0.0008964125149096247, |
| "loss": 2.8579, |
| "step": 11700 |
| }, |
| { |
| "epoch": 9.662194240891733, |
| "grad_norm": 0.6066370606422424, |
| "learning_rate": 0.0008962290118359483, |
| "loss": 2.8632, |
| "step": 11710 |
| }, |
| { |
| "epoch": 9.670451026937764, |
| "grad_norm": 0.6169841289520264, |
| "learning_rate": 0.0008960455087622717, |
| "loss": 2.8664, |
| "step": 11720 |
| }, |
| { |
| "epoch": 9.678707812983795, |
| "grad_norm": 0.5962358713150024, |
| "learning_rate": 0.0008958620056885953, |
| "loss": 2.8569, |
| "step": 11730 |
| }, |
| { |
| "epoch": 9.686964599029828, |
| "grad_norm": 0.6182219386100769, |
| "learning_rate": 0.0008956785026149188, |
| "loss": 2.8409, |
| "step": 11740 |
| }, |
| { |
| "epoch": 9.69522138507586, |
| "grad_norm": 0.5909119248390198, |
| "learning_rate": 0.0008954949995412424, |
| "loss": 2.8491, |
| "step": 11750 |
| }, |
| { |
| "epoch": 9.70347817112189, |
| "grad_norm": 0.6216540932655334, |
| "learning_rate": 0.0008953114964675658, |
| "loss": 2.8516, |
| "step": 11760 |
| }, |
| { |
| "epoch": 9.711734957167922, |
| "grad_norm": 0.5488907694816589, |
| "learning_rate": 0.0008951279933938894, |
| "loss": 2.854, |
| "step": 11770 |
| }, |
| { |
| "epoch": 9.719991743213955, |
| "grad_norm": 0.6433009505271912, |
| "learning_rate": 0.0008949444903202129, |
| "loss": 2.8601, |
| "step": 11780 |
| }, |
| { |
| "epoch": 9.728248529259986, |
| "grad_norm": 0.6110396385192871, |
| "learning_rate": 0.0008947609872465365, |
| "loss": 2.8499, |
| "step": 11790 |
| }, |
| { |
| "epoch": 9.736505315306017, |
| "grad_norm": 0.5883538722991943, |
| "learning_rate": 0.0008945774841728599, |
| "loss": 2.8494, |
| "step": 11800 |
| }, |
| { |
| "epoch": 9.744762101352048, |
| "grad_norm": 0.6048309803009033, |
| "learning_rate": 0.0008943939810991835, |
| "loss": 2.8598, |
| "step": 11810 |
| }, |
| { |
| "epoch": 9.753018887398081, |
| "grad_norm": 0.5529934763908386, |
| "learning_rate": 0.000894210478025507, |
| "loss": 2.8556, |
| "step": 11820 |
| }, |
| { |
| "epoch": 9.761275673444112, |
| "grad_norm": 0.5549076199531555, |
| "learning_rate": 0.0008940269749518304, |
| "loss": 2.8615, |
| "step": 11830 |
| }, |
| { |
| "epoch": 9.769532459490144, |
| "grad_norm": 0.6248366832733154, |
| "learning_rate": 0.0008938434718781539, |
| "loss": 2.842, |
| "step": 11840 |
| }, |
| { |
| "epoch": 9.777789245536175, |
| "grad_norm": 0.5666365027427673, |
| "learning_rate": 0.0008936599688044775, |
| "loss": 2.8444, |
| "step": 11850 |
| }, |
| { |
| "epoch": 9.786046031582206, |
| "grad_norm": 0.5991445183753967, |
| "learning_rate": 0.000893476465730801, |
| "loss": 2.8415, |
| "step": 11860 |
| }, |
| { |
| "epoch": 9.794302817628239, |
| "grad_norm": 0.5583236217498779, |
| "learning_rate": 0.0008932929626571245, |
| "loss": 2.8519, |
| "step": 11870 |
| }, |
| { |
| "epoch": 9.80255960367427, |
| "grad_norm": 0.6396259069442749, |
| "learning_rate": 0.000893109459583448, |
| "loss": 2.8603, |
| "step": 11880 |
| }, |
| { |
| "epoch": 9.810816389720301, |
| "grad_norm": 0.6023778915405273, |
| "learning_rate": 0.0008929259565097716, |
| "loss": 2.856, |
| "step": 11890 |
| }, |
| { |
| "epoch": 9.819073175766333, |
| "grad_norm": 0.582880437374115, |
| "learning_rate": 0.0008927424534360951, |
| "loss": 2.8575, |
| "step": 11900 |
| }, |
| { |
| "epoch": 9.827329961812364, |
| "grad_norm": 0.6072121262550354, |
| "learning_rate": 0.0008925589503624186, |
| "loss": 2.8533, |
| "step": 11910 |
| }, |
| { |
| "epoch": 9.835586747858397, |
| "grad_norm": 0.6384845972061157, |
| "learning_rate": 0.0008923754472887421, |
| "loss": 2.8593, |
| "step": 11920 |
| }, |
| { |
| "epoch": 9.843843533904428, |
| "grad_norm": 0.6040202379226685, |
| "learning_rate": 0.0008921919442150657, |
| "loss": 2.8579, |
| "step": 11930 |
| }, |
| { |
| "epoch": 9.852100319950459, |
| "grad_norm": 0.5378623008728027, |
| "learning_rate": 0.0008920084411413892, |
| "loss": 2.8493, |
| "step": 11940 |
| }, |
| { |
| "epoch": 9.86035710599649, |
| "grad_norm": 0.6223618388175964, |
| "learning_rate": 0.0008918249380677127, |
| "loss": 2.8474, |
| "step": 11950 |
| }, |
| { |
| "epoch": 9.868613892042523, |
| "grad_norm": 0.5807675719261169, |
| "learning_rate": 0.0008916414349940361, |
| "loss": 2.8442, |
| "step": 11960 |
| }, |
| { |
| "epoch": 9.876870678088554, |
| "grad_norm": 0.6609598398208618, |
| "learning_rate": 0.0008914579319203597, |
| "loss": 2.8692, |
| "step": 11970 |
| }, |
| { |
| "epoch": 9.885127464134586, |
| "grad_norm": 0.55182945728302, |
| "learning_rate": 0.0008912744288466832, |
| "loss": 2.8533, |
| "step": 11980 |
| }, |
| { |
| "epoch": 9.893384250180617, |
| "grad_norm": 0.6168049573898315, |
| "learning_rate": 0.0008910909257730067, |
| "loss": 2.8432, |
| "step": 11990 |
| }, |
| { |
| "epoch": 9.901641036226648, |
| "grad_norm": 0.5642480850219727, |
| "learning_rate": 0.0008909074226993302, |
| "loss": 2.8483, |
| "step": 12000 |
| }, |
| { |
| "epoch": 9.91732892971411, |
| "grad_norm": 0.5881712436676025, |
| "learning_rate": 0.0008907239196256538, |
| "loss": 2.8515, |
| "step": 12010 |
| }, |
| { |
| "epoch": 9.92558571576014, |
| "grad_norm": 0.597673773765564, |
| "learning_rate": 0.0008905404165519773, |
| "loss": 2.843, |
| "step": 12020 |
| }, |
| { |
| "epoch": 9.933842501806172, |
| "grad_norm": 0.5990006923675537, |
| "learning_rate": 0.0008903569134783008, |
| "loss": 2.848, |
| "step": 12030 |
| }, |
| { |
| "epoch": 9.942099287852203, |
| "grad_norm": 0.6145173907279968, |
| "learning_rate": 0.0008901734104046243, |
| "loss": 2.8411, |
| "step": 12040 |
| }, |
| { |
| "epoch": 9.950356073898234, |
| "grad_norm": 0.5862278938293457, |
| "learning_rate": 0.0008899899073309478, |
| "loss": 2.8561, |
| "step": 12050 |
| }, |
| { |
| "epoch": 9.958612859944267, |
| "grad_norm": 0.5999264717102051, |
| "learning_rate": 0.0008898064042572714, |
| "loss": 2.8487, |
| "step": 12060 |
| }, |
| { |
| "epoch": 9.966869645990299, |
| "grad_norm": 0.5286862850189209, |
| "learning_rate": 0.0008896229011835948, |
| "loss": 2.851, |
| "step": 12070 |
| }, |
| { |
| "epoch": 9.97512643203633, |
| "grad_norm": 0.5677134394645691, |
| "learning_rate": 0.0008894393981099184, |
| "loss": 2.8516, |
| "step": 12080 |
| }, |
| { |
| "epoch": 9.983383218082361, |
| "grad_norm": 0.5856079459190369, |
| "learning_rate": 0.0008892558950362418, |
| "loss": 2.8545, |
| "step": 12090 |
| }, |
| { |
| "epoch": 9.991640004128392, |
| "grad_norm": 0.6451898813247681, |
| "learning_rate": 0.0008890723919625654, |
| "loss": 2.8358, |
| "step": 12100 |
| }, |
| { |
| "epoch": 9.999896790174425, |
| "grad_norm": 0.6016899347305298, |
| "learning_rate": 0.0008888888888888888, |
| "loss": 2.8434, |
| "step": 12110 |
| }, |
| { |
| "epoch": 10.008256786046031, |
| "grad_norm": 0.5568205714225769, |
| "learning_rate": 0.0008887053858152124, |
| "loss": 2.8738, |
| "step": 12120 |
| }, |
| { |
| "epoch": 10.016513572092062, |
| "grad_norm": 0.544348955154419, |
| "learning_rate": 0.0008885218827415359, |
| "loss": 2.8332, |
| "step": 12130 |
| }, |
| { |
| "epoch": 10.024770358138095, |
| "grad_norm": 0.6535346508026123, |
| "learning_rate": 0.0008883383796678595, |
| "loss": 2.8394, |
| "step": 12140 |
| }, |
| { |
| "epoch": 10.033027144184127, |
| "grad_norm": 0.5878455638885498, |
| "learning_rate": 0.0008881548765941829, |
| "loss": 2.8475, |
| "step": 12150 |
| }, |
| { |
| "epoch": 10.041283930230158, |
| "grad_norm": 0.5842605829238892, |
| "learning_rate": 0.0008879713735205065, |
| "loss": 2.8403, |
| "step": 12160 |
| }, |
| { |
| "epoch": 10.049540716276189, |
| "grad_norm": 0.6385082006454468, |
| "learning_rate": 0.00088778787044683, |
| "loss": 2.8376, |
| "step": 12170 |
| }, |
| { |
| "epoch": 10.057797502322222, |
| "grad_norm": 0.6178941130638123, |
| "learning_rate": 0.0008876043673731536, |
| "loss": 2.8441, |
| "step": 12180 |
| }, |
| { |
| "epoch": 10.066054288368253, |
| "grad_norm": 0.5717580318450928, |
| "learning_rate": 0.000887420864299477, |
| "loss": 2.8356, |
| "step": 12190 |
| }, |
| { |
| "epoch": 10.074311074414284, |
| "grad_norm": 0.5871554613113403, |
| "learning_rate": 0.0008872373612258006, |
| "loss": 2.8412, |
| "step": 12200 |
| }, |
| { |
| "epoch": 10.082567860460316, |
| "grad_norm": 0.6004984974861145, |
| "learning_rate": 0.0008870538581521241, |
| "loss": 2.8338, |
| "step": 12210 |
| }, |
| { |
| "epoch": 10.090824646506347, |
| "grad_norm": 0.6046565175056458, |
| "learning_rate": 0.0008868703550784475, |
| "loss": 2.8372, |
| "step": 12220 |
| }, |
| { |
| "epoch": 10.09908143255238, |
| "grad_norm": 0.5893774032592773, |
| "learning_rate": 0.000886686852004771, |
| "loss": 2.8295, |
| "step": 12230 |
| }, |
| { |
| "epoch": 10.10733821859841, |
| "grad_norm": 0.5833553671836853, |
| "learning_rate": 0.0008865033489310946, |
| "loss": 2.8371, |
| "step": 12240 |
| }, |
| { |
| "epoch": 10.115595004644442, |
| "grad_norm": 0.6019455194473267, |
| "learning_rate": 0.0008863198458574181, |
| "loss": 2.8246, |
| "step": 12250 |
| }, |
| { |
| "epoch": 10.123851790690473, |
| "grad_norm": 0.6151683926582336, |
| "learning_rate": 0.0008861363427837416, |
| "loss": 2.841, |
| "step": 12260 |
| }, |
| { |
| "epoch": 10.132108576736504, |
| "grad_norm": 0.6026824116706848, |
| "learning_rate": 0.0008859528397100651, |
| "loss": 2.8392, |
| "step": 12270 |
| }, |
| { |
| "epoch": 10.140365362782537, |
| "grad_norm": 0.5783131718635559, |
| "learning_rate": 0.0008857693366363887, |
| "loss": 2.8479, |
| "step": 12280 |
| }, |
| { |
| "epoch": 10.148622148828569, |
| "grad_norm": 0.6481205821037292, |
| "learning_rate": 0.0008855858335627122, |
| "loss": 2.8423, |
| "step": 12290 |
| }, |
| { |
| "epoch": 10.1568789348746, |
| "grad_norm": 0.5748919248580933, |
| "learning_rate": 0.0008854023304890357, |
| "loss": 2.8349, |
| "step": 12300 |
| }, |
| { |
| "epoch": 10.165135720920631, |
| "grad_norm": 0.5705230832099915, |
| "learning_rate": 0.0008852188274153592, |
| "loss": 2.8463, |
| "step": 12310 |
| }, |
| { |
| "epoch": 10.173392506966664, |
| "grad_norm": 0.5699977278709412, |
| "learning_rate": 0.0008850353243416828, |
| "loss": 2.8438, |
| "step": 12320 |
| }, |
| { |
| "epoch": 10.181649293012695, |
| "grad_norm": 0.544175386428833, |
| "learning_rate": 0.0008848518212680063, |
| "loss": 2.8363, |
| "step": 12330 |
| }, |
| { |
| "epoch": 10.189906079058726, |
| "grad_norm": 0.568715512752533, |
| "learning_rate": 0.0008846683181943298, |
| "loss": 2.8362, |
| "step": 12340 |
| }, |
| { |
| "epoch": 10.198162865104758, |
| "grad_norm": 0.5720770955085754, |
| "learning_rate": 0.0008844848151206532, |
| "loss": 2.8284, |
| "step": 12350 |
| }, |
| { |
| "epoch": 10.206419651150789, |
| "grad_norm": 0.626235842704773, |
| "learning_rate": 0.0008843013120469768, |
| "loss": 2.8393, |
| "step": 12360 |
| }, |
| { |
| "epoch": 10.214676437196822, |
| "grad_norm": 0.5661699175834656, |
| "learning_rate": 0.0008841178089733003, |
| "loss": 2.8333, |
| "step": 12370 |
| }, |
| { |
| "epoch": 10.222933223242853, |
| "grad_norm": 0.6092801094055176, |
| "learning_rate": 0.0008839343058996238, |
| "loss": 2.8513, |
| "step": 12380 |
| }, |
| { |
| "epoch": 10.231190009288884, |
| "grad_norm": 0.6037712097167969, |
| "learning_rate": 0.0008837508028259473, |
| "loss": 2.8328, |
| "step": 12390 |
| }, |
| { |
| "epoch": 10.239446795334915, |
| "grad_norm": 0.5994784832000732, |
| "learning_rate": 0.0008835672997522709, |
| "loss": 2.8268, |
| "step": 12400 |
| }, |
| { |
| "epoch": 10.247703581380948, |
| "grad_norm": 0.5821447968482971, |
| "learning_rate": 0.0008833837966785944, |
| "loss": 2.8376, |
| "step": 12410 |
| }, |
| { |
| "epoch": 10.25596036742698, |
| "grad_norm": 0.6151066422462463, |
| "learning_rate": 0.0008832002936049179, |
| "loss": 2.8338, |
| "step": 12420 |
| }, |
| { |
| "epoch": 10.26421715347301, |
| "grad_norm": 0.6016796231269836, |
| "learning_rate": 0.0008830167905312414, |
| "loss": 2.8295, |
| "step": 12430 |
| }, |
| { |
| "epoch": 10.272473939519042, |
| "grad_norm": 0.5741587281227112, |
| "learning_rate": 0.000882833287457565, |
| "loss": 2.8283, |
| "step": 12440 |
| }, |
| { |
| "epoch": 10.280730725565073, |
| "grad_norm": 0.5840280055999756, |
| "learning_rate": 0.0008826497843838885, |
| "loss": 2.8268, |
| "step": 12450 |
| }, |
| { |
| "epoch": 10.288987511611106, |
| "grad_norm": 0.5622872710227966, |
| "learning_rate": 0.000882466281310212, |
| "loss": 2.8424, |
| "step": 12460 |
| }, |
| { |
| "epoch": 10.297244297657137, |
| "grad_norm": 0.6184718608856201, |
| "learning_rate": 0.0008822827782365355, |
| "loss": 2.8269, |
| "step": 12470 |
| }, |
| { |
| "epoch": 10.305501083703168, |
| "grad_norm": 0.5796384215354919, |
| "learning_rate": 0.0008820992751628591, |
| "loss": 2.8383, |
| "step": 12480 |
| }, |
| { |
| "epoch": 10.3137578697492, |
| "grad_norm": 0.617235541343689, |
| "learning_rate": 0.0008819157720891825, |
| "loss": 2.8268, |
| "step": 12490 |
| }, |
| { |
| "epoch": 10.322014655795233, |
| "grad_norm": 0.5677554607391357, |
| "learning_rate": 0.000881732269015506, |
| "loss": 2.8349, |
| "step": 12500 |
| }, |
| { |
| "epoch": 10.330271441841264, |
| "grad_norm": 0.5938097238540649, |
| "learning_rate": 0.0008815487659418295, |
| "loss": 2.8362, |
| "step": 12510 |
| }, |
| { |
| "epoch": 10.338528227887295, |
| "grad_norm": 0.6369422078132629, |
| "learning_rate": 0.0008813652628681531, |
| "loss": 2.8364, |
| "step": 12520 |
| }, |
| { |
| "epoch": 10.346785013933326, |
| "grad_norm": 0.6142675280570984, |
| "learning_rate": 0.0008811817597944766, |
| "loss": 2.8259, |
| "step": 12530 |
| }, |
| { |
| "epoch": 10.355041799979357, |
| "grad_norm": 0.5718218684196472, |
| "learning_rate": 0.0008809982567208001, |
| "loss": 2.8473, |
| "step": 12540 |
| }, |
| { |
| "epoch": 10.36329858602539, |
| "grad_norm": 0.5698361992835999, |
| "learning_rate": 0.0008808147536471236, |
| "loss": 2.8398, |
| "step": 12550 |
| }, |
| { |
| "epoch": 10.371555372071422, |
| "grad_norm": 0.5833884477615356, |
| "learning_rate": 0.0008806312505734472, |
| "loss": 2.8171, |
| "step": 12560 |
| }, |
| { |
| "epoch": 10.379812158117453, |
| "grad_norm": 0.6157854795455933, |
| "learning_rate": 0.0008804477474997707, |
| "loss": 2.8409, |
| "step": 12570 |
| }, |
| { |
| "epoch": 10.388068944163484, |
| "grad_norm": 0.5915418863296509, |
| "learning_rate": 0.0008802642444260942, |
| "loss": 2.8369, |
| "step": 12580 |
| }, |
| { |
| "epoch": 10.396325730209515, |
| "grad_norm": 0.5849014520645142, |
| "learning_rate": 0.0008800807413524177, |
| "loss": 2.8329, |
| "step": 12590 |
| }, |
| { |
| "epoch": 10.404582516255548, |
| "grad_norm": 0.6383744478225708, |
| "learning_rate": 0.0008798972382787413, |
| "loss": 2.8299, |
| "step": 12600 |
| }, |
| { |
| "epoch": 10.41283930230158, |
| "grad_norm": 0.5256903767585754, |
| "learning_rate": 0.0008797137352050648, |
| "loss": 2.8244, |
| "step": 12610 |
| }, |
| { |
| "epoch": 10.42109608834761, |
| "grad_norm": 0.6176425218582153, |
| "learning_rate": 0.0008795302321313881, |
| "loss": 2.8318, |
| "step": 12620 |
| }, |
| { |
| "epoch": 10.429352874393642, |
| "grad_norm": 0.625028133392334, |
| "learning_rate": 0.0008793467290577117, |
| "loss": 2.8367, |
| "step": 12630 |
| }, |
| { |
| "epoch": 10.437609660439675, |
| "grad_norm": 0.626335620880127, |
| "learning_rate": 0.0008791632259840352, |
| "loss": 2.8346, |
| "step": 12640 |
| }, |
| { |
| "epoch": 10.445866446485706, |
| "grad_norm": 0.5328546166419983, |
| "learning_rate": 0.0008789797229103587, |
| "loss": 2.8279, |
| "step": 12650 |
| }, |
| { |
| "epoch": 10.454123232531737, |
| "grad_norm": 0.5871540904045105, |
| "learning_rate": 0.0008787962198366822, |
| "loss": 2.8268, |
| "step": 12660 |
| }, |
| { |
| "epoch": 10.462380018577768, |
| "grad_norm": 0.5590776205062866, |
| "learning_rate": 0.0008786127167630058, |
| "loss": 2.8227, |
| "step": 12670 |
| }, |
| { |
| "epoch": 10.4706368046238, |
| "grad_norm": 0.5899330973625183, |
| "learning_rate": 0.0008784292136893293, |
| "loss": 2.8186, |
| "step": 12680 |
| }, |
| { |
| "epoch": 10.478893590669832, |
| "grad_norm": 0.653564989566803, |
| "learning_rate": 0.0008782457106156528, |
| "loss": 2.8333, |
| "step": 12690 |
| }, |
| { |
| "epoch": 10.487150376715864, |
| "grad_norm": 0.627564013004303, |
| "learning_rate": 0.0008780622075419763, |
| "loss": 2.823, |
| "step": 12700 |
| }, |
| { |
| "epoch": 10.495407162761895, |
| "grad_norm": 0.6121799945831299, |
| "learning_rate": 0.0008778787044682999, |
| "loss": 2.8394, |
| "step": 12710 |
| }, |
| { |
| "epoch": 10.503663948807926, |
| "grad_norm": 0.6052922010421753, |
| "learning_rate": 0.0008776952013946234, |
| "loss": 2.815, |
| "step": 12720 |
| }, |
| { |
| "epoch": 10.511920734853959, |
| "grad_norm": 0.592348039150238, |
| "learning_rate": 0.000877511698320947, |
| "loss": 2.8293, |
| "step": 12730 |
| }, |
| { |
| "epoch": 10.52017752089999, |
| "grad_norm": 0.5429986119270325, |
| "learning_rate": 0.0008773281952472704, |
| "loss": 2.8258, |
| "step": 12740 |
| }, |
| { |
| "epoch": 10.528434306946021, |
| "grad_norm": 0.6261007785797119, |
| "learning_rate": 0.0008771446921735939, |
| "loss": 2.8287, |
| "step": 12750 |
| }, |
| { |
| "epoch": 10.536691092992053, |
| "grad_norm": 0.5362280011177063, |
| "learning_rate": 0.0008769611890999174, |
| "loss": 2.8271, |
| "step": 12760 |
| }, |
| { |
| "epoch": 10.544947879038084, |
| "grad_norm": 0.5826970338821411, |
| "learning_rate": 0.0008767776860262409, |
| "loss": 2.8285, |
| "step": 12770 |
| }, |
| { |
| "epoch": 10.553204665084117, |
| "grad_norm": 0.597993791103363, |
| "learning_rate": 0.0008765941829525644, |
| "loss": 2.8213, |
| "step": 12780 |
| }, |
| { |
| "epoch": 10.561461451130148, |
| "grad_norm": 0.5747185945510864, |
| "learning_rate": 0.000876410679878888, |
| "loss": 2.8282, |
| "step": 12790 |
| }, |
| { |
| "epoch": 10.569718237176179, |
| "grad_norm": 0.5573180317878723, |
| "learning_rate": 0.0008762271768052115, |
| "loss": 2.824, |
| "step": 12800 |
| }, |
| { |
| "epoch": 10.57797502322221, |
| "grad_norm": 0.5840964317321777, |
| "learning_rate": 0.000876043673731535, |
| "loss": 2.8178, |
| "step": 12810 |
| }, |
| { |
| "epoch": 10.586231809268241, |
| "grad_norm": 0.5690692663192749, |
| "learning_rate": 0.0008758601706578585, |
| "loss": 2.8155, |
| "step": 12820 |
| }, |
| { |
| "epoch": 10.594488595314274, |
| "grad_norm": 0.5685713887214661, |
| "learning_rate": 0.0008756766675841821, |
| "loss": 2.8147, |
| "step": 12830 |
| }, |
| { |
| "epoch": 10.602745381360306, |
| "grad_norm": 0.6194620132446289, |
| "learning_rate": 0.0008754931645105056, |
| "loss": 2.8374, |
| "step": 12840 |
| }, |
| { |
| "epoch": 10.611002167406337, |
| "grad_norm": 0.5465943217277527, |
| "learning_rate": 0.0008753096614368291, |
| "loss": 2.8312, |
| "step": 12850 |
| }, |
| { |
| "epoch": 10.619258953452368, |
| "grad_norm": 0.5942501425743103, |
| "learning_rate": 0.0008751261583631526, |
| "loss": 2.8235, |
| "step": 12860 |
| }, |
| { |
| "epoch": 10.627515739498401, |
| "grad_norm": 0.5760926008224487, |
| "learning_rate": 0.0008749426552894762, |
| "loss": 2.8196, |
| "step": 12870 |
| }, |
| { |
| "epoch": 10.635772525544432, |
| "grad_norm": 0.5682793259620667, |
| "learning_rate": 0.0008747591522157996, |
| "loss": 2.8349, |
| "step": 12880 |
| }, |
| { |
| "epoch": 10.644029311590463, |
| "grad_norm": 0.5754048228263855, |
| "learning_rate": 0.0008745756491421231, |
| "loss": 2.8215, |
| "step": 12890 |
| }, |
| { |
| "epoch": 10.652286097636495, |
| "grad_norm": 0.5868312120437622, |
| "learning_rate": 0.0008743921460684466, |
| "loss": 2.8359, |
| "step": 12900 |
| }, |
| { |
| "epoch": 10.660542883682526, |
| "grad_norm": 0.5740572214126587, |
| "learning_rate": 0.0008742086429947702, |
| "loss": 2.8321, |
| "step": 12910 |
| }, |
| { |
| "epoch": 10.668799669728559, |
| "grad_norm": 0.570972740650177, |
| "learning_rate": 0.0008740251399210937, |
| "loss": 2.8291, |
| "step": 12920 |
| }, |
| { |
| "epoch": 10.67705645577459, |
| "grad_norm": 0.5573681592941284, |
| "learning_rate": 0.0008738416368474172, |
| "loss": 2.8211, |
| "step": 12930 |
| }, |
| { |
| "epoch": 10.685313241820621, |
| "grad_norm": 0.6186919212341309, |
| "learning_rate": 0.0008736581337737407, |
| "loss": 2.8122, |
| "step": 12940 |
| }, |
| { |
| "epoch": 10.693570027866652, |
| "grad_norm": 0.6006292700767517, |
| "learning_rate": 0.0008734746307000643, |
| "loss": 2.8187, |
| "step": 12950 |
| }, |
| { |
| "epoch": 10.701826813912685, |
| "grad_norm": 0.571305513381958, |
| "learning_rate": 0.0008732911276263878, |
| "loss": 2.8213, |
| "step": 12960 |
| }, |
| { |
| "epoch": 10.710083599958717, |
| "grad_norm": 0.5861838459968567, |
| "learning_rate": 0.0008731076245527113, |
| "loss": 2.8176, |
| "step": 12970 |
| }, |
| { |
| "epoch": 10.718340386004748, |
| "grad_norm": 0.618885338306427, |
| "learning_rate": 0.0008729241214790348, |
| "loss": 2.8325, |
| "step": 12980 |
| }, |
| { |
| "epoch": 10.726597172050779, |
| "grad_norm": 0.6155752539634705, |
| "learning_rate": 0.0008727406184053584, |
| "loss": 2.8359, |
| "step": 12990 |
| }, |
| { |
| "epoch": 10.73485395809681, |
| "grad_norm": 0.5645089149475098, |
| "learning_rate": 0.0008725571153316819, |
| "loss": 2.8272, |
| "step": 13000 |
| }, |
| { |
| "epoch": 10.743110744142843, |
| "grad_norm": 0.5860604643821716, |
| "learning_rate": 0.0008723736122580053, |
| "loss": 2.8277, |
| "step": 13010 |
| }, |
| { |
| "epoch": 10.751367530188874, |
| "grad_norm": 0.6243773698806763, |
| "learning_rate": 0.0008721901091843288, |
| "loss": 2.8401, |
| "step": 13020 |
| }, |
| { |
| "epoch": 10.759624316234905, |
| "grad_norm": 0.6127947568893433, |
| "learning_rate": 0.0008720066061106524, |
| "loss": 2.8302, |
| "step": 13030 |
| }, |
| { |
| "epoch": 10.767881102280937, |
| "grad_norm": 0.6391910910606384, |
| "learning_rate": 0.0008718231030369758, |
| "loss": 2.8283, |
| "step": 13040 |
| }, |
| { |
| "epoch": 10.776137888326968, |
| "grad_norm": 0.5912919044494629, |
| "learning_rate": 0.0008716395999632994, |
| "loss": 2.8191, |
| "step": 13050 |
| }, |
| { |
| "epoch": 10.784394674373, |
| "grad_norm": 0.5919018983840942, |
| "learning_rate": 0.0008714560968896229, |
| "loss": 2.819, |
| "step": 13060 |
| }, |
| { |
| "epoch": 10.792651460419032, |
| "grad_norm": 0.6195237040519714, |
| "learning_rate": 0.0008712725938159465, |
| "loss": 2.8178, |
| "step": 13070 |
| }, |
| { |
| "epoch": 10.800908246465063, |
| "grad_norm": 0.6050122976303101, |
| "learning_rate": 0.00087108909074227, |
| "loss": 2.825, |
| "step": 13080 |
| }, |
| { |
| "epoch": 10.809165032511094, |
| "grad_norm": 0.6022667288780212, |
| "learning_rate": 0.0008709055876685935, |
| "loss": 2.8237, |
| "step": 13090 |
| }, |
| { |
| "epoch": 10.817421818557127, |
| "grad_norm": 0.6186034679412842, |
| "learning_rate": 0.000870722084594917, |
| "loss": 2.8207, |
| "step": 13100 |
| }, |
| { |
| "epoch": 10.825678604603159, |
| "grad_norm": 0.5731164813041687, |
| "learning_rate": 0.0008705385815212406, |
| "loss": 2.8229, |
| "step": 13110 |
| }, |
| { |
| "epoch": 10.83393539064919, |
| "grad_norm": 0.5715698003768921, |
| "learning_rate": 0.000870355078447564, |
| "loss": 2.8319, |
| "step": 13120 |
| }, |
| { |
| "epoch": 10.842192176695221, |
| "grad_norm": 0.5976336002349854, |
| "learning_rate": 0.0008701715753738876, |
| "loss": 2.8099, |
| "step": 13130 |
| }, |
| { |
| "epoch": 10.850448962741252, |
| "grad_norm": 0.5506688952445984, |
| "learning_rate": 0.000869988072300211, |
| "loss": 2.8256, |
| "step": 13140 |
| }, |
| { |
| "epoch": 10.858705748787285, |
| "grad_norm": 0.5799828767776489, |
| "learning_rate": 0.0008698045692265346, |
| "loss": 2.8198, |
| "step": 13150 |
| }, |
| { |
| "epoch": 10.866962534833316, |
| "grad_norm": 0.5726258754730225, |
| "learning_rate": 0.000869621066152858, |
| "loss": 2.8296, |
| "step": 13160 |
| }, |
| { |
| "epoch": 10.875219320879348, |
| "grad_norm": 0.5829789042472839, |
| "learning_rate": 0.0008694375630791816, |
| "loss": 2.8207, |
| "step": 13170 |
| }, |
| { |
| "epoch": 10.883476106925379, |
| "grad_norm": 0.5800747871398926, |
| "learning_rate": 0.0008692540600055051, |
| "loss": 2.8167, |
| "step": 13180 |
| }, |
| { |
| "epoch": 10.891732892971412, |
| "grad_norm": 0.6349780559539795, |
| "learning_rate": 0.0008690705569318286, |
| "loss": 2.8033, |
| "step": 13190 |
| }, |
| { |
| "epoch": 10.899989679017443, |
| "grad_norm": 0.6027595400810242, |
| "learning_rate": 0.0008688870538581521, |
| "loss": 2.8244, |
| "step": 13200 |
| }, |
| { |
| "epoch": 10.908246465063474, |
| "grad_norm": 0.5879138708114624, |
| "learning_rate": 0.0008687035507844756, |
| "loss": 2.8161, |
| "step": 13210 |
| }, |
| { |
| "epoch": 10.916503251109505, |
| "grad_norm": 0.5601217746734619, |
| "learning_rate": 0.0008685200477107992, |
| "loss": 2.8168, |
| "step": 13220 |
| }, |
| { |
| "epoch": 10.924760037155536, |
| "grad_norm": 0.6721534729003906, |
| "learning_rate": 0.0008683365446371227, |
| "loss": 2.8268, |
| "step": 13230 |
| }, |
| { |
| "epoch": 10.93301682320157, |
| "grad_norm": 0.6043145060539246, |
| "learning_rate": 0.0008681530415634462, |
| "loss": 2.8145, |
| "step": 13240 |
| }, |
| { |
| "epoch": 10.9412736092476, |
| "grad_norm": 0.6050463914871216, |
| "learning_rate": 0.0008679695384897697, |
| "loss": 2.8168, |
| "step": 13250 |
| }, |
| { |
| "epoch": 10.949530395293632, |
| "grad_norm": 0.6205517649650574, |
| "learning_rate": 0.0008677860354160933, |
| "loss": 2.8108, |
| "step": 13260 |
| }, |
| { |
| "epoch": 10.957787181339663, |
| "grad_norm": 0.6242938041687012, |
| "learning_rate": 0.0008676025323424167, |
| "loss": 2.8152, |
| "step": 13270 |
| }, |
| { |
| "epoch": 10.966043967385694, |
| "grad_norm": 0.5615045428276062, |
| "learning_rate": 0.0008674190292687402, |
| "loss": 2.817, |
| "step": 13280 |
| }, |
| { |
| "epoch": 10.974300753431727, |
| "grad_norm": 0.606850802898407, |
| "learning_rate": 0.0008672355261950637, |
| "loss": 2.8072, |
| "step": 13290 |
| }, |
| { |
| "epoch": 10.982557539477758, |
| "grad_norm": 0.6060166358947754, |
| "learning_rate": 0.0008670520231213873, |
| "loss": 2.8135, |
| "step": 13300 |
| }, |
| { |
| "epoch": 10.99081432552379, |
| "grad_norm": 0.5779221653938293, |
| "learning_rate": 0.0008668685200477108, |
| "loss": 2.8174, |
| "step": 13310 |
| }, |
| { |
| "epoch": 10.99907111156982, |
| "grad_norm": 0.5719656944274902, |
| "learning_rate": 0.0008666850169740343, |
| "loss": 2.8116, |
| "step": 13320 |
| }, |
| { |
| "epoch": 11.006605428836826, |
| "grad_norm": 0.5705454349517822, |
| "learning_rate": 0.0008665015139003578, |
| "loss": 2.5797, |
| "step": 13330 |
| }, |
| { |
| "epoch": 11.014862214882857, |
| "grad_norm": 0.6058195233345032, |
| "learning_rate": 0.0008663180108266814, |
| "loss": 2.807, |
| "step": 13340 |
| }, |
| { |
| "epoch": 11.023119000928888, |
| "grad_norm": 0.6259657740592957, |
| "learning_rate": 0.0008661345077530049, |
| "loss": 2.8049, |
| "step": 13350 |
| }, |
| { |
| "epoch": 11.03137578697492, |
| "grad_norm": 0.5490705966949463, |
| "learning_rate": 0.0008659510046793284, |
| "loss": 2.8092, |
| "step": 13360 |
| }, |
| { |
| "epoch": 11.03963257302095, |
| "grad_norm": 0.5268445611000061, |
| "learning_rate": 0.0008657675016056519, |
| "loss": 2.7993, |
| "step": 13370 |
| }, |
| { |
| "epoch": 11.047889359066984, |
| "grad_norm": 0.553423285484314, |
| "learning_rate": 0.0008655839985319755, |
| "loss": 2.7974, |
| "step": 13380 |
| }, |
| { |
| "epoch": 11.056146145113015, |
| "grad_norm": 0.5482957363128662, |
| "learning_rate": 0.000865400495458299, |
| "loss": 2.8158, |
| "step": 13390 |
| }, |
| { |
| "epoch": 11.064402931159046, |
| "grad_norm": 0.5422245860099792, |
| "learning_rate": 0.0008652169923846225, |
| "loss": 2.8082, |
| "step": 13400 |
| }, |
| { |
| "epoch": 11.072659717205077, |
| "grad_norm": 0.6090859174728394, |
| "learning_rate": 0.0008650334893109459, |
| "loss": 2.8095, |
| "step": 13410 |
| }, |
| { |
| "epoch": 11.08091650325111, |
| "grad_norm": 0.6134405732154846, |
| "learning_rate": 0.0008648499862372695, |
| "loss": 2.8136, |
| "step": 13420 |
| }, |
| { |
| "epoch": 11.089173289297142, |
| "grad_norm": 0.5978251099586487, |
| "learning_rate": 0.000864666483163593, |
| "loss": 2.8098, |
| "step": 13430 |
| }, |
| { |
| "epoch": 11.097430075343173, |
| "grad_norm": 0.6461356282234192, |
| "learning_rate": 0.0008644829800899165, |
| "loss": 2.8117, |
| "step": 13440 |
| }, |
| { |
| "epoch": 11.105686861389204, |
| "grad_norm": 0.5783932209014893, |
| "learning_rate": 0.00086429947701624, |
| "loss": 2.816, |
| "step": 13450 |
| }, |
| { |
| "epoch": 11.113943647435235, |
| "grad_norm": 0.5924204587936401, |
| "learning_rate": 0.0008641159739425636, |
| "loss": 2.8083, |
| "step": 13460 |
| }, |
| { |
| "epoch": 11.122200433481268, |
| "grad_norm": 0.5752689838409424, |
| "learning_rate": 0.000863932470868887, |
| "loss": 2.808, |
| "step": 13470 |
| }, |
| { |
| "epoch": 11.1304572195273, |
| "grad_norm": 0.6291837692260742, |
| "learning_rate": 0.0008637489677952106, |
| "loss": 2.815, |
| "step": 13480 |
| }, |
| { |
| "epoch": 11.13871400557333, |
| "grad_norm": 0.637244701385498, |
| "learning_rate": 0.0008635654647215341, |
| "loss": 2.8058, |
| "step": 13490 |
| }, |
| { |
| "epoch": 11.146970791619362, |
| "grad_norm": 0.5656126737594604, |
| "learning_rate": 0.0008633819616478577, |
| "loss": 2.8029, |
| "step": 13500 |
| }, |
| { |
| "epoch": 11.155227577665393, |
| "grad_norm": 0.5883386731147766, |
| "learning_rate": 0.0008631984585741811, |
| "loss": 2.8116, |
| "step": 13510 |
| }, |
| { |
| "epoch": 11.163484363711426, |
| "grad_norm": 0.541492760181427, |
| "learning_rate": 0.0008630149555005047, |
| "loss": 2.8034, |
| "step": 13520 |
| }, |
| { |
| "epoch": 11.171741149757457, |
| "grad_norm": 0.5849348902702332, |
| "learning_rate": 0.0008628314524268282, |
| "loss": 2.8121, |
| "step": 13530 |
| }, |
| { |
| "epoch": 11.179997935803488, |
| "grad_norm": 0.5753573179244995, |
| "learning_rate": 0.0008626479493531517, |
| "loss": 2.8147, |
| "step": 13540 |
| }, |
| { |
| "epoch": 11.18825472184952, |
| "grad_norm": 0.5690391659736633, |
| "learning_rate": 0.0008624644462794751, |
| "loss": 2.8021, |
| "step": 13550 |
| }, |
| { |
| "epoch": 11.196511507895552, |
| "grad_norm": 0.5915670990943909, |
| "learning_rate": 0.0008622809432057987, |
| "loss": 2.8023, |
| "step": 13560 |
| }, |
| { |
| "epoch": 11.204768293941584, |
| "grad_norm": 0.634675145149231, |
| "learning_rate": 0.0008620974401321222, |
| "loss": 2.8189, |
| "step": 13570 |
| }, |
| { |
| "epoch": 11.213025079987615, |
| "grad_norm": 0.5452571511268616, |
| "learning_rate": 0.0008619139370584458, |
| "loss": 2.8067, |
| "step": 13580 |
| }, |
| { |
| "epoch": 11.221281866033646, |
| "grad_norm": 0.5526494383811951, |
| "learning_rate": 0.0008617304339847692, |
| "loss": 2.8281, |
| "step": 13590 |
| }, |
| { |
| "epoch": 11.229538652079677, |
| "grad_norm": 0.6009969115257263, |
| "learning_rate": 0.0008615469309110928, |
| "loss": 2.8102, |
| "step": 13600 |
| }, |
| { |
| "epoch": 11.23779543812571, |
| "grad_norm": 0.5761014819145203, |
| "learning_rate": 0.0008613634278374163, |
| "loss": 2.8073, |
| "step": 13610 |
| }, |
| { |
| "epoch": 11.246052224171741, |
| "grad_norm": 0.6111817359924316, |
| "learning_rate": 0.0008611799247637399, |
| "loss": 2.8056, |
| "step": 13620 |
| }, |
| { |
| "epoch": 11.254309010217773, |
| "grad_norm": 0.5755062699317932, |
| "learning_rate": 0.0008609964216900633, |
| "loss": 2.8099, |
| "step": 13630 |
| }, |
| { |
| "epoch": 11.262565796263804, |
| "grad_norm": 0.5578922033309937, |
| "learning_rate": 0.0008608129186163869, |
| "loss": 2.8047, |
| "step": 13640 |
| }, |
| { |
| "epoch": 11.270822582309837, |
| "grad_norm": 0.6050003170967102, |
| "learning_rate": 0.0008606294155427104, |
| "loss": 2.8096, |
| "step": 13650 |
| }, |
| { |
| "epoch": 11.279079368355868, |
| "grad_norm": 0.6092653870582581, |
| "learning_rate": 0.000860445912469034, |
| "loss": 2.8029, |
| "step": 13660 |
| }, |
| { |
| "epoch": 11.287336154401899, |
| "grad_norm": 0.6080880165100098, |
| "learning_rate": 0.0008602624093953573, |
| "loss": 2.8025, |
| "step": 13670 |
| }, |
| { |
| "epoch": 11.29559294044793, |
| "grad_norm": 0.5565916895866394, |
| "learning_rate": 0.0008600789063216809, |
| "loss": 2.8062, |
| "step": 13680 |
| }, |
| { |
| "epoch": 11.303849726493961, |
| "grad_norm": 0.6291329860687256, |
| "learning_rate": 0.0008598954032480044, |
| "loss": 2.8212, |
| "step": 13690 |
| }, |
| { |
| "epoch": 11.312106512539994, |
| "grad_norm": 0.6040759682655334, |
| "learning_rate": 0.000859711900174328, |
| "loss": 2.7991, |
| "step": 13700 |
| }, |
| { |
| "epoch": 11.320363298586026, |
| "grad_norm": 0.5415501594543457, |
| "learning_rate": 0.0008595283971006514, |
| "loss": 2.7978, |
| "step": 13710 |
| }, |
| { |
| "epoch": 11.328620084632057, |
| "grad_norm": 0.5466763973236084, |
| "learning_rate": 0.000859344894026975, |
| "loss": 2.807, |
| "step": 13720 |
| }, |
| { |
| "epoch": 11.336876870678088, |
| "grad_norm": 0.5396016836166382, |
| "learning_rate": 0.0008591613909532985, |
| "loss": 2.8042, |
| "step": 13730 |
| }, |
| { |
| "epoch": 11.345133656724121, |
| "grad_norm": 0.6255636215209961, |
| "learning_rate": 0.0008589778878796221, |
| "loss": 2.8082, |
| "step": 13740 |
| }, |
| { |
| "epoch": 11.353390442770152, |
| "grad_norm": 0.6161576509475708, |
| "learning_rate": 0.0008587943848059455, |
| "loss": 2.8061, |
| "step": 13750 |
| }, |
| { |
| "epoch": 11.361647228816183, |
| "grad_norm": 0.62225741147995, |
| "learning_rate": 0.000858610881732269, |
| "loss": 2.8042, |
| "step": 13760 |
| }, |
| { |
| "epoch": 11.369904014862215, |
| "grad_norm": 0.6520695090293884, |
| "learning_rate": 0.0008584273786585926, |
| "loss": 2.8062, |
| "step": 13770 |
| }, |
| { |
| "epoch": 11.378160800908246, |
| "grad_norm": 0.6661168932914734, |
| "learning_rate": 0.0008582438755849161, |
| "loss": 2.8053, |
| "step": 13780 |
| }, |
| { |
| "epoch": 11.386417586954279, |
| "grad_norm": 0.5990477204322815, |
| "learning_rate": 0.0008580603725112396, |
| "loss": 2.8013, |
| "step": 13790 |
| }, |
| { |
| "epoch": 11.39467437300031, |
| "grad_norm": 0.6206037402153015, |
| "learning_rate": 0.000857876869437563, |
| "loss": 2.8089, |
| "step": 13800 |
| }, |
| { |
| "epoch": 11.402931159046341, |
| "grad_norm": 0.6662552356719971, |
| "learning_rate": 0.0008576933663638866, |
| "loss": 2.8048, |
| "step": 13810 |
| }, |
| { |
| "epoch": 11.411187945092372, |
| "grad_norm": 0.6055031418800354, |
| "learning_rate": 0.00085750986329021, |
| "loss": 2.8057, |
| "step": 13820 |
| }, |
| { |
| "epoch": 11.419444731138404, |
| "grad_norm": 0.618643045425415, |
| "learning_rate": 0.0008573263602165336, |
| "loss": 2.8175, |
| "step": 13830 |
| }, |
| { |
| "epoch": 11.427701517184436, |
| "grad_norm": 0.58855140209198, |
| "learning_rate": 0.0008571428571428571, |
| "loss": 2.7973, |
| "step": 13840 |
| }, |
| { |
| "epoch": 11.435958303230468, |
| "grad_norm": 0.5836468935012817, |
| "learning_rate": 0.0008569593540691807, |
| "loss": 2.8031, |
| "step": 13850 |
| }, |
| { |
| "epoch": 11.444215089276499, |
| "grad_norm": 0.6513998508453369, |
| "learning_rate": 0.0008567758509955041, |
| "loss": 2.8049, |
| "step": 13860 |
| }, |
| { |
| "epoch": 11.45247187532253, |
| "grad_norm": 0.6231095790863037, |
| "learning_rate": 0.0008565923479218277, |
| "loss": 2.7965, |
| "step": 13870 |
| }, |
| { |
| "epoch": 11.460728661368563, |
| "grad_norm": 0.598556637763977, |
| "learning_rate": 0.0008564088448481512, |
| "loss": 2.794, |
| "step": 13880 |
| }, |
| { |
| "epoch": 11.468985447414594, |
| "grad_norm": 0.613278329372406, |
| "learning_rate": 0.0008562253417744748, |
| "loss": 2.8008, |
| "step": 13890 |
| }, |
| { |
| "epoch": 11.477242233460625, |
| "grad_norm": 0.5937925577163696, |
| "learning_rate": 0.0008560418387007982, |
| "loss": 2.7992, |
| "step": 13900 |
| }, |
| { |
| "epoch": 11.485499019506657, |
| "grad_norm": 0.5671007037162781, |
| "learning_rate": 0.0008558583356271218, |
| "loss": 2.8071, |
| "step": 13910 |
| }, |
| { |
| "epoch": 11.493755805552688, |
| "grad_norm": 0.5720387101173401, |
| "learning_rate": 0.0008556748325534453, |
| "loss": 2.8006, |
| "step": 13920 |
| }, |
| { |
| "epoch": 11.50201259159872, |
| "grad_norm": 0.5988256335258484, |
| "learning_rate": 0.0008554913294797688, |
| "loss": 2.794, |
| "step": 13930 |
| }, |
| { |
| "epoch": 11.510269377644752, |
| "grad_norm": 0.5751326680183411, |
| "learning_rate": 0.0008553078264060922, |
| "loss": 2.809, |
| "step": 13940 |
| }, |
| { |
| "epoch": 11.518526163690783, |
| "grad_norm": 0.5636781454086304, |
| "learning_rate": 0.0008551243233324158, |
| "loss": 2.792, |
| "step": 13950 |
| }, |
| { |
| "epoch": 11.526782949736814, |
| "grad_norm": 0.6231285929679871, |
| "learning_rate": 0.0008549408202587393, |
| "loss": 2.8034, |
| "step": 13960 |
| }, |
| { |
| "epoch": 11.535039735782847, |
| "grad_norm": 0.5834125280380249, |
| "learning_rate": 0.0008547573171850629, |
| "loss": 2.7947, |
| "step": 13970 |
| }, |
| { |
| "epoch": 11.543296521828879, |
| "grad_norm": 0.5725896954536438, |
| "learning_rate": 0.0008545738141113863, |
| "loss": 2.7883, |
| "step": 13980 |
| }, |
| { |
| "epoch": 11.55155330787491, |
| "grad_norm": 0.6235449314117432, |
| "learning_rate": 0.0008543903110377099, |
| "loss": 2.794, |
| "step": 13990 |
| }, |
| { |
| "epoch": 11.559810093920941, |
| "grad_norm": 0.5574560165405273, |
| "learning_rate": 0.0008542068079640334, |
| "loss": 2.804, |
| "step": 14000 |
| }, |
| { |
| "epoch": 11.568066879966972, |
| "grad_norm": 0.6278049349784851, |
| "learning_rate": 0.000854023304890357, |
| "loss": 2.8071, |
| "step": 14010 |
| }, |
| { |
| "epoch": 11.576323666013005, |
| "grad_norm": 0.618698239326477, |
| "learning_rate": 0.0008538398018166804, |
| "loss": 2.8062, |
| "step": 14020 |
| }, |
| { |
| "epoch": 11.584580452059036, |
| "grad_norm": 0.5747182369232178, |
| "learning_rate": 0.000853656298743004, |
| "loss": 2.8152, |
| "step": 14030 |
| }, |
| { |
| "epoch": 11.592837238105067, |
| "grad_norm": 0.590527355670929, |
| "learning_rate": 0.0008534727956693275, |
| "loss": 2.8008, |
| "step": 14040 |
| }, |
| { |
| "epoch": 11.601094024151099, |
| "grad_norm": 0.5996799468994141, |
| "learning_rate": 0.0008532892925956511, |
| "loss": 2.8007, |
| "step": 14050 |
| }, |
| { |
| "epoch": 11.609350810197132, |
| "grad_norm": 0.5726416110992432, |
| "learning_rate": 0.0008531057895219744, |
| "loss": 2.8028, |
| "step": 14060 |
| }, |
| { |
| "epoch": 11.617607596243163, |
| "grad_norm": 0.5995892286300659, |
| "learning_rate": 0.000852922286448298, |
| "loss": 2.8071, |
| "step": 14070 |
| }, |
| { |
| "epoch": 11.625864382289194, |
| "grad_norm": 0.5530434250831604, |
| "learning_rate": 0.0008527387833746215, |
| "loss": 2.8062, |
| "step": 14080 |
| }, |
| { |
| "epoch": 11.634121168335225, |
| "grad_norm": 0.5788170695304871, |
| "learning_rate": 0.0008525552803009451, |
| "loss": 2.7975, |
| "step": 14090 |
| }, |
| { |
| "epoch": 11.642377954381256, |
| "grad_norm": 0.5959973931312561, |
| "learning_rate": 0.0008523717772272685, |
| "loss": 2.7908, |
| "step": 14100 |
| }, |
| { |
| "epoch": 11.65063474042729, |
| "grad_norm": 0.6316953301429749, |
| "learning_rate": 0.0008521882741535921, |
| "loss": 2.798, |
| "step": 14110 |
| }, |
| { |
| "epoch": 11.65889152647332, |
| "grad_norm": 0.5617077350616455, |
| "learning_rate": 0.0008520047710799156, |
| "loss": 2.8042, |
| "step": 14120 |
| }, |
| { |
| "epoch": 11.667148312519352, |
| "grad_norm": 0.553451657295227, |
| "learning_rate": 0.0008518212680062392, |
| "loss": 2.796, |
| "step": 14130 |
| }, |
| { |
| "epoch": 11.675405098565383, |
| "grad_norm": 0.5701197385787964, |
| "learning_rate": 0.0008516377649325626, |
| "loss": 2.7992, |
| "step": 14140 |
| }, |
| { |
| "epoch": 11.683661884611414, |
| "grad_norm": 0.6057118773460388, |
| "learning_rate": 0.0008514542618588862, |
| "loss": 2.7976, |
| "step": 14150 |
| }, |
| { |
| "epoch": 11.691918670657447, |
| "grad_norm": 0.5956297516822815, |
| "learning_rate": 0.0008512707587852097, |
| "loss": 2.8001, |
| "step": 14160 |
| }, |
| { |
| "epoch": 11.700175456703478, |
| "grad_norm": 0.5502737164497375, |
| "learning_rate": 0.0008510872557115333, |
| "loss": 2.7948, |
| "step": 14170 |
| }, |
| { |
| "epoch": 11.70843224274951, |
| "grad_norm": 0.6299700736999512, |
| "learning_rate": 0.0008509037526378567, |
| "loss": 2.7963, |
| "step": 14180 |
| }, |
| { |
| "epoch": 11.71668902879554, |
| "grad_norm": 0.5706774592399597, |
| "learning_rate": 0.0008507202495641802, |
| "loss": 2.7897, |
| "step": 14190 |
| }, |
| { |
| "epoch": 11.724945814841574, |
| "grad_norm": 0.5503284335136414, |
| "learning_rate": 0.0008505367464905037, |
| "loss": 2.7954, |
| "step": 14200 |
| }, |
| { |
| "epoch": 11.733202600887605, |
| "grad_norm": 0.6203439235687256, |
| "learning_rate": 0.0008503532434168273, |
| "loss": 2.7927, |
| "step": 14210 |
| }, |
| { |
| "epoch": 11.741459386933636, |
| "grad_norm": 0.5536445379257202, |
| "learning_rate": 0.0008501697403431507, |
| "loss": 2.785, |
| "step": 14220 |
| }, |
| { |
| "epoch": 11.749716172979667, |
| "grad_norm": 0.5857203006744385, |
| "learning_rate": 0.0008499862372694743, |
| "loss": 2.7982, |
| "step": 14230 |
| }, |
| { |
| "epoch": 11.757972959025699, |
| "grad_norm": 0.5552855730056763, |
| "learning_rate": 0.0008498027341957978, |
| "loss": 2.7839, |
| "step": 14240 |
| }, |
| { |
| "epoch": 11.766229745071731, |
| "grad_norm": 0.5858961939811707, |
| "learning_rate": 0.0008496192311221214, |
| "loss": 2.7923, |
| "step": 14250 |
| }, |
| { |
| "epoch": 11.774486531117763, |
| "grad_norm": 0.6338097453117371, |
| "learning_rate": 0.0008494357280484448, |
| "loss": 2.8024, |
| "step": 14260 |
| }, |
| { |
| "epoch": 11.782743317163794, |
| "grad_norm": 0.6377038359642029, |
| "learning_rate": 0.0008492522249747684, |
| "loss": 2.7864, |
| "step": 14270 |
| }, |
| { |
| "epoch": 11.791000103209825, |
| "grad_norm": 0.6072639226913452, |
| "learning_rate": 0.0008490687219010919, |
| "loss": 2.788, |
| "step": 14280 |
| }, |
| { |
| "epoch": 11.799256889255858, |
| "grad_norm": 0.5601785778999329, |
| "learning_rate": 0.0008488852188274155, |
| "loss": 2.7999, |
| "step": 14290 |
| }, |
| { |
| "epoch": 11.80751367530189, |
| "grad_norm": 0.6033042669296265, |
| "learning_rate": 0.0008487017157537389, |
| "loss": 2.7998, |
| "step": 14300 |
| }, |
| { |
| "epoch": 11.81577046134792, |
| "grad_norm": 0.5611660480499268, |
| "learning_rate": 0.0008485182126800625, |
| "loss": 2.795, |
| "step": 14310 |
| }, |
| { |
| "epoch": 11.824027247393952, |
| "grad_norm": 0.5943251848220825, |
| "learning_rate": 0.0008483347096063859, |
| "loss": 2.7871, |
| "step": 14320 |
| }, |
| { |
| "epoch": 11.832284033439983, |
| "grad_norm": 0.6414892077445984, |
| "learning_rate": 0.0008481512065327093, |
| "loss": 2.8011, |
| "step": 14330 |
| }, |
| { |
| "epoch": 11.840540819486016, |
| "grad_norm": 0.6055446267127991, |
| "learning_rate": 0.0008479677034590329, |
| "loss": 2.799, |
| "step": 14340 |
| }, |
| { |
| "epoch": 11.848797605532047, |
| "grad_norm": 0.6286283135414124, |
| "learning_rate": 0.0008477842003853564, |
| "loss": 2.7998, |
| "step": 14350 |
| }, |
| { |
| "epoch": 11.857054391578078, |
| "grad_norm": 0.6823182702064514, |
| "learning_rate": 0.00084760069731168, |
| "loss": 2.7979, |
| "step": 14360 |
| }, |
| { |
| "epoch": 11.86531117762411, |
| "grad_norm": 0.555995523929596, |
| "learning_rate": 0.0008474171942380034, |
| "loss": 2.7882, |
| "step": 14370 |
| }, |
| { |
| "epoch": 11.87356796367014, |
| "grad_norm": 0.5597317814826965, |
| "learning_rate": 0.000847233691164327, |
| "loss": 2.7941, |
| "step": 14380 |
| }, |
| { |
| "epoch": 11.881824749716174, |
| "grad_norm": 0.6191929578781128, |
| "learning_rate": 0.0008470501880906505, |
| "loss": 2.793, |
| "step": 14390 |
| }, |
| { |
| "epoch": 11.890081535762205, |
| "grad_norm": 0.6188380122184753, |
| "learning_rate": 0.0008468666850169741, |
| "loss": 2.7849, |
| "step": 14400 |
| }, |
| { |
| "epoch": 11.898338321808236, |
| "grad_norm": 0.6156478524208069, |
| "learning_rate": 0.0008466831819432975, |
| "loss": 2.8008, |
| "step": 14410 |
| }, |
| { |
| "epoch": 11.906595107854267, |
| "grad_norm": 0.5268288850784302, |
| "learning_rate": 0.0008464996788696211, |
| "loss": 2.7886, |
| "step": 14420 |
| }, |
| { |
| "epoch": 11.9148518939003, |
| "grad_norm": 0.5729905962944031, |
| "learning_rate": 0.0008463161757959446, |
| "loss": 2.7831, |
| "step": 14430 |
| }, |
| { |
| "epoch": 11.923108679946331, |
| "grad_norm": 0.6199338436126709, |
| "learning_rate": 0.0008461326727222682, |
| "loss": 2.7981, |
| "step": 14440 |
| }, |
| { |
| "epoch": 11.931365465992362, |
| "grad_norm": 0.5851151943206787, |
| "learning_rate": 0.0008459491696485915, |
| "loss": 2.7842, |
| "step": 14450 |
| }, |
| { |
| "epoch": 11.939622252038394, |
| "grad_norm": 0.6265865564346313, |
| "learning_rate": 0.0008457656665749151, |
| "loss": 2.7953, |
| "step": 14460 |
| }, |
| { |
| "epoch": 11.947879038084425, |
| "grad_norm": 0.5881917476654053, |
| "learning_rate": 0.0008455821635012386, |
| "loss": 2.7959, |
| "step": 14470 |
| }, |
| { |
| "epoch": 11.956135824130458, |
| "grad_norm": 0.5763278603553772, |
| "learning_rate": 0.0008453986604275622, |
| "loss": 2.8031, |
| "step": 14480 |
| }, |
| { |
| "epoch": 11.964392610176489, |
| "grad_norm": 0.5758784413337708, |
| "learning_rate": 0.0008452151573538856, |
| "loss": 2.7987, |
| "step": 14490 |
| }, |
| { |
| "epoch": 11.97264939622252, |
| "grad_norm": 0.5578325986862183, |
| "learning_rate": 0.0008450316542802092, |
| "loss": 2.7885, |
| "step": 14500 |
| }, |
| { |
| "epoch": 11.980906182268551, |
| "grad_norm": 0.5875093936920166, |
| "learning_rate": 0.0008448481512065327, |
| "loss": 2.789, |
| "step": 14510 |
| }, |
| { |
| "epoch": 11.989162968314584, |
| "grad_norm": 0.5956554412841797, |
| "learning_rate": 0.0008446646481328563, |
| "loss": 2.7899, |
| "step": 14520 |
| }, |
| { |
| "epoch": 11.997419754360616, |
| "grad_norm": 0.5572901368141174, |
| "learning_rate": 0.0008444811450591797, |
| "loss": 2.7874, |
| "step": 14530 |
| }, |
| { |
| "epoch": 12.004954071627619, |
| "grad_norm": 0.6897820234298706, |
| "learning_rate": 0.0008442976419855033, |
| "loss": 2.5443, |
| "step": 14540 |
| }, |
| { |
| "epoch": 12.01321085767365, |
| "grad_norm": 0.6007983684539795, |
| "learning_rate": 0.0008441141389118268, |
| "loss": 2.7947, |
| "step": 14550 |
| }, |
| { |
| "epoch": 12.021467643719681, |
| "grad_norm": 0.5293102860450745, |
| "learning_rate": 0.0008439306358381504, |
| "loss": 2.78, |
| "step": 14560 |
| }, |
| { |
| "epoch": 12.029724429765714, |
| "grad_norm": 0.5955855250358582, |
| "learning_rate": 0.0008437471327644738, |
| "loss": 2.7833, |
| "step": 14570 |
| }, |
| { |
| "epoch": 12.037981215811746, |
| "grad_norm": 0.6245818734169006, |
| "learning_rate": 0.0008435636296907974, |
| "loss": 2.7857, |
| "step": 14580 |
| }, |
| { |
| "epoch": 12.046238001857777, |
| "grad_norm": 0.6412973403930664, |
| "learning_rate": 0.0008433801266171208, |
| "loss": 2.7848, |
| "step": 14590 |
| }, |
| { |
| "epoch": 12.054494787903808, |
| "grad_norm": 0.5761491060256958, |
| "learning_rate": 0.0008431966235434444, |
| "loss": 2.7827, |
| "step": 14600 |
| }, |
| { |
| "epoch": 12.06275157394984, |
| "grad_norm": 0.5610695481300354, |
| "learning_rate": 0.0008430131204697678, |
| "loss": 2.7837, |
| "step": 14610 |
| }, |
| { |
| "epoch": 12.071008359995872, |
| "grad_norm": 0.5644015073776245, |
| "learning_rate": 0.0008428296173960914, |
| "loss": 2.7887, |
| "step": 14620 |
| }, |
| { |
| "epoch": 12.079265146041903, |
| "grad_norm": 0.5807902216911316, |
| "learning_rate": 0.0008426461143224149, |
| "loss": 2.7811, |
| "step": 14630 |
| }, |
| { |
| "epoch": 12.087521932087935, |
| "grad_norm": 0.6081882119178772, |
| "learning_rate": 0.0008424626112487385, |
| "loss": 2.7874, |
| "step": 14640 |
| }, |
| { |
| "epoch": 12.095778718133966, |
| "grad_norm": 0.5682186484336853, |
| "learning_rate": 0.0008422791081750619, |
| "loss": 2.7769, |
| "step": 14650 |
| }, |
| { |
| "epoch": 12.104035504179999, |
| "grad_norm": 0.5743092894554138, |
| "learning_rate": 0.0008420956051013855, |
| "loss": 2.7838, |
| "step": 14660 |
| }, |
| { |
| "epoch": 12.11229229022603, |
| "grad_norm": 0.5903926491737366, |
| "learning_rate": 0.000841912102027709, |
| "loss": 2.7798, |
| "step": 14670 |
| }, |
| { |
| "epoch": 12.120549076272061, |
| "grad_norm": 0.5655919909477234, |
| "learning_rate": 0.0008417285989540326, |
| "loss": 2.775, |
| "step": 14680 |
| }, |
| { |
| "epoch": 12.128805862318092, |
| "grad_norm": 0.6008566617965698, |
| "learning_rate": 0.000841545095880356, |
| "loss": 2.778, |
| "step": 14690 |
| }, |
| { |
| "epoch": 12.137062648364124, |
| "grad_norm": 0.5972955822944641, |
| "learning_rate": 0.0008413615928066796, |
| "loss": 2.7869, |
| "step": 14700 |
| }, |
| { |
| "epoch": 12.145319434410156, |
| "grad_norm": 0.6555289626121521, |
| "learning_rate": 0.0008411780897330031, |
| "loss": 2.7882, |
| "step": 14710 |
| }, |
| { |
| "epoch": 12.153576220456188, |
| "grad_norm": 0.571524441242218, |
| "learning_rate": 0.0008409945866593266, |
| "loss": 2.7853, |
| "step": 14720 |
| }, |
| { |
| "epoch": 12.161833006502219, |
| "grad_norm": 0.6336715221405029, |
| "learning_rate": 0.00084081108358565, |
| "loss": 2.7711, |
| "step": 14730 |
| }, |
| { |
| "epoch": 12.17008979254825, |
| "grad_norm": 0.6028571724891663, |
| "learning_rate": 0.0008406275805119736, |
| "loss": 2.7839, |
| "step": 14740 |
| }, |
| { |
| "epoch": 12.178346578594283, |
| "grad_norm": 0.6054027080535889, |
| "learning_rate": 0.0008404440774382971, |
| "loss": 2.7947, |
| "step": 14750 |
| }, |
| { |
| "epoch": 12.186603364640314, |
| "grad_norm": 0.6011176109313965, |
| "learning_rate": 0.0008402605743646207, |
| "loss": 2.7799, |
| "step": 14760 |
| }, |
| { |
| "epoch": 12.194860150686345, |
| "grad_norm": 0.5620320439338684, |
| "learning_rate": 0.0008400770712909441, |
| "loss": 2.7835, |
| "step": 14770 |
| }, |
| { |
| "epoch": 12.203116936732377, |
| "grad_norm": 0.6046602129936218, |
| "learning_rate": 0.0008398935682172677, |
| "loss": 2.7928, |
| "step": 14780 |
| }, |
| { |
| "epoch": 12.211373722778408, |
| "grad_norm": 0.5642755627632141, |
| "learning_rate": 0.0008397100651435912, |
| "loss": 2.7915, |
| "step": 14790 |
| }, |
| { |
| "epoch": 12.21963050882444, |
| "grad_norm": 0.5510666370391846, |
| "learning_rate": 0.0008395265620699148, |
| "loss": 2.783, |
| "step": 14800 |
| }, |
| { |
| "epoch": 12.227887294870472, |
| "grad_norm": 0.5766282081604004, |
| "learning_rate": 0.0008393430589962382, |
| "loss": 2.7827, |
| "step": 14810 |
| }, |
| { |
| "epoch": 12.236144080916503, |
| "grad_norm": 0.564561128616333, |
| "learning_rate": 0.0008391595559225618, |
| "loss": 2.7754, |
| "step": 14820 |
| }, |
| { |
| "epoch": 12.244400866962534, |
| "grad_norm": 0.5662837028503418, |
| "learning_rate": 0.0008389760528488853, |
| "loss": 2.7778, |
| "step": 14830 |
| }, |
| { |
| "epoch": 12.252657653008566, |
| "grad_norm": 0.5977376699447632, |
| "learning_rate": 0.0008387925497752089, |
| "loss": 2.7718, |
| "step": 14840 |
| }, |
| { |
| "epoch": 12.260914439054599, |
| "grad_norm": 0.6425307989120483, |
| "learning_rate": 0.0008386090467015322, |
| "loss": 2.785, |
| "step": 14850 |
| }, |
| { |
| "epoch": 12.26917122510063, |
| "grad_norm": 0.6152507066726685, |
| "learning_rate": 0.0008384255436278558, |
| "loss": 2.7817, |
| "step": 14860 |
| }, |
| { |
| "epoch": 12.277428011146661, |
| "grad_norm": 0.5792108774185181, |
| "learning_rate": 0.0008382420405541793, |
| "loss": 2.7763, |
| "step": 14870 |
| }, |
| { |
| "epoch": 12.285684797192692, |
| "grad_norm": 0.5410921573638916, |
| "learning_rate": 0.0008380585374805028, |
| "loss": 2.7731, |
| "step": 14880 |
| }, |
| { |
| "epoch": 12.293941583238725, |
| "grad_norm": 0.5805179476737976, |
| "learning_rate": 0.0008378750344068263, |
| "loss": 2.7858, |
| "step": 14890 |
| }, |
| { |
| "epoch": 12.302198369284756, |
| "grad_norm": 0.5664601922035217, |
| "learning_rate": 0.0008376915313331498, |
| "loss": 2.7816, |
| "step": 14900 |
| }, |
| { |
| "epoch": 12.310455155330787, |
| "grad_norm": 0.5598365664482117, |
| "learning_rate": 0.0008375080282594734, |
| "loss": 2.7788, |
| "step": 14910 |
| }, |
| { |
| "epoch": 12.318711941376819, |
| "grad_norm": 0.5913284420967102, |
| "learning_rate": 0.0008373245251857968, |
| "loss": 2.7721, |
| "step": 14920 |
| }, |
| { |
| "epoch": 12.32696872742285, |
| "grad_norm": 0.6424931287765503, |
| "learning_rate": 0.0008371410221121204, |
| "loss": 2.774, |
| "step": 14930 |
| }, |
| { |
| "epoch": 12.335225513468883, |
| "grad_norm": 0.5732784271240234, |
| "learning_rate": 0.0008369575190384439, |
| "loss": 2.7865, |
| "step": 14940 |
| }, |
| { |
| "epoch": 12.343482299514914, |
| "grad_norm": 0.6080560088157654, |
| "learning_rate": 0.0008367740159647675, |
| "loss": 2.7926, |
| "step": 14950 |
| }, |
| { |
| "epoch": 12.351739085560945, |
| "grad_norm": 0.6897197961807251, |
| "learning_rate": 0.0008365905128910909, |
| "loss": 2.7813, |
| "step": 14960 |
| }, |
| { |
| "epoch": 12.359995871606976, |
| "grad_norm": 0.5854954719543457, |
| "learning_rate": 0.0008364070098174145, |
| "loss": 2.7781, |
| "step": 14970 |
| }, |
| { |
| "epoch": 12.36825265765301, |
| "grad_norm": 0.6034757494926453, |
| "learning_rate": 0.0008362235067437379, |
| "loss": 2.7685, |
| "step": 14980 |
| }, |
| { |
| "epoch": 12.37650944369904, |
| "grad_norm": 0.6345445513725281, |
| "learning_rate": 0.0008360400036700615, |
| "loss": 2.784, |
| "step": 14990 |
| }, |
| { |
| "epoch": 12.384766229745072, |
| "grad_norm": 0.5897849798202515, |
| "learning_rate": 0.0008358565005963849, |
| "loss": 2.7849, |
| "step": 15000 |
| }, |
| { |
| "epoch": 12.393023015791103, |
| "grad_norm": 0.5857816338539124, |
| "learning_rate": 0.0008356729975227085, |
| "loss": 2.7702, |
| "step": 15010 |
| }, |
| { |
| "epoch": 12.401279801837134, |
| "grad_norm": 0.5820302367210388, |
| "learning_rate": 0.000835489494449032, |
| "loss": 2.7862, |
| "step": 15020 |
| }, |
| { |
| "epoch": 12.409536587883167, |
| "grad_norm": 0.6015300750732422, |
| "learning_rate": 0.0008353059913753556, |
| "loss": 2.7827, |
| "step": 15030 |
| }, |
| { |
| "epoch": 12.417793373929198, |
| "grad_norm": 0.5810590386390686, |
| "learning_rate": 0.000835122488301679, |
| "loss": 2.7848, |
| "step": 15040 |
| }, |
| { |
| "epoch": 12.42605015997523, |
| "grad_norm": 0.525604784488678, |
| "learning_rate": 0.0008349389852280026, |
| "loss": 2.7693, |
| "step": 15050 |
| }, |
| { |
| "epoch": 12.43430694602126, |
| "grad_norm": 0.5634535551071167, |
| "learning_rate": 0.0008347554821543261, |
| "loss": 2.782, |
| "step": 15060 |
| }, |
| { |
| "epoch": 12.442563732067292, |
| "grad_norm": 0.5564500689506531, |
| "learning_rate": 0.0008345719790806497, |
| "loss": 2.7656, |
| "step": 15070 |
| }, |
| { |
| "epoch": 12.450820518113325, |
| "grad_norm": 0.570466160774231, |
| "learning_rate": 0.0008343884760069731, |
| "loss": 2.7781, |
| "step": 15080 |
| }, |
| { |
| "epoch": 12.459077304159356, |
| "grad_norm": 0.5621691942214966, |
| "learning_rate": 0.0008342049729332967, |
| "loss": 2.774, |
| "step": 15090 |
| }, |
| { |
| "epoch": 12.467334090205387, |
| "grad_norm": 0.5975548624992371, |
| "learning_rate": 0.0008340214698596202, |
| "loss": 2.7771, |
| "step": 15100 |
| }, |
| { |
| "epoch": 12.475590876251418, |
| "grad_norm": 0.5807538628578186, |
| "learning_rate": 0.0008338379667859437, |
| "loss": 2.7786, |
| "step": 15110 |
| }, |
| { |
| "epoch": 12.483847662297451, |
| "grad_norm": 0.61223965883255, |
| "learning_rate": 0.0008336544637122671, |
| "loss": 2.7769, |
| "step": 15120 |
| }, |
| { |
| "epoch": 12.492104448343483, |
| "grad_norm": 0.5965583324432373, |
| "learning_rate": 0.0008334709606385907, |
| "loss": 2.777, |
| "step": 15130 |
| }, |
| { |
| "epoch": 12.500361234389514, |
| "grad_norm": 0.5752902626991272, |
| "learning_rate": 0.0008332874575649142, |
| "loss": 2.7799, |
| "step": 15140 |
| }, |
| { |
| "epoch": 12.508618020435545, |
| "grad_norm": 0.5716910362243652, |
| "learning_rate": 0.0008331039544912378, |
| "loss": 2.7791, |
| "step": 15150 |
| }, |
| { |
| "epoch": 12.516874806481576, |
| "grad_norm": 0.6005849242210388, |
| "learning_rate": 0.0008329204514175612, |
| "loss": 2.7852, |
| "step": 15160 |
| }, |
| { |
| "epoch": 12.52513159252761, |
| "grad_norm": 0.5944279432296753, |
| "learning_rate": 0.0008327369483438848, |
| "loss": 2.7757, |
| "step": 15170 |
| }, |
| { |
| "epoch": 12.53338837857364, |
| "grad_norm": 0.6027126908302307, |
| "learning_rate": 0.0008325534452702083, |
| "loss": 2.7776, |
| "step": 15180 |
| }, |
| { |
| "epoch": 12.541645164619672, |
| "grad_norm": 0.6317790746688843, |
| "learning_rate": 0.0008323699421965319, |
| "loss": 2.773, |
| "step": 15190 |
| }, |
| { |
| "epoch": 12.549901950665703, |
| "grad_norm": 0.5477875471115112, |
| "learning_rate": 0.0008321864391228553, |
| "loss": 2.7858, |
| "step": 15200 |
| }, |
| { |
| "epoch": 12.558158736711736, |
| "grad_norm": 0.5689815282821655, |
| "learning_rate": 0.0008320029360491789, |
| "loss": 2.7735, |
| "step": 15210 |
| }, |
| { |
| "epoch": 12.566415522757767, |
| "grad_norm": 0.6152288317680359, |
| "learning_rate": 0.0008318194329755024, |
| "loss": 2.7855, |
| "step": 15220 |
| }, |
| { |
| "epoch": 12.574672308803798, |
| "grad_norm": 0.5703557133674622, |
| "learning_rate": 0.000831635929901826, |
| "loss": 2.7674, |
| "step": 15230 |
| }, |
| { |
| "epoch": 12.58292909484983, |
| "grad_norm": 0.6394575834274292, |
| "learning_rate": 0.0008314524268281493, |
| "loss": 2.7703, |
| "step": 15240 |
| }, |
| { |
| "epoch": 12.59118588089586, |
| "grad_norm": 0.5734837055206299, |
| "learning_rate": 0.0008312689237544729, |
| "loss": 2.7765, |
| "step": 15250 |
| }, |
| { |
| "epoch": 12.599442666941894, |
| "grad_norm": 0.594292402267456, |
| "learning_rate": 0.0008310854206807964, |
| "loss": 2.7674, |
| "step": 15260 |
| }, |
| { |
| "epoch": 12.607699452987925, |
| "grad_norm": 0.5458073616027832, |
| "learning_rate": 0.0008309019176071199, |
| "loss": 2.7778, |
| "step": 15270 |
| }, |
| { |
| "epoch": 12.615956239033956, |
| "grad_norm": 0.5974953174591064, |
| "learning_rate": 0.0008307184145334434, |
| "loss": 2.771, |
| "step": 15280 |
| }, |
| { |
| "epoch": 12.624213025079987, |
| "grad_norm": 0.6053661108016968, |
| "learning_rate": 0.000830534911459767, |
| "loss": 2.7737, |
| "step": 15290 |
| }, |
| { |
| "epoch": 12.632469811126018, |
| "grad_norm": 0.5710778832435608, |
| "learning_rate": 0.0008303514083860905, |
| "loss": 2.7705, |
| "step": 15300 |
| }, |
| { |
| "epoch": 12.640726597172051, |
| "grad_norm": 0.5878491401672363, |
| "learning_rate": 0.000830167905312414, |
| "loss": 2.7832, |
| "step": 15310 |
| }, |
| { |
| "epoch": 12.648983383218082, |
| "grad_norm": 0.5833500623703003, |
| "learning_rate": 0.0008299844022387375, |
| "loss": 2.7734, |
| "step": 15320 |
| }, |
| { |
| "epoch": 12.657240169264114, |
| "grad_norm": 0.5963436961174011, |
| "learning_rate": 0.0008298008991650611, |
| "loss": 2.7795, |
| "step": 15330 |
| }, |
| { |
| "epoch": 12.665496955310145, |
| "grad_norm": 0.6217861175537109, |
| "learning_rate": 0.0008296173960913846, |
| "loss": 2.7715, |
| "step": 15340 |
| }, |
| { |
| "epoch": 12.673753741356178, |
| "grad_norm": 0.546258807182312, |
| "learning_rate": 0.0008294338930177081, |
| "loss": 2.7821, |
| "step": 15350 |
| }, |
| { |
| "epoch": 12.682010527402209, |
| "grad_norm": 0.6429739594459534, |
| "learning_rate": 0.0008292503899440316, |
| "loss": 2.7808, |
| "step": 15360 |
| }, |
| { |
| "epoch": 12.69026731344824, |
| "grad_norm": 0.6150422096252441, |
| "learning_rate": 0.0008290668868703551, |
| "loss": 2.7709, |
| "step": 15370 |
| }, |
| { |
| "epoch": 12.698524099494271, |
| "grad_norm": 0.5569972991943359, |
| "learning_rate": 0.0008288833837966786, |
| "loss": 2.7778, |
| "step": 15380 |
| }, |
| { |
| "epoch": 12.706780885540303, |
| "grad_norm": 0.5828894972801208, |
| "learning_rate": 0.0008286998807230021, |
| "loss": 2.7719, |
| "step": 15390 |
| }, |
| { |
| "epoch": 12.715037671586336, |
| "grad_norm": 0.5625948309898376, |
| "learning_rate": 0.0008285163776493256, |
| "loss": 2.7635, |
| "step": 15400 |
| }, |
| { |
| "epoch": 12.723294457632367, |
| "grad_norm": 0.6146851778030396, |
| "learning_rate": 0.0008283328745756492, |
| "loss": 2.791, |
| "step": 15410 |
| }, |
| { |
| "epoch": 12.731551243678398, |
| "grad_norm": 0.5903885364532471, |
| "learning_rate": 0.0008281493715019727, |
| "loss": 2.7738, |
| "step": 15420 |
| }, |
| { |
| "epoch": 12.73980802972443, |
| "grad_norm": 0.5333955883979797, |
| "learning_rate": 0.0008279658684282962, |
| "loss": 2.7711, |
| "step": 15430 |
| }, |
| { |
| "epoch": 12.748064815770462, |
| "grad_norm": 0.5588700175285339, |
| "learning_rate": 0.0008277823653546197, |
| "loss": 2.7776, |
| "step": 15440 |
| }, |
| { |
| "epoch": 12.756321601816493, |
| "grad_norm": 0.6176479458808899, |
| "learning_rate": 0.0008275988622809433, |
| "loss": 2.7769, |
| "step": 15450 |
| }, |
| { |
| "epoch": 12.764578387862525, |
| "grad_norm": 0.5709108114242554, |
| "learning_rate": 0.0008274153592072668, |
| "loss": 2.7691, |
| "step": 15460 |
| }, |
| { |
| "epoch": 12.772835173908556, |
| "grad_norm": 0.5612215995788574, |
| "learning_rate": 0.0008272318561335903, |
| "loss": 2.7771, |
| "step": 15470 |
| }, |
| { |
| "epoch": 12.781091959954587, |
| "grad_norm": 0.582386314868927, |
| "learning_rate": 0.0008270483530599138, |
| "loss": 2.7688, |
| "step": 15480 |
| }, |
| { |
| "epoch": 12.78934874600062, |
| "grad_norm": 0.5977119207382202, |
| "learning_rate": 0.0008268648499862373, |
| "loss": 2.776, |
| "step": 15490 |
| }, |
| { |
| "epoch": 12.797605532046651, |
| "grad_norm": 0.5754312872886658, |
| "learning_rate": 0.0008266813469125608, |
| "loss": 2.761, |
| "step": 15500 |
| }, |
| { |
| "epoch": 12.805862318092682, |
| "grad_norm": 0.56341552734375, |
| "learning_rate": 0.0008264978438388842, |
| "loss": 2.7812, |
| "step": 15510 |
| }, |
| { |
| "epoch": 12.814119104138713, |
| "grad_norm": 0.5888708829879761, |
| "learning_rate": 0.0008263143407652078, |
| "loss": 2.7708, |
| "step": 15520 |
| }, |
| { |
| "epoch": 12.822375890184745, |
| "grad_norm": 0.5750503540039062, |
| "learning_rate": 0.0008261308376915313, |
| "loss": 2.7895, |
| "step": 15530 |
| }, |
| { |
| "epoch": 12.830632676230778, |
| "grad_norm": 0.5679807662963867, |
| "learning_rate": 0.0008259473346178549, |
| "loss": 2.7826, |
| "step": 15540 |
| }, |
| { |
| "epoch": 12.838889462276809, |
| "grad_norm": 0.5332905054092407, |
| "learning_rate": 0.0008257638315441783, |
| "loss": 2.774, |
| "step": 15550 |
| }, |
| { |
| "epoch": 12.84714624832284, |
| "grad_norm": 0.5367740392684937, |
| "learning_rate": 0.0008255803284705019, |
| "loss": 2.7621, |
| "step": 15560 |
| }, |
| { |
| "epoch": 12.855403034368871, |
| "grad_norm": 0.6053501963615417, |
| "learning_rate": 0.0008253968253968254, |
| "loss": 2.7633, |
| "step": 15570 |
| }, |
| { |
| "epoch": 12.863659820414904, |
| "grad_norm": 0.5788416862487793, |
| "learning_rate": 0.000825213322323149, |
| "loss": 2.7689, |
| "step": 15580 |
| }, |
| { |
| "epoch": 12.871916606460935, |
| "grad_norm": 0.5835745334625244, |
| "learning_rate": 0.0008250298192494724, |
| "loss": 2.7746, |
| "step": 15590 |
| }, |
| { |
| "epoch": 12.880173392506967, |
| "grad_norm": 0.6038824915885925, |
| "learning_rate": 0.000824846316175796, |
| "loss": 2.7778, |
| "step": 15600 |
| }, |
| { |
| "epoch": 12.888430178552998, |
| "grad_norm": 0.5711358785629272, |
| "learning_rate": 0.0008246628131021195, |
| "loss": 2.7828, |
| "step": 15610 |
| }, |
| { |
| "epoch": 12.89668696459903, |
| "grad_norm": 0.6088118553161621, |
| "learning_rate": 0.0008244793100284431, |
| "loss": 2.7833, |
| "step": 15620 |
| }, |
| { |
| "epoch": 12.904943750645062, |
| "grad_norm": 0.6028804183006287, |
| "learning_rate": 0.0008242958069547664, |
| "loss": 2.7823, |
| "step": 15630 |
| }, |
| { |
| "epoch": 12.913200536691093, |
| "grad_norm": 0.5889461636543274, |
| "learning_rate": 0.00082411230388109, |
| "loss": 2.7751, |
| "step": 15640 |
| }, |
| { |
| "epoch": 12.921457322737124, |
| "grad_norm": 0.5903311967849731, |
| "learning_rate": 0.0008239288008074135, |
| "loss": 2.7712, |
| "step": 15650 |
| }, |
| { |
| "epoch": 12.929714108783156, |
| "grad_norm": 0.5665178894996643, |
| "learning_rate": 0.000823745297733737, |
| "loss": 2.7639, |
| "step": 15660 |
| }, |
| { |
| "epoch": 12.937970894829188, |
| "grad_norm": 0.5634979605674744, |
| "learning_rate": 0.0008235617946600605, |
| "loss": 2.7664, |
| "step": 15670 |
| }, |
| { |
| "epoch": 12.94622768087522, |
| "grad_norm": 0.5990162491798401, |
| "learning_rate": 0.0008233782915863841, |
| "loss": 2.778, |
| "step": 15680 |
| }, |
| { |
| "epoch": 12.95448446692125, |
| "grad_norm": 0.558689296245575, |
| "learning_rate": 0.0008231947885127076, |
| "loss": 2.7743, |
| "step": 15690 |
| }, |
| { |
| "epoch": 12.962741252967282, |
| "grad_norm": 0.546913206577301, |
| "learning_rate": 0.0008230112854390311, |
| "loss": 2.7657, |
| "step": 15700 |
| }, |
| { |
| "epoch": 12.970998039013313, |
| "grad_norm": 0.6025896072387695, |
| "learning_rate": 0.0008228277823653546, |
| "loss": 2.768, |
| "step": 15710 |
| }, |
| { |
| "epoch": 12.979254825059346, |
| "grad_norm": 0.5498492121696472, |
| "learning_rate": 0.0008226442792916782, |
| "loss": 2.7725, |
| "step": 15720 |
| }, |
| { |
| "epoch": 12.987511611105377, |
| "grad_norm": 0.6049798130989075, |
| "learning_rate": 0.0008224607762180017, |
| "loss": 2.7694, |
| "step": 15730 |
| }, |
| { |
| "epoch": 12.995768397151409, |
| "grad_norm": 0.5635313987731934, |
| "learning_rate": 0.0008222772731443252, |
| "loss": 2.7658, |
| "step": 15740 |
| }, |
| { |
| "epoch": 13.003302714418412, |
| "grad_norm": 0.6339975595474243, |
| "learning_rate": 0.0008220937700706487, |
| "loss": 2.5294, |
| "step": 15750 |
| }, |
| { |
| "epoch": 13.011559500464445, |
| "grad_norm": 0.5738035440444946, |
| "learning_rate": 0.0008219102669969723, |
| "loss": 2.768, |
| "step": 15760 |
| }, |
| { |
| "epoch": 13.019816286510476, |
| "grad_norm": 0.6072455644607544, |
| "learning_rate": 0.0008217267639232957, |
| "loss": 2.7673, |
| "step": 15770 |
| }, |
| { |
| "epoch": 13.028073072556507, |
| "grad_norm": 0.5527037978172302, |
| "learning_rate": 0.0008215432608496192, |
| "loss": 2.7617, |
| "step": 15780 |
| }, |
| { |
| "epoch": 13.036329858602539, |
| "grad_norm": 0.5809829831123352, |
| "learning_rate": 0.0008213597577759427, |
| "loss": 2.7681, |
| "step": 15790 |
| }, |
| { |
| "epoch": 13.04458664464857, |
| "grad_norm": 0.6070693135261536, |
| "learning_rate": 0.0008211762547022663, |
| "loss": 2.7738, |
| "step": 15800 |
| }, |
| { |
| "epoch": 13.052843430694603, |
| "grad_norm": 0.6138054132461548, |
| "learning_rate": 0.0008209927516285898, |
| "loss": 2.7669, |
| "step": 15810 |
| }, |
| { |
| "epoch": 13.061100216740634, |
| "grad_norm": 0.5852046012878418, |
| "learning_rate": 0.0008208092485549133, |
| "loss": 2.757, |
| "step": 15820 |
| }, |
| { |
| "epoch": 13.069357002786665, |
| "grad_norm": 0.5757762789726257, |
| "learning_rate": 0.0008206257454812368, |
| "loss": 2.7668, |
| "step": 15830 |
| }, |
| { |
| "epoch": 13.077613788832696, |
| "grad_norm": 0.5901942253112793, |
| "learning_rate": 0.0008204422424075604, |
| "loss": 2.7672, |
| "step": 15840 |
| }, |
| { |
| "epoch": 13.085870574878728, |
| "grad_norm": 0.6006907224655151, |
| "learning_rate": 0.0008202587393338839, |
| "loss": 2.7778, |
| "step": 15850 |
| }, |
| { |
| "epoch": 13.09412736092476, |
| "grad_norm": 0.6453226208686829, |
| "learning_rate": 0.0008200752362602074, |
| "loss": 2.7627, |
| "step": 15860 |
| }, |
| { |
| "epoch": 13.102384146970792, |
| "grad_norm": 0.5957190990447998, |
| "learning_rate": 0.0008198917331865309, |
| "loss": 2.7666, |
| "step": 15870 |
| }, |
| { |
| "epoch": 13.110640933016823, |
| "grad_norm": 0.5730419754981995, |
| "learning_rate": 0.0008197082301128545, |
| "loss": 2.7531, |
| "step": 15880 |
| }, |
| { |
| "epoch": 13.118897719062854, |
| "grad_norm": 0.5487211346626282, |
| "learning_rate": 0.000819524727039178, |
| "loss": 2.762, |
| "step": 15890 |
| }, |
| { |
| "epoch": 13.127154505108887, |
| "grad_norm": 0.605567216873169, |
| "learning_rate": 0.0008193412239655014, |
| "loss": 2.7731, |
| "step": 15900 |
| }, |
| { |
| "epoch": 13.135411291154918, |
| "grad_norm": 0.6126657128334045, |
| "learning_rate": 0.0008191577208918249, |
| "loss": 2.7694, |
| "step": 15910 |
| }, |
| { |
| "epoch": 13.14366807720095, |
| "grad_norm": 0.5657448172569275, |
| "learning_rate": 0.0008189742178181485, |
| "loss": 2.7749, |
| "step": 15920 |
| }, |
| { |
| "epoch": 13.15192486324698, |
| "grad_norm": 0.5270867347717285, |
| "learning_rate": 0.000818790714744472, |
| "loss": 2.7536, |
| "step": 15930 |
| }, |
| { |
| "epoch": 13.160181649293012, |
| "grad_norm": 0.5588110685348511, |
| "learning_rate": 0.0008186072116707955, |
| "loss": 2.7682, |
| "step": 15940 |
| }, |
| { |
| "epoch": 13.168438435339045, |
| "grad_norm": 0.5444889664649963, |
| "learning_rate": 0.000818423708597119, |
| "loss": 2.7521, |
| "step": 15950 |
| }, |
| { |
| "epoch": 13.176695221385076, |
| "grad_norm": 0.5641809105873108, |
| "learning_rate": 0.0008182402055234426, |
| "loss": 2.7611, |
| "step": 15960 |
| }, |
| { |
| "epoch": 13.184952007431107, |
| "grad_norm": 0.572223424911499, |
| "learning_rate": 0.0008180567024497661, |
| "loss": 2.7601, |
| "step": 15970 |
| }, |
| { |
| "epoch": 13.193208793477138, |
| "grad_norm": 0.6031824946403503, |
| "learning_rate": 0.0008178731993760896, |
| "loss": 2.7721, |
| "step": 15980 |
| }, |
| { |
| "epoch": 13.201465579523171, |
| "grad_norm": 0.5700808167457581, |
| "learning_rate": 0.0008176896963024131, |
| "loss": 2.7592, |
| "step": 15990 |
| }, |
| { |
| "epoch": 13.209722365569203, |
| "grad_norm": 0.5641034245491028, |
| "learning_rate": 0.0008175061932287367, |
| "loss": 2.7622, |
| "step": 16000 |
| }, |
| { |
| "epoch": 13.217979151615234, |
| "grad_norm": 0.5520575642585754, |
| "learning_rate": 0.0008173226901550602, |
| "loss": 2.7675, |
| "step": 16010 |
| }, |
| { |
| "epoch": 13.226235937661265, |
| "grad_norm": 0.6408013701438904, |
| "learning_rate": 0.0008171391870813837, |
| "loss": 2.7563, |
| "step": 16020 |
| }, |
| { |
| "epoch": 13.234492723707296, |
| "grad_norm": 0.5883532762527466, |
| "learning_rate": 0.0008169556840077071, |
| "loss": 2.7659, |
| "step": 16030 |
| }, |
| { |
| "epoch": 13.24274950975333, |
| "grad_norm": 0.5909677147865295, |
| "learning_rate": 0.0008167721809340307, |
| "loss": 2.7637, |
| "step": 16040 |
| }, |
| { |
| "epoch": 13.25100629579936, |
| "grad_norm": 0.5736511945724487, |
| "learning_rate": 0.0008165886778603541, |
| "loss": 2.7526, |
| "step": 16050 |
| }, |
| { |
| "epoch": 13.259263081845392, |
| "grad_norm": 0.5763906240463257, |
| "learning_rate": 0.0008164051747866776, |
| "loss": 2.7717, |
| "step": 16060 |
| }, |
| { |
| "epoch": 13.267519867891423, |
| "grad_norm": 0.5538901090621948, |
| "learning_rate": 0.0008162216717130012, |
| "loss": 2.7666, |
| "step": 16070 |
| }, |
| { |
| "epoch": 13.275776653937454, |
| "grad_norm": 0.6136773824691772, |
| "learning_rate": 0.0008160381686393247, |
| "loss": 2.7473, |
| "step": 16080 |
| }, |
| { |
| "epoch": 13.284033439983487, |
| "grad_norm": 0.5361644625663757, |
| "learning_rate": 0.0008158546655656482, |
| "loss": 2.7581, |
| "step": 16090 |
| }, |
| { |
| "epoch": 13.292290226029518, |
| "grad_norm": 0.5708776116371155, |
| "learning_rate": 0.0008156711624919717, |
| "loss": 2.758, |
| "step": 16100 |
| }, |
| { |
| "epoch": 13.30054701207555, |
| "grad_norm": 0.5603443384170532, |
| "learning_rate": 0.0008154876594182953, |
| "loss": 2.7581, |
| "step": 16110 |
| }, |
| { |
| "epoch": 13.30880379812158, |
| "grad_norm": 0.56572026014328, |
| "learning_rate": 0.0008153041563446188, |
| "loss": 2.7618, |
| "step": 16120 |
| }, |
| { |
| "epoch": 13.317060584167614, |
| "grad_norm": 0.6429868936538696, |
| "learning_rate": 0.0008151206532709423, |
| "loss": 2.7641, |
| "step": 16130 |
| }, |
| { |
| "epoch": 13.325317370213645, |
| "grad_norm": 0.6419973969459534, |
| "learning_rate": 0.0008149371501972658, |
| "loss": 2.7685, |
| "step": 16140 |
| }, |
| { |
| "epoch": 13.333574156259676, |
| "grad_norm": 0.6389254331588745, |
| "learning_rate": 0.0008147536471235894, |
| "loss": 2.7607, |
| "step": 16150 |
| }, |
| { |
| "epoch": 13.341830942305707, |
| "grad_norm": 0.5886973142623901, |
| "learning_rate": 0.0008145701440499128, |
| "loss": 2.7545, |
| "step": 16160 |
| }, |
| { |
| "epoch": 13.350087728351738, |
| "grad_norm": 0.6030955910682678, |
| "learning_rate": 0.0008143866409762363, |
| "loss": 2.7703, |
| "step": 16170 |
| }, |
| { |
| "epoch": 13.358344514397771, |
| "grad_norm": 0.5616552233695984, |
| "learning_rate": 0.0008142031379025598, |
| "loss": 2.763, |
| "step": 16180 |
| }, |
| { |
| "epoch": 13.366601300443802, |
| "grad_norm": 0.6055645942687988, |
| "learning_rate": 0.0008140196348288834, |
| "loss": 2.7692, |
| "step": 16190 |
| }, |
| { |
| "epoch": 13.374858086489834, |
| "grad_norm": 0.6489038467407227, |
| "learning_rate": 0.0008138361317552069, |
| "loss": 2.7599, |
| "step": 16200 |
| }, |
| { |
| "epoch": 13.383114872535865, |
| "grad_norm": 0.5819231867790222, |
| "learning_rate": 0.0008136526286815304, |
| "loss": 2.7609, |
| "step": 16210 |
| }, |
| { |
| "epoch": 13.391371658581898, |
| "grad_norm": 0.6634029150009155, |
| "learning_rate": 0.0008134691256078539, |
| "loss": 2.7662, |
| "step": 16220 |
| }, |
| { |
| "epoch": 13.399628444627929, |
| "grad_norm": 0.591643750667572, |
| "learning_rate": 0.0008132856225341775, |
| "loss": 2.762, |
| "step": 16230 |
| }, |
| { |
| "epoch": 13.40788523067396, |
| "grad_norm": 0.5705656409263611, |
| "learning_rate": 0.000813102119460501, |
| "loss": 2.7483, |
| "step": 16240 |
| }, |
| { |
| "epoch": 13.416142016719991, |
| "grad_norm": 0.597012996673584, |
| "learning_rate": 0.0008129186163868245, |
| "loss": 2.77, |
| "step": 16250 |
| }, |
| { |
| "epoch": 13.424398802766023, |
| "grad_norm": 0.6613156795501709, |
| "learning_rate": 0.000812735113313148, |
| "loss": 2.7585, |
| "step": 16260 |
| }, |
| { |
| "epoch": 13.432655588812056, |
| "grad_norm": 0.5879620313644409, |
| "learning_rate": 0.0008125516102394716, |
| "loss": 2.7628, |
| "step": 16270 |
| }, |
| { |
| "epoch": 13.440912374858087, |
| "grad_norm": 0.5478769540786743, |
| "learning_rate": 0.0008123681071657951, |
| "loss": 2.7576, |
| "step": 16280 |
| }, |
| { |
| "epoch": 13.449169160904118, |
| "grad_norm": 0.5667441487312317, |
| "learning_rate": 0.0008121846040921185, |
| "loss": 2.7507, |
| "step": 16290 |
| }, |
| { |
| "epoch": 13.45742594695015, |
| "grad_norm": 0.5518149733543396, |
| "learning_rate": 0.000812001101018442, |
| "loss": 2.7519, |
| "step": 16300 |
| }, |
| { |
| "epoch": 13.465682732996182, |
| "grad_norm": 0.6225078701972961, |
| "learning_rate": 0.0008118175979447656, |
| "loss": 2.7632, |
| "step": 16310 |
| }, |
| { |
| "epoch": 13.473939519042213, |
| "grad_norm": 0.587518572807312, |
| "learning_rate": 0.0008116340948710891, |
| "loss": 2.7691, |
| "step": 16320 |
| }, |
| { |
| "epoch": 13.482196305088245, |
| "grad_norm": 0.5977440476417542, |
| "learning_rate": 0.0008114505917974126, |
| "loss": 2.7503, |
| "step": 16330 |
| }, |
| { |
| "epoch": 13.490453091134276, |
| "grad_norm": 0.5600082278251648, |
| "learning_rate": 0.0008112670887237361, |
| "loss": 2.7622, |
| "step": 16340 |
| }, |
| { |
| "epoch": 13.498709877180307, |
| "grad_norm": 0.560407817363739, |
| "learning_rate": 0.0008110835856500597, |
| "loss": 2.7697, |
| "step": 16350 |
| }, |
| { |
| "epoch": 13.50696666322634, |
| "grad_norm": 0.5983129143714905, |
| "learning_rate": 0.0008109000825763832, |
| "loss": 2.7498, |
| "step": 16360 |
| }, |
| { |
| "epoch": 13.515223449272371, |
| "grad_norm": 0.5887816548347473, |
| "learning_rate": 0.0008107165795027067, |
| "loss": 2.7707, |
| "step": 16370 |
| }, |
| { |
| "epoch": 13.523480235318402, |
| "grad_norm": 0.5647554993629456, |
| "learning_rate": 0.0008105330764290302, |
| "loss": 2.7625, |
| "step": 16380 |
| }, |
| { |
| "epoch": 13.531737021364433, |
| "grad_norm": 0.551149845123291, |
| "learning_rate": 0.0008103495733553538, |
| "loss": 2.7564, |
| "step": 16390 |
| }, |
| { |
| "epoch": 13.539993807410465, |
| "grad_norm": 0.568866491317749, |
| "learning_rate": 0.0008101660702816773, |
| "loss": 2.751, |
| "step": 16400 |
| }, |
| { |
| "epoch": 13.548250593456498, |
| "grad_norm": 0.5884142518043518, |
| "learning_rate": 0.0008099825672080008, |
| "loss": 2.7613, |
| "step": 16410 |
| }, |
| { |
| "epoch": 13.556507379502529, |
| "grad_norm": 0.5169154405593872, |
| "learning_rate": 0.0008097990641343242, |
| "loss": 2.7544, |
| "step": 16420 |
| }, |
| { |
| "epoch": 13.56476416554856, |
| "grad_norm": 0.6176019310951233, |
| "learning_rate": 0.0008096155610606478, |
| "loss": 2.7581, |
| "step": 16430 |
| }, |
| { |
| "epoch": 13.573020951594591, |
| "grad_norm": 0.6097131967544556, |
| "learning_rate": 0.0008094320579869712, |
| "loss": 2.7634, |
| "step": 16440 |
| }, |
| { |
| "epoch": 13.581277737640624, |
| "grad_norm": 0.6191734075546265, |
| "learning_rate": 0.0008092485549132948, |
| "loss": 2.758, |
| "step": 16450 |
| }, |
| { |
| "epoch": 13.589534523686655, |
| "grad_norm": 0.5689364075660706, |
| "learning_rate": 0.0008090650518396183, |
| "loss": 2.7614, |
| "step": 16460 |
| }, |
| { |
| "epoch": 13.597791309732687, |
| "grad_norm": 0.6023751497268677, |
| "learning_rate": 0.0008088815487659419, |
| "loss": 2.775, |
| "step": 16470 |
| }, |
| { |
| "epoch": 13.606048095778718, |
| "grad_norm": 0.5691829323768616, |
| "learning_rate": 0.0008086980456922653, |
| "loss": 2.7631, |
| "step": 16480 |
| }, |
| { |
| "epoch": 13.614304881824749, |
| "grad_norm": 0.5723507404327393, |
| "learning_rate": 0.0008085145426185889, |
| "loss": 2.7529, |
| "step": 16490 |
| }, |
| { |
| "epoch": 13.622561667870782, |
| "grad_norm": 0.5878212451934814, |
| "learning_rate": 0.0008083310395449124, |
| "loss": 2.7629, |
| "step": 16500 |
| }, |
| { |
| "epoch": 13.630818453916813, |
| "grad_norm": 0.651943564414978, |
| "learning_rate": 0.000808147536471236, |
| "loss": 2.7539, |
| "step": 16510 |
| }, |
| { |
| "epoch": 13.639075239962844, |
| "grad_norm": 0.6334558129310608, |
| "learning_rate": 0.0008079640333975594, |
| "loss": 2.7562, |
| "step": 16520 |
| }, |
| { |
| "epoch": 13.647332026008876, |
| "grad_norm": 0.6675853133201599, |
| "learning_rate": 0.000807780530323883, |
| "loss": 2.7666, |
| "step": 16530 |
| }, |
| { |
| "epoch": 13.655588812054908, |
| "grad_norm": 0.5692960023880005, |
| "learning_rate": 0.0008075970272502065, |
| "loss": 2.7527, |
| "step": 16540 |
| }, |
| { |
| "epoch": 13.66384559810094, |
| "grad_norm": 0.5518311858177185, |
| "learning_rate": 0.00080741352417653, |
| "loss": 2.7626, |
| "step": 16550 |
| }, |
| { |
| "epoch": 13.67210238414697, |
| "grad_norm": 0.6077815890312195, |
| "learning_rate": 0.0008072300211028534, |
| "loss": 2.7613, |
| "step": 16560 |
| }, |
| { |
| "epoch": 13.680359170193002, |
| "grad_norm": 0.5508092641830444, |
| "learning_rate": 0.000807046518029177, |
| "loss": 2.7574, |
| "step": 16570 |
| }, |
| { |
| "epoch": 13.688615956239033, |
| "grad_norm": 0.5735660791397095, |
| "learning_rate": 0.0008068630149555005, |
| "loss": 2.7573, |
| "step": 16580 |
| }, |
| { |
| "epoch": 13.696872742285066, |
| "grad_norm": 0.5603192448616028, |
| "learning_rate": 0.0008066795118818241, |
| "loss": 2.7544, |
| "step": 16590 |
| }, |
| { |
| "epoch": 13.705129528331097, |
| "grad_norm": 0.556424081325531, |
| "learning_rate": 0.0008064960088081475, |
| "loss": 2.756, |
| "step": 16600 |
| }, |
| { |
| "epoch": 13.713386314377129, |
| "grad_norm": 0.5140565037727356, |
| "learning_rate": 0.0008063125057344711, |
| "loss": 2.7666, |
| "step": 16610 |
| }, |
| { |
| "epoch": 13.72164310042316, |
| "grad_norm": 0.5534517168998718, |
| "learning_rate": 0.0008061290026607946, |
| "loss": 2.7569, |
| "step": 16620 |
| }, |
| { |
| "epoch": 13.729899886469191, |
| "grad_norm": 0.6492647528648376, |
| "learning_rate": 0.0008059454995871181, |
| "loss": 2.7567, |
| "step": 16630 |
| }, |
| { |
| "epoch": 13.738156672515224, |
| "grad_norm": 0.5888465642929077, |
| "learning_rate": 0.0008057619965134416, |
| "loss": 2.7451, |
| "step": 16640 |
| }, |
| { |
| "epoch": 13.746413458561255, |
| "grad_norm": 0.6425179243087769, |
| "learning_rate": 0.0008055784934397651, |
| "loss": 2.7575, |
| "step": 16650 |
| }, |
| { |
| "epoch": 13.754670244607286, |
| "grad_norm": 0.5842881202697754, |
| "learning_rate": 0.0008053949903660887, |
| "loss": 2.756, |
| "step": 16660 |
| }, |
| { |
| "epoch": 13.762927030653318, |
| "grad_norm": 0.5675920248031616, |
| "learning_rate": 0.0008052114872924122, |
| "loss": 2.761, |
| "step": 16670 |
| }, |
| { |
| "epoch": 13.77118381669935, |
| "grad_norm": 0.532641589641571, |
| "learning_rate": 0.0008050279842187356, |
| "loss": 2.7615, |
| "step": 16680 |
| }, |
| { |
| "epoch": 13.779440602745382, |
| "grad_norm": 0.5731536149978638, |
| "learning_rate": 0.0008048444811450591, |
| "loss": 2.7562, |
| "step": 16690 |
| }, |
| { |
| "epoch": 13.787697388791413, |
| "grad_norm": 0.567754328250885, |
| "learning_rate": 0.0008046609780713827, |
| "loss": 2.7479, |
| "step": 16700 |
| }, |
| { |
| "epoch": 13.795954174837444, |
| "grad_norm": 0.524221658706665, |
| "learning_rate": 0.0008044774749977062, |
| "loss": 2.7509, |
| "step": 16710 |
| }, |
| { |
| "epoch": 13.804210960883475, |
| "grad_norm": 0.5846814513206482, |
| "learning_rate": 0.0008042939719240297, |
| "loss": 2.7475, |
| "step": 16720 |
| }, |
| { |
| "epoch": 13.812467746929508, |
| "grad_norm": 0.5527751445770264, |
| "learning_rate": 0.0008041104688503532, |
| "loss": 2.7608, |
| "step": 16730 |
| }, |
| { |
| "epoch": 13.82072453297554, |
| "grad_norm": 0.6005294919013977, |
| "learning_rate": 0.0008039269657766768, |
| "loss": 2.7499, |
| "step": 16740 |
| }, |
| { |
| "epoch": 13.82898131902157, |
| "grad_norm": 0.5409100651741028, |
| "learning_rate": 0.0008037434627030003, |
| "loss": 2.7473, |
| "step": 16750 |
| }, |
| { |
| "epoch": 13.837238105067602, |
| "grad_norm": 0.5972150564193726, |
| "learning_rate": 0.0008035599596293238, |
| "loss": 2.7597, |
| "step": 16760 |
| }, |
| { |
| "epoch": 13.845494891113635, |
| "grad_norm": 0.5449065566062927, |
| "learning_rate": 0.0008033764565556473, |
| "loss": 2.7471, |
| "step": 16770 |
| }, |
| { |
| "epoch": 13.853751677159666, |
| "grad_norm": 0.5764107704162598, |
| "learning_rate": 0.0008031929534819709, |
| "loss": 2.7548, |
| "step": 16780 |
| }, |
| { |
| "epoch": 13.862008463205697, |
| "grad_norm": 0.5843521356582642, |
| "learning_rate": 0.0008030094504082944, |
| "loss": 2.7527, |
| "step": 16790 |
| }, |
| { |
| "epoch": 13.870265249251728, |
| "grad_norm": 0.5988937020301819, |
| "learning_rate": 0.0008028259473346179, |
| "loss": 2.7538, |
| "step": 16800 |
| }, |
| { |
| "epoch": 13.87852203529776, |
| "grad_norm": 0.5904337763786316, |
| "learning_rate": 0.0008026424442609413, |
| "loss": 2.7502, |
| "step": 16810 |
| }, |
| { |
| "epoch": 13.886778821343793, |
| "grad_norm": 0.5412918329238892, |
| "learning_rate": 0.0008024589411872649, |
| "loss": 2.7522, |
| "step": 16820 |
| }, |
| { |
| "epoch": 13.895035607389824, |
| "grad_norm": 0.5681438446044922, |
| "learning_rate": 0.0008022754381135883, |
| "loss": 2.7576, |
| "step": 16830 |
| }, |
| { |
| "epoch": 13.903292393435855, |
| "grad_norm": 0.5728694796562195, |
| "learning_rate": 0.0008020919350399119, |
| "loss": 2.7549, |
| "step": 16840 |
| }, |
| { |
| "epoch": 13.911549179481886, |
| "grad_norm": 0.5923236608505249, |
| "learning_rate": 0.0008019084319662354, |
| "loss": 2.7553, |
| "step": 16850 |
| }, |
| { |
| "epoch": 13.919805965527917, |
| "grad_norm": 0.5946152210235596, |
| "learning_rate": 0.000801724928892559, |
| "loss": 2.7457, |
| "step": 16860 |
| }, |
| { |
| "epoch": 13.92806275157395, |
| "grad_norm": 0.5166122913360596, |
| "learning_rate": 0.0008015414258188824, |
| "loss": 2.7488, |
| "step": 16870 |
| }, |
| { |
| "epoch": 13.936319537619982, |
| "grad_norm": 0.5555543303489685, |
| "learning_rate": 0.000801357922745206, |
| "loss": 2.7606, |
| "step": 16880 |
| }, |
| { |
| "epoch": 13.944576323666013, |
| "grad_norm": 0.5452257990837097, |
| "learning_rate": 0.0008011744196715295, |
| "loss": 2.7558, |
| "step": 16890 |
| }, |
| { |
| "epoch": 13.952833109712044, |
| "grad_norm": 0.5303358435630798, |
| "learning_rate": 0.0008009909165978531, |
| "loss": 2.7481, |
| "step": 16900 |
| }, |
| { |
| "epoch": 13.961089895758077, |
| "grad_norm": 0.5449009537696838, |
| "learning_rate": 0.0008008074135241765, |
| "loss": 2.7548, |
| "step": 16910 |
| }, |
| { |
| "epoch": 13.969346681804108, |
| "grad_norm": 0.5688961148262024, |
| "learning_rate": 0.0008006239104505001, |
| "loss": 2.7543, |
| "step": 16920 |
| }, |
| { |
| "epoch": 13.97760346785014, |
| "grad_norm": 0.6097021698951721, |
| "learning_rate": 0.0008004404073768236, |
| "loss": 2.7521, |
| "step": 16930 |
| }, |
| { |
| "epoch": 13.98586025389617, |
| "grad_norm": 0.6139764189720154, |
| "learning_rate": 0.0008002569043031472, |
| "loss": 2.7544, |
| "step": 16940 |
| }, |
| { |
| "epoch": 13.994117039942202, |
| "grad_norm": 0.5823282599449158, |
| "learning_rate": 0.0008000734012294705, |
| "loss": 2.7485, |
| "step": 16950 |
| }, |
| { |
| "epoch": 14.001651357209207, |
| "grad_norm": 0.5491234064102173, |
| "learning_rate": 0.0007998898981557941, |
| "loss": 2.5171, |
| "step": 16960 |
| }, |
| { |
| "epoch": 14.009908143255238, |
| "grad_norm": 0.6469337940216064, |
| "learning_rate": 0.0007997063950821176, |
| "loss": 2.7401, |
| "step": 16970 |
| }, |
| { |
| "epoch": 14.01816492930127, |
| "grad_norm": 0.622250497341156, |
| "learning_rate": 0.0007995228920084412, |
| "loss": 2.7526, |
| "step": 16980 |
| }, |
| { |
| "epoch": 14.0264217153473, |
| "grad_norm": 0.6488636136054993, |
| "learning_rate": 0.0007993393889347646, |
| "loss": 2.7349, |
| "step": 16990 |
| }, |
| { |
| "epoch": 14.034678501393334, |
| "grad_norm": 0.5935384631156921, |
| "learning_rate": 0.0007991558858610882, |
| "loss": 2.7503, |
| "step": 17000 |
| }, |
| { |
| "epoch": 14.042935287439365, |
| "grad_norm": 0.6315668821334839, |
| "learning_rate": 0.0007989723827874117, |
| "loss": 2.7522, |
| "step": 17010 |
| }, |
| { |
| "epoch": 14.051192073485396, |
| "grad_norm": 0.607702910900116, |
| "learning_rate": 0.0007987888797137353, |
| "loss": 2.7474, |
| "step": 17020 |
| }, |
| { |
| "epoch": 14.059448859531427, |
| "grad_norm": 0.55247962474823, |
| "learning_rate": 0.0007986053766400587, |
| "loss": 2.7366, |
| "step": 17030 |
| }, |
| { |
| "epoch": 14.067705645577458, |
| "grad_norm": 0.5892691016197205, |
| "learning_rate": 0.0007984218735663823, |
| "loss": 2.7319, |
| "step": 17040 |
| }, |
| { |
| "epoch": 14.075962431623491, |
| "grad_norm": 0.5575072765350342, |
| "learning_rate": 0.0007982383704927058, |
| "loss": 2.753, |
| "step": 17050 |
| }, |
| { |
| "epoch": 14.084219217669522, |
| "grad_norm": 0.6110917329788208, |
| "learning_rate": 0.0007980548674190294, |
| "loss": 2.7465, |
| "step": 17060 |
| }, |
| { |
| "epoch": 14.092476003715554, |
| "grad_norm": 0.6070433855056763, |
| "learning_rate": 0.0007978713643453528, |
| "loss": 2.7533, |
| "step": 17070 |
| }, |
| { |
| "epoch": 14.100732789761585, |
| "grad_norm": 0.5724040865898132, |
| "learning_rate": 0.0007976878612716763, |
| "loss": 2.7412, |
| "step": 17080 |
| }, |
| { |
| "epoch": 14.108989575807616, |
| "grad_norm": 0.5734650492668152, |
| "learning_rate": 0.0007975043581979998, |
| "loss": 2.7417, |
| "step": 17090 |
| }, |
| { |
| "epoch": 14.117246361853649, |
| "grad_norm": 0.5555775165557861, |
| "learning_rate": 0.0007973208551243234, |
| "loss": 2.7436, |
| "step": 17100 |
| }, |
| { |
| "epoch": 14.12550314789968, |
| "grad_norm": 0.5774323344230652, |
| "learning_rate": 0.0007971373520506468, |
| "loss": 2.7413, |
| "step": 17110 |
| }, |
| { |
| "epoch": 14.133759933945711, |
| "grad_norm": 0.6438599824905396, |
| "learning_rate": 0.0007969538489769704, |
| "loss": 2.7539, |
| "step": 17120 |
| }, |
| { |
| "epoch": 14.142016719991743, |
| "grad_norm": 0.5561356544494629, |
| "learning_rate": 0.0007967703459032939, |
| "loss": 2.7405, |
| "step": 17130 |
| }, |
| { |
| "epoch": 14.150273506037776, |
| "grad_norm": 0.5886418223381042, |
| "learning_rate": 0.0007965868428296175, |
| "loss": 2.7599, |
| "step": 17140 |
| }, |
| { |
| "epoch": 14.158530292083807, |
| "grad_norm": 0.5819487571716309, |
| "learning_rate": 0.0007964033397559409, |
| "loss": 2.7569, |
| "step": 17150 |
| }, |
| { |
| "epoch": 14.166787078129838, |
| "grad_norm": 0.5723300576210022, |
| "learning_rate": 0.0007962198366822645, |
| "loss": 2.7404, |
| "step": 17160 |
| }, |
| { |
| "epoch": 14.17504386417587, |
| "grad_norm": 0.5738250017166138, |
| "learning_rate": 0.000796036333608588, |
| "loss": 2.7518, |
| "step": 17170 |
| }, |
| { |
| "epoch": 14.1833006502219, |
| "grad_norm": 0.5601485967636108, |
| "learning_rate": 0.0007958528305349116, |
| "loss": 2.7477, |
| "step": 17180 |
| }, |
| { |
| "epoch": 14.191557436267933, |
| "grad_norm": 0.5593155026435852, |
| "learning_rate": 0.000795669327461235, |
| "loss": 2.751, |
| "step": 17190 |
| }, |
| { |
| "epoch": 14.199814222313965, |
| "grad_norm": 0.5404049158096313, |
| "learning_rate": 0.0007954858243875585, |
| "loss": 2.7537, |
| "step": 17200 |
| }, |
| { |
| "epoch": 14.208071008359996, |
| "grad_norm": 0.5567106008529663, |
| "learning_rate": 0.000795302321313882, |
| "loss": 2.7499, |
| "step": 17210 |
| }, |
| { |
| "epoch": 14.216327794406027, |
| "grad_norm": 0.5681931376457214, |
| "learning_rate": 0.0007951188182402054, |
| "loss": 2.7598, |
| "step": 17220 |
| }, |
| { |
| "epoch": 14.22458458045206, |
| "grad_norm": 0.5726577639579773, |
| "learning_rate": 0.000794935315166529, |
| "loss": 2.7431, |
| "step": 17230 |
| }, |
| { |
| "epoch": 14.232841366498091, |
| "grad_norm": 0.5552230477333069, |
| "learning_rate": 0.0007947518120928525, |
| "loss": 2.7498, |
| "step": 17240 |
| }, |
| { |
| "epoch": 14.241098152544122, |
| "grad_norm": 0.5898513793945312, |
| "learning_rate": 0.0007945683090191761, |
| "loss": 2.7503, |
| "step": 17250 |
| }, |
| { |
| "epoch": 14.249354938590153, |
| "grad_norm": 0.5322459936141968, |
| "learning_rate": 0.0007943848059454995, |
| "loss": 2.7343, |
| "step": 17260 |
| }, |
| { |
| "epoch": 14.257611724636185, |
| "grad_norm": 0.62173992395401, |
| "learning_rate": 0.0007942013028718231, |
| "loss": 2.7424, |
| "step": 17270 |
| }, |
| { |
| "epoch": 14.265868510682218, |
| "grad_norm": 0.5796912908554077, |
| "learning_rate": 0.0007940177997981466, |
| "loss": 2.7477, |
| "step": 17280 |
| }, |
| { |
| "epoch": 14.274125296728249, |
| "grad_norm": 0.6236594915390015, |
| "learning_rate": 0.0007938342967244702, |
| "loss": 2.7553, |
| "step": 17290 |
| }, |
| { |
| "epoch": 14.28238208277428, |
| "grad_norm": 0.5684297680854797, |
| "learning_rate": 0.0007936507936507937, |
| "loss": 2.7347, |
| "step": 17300 |
| }, |
| { |
| "epoch": 14.290638868820311, |
| "grad_norm": 0.576805830001831, |
| "learning_rate": 0.0007934672905771172, |
| "loss": 2.7465, |
| "step": 17310 |
| }, |
| { |
| "epoch": 14.298895654866342, |
| "grad_norm": 0.6182284951210022, |
| "learning_rate": 0.0007932837875034407, |
| "loss": 2.746, |
| "step": 17320 |
| }, |
| { |
| "epoch": 14.307152440912375, |
| "grad_norm": 0.5486750602722168, |
| "learning_rate": 0.0007931002844297643, |
| "loss": 2.7496, |
| "step": 17330 |
| }, |
| { |
| "epoch": 14.315409226958407, |
| "grad_norm": 0.5673812627792358, |
| "learning_rate": 0.0007929167813560876, |
| "loss": 2.7365, |
| "step": 17340 |
| }, |
| { |
| "epoch": 14.323666013004438, |
| "grad_norm": 0.606238067150116, |
| "learning_rate": 0.0007927332782824112, |
| "loss": 2.7423, |
| "step": 17350 |
| }, |
| { |
| "epoch": 14.331922799050469, |
| "grad_norm": 0.555072009563446, |
| "learning_rate": 0.0007925497752087347, |
| "loss": 2.746, |
| "step": 17360 |
| }, |
| { |
| "epoch": 14.340179585096502, |
| "grad_norm": 0.5399696826934814, |
| "learning_rate": 0.0007923662721350583, |
| "loss": 2.7488, |
| "step": 17370 |
| }, |
| { |
| "epoch": 14.348436371142533, |
| "grad_norm": 0.5781683921813965, |
| "learning_rate": 0.0007921827690613817, |
| "loss": 2.7525, |
| "step": 17380 |
| }, |
| { |
| "epoch": 14.356693157188564, |
| "grad_norm": 0.5473909378051758, |
| "learning_rate": 0.0007919992659877053, |
| "loss": 2.7469, |
| "step": 17390 |
| }, |
| { |
| "epoch": 14.364949943234596, |
| "grad_norm": 0.5242516398429871, |
| "learning_rate": 0.0007918157629140288, |
| "loss": 2.737, |
| "step": 17400 |
| }, |
| { |
| "epoch": 14.373206729280627, |
| "grad_norm": 0.5968852043151855, |
| "learning_rate": 0.0007916322598403524, |
| "loss": 2.7457, |
| "step": 17410 |
| }, |
| { |
| "epoch": 14.38146351532666, |
| "grad_norm": 0.5766412615776062, |
| "learning_rate": 0.0007914487567666758, |
| "loss": 2.7326, |
| "step": 17420 |
| }, |
| { |
| "epoch": 14.38972030137269, |
| "grad_norm": 0.6067407131195068, |
| "learning_rate": 0.0007912652536929994, |
| "loss": 2.7426, |
| "step": 17430 |
| }, |
| { |
| "epoch": 14.397977087418722, |
| "grad_norm": 0.6106924414634705, |
| "learning_rate": 0.0007910817506193229, |
| "loss": 2.7525, |
| "step": 17440 |
| }, |
| { |
| "epoch": 14.406233873464753, |
| "grad_norm": 0.6435558199882507, |
| "learning_rate": 0.0007908982475456465, |
| "loss": 2.7428, |
| "step": 17450 |
| }, |
| { |
| "epoch": 14.414490659510786, |
| "grad_norm": 0.6241771578788757, |
| "learning_rate": 0.0007907147444719699, |
| "loss": 2.7438, |
| "step": 17460 |
| }, |
| { |
| "epoch": 14.422747445556817, |
| "grad_norm": 0.6236996054649353, |
| "learning_rate": 0.0007905312413982934, |
| "loss": 2.7496, |
| "step": 17470 |
| }, |
| { |
| "epoch": 14.431004231602849, |
| "grad_norm": 0.6004934310913086, |
| "learning_rate": 0.0007903477383246169, |
| "loss": 2.7483, |
| "step": 17480 |
| }, |
| { |
| "epoch": 14.43926101764888, |
| "grad_norm": 0.5864703059196472, |
| "learning_rate": 0.0007901642352509405, |
| "loss": 2.7421, |
| "step": 17490 |
| }, |
| { |
| "epoch": 14.447517803694911, |
| "grad_norm": 0.5803243517875671, |
| "learning_rate": 0.0007899807321772639, |
| "loss": 2.7512, |
| "step": 17500 |
| }, |
| { |
| "epoch": 14.455774589740944, |
| "grad_norm": 0.5815431475639343, |
| "learning_rate": 0.0007897972291035875, |
| "loss": 2.7539, |
| "step": 17510 |
| }, |
| { |
| "epoch": 14.464031375786975, |
| "grad_norm": 0.5773807168006897, |
| "learning_rate": 0.000789613726029911, |
| "loss": 2.7282, |
| "step": 17520 |
| }, |
| { |
| "epoch": 14.472288161833006, |
| "grad_norm": 0.561482846736908, |
| "learning_rate": 0.0007894302229562346, |
| "loss": 2.7368, |
| "step": 17530 |
| }, |
| { |
| "epoch": 14.480544947879038, |
| "grad_norm": 0.6419026255607605, |
| "learning_rate": 0.000789246719882558, |
| "loss": 2.752, |
| "step": 17540 |
| }, |
| { |
| "epoch": 14.488801733925069, |
| "grad_norm": 0.5817477107048035, |
| "learning_rate": 0.0007890632168088816, |
| "loss": 2.7464, |
| "step": 17550 |
| }, |
| { |
| "epoch": 14.497058519971102, |
| "grad_norm": 0.6521551609039307, |
| "learning_rate": 0.0007888797137352051, |
| "loss": 2.7582, |
| "step": 17560 |
| }, |
| { |
| "epoch": 14.505315306017133, |
| "grad_norm": 0.6004222631454468, |
| "learning_rate": 0.0007886962106615287, |
| "loss": 2.7429, |
| "step": 17570 |
| }, |
| { |
| "epoch": 14.513572092063164, |
| "grad_norm": 0.6220718026161194, |
| "learning_rate": 0.0007885127075878521, |
| "loss": 2.7376, |
| "step": 17580 |
| }, |
| { |
| "epoch": 14.521828878109195, |
| "grad_norm": 0.5441803336143494, |
| "learning_rate": 0.0007883292045141757, |
| "loss": 2.7418, |
| "step": 17590 |
| }, |
| { |
| "epoch": 14.530085664155228, |
| "grad_norm": 0.5832270383834839, |
| "learning_rate": 0.0007881457014404991, |
| "loss": 2.7377, |
| "step": 17600 |
| }, |
| { |
| "epoch": 14.53834245020126, |
| "grad_norm": 0.536746621131897, |
| "learning_rate": 0.0007879621983668227, |
| "loss": 2.7456, |
| "step": 17610 |
| }, |
| { |
| "epoch": 14.54659923624729, |
| "grad_norm": 0.5866507887840271, |
| "learning_rate": 0.0007877786952931461, |
| "loss": 2.7429, |
| "step": 17620 |
| }, |
| { |
| "epoch": 14.554856022293322, |
| "grad_norm": 0.5756723880767822, |
| "learning_rate": 0.0007875951922194697, |
| "loss": 2.743, |
| "step": 17630 |
| }, |
| { |
| "epoch": 14.563112808339355, |
| "grad_norm": 0.5826034545898438, |
| "learning_rate": 0.0007874116891457932, |
| "loss": 2.7452, |
| "step": 17640 |
| }, |
| { |
| "epoch": 14.571369594385386, |
| "grad_norm": 0.5977003574371338, |
| "learning_rate": 0.0007872281860721168, |
| "loss": 2.7443, |
| "step": 17650 |
| }, |
| { |
| "epoch": 14.579626380431417, |
| "grad_norm": 0.551539957523346, |
| "learning_rate": 0.0007870446829984402, |
| "loss": 2.7434, |
| "step": 17660 |
| }, |
| { |
| "epoch": 14.587883166477448, |
| "grad_norm": 0.6162058115005493, |
| "learning_rate": 0.0007868611799247638, |
| "loss": 2.734, |
| "step": 17670 |
| }, |
| { |
| "epoch": 14.59613995252348, |
| "grad_norm": 0.5811628103256226, |
| "learning_rate": 0.0007866776768510873, |
| "loss": 2.7447, |
| "step": 17680 |
| }, |
| { |
| "epoch": 14.604396738569513, |
| "grad_norm": 0.6103553771972656, |
| "learning_rate": 0.0007864941737774109, |
| "loss": 2.7472, |
| "step": 17690 |
| }, |
| { |
| "epoch": 14.612653524615544, |
| "grad_norm": 0.569419264793396, |
| "learning_rate": 0.0007863106707037343, |
| "loss": 2.7578, |
| "step": 17700 |
| }, |
| { |
| "epoch": 14.620910310661575, |
| "grad_norm": 0.6102364659309387, |
| "learning_rate": 0.0007861271676300579, |
| "loss": 2.7364, |
| "step": 17710 |
| }, |
| { |
| "epoch": 14.629167096707606, |
| "grad_norm": 0.5832472443580627, |
| "learning_rate": 0.0007859436645563814, |
| "loss": 2.7449, |
| "step": 17720 |
| }, |
| { |
| "epoch": 14.637423882753637, |
| "grad_norm": 0.5760400891304016, |
| "learning_rate": 0.0007857601614827049, |
| "loss": 2.7581, |
| "step": 17730 |
| }, |
| { |
| "epoch": 14.64568066879967, |
| "grad_norm": 0.6216306686401367, |
| "learning_rate": 0.0007855766584090283, |
| "loss": 2.732, |
| "step": 17740 |
| }, |
| { |
| "epoch": 14.653937454845702, |
| "grad_norm": 0.5639564394950867, |
| "learning_rate": 0.0007853931553353519, |
| "loss": 2.7582, |
| "step": 17750 |
| }, |
| { |
| "epoch": 14.662194240891733, |
| "grad_norm": 0.5887823700904846, |
| "learning_rate": 0.0007852096522616754, |
| "loss": 2.7492, |
| "step": 17760 |
| }, |
| { |
| "epoch": 14.670451026937764, |
| "grad_norm": 0.5743685364723206, |
| "learning_rate": 0.0007850261491879988, |
| "loss": 2.7484, |
| "step": 17770 |
| }, |
| { |
| "epoch": 14.678707812983795, |
| "grad_norm": 0.6122255921363831, |
| "learning_rate": 0.0007848426461143224, |
| "loss": 2.7326, |
| "step": 17780 |
| }, |
| { |
| "epoch": 14.686964599029828, |
| "grad_norm": 0.6089203357696533, |
| "learning_rate": 0.0007846591430406459, |
| "loss": 2.7411, |
| "step": 17790 |
| }, |
| { |
| "epoch": 14.69522138507586, |
| "grad_norm": 0.5829803347587585, |
| "learning_rate": 0.0007844756399669695, |
| "loss": 2.7406, |
| "step": 17800 |
| }, |
| { |
| "epoch": 14.70347817112189, |
| "grad_norm": 0.5928598642349243, |
| "learning_rate": 0.0007842921368932929, |
| "loss": 2.7462, |
| "step": 17810 |
| }, |
| { |
| "epoch": 14.711734957167922, |
| "grad_norm": 0.6143853664398193, |
| "learning_rate": 0.0007841086338196165, |
| "loss": 2.7446, |
| "step": 17820 |
| }, |
| { |
| "epoch": 14.719991743213955, |
| "grad_norm": 0.6457964777946472, |
| "learning_rate": 0.00078392513074594, |
| "loss": 2.7416, |
| "step": 17830 |
| }, |
| { |
| "epoch": 14.728248529259986, |
| "grad_norm": 0.6104548573493958, |
| "learning_rate": 0.0007837416276722636, |
| "loss": 2.7332, |
| "step": 17840 |
| }, |
| { |
| "epoch": 14.736505315306017, |
| "grad_norm": 0.5743314623832703, |
| "learning_rate": 0.000783558124598587, |
| "loss": 2.7459, |
| "step": 17850 |
| }, |
| { |
| "epoch": 14.744762101352048, |
| "grad_norm": 0.552040159702301, |
| "learning_rate": 0.0007833746215249105, |
| "loss": 2.747, |
| "step": 17860 |
| }, |
| { |
| "epoch": 14.753018887398081, |
| "grad_norm": 0.57485431432724, |
| "learning_rate": 0.000783191118451234, |
| "loss": 2.7543, |
| "step": 17870 |
| }, |
| { |
| "epoch": 14.761275673444112, |
| "grad_norm": 0.5415575504302979, |
| "learning_rate": 0.0007830076153775576, |
| "loss": 2.7432, |
| "step": 17880 |
| }, |
| { |
| "epoch": 14.769532459490144, |
| "grad_norm": 0.58236163854599, |
| "learning_rate": 0.000782824112303881, |
| "loss": 2.7429, |
| "step": 17890 |
| }, |
| { |
| "epoch": 14.777789245536175, |
| "grad_norm": 0.5532475709915161, |
| "learning_rate": 0.0007826406092302046, |
| "loss": 2.7298, |
| "step": 17900 |
| }, |
| { |
| "epoch": 14.786046031582206, |
| "grad_norm": 0.5620941519737244, |
| "learning_rate": 0.0007824571061565281, |
| "loss": 2.7398, |
| "step": 17910 |
| }, |
| { |
| "epoch": 14.794302817628239, |
| "grad_norm": 0.5772944688796997, |
| "learning_rate": 0.0007822736030828517, |
| "loss": 2.7326, |
| "step": 17920 |
| }, |
| { |
| "epoch": 14.80255960367427, |
| "grad_norm": 0.6066027879714966, |
| "learning_rate": 0.0007820901000091751, |
| "loss": 2.7341, |
| "step": 17930 |
| }, |
| { |
| "epoch": 14.810816389720301, |
| "grad_norm": 0.5544676184654236, |
| "learning_rate": 0.0007819065969354987, |
| "loss": 2.7498, |
| "step": 17940 |
| }, |
| { |
| "epoch": 14.819073175766333, |
| "grad_norm": 0.6160995364189148, |
| "learning_rate": 0.0007817230938618222, |
| "loss": 2.7362, |
| "step": 17950 |
| }, |
| { |
| "epoch": 14.827329961812364, |
| "grad_norm": 0.6500398516654968, |
| "learning_rate": 0.0007815395907881458, |
| "loss": 2.7412, |
| "step": 17960 |
| }, |
| { |
| "epoch": 14.835586747858397, |
| "grad_norm": 0.5683214068412781, |
| "learning_rate": 0.0007813560877144692, |
| "loss": 2.7469, |
| "step": 17970 |
| }, |
| { |
| "epoch": 14.843843533904428, |
| "grad_norm": 0.5637840032577515, |
| "learning_rate": 0.0007811725846407928, |
| "loss": 2.7378, |
| "step": 17980 |
| }, |
| { |
| "epoch": 14.852100319950459, |
| "grad_norm": 0.5927807092666626, |
| "learning_rate": 0.0007809890815671162, |
| "loss": 2.727, |
| "step": 17990 |
| }, |
| { |
| "epoch": 14.86035710599649, |
| "grad_norm": 0.611671507358551, |
| "learning_rate": 0.0007808055784934398, |
| "loss": 2.7437, |
| "step": 18000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 60550, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.215610343472333e+16, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|