{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.169658177331354,
  "eval_steps": 500,
  "global_step": 1100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015423470666486725,
      "grad_norm": 4.125617980957031,
      "learning_rate": 0.0,
      "loss": 2.7158,
      "step": 1
    },
    {
      "epoch": 0.0003084694133297345,
      "grad_norm": 4.080927848815918,
      "learning_rate": 5.1282051282051286e-08,
      "loss": 2.7259,
      "step": 2
    },
    {
      "epoch": 0.0004627041199946018,
      "grad_norm": 4.546864986419678,
      "learning_rate": 1.0256410256410257e-07,
      "loss": 2.7844,
      "step": 3
    },
    {
      "epoch": 0.000616938826659469,
      "grad_norm": 3.869814872741699,
      "learning_rate": 1.5384615384615387e-07,
      "loss": 2.7772,
      "step": 4
    },
    {
      "epoch": 0.0007711735333243363,
      "grad_norm": 3.8191957473754883,
      "learning_rate": 2.0512820512820514e-07,
      "loss": 2.669,
      "step": 5
    },
    {
      "epoch": 0.0009254082399892036,
      "grad_norm": 4.232642650604248,
      "learning_rate": 2.564102564102564e-07,
      "loss": 2.7774,
      "step": 6
    },
    {
      "epoch": 0.001079642946654071,
      "grad_norm": 4.562683582305908,
      "learning_rate": 3.0769230769230774e-07,
      "loss": 2.8335,
      "step": 7
    },
    {
      "epoch": 0.001233877653318938,
      "grad_norm": 4.024895668029785,
      "learning_rate": 3.5897435897435896e-07,
      "loss": 2.7137,
      "step": 8
    },
    {
      "epoch": 0.0013881123599838053,
      "grad_norm": 4.051122665405273,
      "learning_rate": 4.102564102564103e-07,
      "loss": 2.6775,
      "step": 9
    },
    {
      "epoch": 0.0015423470666486726,
      "grad_norm": 4.009105682373047,
      "learning_rate": 4.615384615384616e-07,
      "loss": 2.7797,
      "step": 10
    },
    {
      "epoch": 0.00169658177331354,
      "grad_norm": 3.7827749252319336,
      "learning_rate": 5.128205128205128e-07,
      "loss": 2.7706,
      "step": 11
    },
    {
      "epoch": 0.0018508164799784072,
      "grad_norm": 3.986659288406372,
      "learning_rate": 5.641025641025642e-07,
      "loss": 2.7587,
      "step": 12
    },
    {
      "epoch": 0.0020050511866432745,
      "grad_norm": 4.076365947723389,
      "learning_rate": 6.153846153846155e-07,
      "loss": 2.7406,
      "step": 13
    },
    {
      "epoch": 0.002159285893308142,
      "grad_norm": 3.7137603759765625,
      "learning_rate": 6.666666666666667e-07,
      "loss": 2.6706,
      "step": 14
    },
    {
      "epoch": 0.002313520599973009,
      "grad_norm": 3.197049140930176,
      "learning_rate": 7.179487179487179e-07,
      "loss": 2.5892,
      "step": 15
    },
    {
      "epoch": 0.002467755306637876,
      "grad_norm": 3.490769147872925,
      "learning_rate": 7.692307692307694e-07,
      "loss": 2.6735,
      "step": 16
    },
    {
      "epoch": 0.0026219900133027433,
      "grad_norm": 3.234196662902832,
      "learning_rate": 8.205128205128206e-07,
      "loss": 2.5749,
      "step": 17
    },
    {
      "epoch": 0.0027762247199676106,
      "grad_norm": 3.0578436851501465,
      "learning_rate": 8.717948717948718e-07,
      "loss": 2.5744,
      "step": 18
    },
    {
      "epoch": 0.002930459426632478,
      "grad_norm": 3.03320574760437,
      "learning_rate": 9.230769230769232e-07,
      "loss": 2.5946,
      "step": 19
    },
    {
      "epoch": 0.003084694133297345,
      "grad_norm": 1.8710768222808838,
      "learning_rate": 9.743589743589745e-07,
      "loss": 2.4013,
      "step": 20
    },
    {
      "epoch": 0.0032389288399622125,
      "grad_norm": 1.9252475500106812,
      "learning_rate": 1.0256410256410257e-06,
      "loss": 2.3882,
      "step": 21
    },
    {
      "epoch": 0.00339316354662708,
      "grad_norm": 5.356478691101074,
      "learning_rate": 1.076923076923077e-06,
      "loss": 2.4184,
      "step": 22
    },
    {
      "epoch": 0.003547398253291947,
      "grad_norm": 1.6710164546966553,
      "learning_rate": 1.1282051282051283e-06,
      "loss": 2.3395,
      "step": 23
    },
    {
      "epoch": 0.0037016329599568144,
      "grad_norm": 1.7735917568206787,
      "learning_rate": 1.1794871794871795e-06,
      "loss": 2.35,
      "step": 24
    },
    {
      "epoch": 0.0038558676666216817,
      "grad_norm": 1.9577521085739136,
      "learning_rate": 1.230769230769231e-06,
      "loss": 2.3646,
      "step": 25
    },
    {
      "epoch": 0.004010102373286549,
      "grad_norm": 1.5192971229553223,
      "learning_rate": 1.282051282051282e-06,
      "loss": 2.2818,
      "step": 26
    },
    {
      "epoch": 0.004164337079951416,
      "grad_norm": 1.4385325908660889,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 2.2448,
      "step": 27
    },
    {
      "epoch": 0.004318571786616284,
      "grad_norm": 1.439254879951477,
      "learning_rate": 1.3846153846153848e-06,
      "loss": 2.1641,
      "step": 28
    },
    {
      "epoch": 0.0044728064932811505,
      "grad_norm": 1.5258543491363525,
      "learning_rate": 1.4358974358974359e-06,
      "loss": 2.2035,
      "step": 29
    },
    {
      "epoch": 0.004627041199946018,
      "grad_norm": 1.4886516332626343,
      "learning_rate": 1.4871794871794873e-06,
      "loss": 2.1415,
      "step": 30
    },
    {
      "epoch": 0.004781275906610885,
      "grad_norm": 1.3116540908813477,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 2.1337,
      "step": 31
    },
    {
      "epoch": 0.004935510613275752,
      "grad_norm": 1.0612387657165527,
      "learning_rate": 1.5897435897435897e-06,
      "loss": 2.1492,
      "step": 32
    },
    {
      "epoch": 0.00508974531994062,
      "grad_norm": 1.0794328451156616,
      "learning_rate": 1.6410256410256412e-06,
      "loss": 2.1277,
      "step": 33
    },
    {
      "epoch": 0.0052439800266054866,
      "grad_norm": 1.0971394777297974,
      "learning_rate": 1.6923076923076926e-06,
      "loss": 2.0956,
      "step": 34
    },
    {
      "epoch": 0.005398214733270354,
      "grad_norm": 1.0308690071105957,
      "learning_rate": 1.7435897435897436e-06,
      "loss": 2.1688,
      "step": 35
    },
    {
      "epoch": 0.005552449439935221,
      "grad_norm": 0.8738017678260803,
      "learning_rate": 1.794871794871795e-06,
      "loss": 2.1154,
      "step": 36
    },
    {
      "epoch": 0.005706684146600089,
      "grad_norm": 0.8435266613960266,
      "learning_rate": 1.8461538461538465e-06,
      "loss": 2.1031,
      "step": 37
    },
    {
      "epoch": 0.005860918853264956,
      "grad_norm": 0.865908682346344,
      "learning_rate": 1.8974358974358975e-06,
      "loss": 2.0727,
      "step": 38
    },
    {
      "epoch": 0.0060151535599298235,
      "grad_norm": 0.8485544323921204,
      "learning_rate": 1.948717948717949e-06,
      "loss": 2.0476,
      "step": 39
    },
    {
      "epoch": 0.00616938826659469,
      "grad_norm": 0.8207165598869324,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.0544,
      "step": 40
    },
    {
      "epoch": 0.006323622973259558,
      "grad_norm": 0.93079674243927,
      "learning_rate": 2.0512820512820513e-06,
      "loss": 2.0403,
      "step": 41
    },
    {
      "epoch": 0.006477857679924425,
      "grad_norm": 0.7712639570236206,
      "learning_rate": 2.1025641025641028e-06,
      "loss": 1.9826,
      "step": 42
    },
    {
      "epoch": 0.006632092386589292,
      "grad_norm": 0.7859727740287781,
      "learning_rate": 2.153846153846154e-06,
      "loss": 2.01,
      "step": 43
    },
    {
      "epoch": 0.00678632709325416,
      "grad_norm": 0.8241223692893982,
      "learning_rate": 2.2051282051282052e-06,
      "loss": 1.996,
      "step": 44
    },
    {
      "epoch": 0.0069405617999190265,
      "grad_norm": 0.6832298636436462,
      "learning_rate": 2.2564102564102566e-06,
      "loss": 2.0134,
      "step": 45
    },
    {
      "epoch": 0.007094796506583894,
      "grad_norm": 0.7749252915382385,
      "learning_rate": 2.307692307692308e-06,
      "loss": 2.0027,
      "step": 46
    },
    {
      "epoch": 0.007249031213248761,
      "grad_norm": 0.7955232262611389,
      "learning_rate": 2.358974358974359e-06,
      "loss": 1.9854,
      "step": 47
    },
    {
      "epoch": 0.007403265919913629,
      "grad_norm": 0.6382924914360046,
      "learning_rate": 2.4102564102564105e-06,
      "loss": 1.9807,
      "step": 48
    },
    {
      "epoch": 0.007557500626578496,
      "grad_norm": 0.6480730772018433,
      "learning_rate": 2.461538461538462e-06,
      "loss": 1.9236,
      "step": 49
    },
    {
      "epoch": 0.007711735333243363,
      "grad_norm": 0.6792906522750854,
      "learning_rate": 2.512820512820513e-06,
      "loss": 1.9633,
      "step": 50
    },
    {
      "epoch": 0.007865970039908231,
      "grad_norm": 0.6324858665466309,
      "learning_rate": 2.564102564102564e-06,
      "loss": 1.9083,
      "step": 51
    },
    {
      "epoch": 0.008020204746573098,
      "grad_norm": 0.6173593997955322,
      "learning_rate": 2.615384615384616e-06,
      "loss": 1.9377,
      "step": 52
    },
    {
      "epoch": 0.008174439453237965,
      "grad_norm": 0.6734508275985718,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.8874,
      "step": 53
    },
    {
      "epoch": 0.008328674159902832,
      "grad_norm": 0.6378979086875916,
      "learning_rate": 2.717948717948718e-06,
      "loss": 1.9492,
      "step": 54
    },
    {
      "epoch": 0.008482908866567699,
      "grad_norm": 0.6195733547210693,
      "learning_rate": 2.7692307692307697e-06,
      "loss": 1.9294,
      "step": 55
    },
    {
      "epoch": 0.008637143573232567,
      "grad_norm": 0.6716891527175903,
      "learning_rate": 2.8205128205128207e-06,
      "loss": 1.8872,
      "step": 56
    },
    {
      "epoch": 0.008791378279897434,
      "grad_norm": 0.6091171503067017,
      "learning_rate": 2.8717948717948717e-06,
      "loss": 1.9162,
      "step": 57
    },
    {
      "epoch": 0.008945612986562301,
      "grad_norm": 0.6023731231689453,
      "learning_rate": 2.9230769230769236e-06,
      "loss": 1.8666,
      "step": 58
    },
    {
      "epoch": 0.009099847693227168,
      "grad_norm": 0.5978520512580872,
      "learning_rate": 2.9743589743589746e-06,
      "loss": 1.9221,
      "step": 59
    },
    {
      "epoch": 0.009254082399892036,
      "grad_norm": 0.6359299421310425,
      "learning_rate": 3.0256410256410256e-06,
      "loss": 1.8786,
      "step": 60
    },
    {
      "epoch": 0.009408317106556903,
      "grad_norm": 0.5901927947998047,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 1.8436,
      "step": 61
    },
    {
      "epoch": 0.00956255181322177,
      "grad_norm": 0.5627117156982422,
      "learning_rate": 3.1282051282051284e-06,
      "loss": 1.866,
      "step": 62
    },
    {
      "epoch": 0.009716786519886637,
      "grad_norm": 0.6476024389266968,
      "learning_rate": 3.1794871794871795e-06,
      "loss": 1.8644,
      "step": 63
    },
    {
      "epoch": 0.009871021226551504,
      "grad_norm": 0.5732126832008362,
      "learning_rate": 3.2307692307692313e-06,
      "loss": 1.7929,
      "step": 64
    },
    {
      "epoch": 0.010025255933216373,
      "grad_norm": 0.5641695857048035,
      "learning_rate": 3.2820512820512823e-06,
      "loss": 1.827,
      "step": 65
    },
    {
      "epoch": 0.01017949063988124,
      "grad_norm": 0.6193923950195312,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.8669,
      "step": 66
    },
    {
      "epoch": 0.010333725346546106,
      "grad_norm": 0.5760056376457214,
      "learning_rate": 3.384615384615385e-06,
      "loss": 1.847,
      "step": 67
    },
    {
      "epoch": 0.010487960053210973,
      "grad_norm": 0.585292637348175,
      "learning_rate": 3.435897435897436e-06,
      "loss": 1.8246,
      "step": 68
    },
    {
      "epoch": 0.010642194759875842,
      "grad_norm": 0.6549813151359558,
      "learning_rate": 3.487179487179487e-06,
      "loss": 1.8534,
      "step": 69
    },
    {
      "epoch": 0.010796429466540709,
      "grad_norm": 0.5588355660438538,
      "learning_rate": 3.538461538461539e-06,
      "loss": 1.8266,
      "step": 70
    },
    {
      "epoch": 0.010950664173205575,
      "grad_norm": 0.5905765891075134,
      "learning_rate": 3.58974358974359e-06,
      "loss": 1.8236,
      "step": 71
    },
    {
      "epoch": 0.011104898879870442,
      "grad_norm": 0.6020631790161133,
      "learning_rate": 3.641025641025641e-06,
      "loss": 1.8284,
      "step": 72
    },
    {
      "epoch": 0.011259133586535311,
      "grad_norm": 0.5250481367111206,
      "learning_rate": 3.692307692307693e-06,
      "loss": 1.7922,
      "step": 73
    },
    {
      "epoch": 0.011413368293200178,
      "grad_norm": 0.5569522976875305,
      "learning_rate": 3.743589743589744e-06,
      "loss": 1.785,
      "step": 74
    },
    {
      "epoch": 0.011567602999865045,
      "grad_norm": 0.5544557571411133,
      "learning_rate": 3.794871794871795e-06,
      "loss": 1.7892,
      "step": 75
    },
    {
      "epoch": 0.011721837706529912,
      "grad_norm": 0.6039264798164368,
      "learning_rate": 3.846153846153847e-06,
      "loss": 1.7843,
      "step": 76
    },
    {
      "epoch": 0.011876072413194778,
      "grad_norm": 0.5743536949157715,
      "learning_rate": 3.897435897435898e-06,
      "loss": 1.757,
      "step": 77
    },
    {
      "epoch": 0.012030307119859647,
      "grad_norm": 0.5441644787788391,
      "learning_rate": 3.948717948717949e-06,
      "loss": 1.755,
      "step": 78
    },
    {
      "epoch": 0.012184541826524514,
      "grad_norm": 0.5642723441123962,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.7981,
      "step": 79
    },
    {
      "epoch": 0.01233877653318938,
      "grad_norm": 0.563156008720398,
      "learning_rate": 4.051282051282052e-06,
      "loss": 1.7576,
      "step": 80
    },
    {
      "epoch": 0.012493011239854248,
      "grad_norm": 0.5854681730270386,
      "learning_rate": 4.102564102564103e-06,
      "loss": 1.7968,
      "step": 81
    },
    {
      "epoch": 0.012647245946519116,
      "grad_norm": 0.5472329258918762,
      "learning_rate": 4.1538461538461545e-06,
      "loss": 1.7854,
      "step": 82
    },
    {
      "epoch": 0.012801480653183983,
      "grad_norm": 0.5131881833076477,
      "learning_rate": 4.2051282051282055e-06,
      "loss": 1.7551,
      "step": 83
    },
    {
      "epoch": 0.01295571535984885,
      "grad_norm": 0.5052653551101685,
      "learning_rate": 4.2564102564102566e-06,
      "loss": 1.7032,
      "step": 84
    },
    {
      "epoch": 0.013109950066513717,
      "grad_norm": 0.5798134207725525,
      "learning_rate": 4.307692307692308e-06,
      "loss": 1.7943,
      "step": 85
    },
    {
      "epoch": 0.013264184773178584,
      "grad_norm": 0.5180870294570923,
      "learning_rate": 4.358974358974359e-06,
      "loss": 1.687,
      "step": 86
    },
    {
      "epoch": 0.013418419479843452,
      "grad_norm": 0.49474793672561646,
      "learning_rate": 4.4102564102564104e-06,
      "loss": 1.7606,
      "step": 87
    },
    {
      "epoch": 0.01357265418650832,
      "grad_norm": 0.572761595249176,
      "learning_rate": 4.461538461538462e-06,
      "loss": 1.8037,
      "step": 88
    },
    {
      "epoch": 0.013726888893173186,
      "grad_norm": 0.5462682247161865,
      "learning_rate": 4.512820512820513e-06,
      "loss": 1.7548,
      "step": 89
    },
    {
      "epoch": 0.013881123599838053,
      "grad_norm": 0.5709042549133301,
      "learning_rate": 4.564102564102564e-06,
      "loss": 1.7579,
      "step": 90
    },
    {
      "epoch": 0.014035358306502922,
      "grad_norm": 0.553099513053894,
      "learning_rate": 4.615384615384616e-06,
      "loss": 1.7568,
      "step": 91
    },
    {
      "epoch": 0.014189593013167788,
      "grad_norm": 0.5180365443229675,
      "learning_rate": 4.666666666666667e-06,
      "loss": 1.7567,
      "step": 92
    },
    {
      "epoch": 0.014343827719832655,
      "grad_norm": 0.5820419192314148,
      "learning_rate": 4.717948717948718e-06,
      "loss": 1.7418,
      "step": 93
    },
    {
      "epoch": 0.014498062426497522,
      "grad_norm": 0.5648301243782043,
      "learning_rate": 4.76923076923077e-06,
      "loss": 1.6861,
      "step": 94
    },
    {
      "epoch": 0.01465229713316239,
      "grad_norm": 0.49814391136169434,
      "learning_rate": 4.820512820512821e-06,
      "loss": 1.7415,
      "step": 95
    },
    {
      "epoch": 0.014806531839827258,
      "grad_norm": 0.5138998031616211,
      "learning_rate": 4.871794871794872e-06,
      "loss": 1.7127,
      "step": 96
    },
    {
      "epoch": 0.014960766546492124,
      "grad_norm": 0.6080782413482666,
      "learning_rate": 4.923076923076924e-06,
      "loss": 1.7281,
      "step": 97
    },
    {
      "epoch": 0.015115001253156991,
      "grad_norm": 0.5199087858200073,
      "learning_rate": 4.974358974358975e-06,
      "loss": 1.7136,
      "step": 98
    },
    {
      "epoch": 0.015269235959821858,
      "grad_norm": 0.5357125997543335,
      "learning_rate": 5.025641025641026e-06,
      "loss": 1.7344,
      "step": 99
    },
    {
      "epoch": 0.015423470666486727,
      "grad_norm": 0.547616183757782,
      "learning_rate": 5.076923076923077e-06,
      "loss": 1.7265,
      "step": 100
    },
    {
      "epoch": 0.015577705373151594,
      "grad_norm": 0.5598102807998657,
      "learning_rate": 5.128205128205128e-06,
      "loss": 1.7278,
      "step": 101
    },
    {
      "epoch": 0.015731940079816462,
      "grad_norm": 0.534231960773468,
      "learning_rate": 5.179487179487181e-06,
      "loss": 1.6925,
      "step": 102
    },
    {
      "epoch": 0.015886174786481327,
      "grad_norm": 0.5571257472038269,
      "learning_rate": 5.230769230769232e-06,
      "loss": 1.6954,
      "step": 103
    },
    {
      "epoch": 0.016040409493146196,
      "grad_norm": 0.5430976152420044,
      "learning_rate": 5.282051282051283e-06,
      "loss": 1.7232,
      "step": 104
    },
    {
      "epoch": 0.01619464419981106,
      "grad_norm": 0.5642494559288025,
      "learning_rate": 5.333333333333334e-06,
      "loss": 1.688,
      "step": 105
    },
    {
      "epoch": 0.01634887890647593,
      "grad_norm": 0.5086437463760376,
      "learning_rate": 5.384615384615385e-06,
      "loss": 1.6922,
      "step": 106
    },
    {
      "epoch": 0.0165031136131408,
      "grad_norm": 0.6055141687393188,
      "learning_rate": 5.435897435897436e-06,
      "loss": 1.7557,
      "step": 107
    },
    {
      "epoch": 0.016657348319805663,
      "grad_norm": 0.5747716426849365,
      "learning_rate": 5.487179487179488e-06,
      "loss": 1.6945,
      "step": 108
    },
    {
      "epoch": 0.016811583026470532,
      "grad_norm": 0.5839638113975525,
      "learning_rate": 5.538461538461539e-06,
      "loss": 1.6987,
      "step": 109
    },
    {
      "epoch": 0.016965817733135397,
      "grad_norm": 0.6069226861000061,
      "learning_rate": 5.58974358974359e-06,
      "loss": 1.6843,
      "step": 110
    },
    {
      "epoch": 0.017120052439800266,
      "grad_norm": 0.5975008010864258,
      "learning_rate": 5.641025641025641e-06,
      "loss": 1.6918,
      "step": 111
    },
    {
      "epoch": 0.017274287146465134,
      "grad_norm": 0.5783462524414062,
      "learning_rate": 5.692307692307692e-06,
      "loss": 1.6752,
      "step": 112
    },
    {
      "epoch": 0.01742852185313,
      "grad_norm": 0.5778764486312866,
      "learning_rate": 5.743589743589743e-06,
      "loss": 1.7039,
      "step": 113
    },
    {
      "epoch": 0.017582756559794868,
      "grad_norm": 0.5791458487510681,
      "learning_rate": 5.794871794871796e-06,
      "loss": 1.6676,
      "step": 114
    },
    {
      "epoch": 0.017736991266459737,
      "grad_norm": 0.577421247959137,
      "learning_rate": 5.846153846153847e-06,
      "loss": 1.6754,
      "step": 115
    },
    {
      "epoch": 0.017891225973124602,
      "grad_norm": 0.6224790811538696,
      "learning_rate": 5.897435897435898e-06,
      "loss": 1.6874,
      "step": 116
    },
    {
      "epoch": 0.01804546067978947,
      "grad_norm": 0.5089656114578247,
      "learning_rate": 5.948717948717949e-06,
      "loss": 1.6383,
      "step": 117
    },
    {
      "epoch": 0.018199695386454336,
      "grad_norm": 0.5308560729026794,
      "learning_rate": 6e-06,
      "loss": 1.6398,
      "step": 118
    },
    {
      "epoch": 0.018353930093119204,
      "grad_norm": 0.630492627620697,
      "learning_rate": 6.051282051282051e-06,
      "loss": 1.6644,
      "step": 119
    },
    {
      "epoch": 0.018508164799784073,
      "grad_norm": 0.5221322774887085,
      "learning_rate": 6.102564102564104e-06,
      "loss": 1.6541,
      "step": 120
    },
    {
      "epoch": 0.018662399506448938,
      "grad_norm": 0.5628217458724976,
      "learning_rate": 6.153846153846155e-06,
      "loss": 1.596,
      "step": 121
    },
    {
      "epoch": 0.018816634213113807,
      "grad_norm": 0.5598528385162354,
      "learning_rate": 6.205128205128206e-06,
      "loss": 1.6402,
      "step": 122
    },
    {
      "epoch": 0.018970868919778672,
      "grad_norm": 0.5308383107185364,
      "learning_rate": 6.256410256410257e-06,
      "loss": 1.6523,
      "step": 123
    },
    {
      "epoch": 0.01912510362644354,
      "grad_norm": 0.5692138075828552,
      "learning_rate": 6.307692307692308e-06,
      "loss": 1.6359,
      "step": 124
    },
    {
      "epoch": 0.01927933833310841,
      "grad_norm": 0.6486672163009644,
      "learning_rate": 6.358974358974359e-06,
      "loss": 1.6528,
      "step": 125
    },
    {
      "epoch": 0.019433573039773274,
      "grad_norm": 0.5242540240287781,
      "learning_rate": 6.410256410256412e-06,
      "loss": 1.6354,
      "step": 126
    },
    {
      "epoch": 0.019587807746438143,
      "grad_norm": 0.5515648722648621,
      "learning_rate": 6.461538461538463e-06,
      "loss": 1.6188,
      "step": 127
    },
    {
      "epoch": 0.019742042453103008,
      "grad_norm": 0.5765663981437683,
      "learning_rate": 6.512820512820514e-06,
      "loss": 1.6153,
      "step": 128
    },
    {
      "epoch": 0.019896277159767876,
      "grad_norm": 0.5636539459228516,
      "learning_rate": 6.564102564102565e-06,
      "loss": 1.6604,
      "step": 129
    },
    {
      "epoch": 0.020050511866432745,
      "grad_norm": 0.576707661151886,
      "learning_rate": 6.615384615384616e-06,
      "loss": 1.599,
      "step": 130
    },
    {
      "epoch": 0.02020474657309761,
      "grad_norm": 0.544995129108429,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.5985,
      "step": 131
    },
    {
      "epoch": 0.02035898127976248,
      "grad_norm": 0.545677125453949,
      "learning_rate": 6.717948717948718e-06,
      "loss": 1.6248,
      "step": 132
    },
    {
      "epoch": 0.020513215986427347,
      "grad_norm": 0.5226114988327026,
      "learning_rate": 6.76923076923077e-06,
      "loss": 1.6214,
      "step": 133
    },
    {
      "epoch": 0.020667450693092212,
      "grad_norm": 0.5579204559326172,
      "learning_rate": 6.820512820512821e-06,
      "loss": 1.5938,
      "step": 134
    },
    {
      "epoch": 0.02082168539975708,
      "grad_norm": 0.5821995735168457,
      "learning_rate": 6.871794871794872e-06,
      "loss": 1.6156,
      "step": 135
    },
    {
      "epoch": 0.020975920106421946,
      "grad_norm": 0.5184845924377441,
      "learning_rate": 6.923076923076923e-06,
      "loss": 1.602,
      "step": 136
    },
    {
      "epoch": 0.021130154813086815,
      "grad_norm": 0.5434860587120056,
      "learning_rate": 6.974358974358974e-06,
      "loss": 1.5739,
      "step": 137
    },
    {
      "epoch": 0.021284389519751683,
      "grad_norm": 0.5877484083175659,
      "learning_rate": 7.025641025641025e-06,
      "loss": 1.6584,
      "step": 138
    },
    {
      "epoch": 0.02143862422641655,
      "grad_norm": 0.5218464732170105,
      "learning_rate": 7.076923076923078e-06,
      "loss": 1.5989,
      "step": 139
    },
    {
      "epoch": 0.021592858933081417,
      "grad_norm": 0.5589679479598999,
      "learning_rate": 7.128205128205129e-06,
      "loss": 1.5849,
      "step": 140
    },
    {
      "epoch": 0.021747093639746282,
      "grad_norm": 0.5412963628768921,
      "learning_rate": 7.17948717948718e-06,
      "loss": 1.6026,
      "step": 141
    },
    {
      "epoch": 0.02190132834641115,
      "grad_norm": 0.5312784910202026,
      "learning_rate": 7.230769230769231e-06,
      "loss": 1.6181,
      "step": 142
    },
    {
      "epoch": 0.02205556305307602,
      "grad_norm": 0.5265107750892639,
      "learning_rate": 7.282051282051282e-06,
      "loss": 1.6133,
      "step": 143
    },
    {
      "epoch": 0.022209797759740885,
      "grad_norm": 0.61285799741745,
      "learning_rate": 7.333333333333333e-06,
      "loss": 1.5933,
      "step": 144
    },
    {
      "epoch": 0.022364032466405753,
      "grad_norm": 0.5416054725646973,
      "learning_rate": 7.384615384615386e-06,
      "loss": 1.5919,
      "step": 145
    },
    {
      "epoch": 0.022518267173070622,
      "grad_norm": 0.5560144782066345,
      "learning_rate": 7.435897435897437e-06,
      "loss": 1.5417,
      "step": 146
    },
    {
      "epoch": 0.022672501879735487,
      "grad_norm": 0.5515575408935547,
      "learning_rate": 7.487179487179488e-06,
      "loss": 1.6038,
      "step": 147
    },
    {
      "epoch": 0.022826736586400356,
      "grad_norm": 0.5019623041152954,
      "learning_rate": 7.538461538461539e-06,
      "loss": 1.5825,
      "step": 148
    },
    {
      "epoch": 0.02298097129306522,
      "grad_norm": 0.4966740310192108,
      "learning_rate": 7.58974358974359e-06,
      "loss": 1.5658,
      "step": 149
    },
    {
      "epoch": 0.02313520599973009,
      "grad_norm": 0.4935978353023529,
      "learning_rate": 7.641025641025641e-06,
      "loss": 1.5489,
      "step": 150
    },
    {
      "epoch": 0.023289440706394958,
      "grad_norm": 0.5220585465431213,
      "learning_rate": 7.692307692307694e-06,
      "loss": 1.5868,
      "step": 151
    },
    {
      "epoch": 0.023443675413059823,
      "grad_norm": 0.5111873745918274,
      "learning_rate": 7.743589743589745e-06,
      "loss": 1.5168,
      "step": 152
    },
    {
      "epoch": 0.02359791011972469,
      "grad_norm": 0.5328781604766846,
      "learning_rate": 7.794871794871796e-06,
      "loss": 1.5689,
      "step": 153
    },
    {
      "epoch": 0.023752144826389557,
      "grad_norm": 0.5277527570724487,
      "learning_rate": 7.846153846153847e-06,
      "loss": 1.5745,
      "step": 154
    },
    {
      "epoch": 0.023906379533054425,
      "grad_norm": 0.47298046946525574,
      "learning_rate": 7.897435897435898e-06,
      "loss": 1.5584,
      "step": 155
    },
    {
      "epoch": 0.024060614239719294,
      "grad_norm": 0.5444458723068237,
      "learning_rate": 7.948717948717949e-06,
      "loss": 1.5624,
      "step": 156
    },
    {
      "epoch": 0.02421484894638416,
      "grad_norm": 0.5083262920379639,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.6258,
      "step": 157
    },
    {
      "epoch": 0.024369083653049028,
      "grad_norm": 0.5201572179794312,
      "learning_rate": 8.051282051282052e-06,
      "loss": 1.6051,
      "step": 158
    },
    {
      "epoch": 0.024523318359713893,
      "grad_norm": 0.5222218632698059,
      "learning_rate": 8.102564102564103e-06,
      "loss": 1.5448,
      "step": 159
    },
    {
      "epoch": 0.02467755306637876,
      "grad_norm": 0.4937100410461426,
      "learning_rate": 8.153846153846154e-06,
      "loss": 1.5736,
      "step": 160
    },
    {
      "epoch": 0.02483178777304363,
      "grad_norm": 0.511566162109375,
      "learning_rate": 8.205128205128205e-06,
      "loss": 1.5593,
      "step": 161
    },
    {
      "epoch": 0.024986022479708495,
      "grad_norm": 0.5414931178092957,
      "learning_rate": 8.256410256410256e-06,
      "loss": 1.5372,
      "step": 162
    },
    {
      "epoch": 0.025140257186373364,
      "grad_norm": 0.5102213025093079,
      "learning_rate": 8.307692307692309e-06,
      "loss": 1.5652,
      "step": 163
    },
    {
      "epoch": 0.025294491893038232,
      "grad_norm": 0.49184349179267883,
      "learning_rate": 8.35897435897436e-06,
      "loss": 1.5025,
      "step": 164
    },
    {
      "epoch": 0.025448726599703098,
      "grad_norm": 0.5266595482826233,
      "learning_rate": 8.410256410256411e-06,
      "loss": 1.5522,
      "step": 165
    },
    {
      "epoch": 0.025602961306367966,
      "grad_norm": 0.5281389951705933,
      "learning_rate": 8.461538461538462e-06,
      "loss": 1.5447,
      "step": 166
    },
    {
      "epoch": 0.02575719601303283,
      "grad_norm": 0.5073181390762329,
      "learning_rate": 8.512820512820513e-06,
      "loss": 1.5365,
      "step": 167
    },
    {
      "epoch": 0.0259114307196977,
      "grad_norm": 0.5095680356025696,
      "learning_rate": 8.564102564102564e-06,
      "loss": 1.543,
      "step": 168
    },
    {
      "epoch": 0.02606566542636257,
      "grad_norm": 0.5605200529098511,
      "learning_rate": 8.615384615384617e-06,
      "loss": 1.5694,
      "step": 169
    },
    {
      "epoch": 0.026219900133027434,
      "grad_norm": 0.4923345744609833,
      "learning_rate": 8.666666666666668e-06,
      "loss": 1.5625,
      "step": 170
    },
    {
      "epoch": 0.026374134839692302,
      "grad_norm": 0.520775318145752,
      "learning_rate": 8.717948717948719e-06,
      "loss": 1.549,
      "step": 171
    },
    {
      "epoch": 0.026528369546357167,
      "grad_norm": 0.5525628924369812,
      "learning_rate": 8.76923076923077e-06,
      "loss": 1.5293,
      "step": 172
    },
    {
      "epoch": 0.026682604253022036,
      "grad_norm": 0.4697412848472595,
      "learning_rate": 8.820512820512821e-06,
      "loss": 1.5181,
      "step": 173
    },
    {
      "epoch": 0.026836838959686905,
      "grad_norm": 0.5007003545761108,
      "learning_rate": 8.871794871794872e-06,
      "loss": 1.5086,
      "step": 174
    },
    {
      "epoch": 0.02699107366635177,
      "grad_norm": 0.544914960861206,
      "learning_rate": 8.923076923076925e-06,
      "loss": 1.4978,
      "step": 175
    },
    {
      "epoch": 0.02714530837301664,
      "grad_norm": 0.5071407556533813,
      "learning_rate": 8.974358974358976e-06,
      "loss": 1.5403,
      "step": 176
    },
    {
      "epoch": 0.027299543079681507,
      "grad_norm": 0.49474087357521057,
      "learning_rate": 9.025641025641027e-06,
      "loss": 1.5416,
      "step": 177
    },
    {
      "epoch": 0.027453777786346372,
      "grad_norm": 0.5103272199630737,
      "learning_rate": 9.076923076923078e-06,
      "loss": 1.5159,
      "step": 178
    },
    {
      "epoch": 0.02760801249301124,
      "grad_norm": 0.5085631608963013,
      "learning_rate": 9.128205128205129e-06,
      "loss": 1.5361,
      "step": 179
    },
    {
      "epoch": 0.027762247199676106,
      "grad_norm": 0.5117743015289307,
      "learning_rate": 9.17948717948718e-06,
      "loss": 1.5114,
      "step": 180
    },
    {
      "epoch": 0.027916481906340974,
      "grad_norm": 0.5602802038192749,
      "learning_rate": 9.230769230769232e-06,
      "loss": 1.5534,
      "step": 181
    },
    {
      "epoch": 0.028070716613005843,
      "grad_norm": 0.509979784488678,
      "learning_rate": 9.282051282051283e-06,
      "loss": 1.5526,
      "step": 182
    },
    {
      "epoch": 0.028224951319670708,
      "grad_norm": 0.4802814722061157,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.5043,
      "step": 183
    },
    {
      "epoch": 0.028379186026335577,
      "grad_norm": 0.511145293712616,
      "learning_rate": 9.384615384615385e-06,
      "loss": 1.5254,
      "step": 184
    },
    {
      "epoch": 0.028533420733000442,
      "grad_norm": 0.5523600578308105,
      "learning_rate": 9.435897435897436e-06,
      "loss": 1.5476,
      "step": 185
    },
    {
      "epoch": 0.02868765543966531,
      "grad_norm": 0.5209853649139404,
      "learning_rate": 9.487179487179487e-06,
      "loss": 1.5132,
      "step": 186
    },
    {
      "epoch": 0.02884189014633018,
      "grad_norm": 0.5149086117744446,
      "learning_rate": 9.53846153846154e-06,
      "loss": 1.5123,
      "step": 187
    },
    {
      "epoch": 0.028996124852995044,
      "grad_norm": 0.5762576460838318,
      "learning_rate": 9.589743589743591e-06,
      "loss": 1.5759,
      "step": 188
    },
    {
      "epoch": 0.029150359559659913,
      "grad_norm": 0.47409066557884216,
      "learning_rate": 9.641025641025642e-06,
      "loss": 1.5326,
      "step": 189
    },
    {
      "epoch": 0.02930459426632478,
      "grad_norm": 0.466664582490921,
      "learning_rate": 9.692307692307693e-06,
      "loss": 1.5393,
      "step": 190
    },
    {
      "epoch": 0.029458828972989647,
      "grad_norm": 0.5279547572135925,
      "learning_rate": 9.743589743589744e-06,
      "loss": 1.5227,
      "step": 191
    },
    {
      "epoch": 0.029613063679654515,
      "grad_norm": 0.486102819442749,
      "learning_rate": 9.794871794871795e-06,
      "loss": 1.5195,
      "step": 192
    },
    {
      "epoch": 0.02976729838631938,
      "grad_norm": 0.4880712926387787,
      "learning_rate": 9.846153846153848e-06,
      "loss": 1.4996,
      "step": 193
    },
    {
      "epoch": 0.02992153309298425,
      "grad_norm": 0.5420333743095398,
      "learning_rate": 9.897435897435899e-06,
      "loss": 1.5643,
      "step": 194
    },
    {
      "epoch": 0.030075767799649118,
      "grad_norm": 0.48395177721977234,
      "learning_rate": 9.94871794871795e-06,
      "loss": 1.5478,
      "step": 195
    },
    {
      "epoch": 0.030230002506313983,
      "grad_norm": 0.5014393329620361,
      "learning_rate": 1e-05,
      "loss": 1.4718,
      "step": 196
    },
    {
      "epoch": 0.03038423721297885,
      "grad_norm": 0.4976564645767212,
      "learning_rate": 9.999999376155205e-06,
      "loss": 1.5321,
      "step": 197
    },
    {
      "epoch": 0.030538471919643716,
      "grad_norm": 0.46849575638771057,
      "learning_rate": 9.999997504620979e-06,
      "loss": 1.5309,
      "step": 198
    },
    {
      "epoch": 0.030692706626308585,
      "grad_norm": 0.5039035081863403,
      "learning_rate": 9.999994385397785e-06,
      "loss": 1.4752,
      "step": 199
    },
    {
      "epoch": 0.030846941332973454,
      "grad_norm": 0.49510613083839417,
      "learning_rate": 9.999990018486403e-06,
      "loss": 1.4921,
      "step": 200
    },
    {
      "epoch": 0.03100117603963832,
      "grad_norm": 0.49737676978111267,
      "learning_rate": 9.999984403887922e-06,
      "loss": 1.501,
      "step": 201
    },
    {
      "epoch": 0.031155410746303187,
      "grad_norm": 0.4791927933692932,
      "learning_rate": 9.999977541603745e-06,
      "loss": 1.4581,
      "step": 202
    },
    {
      "epoch": 0.031309645452968056,
      "grad_norm": 0.46148934960365295,
      "learning_rate": 9.999969431635584e-06,
      "loss": 1.5367,
      "step": 203
    },
    {
      "epoch": 0.031463880159632925,
      "grad_norm": 0.5311307907104492,
      "learning_rate": 9.99996007398546e-06,
      "loss": 1.5426,
      "step": 204
    },
    {
      "epoch": 0.031618114866297786,
      "grad_norm": 0.46260470151901245,
      "learning_rate": 9.999949468655711e-06,
      "loss": 1.5089,
      "step": 205
    },
    {
      "epoch": 0.031772349572962655,
      "grad_norm": 0.46585148572921753,
      "learning_rate": 9.999937615648983e-06,
      "loss": 1.5111,
      "step": 206
    },
    {
      "epoch": 0.03192658427962752,
      "grad_norm": 0.528596043586731,
      "learning_rate": 9.999924514968234e-06,
      "loss": 1.5383,
      "step": 207
    },
    {
      "epoch": 0.03208081898629239,
      "grad_norm": 0.47473981976509094,
      "learning_rate": 9.99991016661673e-06,
      "loss": 1.4716,
      "step": 208
    },
    {
      "epoch": 0.03223505369295726,
      "grad_norm": 0.45880991220474243,
      "learning_rate": 9.999894570598055e-06,
      "loss": 1.49,
      "step": 209
    },
    {
      "epoch": 0.03238928839962212,
      "grad_norm": 0.5201250910758972,
      "learning_rate": 9.9998777269161e-06,
      "loss": 1.4918,
      "step": 210
    },
    {
      "epoch": 0.03254352310628699,
      "grad_norm": 0.492723673582077,
      "learning_rate": 9.999859635575068e-06,
      "loss": 1.5056,
      "step": 211
    },
    {
      "epoch": 0.03269775781295186,
      "grad_norm": 0.45561763644218445,
      "learning_rate": 9.999840296579472e-06,
      "loss": 1.4839,
      "step": 212
    },
    {
      "epoch": 0.03285199251961673,
      "grad_norm": 0.5062990784645081,
      "learning_rate": 9.999819709934142e-06,
      "loss": 1.5299,
      "step": 213
    },
    {
      "epoch": 0.0330062272262816,
      "grad_norm": 0.47736623883247375,
      "learning_rate": 9.99979787564421e-06,
      "loss": 1.4988,
      "step": 214
    },
    {
      "epoch": 0.03316046193294646,
      "grad_norm": 0.4908311069011688,
      "learning_rate": 9.999774793715126e-06,
      "loss": 1.5076,
      "step": 215
    },
    {
      "epoch": 0.03331469663961133,
      "grad_norm": 0.5111482739448547,
      "learning_rate": 9.99975046415265e-06,
      "loss": 1.5338,
      "step": 216
    },
    {
      "epoch": 0.033468931346276196,
      "grad_norm": 0.4972241520881653,
      "learning_rate": 9.999724886962857e-06,
      "loss": 1.506,
      "step": 217
    },
    {
      "epoch": 0.033623166052941064,
      "grad_norm": 0.5056758522987366,
      "learning_rate": 9.999698062152125e-06,
      "loss": 1.4763,
      "step": 218
    },
    {
      "epoch": 0.03377740075960593,
      "grad_norm": 0.5542264580726624,
      "learning_rate": 9.999669989727147e-06,
      "loss": 1.5122,
      "step": 219
    },
    {
      "epoch": 0.033931635466270794,
      "grad_norm": 0.4869178831577301,
      "learning_rate": 9.999640669694932e-06,
      "loss": 1.4991,
      "step": 220
    },
    {
      "epoch": 0.03408587017293566,
      "grad_norm": 0.5192879438400269,
      "learning_rate": 9.999610102062795e-06,
      "loss": 1.5102,
      "step": 221
    },
    {
      "epoch": 0.03424010487960053,
      "grad_norm": 0.5683949589729309,
      "learning_rate": 9.999578286838363e-06,
      "loss": 1.5163,
      "step": 222
    },
    {
      "epoch": 0.0343943395862654,
      "grad_norm": 0.5145759582519531,
      "learning_rate": 9.999545224029574e-06,
      "loss": 1.4913,
      "step": 223
    },
    {
      "epoch": 0.03454857429293027,
      "grad_norm": 0.4716167747974396,
      "learning_rate": 9.99951091364468e-06,
      "loss": 1.4894,
      "step": 224
    },
    {
      "epoch": 0.03470280899959513,
      "grad_norm": 0.5503811836242676,
      "learning_rate": 9.999475355692245e-06,
      "loss": 1.5157,
      "step": 225
    },
    {
      "epoch": 0.03485704370626,
      "grad_norm": 0.4690919816493988,
      "learning_rate": 9.999438550181137e-06,
      "loss": 1.5261,
      "step": 226
    },
    {
      "epoch": 0.03501127841292487,
      "grad_norm": 0.4738644063472748,
      "learning_rate": 9.999400497120545e-06,
      "loss": 1.4896,
      "step": 227
    },
    {
      "epoch": 0.035165513119589736,
      "grad_norm": 0.5106549263000488,
      "learning_rate": 9.999361196519961e-06,
      "loss": 1.4862,
      "step": 228
    },
    {
      "epoch": 0.035319747826254605,
      "grad_norm": 0.46504995226860046,
      "learning_rate": 9.999320648389195e-06,
      "loss": 1.5248,
      "step": 229
    },
    {
      "epoch": 0.035473982532919474,
      "grad_norm": 0.4892650246620178,
      "learning_rate": 9.999278852738364e-06,
      "loss": 1.4917,
      "step": 230
    },
    {
      "epoch": 0.035628217239584335,
      "grad_norm": 0.4626985192298889,
      "learning_rate": 9.999235809577897e-06,
      "loss": 1.489,
      "step": 231
    },
    {
      "epoch": 0.035782451946249204,
      "grad_norm": 0.552362859249115,
      "learning_rate": 9.999191518918536e-06,
      "loss": 1.5348,
      "step": 232
    },
    {
      "epoch": 0.03593668665291407,
      "grad_norm": 0.4743778109550476,
      "learning_rate": 9.999145980771334e-06,
      "loss": 1.4837,
      "step": 233
    },
    {
      "epoch": 0.03609092135957894,
      "grad_norm": 0.47292467951774597,
      "learning_rate": 9.99909919514765e-06,
      "loss": 1.489,
      "step": 234
    },
    {
      "epoch": 0.03624515606624381,
      "grad_norm": 0.4847182333469391,
      "learning_rate": 9.999051162059165e-06,
      "loss": 1.5122,
      "step": 235
    },
    {
      "epoch": 0.03639939077290867,
      "grad_norm": 0.45157289505004883,
      "learning_rate": 9.99900188151786e-06,
      "loss": 1.4347,
      "step": 236
    },
    {
      "epoch": 0.03655362547957354,
      "grad_norm": 0.4606632590293884,
      "learning_rate": 9.998951353536036e-06,
      "loss": 1.4722,
      "step": 237
    },
    {
      "epoch": 0.03670786018623841,
      "grad_norm": 0.5129456520080566,
      "learning_rate": 9.9988995781263e-06,
      "loss": 1.506,
      "step": 238
    },
    {
      "epoch": 0.03686209489290328,
      "grad_norm": 0.49908360838890076,
      "learning_rate": 9.998846555301572e-06,
      "loss": 1.4697,
      "step": 239
    },
    {
      "epoch": 0.037016329599568146,
      "grad_norm": 0.47629302740097046,
      "learning_rate": 9.99879228507508e-06,
      "loss": 1.4859,
      "step": 240
    },
    {
      "epoch": 0.03717056430623301,
      "grad_norm": 0.55729079246521,
      "learning_rate": 9.998736767460373e-06,
      "loss": 1.505,
      "step": 241
    },
    {
      "epoch": 0.037324799012897876,
      "grad_norm": 0.4755311906337738,
      "learning_rate": 9.998680002471302e-06,
      "loss": 1.4776,
      "step": 242
    },
    {
      "epoch": 0.037479033719562745,
      "grad_norm": 0.48049280047416687,
      "learning_rate": 9.99862199012203e-06,
      "loss": 1.4838,
      "step": 243
    },
    {
      "epoch": 0.03763326842622761,
      "grad_norm": 0.5337971448898315,
      "learning_rate": 9.998562730427035e-06,
      "loss": 1.5136,
      "step": 244
    },
    {
      "epoch": 0.03778750313289248,
      "grad_norm": 0.4617859721183777,
      "learning_rate": 9.998502223401104e-06,
      "loss": 1.4989,
      "step": 245
    },
    {
      "epoch": 0.037941737839557343,
      "grad_norm": 0.4700513780117035,
      "learning_rate": 9.998440469059336e-06,
      "loss": 1.4656,
      "step": 246
    },
    {
      "epoch": 0.03809597254622221,
      "grad_norm": 0.5715804696083069,
      "learning_rate": 9.998377467417142e-06,
      "loss": 1.5091,
      "step": 247
    },
    {
      "epoch": 0.03825020725288708,
      "grad_norm": 0.4865875244140625,
      "learning_rate": 9.998313218490244e-06,
      "loss": 1.5167,
      "step": 248
    },
    {
      "epoch": 0.03840444195955195,
      "grad_norm": 0.4848752021789551,
      "learning_rate": 9.99824772229467e-06,
      "loss": 1.5317,
      "step": 249
    },
    {
      "epoch": 0.03855867666621682,
      "grad_norm": 0.46630924940109253,
      "learning_rate": 9.998180978846768e-06,
      "loss": 1.486,
      "step": 250
    },
    {
      "epoch": 0.03871291137288168,
      "grad_norm": 0.4797709584236145,
      "learning_rate": 9.998112988163191e-06,
      "loss": 1.4303,
      "step": 251
    },
    {
      "epoch": 0.03886714607954655,
      "grad_norm": 0.5287182331085205,
      "learning_rate": 9.998043750260906e-06,
      "loss": 1.4937,
      "step": 252
    },
    {
      "epoch": 0.03902138078621142,
      "grad_norm": 0.4925422966480255,
      "learning_rate": 9.997973265157192e-06,
      "loss": 1.4897,
      "step": 253
    },
    {
      "epoch": 0.039175615492876285,
      "grad_norm": 0.5214465856552124,
      "learning_rate": 9.997901532869636e-06,
      "loss": 1.4669,
      "step": 254
    },
    {
      "epoch": 0.039329850199541154,
      "grad_norm": 0.48910996317863464,
      "learning_rate": 9.997828553416136e-06,
      "loss": 1.4673,
      "step": 255
    },
    {
      "epoch": 0.039484084906206016,
      "grad_norm": 0.5112558603286743,
      "learning_rate": 9.997754326814908e-06,
      "loss": 1.476,
      "step": 256
    },
    {
      "epoch": 0.039638319612870884,
      "grad_norm": 0.5078018307685852,
      "learning_rate": 9.99767885308447e-06,
      "loss": 1.4815,
      "step": 257
    },
    {
      "epoch": 0.03979255431953575,
      "grad_norm": 0.5006856322288513,
      "learning_rate": 9.997602132243659e-06,
      "loss": 1.4811,
      "step": 258
    },
    {
      "epoch": 0.03994678902620062,
      "grad_norm": 0.4795530140399933,
      "learning_rate": 9.997524164311616e-06,
      "loss": 1.4706,
      "step": 259
    },
    {
      "epoch": 0.04010102373286549,
      "grad_norm": 0.5512394309043884,
      "learning_rate": 9.9974449493078e-06,
      "loss": 1.5279,
      "step": 260
    },
    {
      "epoch": 0.04025525843953036,
      "grad_norm": 0.5088499784469604,
      "learning_rate": 9.997364487251976e-06,
      "loss": 1.4688,
      "step": 261
    },
    {
      "epoch": 0.04040949314619522,
      "grad_norm": 0.4722222089767456,
      "learning_rate": 9.997282778164223e-06,
      "loss": 1.4721,
      "step": 262
    },
    {
      "epoch": 0.04056372785286009,
      "grad_norm": 0.5502070188522339,
      "learning_rate": 9.997199822064934e-06,
      "loss": 1.4868,
      "step": 263
    },
    {
      "epoch": 0.04071796255952496,
      "grad_norm": 0.4725727140903473,
      "learning_rate": 9.997115618974804e-06,
      "loss": 1.4851,
      "step": 264
    },
    {
      "epoch": 0.040872197266189826,
      "grad_norm": 0.5010146498680115,
      "learning_rate": 9.997030168914848e-06,
      "loss": 1.4376,
      "step": 265
    },
    {
      "epoch": 0.041026431972854695,
      "grad_norm": 0.5068126320838928,
      "learning_rate": 9.996943471906389e-06,
      "loss": 1.4676,
      "step": 266
    },
    {
      "epoch": 0.041180666679519556,
      "grad_norm": 0.47465837001800537,
      "learning_rate": 9.996855527971058e-06,
      "loss": 1.4522,
      "step": 267
    },
    {
      "epoch": 0.041334901386184425,
      "grad_norm": 0.4365485906600952,
      "learning_rate": 9.996766337130806e-06,
      "loss": 1.4403,
      "step": 268
    },
    {
      "epoch": 0.041489136092849294,
      "grad_norm": 0.5462592840194702,
      "learning_rate": 9.996675899407886e-06,
      "loss": 1.5112,
      "step": 269
    },
    {
      "epoch": 0.04164337079951416,
      "grad_norm": 0.4605362117290497,
      "learning_rate": 9.996584214824867e-06,
      "loss": 1.4659,
      "step": 270
    },
    {
      "epoch": 0.04179760550617903,
      "grad_norm": 0.46796712279319763,
      "learning_rate": 9.996491283404626e-06,
      "loss": 1.4506,
      "step": 271
    },
    {
      "epoch": 0.04195184021284389,
      "grad_norm": 0.5163938403129578,
      "learning_rate": 9.996397105170353e-06,
      "loss": 1.4661,
      "step": 272
    },
    {
      "epoch": 0.04210607491950876,
      "grad_norm": 0.5185596942901611,
      "learning_rate": 9.996301680145551e-06,
      "loss": 1.4609,
      "step": 273
    },
    {
      "epoch": 0.04226030962617363,
      "grad_norm": 0.48026686906814575,
      "learning_rate": 9.996205008354031e-06,
      "loss": 1.4885,
      "step": 274
    },
    {
      "epoch": 0.0424145443328385,
      "grad_norm": 0.5034846663475037,
      "learning_rate": 9.996107089819916e-06,
      "loss": 1.4446,
      "step": 275
    },
    {
      "epoch": 0.04256877903950337,
      "grad_norm": 0.5184863805770874,
      "learning_rate": 9.996007924567641e-06,
      "loss": 1.4707,
      "step": 276
    },
    {
      "epoch": 0.04272301374616823,
      "grad_norm": 0.4674989879131317,
      "learning_rate": 9.995907512621952e-06,
      "loss": 1.4632,
      "step": 277
    },
    {
      "epoch": 0.0428772484528331,
      "grad_norm": 0.4774078130722046,
      "learning_rate": 9.995805854007903e-06,
      "loss": 1.4849,
      "step": 278
    },
    {
      "epoch": 0.043031483159497966,
      "grad_norm": 0.4731788635253906,
      "learning_rate": 9.995702948750864e-06,
      "loss": 1.4733,
      "step": 279
    },
    {
      "epoch": 0.043185717866162834,
      "grad_norm": 0.4455285370349884,
      "learning_rate": 9.995598796876514e-06,
      "loss": 1.4507,
      "step": 280
    },
    {
      "epoch": 0.0433399525728277,
      "grad_norm": 0.46435633301734924,
      "learning_rate": 9.995493398410843e-06,
      "loss": 1.4662,
      "step": 281
    },
    {
      "epoch": 0.043494187279492565,
      "grad_norm": 0.47969239950180054,
      "learning_rate": 9.995386753380149e-06,
      "loss": 1.4943,
      "step": 282
    },
    {
      "epoch": 0.04364842198615743,
      "grad_norm": 0.4887070655822754,
      "learning_rate": 9.995278861811047e-06,
      "loss": 1.5,
      "step": 283
    },
    {
      "epoch": 0.0438026566928223,
      "grad_norm": 0.4667576849460602,
      "learning_rate": 9.995169723730458e-06,
      "loss": 1.4772,
      "step": 284
    },
    {
      "epoch": 0.04395689139948717,
      "grad_norm": 0.5419292449951172,
      "learning_rate": 9.995059339165619e-06,
      "loss": 1.4736,
      "step": 285
    },
    {
      "epoch": 0.04411112610615204,
      "grad_norm": 0.4901038706302643,
      "learning_rate": 9.994947708144072e-06,
      "loss": 1.4694,
      "step": 286
    },
    {
      "epoch": 0.0442653608128169,
      "grad_norm": 0.47999393939971924,
      "learning_rate": 9.994834830693676e-06,
      "loss": 1.4669,
      "step": 287
    },
    {
      "epoch": 0.04441959551948177,
      "grad_norm": 0.5101773738861084,
      "learning_rate": 9.994720706842595e-06,
      "loss": 1.5044,
      "step": 288
    },
    {
      "epoch": 0.04457383022614664,
      "grad_norm": 0.45634952187538147,
      "learning_rate": 9.994605336619309e-06,
      "loss": 1.4656,
      "step": 289
    },
    {
      "epoch": 0.044728064932811507,
      "grad_norm": 0.4852774143218994,
      "learning_rate": 9.994488720052606e-06,
      "loss": 1.4386,
      "step": 290
    },
    {
      "epoch": 0.044882299639476375,
      "grad_norm": 0.5093563199043274,
      "learning_rate": 9.99437085717159e-06,
      "loss": 1.4539,
      "step": 291
    },
    {
      "epoch": 0.045036534346141244,
      "grad_norm": 0.4422704577445984,
      "learning_rate": 9.994251748005668e-06,
      "loss": 1.4716,
      "step": 292
    },
    {
      "epoch": 0.045190769052806105,
      "grad_norm": 0.46956226229667664,
      "learning_rate": 9.994131392584565e-06,
      "loss": 1.4452,
      "step": 293
    },
    {
      "epoch": 0.045345003759470974,
      "grad_norm": 0.5034730434417725,
      "learning_rate": 9.994009790938311e-06,
      "loss": 1.4598,
      "step": 294
    },
    {
      "epoch": 0.04549923846613584,
      "grad_norm": 0.47918686270713806,
      "learning_rate": 9.993886943097254e-06,
      "loss": 1.4674,
      "step": 295
    },
    {
      "epoch": 0.04565347317280071,
      "grad_norm": 0.4500378668308258,
      "learning_rate": 9.993762849092047e-06,
      "loss": 1.4652,
      "step": 296
    },
    {
      "epoch": 0.04580770787946558,
      "grad_norm": 0.507198691368103,
      "learning_rate": 9.993637508953658e-06,
      "loss": 1.4752,
      "step": 297
    },
    {
      "epoch": 0.04596194258613044,
      "grad_norm": 0.42645618319511414,
      "learning_rate": 9.993510922713362e-06,
      "loss": 1.4617,
      "step": 298
    },
    {
      "epoch": 0.04611617729279531,
      "grad_norm": 0.4737209677696228,
      "learning_rate": 9.993383090402746e-06,
      "loss": 1.4178,
      "step": 299
    },
    {
      "epoch": 0.04627041199946018,
      "grad_norm": 0.4638979434967041,
      "learning_rate": 9.993254012053713e-06,
      "loss": 1.4434,
      "step": 300
    },
    {
      "epoch": 0.04642464670612505,
      "grad_norm": 0.4808723032474518,
      "learning_rate": 9.99312368769847e-06,
      "loss": 1.44,
      "step": 301
    },
    {
      "epoch": 0.046578881412789916,
      "grad_norm": 0.4738243818283081,
      "learning_rate": 9.99299211736954e-06,
      "loss": 1.4517,
      "step": 302
    },
    {
      "epoch": 0.04673311611945478,
      "grad_norm": 0.4913071095943451,
      "learning_rate": 9.992859301099752e-06,
      "loss": 1.4143,
      "step": 303
    },
    {
      "epoch": 0.046887350826119646,
      "grad_norm": 0.4924822151660919,
      "learning_rate": 9.99272523892225e-06,
      "loss": 1.4787,
      "step": 304
    },
    {
      "epoch": 0.047041585532784515,
      "grad_norm": 0.48220935463905334,
      "learning_rate": 9.99258993087049e-06,
      "loss": 1.4354,
      "step": 305
    },
    {
      "epoch": 0.04719582023944938,
      "grad_norm": 0.48419612646102905,
      "learning_rate": 9.99245337697823e-06,
      "loss": 1.4448,
      "step": 306
    },
    {
      "epoch": 0.04735005494611425,
      "grad_norm": 0.477363258600235,
      "learning_rate": 9.992315577279552e-06,
      "loss": 1.4507,
      "step": 307
    },
    {
      "epoch": 0.047504289652779114,
      "grad_norm": 0.48646730184555054,
      "learning_rate": 9.992176531808841e-06,
      "loss": 1.4706,
      "step": 308
    },
    {
      "epoch": 0.04765852435944398,
      "grad_norm": 0.5142022967338562,
      "learning_rate": 9.992036240600792e-06,
      "loss": 1.4283,
      "step": 309
    },
    {
      "epoch": 0.04781275906610885,
      "grad_norm": 0.4945804178714752,
      "learning_rate": 9.991894703690414e-06,
      "loss": 1.501,
      "step": 310
    },
    {
      "epoch": 0.04796699377277372,
      "grad_norm": 0.44045937061309814,
      "learning_rate": 9.991751921113027e-06,
      "loss": 1.4747,
      "step": 311
    },
    {
      "epoch": 0.04812122847943859,
      "grad_norm": 0.46978119015693665,
      "learning_rate": 9.991607892904259e-06,
      "loss": 1.4119,
      "step": 312
    },
    {
      "epoch": 0.04827546318610345,
      "grad_norm": 0.5342782139778137,
      "learning_rate": 9.991462619100049e-06,
      "loss": 1.4878,
      "step": 313
    },
    {
      "epoch": 0.04842969789276832,
      "grad_norm": 0.4728383421897888,
      "learning_rate": 9.991316099736651e-06,
      "loss": 1.4876,
      "step": 314
    },
    {
      "epoch": 0.04858393259943319,
      "grad_norm": 0.47804784774780273,
      "learning_rate": 9.991168334850628e-06,
      "loss": 1.4649,
      "step": 315
    },
    {
      "epoch": 0.048738167306098056,
      "grad_norm": 0.5584465265274048,
      "learning_rate": 9.99101932447885e-06,
      "loss": 1.4685,
      "step": 316
    },
    {
      "epoch": 0.048892402012762924,
      "grad_norm": 0.4746883809566498,
      "learning_rate": 9.990869068658502e-06,
      "loss": 1.4271,
      "step": 317
    },
    {
      "epoch": 0.049046636719427786,
      "grad_norm": 0.42429521679878235,
      "learning_rate": 9.990717567427079e-06,
      "loss": 1.4502,
      "step": 318
    },
    {
      "epoch": 0.049200871426092654,
      "grad_norm": 0.5619769096374512,
      "learning_rate": 9.990564820822386e-06,
      "loss": 1.4848,
      "step": 319
    },
    {
      "epoch": 0.04935510613275752,
      "grad_norm": 0.49077802896499634,
      "learning_rate": 9.99041082888254e-06,
      "loss": 1.4661,
      "step": 320
    },
    {
      "epoch": 0.04950934083942239,
      "grad_norm": 0.43036144971847534,
      "learning_rate": 9.990255591645966e-06,
      "loss": 1.3897,
      "step": 321
    },
    {
      "epoch": 0.04966357554608726,
      "grad_norm": 0.522767961025238,
      "learning_rate": 9.990099109151402e-06,
      "loss": 1.4244,
      "step": 322
    },
    {
      "epoch": 0.04981781025275213,
      "grad_norm": 0.45697134733200073,
      "learning_rate": 9.989941381437897e-06,
      "loss": 1.4535,
      "step": 323
    },
    {
      "epoch": 0.04997204495941699,
      "grad_norm": 0.44421565532684326,
      "learning_rate": 9.98978240854481e-06,
      "loss": 1.4096,
      "step": 324
    },
    {
      "epoch": 0.05012627966608186,
      "grad_norm": 0.46733853220939636,
      "learning_rate": 9.989622190511811e-06,
      "loss": 1.4702,
      "step": 325
    },
    {
      "epoch": 0.05028051437274673,
      "grad_norm": 0.4815812110900879,
      "learning_rate": 9.98946072737888e-06,
      "loss": 1.4503,
      "step": 326
    },
    {
      "epoch": 0.050434749079411596,
      "grad_norm": 0.4651709198951721,
      "learning_rate": 9.989298019186307e-06,
      "loss": 1.4296,
      "step": 327
    },
    {
      "epoch": 0.050588983786076465,
      "grad_norm": 0.45224279165267944,
      "learning_rate": 9.989134065974695e-06,
      "loss": 1.4073,
      "step": 328
    },
    {
      "epoch": 0.05074321849274133,
      "grad_norm": 0.45871537923812866,
      "learning_rate": 9.988968867784958e-06,
      "loss": 1.4613,
      "step": 329
    },
    {
      "epoch": 0.050897453199406195,
      "grad_norm": 0.4504376947879791,
      "learning_rate": 9.988802424658317e-06,
      "loss": 1.4464,
      "step": 330
    },
    {
      "epoch": 0.051051687906071064,
      "grad_norm": 0.48538896441459656,
      "learning_rate": 9.988634736636307e-06,
      "loss": 1.4279,
      "step": 331
    },
    {
      "epoch": 0.05120592261273593,
      "grad_norm": 0.4870400130748749,
      "learning_rate": 9.988465803760772e-06,
      "loss": 1.4856,
      "step": 332
    },
    {
      "epoch": 0.0513601573194008,
      "grad_norm": 0.4800507426261902,
      "learning_rate": 9.988295626073866e-06,
      "loss": 1.4326,
      "step": 333
    },
    {
      "epoch": 0.05151439202606566,
      "grad_norm": 0.4934535026550293,
      "learning_rate": 9.988124203618056e-06,
      "loss": 1.4114,
      "step": 334
    },
    {
      "epoch": 0.05166862673273053,
      "grad_norm": 0.512982189655304,
      "learning_rate": 9.98795153643612e-06,
      "loss": 1.4247,
      "step": 335
    },
    {
      "epoch": 0.0518228614393954,
      "grad_norm": 0.4471703767776489,
      "learning_rate": 9.987777624571145e-06,
      "loss": 1.4521,
      "step": 336
    },
    {
      "epoch": 0.05197709614606027,
      "grad_norm": 0.45655134320259094,
      "learning_rate": 9.987602468066527e-06,
      "loss": 1.4566,
      "step": 337
    },
    {
      "epoch": 0.05213133085272514,
      "grad_norm": 0.4856320321559906,
      "learning_rate": 9.987426066965972e-06,
| "loss": 1.4331, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.05228556555939, | |
| "grad_norm": 0.48707860708236694, | |
| "learning_rate": 9.987248421313503e-06, | |
| "loss": 1.4198, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.05243980026605487, | |
| "grad_norm": 0.482424259185791, | |
| "learning_rate": 9.987069531153446e-06, | |
| "loss": 1.4082, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.052594034972719736, | |
| "grad_norm": 0.5362090468406677, | |
| "learning_rate": 9.986889396530442e-06, | |
| "loss": 1.4742, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.052748269679384605, | |
| "grad_norm": 0.47652480006217957, | |
| "learning_rate": 9.986708017489442e-06, | |
| "loss": 1.4893, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.05290250438604947, | |
| "grad_norm": 0.45364049077033997, | |
| "learning_rate": 9.986525394075707e-06, | |
| "loss": 1.4454, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.053056739092714335, | |
| "grad_norm": 0.5351753234863281, | |
| "learning_rate": 9.986341526334808e-06, | |
| "loss": 1.4633, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.0532109737993792, | |
| "grad_norm": 0.46467578411102295, | |
| "learning_rate": 9.98615641431263e-06, | |
| "loss": 1.4628, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.05336520850604407, | |
| "grad_norm": 0.4417281746864319, | |
| "learning_rate": 9.985970058055359e-06, | |
| "loss": 1.4535, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.05351944321270894, | |
| "grad_norm": 0.5379167795181274, | |
| "learning_rate": 9.985782457609503e-06, | |
| "loss": 1.4062, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.05367367791937381, | |
| "grad_norm": 0.4748031795024872, | |
| "learning_rate": 9.985593613021873e-06, | |
| "loss": 1.423, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.05382791262603867, | |
| "grad_norm": 0.4667428731918335, | |
| "learning_rate": 9.985403524339594e-06, | |
| "loss": 1.4063, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.05398214733270354, | |
| "grad_norm": 0.5456904172897339, | |
| "learning_rate": 9.985212191610101e-06, | |
| "loss": 1.4352, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.05413638203936841, | |
| "grad_norm": 0.438327431678772, | |
| "learning_rate": 9.985019614881137e-06, | |
| "loss": 1.443, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.05429061674603328, | |
| "grad_norm": 0.4759283661842346, | |
| "learning_rate": 9.98482579420076e-06, | |
| "loss": 1.425, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.054444851452698145, | |
| "grad_norm": 0.47276994585990906, | |
| "learning_rate": 9.984630729617331e-06, | |
| "loss": 1.4331, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.054599086159363014, | |
| "grad_norm": 0.4805431663990021, | |
| "learning_rate": 9.98443442117953e-06, | |
| "loss": 1.4726, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.054753320866027876, | |
| "grad_norm": 0.4648710787296295, | |
| "learning_rate": 9.984236868936344e-06, | |
| "loss": 1.4078, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.054907555572692744, | |
| "grad_norm": 0.4526321291923523, | |
| "learning_rate": 9.984038072937065e-06, | |
| "loss": 1.4609, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.05506179027935761, | |
| "grad_norm": 0.4688374996185303, | |
| "learning_rate": 9.983838033231306e-06, | |
| "loss": 1.4013, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.05521602498602248, | |
| "grad_norm": 0.4460720121860504, | |
| "learning_rate": 9.983636749868979e-06, | |
| "loss": 1.4452, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.05537025969268735, | |
| "grad_norm": 0.5248163342475891, | |
| "learning_rate": 9.983434222900316e-06, | |
| "loss": 1.4115, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.05552449439935221, | |
| "grad_norm": 0.5059958100318909, | |
| "learning_rate": 9.983230452375853e-06, | |
| "loss": 1.4302, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.05567872910601708, | |
| "grad_norm": 0.4585413336753845, | |
| "learning_rate": 9.983025438346439e-06, | |
| "loss": 1.4372, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.05583296381268195, | |
| "grad_norm": 0.5210665464401245, | |
| "learning_rate": 9.982819180863233e-06, | |
| "loss": 1.4344, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.05598719851934682, | |
| "grad_norm": 0.5004061460494995, | |
| "learning_rate": 9.982611679977703e-06, | |
| "loss": 1.4463, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.056141433226011686, | |
| "grad_norm": 0.4296887218952179, | |
| "learning_rate": 9.98240293574163e-06, | |
| "loss": 1.421, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.05629566793267655, | |
| "grad_norm": 0.4660370945930481, | |
| "learning_rate": 9.982192948207103e-06, | |
| "loss": 1.4076, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.056449902639341416, | |
| "grad_norm": 0.5059395432472229, | |
| "learning_rate": 9.981981717426522e-06, | |
| "loss": 1.4589, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.056604137346006285, | |
| "grad_norm": 0.48390713334083557, | |
| "learning_rate": 9.981769243452595e-06, | |
| "loss": 1.4529, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.056758372052671154, | |
| "grad_norm": 0.46936866641044617, | |
| "learning_rate": 9.981555526338345e-06, | |
| "loss": 1.4164, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.05691260675933602, | |
| "grad_norm": 0.5150275826454163, | |
| "learning_rate": 9.981340566137103e-06, | |
| "loss": 1.4518, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.057066841466000884, | |
| "grad_norm": 0.4665602743625641, | |
| "learning_rate": 9.981124362902507e-06, | |
| "loss": 1.39, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.05722107617266575, | |
| "grad_norm": 0.46382731199264526, | |
| "learning_rate": 9.980906916688508e-06, | |
| "loss": 1.4427, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.05737531087933062, | |
| "grad_norm": 0.49271711707115173, | |
| "learning_rate": 9.980688227549372e-06, | |
| "loss": 1.3956, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.05752954558599549, | |
| "grad_norm": 0.5274916887283325, | |
| "learning_rate": 9.980468295539663e-06, | |
| "loss": 1.4818, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.05768378029266036, | |
| "grad_norm": 0.4875819981098175, | |
| "learning_rate": 9.980247120714265e-06, | |
| "loss": 1.3953, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.05783801499932522, | |
| "grad_norm": 0.4955143630504608, | |
| "learning_rate": 9.980024703128375e-06, | |
| "loss": 1.3925, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.05799224970599009, | |
| "grad_norm": 0.5156822204589844, | |
| "learning_rate": 9.979801042837487e-06, | |
| "loss": 1.4886, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.05814648441265496, | |
| "grad_norm": 0.4711759090423584, | |
| "learning_rate": 9.979576139897416e-06, | |
| "loss": 1.4357, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.058300719119319826, | |
| "grad_norm": 0.4979749321937561, | |
| "learning_rate": 9.979349994364283e-06, | |
| "loss": 1.4075, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.058454953825984694, | |
| "grad_norm": 0.49984684586524963, | |
| "learning_rate": 9.979122606294523e-06, | |
| "loss": 1.4653, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.05860918853264956, | |
| "grad_norm": 0.47537845373153687, | |
| "learning_rate": 9.978893975744872e-06, | |
| "loss": 1.439, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.058763423239314425, | |
| "grad_norm": 0.46360743045806885, | |
| "learning_rate": 9.978664102772388e-06, | |
| "loss": 1.4371, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.05891765794597929, | |
| "grad_norm": 0.5247479677200317, | |
| "learning_rate": 9.978432987434428e-06, | |
| "loss": 1.4487, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.05907189265264416, | |
| "grad_norm": 0.45880356431007385, | |
| "learning_rate": 9.97820062978867e-06, | |
| "loss": 1.4783, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.05922612735930903, | |
| "grad_norm": 0.471962571144104, | |
| "learning_rate": 9.97796702989309e-06, | |
| "loss": 1.4386, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.0593803620659739, | |
| "grad_norm": 0.5721044540405273, | |
| "learning_rate": 9.977732187805982e-06, | |
| "loss": 1.4859, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.05953459677263876, | |
| "grad_norm": 0.5007787942886353, | |
| "learning_rate": 9.977496103585949e-06, | |
| "loss": 1.4008, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.05968883147930363, | |
| "grad_norm": 0.48961400985717773, | |
| "learning_rate": 9.977258777291902e-06, | |
| "loss": 1.4447, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.0598430661859685, | |
| "grad_norm": 0.5735716223716736, | |
| "learning_rate": 9.977020208983066e-06, | |
| "loss": 1.4565, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.059997300892633366, | |
| "grad_norm": 0.477939635515213, | |
| "learning_rate": 9.976780398718969e-06, | |
| "loss": 1.4011, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.060151535599298235, | |
| "grad_norm": 0.4673793613910675, | |
| "learning_rate": 9.976539346559453e-06, | |
| "loss": 1.4266, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0603057703059631, | |
| "grad_norm": 0.5940758585929871, | |
| "learning_rate": 9.976297052564672e-06, | |
| "loss": 1.4022, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.060460005012627965, | |
| "grad_norm": 0.5857895612716675, | |
| "learning_rate": 9.976053516795085e-06, | |
| "loss": 1.3914, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.060614239719292834, | |
| "grad_norm": 0.5206599235534668, | |
| "learning_rate": 9.975808739311465e-06, | |
| "loss": 1.4112, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.0607684744259577, | |
| "grad_norm": 0.8916546702384949, | |
| "learning_rate": 9.975562720174892e-06, | |
| "loss": 1.4316, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.06092270913262257, | |
| "grad_norm": 0.5995873808860779, | |
| "learning_rate": 9.97531545944676e-06, | |
| "loss": 1.4445, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.06107694383928743, | |
| "grad_norm": 0.6093815565109253, | |
| "learning_rate": 9.975066957188766e-06, | |
| "loss": 1.4183, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.0612311785459523, | |
| "grad_norm": 15.272882461547852, | |
| "learning_rate": 9.974817213462924e-06, | |
| "loss": 1.3998, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.06138541325261717, | |
| "grad_norm": 0.543540358543396, | |
| "learning_rate": 9.974566228331552e-06, | |
| "loss": 1.4498, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.06153964795928204, | |
| "grad_norm": 0.4766438901424408, | |
| "learning_rate": 9.974314001857281e-06, | |
| "loss": 1.4315, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.06169388266594691, | |
| "grad_norm": 0.5161774754524231, | |
| "learning_rate": 9.974060534103054e-06, | |
| "loss": 1.3846, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06184811737261177, | |
| "grad_norm": 0.44010746479034424, | |
| "learning_rate": 9.973805825132116e-06, | |
| "loss": 1.4421, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.06200235207927664, | |
| "grad_norm": 0.48070842027664185, | |
| "learning_rate": 9.973549875008029e-06, | |
| "loss": 1.4278, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.062156586785941506, | |
| "grad_norm": 0.47066718339920044, | |
| "learning_rate": 9.973292683794665e-06, | |
| "loss": 1.4244, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.062310821492606375, | |
| "grad_norm": 0.4821130335330963, | |
| "learning_rate": 9.973034251556197e-06, | |
| "loss": 1.492, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.06246505619927124, | |
| "grad_norm": 0.47957906126976013, | |
| "learning_rate": 9.972774578357118e-06, | |
| "loss": 1.449, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.06261929090593611, | |
| "grad_norm": 0.42835289239883423, | |
| "learning_rate": 9.972513664262225e-06, | |
| "loss": 1.4245, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.06277352561260098, | |
| "grad_norm": 0.4677746891975403, | |
| "learning_rate": 9.972251509336626e-06, | |
| "loss": 1.4087, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.06292776031926585, | |
| "grad_norm": 0.43734127283096313, | |
| "learning_rate": 9.971988113645737e-06, | |
| "loss": 1.4076, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.0630819950259307, | |
| "grad_norm": 0.5043666362762451, | |
| "learning_rate": 9.971723477255289e-06, | |
| "loss": 1.4518, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.06323622973259557, | |
| "grad_norm": 0.4832194447517395, | |
| "learning_rate": 9.971457600231316e-06, | |
| "loss": 1.4511, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.06339046443926044, | |
| "grad_norm": 0.4825354516506195, | |
| "learning_rate": 9.971190482640166e-06, | |
| "loss": 1.4213, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.06354469914592531, | |
| "grad_norm": 0.5067551136016846, | |
| "learning_rate": 9.970922124548492e-06, | |
| "loss": 1.4389, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.06369893385259018, | |
| "grad_norm": 0.4832996428012848, | |
| "learning_rate": 9.970652526023262e-06, | |
| "loss": 1.4497, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.06385316855925505, | |
| "grad_norm": 0.4582710862159729, | |
| "learning_rate": 9.970381687131751e-06, | |
| "loss": 1.4547, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.06400740326591992, | |
| "grad_norm": 0.45857349038124084, | |
| "learning_rate": 9.970109607941544e-06, | |
| "loss": 1.4107, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.06416163797258478, | |
| "grad_norm": 0.4977855086326599, | |
| "learning_rate": 9.969836288520534e-06, | |
| "loss": 1.4477, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.06431587267924965, | |
| "grad_norm": 0.4536735713481903, | |
| "learning_rate": 9.969561728936922e-06, | |
| "loss": 1.3812, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.06447010738591452, | |
| "grad_norm": 0.4105980098247528, | |
| "learning_rate": 9.969285929259226e-06, | |
| "loss": 1.399, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.06462434209257938, | |
| "grad_norm": 0.479280948638916, | |
| "learning_rate": 9.969008889556269e-06, | |
| "loss": 1.4202, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.06477857679924424, | |
| "grad_norm": 0.44376733899116516, | |
| "learning_rate": 9.968730609897177e-06, | |
| "loss": 1.4232, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.06493281150590911, | |
| "grad_norm": 0.41896483302116394, | |
| "learning_rate": 9.968451090351394e-06, | |
| "loss": 1.4072, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.06508704621257398, | |
| "grad_norm": 0.5083034634590149, | |
| "learning_rate": 9.968170330988673e-06, | |
| "loss": 1.4272, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.06524128091923885, | |
| "grad_norm": 0.44754496216773987, | |
| "learning_rate": 9.967888331879073e-06, | |
| "loss": 1.4526, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.06539551562590372, | |
| "grad_norm": 0.4666605293750763, | |
| "learning_rate": 9.96760509309296e-06, | |
| "loss": 1.4244, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.06554975033256859, | |
| "grad_norm": 0.4701414108276367, | |
| "learning_rate": 9.967320614701019e-06, | |
| "loss": 1.4179, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.06570398503923346, | |
| "grad_norm": 0.4674292206764221, | |
| "learning_rate": 9.967034896774233e-06, | |
| "loss": 1.4335, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.06585821974589832, | |
| "grad_norm": 0.44576194882392883, | |
| "learning_rate": 9.9667479393839e-06, | |
| "loss": 1.3998, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.0660124544525632, | |
| "grad_norm": 0.4495324194431305, | |
| "learning_rate": 9.966459742601631e-06, | |
| "loss": 1.4125, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.06616668915922806, | |
| "grad_norm": 0.5072752833366394, | |
| "learning_rate": 9.966170306499337e-06, | |
| "loss": 1.4233, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.06632092386589292, | |
| "grad_norm": 0.4577788710594177, | |
| "learning_rate": 9.965879631149245e-06, | |
| "loss": 1.3595, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.06647515857255779, | |
| "grad_norm": 0.5432767271995544, | |
| "learning_rate": 9.96558771662389e-06, | |
| "loss": 1.4081, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.06662939327922265, | |
| "grad_norm": 0.5130487084388733, | |
| "learning_rate": 9.965294562996118e-06, | |
| "loss": 1.4438, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.06678362798588752, | |
| "grad_norm": 0.4710625410079956, | |
| "learning_rate": 9.965000170339076e-06, | |
| "loss": 1.3916, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.06693786269255239, | |
| "grad_norm": 0.4908548891544342, | |
| "learning_rate": 9.964704538726231e-06, | |
| "loss": 1.3885, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.06709209739921726, | |
| "grad_norm": 0.5099604725837708, | |
| "learning_rate": 9.964407668231354e-06, | |
| "loss": 1.4404, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.06724633210588213, | |
| "grad_norm": 0.4541493356227875, | |
| "learning_rate": 9.964109558928524e-06, | |
| "loss": 1.4706, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.067400566812547, | |
| "grad_norm": 0.5716336965560913, | |
| "learning_rate": 9.96381021089213e-06, | |
| "loss": 1.395, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.06755480151921187, | |
| "grad_norm": 0.5119542479515076, | |
| "learning_rate": 9.963509624196871e-06, | |
| "loss": 1.4332, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.06770903622587673, | |
| "grad_norm": 0.4324071407318115, | |
| "learning_rate": 9.963207798917758e-06, | |
| "loss": 1.4372, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.06786327093254159, | |
| "grad_norm": 0.46256715059280396, | |
| "learning_rate": 9.962904735130105e-06, | |
| "loss": 1.425, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.06801750563920646, | |
| "grad_norm": 0.5053929686546326, | |
| "learning_rate": 9.962600432909537e-06, | |
| "loss": 1.4286, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.06817174034587133, | |
| "grad_norm": 0.46784093976020813, | |
| "learning_rate": 9.96229489233199e-06, | |
| "loss": 1.4165, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.0683259750525362, | |
| "grad_norm": 0.4608568549156189, | |
| "learning_rate": 9.961988113473708e-06, | |
| "loss": 1.4143, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.06848020975920106, | |
| "grad_norm": 0.5083256363868713, | |
| "learning_rate": 9.961680096411245e-06, | |
| "loss": 1.4202, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.06863444446586593, | |
| "grad_norm": 0.4442773163318634, | |
| "learning_rate": 9.961370841221461e-06, | |
| "loss": 1.3899, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.0687886791725308, | |
| "grad_norm": 0.42745813727378845, | |
| "learning_rate": 9.961060347981529e-06, | |
| "loss": 1.4039, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.06894291387919567, | |
| "grad_norm": 0.5610799193382263, | |
| "learning_rate": 9.960748616768927e-06, | |
| "loss": 1.4298, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.06909714858586054, | |
| "grad_norm": 0.47480154037475586, | |
| "learning_rate": 9.960435647661444e-06, | |
| "loss": 1.4292, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.0692513832925254, | |
| "grad_norm": 0.46681931614875793, | |
| "learning_rate": 9.960121440737179e-06, | |
| "loss": 1.3831, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.06940561799919026, | |
| "grad_norm": 0.47058233618736267, | |
| "learning_rate": 9.959805996074538e-06, | |
| "loss": 1.4064, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.06955985270585513, | |
| "grad_norm": 0.42082738876342773, | |
| "learning_rate": 9.959489313752235e-06, | |
| "loss": 1.4319, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.06971408741252, | |
| "grad_norm": 0.4539491534233093, | |
| "learning_rate": 9.959171393849296e-06, | |
| "loss": 1.4184, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.06986832211918487, | |
| "grad_norm": 0.45387235283851624, | |
| "learning_rate": 9.958852236445054e-06, | |
| "loss": 1.4268, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.07002255682584974, | |
| "grad_norm": 0.4910185933113098, | |
| "learning_rate": 9.958531841619149e-06, | |
| "loss": 1.4459, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.0701767915325146, | |
| "grad_norm": 0.46503302454948425, | |
| "learning_rate": 9.958210209451534e-06, | |
| "loss": 1.3797, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.07033102623917947, | |
| "grad_norm": 0.4434162974357605, | |
| "learning_rate": 9.957887340022467e-06, | |
| "loss": 1.3723, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.07048526094584434, | |
| "grad_norm": 0.48650768399238586, | |
| "learning_rate": 9.957563233412515e-06, | |
| "loss": 1.4484, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.07063949565250921, | |
| "grad_norm": 0.48305729031562805, | |
| "learning_rate": 9.957237889702559e-06, | |
| "loss": 1.4251, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.07079373035917408, | |
| "grad_norm": 0.4370194971561432, | |
| "learning_rate": 9.95691130897378e-06, | |
| "loss": 1.3741, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.07094796506583895, | |
| "grad_norm": 0.47357454895973206, | |
| "learning_rate": 9.956583491307674e-06, | |
| "loss": 1.4555, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.0711021997725038, | |
| "grad_norm": 0.4653128981590271, | |
| "learning_rate": 9.956254436786045e-06, | |
| "loss": 1.4068, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.07125643447916867, | |
| "grad_norm": 0.4576094448566437, | |
| "learning_rate": 9.955924145491005e-06, | |
| "loss": 1.4189, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.07141066918583354, | |
| "grad_norm": 0.5161656141281128, | |
| "learning_rate": 9.955592617504972e-06, | |
| "loss": 1.4005, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.07156490389249841, | |
| "grad_norm": 0.44188177585601807, | |
| "learning_rate": 9.955259852910675e-06, | |
| "loss": 1.4089, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.07171913859916328, | |
| "grad_norm": 0.4291110038757324, | |
| "learning_rate": 9.954925851791153e-06, | |
| "loss": 1.3606, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.07187337330582814, | |
| "grad_norm": 0.46663251519203186, | |
| "learning_rate": 9.954590614229752e-06, | |
| "loss": 1.4353, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.07202760801249301, | |
| "grad_norm": 0.4870257079601288, | |
| "learning_rate": 9.954254140310125e-06, | |
| "loss": 1.414, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.07218184271915788, | |
| "grad_norm": 0.43861398100852966, | |
| "learning_rate": 9.953916430116234e-06, | |
| "loss": 1.3677, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.07233607742582275, | |
| "grad_norm": 0.47025996446609497, | |
| "learning_rate": 9.953577483732355e-06, | |
| "loss": 1.3961, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.07249031213248762, | |
| "grad_norm": 0.450141966342926, | |
| "learning_rate": 9.953237301243064e-06, | |
| "loss": 1.4271, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07264454683915247, | |
| "grad_norm": 0.46584373712539673, | |
| "learning_rate": 9.952895882733249e-06, | |
| "loss": 1.3757, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.07279878154581734, | |
| "grad_norm": 0.5051293969154358, | |
| "learning_rate": 9.95255322828811e-06, | |
| "loss": 1.4145, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.07295301625248221, | |
| "grad_norm": 0.4585671126842499, | |
| "learning_rate": 9.95220933799315e-06, | |
| "loss": 1.4621, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.07310725095914708, | |
| "grad_norm": 0.458593487739563, | |
| "learning_rate": 9.951864211934183e-06, | |
| "loss": 1.3547, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.07326148566581195, | |
| "grad_norm": 0.47871628403663635, | |
| "learning_rate": 9.951517850197331e-06, | |
| "loss": 1.4152, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.07341572037247682, | |
| "grad_norm": 0.44403016567230225, | |
| "learning_rate": 9.951170252869027e-06, | |
| "loss": 1.4207, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.07356995507914169, | |
| "grad_norm": 0.4658779203891754, | |
| "learning_rate": 9.950821420036006e-06, | |
| "loss": 1.4242, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.07372418978580655, | |
| "grad_norm": 0.4666624069213867, | |
| "learning_rate": 9.950471351785317e-06, | |
| "loss": 1.3814, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.07387842449247142, | |
| "grad_norm": 0.4759904444217682, | |
| "learning_rate": 9.950120048204314e-06, | |
| "loss": 1.4177, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.07403265919913629, | |
| "grad_norm": 0.4552132785320282, | |
| "learning_rate": 9.949767509380661e-06, | |
| "loss": 1.3858, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.07418689390580115, | |
| "grad_norm": 0.4564070999622345, | |
| "learning_rate": 9.949413735402332e-06, | |
| "loss": 1.3558, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.07434112861246601, | |
| "grad_norm": 0.5005853772163391, | |
| "learning_rate": 9.949058726357603e-06, | |
| "loss": 1.4229, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.07449536331913088, | |
| "grad_norm": 0.44161200523376465, | |
| "learning_rate": 9.948702482335067e-06, | |
| "loss": 1.3854, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.07464959802579575, | |
| "grad_norm": 0.43942132592201233, | |
| "learning_rate": 9.948345003423615e-06, | |
| "loss": 1.3854, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.07480383273246062, | |
| "grad_norm": 0.5212045907974243, | |
| "learning_rate": 9.947986289712456e-06, | |
| "loss": 1.4402, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.07495806743912549, | |
| "grad_norm": 0.44679194688796997, | |
| "learning_rate": 9.9476263412911e-06, | |
| "loss": 1.4045, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.07511230214579036, | |
| "grad_norm": 0.44435495138168335, | |
| "learning_rate": 9.94726515824937e-06, | |
| "loss": 1.4144, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.07526653685245523, | |
| "grad_norm": 0.4690037667751312, | |
| "learning_rate": 9.946902740677392e-06, | |
| "loss": 1.4128, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.0754207715591201, | |
| "grad_norm": 0.47753167152404785, | |
| "learning_rate": 9.946539088665605e-06, | |
| "loss": 1.4132, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.07557500626578496, | |
| "grad_norm": 0.4679515063762665, | |
| "learning_rate": 9.946174202304755e-06, | |
| "loss": 1.4144, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.07572924097244983, | |
| "grad_norm": 0.5085904598236084, | |
| "learning_rate": 9.945808081685893e-06, | |
| "loss": 1.4096, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.07588347567911469, | |
| "grad_norm": 0.4496731162071228, | |
| "learning_rate": 9.94544072690038e-06, | |
| "loss": 1.3907, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.07603771038577956, | |
| "grad_norm": 0.4731052815914154, | |
| "learning_rate": 9.945072138039884e-06, | |
| "loss": 1.3817, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.07619194509244442, | |
| "grad_norm": 0.49058064818382263, | |
| "learning_rate": 9.944702315196386e-06, | |
| "loss": 1.4072, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.07634617979910929, | |
| "grad_norm": 0.3971802890300751, | |
| "learning_rate": 9.944331258462167e-06, | |
| "loss": 1.4191, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.07650041450577416, | |
| "grad_norm": 0.44060471653938293, | |
| "learning_rate": 9.943958967929821e-06, | |
| "loss": 1.3959, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.07665464921243903, | |
| "grad_norm": 0.5203728675842285, | |
| "learning_rate": 9.943585443692248e-06, | |
| "loss": 1.4548, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.0768088839191039, | |
| "grad_norm": 0.475394606590271, | |
| "learning_rate": 9.943210685842658e-06, | |
| "loss": 1.4231, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.07696311862576877, | |
| "grad_norm": 0.4179852604866028, | |
| "learning_rate": 9.942834694474565e-06, | |
| "loss": 1.4058, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.07711735333243364, | |
| "grad_norm": 0.4127453863620758, | |
| "learning_rate": 9.942457469681794e-06, | |
| "loss": 1.3713, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0772715880390985, | |
| "grad_norm": 0.42606067657470703, | |
| "learning_rate": 9.942079011558478e-06, | |
| "loss": 1.4487, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.07742582274576336, | |
| "grad_norm": 0.4229513108730316, | |
| "learning_rate": 9.941699320199056e-06, | |
| "loss": 1.3565, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.07758005745242823, | |
| "grad_norm": 0.44844040274620056, | |
| "learning_rate": 9.941318395698277e-06, | |
| "loss": 1.4019, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.0777342921590931, | |
| "grad_norm": 0.48133325576782227, | |
| "learning_rate": 9.940936238151192e-06, | |
| "loss": 1.4171, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.07788852686575796, | |
| "grad_norm": 0.47930583357810974, | |
| "learning_rate": 9.940552847653166e-06, | |
| "loss": 1.373, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.07804276157242283, | |
| "grad_norm": 0.48454520106315613, | |
| "learning_rate": 9.940168224299872e-06, | |
| "loss": 1.3805, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.0781969962790877, | |
| "grad_norm": 0.5197977423667908, | |
| "learning_rate": 9.939782368187284e-06, | |
| "loss": 1.4257, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.07835123098575257, | |
| "grad_norm": 0.4360477328300476, | |
| "learning_rate": 9.93939527941169e-06, | |
| "loss": 1.4139, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.07850546569241744, | |
| "grad_norm": 0.45959925651550293, | |
| "learning_rate": 9.939006958069682e-06, | |
| "loss": 1.4148, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.07865970039908231, | |
| "grad_norm": 0.5091875195503235, | |
| "learning_rate": 9.938617404258163e-06, | |
| "loss": 1.4721, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.07881393510574718, | |
| "grad_norm": 0.4871944785118103, | |
| "learning_rate": 9.93822661807434e-06, | |
| "loss": 1.4325, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.07896816981241203, | |
| "grad_norm": 0.4516337513923645, | |
| "learning_rate": 9.937834599615728e-06, | |
| "loss": 1.3699, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.0791224045190769, | |
| "grad_norm": 0.4808272421360016, | |
| "learning_rate": 9.937441348980152e-06, | |
| "loss": 1.3957, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.07927663922574177, | |
| "grad_norm": 0.43793177604675293, | |
| "learning_rate": 9.937046866265742e-06, | |
| "loss": 1.3617, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.07943087393240664, | |
| "grad_norm": 0.44577157497406006, | |
| "learning_rate": 9.93665115157094e-06, | |
| "loss": 1.3803, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.0795851086390715, | |
| "grad_norm": 0.4638996720314026, | |
| "learning_rate": 9.936254204994487e-06, | |
| "loss": 1.4105, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.07973934334573637, | |
| "grad_norm": 0.46314525604248047, | |
| "learning_rate": 9.935856026635436e-06, | |
| "loss": 1.4045, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.07989357805240124, | |
| "grad_norm": 0.4436893165111542, | |
| "learning_rate": 9.935456616593151e-06, | |
| "loss": 1.3591, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.08004781275906611, | |
| "grad_norm": 0.5167062282562256, | |
| "learning_rate": 9.935055974967299e-06, | |
| "loss": 1.4185, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.08020204746573098, | |
| "grad_norm": 0.4903646409511566, | |
| "learning_rate": 9.934654101857854e-06, | |
| "loss": 1.4204, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.08035628217239585, | |
| "grad_norm": 0.41959303617477417, | |
| "learning_rate": 9.9342509973651e-06, | |
| "loss": 1.4077, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.08051051687906072, | |
| "grad_norm": 0.47519707679748535, | |
| "learning_rate": 9.933846661589628e-06, | |
| "loss": 1.3868, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.08066475158572557, | |
| "grad_norm": 0.42335045337677, | |
| "learning_rate": 9.93344109463233e-06, | |
| "loss": 1.392, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.08081898629239044, | |
| "grad_norm": 0.4691239595413208, | |
| "learning_rate": 9.933034296594415e-06, | |
| "loss": 1.4053, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.08097322099905531, | |
| "grad_norm": 0.49240291118621826, | |
| "learning_rate": 9.932626267577394e-06, | |
| "loss": 1.4047, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.08112745570572018, | |
| "grad_norm": 0.4474901556968689, | |
| "learning_rate": 9.932217007683085e-06, | |
| "loss": 1.3939, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.08128169041238505, | |
| "grad_norm": 0.45516321063041687, | |
| "learning_rate": 9.931806517013612e-06, | |
| "loss": 1.3773, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.08143592511904992, | |
| "grad_norm": 0.4424881339073181, | |
| "learning_rate": 9.931394795671412e-06, | |
| "loss": 1.3964, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.08159015982571478, | |
| "grad_norm": 0.46286776661872864, | |
| "learning_rate": 9.930981843759222e-06, | |
| "loss": 1.4642, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.08174439453237965, | |
| "grad_norm": 0.49491822719573975, | |
| "learning_rate": 9.930567661380089e-06, | |
| "loss": 1.446, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.08189862923904452, | |
| "grad_norm": 0.4691407084465027, | |
| "learning_rate": 9.930152248637371e-06, | |
| "loss": 1.386, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.08205286394570939, | |
| "grad_norm": 0.48742979764938354, | |
| "learning_rate": 9.929735605634722e-06, | |
| "loss": 1.4453, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.08220709865237424, | |
| "grad_norm": 0.4329613745212555, | |
| "learning_rate": 9.929317732476118e-06, | |
| "loss": 1.402, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.08236133335903911, | |
| "grad_norm": 0.4595591425895691, | |
| "learning_rate": 9.92889862926583e-06, | |
| "loss": 1.3863, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.08251556806570398, | |
| "grad_norm": 0.46652889251708984, | |
| "learning_rate": 9.928478296108442e-06, | |
| "loss": 1.4018, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.08266980277236885, | |
| "grad_norm": 0.45996397733688354, | |
| "learning_rate": 9.928056733108842e-06, | |
| "loss": 1.3791, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.08282403747903372, | |
| "grad_norm": 0.4811365008354187, | |
| "learning_rate": 9.927633940372226e-06, | |
| "loss": 1.4108, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.08297827218569859, | |
| "grad_norm": 0.5085799694061279, | |
| "learning_rate": 9.927209918004095e-06, | |
| "loss": 1.432, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.08313250689236346, | |
| "grad_norm": 0.4167533218860626, | |
| "learning_rate": 9.926784666110263e-06, | |
| "loss": 1.379, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.08328674159902832, | |
| "grad_norm": 0.4424698054790497, | |
| "learning_rate": 9.926358184796843e-06, | |
| "loss": 1.3841, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.08344097630569319, | |
| "grad_norm": 0.527334451675415, | |
| "learning_rate": 9.925930474170258e-06, | |
| "loss": 1.4124, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.08359521101235806, | |
| "grad_norm": 0.458374947309494, | |
| "learning_rate": 9.925501534337241e-06, | |
| "loss": 1.3867, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.08374944571902292, | |
| "grad_norm": 0.45460638403892517, | |
| "learning_rate": 9.925071365404826e-06, | |
| "loss": 1.3858, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.08390368042568778, | |
| "grad_norm": 0.5177557468414307, | |
| "learning_rate": 9.924639967480358e-06, | |
| "loss": 1.3962, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.08405791513235265, | |
| "grad_norm": 0.47196143865585327, | |
| "learning_rate": 9.924207340671486e-06, | |
| "loss": 1.3993, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.08421214983901752, | |
| "grad_norm": 0.4587273895740509, | |
| "learning_rate": 9.92377348508617e-06, | |
| "loss": 1.3671, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.08436638454568239, | |
| "grad_norm": 0.5247725248336792, | |
| "learning_rate": 9.923338400832668e-06, | |
| "loss": 1.4056, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.08452061925234726, | |
| "grad_norm": 0.436396062374115, | |
| "learning_rate": 9.922902088019552e-06, | |
| "loss": 1.412, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.08467485395901213, | |
| "grad_norm": 0.5055376887321472, | |
| "learning_rate": 9.922464546755702e-06, | |
| "loss": 1.431, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.084829088665677, | |
| "grad_norm": 0.4799085259437561, | |
| "learning_rate": 9.922025777150299e-06, | |
| "loss": 1.402, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.08498332337234187, | |
| "grad_norm": 0.46313726902008057, | |
| "learning_rate": 9.92158577931283e-06, | |
| "loss": 1.4204, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.08513755807900673, | |
| "grad_norm": 0.4481837749481201, | |
| "learning_rate": 9.921144553353095e-06, | |
| "loss": 1.3986, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.0852917927856716, | |
| "grad_norm": 0.4738650619983673, | |
| "learning_rate": 9.920702099381193e-06, | |
| "loss": 1.4068, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.08544602749233646, | |
| "grad_norm": 0.44733938574790955, | |
| "learning_rate": 9.920258417507538e-06, | |
| "loss": 1.4144, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.08560026219900133, | |
| "grad_norm": 0.4281189739704132, | |
| "learning_rate": 9.919813507842842e-06, | |
| "loss": 1.3828, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.0857544969056662, | |
| "grad_norm": 0.4548930823802948, | |
| "learning_rate": 9.919367370498125e-06, | |
| "loss": 1.4117, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.08590873161233106, | |
| "grad_norm": 0.4846823513507843, | |
| "learning_rate": 9.91892000558472e-06, | |
| "loss": 1.4223, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.08606296631899593, | |
| "grad_norm": 0.42776811122894287, | |
| "learning_rate": 9.918471413214257e-06, | |
| "loss": 1.358, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.0862172010256608, | |
| "grad_norm": 0.4271652102470398, | |
| "learning_rate": 9.91802159349868e-06, | |
| "loss": 1.3906, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.08637143573232567, | |
| "grad_norm": 0.4573461413383484, | |
| "learning_rate": 9.917570546550235e-06, | |
| "loss": 1.4093, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.08652567043899054, | |
| "grad_norm": 0.4535485506057739, | |
| "learning_rate": 9.917118272481474e-06, | |
| "loss": 1.3617, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.0866799051456554, | |
| "grad_norm": 0.42113131284713745, | |
| "learning_rate": 9.91666477140526e-06, | |
| "loss": 1.4072, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.08683413985232027, | |
| "grad_norm": 0.4826679527759552, | |
| "learning_rate": 9.916210043434753e-06, | |
| "loss": 1.3735, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.08698837455898513, | |
| "grad_norm": 0.4485046863555908, | |
| "learning_rate": 9.915754088683431e-06, | |
| "loss": 1.4153, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.08714260926565, | |
| "grad_norm": 0.47439292073249817, | |
| "learning_rate": 9.915296907265068e-06, | |
| "loss": 1.3989, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.08729684397231487, | |
| "grad_norm": 0.43979352712631226, | |
| "learning_rate": 9.914838499293751e-06, | |
| "loss": 1.3966, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.08745107867897974, | |
| "grad_norm": 0.46793246269226074, | |
| "learning_rate": 9.914378864883866e-06, | |
| "loss": 1.4114, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.0876053133856446, | |
| "grad_norm": 0.41661736369132996, | |
| "learning_rate": 9.913918004150115e-06, | |
| "loss": 1.3494, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.08775954809230947, | |
| "grad_norm": 0.4547062814235687, | |
| "learning_rate": 9.913455917207495e-06, | |
| "loss": 1.3858, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.08791378279897434, | |
| "grad_norm": 0.45024025440216064, | |
| "learning_rate": 9.912992604171318e-06, | |
| "loss": 1.4006, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.08806801750563921, | |
| "grad_norm": 0.4168526530265808, | |
| "learning_rate": 9.912528065157195e-06, | |
| "loss": 1.3758, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.08822225221230408, | |
| "grad_norm": 0.5073679089546204, | |
| "learning_rate": 9.912062300281046e-06, | |
| "loss": 1.3924, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.08837648691896895, | |
| "grad_norm": 0.4602091610431671, | |
| "learning_rate": 9.9115953096591e-06, | |
| "loss": 1.3909, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.0885307216256338, | |
| "grad_norm": 0.4300541877746582, | |
| "learning_rate": 9.91112709340789e-06, | |
| "loss": 1.3835, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.08868495633229867, | |
| "grad_norm": 0.4745858609676361, | |
| "learning_rate": 9.91065765164425e-06, | |
| "loss": 1.4162, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.08883919103896354, | |
| "grad_norm": 0.4340323805809021, | |
| "learning_rate": 9.910186984485321e-06, | |
| "loss": 1.4063, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.08899342574562841, | |
| "grad_norm": 0.4576251804828644, | |
| "learning_rate": 9.90971509204856e-06, | |
| "loss": 1.4024, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.08914766045229328, | |
| "grad_norm": 0.4342866539955139, | |
| "learning_rate": 9.909241974451716e-06, | |
| "loss": 1.3802, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.08930189515895814, | |
| "grad_norm": 0.42924508452415466, | |
| "learning_rate": 9.908767631812852e-06, | |
| "loss": 1.4092, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.08945612986562301, | |
| "grad_norm": 0.46538642048835754, | |
| "learning_rate": 9.908292064250333e-06, | |
| "loss": 1.4128, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.08961036457228788, | |
| "grad_norm": 0.4348650574684143, | |
| "learning_rate": 9.907815271882834e-06, | |
| "loss": 1.3565, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.08976459927895275, | |
| "grad_norm": 0.43505600094795227, | |
| "learning_rate": 9.90733725482933e-06, | |
| "loss": 1.4258, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.08991883398561762, | |
| "grad_norm": 0.4212532341480255, | |
| "learning_rate": 9.906858013209108e-06, | |
| "loss": 1.3833, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.09007306869228249, | |
| "grad_norm": 0.43133947253227234, | |
| "learning_rate": 9.906377547141751e-06, | |
| "loss": 1.3456, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.09022730339894734, | |
| "grad_norm": 0.45998647809028625, | |
| "learning_rate": 9.90589585674716e-06, | |
| "loss": 1.4233, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.09038153810561221, | |
| "grad_norm": 0.4433024227619171, | |
| "learning_rate": 9.905412942145531e-06, | |
| "loss": 1.3847, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.09053577281227708, | |
| "grad_norm": 0.45453864336013794, | |
| "learning_rate": 9.90492880345737e-06, | |
| "loss": 1.3716, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.09069000751894195, | |
| "grad_norm": 0.4899587333202362, | |
| "learning_rate": 9.904443440803488e-06, | |
| "loss": 1.393, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.09084424222560682, | |
| "grad_norm": 0.42056283354759216, | |
| "learning_rate": 9.903956854305003e-06, | |
| "loss": 1.3851, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.09099847693227169, | |
| "grad_norm": 0.45995429158210754, | |
| "learning_rate": 9.903469044083336e-06, | |
| "loss": 1.3902, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.09115271163893655, | |
| "grad_norm": 0.4808293581008911, | |
| "learning_rate": 9.902980010260212e-06, | |
| "loss": 1.4228, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.09130694634560142, | |
| "grad_norm": 0.40312135219573975, | |
| "learning_rate": 9.902489752957667e-06, | |
| "loss": 1.3703, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.09146118105226629, | |
| "grad_norm": 0.45251020789146423, | |
| "learning_rate": 9.901998272298037e-06, | |
| "loss": 1.3894, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.09161541575893116, | |
| "grad_norm": 0.48815882205963135, | |
| "learning_rate": 9.901505568403963e-06, | |
| "loss": 1.4019, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.09176965046559601, | |
| "grad_norm": 0.4210294485092163, | |
| "learning_rate": 9.901011641398398e-06, | |
| "loss": 1.4528, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.09192388517226088, | |
| "grad_norm": 0.4283800423145294, | |
| "learning_rate": 9.900516491404592e-06, | |
| "loss": 1.3665, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.09207811987892575, | |
| "grad_norm": 0.5013861060142517, | |
| "learning_rate": 9.900020118546105e-06, | |
| "loss": 1.3941, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.09223235458559062, | |
| "grad_norm": 0.48357877135276794, | |
| "learning_rate": 9.899522522946799e-06, | |
| "loss": 1.4081, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.09238658929225549, | |
| "grad_norm": 0.46260568499565125, | |
| "learning_rate": 9.899023704730845e-06, | |
| "loss": 1.3829, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.09254082399892036, | |
| "grad_norm": 0.4724692702293396, | |
| "learning_rate": 9.898523664022717e-06, | |
| "loss": 1.3559, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09269505870558523, | |
| "grad_norm": 0.45046794414520264, | |
| "learning_rate": 9.898022400947194e-06, | |
| "loss": 1.4507, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.0928492934122501, | |
| "grad_norm": 0.4409693479537964, | |
| "learning_rate": 9.897519915629358e-06, | |
| "loss": 1.3446, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.09300352811891496, | |
| "grad_norm": 0.4491008222103119, | |
| "learning_rate": 9.897016208194604e-06, | |
| "loss": 1.3471, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.09315776282557983, | |
| "grad_norm": 0.430292546749115, | |
| "learning_rate": 9.896511278768619e-06, | |
| "loss": 1.4139, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.09331199753224469, | |
| "grad_norm": 0.4214726984500885, | |
| "learning_rate": 9.896005127477403e-06, | |
| "loss": 1.3987, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.09346623223890956, | |
| "grad_norm": 0.4535372853279114, | |
| "learning_rate": 9.895497754447266e-06, | |
| "loss": 1.3829, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.09362046694557442, | |
| "grad_norm": 0.4810298979282379, | |
| "learning_rate": 9.894989159804811e-06, | |
| "loss": 1.4361, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.09377470165223929, | |
| "grad_norm": 0.4526076912879944, | |
| "learning_rate": 9.894479343676953e-06, | |
| "loss": 1.3716, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.09392893635890416, | |
| "grad_norm": 0.47583112120628357, | |
| "learning_rate": 9.89396830619091e-06, | |
| "loss": 1.4015, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.09408317106556903, | |
| "grad_norm": 0.4426989257335663, | |
| "learning_rate": 9.893456047474209e-06, | |
| "loss": 1.4258, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.0942374057722339, | |
| "grad_norm": 0.44269248843193054, | |
| "learning_rate": 9.892942567654672e-06, | |
| "loss": 1.4032, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.09439164047889877, | |
| "grad_norm": 0.4720871150493622, | |
| "learning_rate": 9.892427866860436e-06, | |
| "loss": 1.349, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.09454587518556364, | |
| "grad_norm": 0.4475248456001282, | |
| "learning_rate": 9.891911945219939e-06, | |
| "loss": 1.4319, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.0947001098922285, | |
| "grad_norm": 0.42031311988830566, | |
| "learning_rate": 9.89139480286192e-06, | |
| "loss": 1.3978, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.09485434459889337, | |
| "grad_norm": 0.4473288953304291, | |
| "learning_rate": 9.890876439915426e-06, | |
| "loss": 1.372, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.09500857930555823, | |
| "grad_norm": 0.47153615951538086, | |
| "learning_rate": 9.89035685650981e-06, | |
| "loss": 1.4072, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.0951628140122231, | |
| "grad_norm": 0.43978315591812134, | |
| "learning_rate": 9.889836052774726e-06, | |
| "loss": 1.373, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.09531704871888796, | |
| "grad_norm": 0.47577616572380066, | |
| "learning_rate": 9.889314028840136e-06, | |
| "loss": 1.3831, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.09547128342555283, | |
| "grad_norm": 0.49060899019241333, | |
| "learning_rate": 9.888790784836302e-06, | |
| "loss": 1.379, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.0956255181322177, | |
| "grad_norm": 0.45542556047439575, | |
| "learning_rate": 9.888266320893797e-06, | |
| "loss": 1.3848, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.09577975283888257, | |
| "grad_norm": 0.42567890882492065, | |
| "learning_rate": 9.887740637143492e-06, | |
| "loss": 1.3765, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.09593398754554744, | |
| "grad_norm": 0.46832770109176636, | |
| "learning_rate": 9.887213733716566e-06, | |
| "loss": 1.3881, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.09608822225221231, | |
| "grad_norm": 0.4309346675872803, | |
| "learning_rate": 9.8866856107445e-06, | |
| "loss": 1.4073, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.09624245695887718, | |
| "grad_norm": 0.46827831864356995, | |
| "learning_rate": 9.886156268359082e-06, | |
| "loss": 1.407, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.09639669166554204, | |
| "grad_norm": 0.4217469394207001, | |
| "learning_rate": 9.885625706692403e-06, | |
| "loss": 1.3838, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.0965509263722069, | |
| "grad_norm": 0.48413199186325073, | |
| "learning_rate": 9.885093925876858e-06, | |
| "loss": 1.4183, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.09670516107887177, | |
| "grad_norm": 0.464229553937912, | |
| "learning_rate": 9.884560926045149e-06, | |
| "loss": 1.3891, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.09685939578553664, | |
| "grad_norm": 0.4711248576641083, | |
| "learning_rate": 9.884026707330274e-06, | |
| "loss": 1.4003, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.0970136304922015, | |
| "grad_norm": 0.4417979121208191, | |
| "learning_rate": 9.883491269865544e-06, | |
| "loss": 1.3903, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.09716786519886637, | |
| "grad_norm": 0.424405038356781, | |
| "learning_rate": 9.882954613784572e-06, | |
| "loss": 1.3815, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.09732209990553124, | |
| "grad_norm": 0.43959248065948486, | |
| "learning_rate": 9.882416739221273e-06, | |
| "loss": 1.3575, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.09747633461219611, | |
| "grad_norm": 0.45256170630455017, | |
| "learning_rate": 9.881877646309867e-06, | |
| "loss": 1.4527, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.09763056931886098, | |
| "grad_norm": 0.4756705164909363, | |
| "learning_rate": 9.881337335184879e-06, | |
| "loss": 1.3538, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.09778480402552585, | |
| "grad_norm": 0.4799821376800537, | |
| "learning_rate": 9.880795805981136e-06, | |
| "loss": 1.3892, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.09793903873219072, | |
| "grad_norm": 0.4668935537338257, | |
| "learning_rate": 9.880253058833769e-06, | |
| "loss": 1.3748, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.09809327343885557, | |
| "grad_norm": 0.41657018661499023, | |
| "learning_rate": 9.879709093878218e-06, | |
| "loss": 1.3714, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.09824750814552044, | |
| "grad_norm": 0.4139534533023834, | |
| "learning_rate": 9.879163911250217e-06, | |
| "loss": 1.348, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.09840174285218531, | |
| "grad_norm": 0.4281579852104187, | |
| "learning_rate": 9.878617511085815e-06, | |
| "loss": 1.3785, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.09855597755885018, | |
| "grad_norm": 0.4374670684337616, | |
| "learning_rate": 9.878069893521358e-06, | |
| "loss": 1.4011, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.09871021226551505, | |
| "grad_norm": 0.45784109830856323, | |
| "learning_rate": 9.877521058693495e-06, | |
| "loss": 1.4022, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.09886444697217991, | |
| "grad_norm": 0.4866529107093811, | |
| "learning_rate": 9.876971006739185e-06, | |
| "loss": 1.3893, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.09901868167884478, | |
| "grad_norm": 0.42958325147628784, | |
| "learning_rate": 9.876419737795683e-06, | |
| "loss": 1.4028, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.09917291638550965, | |
| "grad_norm": 0.45041510462760925, | |
| "learning_rate": 9.875867252000555e-06, | |
| "loss": 1.3597, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.09932715109217452, | |
| "grad_norm": 0.4576846659183502, | |
| "learning_rate": 9.875313549491665e-06, | |
| "loss": 1.3534, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.09948138579883939, | |
| "grad_norm": 0.44920751452445984, | |
| "learning_rate": 9.874758630407184e-06, | |
| "loss": 1.4016, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.09963562050550426, | |
| "grad_norm": 0.4399208724498749, | |
| "learning_rate": 9.874202494885582e-06, | |
| "loss": 1.3635, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.09978985521216911, | |
| "grad_norm": 0.44910728931427, | |
| "learning_rate": 9.873645143065641e-06, | |
| "loss": 1.4061, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.09994408991883398, | |
| "grad_norm": 0.44466012716293335, | |
| "learning_rate": 9.873086575086439e-06, | |
| "loss": 1.3774, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.10009832462549885, | |
| "grad_norm": 0.46906405687332153, | |
| "learning_rate": 9.87252679108736e-06, | |
| "loss": 1.3687, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.10025255933216372, | |
| "grad_norm": 0.49215084314346313, | |
| "learning_rate": 9.87196579120809e-06, | |
| "loss": 1.4054, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.10040679403882859, | |
| "grad_norm": 0.4284855127334595, | |
| "learning_rate": 9.87140357558862e-06, | |
| "loss": 1.4053, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.10056102874549346, | |
| "grad_norm": 0.4497528672218323, | |
| "learning_rate": 9.870840144369247e-06, | |
| "loss": 1.346, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.10071526345215832, | |
| "grad_norm": 0.4088262915611267, | |
| "learning_rate": 9.870275497690565e-06, | |
| "loss": 1.3576, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.10086949815882319, | |
| "grad_norm": 0.44848471879959106, | |
| "learning_rate": 9.869709635693478e-06, | |
| "loss": 1.4066, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.10102373286548806, | |
| "grad_norm": 0.4360213577747345, | |
| "learning_rate": 9.869142558519187e-06, | |
| "loss": 1.3374, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.10117796757215293, | |
| "grad_norm": 0.427133709192276, | |
| "learning_rate": 9.868574266309201e-06, | |
| "loss": 1.3747, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.10133220227881778, | |
| "grad_norm": 0.45959198474884033, | |
| "learning_rate": 9.86800475920533e-06, | |
| "loss": 1.368, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.10148643698548265, | |
| "grad_norm": 0.4175659120082855, | |
| "learning_rate": 9.867434037349685e-06, | |
| "loss": 1.4017, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.10164067169214752, | |
| "grad_norm": 0.44435247778892517, | |
| "learning_rate": 9.866862100884688e-06, | |
| "loss": 1.3568, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.10179490639881239, | |
| "grad_norm": 0.44337543845176697, | |
| "learning_rate": 9.866288949953057e-06, | |
| "loss": 1.4203, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.10194914110547726, | |
| "grad_norm": 0.4260760247707367, | |
| "learning_rate": 9.865714584697812e-06, | |
| "loss": 1.3508, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.10210337581214213, | |
| "grad_norm": 0.4754478931427002, | |
| "learning_rate": 9.865139005262283e-06, | |
| "loss": 1.3922, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.102257610518807, | |
| "grad_norm": 0.45311057567596436, | |
| "learning_rate": 9.864562211790095e-06, | |
| "loss": 1.395, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.10241184522547186, | |
| "grad_norm": 0.42213669419288635, | |
| "learning_rate": 9.863984204425183e-06, | |
| "loss": 1.3314, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.10256607993213673, | |
| "grad_norm": 0.4258612096309662, | |
| "learning_rate": 9.863404983311779e-06, | |
| "loss": 1.3581, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.1027203146388016, | |
| "grad_norm": 0.45202726125717163, | |
| "learning_rate": 9.862824548594423e-06, | |
| "loss": 1.3909, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.10287454934546646, | |
| "grad_norm": 0.42718157172203064, | |
| "learning_rate": 9.862242900417954e-06, | |
| "loss": 1.3761, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.10302878405213133, | |
| "grad_norm": 0.42724889516830444, | |
| "learning_rate": 9.861660038927516e-06, | |
| "loss": 1.3607, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.1031830187587962, | |
| "grad_norm": 0.4834701716899872, | |
| "learning_rate": 9.861075964268556e-06, | |
| "loss": 1.382, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.10333725346546106, | |
| "grad_norm": 0.43723833560943604, | |
| "learning_rate": 9.86049067658682e-06, | |
| "loss": 1.3925, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.10349148817212593, | |
| "grad_norm": 0.43868571519851685, | |
| "learning_rate": 9.859904176028364e-06, | |
| "loss": 1.3673, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.1036457228787908, | |
| "grad_norm": 0.5013939142227173, | |
| "learning_rate": 9.859316462739536e-06, | |
| "loss": 1.4017, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.10379995758545567, | |
| "grad_norm": 0.4373146891593933, | |
| "learning_rate": 9.858727536866997e-06, | |
| "loss": 1.3865, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.10395419229212054, | |
| "grad_norm": 0.45148810744285583, | |
| "learning_rate": 9.858137398557705e-06, | |
| "loss": 1.3455, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.1041084269987854, | |
| "grad_norm": 0.47480836510658264, | |
| "learning_rate": 9.857546047958923e-06, | |
| "loss": 1.4015, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.10426266170545027, | |
| "grad_norm": 0.42332684993743896, | |
| "learning_rate": 9.856953485218215e-06, | |
| "loss": 1.3788, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.10441689641211514, | |
| "grad_norm": 0.4758417308330536, | |
| "learning_rate": 9.856359710483446e-06, | |
| "loss": 1.3571, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.10457113111878, | |
| "grad_norm": 0.4168352484703064, | |
| "learning_rate": 9.855764723902788e-06, | |
| "loss": 1.3588, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.10472536582544487, | |
| "grad_norm": 0.4867233633995056, | |
| "learning_rate": 9.85516852562471e-06, | |
| "loss": 1.4212, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.10487960053210973, | |
| "grad_norm": 0.43304023146629333, | |
| "learning_rate": 9.854571115797989e-06, | |
| "loss": 1.4159, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.1050338352387746, | |
| "grad_norm": 0.4678906202316284, | |
| "learning_rate": 9.8539724945717e-06, | |
| "loss": 1.381, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.10518806994543947, | |
| "grad_norm": 0.4683140516281128, | |
| "learning_rate": 9.853372662095222e-06, | |
| "loss": 1.394, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.10534230465210434, | |
| "grad_norm": 0.4631720185279846, | |
| "learning_rate": 9.852771618518234e-06, | |
| "loss": 1.3296, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.10549653935876921, | |
| "grad_norm": 0.43896958231925964, | |
| "learning_rate": 9.852169363990722e-06, | |
| "loss": 1.3613, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.10565077406543408, | |
| "grad_norm": 0.4659942090511322, | |
| "learning_rate": 9.85156589866297e-06, | |
| "loss": 1.3918, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.10580500877209895, | |
| "grad_norm": 0.44510629773139954, | |
| "learning_rate": 9.850961222685566e-06, | |
| "loss": 1.3554, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.10595924347876381, | |
| "grad_norm": 0.43709951639175415, | |
| "learning_rate": 9.850355336209398e-06, | |
| "loss": 1.3751, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.10611347818542867, | |
| "grad_norm": 0.4969240725040436, | |
| "learning_rate": 9.849748239385661e-06, | |
| "loss": 1.4108, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.10626771289209354, | |
| "grad_norm": 0.44518589973449707, | |
| "learning_rate": 9.849139932365844e-06, | |
| "loss": 1.356, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.1064219475987584, | |
| "grad_norm": 0.4284765422344208, | |
| "learning_rate": 9.848530415301748e-06, | |
| "loss": 1.3381, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.10657618230542328, | |
| "grad_norm": 0.48101645708084106, | |
| "learning_rate": 9.847919688345465e-06, | |
| "loss": 1.3908, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.10673041701208814, | |
| "grad_norm": 0.41678643226623535, | |
| "learning_rate": 9.847307751649398e-06, | |
| "loss": 1.3842, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.10688465171875301, | |
| "grad_norm": 0.5147459506988525, | |
| "learning_rate": 9.846694605366248e-06, | |
| "loss": 1.3385, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.10703888642541788, | |
| "grad_norm": 0.5198816657066345, | |
| "learning_rate": 9.846080249649018e-06, | |
| "loss": 1.3991, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.10719312113208275, | |
| "grad_norm": 0.42182374000549316, | |
| "learning_rate": 9.845464684651015e-06, | |
| "loss": 1.3687, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.10734735583874762, | |
| "grad_norm": 0.46364015340805054, | |
| "learning_rate": 9.84484791052584e-06, | |
| "loss": 1.3715, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.10750159054541249, | |
| "grad_norm": 0.47631120681762695, | |
| "learning_rate": 9.844229927427408e-06, | |
| "loss": 1.4032, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.10765582525207734, | |
| "grad_norm": 0.41958925127983093, | |
| "learning_rate": 9.843610735509927e-06, | |
| "loss": 1.3897, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.10781005995874221, | |
| "grad_norm": 0.44546833634376526, | |
| "learning_rate": 9.842990334927906e-06, | |
| "loss": 1.3419, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.10796429466540708, | |
| "grad_norm": 0.4467076063156128, | |
| "learning_rate": 9.842368725836163e-06, | |
| "loss": 1.3475, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.10811852937207195, | |
| "grad_norm": 0.43269142508506775, | |
| "learning_rate": 9.84174590838981e-06, | |
| "loss": 1.3879, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.10827276407873682, | |
| "grad_norm": 0.4104573130607605, | |
| "learning_rate": 9.841121882744267e-06, | |
| "loss": 1.3569, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.10842699878540168, | |
| "grad_norm": 0.44374406337738037, | |
| "learning_rate": 9.840496649055246e-06, | |
| "loss": 1.3612, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.10858123349206655, | |
| "grad_norm": 0.44906085729599, | |
| "learning_rate": 9.839870207478772e-06, | |
| "loss": 1.3744, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.10873546819873142, | |
| "grad_norm": 0.43347328901290894, | |
| "learning_rate": 9.839242558171165e-06, | |
| "loss": 1.3777, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.10888970290539629, | |
| "grad_norm": 0.3885936737060547, | |
| "learning_rate": 9.838613701289043e-06, | |
| "loss": 1.3612, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.10904393761206116, | |
| "grad_norm": 0.43079331517219543, | |
| "learning_rate": 9.837983636989337e-06, | |
| "loss": 1.429, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.10919817231872603, | |
| "grad_norm": 0.4597599506378174, | |
| "learning_rate": 9.837352365429265e-06, | |
| "loss": 1.3698, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.10935240702539088, | |
| "grad_norm": 0.4553662836551666, | |
| "learning_rate": 9.836719886766357e-06, | |
| "loss": 1.3528, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.10950664173205575, | |
| "grad_norm": 0.5038666129112244, | |
| "learning_rate": 9.83608620115844e-06, | |
| "loss": 1.3793, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.10966087643872062, | |
| "grad_norm": 0.44157353043556213, | |
| "learning_rate": 9.835451308763642e-06, | |
| "loss": 1.3579, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.10981511114538549, | |
| "grad_norm": 0.43951690196990967, | |
| "learning_rate": 9.834815209740393e-06, | |
| "loss": 1.3518, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.10996934585205036, | |
| "grad_norm": 0.4976160526275635, | |
| "learning_rate": 9.834177904247423e-06, | |
| "loss": 1.3759, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.11012358055871523, | |
| "grad_norm": 0.4879795014858246, | |
| "learning_rate": 9.833539392443764e-06, | |
| "loss": 1.3789, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.1102778152653801, | |
| "grad_norm": 0.40307706594467163, | |
| "learning_rate": 9.83289967448875e-06, | |
| "loss": 1.3235, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.11043204997204496, | |
| "grad_norm": 0.4686153531074524, | |
| "learning_rate": 9.832258750542016e-06, | |
| "loss": 1.3707, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.11058628467870983, | |
| "grad_norm": 0.4298979938030243, | |
| "learning_rate": 9.831616620763494e-06, | |
| "loss": 1.3485, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.1107405193853747, | |
| "grad_norm": 0.43293291330337524, | |
| "learning_rate": 9.830973285313418e-06, | |
| "loss": 1.3612, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.11089475409203955, | |
| "grad_norm": 0.5468214154243469, | |
| "learning_rate": 9.830328744352332e-06, | |
| "loss": 1.4148, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.11104898879870442, | |
| "grad_norm": 0.46644720435142517, | |
| "learning_rate": 9.829682998041065e-06, | |
| "loss": 1.3908, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.11120322350536929, | |
| "grad_norm": 0.43414947390556335, | |
| "learning_rate": 9.829036046540763e-06, | |
| "loss": 1.387, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.11135745821203416, | |
| "grad_norm": 0.5359362959861755, | |
| "learning_rate": 9.828387890012858e-06, | |
| "loss": 1.3777, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.11151169291869903, | |
| "grad_norm": 0.43617022037506104, | |
| "learning_rate": 9.827738528619095e-06, | |
| "loss": 1.3629, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.1116659276253639, | |
| "grad_norm": 0.42857152223587036, | |
| "learning_rate": 9.82708796252151e-06, | |
| "loss": 1.3716, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.11182016233202877, | |
| "grad_norm": 0.45435619354248047, | |
| "learning_rate": 9.826436191882446e-06, | |
| "loss": 1.3584, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.11197439703869363, | |
| "grad_norm": 0.42542126774787903, | |
| "learning_rate": 9.825783216864545e-06, | |
| "loss": 1.3684, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.1121286317453585, | |
| "grad_norm": 0.4516334533691406, | |
| "learning_rate": 9.825129037630748e-06, | |
| "loss": 1.3659, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.11228286645202337, | |
| "grad_norm": 0.4535215497016907, | |
| "learning_rate": 9.824473654344297e-06, | |
| "loss": 1.3876, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.11243710115868824, | |
| "grad_norm": 0.4218862056732178, | |
| "learning_rate": 9.823817067168737e-06, | |
| "loss": 1.4005, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.1125913358653531, | |
| "grad_norm": 0.45613449811935425, | |
| "learning_rate": 9.823159276267911e-06, | |
| "loss": 1.3582, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.11274557057201796, | |
| "grad_norm": 0.446209579706192, | |
| "learning_rate": 9.82250028180596e-06, | |
| "loss": 1.3736, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.11289980527868283, | |
| "grad_norm": 0.4332846701145172, | |
| "learning_rate": 9.821840083947332e-06, | |
| "loss": 1.3837, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.1130540399853477, | |
| "grad_norm": 0.4187641739845276, | |
| "learning_rate": 9.82117868285677e-06, | |
| "loss": 1.3782, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.11320827469201257, | |
| "grad_norm": 0.4384898543357849, | |
| "learning_rate": 9.820516078699316e-06, | |
| "loss": 1.343, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.11336250939867744, | |
| "grad_norm": 0.4686693549156189, | |
| "learning_rate": 9.819852271640318e-06, | |
| "loss": 1.4175, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.11351674410534231, | |
| "grad_norm": 0.4504725933074951, | |
| "learning_rate": 9.81918726184542e-06, | |
| "loss": 1.3828, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.11367097881200718, | |
| "grad_norm": 0.43755751848220825, | |
| "learning_rate": 9.818521049480566e-06, | |
| "loss": 1.3831, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.11382521351867204, | |
| "grad_norm": 0.4349918067455292, | |
| "learning_rate": 9.817853634712004e-06, | |
| "loss": 1.3678, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.11397944822533691, | |
| "grad_norm": 0.4427676796913147, | |
| "learning_rate": 9.817185017706277e-06, | |
| "loss": 1.3417, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.11413368293200177, | |
| "grad_norm": 0.4279189109802246, | |
| "learning_rate": 9.816515198630232e-06, | |
| "loss": 1.3511, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.11428791763866664, | |
| "grad_norm": 0.48394179344177246, | |
| "learning_rate": 9.815844177651012e-06, | |
| "loss": 1.3928, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.1144421523453315, | |
| "grad_norm": 0.42550933361053467, | |
| "learning_rate": 9.815171954936064e-06, | |
| "loss": 1.3732, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.11459638705199637, | |
| "grad_norm": 0.4324493408203125, | |
| "learning_rate": 9.814498530653135e-06, | |
| "loss": 1.379, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.11475062175866124, | |
| "grad_norm": 0.4873847961425781, | |
| "learning_rate": 9.813823904970264e-06, | |
| "loss": 1.3833, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.11490485646532611, | |
| "grad_norm": 0.4243428111076355, | |
| "learning_rate": 9.813148078055801e-06, | |
| "loss": 1.3913, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.11505909117199098, | |
| "grad_norm": 0.42793238162994385, | |
| "learning_rate": 9.812471050078389e-06, | |
| "loss": 1.3644, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.11521332587865585, | |
| "grad_norm": 0.46352872252464294, | |
| "learning_rate": 9.81179282120697e-06, | |
| "loss": 1.3711, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.11536756058532072, | |
| "grad_norm": 0.45879653096199036, | |
| "learning_rate": 9.81111339161079e-06, | |
| "loss": 1.4004, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.11552179529198559, | |
| "grad_norm": 0.4373062551021576, | |
| "learning_rate": 9.810432761459392e-06, | |
| "loss": 1.3753, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.11567602999865044, | |
| "grad_norm": 0.4547845423221588, | |
| "learning_rate": 9.80975093092262e-06, | |
| "loss": 1.3864, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.11583026470531531, | |
| "grad_norm": 0.4024827480316162, | |
| "learning_rate": 9.809067900170614e-06, | |
| "loss": 1.3696, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.11598449941198018, | |
| "grad_norm": 0.4349729120731354, | |
| "learning_rate": 9.80838366937382e-06, | |
| "loss": 1.4157, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.11613873411864505, | |
| "grad_norm": 0.4341493844985962, | |
| "learning_rate": 9.807698238702975e-06, | |
| "loss": 1.3466, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.11629296882530991, | |
| "grad_norm": 0.44768255949020386, | |
| "learning_rate": 9.807011608329121e-06, | |
| "loss": 1.3646, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.11644720353197478, | |
| "grad_norm": 0.4326797127723694, | |
| "learning_rate": 9.806323778423603e-06, | |
| "loss": 1.3306, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.11660143823863965, | |
| "grad_norm": 0.4152265191078186, | |
| "learning_rate": 9.805634749158056e-06, | |
| "loss": 1.3257, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.11675567294530452, | |
| "grad_norm": 0.4453123211860657, | |
| "learning_rate": 9.804944520704421e-06, | |
| "loss": 1.4102, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.11690990765196939, | |
| "grad_norm": 0.4206942319869995, | |
| "learning_rate": 9.804253093234933e-06, | |
| "loss": 1.3729, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.11706414235863426, | |
| "grad_norm": 0.41999831795692444, | |
| "learning_rate": 9.803560466922131e-06, | |
| "loss": 1.3468, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.11721837706529913, | |
| "grad_norm": 0.48270198702812195, | |
| "learning_rate": 9.802866641938853e-06, | |
| "loss": 1.3515, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.11737261177196398, | |
| "grad_norm": 0.46071720123291016, | |
| "learning_rate": 9.802171618458235e-06, | |
| "loss": 1.3699, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.11752684647862885, | |
| "grad_norm": 0.43003150820732117, | |
| "learning_rate": 9.80147539665371e-06, | |
| "loss": 1.336, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.11768108118529372, | |
| "grad_norm": 0.43177399039268494, | |
| "learning_rate": 9.800777976699012e-06, | |
| "loss": 1.3794, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.11783531589195859, | |
| "grad_norm": 0.42428192496299744, | |
| "learning_rate": 9.800079358768173e-06, | |
| "loss": 1.3696, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.11798955059862345, | |
| "grad_norm": 0.42885127663612366, | |
| "learning_rate": 9.799379543035527e-06, | |
| "loss": 1.3453, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.11814378530528832, | |
| "grad_norm": 0.4755268394947052, | |
| "learning_rate": 9.798678529675702e-06, | |
| "loss": 1.3988, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.11829802001195319, | |
| "grad_norm": 0.4355374872684479, | |
| "learning_rate": 9.79797631886363e-06, | |
| "loss": 1.3498, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.11845225471861806, | |
| "grad_norm": 0.4248248338699341, | |
| "learning_rate": 9.797272910774535e-06, | |
| "loss": 1.3342, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.11860648942528293, | |
| "grad_norm": 0.46175631880760193, | |
| "learning_rate": 9.796568305583949e-06, | |
| "loss": 1.3667, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.1187607241319478, | |
| "grad_norm": 0.4631972908973694, | |
| "learning_rate": 9.795862503467695e-06, | |
| "loss": 1.3815, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.11891495883861265, | |
| "grad_norm": 0.4286893904209137, | |
| "learning_rate": 9.795155504601897e-06, | |
| "loss": 1.3581, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.11906919354527752, | |
| "grad_norm": 0.5162618160247803, | |
| "learning_rate": 9.79444730916298e-06, | |
| "loss": 1.3318, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.11922342825194239, | |
| "grad_norm": 0.433258056640625, | |
| "learning_rate": 9.793737917327665e-06, | |
| "loss": 1.4014, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.11937766295860726, | |
| "grad_norm": 0.448296457529068, | |
| "learning_rate": 9.79302732927297e-06, | |
| "loss": 1.3916, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.11953189766527213, | |
| "grad_norm": 0.40876999497413635, | |
| "learning_rate": 9.792315545176216e-06, | |
| "loss": 1.3495, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.119686132371937, | |
| "grad_norm": 0.4315214157104492, | |
| "learning_rate": 9.79160256521502e-06, | |
| "loss": 1.4107, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.11984036707860186, | |
| "grad_norm": 0.46493902802467346, | |
| "learning_rate": 9.790888389567297e-06, | |
| "loss": 1.35, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.11999460178526673, | |
| "grad_norm": 0.4051251709461212, | |
| "learning_rate": 9.79017301841126e-06, | |
| "loss": 1.3896, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.1201488364919316, | |
| "grad_norm": 0.4369288980960846, | |
| "learning_rate": 9.789456451925423e-06, | |
| "loss": 1.3605, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.12030307119859647, | |
| "grad_norm": 0.4111925959587097, | |
| "learning_rate": 9.788738690288595e-06, | |
| "loss": 1.3671, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.12045730590526132, | |
| "grad_norm": 0.44237005710601807, | |
| "learning_rate": 9.788019733679885e-06, | |
| "loss": 1.365, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.1206115406119262, | |
| "grad_norm": 0.43273478746414185, | |
| "learning_rate": 9.787299582278702e-06, | |
| "loss": 1.38, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.12076577531859106, | |
| "grad_norm": 0.42054733633995056, | |
| "learning_rate": 9.786578236264748e-06, | |
| "loss": 1.3513, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.12092001002525593, | |
| "grad_norm": 0.41294705867767334, | |
| "learning_rate": 9.785855695818028e-06, | |
| "loss": 1.3609, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.1210742447319208, | |
| "grad_norm": 0.4511352479457855, | |
| "learning_rate": 9.785131961118843e-06, | |
| "loss": 1.3624, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.12122847943858567, | |
| "grad_norm": 0.4249388873577118, | |
| "learning_rate": 9.784407032347792e-06, | |
| "loss": 1.3466, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.12138271414525054, | |
| "grad_norm": 0.4437543749809265, | |
| "learning_rate": 9.783680909685772e-06, | |
| "loss": 1.3322, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.1215369488519154, | |
| "grad_norm": 0.44621041417121887, | |
| "learning_rate": 9.782953593313978e-06, | |
| "loss": 1.393, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.12169118355858027, | |
| "grad_norm": 0.43378946185112, | |
| "learning_rate": 9.782225083413905e-06, | |
| "loss": 1.3732, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.12184541826524514, | |
| "grad_norm": 0.4374745190143585, | |
| "learning_rate": 9.78149538016734e-06, | |
| "loss": 1.3789, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.12199965297191001, | |
| "grad_norm": 0.48876145482063293, | |
| "learning_rate": 9.780764483756375e-06, | |
| "loss": 1.3626, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.12215388767857487, | |
| "grad_norm": 0.44628390669822693, | |
| "learning_rate": 9.780032394363397e-06, | |
| "loss": 1.3866, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.12230812238523973, | |
| "grad_norm": 0.43893060088157654, | |
| "learning_rate": 9.779299112171086e-06, | |
| "loss": 1.3582, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.1224623570919046, | |
| "grad_norm": 0.6715783476829529, | |
| "learning_rate": 9.778564637362426e-06, | |
| "loss": 1.3684, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.12261659179856947, | |
| "grad_norm": 0.3864818215370178, | |
| "learning_rate": 9.777828970120697e-06, | |
| "loss": 1.3793, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.12277082650523434, | |
| "grad_norm": 0.39914947748184204, | |
| "learning_rate": 9.777092110629475e-06, | |
| "loss": 1.3372, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.12292506121189921, | |
| "grad_norm": 0.4848926067352295, | |
| "learning_rate": 9.776354059072638e-06, | |
| "loss": 1.3822, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.12307929591856408, | |
| "grad_norm": 0.42694732546806335, | |
| "learning_rate": 9.77561481563435e-06, | |
| "loss": 1.3719, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.12323353062522895, | |
| "grad_norm": 0.4372820258140564, | |
| "learning_rate": 9.774874380499086e-06, | |
| "loss": 1.3837, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.12338776533189381, | |
| "grad_norm": 0.45094388723373413, | |
| "learning_rate": 9.774132753851611e-06, | |
| "loss": 1.3531, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12354200003855868, | |
| "grad_norm": 0.42827868461608887, | |
| "learning_rate": 9.773389935876992e-06, | |
| "loss": 1.391, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.12369623474522354, | |
| "grad_norm": 0.4116842448711395, | |
| "learning_rate": 9.772645926760584e-06, | |
| "loss": 1.3232, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.1238504694518884, | |
| "grad_norm": 0.4244012236595154, | |
| "learning_rate": 9.77190072668805e-06, | |
| "loss": 1.3327, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.12400470415855327, | |
| "grad_norm": 0.45345398783683777, | |
| "learning_rate": 9.771154335845345e-06, | |
| "loss": 1.371, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.12415893886521814, | |
| "grad_norm": 0.4498525559902191, | |
| "learning_rate": 9.770406754418723e-06, | |
| "loss": 1.3164, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.12431317357188301, | |
| "grad_norm": 0.4071389138698578, | |
| "learning_rate": 9.76965798259473e-06, | |
| "loss": 1.3267, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.12446740827854788, | |
| "grad_norm": 0.4534646272659302, | |
| "learning_rate": 9.768908020560218e-06, | |
| "loss": 1.3653, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.12462164298521275, | |
| "grad_norm": 0.41573333740234375, | |
| "learning_rate": 9.768156868502328e-06, | |
| "loss": 1.3693, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.12477587769187762, | |
| "grad_norm": 0.42766252160072327, | |
| "learning_rate": 9.7674045266085e-06, | |
| "loss": 1.3123, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.12493011239854249, | |
| "grad_norm": 0.4314340054988861, | |
| "learning_rate": 9.766650995066474e-06, | |
| "loss": 1.346, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.12508434710520736, | |
| "grad_norm": 0.4385635554790497, | |
| "learning_rate": 9.765896274064283e-06, | |
| "loss": 1.3588, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.12523858181187222, | |
| "grad_norm": 0.4253045320510864, | |
| "learning_rate": 9.76514036379026e-06, | |
| "loss": 1.3636, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.1253928165185371, | |
| "grad_norm": 0.44756290316581726, | |
| "learning_rate": 9.764383264433033e-06, | |
| "loss": 1.3638, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.12554705122520196, | |
| "grad_norm": 0.4206821024417877, | |
| "learning_rate": 9.763624976181524e-06, | |
| "loss": 1.3488, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.12570128593186683, | |
| "grad_norm": 0.4024866223335266, | |
| "learning_rate": 9.76286549922496e-06, | |
| "loss": 1.3053, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.1258555206385317, | |
| "grad_norm": 0.4793635904788971, | |
| "learning_rate": 9.762104833752857e-06, | |
| "loss": 1.3593, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.12600975534519654, | |
| "grad_norm": 0.4584435820579529, | |
| "learning_rate": 9.761342979955028e-06, | |
| "loss": 1.383, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.1261639900518614, | |
| "grad_norm": 0.41160374879837036, | |
| "learning_rate": 9.760579938021586e-06, | |
| "loss": 1.3495, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.12631822475852628, | |
| "grad_norm": 0.45896100997924805, | |
| "learning_rate": 9.75981570814294e-06, | |
| "loss": 1.3939, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.12647245946519114, | |
| "grad_norm": 0.446318656206131, | |
| "learning_rate": 9.759050290509793e-06, | |
| "loss": 1.3978, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.126626694171856, | |
| "grad_norm": 0.4176998436450958, | |
| "learning_rate": 9.758283685313144e-06, | |
| "loss": 1.3333, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.12678092887852088, | |
| "grad_norm": 0.43633905053138733, | |
| "learning_rate": 9.757515892744294e-06, | |
| "loss": 1.4195, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.12693516358518575, | |
| "grad_norm": 0.41222596168518066, | |
| "learning_rate": 9.756746912994832e-06, | |
| "loss": 1.384, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.12708939829185062, | |
| "grad_norm": 0.46195337176322937, | |
| "learning_rate": 9.755976746256652e-06, | |
| "loss": 1.3395, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.1272436329985155, | |
| "grad_norm": 0.44091737270355225, | |
| "learning_rate": 9.755205392721937e-06, | |
| "loss": 1.361, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.12739786770518036, | |
| "grad_norm": 0.4277057945728302, | |
| "learning_rate": 9.754432852583168e-06, | |
| "loss": 1.3699, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.12755210241184523, | |
| "grad_norm": 0.40494897961616516, | |
| "learning_rate": 9.753659126033126e-06, | |
| "loss": 1.33, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.1277063371185101, | |
| "grad_norm": 0.45431292057037354, | |
| "learning_rate": 9.752884213264885e-06, | |
| "loss": 1.3496, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.12786057182517496, | |
| "grad_norm": 0.4500824213027954, | |
| "learning_rate": 9.752108114471812e-06, | |
| "loss": 1.3996, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.12801480653183983, | |
| "grad_norm": 0.4346963167190552, | |
| "learning_rate": 9.751330829847577e-06, | |
| "loss": 1.359, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.1281690412385047, | |
| "grad_norm": 0.40969425439834595, | |
| "learning_rate": 9.750552359586138e-06, | |
| "loss": 1.3537, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.12832327594516957, | |
| "grad_norm": 0.4441986382007599, | |
| "learning_rate": 9.749772703881756e-06, | |
| "loss": 1.3715, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.12847751065183444, | |
| "grad_norm": 0.43944767117500305, | |
| "learning_rate": 9.748991862928982e-06, | |
| "loss": 1.3516, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.1286317453584993, | |
| "grad_norm": 0.4336840808391571, | |
| "learning_rate": 9.74820983692267e-06, | |
| "loss": 1.3596, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.12878598006516417, | |
| "grad_norm": 0.46026572585105896, | |
| "learning_rate": 9.747426626057959e-06, | |
| "loss": 1.3847, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.12894021477182904, | |
| "grad_norm": 0.4183986783027649, | |
| "learning_rate": 9.746642230530294e-06, | |
| "loss": 1.3972, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.1290944494784939, | |
| "grad_norm": 0.3883669376373291, | |
| "learning_rate": 9.74585665053541e-06, | |
| "loss": 1.3173, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.12924868418515875, | |
| "grad_norm": 0.4601610004901886, | |
| "learning_rate": 9.74506988626934e-06, | |
| "loss": 1.4002, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.12940291889182362, | |
| "grad_norm": 0.44732755422592163, | |
| "learning_rate": 9.74428193792841e-06, | |
| "loss": 1.3914, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.1295571535984885, | |
| "grad_norm": 0.45681890845298767, | |
| "learning_rate": 9.743492805709244e-06, | |
| "loss": 1.3837, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.12971138830515336, | |
| "grad_norm": 0.471767783164978, | |
| "learning_rate": 9.742702489808761e-06, | |
| "loss": 1.3944, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.12986562301181823, | |
| "grad_norm": 0.40301236510276794, | |
| "learning_rate": 9.741910990424173e-06, | |
| "loss": 1.3561, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.1300198577184831, | |
| "grad_norm": 0.4440667927265167, | |
| "learning_rate": 9.74111830775299e-06, | |
| "loss": 1.368, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.13017409242514796, | |
| "grad_norm": 0.5251238346099854, | |
| "learning_rate": 9.740324441993018e-06, | |
| "loss": 1.3995, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.13032832713181283, | |
| "grad_norm": 0.41470107436180115, | |
| "learning_rate": 9.739529393342355e-06, | |
| "loss": 1.3369, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.1304825618384777, | |
| "grad_norm": 0.4555199146270752, | |
| "learning_rate": 9.738733161999394e-06, | |
| "loss": 1.3424, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.13063679654514257, | |
| "grad_norm": 0.4859868586063385, | |
| "learning_rate": 9.737935748162828e-06, | |
| "loss": 1.4185, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.13079103125180744, | |
| "grad_norm": 0.4408034682273865, | |
| "learning_rate": 9.73713715203164e-06, | |
| "loss": 1.4042, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.1309452659584723, | |
| "grad_norm": 0.4171469509601593, | |
| "learning_rate": 9.736337373805111e-06, | |
| "loss": 1.3344, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.13109950066513718, | |
| "grad_norm": 0.3878125846385956, | |
| "learning_rate": 9.735536413682816e-06, | |
| "loss": 1.3237, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.13125373537180204, | |
| "grad_norm": 0.43099191784858704, | |
| "learning_rate": 9.734734271864625e-06, | |
| "loss": 1.3605, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.1314079700784669, | |
| "grad_norm": 0.4446086585521698, | |
| "learning_rate": 9.733930948550702e-06, | |
| "loss": 1.3158, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.13156220478513178, | |
| "grad_norm": 0.474112331867218, | |
| "learning_rate": 9.733126443941507e-06, | |
| "loss": 1.3609, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.13171643949179665, | |
| "grad_norm": 0.43937546014785767, | |
| "learning_rate": 9.732320758237794e-06, | |
| "loss": 1.3735, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.13187067419846152, | |
| "grad_norm": 0.41289564967155457, | |
| "learning_rate": 9.731513891640613e-06, | |
| "loss": 1.3828, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.1320249089051264, | |
| "grad_norm": 0.4423099458217621, | |
| "learning_rate": 9.730705844351309e-06, | |
| "loss": 1.3681, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.13217914361179126, | |
| "grad_norm": 0.4533740282058716, | |
| "learning_rate": 9.729896616571519e-06, | |
| "loss": 1.367, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.13233337831845612, | |
| "grad_norm": 0.46049922704696655, | |
| "learning_rate": 9.729086208503174e-06, | |
| "loss": 1.3652, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.13248761302512096, | |
| "grad_norm": 0.46381819248199463, | |
| "learning_rate": 9.728274620348504e-06, | |
| "loss": 1.3368, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.13264184773178583, | |
| "grad_norm": 0.4899432957172394, | |
| "learning_rate": 9.727461852310032e-06, | |
| "loss": 1.3698, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.1327960824384507, | |
| "grad_norm": 0.3975609242916107, | |
| "learning_rate": 9.726647904590572e-06, | |
| "loss": 1.3373, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.13295031714511557, | |
| "grad_norm": 0.42786306142807007, | |
| "learning_rate": 9.725832777393234e-06, | |
| "loss": 1.3073, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.13310455185178044, | |
| "grad_norm": 0.4436379373073578, | |
| "learning_rate": 9.725016470921426e-06, | |
| "loss": 1.3573, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.1332587865584453, | |
| "grad_norm": 0.45234617590904236, | |
| "learning_rate": 9.724198985378847e-06, | |
| "loss": 1.3429, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.13341302126511018, | |
| "grad_norm": 0.41882964968681335, | |
| "learning_rate": 9.72338032096949e-06, | |
| "loss": 1.3558, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.13356725597177505, | |
| "grad_norm": 0.48642703890800476, | |
| "learning_rate": 9.722560477897642e-06, | |
| "loss": 1.3553, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.1337214906784399, | |
| "grad_norm": 0.4716489017009735, | |
| "learning_rate": 9.721739456367886e-06, | |
| "loss": 1.3769, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.13387572538510478, | |
| "grad_norm": 0.4115413427352905, | |
| "learning_rate": 9.720917256585098e-06, | |
| "loss": 1.3463, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.13402996009176965, | |
| "grad_norm": 0.47043079137802124, | |
| "learning_rate": 9.72009387875445e-06, | |
| "loss": 1.3902, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.13418419479843452, | |
| "grad_norm": 0.4174489974975586, | |
| "learning_rate": 9.719269323081399e-06, | |
| "loss": 1.374, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.1343384295050994, | |
| "grad_norm": 0.40322747826576233, | |
| "learning_rate": 9.71844358977171e-06, | |
| "loss": 1.3286, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.13449266421176426, | |
| "grad_norm": 0.4627361595630646, | |
| "learning_rate": 9.717616679031432e-06, | |
| "loss": 1.3526, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.13464689891842913, | |
| "grad_norm": 0.42119118571281433, | |
| "learning_rate": 9.716788591066911e-06, | |
| "loss": 1.3801, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.134801133625094, | |
| "grad_norm": 0.44734987616539, | |
| "learning_rate": 9.715959326084787e-06, | |
| "loss": 1.3503, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.13495536833175886, | |
| "grad_norm": 0.425028532743454, | |
| "learning_rate": 9.715128884291991e-06, | |
| "loss": 1.3584, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.13510960303842373, | |
| "grad_norm": 0.4234400689601898, | |
| "learning_rate": 9.714297265895754e-06, | |
| "loss": 1.3766, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.1352638377450886, | |
| "grad_norm": 0.41229814291000366, | |
| "learning_rate": 9.71346447110359e-06, | |
| "loss": 1.3476, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.13541807245175347, | |
| "grad_norm": 0.4352407455444336, | |
| "learning_rate": 9.712630500123317e-06, | |
| "loss": 1.3671, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.1355723071584183, | |
| "grad_norm": 0.44079896807670593, | |
| "learning_rate": 9.711795353163042e-06, | |
| "loss": 1.4354, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.13572654186508318, | |
| "grad_norm": 0.41312628984451294, | |
| "learning_rate": 9.710959030431167e-06, | |
| "loss": 1.3838, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.13588077657174805, | |
| "grad_norm": 0.44633620977401733, | |
| "learning_rate": 9.710121532136383e-06, | |
| "loss": 1.3159, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.13603501127841292, | |
| "grad_norm": 0.43944814801216125, | |
| "learning_rate": 9.709282858487682e-06, | |
| "loss": 1.374, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.13618924598507778, | |
| "grad_norm": 0.4563775062561035, | |
| "learning_rate": 9.708443009694339e-06, | |
| "loss": 1.3817, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.13634348069174265, | |
| "grad_norm": 0.4323616921901703, | |
| "learning_rate": 9.707601985965932e-06, | |
| "loss": 1.3369, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.13649771539840752, | |
| "grad_norm": 0.45572516322135925, | |
| "learning_rate": 9.706759787512328e-06, | |
| "loss": 1.392, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.1366519501050724, | |
| "grad_norm": 0.41872602701187134, | |
| "learning_rate": 9.705916414543688e-06, | |
| "loss": 1.3704, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.13680618481173726, | |
| "grad_norm": 0.42820021510124207, | |
| "learning_rate": 9.705071867270464e-06, | |
| "loss": 1.3701, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.13696041951840213, | |
| "grad_norm": 0.4520045220851898, | |
| "learning_rate": 9.704226145903401e-06, | |
| "loss": 1.3537, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.137114654225067, | |
| "grad_norm": 0.42270752787590027, | |
| "learning_rate": 9.703379250653544e-06, | |
| "loss": 1.3596, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.13726888893173186, | |
| "grad_norm": 0.4259921610355377, | |
| "learning_rate": 9.70253118173222e-06, | |
| "loss": 1.3489, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.13742312363839673, | |
| "grad_norm": 0.48967087268829346, | |
| "learning_rate": 9.701681939351058e-06, | |
| "loss": 1.3759, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.1375773583450616, | |
| "grad_norm": 0.43577277660369873, | |
| "learning_rate": 9.700831523721974e-06, | |
| "loss": 1.3622, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.13773159305172647, | |
| "grad_norm": 0.4252191483974457, | |
| "learning_rate": 9.69997993505718e-06, | |
| "loss": 1.3046, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.13788582775839134, | |
| "grad_norm": 0.48695123195648193, | |
| "learning_rate": 9.69912717356918e-06, | |
| "loss": 1.3962, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.1380400624650562, | |
| "grad_norm": 0.43830999732017517, | |
| "learning_rate": 9.698273239470767e-06, | |
| "loss": 1.3528, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.13819429717172108, | |
| "grad_norm": 0.4534652829170227, | |
| "learning_rate": 9.697418132975036e-06, | |
| "loss": 1.3556, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.13834853187838594, | |
| "grad_norm": 0.4268292188644409, | |
| "learning_rate": 9.696561854295363e-06, | |
| "loss": 1.3599, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.1385027665850508, | |
| "grad_norm": 0.45288994908332825, | |
| "learning_rate": 9.695704403645425e-06, | |
| "loss": 1.3747, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.13865700129171568, | |
| "grad_norm": 0.39907610416412354, | |
| "learning_rate": 9.694845781239188e-06, | |
| "loss": 1.3374, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.13881123599838052, | |
| "grad_norm": 0.43562206625938416, | |
| "learning_rate": 9.69398598729091e-06, | |
| "loss": 1.3245, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.1389654707050454, | |
| "grad_norm": 0.45644381642341614, | |
| "learning_rate": 9.693125022015142e-06, | |
| "loss": 1.3876, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.13911970541171026, | |
| "grad_norm": 0.43593016266822815, | |
| "learning_rate": 9.692262885626728e-06, | |
| "loss": 1.3541, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.13927394011837513, | |
| "grad_norm": 0.45160678029060364, | |
| "learning_rate": 9.691399578340804e-06, | |
| "loss": 1.3582, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.13942817482504, | |
| "grad_norm": 0.4698358178138733, | |
| "learning_rate": 9.690535100372798e-06, | |
| "loss": 1.3571, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.13958240953170487, | |
| "grad_norm": 0.44391974806785583, | |
| "learning_rate": 9.689669451938429e-06, | |
| "loss": 1.3267, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.13973664423836973, | |
| "grad_norm": 0.40989452600479126, | |
| "learning_rate": 9.68880263325371e-06, | |
| "loss": 1.3324, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.1398908789450346, | |
| "grad_norm": 0.43320152163505554, | |
| "learning_rate": 9.687934644534946e-06, | |
| "loss": 1.3727, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.14004511365169947, | |
| "grad_norm": 0.4489857852458954, | |
| "learning_rate": 9.68706548599873e-06, | |
| "loss": 1.3658, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.14019934835836434, | |
| "grad_norm": 0.426380455493927, | |
| "learning_rate": 9.686195157861954e-06, | |
| "loss": 1.3786, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.1403535830650292, | |
| "grad_norm": 0.46585339307785034, | |
| "learning_rate": 9.685323660341795e-06, | |
| "loss": 1.3756, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.14050781777169408, | |
| "grad_norm": 0.41312724351882935, | |
| "learning_rate": 9.684450993655726e-06, | |
| "loss": 1.3106, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.14066205247835895, | |
| "grad_norm": 0.41777506470680237, | |
| "learning_rate": 9.68357715802151e-06, | |
| "loss": 1.3505, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.14081628718502381, | |
| "grad_norm": 0.45272010564804077, | |
| "learning_rate": 9.682702153657201e-06, | |
| "loss": 1.3374, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.14097052189168868, | |
| "grad_norm": 0.44729048013687134, | |
| "learning_rate": 9.68182598078115e-06, | |
| "loss": 1.3296, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.14112475659835355, | |
| "grad_norm": 0.4224020540714264, | |
| "learning_rate": 9.680948639611989e-06, | |
| "loss": 1.3445, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.14127899130501842, | |
| "grad_norm": 0.47426870465278625, | |
| "learning_rate": 9.680070130368652e-06, | |
| "loss": 1.3767, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.1414332260116833, | |
| "grad_norm": 0.39650842547416687, | |
| "learning_rate": 9.679190453270362e-06, | |
| "loss": 1.369, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.14158746071834816, | |
| "grad_norm": 0.44931554794311523, | |
| "learning_rate": 9.678309608536626e-06, | |
| "loss": 1.3383, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.14174169542501303, | |
| "grad_norm": 0.45185673236846924, | |
| "learning_rate": 9.677427596387254e-06, | |
| "loss": 1.3198, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.1418959301316779, | |
| "grad_norm": 0.4276964068412781, | |
| "learning_rate": 9.676544417042337e-06, | |
| "loss": 1.3496, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.14205016483834274, | |
| "grad_norm": 0.4189877212047577, | |
| "learning_rate": 9.675660070722266e-06, | |
| "loss": 1.3889, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.1422043995450076, | |
| "grad_norm": 0.47979092597961426, | |
| "learning_rate": 9.674774557647715e-06, | |
| "loss": 1.352, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.14235863425167247, | |
| "grad_norm": 0.4569590091705322, | |
| "learning_rate": 9.673887878039654e-06, | |
| "loss": 1.4002, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.14251286895833734, | |
| "grad_norm": 0.43906304240226746, | |
| "learning_rate": 9.673000032119346e-06, | |
| "loss": 1.3192, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.1426671036650022, | |
| "grad_norm": 0.4342762231826782, | |
| "learning_rate": 9.672111020108339e-06, | |
| "loss": 1.3421, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.14282133837166708, | |
| "grad_norm": 0.4287731647491455, | |
| "learning_rate": 9.671220842228476e-06, | |
| "loss": 1.38, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.14297557307833195, | |
| "grad_norm": 0.44181495904922485, | |
| "learning_rate": 9.670329498701892e-06, | |
| "loss": 1.3506, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.14312980778499682, | |
| "grad_norm": 0.4163863956928253, | |
| "learning_rate": 9.669436989751007e-06, | |
| "loss": 1.3436, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.14328404249166168, | |
| "grad_norm": 0.46653324365615845, | |
| "learning_rate": 9.66854331559854e-06, | |
| "loss": 1.3784, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.14343827719832655, | |
| "grad_norm": 0.4513910710811615, | |
| "learning_rate": 9.667648476467495e-06, | |
| "loss": 1.3189, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.14359251190499142, | |
| "grad_norm": 0.39425528049468994, | |
| "learning_rate": 9.666752472581166e-06, | |
| "loss": 1.318, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.1437467466116563, | |
| "grad_norm": 0.4691428542137146, | |
| "learning_rate": 9.665855304163145e-06, | |
| "loss": 1.3478, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.14390098131832116, | |
| "grad_norm": 0.4166172444820404, | |
| "learning_rate": 9.664956971437306e-06, | |
| "loss": 1.3009, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.14405521602498603, | |
| "grad_norm": 0.4548834264278412, | |
| "learning_rate": 9.664057474627817e-06, | |
| "loss": 1.3604, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.1442094507316509, | |
| "grad_norm": 0.4332239031791687, | |
| "learning_rate": 9.663156813959139e-06, | |
| "loss": 1.3781, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.14436368543831576, | |
| "grad_norm": 0.4403667449951172, | |
| "learning_rate": 9.662254989656018e-06, | |
| "loss": 1.3606, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.14451792014498063, | |
| "grad_norm": 0.40217745304107666, | |
| "learning_rate": 9.661352001943494e-06, | |
| "loss": 1.3691, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.1446721548516455, | |
| "grad_norm": 0.44711363315582275, | |
| "learning_rate": 9.660447851046898e-06, | |
| "loss": 1.3521, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.14482638955831037, | |
| "grad_norm": 0.4292323589324951, | |
| "learning_rate": 9.659542537191849e-06, | |
| "loss": 1.3577, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.14498062426497524, | |
| "grad_norm": 0.4293443560600281, | |
| "learning_rate": 9.658636060604258e-06, | |
| "loss": 1.3742, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.1451348589716401, | |
| "grad_norm": 0.5029200315475464, | |
| "learning_rate": 9.657728421510324e-06, | |
| "loss": 1.3586, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.14528909367830495, | |
| "grad_norm": 0.444880872964859, | |
| "learning_rate": 9.656819620136538e-06, | |
| "loss": 1.3642, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.14544332838496982, | |
| "grad_norm": 0.4514634311199188, | |
| "learning_rate": 9.65590965670968e-06, | |
| "loss": 1.3464, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.14559756309163469, | |
| "grad_norm": 0.43868181109428406, | |
| "learning_rate": 9.654998531456822e-06, | |
| "loss": 1.3661, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.14575179779829955, | |
| "grad_norm": 0.4196695387363434, | |
| "learning_rate": 9.654086244605324e-06, | |
| "loss": 1.349, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.14590603250496442, | |
| "grad_norm": 0.419352650642395, | |
| "learning_rate": 9.653172796382834e-06, | |
| "loss": 1.3434, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.1460602672116293, | |
| "grad_norm": 0.45958200097084045, | |
| "learning_rate": 9.652258187017293e-06, | |
| "loss": 1.341, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.14621450191829416, | |
| "grad_norm": 0.40580084919929504, | |
| "learning_rate": 9.651342416736932e-06, | |
| "loss": 1.3546, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.14636873662495903, | |
| "grad_norm": 0.418164998292923, | |
| "learning_rate": 9.650425485770268e-06, | |
| "loss": 1.3513, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.1465229713316239, | |
| "grad_norm": 0.43666157126426697, | |
| "learning_rate": 9.649507394346113e-06, | |
| "loss": 1.3664, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.14667720603828877, | |
| "grad_norm": 0.4278264343738556, | |
| "learning_rate": 9.648588142693562e-06, | |
| "loss": 1.3672, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.14683144074495363, | |
| "grad_norm": 0.4103301465511322, | |
| "learning_rate": 9.647667731042008e-06, | |
| "loss": 1.3623, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.1469856754516185, | |
| "grad_norm": 0.41230452060699463, | |
| "learning_rate": 9.646746159621124e-06, | |
| "loss": 1.3589, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.14713991015828337, | |
| "grad_norm": 0.4213346838951111, | |
| "learning_rate": 9.645823428660879e-06, | |
| "loss": 1.3741, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.14729414486494824, | |
| "grad_norm": 0.3737544119358063, | |
| "learning_rate": 9.64489953839153e-06, | |
| "loss": 1.3636, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.1474483795716131, | |
| "grad_norm": 0.4100255072116852, | |
| "learning_rate": 9.64397448904362e-06, | |
| "loss": 1.3759, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.14760261427827798, | |
| "grad_norm": 0.4222797751426697, | |
| "learning_rate": 9.643048280847987e-06, | |
| "loss": 1.3542, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.14775684898494285, | |
| "grad_norm": 0.41794565320014954, | |
| "learning_rate": 9.642120914035752e-06, | |
| "loss": 1.3968, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.14791108369160771, | |
| "grad_norm": 0.4112548232078552, | |
| "learning_rate": 9.641192388838333e-06, | |
| "loss": 1.3563, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.14806531839827258, | |
| "grad_norm": 0.4756266176700592, | |
| "learning_rate": 9.640262705487429e-06, | |
| "loss": 1.3644, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.14821955310493745, | |
| "grad_norm": 0.421749085187912, | |
| "learning_rate": 9.639331864215028e-06, | |
| "loss": 1.3707, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.1483737878116023, | |
| "grad_norm": 0.43855324387550354, | |
| "learning_rate": 9.638399865253417e-06, | |
| "loss": 1.3365, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.14852802251826716, | |
| "grad_norm": 0.45007380843162537, | |
| "learning_rate": 9.637466708835162e-06, | |
| "loss": 1.3759, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.14868225722493203, | |
| "grad_norm": 0.38929274678230286, | |
| "learning_rate": 9.636532395193119e-06, | |
| "loss": 1.3174, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.1488364919315969, | |
| "grad_norm": 0.4295295476913452, | |
| "learning_rate": 9.635596924560435e-06, | |
| "loss": 1.3696, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.14899072663826177, | |
| "grad_norm": 0.46214181184768677, | |
| "learning_rate": 9.634660297170549e-06, | |
| "loss": 1.3849, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.14914496134492664, | |
| "grad_norm": 0.4029429852962494, | |
| "learning_rate": 9.633722513257183e-06, | |
| "loss": 1.3407, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.1492991960515915, | |
| "grad_norm": 0.445356547832489, | |
| "learning_rate": 9.632783573054347e-06, | |
| "loss": 1.3544, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.14945343075825637, | |
| "grad_norm": 0.49758583307266235, | |
| "learning_rate": 9.631843476796348e-06, | |
| "loss": 1.3534, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.14960766546492124, | |
| "grad_norm": 0.40837979316711426, | |
| "learning_rate": 9.63090222471777e-06, | |
| "loss": 1.3528, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.1497619001715861, | |
| "grad_norm": 0.42469340562820435, | |
| "learning_rate": 9.629959817053494e-06, | |
| "loss": 1.3276, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.14991613487825098, | |
| "grad_norm": 0.44042232632637024, | |
| "learning_rate": 9.629016254038685e-06, | |
| "loss": 1.3828, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.15007036958491585, | |
| "grad_norm": 0.3965208828449249, | |
| "learning_rate": 9.628071535908799e-06, | |
| "loss": 1.3825, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.15022460429158072, | |
| "grad_norm": 0.420837938785553, | |
| "learning_rate": 9.627125662899578e-06, | |
| "loss": 1.3114, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.15037883899824558, | |
| "grad_norm": 0.40281036496162415, | |
| "learning_rate": 9.626178635247054e-06, | |
| "loss": 1.3175, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.15053307370491045, | |
| "grad_norm": 0.4451516270637512, | |
| "learning_rate": 9.625230453187548e-06, | |
| "loss": 1.3864, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.15068730841157532, | |
| "grad_norm": 0.3975658714771271, | |
| "learning_rate": 9.624281116957664e-06, | |
| "loss": 1.3553, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.1508415431182402, | |
| "grad_norm": 0.41001689434051514, | |
| "learning_rate": 9.623330626794298e-06, | |
| "loss": 1.3426, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.15099577782490506, | |
| "grad_norm": 0.44088777899742126, | |
| "learning_rate": 9.622378982934635e-06, | |
| "loss": 1.3996, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.15115001253156993, | |
| "grad_norm": 0.4407082796096802, | |
| "learning_rate": 9.621426185616146e-06, | |
| "loss": 1.3617, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.1513042472382348, | |
| "grad_norm": 0.41040855646133423, | |
| "learning_rate": 9.620472235076587e-06, | |
| "loss": 1.3487, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.15145848194489966, | |
| "grad_norm": 0.4353833496570587, | |
| "learning_rate": 9.619517131554008e-06, | |
| "loss": 1.3804, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.1516127166515645, | |
| "grad_norm": 0.4234057366847992, | |
| "learning_rate": 9.618560875286745e-06, | |
| "loss": 1.3538, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.15176695135822937, | |
| "grad_norm": 0.428462415933609, | |
| "learning_rate": 9.617603466513415e-06, | |
| "loss": 1.3589, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.15192118606489424, | |
| "grad_norm": 0.45914000272750854, | |
| "learning_rate": 9.616644905472932e-06, | |
| "loss": 1.3518, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.1520754207715591, | |
| "grad_norm": 0.4305824935436249, | |
| "learning_rate": 9.615685192404494e-06, | |
| "loss": 1.3552, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.15222965547822398, | |
| "grad_norm": 0.44539913535118103, | |
| "learning_rate": 9.614724327547582e-06, | |
| "loss": 1.3502, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.15238389018488885, | |
| "grad_norm": 0.4699813723564148, | |
| "learning_rate": 9.61376231114197e-06, | |
| "loss": 1.3621, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.15253812489155372, | |
| "grad_norm": 0.3810335397720337, | |
| "learning_rate": 9.612799143427717e-06, | |
| "loss": 1.3402, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.15269235959821859, | |
| "grad_norm": 0.4232047200202942, | |
| "learning_rate": 9.611834824645172e-06, | |
| "loss": 1.3369, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.15284659430488345, | |
| "grad_norm": 0.4489132761955261, | |
| "learning_rate": 9.610869355034968e-06, | |
| "loss": 1.3648, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.15300082901154832, | |
| "grad_norm": 0.4090892970561981, | |
| "learning_rate": 9.609902734838024e-06, | |
| "loss": 1.3336, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.1531550637182132, | |
| "grad_norm": 0.41862618923187256, | |
| "learning_rate": 9.608934964295552e-06, | |
| "loss": 1.3361, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.15330929842487806, | |
| "grad_norm": 0.45245254039764404, | |
| "learning_rate": 9.607966043649047e-06, | |
| "loss": 1.3474, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.15346353313154293, | |
| "grad_norm": 0.4146495759487152, | |
| "learning_rate": 9.606995973140287e-06, | |
| "loss": 1.3285, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.1536177678382078, | |
| "grad_norm": 0.4099392890930176, | |
| "learning_rate": 9.606024753011346e-06, | |
| "loss": 1.3385, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.15377200254487267, | |
| "grad_norm": 0.46234220266342163, | |
| "learning_rate": 9.605052383504578e-06, | |
| "loss": 1.3286, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.15392623725153753, | |
| "grad_norm": 0.3993547558784485, | |
| "learning_rate": 9.60407886486263e-06, | |
| "loss": 1.33, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.1540804719582024, | |
| "grad_norm": 0.43349605798721313, | |
| "learning_rate": 9.603104197328426e-06, | |
| "loss": 1.3501, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.15423470666486727, | |
| "grad_norm": 0.4560033977031708, | |
| "learning_rate": 9.602128381145186e-06, | |
| "loss": 1.3747, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.15438894137153214, | |
| "grad_norm": 0.43051019310951233, | |
| "learning_rate": 9.60115141655641e-06, | |
| "loss": 1.3829, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.154543176078197, | |
| "grad_norm": 0.40189671516418457, | |
| "learning_rate": 9.600173303805893e-06, | |
| "loss": 1.347, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.15469741078486188, | |
| "grad_norm": 0.4193477928638458, | |
| "learning_rate": 9.599194043137707e-06, | |
| "loss": 1.3523, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.15485164549152672, | |
| "grad_norm": 0.44596585631370544, | |
| "learning_rate": 9.598213634796218e-06, | |
| "loss": 1.3744, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.1550058801981916, | |
| "grad_norm": 0.4346173405647278, | |
| "learning_rate": 9.597232079026071e-06, | |
| "loss": 1.3562, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.15516011490485646, | |
| "grad_norm": 0.40672799944877625, | |
| "learning_rate": 9.596249376072204e-06, | |
| "loss": 1.383, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.15531434961152132, | |
| "grad_norm": 0.4428286850452423, | |
| "learning_rate": 9.595265526179839e-06, | |
| "loss": 1.3863, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.1554685843181862, | |
| "grad_norm": 0.4054853320121765, | |
| "learning_rate": 9.59428052959448e-06, | |
| "loss": 1.3395, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.15562281902485106, | |
| "grad_norm": 0.4263022541999817, | |
| "learning_rate": 9.593294386561926e-06, | |
| "loss": 1.3331, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.15577705373151593, | |
| "grad_norm": 0.4388751983642578, | |
| "learning_rate": 9.592307097328255e-06, | |
| "loss": 1.393, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.1559312884381808, | |
| "grad_norm": 0.4239785671234131, | |
| "learning_rate": 9.591318662139833e-06, | |
| "loss": 1.3521, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.15608552314484567, | |
| "grad_norm": 0.4259006381034851, | |
| "learning_rate": 9.59032908124331e-06, | |
| "loss": 1.369, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.15623975785151054, | |
| "grad_norm": 0.47064313292503357, | |
| "learning_rate": 9.58933835488563e-06, | |
| "loss": 1.3671, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.1563939925581754, | |
| "grad_norm": 0.4133605659008026, | |
| "learning_rate": 9.58834648331401e-06, | |
| "loss": 1.3291, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.15654822726484027, | |
| "grad_norm": 0.38037538528442383, | |
| "learning_rate": 9.587353466775963e-06, | |
| "loss": 1.3178, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.15670246197150514, | |
| "grad_norm": 0.44501787424087524, | |
| "learning_rate": 9.586359305519283e-06, | |
| "loss": 1.3593, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.15685669667817, | |
| "grad_norm": 0.40758368372917175, | |
| "learning_rate": 9.585363999792053e-06, | |
| "loss": 1.3694, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.15701093138483488, | |
| "grad_norm": 0.4244682788848877, | |
| "learning_rate": 9.584367549842638e-06, | |
| "loss": 1.3232, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.15716516609149975, | |
| "grad_norm": 0.5118983387947083, | |
| "learning_rate": 9.58336995591969e-06, | |
| "loss": 1.3609, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.15731940079816462, | |
| "grad_norm": 0.3951735496520996, | |
| "learning_rate": 9.582371218272146e-06, | |
| "loss": 1.3782, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.15747363550482948, | |
| "grad_norm": 0.38878145813941956, | |
| "learning_rate": 9.581371337149232e-06, | |
| "loss": 1.3093, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.15762787021149435, | |
| "grad_norm": 0.4443088471889496, | |
| "learning_rate": 9.580370312800452e-06, | |
| "loss": 1.366, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.15778210491815922, | |
| "grad_norm": 0.45588213205337524, | |
| "learning_rate": 9.5793681454756e-06, | |
| "loss": 1.3797, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.15793633962482406, | |
| "grad_norm": 0.3731274902820587, | |
| "learning_rate": 9.578364835424758e-06, | |
| "loss": 1.3328, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.15809057433148893, | |
| "grad_norm": 0.43674901127815247, | |
| "learning_rate": 9.577360382898289e-06, | |
| "loss": 1.3419, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.1582448090381538, | |
| "grad_norm": 0.42338722944259644, | |
| "learning_rate": 9.57635478814684e-06, | |
| "loss": 1.3628, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.15839904374481867, | |
| "grad_norm": 0.38588276505470276, | |
| "learning_rate": 9.575348051421347e-06, | |
| "loss": 1.3147, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.15855327845148354, | |
| "grad_norm": 0.3909562826156616, | |
| "learning_rate": 9.574340172973027e-06, | |
| "loss": 1.3209, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.1587075131581484, | |
| "grad_norm": 0.38329264521598816, | |
| "learning_rate": 9.573331153053385e-06, | |
| "loss": 1.336, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.15886174786481327, | |
| "grad_norm": 0.400493860244751, | |
| "learning_rate": 9.572320991914209e-06, | |
| "loss": 1.3121, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.15901598257147814, | |
| "grad_norm": 0.42534539103507996, | |
| "learning_rate": 9.571309689807575e-06, | |
| "loss": 1.3077, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.159170217278143, | |
| "grad_norm": 0.4600400924682617, | |
| "learning_rate": 9.570297246985838e-06, | |
| "loss": 1.3919, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.15932445198480788, | |
| "grad_norm": 0.3911516070365906, | |
| "learning_rate": 9.569283663701643e-06, | |
| "loss": 1.3613, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.15947868669147275, | |
| "grad_norm": 0.40016788244247437, | |
| "learning_rate": 9.568268940207917e-06, | |
| "loss": 1.3612, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.15963292139813762, | |
| "grad_norm": 0.4020020365715027, | |
| "learning_rate": 9.56725307675787e-06, | |
| "loss": 1.328, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.15978715610480249, | |
| "grad_norm": 0.3938751220703125, | |
| "learning_rate": 9.566236073605002e-06, | |
| "loss": 1.2962, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.15994139081146735, | |
| "grad_norm": 0.4025926887989044, | |
| "learning_rate": 9.565217931003092e-06, | |
| "loss": 1.3177, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.16009562551813222, | |
| "grad_norm": 0.4471730589866638, | |
| "learning_rate": 9.564198649206204e-06, | |
| "loss": 1.3639, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.1602498602247971, | |
| "grad_norm": 0.4198334217071533, | |
| "learning_rate": 9.563178228468689e-06, | |
| "loss": 1.3248, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.16040409493146196, | |
| "grad_norm": 0.433456689119339, | |
| "learning_rate": 9.56215666904518e-06, | |
| "loss": 1.3612, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.16055832963812683, | |
| "grad_norm": 0.5922003388404846, | |
| "learning_rate": 9.561133971190597e-06, | |
| "loss": 1.3588, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.1607125643447917, | |
| "grad_norm": 0.4096234142780304, | |
| "learning_rate": 9.560110135160139e-06, | |
| "loss": 1.3321, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.16086679905145657, | |
| "grad_norm": 0.41673043370246887, | |
| "learning_rate": 9.559085161209293e-06, | |
| "loss": 1.3201, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.16102103375812143, | |
| "grad_norm": 0.49432477355003357, | |
| "learning_rate": 9.558059049593829e-06, | |
| "loss": 1.3519, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.16117526846478628, | |
| "grad_norm": 0.41908732056617737, | |
| "learning_rate": 9.5570318005698e-06, | |
| "loss": 1.3613, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.16132950317145114, | |
| "grad_norm": 0.4030068814754486, | |
| "learning_rate": 9.556003414393544e-06, | |
| "loss": 1.332, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.161483737878116, | |
| "grad_norm": 0.45960256457328796, | |
| "learning_rate": 9.554973891321683e-06, | |
| "loss": 1.3313, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.16163797258478088, | |
| "grad_norm": 0.44315245747566223, | |
| "learning_rate": 9.553943231611123e-06, | |
| "loss": 1.36, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.16179220729144575, | |
| "grad_norm": 0.43170469999313354, | |
| "learning_rate": 9.552911435519049e-06, | |
| "loss": 1.3353, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.16194644199811062, | |
| "grad_norm": 0.42984703183174133, | |
| "learning_rate": 9.551878503302936e-06, | |
| "loss": 1.3839, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.1621006767047755, | |
| "grad_norm": 0.3931952714920044, | |
| "learning_rate": 9.55084443522054e-06, | |
| "loss": 1.3711, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.16225491141144036, | |
| "grad_norm": 0.4282974600791931, | |
| "learning_rate": 9.5498092315299e-06, | |
| "loss": 1.3377, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.16240914611810522, | |
| "grad_norm": 0.41101571917533875, | |
| "learning_rate": 9.548772892489335e-06, | |
| "loss": 1.3453, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.1625633808247701, | |
| "grad_norm": 0.43313929438591003, | |
| "learning_rate": 9.547735418357458e-06, | |
| "loss": 1.3655, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.16271761553143496, | |
| "grad_norm": 0.4140447676181793, | |
| "learning_rate": 9.546696809393151e-06, | |
| "loss": 1.3421, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.16287185023809983, | |
| "grad_norm": 0.4221991300582886, | |
| "learning_rate": 9.54565706585559e-06, | |
| "loss": 1.3045, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.1630260849447647, | |
| "grad_norm": 0.4684382379055023, | |
| "learning_rate": 9.54461618800423e-06, | |
| "loss": 1.3527, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.16318031965142957, | |
| "grad_norm": 0.42063719034194946, | |
| "learning_rate": 9.54357417609881e-06, | |
| "loss": 1.3409, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.16333455435809444, | |
| "grad_norm": 0.39833924174308777, | |
| "learning_rate": 9.54253103039935e-06, | |
| "loss": 1.3076, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.1634887890647593, | |
| "grad_norm": 0.43191730976104736, | |
| "learning_rate": 9.541486751166156e-06, | |
| "loss": 1.3582, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.16364302377142417, | |
| "grad_norm": 0.39600446820259094, | |
| "learning_rate": 9.540441338659813e-06, | |
| "loss": 1.333, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.16379725847808904, | |
| "grad_norm": 0.41398027539253235, | |
| "learning_rate": 9.539394793141193e-06, | |
| "loss": 1.3444, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.1639514931847539, | |
| "grad_norm": 0.4630861282348633, | |
| "learning_rate": 9.538347114871449e-06, | |
| "loss": 1.3206, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.16410572789141878, | |
| "grad_norm": 0.4108981192111969, | |
| "learning_rate": 9.537298304112015e-06, | |
| "loss": 1.3563, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.16425996259808365, | |
| "grad_norm": 0.38527730107307434, | |
| "learning_rate": 9.536248361124611e-06, | |
| "loss": 1.3242, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.1644141973047485, | |
| "grad_norm": 0.460708886384964, | |
| "learning_rate": 9.535197286171235e-06, | |
| "loss": 1.3316, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.16456843201141336, | |
| "grad_norm": 0.4495241641998291, | |
| "learning_rate": 9.534145079514172e-06, | |
| "loss": 1.3554, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.16472266671807823, | |
| "grad_norm": 0.39525145292282104, | |
| "learning_rate": 9.533091741415986e-06, | |
| "loss": 1.3183, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.1648769014247431, | |
| "grad_norm": 0.5031570196151733, | |
| "learning_rate": 9.532037272139527e-06, | |
| "loss": 1.371, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.16503113613140796, | |
| "grad_norm": 0.42758193612098694, | |
| "learning_rate": 9.530981671947924e-06, | |
| "loss": 1.3389, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.16518537083807283, | |
| "grad_norm": 0.4300083816051483, | |
| "learning_rate": 9.529924941104586e-06, | |
| "loss": 1.3397, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.1653396055447377, | |
| "grad_norm": 0.45306211709976196, | |
| "learning_rate": 9.528867079873214e-06, | |
| "loss": 1.3425, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.16549384025140257, | |
| "grad_norm": 0.42708519101142883, | |
| "learning_rate": 9.52780808851778e-06, | |
| "loss": 1.4067, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.16564807495806744, | |
| "grad_norm": 0.39134591817855835, | |
| "learning_rate": 9.526747967302544e-06, | |
| "loss": 1.3273, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.1658023096647323, | |
| "grad_norm": 0.3904189467430115, | |
| "learning_rate": 9.525686716492044e-06, | |
| "loss": 1.3488, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.16595654437139717, | |
| "grad_norm": 0.40552303194999695, | |
| "learning_rate": 9.524624336351104e-06, | |
| "loss": 1.3742, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.16611077907806204, | |
| "grad_norm": 0.4283524751663208, | |
| "learning_rate": 9.523560827144829e-06, | |
| "loss": 1.3375, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.1662650137847269, | |
| "grad_norm": 0.4074269235134125, | |
| "learning_rate": 9.522496189138606e-06, | |
| "loss": 1.3307, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.16641924849139178, | |
| "grad_norm": 0.41989561915397644, | |
| "learning_rate": 9.521430422598099e-06, | |
| "loss": 1.3519, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.16657348319805665, | |
| "grad_norm": 0.42565736174583435, | |
| "learning_rate": 9.520363527789258e-06, | |
| "loss": 1.3043, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.16672771790472152, | |
| "grad_norm": 0.43175825476646423, | |
| "learning_rate": 9.519295504978316e-06, | |
| "loss": 1.3264, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.16688195261138639, | |
| "grad_norm": 0.4620734751224518, | |
| "learning_rate": 9.518226354431785e-06, | |
| "loss": 1.3223, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.16703618731805125, | |
| "grad_norm": 0.3752366602420807, | |
| "learning_rate": 9.517156076416457e-06, | |
| "loss": 1.3205, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.16719042202471612, | |
| "grad_norm": 0.43047410249710083, | |
| "learning_rate": 9.516084671199405e-06, | |
| "loss": 1.3581, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.167344656731381, | |
| "grad_norm": 0.46193546056747437, | |
| "learning_rate": 9.515012139047989e-06, | |
| "loss": 1.3808, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.16749889143804583, | |
| "grad_norm": 0.42722025513648987, | |
| "learning_rate": 9.513938480229845e-06, | |
| "loss": 1.3486, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.1676531261447107, | |
| "grad_norm": 0.3964281976222992, | |
| "learning_rate": 9.512863695012892e-06, | |
| "loss": 1.3063, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.16780736085137557, | |
| "grad_norm": 0.44473353028297424, | |
| "learning_rate": 9.511787783665329e-06, | |
| "loss": 1.3665, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.16796159555804044, | |
| "grad_norm": 0.4227943420410156, | |
| "learning_rate": 9.510710746455636e-06, | |
| "loss": 1.3032, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.1681158302647053, | |
| "grad_norm": 0.41199952363967896, | |
| "learning_rate": 9.509632583652578e-06, | |
| "loss": 1.2954, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.16827006497137018, | |
| "grad_norm": 0.4591391086578369, | |
| "learning_rate": 9.508553295525191e-06, | |
| "loss": 1.3524, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.16842429967803504, | |
| "grad_norm": 0.4313882887363434, | |
| "learning_rate": 9.507472882342805e-06, | |
| "loss": 1.3684, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.1685785343846999, | |
| "grad_norm": 0.3902830481529236, | |
| "learning_rate": 9.506391344375022e-06, | |
| "loss": 1.3093, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.16873276909136478, | |
| "grad_norm": 0.47131410241127014, | |
| "learning_rate": 9.505308681891725e-06, | |
| "loss": 1.342, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.16888700379802965, | |
| "grad_norm": 0.42069944739341736, | |
| "learning_rate": 9.50422489516308e-06, | |
| "loss": 1.3922, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.16904123850469452, | |
| "grad_norm": 0.4462716579437256, | |
| "learning_rate": 9.503139984459536e-06, | |
| "loss": 1.3458, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.1691954732113594, | |
| "grad_norm": 0.49075546860694885, | |
| "learning_rate": 9.502053950051815e-06, | |
| "loss": 1.3583, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.16934970791802426, | |
| "grad_norm": 0.40180158615112305, | |
| "learning_rate": 9.500966792210926e-06, | |
| "loss": 1.3663, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.16950394262468912, | |
| "grad_norm": 0.40839266777038574, | |
| "learning_rate": 9.499878511208155e-06, | |
| "loss": 1.3398, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.169658177331354, | |
| "grad_norm": 0.45156973600387573, | |
| "learning_rate": 9.498789107315069e-06, | |
| "loss": 1.3064, | |
| "step": 1100 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 6484, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.375515394495742e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
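| | |

The state above is the standard `trainer_state.json` that the Hugging Face `Trainer` writes at each checkpoint; the `log_history` array holds one record per logged step. A minimal sketch of how one might load it and plot the logged loss and learning-rate curves (the file path, and the use of matplotlib, are assumptions, not part of the original log):

```python
import json

import matplotlib.pyplot as plt

# Path is an assumption; point it at the checkpoint directory's trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history record carries step, loss, learning_rate, and grad_norm.
history = state["log_history"]
steps = [h["step"] for h in history if "loss" in h]
losses = [h["loss"] for h in history if "loss" in h]
lrs = [h["learning_rate"] for h in history if "learning_rate" in h]

# Plot loss and learning rate on twin axes so both scales stay readable.
fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="train loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.show()
```

On this log the curve should show the loss falling from roughly 2.7 at step 1 to about 1.3–1.4 by step 1100, with the learning rate still near its ~9.5e-06 plateau after the initial warmup.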