| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.20008972633467922, | |
| "eval_steps": 500, | |
| "global_step": 1338, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00014954389113204725, | |
| "grad_norm": 35.95169344376715, | |
| "learning_rate": 4.975124378109453e-08, | |
| "loss": 1.1911, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0002990877822640945, | |
| "grad_norm": 29.047342527504238, | |
| "learning_rate": 9.950248756218906e-08, | |
| "loss": 1.4707, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00044863167339614175, | |
| "grad_norm": 24.718727160032117, | |
| "learning_rate": 1.4925373134328358e-07, | |
| "loss": 0.9534, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.000598175564528189, | |
| "grad_norm": 32.87218994198639, | |
| "learning_rate": 1.9900497512437812e-07, | |
| "loss": 1.2192, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0007477194556602363, | |
| "grad_norm": 25.398344980222138, | |
| "learning_rate": 2.4875621890547267e-07, | |
| "loss": 1.1835, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0008972633467922835, | |
| "grad_norm": 30.48079389364258, | |
| "learning_rate": 2.9850746268656716e-07, | |
| "loss": 1.0024, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0010468072379243307, | |
| "grad_norm": 27.780032565686206, | |
| "learning_rate": 3.4825870646766175e-07, | |
| "loss": 1.1796, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.001196351129056378, | |
| "grad_norm": 33.19634259772052, | |
| "learning_rate": 3.9800995024875624e-07, | |
| "loss": 0.9585, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0013458950201884253, | |
| "grad_norm": 32.92097675417938, | |
| "learning_rate": 4.4776119402985074e-07, | |
| "loss": 1.1831, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0014954389113204726, | |
| "grad_norm": 31.267461918177617, | |
| "learning_rate": 4.975124378109453e-07, | |
| "loss": 0.9208, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0016449828024525197, | |
| "grad_norm": 31.652990928454088, | |
| "learning_rate": 5.472636815920398e-07, | |
| "loss": 0.8882, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.001794526693584567, | |
| "grad_norm": 33.800482625732165, | |
| "learning_rate": 5.970149253731343e-07, | |
| "loss": 1.2138, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0019440705847166143, | |
| "grad_norm": 30.753216086819556, | |
| "learning_rate": 6.467661691542289e-07, | |
| "loss": 0.9896, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0020936144758486614, | |
| "grad_norm": 32.57679525538582, | |
| "learning_rate": 6.965174129353235e-07, | |
| "loss": 0.9195, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0022431583669807087, | |
| "grad_norm": 25.334089702892793, | |
| "learning_rate": 7.462686567164179e-07, | |
| "loss": 0.7515, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.002392702258112756, | |
| "grad_norm": 22.2961872211284, | |
| "learning_rate": 7.960199004975125e-07, | |
| "loss": 0.6638, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0025422461492448033, | |
| "grad_norm": 24.245556768411276, | |
| "learning_rate": 8.457711442786071e-07, | |
| "loss": 0.7704, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0026917900403768506, | |
| "grad_norm": 19.23412917202397, | |
| "learning_rate": 8.955223880597015e-07, | |
| "loss": 0.7354, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.002841333931508898, | |
| "grad_norm": 18.58051317424024, | |
| "learning_rate": 9.452736318407961e-07, | |
| "loss": 0.5749, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0029908778226409452, | |
| "grad_norm": 11.242228896944281, | |
| "learning_rate": 9.950248756218907e-07, | |
| "loss": 0.4914, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0031404217137729925, | |
| "grad_norm": 11.163527479225325, | |
| "learning_rate": 1.044776119402985e-06, | |
| "loss": 0.5823, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0032899656049050394, | |
| "grad_norm": 9.100766388616314, | |
| "learning_rate": 1.0945273631840796e-06, | |
| "loss": 0.6887, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0034395094960370867, | |
| "grad_norm": 9.371427313022828, | |
| "learning_rate": 1.1442786069651742e-06, | |
| "loss": 0.3365, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.003589053387169134, | |
| "grad_norm": 6.591365654298028, | |
| "learning_rate": 1.1940298507462686e-06, | |
| "loss": 0.4092, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0037385972783011813, | |
| "grad_norm": 6.692920733889971, | |
| "learning_rate": 1.2437810945273632e-06, | |
| "loss": 0.4459, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0038881411694332286, | |
| "grad_norm": 6.609492289627464, | |
| "learning_rate": 1.2935323383084578e-06, | |
| "loss": 0.4577, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.004037685060565276, | |
| "grad_norm": 4.9115623336358, | |
| "learning_rate": 1.3432835820895524e-06, | |
| "loss": 0.5349, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.004187228951697323, | |
| "grad_norm": 5.117676678055004, | |
| "learning_rate": 1.393034825870647e-06, | |
| "loss": 0.5483, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0043367728428293706, | |
| "grad_norm": 5.263481949191207, | |
| "learning_rate": 1.4427860696517414e-06, | |
| "loss": 0.5991, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.004486316733961417, | |
| "grad_norm": 6.131569220022702, | |
| "learning_rate": 1.4925373134328358e-06, | |
| "loss": 0.3908, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.004635860625093465, | |
| "grad_norm": 5.928579435490833, | |
| "learning_rate": 1.5422885572139304e-06, | |
| "loss": 0.2084, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.004785404516225512, | |
| "grad_norm": 5.916757088180695, | |
| "learning_rate": 1.592039800995025e-06, | |
| "loss": 0.3858, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.00493494840735756, | |
| "grad_norm": 8.20423570651997, | |
| "learning_rate": 1.6417910447761196e-06, | |
| "loss": 0.2901, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.005084492298489607, | |
| "grad_norm": 8.219360009824356, | |
| "learning_rate": 1.6915422885572142e-06, | |
| "loss": 0.3919, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.005234036189621654, | |
| "grad_norm": 5.998450714995048, | |
| "learning_rate": 1.7412935323383088e-06, | |
| "loss": 0.2445, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.005383580080753701, | |
| "grad_norm": 4.267389037528284, | |
| "learning_rate": 1.791044776119403e-06, | |
| "loss": 0.2062, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.005533123971885748, | |
| "grad_norm": 5.463746992191978, | |
| "learning_rate": 1.8407960199004975e-06, | |
| "loss": 0.5357, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.005682667863017796, | |
| "grad_norm": 4.306281637510176, | |
| "learning_rate": 1.8905472636815921e-06, | |
| "loss": 0.1867, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.005832211754149843, | |
| "grad_norm": 6.551059942168939, | |
| "learning_rate": 1.9402985074626867e-06, | |
| "loss": 0.5944, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0059817556452818905, | |
| "grad_norm": 6.110559490141819, | |
| "learning_rate": 1.9900497512437813e-06, | |
| "loss": 0.6173, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.006131299536413937, | |
| "grad_norm": 4.577457366278138, | |
| "learning_rate": 2.0398009950248755e-06, | |
| "loss": 0.3634, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.006280843427545985, | |
| "grad_norm": 6.020057986889502, | |
| "learning_rate": 2.08955223880597e-06, | |
| "loss": 0.5398, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.006430387318678032, | |
| "grad_norm": 12.119213807947853, | |
| "learning_rate": 2.1393034825870647e-06, | |
| "loss": 0.2376, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.006579931209810079, | |
| "grad_norm": 4.977979102095054, | |
| "learning_rate": 2.1890547263681593e-06, | |
| "loss": 0.2455, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.006729475100942127, | |
| "grad_norm": 3.4274663141099166, | |
| "learning_rate": 2.238805970149254e-06, | |
| "loss": 0.2356, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0068790189920741734, | |
| "grad_norm": 4.552279062958819, | |
| "learning_rate": 2.2885572139303485e-06, | |
| "loss": 0.1681, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.007028562883206221, | |
| "grad_norm": 2.9323320786902496, | |
| "learning_rate": 2.338308457711443e-06, | |
| "loss": 0.2303, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.007178106774338268, | |
| "grad_norm": 4.623033466327724, | |
| "learning_rate": 2.3880597014925373e-06, | |
| "loss": 0.2404, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.007327650665470316, | |
| "grad_norm": 5.05007020882628, | |
| "learning_rate": 2.437810945273632e-06, | |
| "loss": 0.4128, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.007477194556602363, | |
| "grad_norm": 2.5237349934200273, | |
| "learning_rate": 2.4875621890547264e-06, | |
| "loss": 0.2196, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.00762673844773441, | |
| "grad_norm": 3.7483142878646594, | |
| "learning_rate": 2.537313432835821e-06, | |
| "loss": 0.1725, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.007776282338866457, | |
| "grad_norm": 4.032155563605261, | |
| "learning_rate": 2.5870646766169156e-06, | |
| "loss": 0.3821, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.007925826229998505, | |
| "grad_norm": 3.7782327104964333, | |
| "learning_rate": 2.6368159203980102e-06, | |
| "loss": 0.2207, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.008075370121130552, | |
| "grad_norm": 4.816720331969929, | |
| "learning_rate": 2.686567164179105e-06, | |
| "loss": 0.2265, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.008224914012262599, | |
| "grad_norm": 2.8481845548797478, | |
| "learning_rate": 2.736318407960199e-06, | |
| "loss": 0.2174, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.008374457903394646, | |
| "grad_norm": 4.501151176073331, | |
| "learning_rate": 2.786069651741294e-06, | |
| "loss": 0.2306, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.008524001794526694, | |
| "grad_norm": 4.326693136186164, | |
| "learning_rate": 2.835820895522388e-06, | |
| "loss": 0.4023, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.008673545685658741, | |
| "grad_norm": 4.061925818141106, | |
| "learning_rate": 2.885572139303483e-06, | |
| "loss": 0.7602, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.008823089576790788, | |
| "grad_norm": 6.144988240043741, | |
| "learning_rate": 2.9353233830845774e-06, | |
| "loss": 0.4451, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.008972633467922835, | |
| "grad_norm": 4.985549166627373, | |
| "learning_rate": 2.9850746268656716e-06, | |
| "loss": 0.4621, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.009122177359054883, | |
| "grad_norm": 3.192079125281125, | |
| "learning_rate": 3.0348258706467666e-06, | |
| "loss": 0.3694, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.00927172125018693, | |
| "grad_norm": 4.653619400771914, | |
| "learning_rate": 3.0845771144278608e-06, | |
| "loss": 0.2416, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.009421265141318977, | |
| "grad_norm": 3.4214006556775156, | |
| "learning_rate": 3.1343283582089558e-06, | |
| "loss": 0.4755, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.009570809032451024, | |
| "grad_norm": 3.0809019894250613, | |
| "learning_rate": 3.18407960199005e-06, | |
| "loss": 0.4154, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.009720352923583071, | |
| "grad_norm": 4.190290076677796, | |
| "learning_rate": 3.233830845771145e-06, | |
| "loss": 0.4362, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.00986989681471512, | |
| "grad_norm": 3.1777725686355356, | |
| "learning_rate": 3.283582089552239e-06, | |
| "loss": 0.3635, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.010019440705847166, | |
| "grad_norm": 2.592442539170553, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.1739, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.010168984596979213, | |
| "grad_norm": 4.610893839801018, | |
| "learning_rate": 3.3830845771144283e-06, | |
| "loss": 0.3845, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.01031852848811126, | |
| "grad_norm": 2.941030939381248, | |
| "learning_rate": 3.4328358208955225e-06, | |
| "loss": 0.226, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.010468072379243309, | |
| "grad_norm": 2.641062959772403, | |
| "learning_rate": 3.4825870646766175e-06, | |
| "loss": 0.2083, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.010617616270375356, | |
| "grad_norm": 4.573399002022637, | |
| "learning_rate": 3.5323383084577117e-06, | |
| "loss": 0.3639, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.010767160161507403, | |
| "grad_norm": 3.811597787697304, | |
| "learning_rate": 3.582089552238806e-06, | |
| "loss": 0.2046, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.01091670405263945, | |
| "grad_norm": 7.593654702612937, | |
| "learning_rate": 3.631840796019901e-06, | |
| "loss": 0.3831, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.011066247943771496, | |
| "grad_norm": 2.6372126137968013, | |
| "learning_rate": 3.681592039800995e-06, | |
| "loss": 0.2155, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.011215791834903545, | |
| "grad_norm": 3.401033168780161, | |
| "learning_rate": 3.73134328358209e-06, | |
| "loss": 0.2439, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.011365335726035592, | |
| "grad_norm": 2.8172647382036047, | |
| "learning_rate": 3.7810945273631843e-06, | |
| "loss": 0.1614, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.011514879617167639, | |
| "grad_norm": 3.525793180439174, | |
| "learning_rate": 3.8308457711442784e-06, | |
| "loss": 0.2176, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.011664423508299685, | |
| "grad_norm": 2.4029805525684527, | |
| "learning_rate": 3.8805970149253735e-06, | |
| "loss": 0.1893, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.011813967399431732, | |
| "grad_norm": 5.727795685387504, | |
| "learning_rate": 3.930348258706468e-06, | |
| "loss": 0.5702, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.011963511290563781, | |
| "grad_norm": 4.021893784746645, | |
| "learning_rate": 3.980099502487563e-06, | |
| "loss": 0.4027, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.012113055181695828, | |
| "grad_norm": 2.7773808558650535, | |
| "learning_rate": 4.029850746268657e-06, | |
| "loss": 0.2963, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.012262599072827875, | |
| "grad_norm": 3.4349426033049992, | |
| "learning_rate": 4.079601990049751e-06, | |
| "loss": 0.2211, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.012412142963959922, | |
| "grad_norm": 4.127258766074891, | |
| "learning_rate": 4.129353233830846e-06, | |
| "loss": 0.2516, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.01256168685509197, | |
| "grad_norm": 3.551977981988865, | |
| "learning_rate": 4.17910447761194e-06, | |
| "loss": 0.2206, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.012711230746224017, | |
| "grad_norm": 2.988554589230421, | |
| "learning_rate": 4.228855721393035e-06, | |
| "loss": 0.366, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.012860774637356064, | |
| "grad_norm": 3.256233912334862, | |
| "learning_rate": 4.278606965174129e-06, | |
| "loss": 0.341, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.01301031852848811, | |
| "grad_norm": 3.917242635149468, | |
| "learning_rate": 4.3283582089552236e-06, | |
| "loss": 0.281, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.013159862419620158, | |
| "grad_norm": 3.8372869351661247, | |
| "learning_rate": 4.378109452736319e-06, | |
| "loss": 0.1933, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.013309406310752206, | |
| "grad_norm": 4.03192980896834, | |
| "learning_rate": 4.427860696517413e-06, | |
| "loss": 0.184, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.013458950201884253, | |
| "grad_norm": 4.944440623197377, | |
| "learning_rate": 4.477611940298508e-06, | |
| "loss": 0.2406, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0136084940930163, | |
| "grad_norm": 3.2771345760625916, | |
| "learning_rate": 4.527363184079602e-06, | |
| "loss": 0.3635, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.013758037984148347, | |
| "grad_norm": 2.5552685161479913, | |
| "learning_rate": 4.577114427860697e-06, | |
| "loss": 0.3581, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.013907581875280395, | |
| "grad_norm": 3.825258197515859, | |
| "learning_rate": 4.626865671641791e-06, | |
| "loss": 0.2157, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.014057125766412442, | |
| "grad_norm": 3.820006828326968, | |
| "learning_rate": 4.676616915422886e-06, | |
| "loss": 0.401, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.01420666965754449, | |
| "grad_norm": 3.4269639891084056, | |
| "learning_rate": 4.72636815920398e-06, | |
| "loss": 0.21, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.014356213548676536, | |
| "grad_norm": 3.614177044324435, | |
| "learning_rate": 4.7761194029850745e-06, | |
| "loss": 0.2305, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.014505757439808583, | |
| "grad_norm": 2.8474787904051633, | |
| "learning_rate": 4.8258706467661695e-06, | |
| "loss": 0.2002, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.014655301330940632, | |
| "grad_norm": 3.1529185682156333, | |
| "learning_rate": 4.875621890547264e-06, | |
| "loss": 0.3126, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.014804845222072678, | |
| "grad_norm": 2.805579699726101, | |
| "learning_rate": 4.925373134328359e-06, | |
| "loss": 0.3977, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.014954389113204725, | |
| "grad_norm": 2.5072872378288134, | |
| "learning_rate": 4.975124378109453e-06, | |
| "loss": 0.1986, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.015103933004336772, | |
| "grad_norm": 2.8773082972301816, | |
| "learning_rate": 5.024875621890548e-06, | |
| "loss": 0.2421, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.01525347689546882, | |
| "grad_norm": 2.3650776175631765, | |
| "learning_rate": 5.074626865671642e-06, | |
| "loss": 0.1864, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.015403020786600868, | |
| "grad_norm": 4.721891286027898, | |
| "learning_rate": 5.124378109452737e-06, | |
| "loss": 0.2939, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.015552564677732915, | |
| "grad_norm": 2.6753396233648705, | |
| "learning_rate": 5.174129353233831e-06, | |
| "loss": 0.2558, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.01570210856886496, | |
| "grad_norm": 3.149876968312327, | |
| "learning_rate": 5.2238805970149255e-06, | |
| "loss": 0.3405, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.01585165245999701, | |
| "grad_norm": 1.6322197066205648, | |
| "learning_rate": 5.2736318407960205e-06, | |
| "loss": 0.1453, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.016001196351129055, | |
| "grad_norm": 3.3492234789043236, | |
| "learning_rate": 5.323383084577115e-06, | |
| "loss": 0.404, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.016150740242261104, | |
| "grad_norm": 2.2518951047915157, | |
| "learning_rate": 5.37313432835821e-06, | |
| "loss": 0.2278, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.016300284133393152, | |
| "grad_norm": 3.0471913491370404, | |
| "learning_rate": 5.422885572139304e-06, | |
| "loss": 0.265, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.016449828024525198, | |
| "grad_norm": 1.6928519222295142, | |
| "learning_rate": 5.472636815920398e-06, | |
| "loss": 0.2169, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.016599371915657246, | |
| "grad_norm": 3.265018826674296, | |
| "learning_rate": 5.522388059701493e-06, | |
| "loss": 0.429, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.01674891580678929, | |
| "grad_norm": 2.637671664378066, | |
| "learning_rate": 5.572139303482588e-06, | |
| "loss": 0.2762, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.01689845969792134, | |
| "grad_norm": 3.1617986987096134, | |
| "learning_rate": 5.621890547263682e-06, | |
| "loss": 0.4272, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.01704800358905339, | |
| "grad_norm": 3.0132316717807175, | |
| "learning_rate": 5.671641791044776e-06, | |
| "loss": 0.3644, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.017197547480185434, | |
| "grad_norm": 2.2850314864309813, | |
| "learning_rate": 5.721393034825871e-06, | |
| "loss": 0.1967, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.017347091371317482, | |
| "grad_norm": 3.0835871860462314, | |
| "learning_rate": 5.771144278606966e-06, | |
| "loss": 0.2322, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.017496635262449527, | |
| "grad_norm": 3.5275796788122893, | |
| "learning_rate": 5.820895522388061e-06, | |
| "loss": 0.3543, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.017646179153581576, | |
| "grad_norm": 3.1301356173345494, | |
| "learning_rate": 5.870646766169155e-06, | |
| "loss": 0.5064, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.017795723044713625, | |
| "grad_norm": 3.9689250366780313, | |
| "learning_rate": 5.920398009950249e-06, | |
| "loss": 0.8428, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.01794526693584567, | |
| "grad_norm": 2.6992548320472984, | |
| "learning_rate": 5.970149253731343e-06, | |
| "loss": 0.2727, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.01809481082697772, | |
| "grad_norm": 2.8823271138601414, | |
| "learning_rate": 6.019900497512439e-06, | |
| "loss": 0.3301, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.018244354718109767, | |
| "grad_norm": 2.652199321292131, | |
| "learning_rate": 6.069651741293533e-06, | |
| "loss": 0.234, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.018393898609241812, | |
| "grad_norm": 4.008459949806747, | |
| "learning_rate": 6.119402985074627e-06, | |
| "loss": 0.5713, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.01854344250037386, | |
| "grad_norm": 2.8867543983581236, | |
| "learning_rate": 6.1691542288557215e-06, | |
| "loss": 0.2146, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.018692986391505906, | |
| "grad_norm": 2.379666412119815, | |
| "learning_rate": 6.218905472636816e-06, | |
| "loss": 0.3812, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.018842530282637954, | |
| "grad_norm": 2.8364015730213716, | |
| "learning_rate": 6.2686567164179116e-06, | |
| "loss": 0.3729, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.018992074173770003, | |
| "grad_norm": 2.9731590306978957, | |
| "learning_rate": 6.318407960199006e-06, | |
| "loss": 0.3922, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.019141618064902048, | |
| "grad_norm": 2.431931443805707, | |
| "learning_rate": 6.3681592039801e-06, | |
| "loss": 0.2316, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.019291161956034097, | |
| "grad_norm": 2.5964092588685594, | |
| "learning_rate": 6.417910447761194e-06, | |
| "loss": 0.2129, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.019440705847166142, | |
| "grad_norm": 4.241711858566103, | |
| "learning_rate": 6.46766169154229e-06, | |
| "loss": 0.2677, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.01959024973829819, | |
| "grad_norm": 3.743763522090278, | |
| "learning_rate": 6.517412935323384e-06, | |
| "loss": 0.7324, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.01973979362943024, | |
| "grad_norm": 2.325325226468886, | |
| "learning_rate": 6.567164179104478e-06, | |
| "loss": 0.2282, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.019889337520562284, | |
| "grad_norm": 2.187485810642544, | |
| "learning_rate": 6.6169154228855725e-06, | |
| "loss": 0.3479, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.020038881411694333, | |
| "grad_norm": 2.555235252803596, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.3084, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.020188425302826378, | |
| "grad_norm": 2.1409254211343405, | |
| "learning_rate": 6.7164179104477625e-06, | |
| "loss": 0.2413, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.020337969193958427, | |
| "grad_norm": 2.9475030013466292, | |
| "learning_rate": 6.766169154228857e-06, | |
| "loss": 0.5899, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.020487513085090475, | |
| "grad_norm": 3.161190387153201, | |
| "learning_rate": 6.815920398009951e-06, | |
| "loss": 0.2722, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.02063705697622252, | |
| "grad_norm": 3.4231688087143786, | |
| "learning_rate": 6.865671641791045e-06, | |
| "loss": 0.25, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.02078660086735457, | |
| "grad_norm": 2.891852432700459, | |
| "learning_rate": 6.915422885572139e-06, | |
| "loss": 0.5206, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.020936144758486618, | |
| "grad_norm": 2.4149596821734645, | |
| "learning_rate": 6.965174129353235e-06, | |
| "loss": 0.2792, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.021085688649618663, | |
| "grad_norm": 2.737327253049286, | |
| "learning_rate": 7.014925373134329e-06, | |
| "loss": 0.1785, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.02123523254075071, | |
| "grad_norm": 2.271710572333297, | |
| "learning_rate": 7.064676616915423e-06, | |
| "loss": 0.2216, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.021384776431882756, | |
| "grad_norm": 3.123818135886555, | |
| "learning_rate": 7.114427860696518e-06, | |
| "loss": 0.5292, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.021534320323014805, | |
| "grad_norm": 3.4353230085188775, | |
| "learning_rate": 7.164179104477612e-06, | |
| "loss": 0.257, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.021683864214146854, | |
| "grad_norm": 3.292198842322858, | |
| "learning_rate": 7.213930348258708e-06, | |
| "loss": 0.4413, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.0218334081052789, | |
| "grad_norm": 2.408669543365234, | |
| "learning_rate": 7.263681592039802e-06, | |
| "loss": 0.4034, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.021982951996410947, | |
| "grad_norm": 2.918318139010717, | |
| "learning_rate": 7.313432835820896e-06, | |
| "loss": 0.1789, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.022132495887542993, | |
| "grad_norm": 2.016064943310167, | |
| "learning_rate": 7.36318407960199e-06, | |
| "loss": 0.2454, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.02228203977867504, | |
| "grad_norm": 3.375282717272202, | |
| "learning_rate": 7.412935323383084e-06, | |
| "loss": 0.5047, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.02243158366980709, | |
| "grad_norm": 2.747548142801912, | |
| "learning_rate": 7.46268656716418e-06, | |
| "loss": 0.3193, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.022581127560939135, | |
| "grad_norm": 5.014531999850111, | |
| "learning_rate": 7.512437810945274e-06, | |
| "loss": 0.5367, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.022730671452071183, | |
| "grad_norm": 1.7396197448467992, | |
| "learning_rate": 7.5621890547263685e-06, | |
| "loss": 0.1602, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.02288021534320323, | |
| "grad_norm": 3.9271159318267452, | |
| "learning_rate": 7.611940298507463e-06, | |
| "loss": 0.2763, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.023029759234335277, | |
| "grad_norm": 2.093726492507833, | |
| "learning_rate": 7.661691542288557e-06, | |
| "loss": 0.169, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.023179303125467326, | |
| "grad_norm": 1.5357011381308088, | |
| "learning_rate": 7.711442786069654e-06, | |
| "loss": 0.1619, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.02332884701659937, | |
| "grad_norm": 2.3824458230974863, | |
| "learning_rate": 7.761194029850747e-06, | |
| "loss": 0.2094, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.02347839090773142, | |
| "grad_norm": 2.8236663879690784, | |
| "learning_rate": 7.810945273631842e-06, | |
| "loss": 0.3426, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.023627934798863465, | |
| "grad_norm": 3.1375695638809815, | |
| "learning_rate": 7.860696517412935e-06, | |
| "loss": 0.5518, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.023777478689995513, | |
| "grad_norm": 3.2182906468856105, | |
| "learning_rate": 7.91044776119403e-06, | |
| "loss": 0.1995, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.023927022581127562, | |
| "grad_norm": 14.749841980168513, | |
| "learning_rate": 7.960199004975125e-06, | |
| "loss": 0.5578, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.024076566472259607, | |
| "grad_norm": 3.0100123201004045, | |
| "learning_rate": 8.00995024875622e-06, | |
| "loss": 0.5091, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.024226110363391656, | |
| "grad_norm": 3.5091520525666433, | |
| "learning_rate": 8.059701492537314e-06, | |
| "loss": 0.5357, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.024375654254523704, | |
| "grad_norm": 2.934851375582722, | |
| "learning_rate": 8.109452736318409e-06, | |
| "loss": 0.2267, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.02452519814565575, | |
| "grad_norm": 2.5911339240383544, | |
| "learning_rate": 8.159203980099502e-06, | |
| "loss": 0.1782, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.024674742036787798, | |
| "grad_norm": 2.847206263316536, | |
| "learning_rate": 8.208955223880599e-06, | |
| "loss": 0.2252, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.024824285927919843, | |
| "grad_norm": 3.5380431553535976, | |
| "learning_rate": 8.258706467661692e-06, | |
| "loss": 0.4295, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.024973829819051892, | |
| "grad_norm": 3.150492354924513, | |
| "learning_rate": 8.308457711442787e-06, | |
| "loss": 0.3276, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.02512337371018394, | |
| "grad_norm": 3.114695975436696, | |
| "learning_rate": 8.35820895522388e-06, | |
| "loss": 0.5181, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.025272917601315985, | |
| "grad_norm": 2.6180846619509355, | |
| "learning_rate": 8.407960199004975e-06, | |
| "loss": 0.2577, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.025422461492448034, | |
| "grad_norm": 1.859950631659999, | |
| "learning_rate": 8.45771144278607e-06, | |
| "loss": 0.1838, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.02557200538358008, | |
| "grad_norm": 4.092195798232618, | |
| "learning_rate": 8.507462686567165e-06, | |
| "loss": 0.2676, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.025721549274712128, | |
| "grad_norm": 2.0820308098425766, | |
| "learning_rate": 8.557213930348259e-06, | |
| "loss": 0.2528, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.025871093165844176, | |
| "grad_norm": 2.8153771201369087, | |
| "learning_rate": 8.606965174129354e-06, | |
| "loss": 0.3374, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.02602063705697622, | |
| "grad_norm": 2.6417342231989114, | |
| "learning_rate": 8.656716417910447e-06, | |
| "loss": 0.4309, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.02617018094810827, | |
| "grad_norm": 3.3553357791865825, | |
| "learning_rate": 8.706467661691544e-06, | |
| "loss": 0.279, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.026319724839240315, | |
| "grad_norm": 2.5896987414147707, | |
| "learning_rate": 8.756218905472637e-06, | |
| "loss": 0.2505, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.026469268730372364, | |
| "grad_norm": 15.917959164107543, | |
| "learning_rate": 8.805970149253732e-06, | |
| "loss": 0.3903, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.026618812621504413, | |
| "grad_norm": 1.897502276352634, | |
| "learning_rate": 8.855721393034826e-06, | |
| "loss": 0.3051, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.026768356512636458, | |
| "grad_norm": 3.498345426750877, | |
| "learning_rate": 8.905472636815922e-06, | |
| "loss": 0.8122, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.026917900403768506, | |
| "grad_norm": 3.2270107650642297, | |
| "learning_rate": 8.955223880597016e-06, | |
| "loss": 0.2312, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.027067444294900555, | |
| "grad_norm": 2.373617987334166, | |
| "learning_rate": 9.00497512437811e-06, | |
| "loss": 0.3553, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.0272169881860326, | |
| "grad_norm": 2.022495433415561, | |
| "learning_rate": 9.054726368159204e-06, | |
| "loss": 0.3372, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.02736653207716465, | |
| "grad_norm": 2.471303542690233, | |
| "learning_rate": 9.104477611940299e-06, | |
| "loss": 0.2764, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.027516075968296694, | |
| "grad_norm": 2.170550660433261, | |
| "learning_rate": 9.154228855721394e-06, | |
| "loss": 0.2429, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.027665619859428742, | |
| "grad_norm": 1.7750572924031363, | |
| "learning_rate": 9.203980099502489e-06, | |
| "loss": 0.1749, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.02781516375056079, | |
| "grad_norm": 1.9803173977955488, | |
| "learning_rate": 9.253731343283582e-06, | |
| "loss": 0.3061, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.027964707641692836, | |
| "grad_norm": 2.686793479118654, | |
| "learning_rate": 9.303482587064677e-06, | |
| "loss": 0.2704, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.028114251532824885, | |
| "grad_norm": 3.0095995560762088, | |
| "learning_rate": 9.353233830845772e-06, | |
| "loss": 0.3935, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.02826379542395693, | |
| "grad_norm": 3.296780241377357, | |
| "learning_rate": 9.402985074626867e-06, | |
| "loss": 0.4349, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.02841333931508898, | |
| "grad_norm": 2.0473844316492262, | |
| "learning_rate": 9.45273631840796e-06, | |
| "loss": 0.3594, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.028562883206221027, | |
| "grad_norm": 2.6746439974295986, | |
| "learning_rate": 9.502487562189056e-06, | |
| "loss": 0.2507, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.028712427097353072, | |
| "grad_norm": 2.171372767224107, | |
| "learning_rate": 9.552238805970149e-06, | |
| "loss": 0.4442, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.02886197098848512, | |
| "grad_norm": 3.412610878033882, | |
| "learning_rate": 9.601990049751244e-06, | |
| "loss": 0.5065, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.029011514879617166, | |
| "grad_norm": 2.5249672849820843, | |
| "learning_rate": 9.651741293532339e-06, | |
| "loss": 0.2775, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.029161058770749215, | |
| "grad_norm": 1.9244063665371054, | |
| "learning_rate": 9.701492537313434e-06, | |
| "loss": 0.2501, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.029310602661881263, | |
| "grad_norm": 2.2928756876943788, | |
| "learning_rate": 9.751243781094527e-06, | |
| "loss": 0.391, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.02946014655301331, | |
| "grad_norm": 3.2090175671059464, | |
| "learning_rate": 9.800995024875622e-06, | |
| "loss": 0.355, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.029609690444145357, | |
| "grad_norm": 2.564275054094989, | |
| "learning_rate": 9.850746268656717e-06, | |
| "loss": 0.3824, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.029759234335277406, | |
| "grad_norm": 2.2612313847384473, | |
| "learning_rate": 9.900497512437812e-06, | |
| "loss": 0.255, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.02990877822640945, | |
| "grad_norm": 2.867410801811384, | |
| "learning_rate": 9.950248756218906e-06, | |
| "loss": 0.2321, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0300583221175415, | |
| "grad_norm": 2.7017080308625316, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5355, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.030207866008673544, | |
| "grad_norm": 1.7563631058650533, | |
| "learning_rate": 9.999999413475907e-06, | |
| "loss": 0.2366, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.030357409899805593, | |
| "grad_norm": 2.7923486514729134, | |
| "learning_rate": 9.999997653903764e-06, | |
| "loss": 0.5735, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.03050695379093764, | |
| "grad_norm": 2.5477270678585935, | |
| "learning_rate": 9.999994721283985e-06, | |
| "loss": 0.2316, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.030656497682069687, | |
| "grad_norm": 1.6435827637040603, | |
| "learning_rate": 9.99999061561726e-06, | |
| "loss": 0.1958, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.030806041573201735, | |
| "grad_norm": 4.225438559077688, | |
| "learning_rate": 9.999985336904546e-06, | |
| "loss": 0.6052, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.03095558546433378, | |
| "grad_norm": 2.384218907777814, | |
| "learning_rate": 9.999978885147086e-06, | |
| "loss": 0.382, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.03110512935546583, | |
| "grad_norm": 3.082533240684358, | |
| "learning_rate": 9.999971260346394e-06, | |
| "loss": 0.4615, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.03125467324659788, | |
| "grad_norm": 2.126341746782405, | |
| "learning_rate": 9.999962462504259e-06, | |
| "loss": 0.3489, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.03140421713772992, | |
| "grad_norm": 2.3157719584793974, | |
| "learning_rate": 9.99995249162274e-06, | |
| "loss": 0.351, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.03155376102886197, | |
| "grad_norm": 3.2569828989709046, | |
| "learning_rate": 9.999941347704183e-06, | |
| "loss": 0.5452, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.03170330491999402, | |
| "grad_norm": 2.4010549422177747, | |
| "learning_rate": 9.999929030751199e-06, | |
| "loss": 0.5511, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.031852848811126065, | |
| "grad_norm": 2.2021354319659956, | |
| "learning_rate": 9.999915540766679e-06, | |
| "loss": 0.409, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.03200239270225811, | |
| "grad_norm": 2.7467598032746467, | |
| "learning_rate": 9.999900877753786e-06, | |
| "loss": 0.2769, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.03215193659339016, | |
| "grad_norm": 2.250991470386846, | |
| "learning_rate": 9.99988504171596e-06, | |
| "loss": 0.4243, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.03230148048452221, | |
| "grad_norm": 7.389570164962262, | |
| "learning_rate": 9.999868032656921e-06, | |
| "loss": 0.5661, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.03245102437565425, | |
| "grad_norm": 2.3232325152419904, | |
| "learning_rate": 9.999849850580653e-06, | |
| "loss": 0.3622, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.032600568266786305, | |
| "grad_norm": 2.8448629192721153, | |
| "learning_rate": 9.999830495491425e-06, | |
| "loss": 0.5013, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.03275011215791835, | |
| "grad_norm": 1.9203985094095042, | |
| "learning_rate": 9.99980996739378e-06, | |
| "loss": 0.2597, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.032899656049050395, | |
| "grad_norm": 2.1343351176097705, | |
| "learning_rate": 9.99978826629253e-06, | |
| "loss": 0.333, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.03304919994018244, | |
| "grad_norm": 2.675496675158128, | |
| "learning_rate": 9.999765392192766e-06, | |
| "loss": 0.4679, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.03319874383131449, | |
| "grad_norm": 2.954897252892918, | |
| "learning_rate": 9.99974134509986e-06, | |
| "loss": 0.5779, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.03334828772244654, | |
| "grad_norm": 3.164155125145253, | |
| "learning_rate": 9.999716125019448e-06, | |
| "loss": 0.5192, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.03349783161357858, | |
| "grad_norm": 2.9422429580445377, | |
| "learning_rate": 9.99968973195745e-06, | |
| "loss": 0.3514, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.033647375504710635, | |
| "grad_norm": 2.016818218277119, | |
| "learning_rate": 9.999662165920056e-06, | |
| "loss": 0.3657, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.03379691939584268, | |
| "grad_norm": 2.805692301474297, | |
| "learning_rate": 9.999633426913733e-06, | |
| "loss": 0.1912, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.033946463286974725, | |
| "grad_norm": 2.205403428118743, | |
| "learning_rate": 9.999603514945227e-06, | |
| "loss": 0.234, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.03409600717810678, | |
| "grad_norm": 2.013271573198516, | |
| "learning_rate": 9.999572430021553e-06, | |
| "loss": 0.464, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.03424555106923882, | |
| "grad_norm": 3.033803346792209, | |
| "learning_rate": 9.999540172150005e-06, | |
| "loss": 0.2599, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.03439509496037087, | |
| "grad_norm": 2.854186400231596, | |
| "learning_rate": 9.99950674133815e-06, | |
| "loss": 0.6431, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.03454463885150292, | |
| "grad_norm": 2.162434347622467, | |
| "learning_rate": 9.999472137593829e-06, | |
| "loss": 0.4779, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.034694182742634964, | |
| "grad_norm": 1.4691335020169023, | |
| "learning_rate": 9.999436360925165e-06, | |
| "loss": 0.1827, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.03484372663376701, | |
| "grad_norm": 1.6955188606947214, | |
| "learning_rate": 9.99939941134055e-06, | |
| "loss": 0.2336, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.034993270524899055, | |
| "grad_norm": 2.0710606069082167, | |
| "learning_rate": 9.99936128884865e-06, | |
| "loss": 0.3671, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.03514281441603111, | |
| "grad_norm": 2.128464465717484, | |
| "learning_rate": 9.999321993458411e-06, | |
| "loss": 0.2928, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.03529235830716315, | |
| "grad_norm": 1.9685227247781487, | |
| "learning_rate": 9.999281525179054e-06, | |
| "loss": 0.185, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.0354419021982952, | |
| "grad_norm": 2.3203573768463115, | |
| "learning_rate": 9.99923988402007e-06, | |
| "loss": 0.3733, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.03559144608942725, | |
| "grad_norm": 2.2161639851963457, | |
| "learning_rate": 9.99919706999123e-06, | |
| "loss": 0.4, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.035740989980559294, | |
| "grad_norm": 1.551687214387557, | |
| "learning_rate": 9.99915308310258e-06, | |
| "loss": 0.1723, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.03589053387169134, | |
| "grad_norm": 1.9544776771870587, | |
| "learning_rate": 9.999107923364436e-06, | |
| "loss": 0.2587, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.03604007776282339, | |
| "grad_norm": 2.1986380601508375, | |
| "learning_rate": 9.999061590787394e-06, | |
| "loss": 0.544, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.03618962165395544, | |
| "grad_norm": 2.5816888510040457, | |
| "learning_rate": 9.999014085382326e-06, | |
| "loss": 0.4619, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.03633916554508748, | |
| "grad_norm": 1.8291845348661409, | |
| "learning_rate": 9.998965407160377e-06, | |
| "loss": 0.2052, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.036488709436219534, | |
| "grad_norm": 3.167062575704647, | |
| "learning_rate": 9.998915556132966e-06, | |
| "loss": 0.6123, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.03663825332735158, | |
| "grad_norm": 1.8628898225455814, | |
| "learning_rate": 9.99886453231179e-06, | |
| "loss": 0.3634, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.036787797218483624, | |
| "grad_norm": 1.7903762911789451, | |
| "learning_rate": 9.998812335708818e-06, | |
| "loss": 0.2162, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.03693734110961567, | |
| "grad_norm": 1.3282642487848175, | |
| "learning_rate": 9.998758966336296e-06, | |
| "loss": 0.1875, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.03708688500074772, | |
| "grad_norm": 1.8364953512469955, | |
| "learning_rate": 9.998704424206747e-06, | |
| "loss": 0.208, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.037236428891879766, | |
| "grad_norm": 1.3941303606582691, | |
| "learning_rate": 9.998648709332965e-06, | |
| "loss": 0.1737, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.03738597278301181, | |
| "grad_norm": 1.7239196409011197, | |
| "learning_rate": 9.998591821728022e-06, | |
| "loss": 0.2339, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.037535516674143864, | |
| "grad_norm": 2.623262386600702, | |
| "learning_rate": 9.998533761405265e-06, | |
| "loss": 0.3988, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.03768506056527591, | |
| "grad_norm": 3.0417113736320354, | |
| "learning_rate": 9.998474528378315e-06, | |
| "loss": 0.3998, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.037834604456407954, | |
| "grad_norm": 2.3389769972346532, | |
| "learning_rate": 9.998414122661066e-06, | |
| "loss": 0.2157, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.037984148347540006, | |
| "grad_norm": 2.776666496961099, | |
| "learning_rate": 9.998352544267696e-06, | |
| "loss": 0.5598, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.03813369223867205, | |
| "grad_norm": 2.1472401976055746, | |
| "learning_rate": 9.998289793212645e-06, | |
| "loss": 0.2375, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.038283236129804096, | |
| "grad_norm": 2.258529852719024, | |
| "learning_rate": 9.99822586951064e-06, | |
| "loss": 0.257, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.03843278002093614, | |
| "grad_norm": 2.234662282588329, | |
| "learning_rate": 9.998160773176676e-06, | |
| "loss": 0.2513, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.038582323912068194, | |
| "grad_norm": 1.557075634748184, | |
| "learning_rate": 9.998094504226025e-06, | |
| "loss": 0.2154, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.03873186780320024, | |
| "grad_norm": 1.2782097805836874, | |
| "learning_rate": 9.998027062674236e-06, | |
| "loss": 0.1997, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.038881411694332284, | |
| "grad_norm": 1.5754692941437902, | |
| "learning_rate": 9.997958448537129e-06, | |
| "loss": 0.2271, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.039030955585464336, | |
| "grad_norm": 2.3273358127526516, | |
| "learning_rate": 9.997888661830803e-06, | |
| "loss": 0.4129, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.03918049947659638, | |
| "grad_norm": 2.5932478274973705, | |
| "learning_rate": 9.997817702571631e-06, | |
| "loss": 0.2762, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.039330043367728426, | |
| "grad_norm": 1.7415819067090217, | |
| "learning_rate": 9.99774557077626e-06, | |
| "loss": 0.2677, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.03947958725886048, | |
| "grad_norm": 2.1983315861883974, | |
| "learning_rate": 9.997672266461613e-06, | |
| "loss": 0.3412, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.03962913114999252, | |
| "grad_norm": 2.8445138272257666, | |
| "learning_rate": 9.997597789644889e-06, | |
| "loss": 0.3471, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.03977867504112457, | |
| "grad_norm": 2.6658347323464575, | |
| "learning_rate": 9.997522140343558e-06, | |
| "loss": 0.3785, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.03992821893225662, | |
| "grad_norm": 1.2913669477506569, | |
| "learning_rate": 9.997445318575371e-06, | |
| "loss": 0.2089, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.040077762823388666, | |
| "grad_norm": 2.440102551085522, | |
| "learning_rate": 9.99736732435835e-06, | |
| "loss": 0.5639, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.04022730671452071, | |
| "grad_norm": 2.252623935384866, | |
| "learning_rate": 9.997288157710795e-06, | |
| "loss": 0.447, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.040376850605652756, | |
| "grad_norm": 1.9038309319538977, | |
| "learning_rate": 9.997207818651273e-06, | |
| "loss": 0.2784, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04052639449678481, | |
| "grad_norm": 2.05316637395224, | |
| "learning_rate": 9.99712630719864e-06, | |
| "loss": 0.3874, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.04067593838791685, | |
| "grad_norm": 4.663034399257074, | |
| "learning_rate": 9.997043623372016e-06, | |
| "loss": 0.3558, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.0408254822790489, | |
| "grad_norm": 2.0324793909935375, | |
| "learning_rate": 9.996959767190799e-06, | |
| "loss": 0.3884, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.04097502617018095, | |
| "grad_norm": 2.1897027573531003, | |
| "learning_rate": 9.996874738674663e-06, | |
| "loss": 0.2372, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.041124570061312996, | |
| "grad_norm": 1.9410471939157525, | |
| "learning_rate": 9.996788537843558e-06, | |
| "loss": 0.3478, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.04127411395244504, | |
| "grad_norm": 3.650983914269082, | |
| "learning_rate": 9.996701164717704e-06, | |
| "loss": 0.4213, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.04142365784357709, | |
| "grad_norm": 3.067988013237884, | |
| "learning_rate": 9.996612619317602e-06, | |
| "loss": 0.7209, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.04157320173470914, | |
| "grad_norm": 2.5863303551652033, | |
| "learning_rate": 9.996522901664028e-06, | |
| "loss": 0.5418, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.04172274562584118, | |
| "grad_norm": 2.1885641779249476, | |
| "learning_rate": 9.996432011778026e-06, | |
| "loss": 0.371, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.041872289516973235, | |
| "grad_norm": 2.398824728854803, | |
| "learning_rate": 9.99633994968092e-06, | |
| "loss": 0.5508, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04202183340810528, | |
| "grad_norm": 1.5732032420608302, | |
| "learning_rate": 9.996246715394314e-06, | |
| "loss": 0.2468, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.042171377299237325, | |
| "grad_norm": 2.8532279807617944, | |
| "learning_rate": 9.996152308940075e-06, | |
| "loss": 0.5503, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.04232092119036937, | |
| "grad_norm": 2.4502727303222733, | |
| "learning_rate": 9.996056730340356e-06, | |
| "loss": 0.4046, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.04247046508150142, | |
| "grad_norm": 1.9272098426705169, | |
| "learning_rate": 9.995959979617578e-06, | |
| "loss": 0.3906, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.04262000897263347, | |
| "grad_norm": 2.290690335549339, | |
| "learning_rate": 9.995862056794441e-06, | |
| "loss": 0.2464, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.04276955286376551, | |
| "grad_norm": 1.656564250859485, | |
| "learning_rate": 9.99576296189392e-06, | |
| "loss": 0.1996, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.042919096754897565, | |
| "grad_norm": 2.1259148220336965, | |
| "learning_rate": 9.995662694939262e-06, | |
| "loss": 0.3994, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.04306864064602961, | |
| "grad_norm": 2.286901143642134, | |
| "learning_rate": 9.99556125595399e-06, | |
| "loss": 0.4047, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.043218184537161655, | |
| "grad_norm": 1.3559455912309712, | |
| "learning_rate": 9.995458644961902e-06, | |
| "loss": 0.2228, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.04336772842829371, | |
| "grad_norm": 2.285750924681825, | |
| "learning_rate": 9.995354861987075e-06, | |
| "loss": 0.2367, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.04351727231942575, | |
| "grad_norm": 1.923824453592428, | |
| "learning_rate": 9.995249907053854e-06, | |
| "loss": 0.3951, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.0436668162105578, | |
| "grad_norm": 1.968047953500074, | |
| "learning_rate": 9.995143780186865e-06, | |
| "loss": 0.2149, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.04381636010168984, | |
| "grad_norm": 2.3975790519132074, | |
| "learning_rate": 9.995036481411005e-06, | |
| "loss": 0.5312, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.043965903992821895, | |
| "grad_norm": 1.9664546058841197, | |
| "learning_rate": 9.994928010751447e-06, | |
| "loss": 0.4832, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.04411544788395394, | |
| "grad_norm": 2.1609011533249785, | |
| "learning_rate": 9.994818368233639e-06, | |
| "loss": 0.571, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.044264991775085985, | |
| "grad_norm": 1.2099666806993736, | |
| "learning_rate": 9.994707553883305e-06, | |
| "loss": 0.1801, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.04441453566621804, | |
| "grad_norm": 1.8811137964659612, | |
| "learning_rate": 9.994595567726444e-06, | |
| "loss": 0.2708, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.04456407955735008, | |
| "grad_norm": 1.6387011737954997, | |
| "learning_rate": 9.994482409789329e-06, | |
| "loss": 0.245, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.04471362344848213, | |
| "grad_norm": 2.4061797367092486, | |
| "learning_rate": 9.994368080098505e-06, | |
| "loss": 0.204, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.04486316733961418, | |
| "grad_norm": 2.555264958903577, | |
| "learning_rate": 9.994252578680796e-06, | |
| "loss": 0.5251, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.045012711230746225, | |
| "grad_norm": 3.1965886018503897, | |
| "learning_rate": 9.994135905563302e-06, | |
| "loss": 0.4353, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.04516225512187827, | |
| "grad_norm": 2.390530599961774, | |
| "learning_rate": 9.994018060773396e-06, | |
| "loss": 0.4199, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.04531179901301032, | |
| "grad_norm": 2.694731420269419, | |
| "learning_rate": 9.993899044338722e-06, | |
| "loss": 0.4029, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.04546134290414237, | |
| "grad_norm": 2.5518583518075437, | |
| "learning_rate": 9.993778856287205e-06, | |
| "loss": 0.3712, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.04561088679527441, | |
| "grad_norm": 1.958382495979976, | |
| "learning_rate": 9.99365749664704e-06, | |
| "loss": 0.3617, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.04576043068640646, | |
| "grad_norm": 2.299652220902115, | |
| "learning_rate": 9.993534965446701e-06, | |
| "loss": 0.4059, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.04590997457753851, | |
| "grad_norm": 4.086258301258261, | |
| "learning_rate": 9.993411262714934e-06, | |
| "loss": 0.2774, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.046059518468670554, | |
| "grad_norm": 2.0081624141767156, | |
| "learning_rate": 9.993286388480763e-06, | |
| "loss": 0.2724, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.0462090623598026, | |
| "grad_norm": 2.388037596587926, | |
| "learning_rate": 9.993160342773483e-06, | |
| "loss": 0.2706, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.04635860625093465, | |
| "grad_norm": 1.5868739255084185, | |
| "learning_rate": 9.993033125622665e-06, | |
| "loss": 0.256, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.0465081501420667, | |
| "grad_norm": 1.8286822342955051, | |
| "learning_rate": 9.992904737058157e-06, | |
| "loss": 0.209, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.04665769403319874, | |
| "grad_norm": 2.2060332987484306, | |
| "learning_rate": 9.992775177110078e-06, | |
| "loss": 0.4253, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.046807237924330794, | |
| "grad_norm": 1.39628419375001, | |
| "learning_rate": 9.992644445808826e-06, | |
| "loss": 0.1693, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.04695678181546284, | |
| "grad_norm": 1.5668060198088787, | |
| "learning_rate": 9.99251254318507e-06, | |
| "loss": 0.24, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.047106325706594884, | |
| "grad_norm": 1.998270389587923, | |
| "learning_rate": 9.992379469269758e-06, | |
| "loss": 0.2519, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.04725586959772693, | |
| "grad_norm": 1.9609810436779118, | |
| "learning_rate": 9.99224522409411e-06, | |
| "loss": 0.2023, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.04740541348885898, | |
| "grad_norm": 1.4580736241239847, | |
| "learning_rate": 9.992109807689619e-06, | |
| "loss": 0.2387, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.04755495737999103, | |
| "grad_norm": 2.710681694340303, | |
| "learning_rate": 9.991973220088057e-06, | |
| "loss": 0.6738, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.04770450127112307, | |
| "grad_norm": 1.2469776099691643, | |
| "learning_rate": 9.991835461321466e-06, | |
| "loss": 0.2013, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.047854045162255124, | |
| "grad_norm": 2.128896128779159, | |
| "learning_rate": 9.99169653142217e-06, | |
| "loss": 0.3432, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.04800358905338717, | |
| "grad_norm": 1.6053097848087672, | |
| "learning_rate": 9.991556430422759e-06, | |
| "loss": 0.2301, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.048153132944519214, | |
| "grad_norm": 1.7774787600035602, | |
| "learning_rate": 9.991415158356106e-06, | |
| "loss": 0.2535, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.048302676835651266, | |
| "grad_norm": 1.449815289318445, | |
| "learning_rate": 9.991272715255351e-06, | |
| "loss": 0.1878, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.04845222072678331, | |
| "grad_norm": 1.5118547669168991, | |
| "learning_rate": 9.991129101153916e-06, | |
| "loss": 0.3186, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.048601764617915356, | |
| "grad_norm": 1.461388444407636, | |
| "learning_rate": 9.99098431608549e-06, | |
| "loss": 0.1747, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.04875130850904741, | |
| "grad_norm": 2.3912366570769974, | |
| "learning_rate": 9.990838360084045e-06, | |
| "loss": 0.5325, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.048900852400179454, | |
| "grad_norm": 2.5611474084390937, | |
| "learning_rate": 9.990691233183823e-06, | |
| "loss": 0.2606, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.0490503962913115, | |
| "grad_norm": 2.21899436894442, | |
| "learning_rate": 9.990542935419341e-06, | |
| "loss": 0.4253, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.049199940182443544, | |
| "grad_norm": 1.6883179263006298, | |
| "learning_rate": 9.99039346682539e-06, | |
| "loss": 0.1768, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.049349484073575596, | |
| "grad_norm": 3.2358870266119006, | |
| "learning_rate": 9.990242827437036e-06, | |
| "loss": 0.7866, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.04949902796470764, | |
| "grad_norm": 2.0627143054944153, | |
| "learning_rate": 9.990091017289623e-06, | |
| "loss": 0.3286, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.049648571855839686, | |
| "grad_norm": 2.1246533005850523, | |
| "learning_rate": 9.989938036418766e-06, | |
| "loss": 0.2716, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.04979811574697174, | |
| "grad_norm": 2.6250279686209828, | |
| "learning_rate": 9.989783884860355e-06, | |
| "loss": 0.5058, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.049947659638103784, | |
| "grad_norm": 2.3409062617647627, | |
| "learning_rate": 9.989628562650558e-06, | |
| "loss": 0.2589, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.05009720352923583, | |
| "grad_norm": 1.835901073337933, | |
| "learning_rate": 9.989472069825811e-06, | |
| "loss": 0.3493, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.05024674742036788, | |
| "grad_norm": 2.2454393810241298, | |
| "learning_rate": 9.989314406422835e-06, | |
| "loss": 0.4113, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.050396291311499926, | |
| "grad_norm": 2.2906853778474674, | |
| "learning_rate": 9.989155572478611e-06, | |
| "loss": 0.5289, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.05054583520263197, | |
| "grad_norm": 2.3899442476389665, | |
| "learning_rate": 9.98899556803041e-06, | |
| "loss": 0.2174, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.05069537909376402, | |
| "grad_norm": 1.3681982854338133, | |
| "learning_rate": 9.988834393115768e-06, | |
| "loss": 0.2021, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.05084492298489607, | |
| "grad_norm": 1.5118760155287632, | |
| "learning_rate": 9.988672047772497e-06, | |
| "loss": 0.1927, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.05099446687602811, | |
| "grad_norm": 2.1144895431001105, | |
| "learning_rate": 9.988508532038685e-06, | |
| "loss": 0.3325, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.05114401076716016, | |
| "grad_norm": 1.8616803287346595, | |
| "learning_rate": 9.988343845952697e-06, | |
| "loss": 0.3018, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.05129355465829221, | |
| "grad_norm": 2.787967616575242, | |
| "learning_rate": 9.988177989553167e-06, | |
| "loss": 0.4641, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.051443098549424256, | |
| "grad_norm": 2.2905797584406242, | |
| "learning_rate": 9.98801096287901e-06, | |
| "loss": 0.5336, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.0515926424405563, | |
| "grad_norm": 1.769311364935245, | |
| "learning_rate": 9.987842765969408e-06, | |
| "loss": 0.2843, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.05174218633168835, | |
| "grad_norm": 1.7122732613639495, | |
| "learning_rate": 9.987673398863824e-06, | |
| "loss": 0.2272, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.0518917302228204, | |
| "grad_norm": 2.328359950454365, | |
| "learning_rate": 9.987502861601991e-06, | |
| "loss": 0.2645, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.05204127411395244, | |
| "grad_norm": 2.208277642399548, | |
| "learning_rate": 9.987331154223922e-06, | |
| "loss": 0.5877, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.052190818005084495, | |
| "grad_norm": 2.154817789687723, | |
| "learning_rate": 9.9871582767699e-06, | |
| "loss": 0.3414, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.05234036189621654, | |
| "grad_norm": 2.0510314098551814, | |
| "learning_rate": 9.986984229280483e-06, | |
| "loss": 0.3981, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.052489905787348586, | |
| "grad_norm": 2.346735661125246, | |
| "learning_rate": 9.986809011796503e-06, | |
| "loss": 0.6596, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.05263944967848063, | |
| "grad_norm": 1.641693244293744, | |
| "learning_rate": 9.98663262435907e-06, | |
| "loss": 0.3657, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.05278899356961268, | |
| "grad_norm": 2.240226359797858, | |
| "learning_rate": 9.986455067009566e-06, | |
| "loss": 0.3706, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.05293853746074473, | |
| "grad_norm": 2.3791485993411357, | |
| "learning_rate": 9.986276339789648e-06, | |
| "loss": 0.5428, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.05308808135187677, | |
| "grad_norm": 1.7806897327965683, | |
| "learning_rate": 9.986096442741241e-06, | |
| "loss": 0.2336, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.053237625243008825, | |
| "grad_norm": 1.8563417208131827, | |
| "learning_rate": 9.98591537590656e-06, | |
| "loss": 0.2129, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.05338716913414087, | |
| "grad_norm": 2.2115041121315895, | |
| "learning_rate": 9.98573313932808e-06, | |
| "loss": 0.5232, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.053536713025272915, | |
| "grad_norm": 1.3693709893910027, | |
| "learning_rate": 9.985549733048556e-06, | |
| "loss": 0.3524, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.05368625691640497, | |
| "grad_norm": 2.033727598383455, | |
| "learning_rate": 9.985365157111017e-06, | |
| "loss": 0.3987, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.05383580080753701, | |
| "grad_norm": 2.3258255541409505, | |
| "learning_rate": 9.985179411558767e-06, | |
| "loss": 0.5489, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.05398534469866906, | |
| "grad_norm": 2.0805855861837057, | |
| "learning_rate": 9.984992496435383e-06, | |
| "loss": 0.3982, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.05413488858980111, | |
| "grad_norm": 1.4938394292792039, | |
| "learning_rate": 9.984804411784717e-06, | |
| "loss": 0.2279, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.054284432480933155, | |
| "grad_norm": 1.935765339737269, | |
| "learning_rate": 9.984615157650896e-06, | |
| "loss": 0.2208, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.0544339763720652, | |
| "grad_norm": 2.294825440673555, | |
| "learning_rate": 9.98442473407832e-06, | |
| "loss": 0.4006, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.054583520263197245, | |
| "grad_norm": 1.7404498428206792, | |
| "learning_rate": 9.984233141111663e-06, | |
| "loss": 0.3859, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.0547330641543293, | |
| "grad_norm": 2.382616866788976, | |
| "learning_rate": 9.984040378795879e-06, | |
| "loss": 0.5393, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.05488260804546134, | |
| "grad_norm": 2.121310368782044, | |
| "learning_rate": 9.983846447176186e-06, | |
| "loss": 0.3808, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.05503215193659339, | |
| "grad_norm": 1.4327836947551182, | |
| "learning_rate": 9.983651346298089e-06, | |
| "loss": 0.21, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.05518169582772544, | |
| "grad_norm": 1.8551217286702022, | |
| "learning_rate": 9.983455076207353e-06, | |
| "loss": 0.3611, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.055331239718857485, | |
| "grad_norm": 1.1962615317465979, | |
| "learning_rate": 9.983257636950032e-06, | |
| "loss": 0.1632, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.05548078360998953, | |
| "grad_norm": 2.210937603202386, | |
| "learning_rate": 9.983059028572443e-06, | |
| "loss": 0.2054, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.05563032750112158, | |
| "grad_norm": 1.3676870965949202, | |
| "learning_rate": 9.982859251121183e-06, | |
| "loss": 0.2257, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.05577987139225363, | |
| "grad_norm": 1.877238753038072, | |
| "learning_rate": 9.98265830464312e-06, | |
| "loss": 0.3069, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.05592941528338567, | |
| "grad_norm": 2.6215120058588743, | |
| "learning_rate": 9.9824561891854e-06, | |
| "loss": 0.3812, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.056078959174517724, | |
| "grad_norm": 1.5353869053774183, | |
| "learning_rate": 9.982252904795437e-06, | |
| "loss": 0.3038, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.05622850306564977, | |
| "grad_norm": 1.5387274188562523, | |
| "learning_rate": 9.98204845152093e-06, | |
| "loss": 0.1784, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.056378046956781815, | |
| "grad_norm": 2.3221296907492444, | |
| "learning_rate": 9.981842829409842e-06, | |
| "loss": 0.4253, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.05652759084791386, | |
| "grad_norm": 1.8464138105889263, | |
| "learning_rate": 9.981636038510414e-06, | |
| "loss": 0.2137, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.05667713473904591, | |
| "grad_norm": 1.9213502252741161, | |
| "learning_rate": 9.98142807887116e-06, | |
| "loss": 0.2652, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.05682667863017796, | |
| "grad_norm": 1.7697460473662174, | |
| "learning_rate": 9.981218950540874e-06, | |
| "loss": 0.2525, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.05697622252131, | |
| "grad_norm": 2.001502054151958, | |
| "learning_rate": 9.981008653568613e-06, | |
| "loss": 0.3749, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.057125766412442054, | |
| "grad_norm": 1.7507480997796745, | |
| "learning_rate": 9.98079718800372e-06, | |
| "loss": 0.3293, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.0572753103035741, | |
| "grad_norm": 1.8995856376763527, | |
| "learning_rate": 9.980584553895805e-06, | |
| "loss": 0.2595, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.057424854194706144, | |
| "grad_norm": 1.6960817341003291, | |
| "learning_rate": 9.980370751294754e-06, | |
| "loss": 0.3214, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.057574398085838197, | |
| "grad_norm": 2.747620756274178, | |
| "learning_rate": 9.980155780250728e-06, | |
| "loss": 0.4678, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.05772394197697024, | |
| "grad_norm": 1.429295181164985, | |
| "learning_rate": 9.979939640814158e-06, | |
| "loss": 0.3417, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.05787348586810229, | |
| "grad_norm": 1.546941524577904, | |
| "learning_rate": 9.979722333035757e-06, | |
| "loss": 0.3017, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.05802302975923433, | |
| "grad_norm": 2.3243262803022753, | |
| "learning_rate": 9.979503856966504e-06, | |
| "loss": 0.3906, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.058172573650366384, | |
| "grad_norm": 1.5367077444523152, | |
| "learning_rate": 9.979284212657658e-06, | |
| "loss": 0.2735, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.05832211754149843, | |
| "grad_norm": 1.0259751361449947, | |
| "learning_rate": 9.979063400160747e-06, | |
| "loss": 0.1788, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.058471661432630474, | |
| "grad_norm": 1.7811616961442123, | |
| "learning_rate": 9.97884141952758e-06, | |
| "loss": 0.2071, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.058621205323762526, | |
| "grad_norm": 2.347009922116326, | |
| "learning_rate": 9.978618270810229e-06, | |
| "loss": 0.4248, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.05877074921489457, | |
| "grad_norm": 1.3076474084417338, | |
| "learning_rate": 9.978393954061052e-06, | |
| "loss": 0.1771, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.05892029310602662, | |
| "grad_norm": 2.4165379692755455, | |
| "learning_rate": 9.978168469332677e-06, | |
| "loss": 0.4913, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.05906983699715867, | |
| "grad_norm": 1.6584516839965744, | |
| "learning_rate": 9.977941816678e-06, | |
| "loss": 0.2292, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.059219380888290714, | |
| "grad_norm": 1.3323879687206615, | |
| "learning_rate": 9.9777139961502e-06, | |
| "loss": 0.2042, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.05936892477942276, | |
| "grad_norm": 1.242996863833067, | |
| "learning_rate": 9.977485007802725e-06, | |
| "loss": 0.1759, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.05951846867055481, | |
| "grad_norm": 2.0289613301318057, | |
| "learning_rate": 9.977254851689297e-06, | |
| "loss": 0.3391, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.059668012561686856, | |
| "grad_norm": 1.7111890076718022, | |
| "learning_rate": 9.977023527863913e-06, | |
| "loss": 0.318, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.0598175564528189, | |
| "grad_norm": 2.360289838407607, | |
| "learning_rate": 9.976791036380844e-06, | |
| "loss": 0.7436, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.059967100343950946, | |
| "grad_norm": 1.6556682149662436, | |
| "learning_rate": 9.976557377294634e-06, | |
| "loss": 0.3579, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.060116644235083, | |
| "grad_norm": 1.9472299876725607, | |
| "learning_rate": 9.976322550660103e-06, | |
| "loss": 0.3939, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.060266188126215044, | |
| "grad_norm": 1.2625006623785717, | |
| "learning_rate": 9.976086556532343e-06, | |
| "loss": 0.1777, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.06041573201734709, | |
| "grad_norm": 2.142440158571368, | |
| "learning_rate": 9.975849394966721e-06, | |
| "loss": 0.4728, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.06056527590847914, | |
| "grad_norm": 1.3109446375337697, | |
| "learning_rate": 9.975611066018876e-06, | |
| "loss": 0.2035, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.060714819799611186, | |
| "grad_norm": 1.473069250695052, | |
| "learning_rate": 9.975371569744723e-06, | |
| "loss": 0.2502, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.06086436369074323, | |
| "grad_norm": 1.4147256960977963, | |
| "learning_rate": 9.975130906200453e-06, | |
| "loss": 0.1861, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.06101390758187528, | |
| "grad_norm": 1.5107559691714745, | |
| "learning_rate": 9.97488907544252e-06, | |
| "loss": 0.2309, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.06116345147300733, | |
| "grad_norm": 1.5467720756101462, | |
| "learning_rate": 9.97464607752767e-06, | |
| "loss": 0.235, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.061312995364139374, | |
| "grad_norm": 1.2901444374034334, | |
| "learning_rate": 9.974401912512905e-06, | |
| "loss": 0.1877, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.061462539255271426, | |
| "grad_norm": 1.8751659558285558, | |
| "learning_rate": 9.974156580455512e-06, | |
| "loss": 0.2941, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.06161208314640347, | |
| "grad_norm": 1.2187366523072891, | |
| "learning_rate": 9.973910081413048e-06, | |
| "loss": 0.2, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.061761627037535516, | |
| "grad_norm": 2.56665763030278, | |
| "learning_rate": 9.973662415443342e-06, | |
| "loss": 0.4259, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.06191117092866756, | |
| "grad_norm": 1.5201509236946156, | |
| "learning_rate": 9.973413582604502e-06, | |
| "loss": 0.2098, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.06206071481979961, | |
| "grad_norm": 2.2299268067487183, | |
| "learning_rate": 9.973163582954903e-06, | |
| "loss": 0.5054, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.06221025871093166, | |
| "grad_norm": 2.195400724979985, | |
| "learning_rate": 9.972912416553202e-06, | |
| "loss": 0.3856, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.0623598026020637, | |
| "grad_norm": 2.3196273331545876, | |
| "learning_rate": 9.972660083458321e-06, | |
| "loss": 0.5608, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.06250934649319576, | |
| "grad_norm": 1.6815269422927719, | |
| "learning_rate": 9.97240658372946e-06, | |
| "loss": 0.3682, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.0626588903843278, | |
| "grad_norm": 1.7582779956751238, | |
| "learning_rate": 9.972151917426095e-06, | |
| "loss": 0.2256, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.06280843427545985, | |
| "grad_norm": 1.9523974169697056, | |
| "learning_rate": 9.97189608460797e-06, | |
| "loss": 0.2303, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.06295797816659189, | |
| "grad_norm": 2.120409254412015, | |
| "learning_rate": 9.97163908533511e-06, | |
| "loss": 0.2198, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.06310752205772394, | |
| "grad_norm": 1.7213130956608376, | |
| "learning_rate": 9.971380919667806e-06, | |
| "loss": 0.3355, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.063257065948856, | |
| "grad_norm": 1.6609701125154137, | |
| "learning_rate": 9.971121587666627e-06, | |
| "loss": 0.2354, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.06340660983998804, | |
| "grad_norm": 1.2809919353271448, | |
| "learning_rate": 9.970861089392415e-06, | |
| "loss": 0.2043, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.06355615373112009, | |
| "grad_norm": 1.137987748410028, | |
| "learning_rate": 9.970599424906285e-06, | |
| "loss": 0.1714, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.06370569762225213, | |
| "grad_norm": 2.241505455994119, | |
| "learning_rate": 9.970336594269627e-06, | |
| "loss": 0.559, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.06385524151338418, | |
| "grad_norm": 1.8145782296174282, | |
| "learning_rate": 9.970072597544102e-06, | |
| "loss": 0.4695, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.06400478540451622, | |
| "grad_norm": 2.6609160560733924, | |
| "learning_rate": 9.96980743479165e-06, | |
| "loss": 0.3927, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.06415432929564828, | |
| "grad_norm": 1.5902127205656447, | |
| "learning_rate": 9.969541106074477e-06, | |
| "loss": 0.3221, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.06430387318678032, | |
| "grad_norm": 1.354440824254012, | |
| "learning_rate": 9.969273611455066e-06, | |
| "loss": 0.1982, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.06445341707791237, | |
| "grad_norm": 2.1796464676908682, | |
| "learning_rate": 9.969004950996175e-06, | |
| "loss": 0.5947, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.06460296096904442, | |
| "grad_norm": 1.6772295444343943, | |
| "learning_rate": 9.968735124760834e-06, | |
| "loss": 0.3567, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.06475250486017646, | |
| "grad_norm": 2.326608368656497, | |
| "learning_rate": 9.968464132812348e-06, | |
| "loss": 0.3934, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.0649020487513085, | |
| "grad_norm": 1.9737750855760885, | |
| "learning_rate": 9.968191975214293e-06, | |
| "loss": 0.3936, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.06505159264244055, | |
| "grad_norm": 2.09687169461338, | |
| "learning_rate": 9.967918652030522e-06, | |
| "loss": 0.3644, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.06520113653357261, | |
| "grad_norm": 2.1122151786614967, | |
| "learning_rate": 9.967644163325157e-06, | |
| "loss": 0.2169, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.06535068042470465, | |
| "grad_norm": 1.8368706867911107, | |
| "learning_rate": 9.967368509162595e-06, | |
| "loss": 0.3956, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.0655002243158367, | |
| "grad_norm": 1.7823169737575542, | |
| "learning_rate": 9.96709168960751e-06, | |
| "loss": 0.232, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.06564976820696874, | |
| "grad_norm": 2.1565508943507194, | |
| "learning_rate": 9.966813704724844e-06, | |
| "loss": 0.2228, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.06579931209810079, | |
| "grad_norm": 2.2075342060994414, | |
| "learning_rate": 9.966534554579816e-06, | |
| "loss": 0.204, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.06594885598923284, | |
| "grad_norm": 2.0929887441012602, | |
| "learning_rate": 9.966254239237917e-06, | |
| "loss": 0.3946, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.06609839988036488, | |
| "grad_norm": 2.0382287962872834, | |
| "learning_rate": 9.965972758764912e-06, | |
| "loss": 0.4633, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.06624794377149694, | |
| "grad_norm": 1.2772439274586147, | |
| "learning_rate": 9.96569011322684e-06, | |
| "loss": 0.1784, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.06639748766262898, | |
| "grad_norm": 1.1024457344648066, | |
| "learning_rate": 9.965406302690011e-06, | |
| "loss": 0.1625, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.06654703155376103, | |
| "grad_norm": 1.2184559623271476, | |
| "learning_rate": 9.965121327221007e-06, | |
| "loss": 0.1959, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.06669657544489307, | |
| "grad_norm": 1.9215235980087064, | |
| "learning_rate": 9.964835186886692e-06, | |
| "loss": 0.2493, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.06684611933602512, | |
| "grad_norm": 2.1443052954533974, | |
| "learning_rate": 9.964547881754194e-06, | |
| "loss": 0.3611, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.06699566322715717, | |
| "grad_norm": 2.6967138020110712, | |
| "learning_rate": 9.964259411890918e-06, | |
| "loss": 0.5427, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.06714520711828922, | |
| "grad_norm": 1.688779610685555, | |
| "learning_rate": 9.96396977736454e-06, | |
| "loss": 0.2569, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.06729475100942127, | |
| "grad_norm": 2.1241026975378694, | |
| "learning_rate": 9.963678978243014e-06, | |
| "loss": 0.3863, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.06744429490055331, | |
| "grad_norm": 1.9388647656441462, | |
| "learning_rate": 9.96338701459456e-06, | |
| "loss": 0.2726, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.06759383879168536, | |
| "grad_norm": 1.4657993620125664, | |
| "learning_rate": 9.963093886487683e-06, | |
| "loss": 0.2338, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.0677433826828174, | |
| "grad_norm": 2.307173509923502, | |
| "learning_rate": 9.962799593991146e-06, | |
| "loss": 0.8039, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.06789292657394945, | |
| "grad_norm": 1.2669540134016812, | |
| "learning_rate": 9.962504137173997e-06, | |
| "loss": 0.169, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.0680424704650815, | |
| "grad_norm": 1.5981790001004936, | |
| "learning_rate": 9.962207516105552e-06, | |
| "loss": 0.2019, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.06819201435621355, | |
| "grad_norm": 1.740837427237262, | |
| "learning_rate": 9.9619097308554e-06, | |
| "loss": 0.2116, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.0683415582473456, | |
| "grad_norm": 1.9511590671787182, | |
| "learning_rate": 9.961610781493407e-06, | |
| "loss": 0.2611, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.06849110213847764, | |
| "grad_norm": 1.9814713665794252, | |
| "learning_rate": 9.961310668089708e-06, | |
| "loss": 0.3714, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.06864064602960969, | |
| "grad_norm": 2.755804773731971, | |
| "learning_rate": 9.96100939071471e-06, | |
| "loss": 0.5178, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.06879018992074173, | |
| "grad_norm": 2.5378159735000225, | |
| "learning_rate": 9.960706949439101e-06, | |
| "loss": 0.7334, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.06893973381187378, | |
| "grad_norm": 2.3557582569765003, | |
| "learning_rate": 9.960403344333832e-06, | |
| "loss": 0.5763, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.06908927770300584, | |
| "grad_norm": 1.6501148783544786, | |
| "learning_rate": 9.960098575470131e-06, | |
| "loss": 0.3681, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.06923882159413788, | |
| "grad_norm": 1.3521314881367383, | |
| "learning_rate": 9.959792642919505e-06, | |
| "loss": 0.216, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.06938836548526993, | |
| "grad_norm": 1.9967115308447656, | |
| "learning_rate": 9.959485546753724e-06, | |
| "loss": 0.4411, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.06953790937640197, | |
| "grad_norm": 1.6934835527025132, | |
| "learning_rate": 9.959177287044839e-06, | |
| "loss": 0.3013, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.06968745326753402, | |
| "grad_norm": 2.1881268216288703, | |
| "learning_rate": 9.958867863865168e-06, | |
| "loss": 0.386, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.06983699715866606, | |
| "grad_norm": 1.746249573857031, | |
| "learning_rate": 9.958557277287307e-06, | |
| "loss": 0.3486, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.06998654104979811, | |
| "grad_norm": 1.3309239290400467, | |
| "learning_rate": 9.958245527384118e-06, | |
| "loss": 0.2512, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.07013608494093017, | |
| "grad_norm": 1.780095751208227, | |
| "learning_rate": 9.957932614228746e-06, | |
| "loss": 0.3579, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.07028562883206221, | |
| "grad_norm": 2.058627302052003, | |
| "learning_rate": 9.957618537894602e-06, | |
| "loss": 0.2234, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07043517272319426, | |
| "grad_norm": 2.1643867800571286, | |
| "learning_rate": 9.95730329845537e-06, | |
| "loss": 0.2658, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.0705847166143263, | |
| "grad_norm": 1.9162877246393155, | |
| "learning_rate": 9.956986895985009e-06, | |
| "loss": 0.3514, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.07073426050545835, | |
| "grad_norm": 2.0198300655217474, | |
| "learning_rate": 9.95666933055775e-06, | |
| "loss": 0.4191, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.0708838043965904, | |
| "grad_norm": 1.8174642496449622, | |
| "learning_rate": 9.956350602248095e-06, | |
| "loss": 0.1802, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.07103334828772245, | |
| "grad_norm": 1.7641599345266465, | |
| "learning_rate": 9.956030711130824e-06, | |
| "loss": 0.2181, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.0711828921788545, | |
| "grad_norm": 1.5149058769435404, | |
| "learning_rate": 9.955709657280985e-06, | |
| "loss": 0.2068, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.07133243606998654, | |
| "grad_norm": 2.14267612952952, | |
| "learning_rate": 9.955387440773902e-06, | |
| "loss": 0.2799, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.07148197996111859, | |
| "grad_norm": 1.8794948861297893, | |
| "learning_rate": 9.955064061685166e-06, | |
| "loss": 0.3437, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.07163152385225063, | |
| "grad_norm": 1.595856928796192, | |
| "learning_rate": 9.954739520090649e-06, | |
| "loss": 0.1741, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.07178106774338268, | |
| "grad_norm": 1.4775459266699813, | |
| "learning_rate": 9.95441381606649e-06, | |
| "loss": 0.2009, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.07193061163451472, | |
| "grad_norm": 1.4624583034603231, | |
| "learning_rate": 9.954086949689102e-06, | |
| "loss": 0.2413, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.07208015552564678, | |
| "grad_norm": 1.5685428117813849, | |
| "learning_rate": 9.953758921035171e-06, | |
| "loss": 0.2381, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.07222969941677883, | |
| "grad_norm": 2.0490413587537524, | |
| "learning_rate": 9.953429730181653e-06, | |
| "loss": 0.4092, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.07237924330791087, | |
| "grad_norm": 2.605633491672469, | |
| "learning_rate": 9.953099377205786e-06, | |
| "loss": 0.56, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.07252878719904292, | |
| "grad_norm": 1.6836189923086853, | |
| "learning_rate": 9.952767862185071e-06, | |
| "loss": 0.3514, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.07267833109017496, | |
| "grad_norm": 2.165692386982445, | |
| "learning_rate": 9.952435185197281e-06, | |
| "loss": 0.4363, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.07282787498130701, | |
| "grad_norm": 2.328987566639375, | |
| "learning_rate": 9.952101346320471e-06, | |
| "loss": 0.5953, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.07297741887243907, | |
| "grad_norm": 1.857109300243422, | |
| "learning_rate": 9.951766345632957e-06, | |
| "loss": 0.4125, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.07312696276357111, | |
| "grad_norm": 1.780608988332075, | |
| "learning_rate": 9.951430183213338e-06, | |
| "loss": 0.2793, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.07327650665470316, | |
| "grad_norm": 1.2718866410706833, | |
| "learning_rate": 9.951092859140479e-06, | |
| "loss": 0.1878, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.0734260505458352, | |
| "grad_norm": 1.389385388824981, | |
| "learning_rate": 9.95075437349352e-06, | |
| "loss": 0.1922, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.07357559443696725, | |
| "grad_norm": 1.2364018773804621, | |
| "learning_rate": 9.950414726351873e-06, | |
| "loss": 0.1972, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.0737251383280993, | |
| "grad_norm": 1.6438922682719497, | |
| "learning_rate": 9.95007391779522e-06, | |
| "loss": 0.3835, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.07387468221923134, | |
| "grad_norm": 1.9223258334837023, | |
| "learning_rate": 9.949731947903523e-06, | |
| "loss": 0.5421, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.0740242261103634, | |
| "grad_norm": 2.1294087718057955, | |
| "learning_rate": 9.949388816757009e-06, | |
| "loss": 0.6584, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.07417377000149544, | |
| "grad_norm": 1.9620720670123732, | |
| "learning_rate": 9.949044524436178e-06, | |
| "loss": 0.3427, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.07432331389262749, | |
| "grad_norm": 1.8767982308843718, | |
| "learning_rate": 9.948699071021806e-06, | |
| "loss": 0.2221, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.07447285778375953, | |
| "grad_norm": 1.5717369659821445, | |
| "learning_rate": 9.948352456594938e-06, | |
| "loss": 0.3915, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.07462240167489158, | |
| "grad_norm": 1.9105988284269253, | |
| "learning_rate": 9.948004681236896e-06, | |
| "loss": 0.4049, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.07477194556602362, | |
| "grad_norm": 2.051255434710168, | |
| "learning_rate": 9.94765574502927e-06, | |
| "loss": 0.263, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07492148945715567, | |
| "grad_norm": 1.1727115808022262, | |
| "learning_rate": 9.947305648053924e-06, | |
| "loss": 0.2061, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.07507103334828773, | |
| "grad_norm": 2.3851218898633566, | |
| "learning_rate": 9.946954390392995e-06, | |
| "loss": 0.3587, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.07522057723941977, | |
| "grad_norm": 2.668333899893354, | |
| "learning_rate": 9.94660197212889e-06, | |
| "loss": 0.279, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.07537012113055182, | |
| "grad_norm": 2.324044177768054, | |
| "learning_rate": 9.946248393344289e-06, | |
| "loss": 0.5219, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.07551966502168386, | |
| "grad_norm": 2.252535927387564, | |
| "learning_rate": 9.945893654122147e-06, | |
| "loss": 0.4462, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.07566920891281591, | |
| "grad_norm": 1.2553962948323492, | |
| "learning_rate": 9.945537754545689e-06, | |
| "loss": 0.1829, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.07581875280394795, | |
| "grad_norm": 2.009514792075129, | |
| "learning_rate": 9.94518069469841e-06, | |
| "loss": 0.334, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.07596829669508001, | |
| "grad_norm": 1.7045023449590413, | |
| "learning_rate": 9.944822474664082e-06, | |
| "loss": 0.3202, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.07611784058621206, | |
| "grad_norm": 1.0508191419172128, | |
| "learning_rate": 9.944463094526747e-06, | |
| "loss": 0.205, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.0762673844773441, | |
| "grad_norm": 1.6097293192900886, | |
| "learning_rate": 9.944102554370718e-06, | |
| "loss": 0.2324, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.07641692836847615, | |
| "grad_norm": 1.9399148366487866, | |
| "learning_rate": 9.943740854280582e-06, | |
| "loss": 0.4526, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.07656647225960819, | |
| "grad_norm": 2.0362256511499335, | |
| "learning_rate": 9.943377994341197e-06, | |
| "loss": 0.3979, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.07671601615074024, | |
| "grad_norm": 1.5296316888698338, | |
| "learning_rate": 9.943013974637693e-06, | |
| "loss": 0.3789, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.07686556004187228, | |
| "grad_norm": 1.496691000675503, | |
| "learning_rate": 9.942648795255473e-06, | |
| "loss": 0.2497, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.07701510393300434, | |
| "grad_norm": 1.4146486247851384, | |
| "learning_rate": 9.942282456280212e-06, | |
| "loss": 0.3088, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.07716464782413639, | |
| "grad_norm": 1.3671722765483707, | |
| "learning_rate": 9.941914957797855e-06, | |
| "loss": 0.2076, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.07731419171526843, | |
| "grad_norm": 1.8485057563465108, | |
| "learning_rate": 9.941546299894623e-06, | |
| "loss": 0.3676, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.07746373560640048, | |
| "grad_norm": 2.0438588429845255, | |
| "learning_rate": 9.941176482657005e-06, | |
| "loss": 0.4905, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.07761327949753252, | |
| "grad_norm": 1.3215533906334498, | |
| "learning_rate": 9.940805506171765e-06, | |
| "loss": 0.2028, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.07776282338866457, | |
| "grad_norm": 2.499241081917891, | |
| "learning_rate": 9.940433370525937e-06, | |
| "loss": 0.4323, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.07791236727979663, | |
| "grad_norm": 1.4654220634749195, | |
| "learning_rate": 9.940060075806827e-06, | |
| "loss": 0.1928, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.07806191117092867, | |
| "grad_norm": 2.32501667334618, | |
| "learning_rate": 9.939685622102013e-06, | |
| "loss": 0.6039, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.07821145506206072, | |
| "grad_norm": 2.0353313744113644, | |
| "learning_rate": 9.939310009499348e-06, | |
| "loss": 0.434, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.07836099895319276, | |
| "grad_norm": 1.5916248439200642, | |
| "learning_rate": 9.938933238086952e-06, | |
| "loss": 0.2484, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.07851054284432481, | |
| "grad_norm": 1.510761606083, | |
| "learning_rate": 9.938555307953221e-06, | |
| "loss": 0.2761, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.07866008673545685, | |
| "grad_norm": 1.6041562012438388, | |
| "learning_rate": 9.93817621918682e-06, | |
| "loss": 0.3032, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.0788096306265889, | |
| "grad_norm": 1.5831322947558841, | |
| "learning_rate": 9.937795971876686e-06, | |
| "loss": 0.3486, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.07895917451772096, | |
| "grad_norm": 2.2247878916503856, | |
| "learning_rate": 9.93741456611203e-06, | |
| "loss": 0.4087, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.079108718408853, | |
| "grad_norm": 2.152252638423622, | |
| "learning_rate": 9.937032001982334e-06, | |
| "loss": 0.5629, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.07925826229998505, | |
| "grad_norm": 2.0483514105705525, | |
| "learning_rate": 9.93664827957735e-06, | |
| "loss": 0.5279, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.07940780619111709, | |
| "grad_norm": 1.2448870158155207, | |
| "learning_rate": 9.936263398987103e-06, | |
| "loss": 0.3744, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.07955735008224914, | |
| "grad_norm": 0.9489762178863248, | |
| "learning_rate": 9.93587736030189e-06, | |
| "loss": 0.1631, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.07970689397338118, | |
| "grad_norm": 1.3545590640653586, | |
| "learning_rate": 9.935490163612279e-06, | |
| "loss": 0.1975, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.07985643786451324, | |
| "grad_norm": 1.3663228011672384, | |
| "learning_rate": 9.93510180900911e-06, | |
| "loss": 0.184, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.08000598175564529, | |
| "grad_norm": 1.5768436668872405, | |
| "learning_rate": 9.934712296583497e-06, | |
| "loss": 0.3183, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.08015552564677733, | |
| "grad_norm": 1.926347057489139, | |
| "learning_rate": 9.93432162642682e-06, | |
| "loss": 0.3305, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.08030506953790938, | |
| "grad_norm": 2.0791782850566474, | |
| "learning_rate": 9.933929798630738e-06, | |
| "loss": 0.5009, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.08045461342904142, | |
| "grad_norm": 2.1023331544425523, | |
| "learning_rate": 9.933536813287172e-06, | |
| "loss": 0.4292, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.08060415732017347, | |
| "grad_norm": 2.8605361415271493, | |
| "learning_rate": 9.933142670488324e-06, | |
| "loss": 0.2666, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.08075370121130551, | |
| "grad_norm": 2.7087693572573968, | |
| "learning_rate": 9.932747370326664e-06, | |
| "loss": 0.2544, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.08090324510243757, | |
| "grad_norm": 1.5804074183588281, | |
| "learning_rate": 9.932350912894932e-06, | |
| "loss": 0.2089, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.08105278899356962, | |
| "grad_norm": 1.6448934387271092, | |
| "learning_rate": 9.931953298286141e-06, | |
| "loss": 0.181, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.08120233288470166, | |
| "grad_norm": 1.373017928034036, | |
| "learning_rate": 9.931554526593576e-06, | |
| "loss": 0.3218, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.0813518767758337, | |
| "grad_norm": 1.4895748889012388, | |
| "learning_rate": 9.931154597910791e-06, | |
| "loss": 0.2472, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.08150142066696575, | |
| "grad_norm": 2.064608760225509, | |
| "learning_rate": 9.930753512331615e-06, | |
| "loss": 0.3765, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.0816509645580978, | |
| "grad_norm": 1.6526846905937504, | |
| "learning_rate": 9.930351269950144e-06, | |
| "loss": 0.3177, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.08180050844922986, | |
| "grad_norm": 2.047798829134187, | |
| "learning_rate": 9.92994787086075e-06, | |
| "loss": 0.3192, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.0819500523403619, | |
| "grad_norm": 2.122394373762569, | |
| "learning_rate": 9.929543315158073e-06, | |
| "loss": 0.5554, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.08209959623149395, | |
| "grad_norm": 2.311960518258969, | |
| "learning_rate": 9.929137602937028e-06, | |
| "loss": 0.3797, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.08224914012262599, | |
| "grad_norm": 1.8449832380251867, | |
| "learning_rate": 9.928730734292797e-06, | |
| "loss": 0.3894, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.08239868401375804, | |
| "grad_norm": 1.995255157883457, | |
| "learning_rate": 9.928322709320834e-06, | |
| "loss": 0.3925, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.08254822790489008, | |
| "grad_norm": 2.755405061449222, | |
| "learning_rate": 9.92791352811687e-06, | |
| "loss": 0.6899, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.08269777179602213, | |
| "grad_norm": 1.2254981142470793, | |
| "learning_rate": 9.9275031907769e-06, | |
| "loss": 0.2225, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.08284731568715419, | |
| "grad_norm": 1.9323036995913243, | |
| "learning_rate": 9.927091697397192e-06, | |
| "loss": 0.3865, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.08299685957828623, | |
| "grad_norm": 2.0962863974348593, | |
| "learning_rate": 9.926679048074289e-06, | |
| "loss": 0.4, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.08314640346941828, | |
| "grad_norm": 1.5847691098448267, | |
| "learning_rate": 9.926265242904998e-06, | |
| "loss": 0.247, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.08329594736055032, | |
| "grad_norm": 2.5967594290859903, | |
| "learning_rate": 9.925850281986408e-06, | |
| "loss": 0.2083, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.08344549125168237, | |
| "grad_norm": 2.0426826933231226, | |
| "learning_rate": 9.925434165415868e-06, | |
| "loss": 0.449, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.08359503514281441, | |
| "grad_norm": 1.7693278888452375, | |
| "learning_rate": 9.925016893291007e-06, | |
| "loss": 0.2789, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.08374457903394647, | |
| "grad_norm": 1.6227416269049326, | |
| "learning_rate": 9.924598465709717e-06, | |
| "loss": 0.2209, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.08389412292507852, | |
| "grad_norm": 1.7055307729140163, | |
| "learning_rate": 9.924178882770166e-06, | |
| "loss": 0.3554, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.08404366681621056, | |
| "grad_norm": 1.9245436136675982, | |
| "learning_rate": 9.923758144570792e-06, | |
| "loss": 0.5343, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.0841932107073426, | |
| "grad_norm": 1.3916186974123048, | |
| "learning_rate": 9.923336251210306e-06, | |
| "loss": 0.2328, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.08434275459847465, | |
| "grad_norm": 1.8724253939088875, | |
| "learning_rate": 9.92291320278769e-06, | |
| "loss": 0.2691, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.0844922984896067, | |
| "grad_norm": 1.545927153493535, | |
| "learning_rate": 9.922488999402191e-06, | |
| "loss": 0.2049, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.08464184238073874, | |
| "grad_norm": 2.216312298348258, | |
| "learning_rate": 9.922063641153332e-06, | |
| "loss": 0.5844, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.0847913862718708, | |
| "grad_norm": 1.2444734652143745, | |
| "learning_rate": 9.921637128140909e-06, | |
| "loss": 0.2872, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.08494093016300285, | |
| "grad_norm": 2.133851301389792, | |
| "learning_rate": 9.921209460464983e-06, | |
| "loss": 0.2418, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.08509047405413489, | |
| "grad_norm": 1.5462263702909163, | |
| "learning_rate": 9.92078063822589e-06, | |
| "loss": 0.3438, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.08524001794526694, | |
| "grad_norm": 2.341879963295622, | |
| "learning_rate": 9.920350661524237e-06, | |
| "loss": 0.5783, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.08538956183639898, | |
| "grad_norm": 1.7633187330163729, | |
| "learning_rate": 9.919919530460899e-06, | |
| "loss": 0.3503, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.08553910572753103, | |
| "grad_norm": 2.1676160714531107, | |
| "learning_rate": 9.919487245137024e-06, | |
| "loss": 0.2098, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.08568864961866307, | |
| "grad_norm": 2.198855334486466, | |
| "learning_rate": 9.919053805654029e-06, | |
| "loss": 0.3876, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.08583819350979513, | |
| "grad_norm": 1.821472616891953, | |
| "learning_rate": 9.918619212113607e-06, | |
| "loss": 0.391, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.08598773740092717, | |
| "grad_norm": 1.4553776733520012, | |
| "learning_rate": 9.918183464617714e-06, | |
| "loss": 0.2032, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.08613728129205922, | |
| "grad_norm": 1.5817735791823646, | |
| "learning_rate": 9.917746563268581e-06, | |
| "loss": 0.2658, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.08628682518319127, | |
| "grad_norm": 2.255323258805483, | |
| "learning_rate": 9.917308508168712e-06, | |
| "loss": 0.39, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.08643636907432331, | |
| "grad_norm": 1.699175902078527, | |
| "learning_rate": 9.916869299420875e-06, | |
| "loss": 0.1906, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.08658591296545536, | |
| "grad_norm": 1.5572993513277051, | |
| "learning_rate": 9.916428937128117e-06, | |
| "loss": 0.3438, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.08673545685658741, | |
| "grad_norm": 1.5095119263162684, | |
| "learning_rate": 9.915987421393747e-06, | |
| "loss": 0.272, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.08688500074771946, | |
| "grad_norm": 2.8137128440101735, | |
| "learning_rate": 9.91554475232135e-06, | |
| "loss": 0.3833, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.0870345446388515, | |
| "grad_norm": 1.845156278788705, | |
| "learning_rate": 9.915100930014786e-06, | |
| "loss": 0.4658, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.08718408852998355, | |
| "grad_norm": 1.7624433765379017, | |
| "learning_rate": 9.914655954578171e-06, | |
| "loss": 0.3968, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.0873336324211156, | |
| "grad_norm": 1.7915618837196812, | |
| "learning_rate": 9.914209826115906e-06, | |
| "loss": 0.4901, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.08748317631224764, | |
| "grad_norm": 1.8335500777788887, | |
| "learning_rate": 9.913762544732654e-06, | |
| "loss": 0.249, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.08763272020337969, | |
| "grad_norm": 1.5116580783389033, | |
| "learning_rate": 9.913314110533355e-06, | |
| "loss": 0.3999, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.08778226409451174, | |
| "grad_norm": 1.9828537343745032, | |
| "learning_rate": 9.912864523623214e-06, | |
| "loss": 0.4153, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.08793180798564379, | |
| "grad_norm": 1.6056147158647165, | |
| "learning_rate": 9.912413784107709e-06, | |
| "loss": 0.357, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.08808135187677583, | |
| "grad_norm": 1.7642170812152784, | |
| "learning_rate": 9.911961892092587e-06, | |
| "loss": 0.3425, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.08823089576790788, | |
| "grad_norm": 1.925307511563271, | |
| "learning_rate": 9.911508847683867e-06, | |
| "loss": 0.4476, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.08838043965903992, | |
| "grad_norm": 1.9824372539957273, | |
| "learning_rate": 9.911054650987837e-06, | |
| "loss": 0.4597, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.08852998355017197, | |
| "grad_norm": 1.5805088418089035, | |
| "learning_rate": 9.910599302111057e-06, | |
| "loss": 0.1935, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.08867952744130403, | |
| "grad_norm": 2.157404890931188, | |
| "learning_rate": 9.910142801160355e-06, | |
| "loss": 0.3443, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.08882907133243607, | |
| "grad_norm": 2.094900000445731, | |
| "learning_rate": 9.909685148242831e-06, | |
| "loss": 0.404, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.08897861522356812, | |
| "grad_norm": 2.336415519412793, | |
| "learning_rate": 9.909226343465856e-06, | |
| "loss": 0.6382, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.08912815911470016, | |
| "grad_norm": 2.0552137049182497, | |
| "learning_rate": 9.908766386937067e-06, | |
| "loss": 0.3908, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.08927770300583221, | |
| "grad_norm": 1.1564393734179468, | |
| "learning_rate": 9.908305278764376e-06, | |
| "loss": 0.2457, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.08942724689696425, | |
| "grad_norm": 1.8704284289450437, | |
| "learning_rate": 9.907843019055966e-06, | |
| "loss": 0.3604, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.0895767907880963, | |
| "grad_norm": 1.295042190600909, | |
| "learning_rate": 9.907379607920281e-06, | |
| "loss": 0.2075, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.08972633467922836, | |
| "grad_norm": 1.8305770820800886, | |
| "learning_rate": 9.90691504546605e-06, | |
| "loss": 0.2698, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0898758785703604, | |
| "grad_norm": 1.7240290275544472, | |
| "learning_rate": 9.906449331802256e-06, | |
| "loss": 0.2504, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.09002542246149245, | |
| "grad_norm": 1.0036789417827203, | |
| "learning_rate": 9.905982467038167e-06, | |
| "loss": 0.195, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.0901749663526245, | |
| "grad_norm": 1.6777253578130231, | |
| "learning_rate": 9.905514451283308e-06, | |
| "loss": 0.2436, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.09032451024375654, | |
| "grad_norm": 1.9190873052270145, | |
| "learning_rate": 9.905045284647483e-06, | |
| "loss": 0.4006, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.09047405413488858, | |
| "grad_norm": 1.77001911452716, | |
| "learning_rate": 9.904574967240764e-06, | |
| "loss": 0.3703, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.09062359802602064, | |
| "grad_norm": 1.3114492277508998, | |
| "learning_rate": 9.904103499173487e-06, | |
| "loss": 0.2323, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.09077314191715269, | |
| "grad_norm": 1.6694643051834908, | |
| "learning_rate": 9.90363088055627e-06, | |
| "loss": 0.2881, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.09092268580828473, | |
| "grad_norm": 1.4448454411512122, | |
| "learning_rate": 9.903157111499988e-06, | |
| "loss": 0.2341, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.09107222969941678, | |
| "grad_norm": 1.8302982894061834, | |
| "learning_rate": 9.902682192115795e-06, | |
| "loss": 0.3497, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.09122177359054882, | |
| "grad_norm": 1.4089802820999182, | |
| "learning_rate": 9.902206122515113e-06, | |
| "loss": 0.1565, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.09137131748168087, | |
| "grad_norm": 2.275670976517465, | |
| "learning_rate": 9.901728902809627e-06, | |
| "loss": 0.482, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.09152086137281291, | |
| "grad_norm": 2.3916744409549997, | |
| "learning_rate": 9.901250533111301e-06, | |
| "loss": 0.539, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.09167040526394497, | |
| "grad_norm": 1.110965438282227, | |
| "learning_rate": 9.900771013532367e-06, | |
| "loss": 0.2257, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.09181994915507702, | |
| "grad_norm": 1.6169969209154105, | |
| "learning_rate": 9.900290344185321e-06, | |
| "loss": 0.2316, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.09196949304620906, | |
| "grad_norm": 1.390950490331229, | |
| "learning_rate": 9.899808525182935e-06, | |
| "loss": 0.1735, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.09211903693734111, | |
| "grad_norm": 1.26641152514348, | |
| "learning_rate": 9.899325556638247e-06, | |
| "loss": 0.2269, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.09226858082847315, | |
| "grad_norm": 1.107259968960053, | |
| "learning_rate": 9.898841438664568e-06, | |
| "loss": 0.2082, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.0924181247196052, | |
| "grad_norm": 1.6779136428714192, | |
| "learning_rate": 9.898356171375473e-06, | |
| "loss": 0.3744, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.09256766861073726, | |
| "grad_norm": 1.8012739115801626, | |
| "learning_rate": 9.897869754884816e-06, | |
| "loss": 0.2438, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.0927172125018693, | |
| "grad_norm": 1.6400812519548655, | |
| "learning_rate": 9.89738218930671e-06, | |
| "loss": 0.3692, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.09286675639300135, | |
| "grad_norm": 2.7659374426954972, | |
| "learning_rate": 9.896893474755547e-06, | |
| "loss": 0.5873, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.0930163002841334, | |
| "grad_norm": 3.020452608035097, | |
| "learning_rate": 9.89640361134598e-06, | |
| "loss": 0.4177, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.09316584417526544, | |
| "grad_norm": 1.4907614824403637, | |
| "learning_rate": 9.895912599192937e-06, | |
| "loss": 0.2516, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.09331538806639748, | |
| "grad_norm": 1.6636615032724535, | |
| "learning_rate": 9.895420438411616e-06, | |
| "loss": 0.1935, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.09346493195752953, | |
| "grad_norm": 1.9719905447621995, | |
| "learning_rate": 9.89492712911748e-06, | |
| "loss": 0.2135, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.09361447584866159, | |
| "grad_norm": 1.3681787330772102, | |
| "learning_rate": 9.894432671426264e-06, | |
| "loss": 0.208, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.09376401973979363, | |
| "grad_norm": 2.0793649946453043, | |
| "learning_rate": 9.893937065453976e-06, | |
| "loss": 0.3719, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.09391356363092568, | |
| "grad_norm": 1.685584025343787, | |
| "learning_rate": 9.893440311316887e-06, | |
| "loss": 0.2164, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.09406310752205772, | |
| "grad_norm": 1.2145425693019332, | |
| "learning_rate": 9.892942409131541e-06, | |
| "loss": 0.1725, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.09421265141318977, | |
| "grad_norm": 1.1438517718036314, | |
| "learning_rate": 9.892443359014752e-06, | |
| "loss": 0.2367, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.09436219530432181, | |
| "grad_norm": 1.4416913213257094, | |
| "learning_rate": 9.8919431610836e-06, | |
| "loss": 0.2254, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.09451173919545386, | |
| "grad_norm": 1.2656296241346114, | |
| "learning_rate": 9.891441815455436e-06, | |
| "loss": 0.2485, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.09466128308658592, | |
| "grad_norm": 1.4276056880724206, | |
| "learning_rate": 9.890939322247881e-06, | |
| "loss": 0.1908, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.09481082697771796, | |
| "grad_norm": 1.8185771152087218, | |
| "learning_rate": 9.890435681578827e-06, | |
| "loss": 0.2096, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.09496037086885001, | |
| "grad_norm": 1.2794518689910337, | |
| "learning_rate": 9.88993089356643e-06, | |
| "loss": 0.2394, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.09510991475998205, | |
| "grad_norm": 2.0227594086297738, | |
| "learning_rate": 9.88942495832912e-06, | |
| "loss": 0.59, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.0952594586511141, | |
| "grad_norm": 1.3323082817593526, | |
| "learning_rate": 9.888917875985593e-06, | |
| "loss": 0.2073, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.09540900254224614, | |
| "grad_norm": 1.7884206661676574, | |
| "learning_rate": 9.888409646654818e-06, | |
| "loss": 0.3897, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.0955585464333782, | |
| "grad_norm": 2.124144136353745, | |
| "learning_rate": 9.887900270456025e-06, | |
| "loss": 0.5683, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.09570809032451025, | |
| "grad_norm": 1.4793433841619534, | |
| "learning_rate": 9.887389747508725e-06, | |
| "loss": 0.3727, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.09585763421564229, | |
| "grad_norm": 1.0661747667222115, | |
| "learning_rate": 9.88687807793269e-06, | |
| "loss": 0.1983, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.09600717810677434, | |
| "grad_norm": 1.615153009655538, | |
| "learning_rate": 9.886365261847957e-06, | |
| "loss": 0.3675, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.09615672199790638, | |
| "grad_norm": 1.4963878387365324, | |
| "learning_rate": 9.885851299374844e-06, | |
| "loss": 0.1805, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.09630626588903843, | |
| "grad_norm": 1.8529323065992462, | |
| "learning_rate": 9.88533619063393e-06, | |
| "loss": 0.391, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.09645580978017047, | |
| "grad_norm": 2.4764246014732145, | |
| "learning_rate": 9.884819935746063e-06, | |
| "loss": 0.2605, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.09660535367130253, | |
| "grad_norm": 1.904672440883197, | |
| "learning_rate": 9.884302534832361e-06, | |
| "loss": 0.3935, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.09675489756243458, | |
| "grad_norm": 1.9431435460380113, | |
| "learning_rate": 9.883783988014216e-06, | |
| "loss": 0.2092, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.09690444145356662, | |
| "grad_norm": 2.0946695671241553, | |
| "learning_rate": 9.883264295413278e-06, | |
| "loss": 0.3957, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.09705398534469867, | |
| "grad_norm": 1.0944344711946927, | |
| "learning_rate": 9.882743457151476e-06, | |
| "loss": 0.202, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.09720352923583071, | |
| "grad_norm": 1.5147259026498003, | |
| "learning_rate": 9.882221473351e-06, | |
| "loss": 0.3029, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.09735307312696276, | |
| "grad_norm": 1.3452835965457643, | |
| "learning_rate": 9.881698344134316e-06, | |
| "loss": 0.2159, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.09750261701809482, | |
| "grad_norm": 1.7952640402406481, | |
| "learning_rate": 9.881174069624155e-06, | |
| "loss": 0.4006, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.09765216090922686, | |
| "grad_norm": 2.468540255171398, | |
| "learning_rate": 9.880648649943515e-06, | |
| "loss": 0.4393, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.09780170480035891, | |
| "grad_norm": 1.5332585075726441, | |
| "learning_rate": 9.880122085215664e-06, | |
| "loss": 0.2401, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.09795124869149095, | |
| "grad_norm": 1.5882881108110953, | |
| "learning_rate": 9.87959437556414e-06, | |
| "loss": 0.2078, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.098100792582623, | |
| "grad_norm": 1.7962702189497488, | |
| "learning_rate": 9.87906552111275e-06, | |
| "loss": 0.4793, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.09825033647375504, | |
| "grad_norm": 1.860004859316795, | |
| "learning_rate": 9.878535521985568e-06, | |
| "loss": 0.2388, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.09839988036488709, | |
| "grad_norm": 1.9861019609665855, | |
| "learning_rate": 9.878004378306934e-06, | |
| "loss": 0.3721, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.09854942425601915, | |
| "grad_norm": 1.5404208138898199, | |
| "learning_rate": 9.877472090201463e-06, | |
| "loss": 0.3534, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.09869896814715119, | |
| "grad_norm": 3.0119825067072306, | |
| "learning_rate": 9.876938657794036e-06, | |
| "loss": 0.6732, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.09884851203828324, | |
| "grad_norm": 1.5069735817087104, | |
| "learning_rate": 9.876404081209796e-06, | |
| "loss": 0.4004, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.09899805592941528, | |
| "grad_norm": 1.6856753387650372, | |
| "learning_rate": 9.875868360574164e-06, | |
| "loss": 0.2942, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.09914759982054733, | |
| "grad_norm": 1.6896901311725145, | |
| "learning_rate": 9.875331496012822e-06, | |
| "loss": 0.239, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.09929714371167937, | |
| "grad_norm": 2.2770505228904225, | |
| "learning_rate": 9.87479348765173e-06, | |
| "loss": 0.4755, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.09944668760281143, | |
| "grad_norm": 1.9016485099179228, | |
| "learning_rate": 9.874254335617102e-06, | |
| "loss": 0.4645, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.09959623149394348, | |
| "grad_norm": 1.6638896812103354, | |
| "learning_rate": 9.873714040035434e-06, | |
| "loss": 0.2512, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.09974577538507552, | |
| "grad_norm": 1.7233554952000107, | |
| "learning_rate": 9.873172601033482e-06, | |
| "loss": 0.3958, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.09989531927620757, | |
| "grad_norm": 1.7250170911584946, | |
| "learning_rate": 9.872630018738271e-06, | |
| "loss": 0.3115, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.10004486316733961, | |
| "grad_norm": 1.8843746906489027, | |
| "learning_rate": 9.872086293277101e-06, | |
| "loss": 0.3789, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.10019440705847166, | |
| "grad_norm": 1.943275185299739, | |
| "learning_rate": 9.871541424777534e-06, | |
| "loss": 0.4192, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.1003439509496037, | |
| "grad_norm": 1.4918005726247283, | |
| "learning_rate": 9.870995413367397e-06, | |
| "loss": 0.2538, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.10049349484073576, | |
| "grad_norm": 1.6441123648652987, | |
| "learning_rate": 9.870448259174791e-06, | |
| "loss": 0.2295, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.1006430387318678, | |
| "grad_norm": 1.933429186975597, | |
| "learning_rate": 9.86989996232809e-06, | |
| "loss": 0.4015, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.10079258262299985, | |
| "grad_norm": 1.8125640882474123, | |
| "learning_rate": 9.869350522955921e-06, | |
| "loss": 0.3807, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.1009421265141319, | |
| "grad_norm": 1.9369733002230116, | |
| "learning_rate": 9.868799941187193e-06, | |
| "loss": 0.5201, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.10109167040526394, | |
| "grad_norm": 1.5216959755972845, | |
| "learning_rate": 9.868248217151075e-06, | |
| "loss": 0.3624, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.10124121429639599, | |
| "grad_norm": 1.532054269025379, | |
| "learning_rate": 9.867695350977009e-06, | |
| "loss": 0.2738, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.10139075818752805, | |
| "grad_norm": 1.9725714032650388, | |
| "learning_rate": 9.867141342794703e-06, | |
| "loss": 0.5802, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.10154030207866009, | |
| "grad_norm": 1.9107978616944274, | |
| "learning_rate": 9.86658619273413e-06, | |
| "loss": 0.482, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.10168984596979214, | |
| "grad_norm": 1.919639496784501, | |
| "learning_rate": 9.866029900925535e-06, | |
| "loss": 0.3558, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.10183938986092418, | |
| "grad_norm": 2.174841069849439, | |
| "learning_rate": 9.865472467499431e-06, | |
| "loss": 0.6996, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.10198893375205623, | |
| "grad_norm": 2.2558702972279807, | |
| "learning_rate": 9.864913892586596e-06, | |
| "loss": 0.2397, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.10213847764318827, | |
| "grad_norm": 2.1196800969183105, | |
| "learning_rate": 9.864354176318076e-06, | |
| "loss": 0.3793, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.10228802153432032, | |
| "grad_norm": 2.165719475550091, | |
| "learning_rate": 9.863793318825186e-06, | |
| "loss": 0.2154, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.10243756542545238, | |
| "grad_norm": 1.7513134063770632, | |
| "learning_rate": 9.86323132023951e-06, | |
| "loss": 0.3816, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.10258710931658442, | |
| "grad_norm": 1.7103742255808732, | |
| "learning_rate": 9.862668180692897e-06, | |
| "loss": 0.2469, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.10273665320771647, | |
| "grad_norm": 1.9784764768939407, | |
| "learning_rate": 9.862103900317467e-06, | |
| "loss": 0.2279, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.10288619709884851, | |
| "grad_norm": 2.5430996153598877, | |
| "learning_rate": 9.861538479245603e-06, | |
| "loss": 0.4512, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.10303574098998056, | |
| "grad_norm": 1.3584315188319882, | |
| "learning_rate": 9.86097191760996e-06, | |
| "loss": 0.2521, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.1031852848811126, | |
| "grad_norm": 1.8041511333081743, | |
| "learning_rate": 9.860404215543458e-06, | |
| "loss": 0.3794, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.10333482877224466, | |
| "grad_norm": 2.261581805469511, | |
| "learning_rate": 9.859835373179285e-06, | |
| "loss": 0.5264, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.1034843726633767, | |
| "grad_norm": 1.4531049528328563, | |
| "learning_rate": 9.859265390650897e-06, | |
| "loss": 0.2069, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.10363391655450875, | |
| "grad_norm": 1.6530791454319427, | |
| "learning_rate": 9.85869426809202e-06, | |
| "loss": 0.2304, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.1037834604456408, | |
| "grad_norm": 1.5868398701857311, | |
| "learning_rate": 9.85812200563664e-06, | |
| "loss": 0.3894, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.10393300433677284, | |
| "grad_norm": 1.4690408418702507, | |
| "learning_rate": 9.857548603419019e-06, | |
| "loss": 0.3383, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.10408254822790489, | |
| "grad_norm": 1.868386725453433, | |
| "learning_rate": 9.856974061573682e-06, | |
| "loss": 0.4666, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.10423209211903693, | |
| "grad_norm": 1.3771017197315938, | |
| "learning_rate": 9.856398380235422e-06, | |
| "loss": 0.2285, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.10438163601016899, | |
| "grad_norm": 2.452990479638216, | |
| "learning_rate": 9.855821559539298e-06, | |
| "loss": 0.7219, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.10453117990130104, | |
| "grad_norm": 1.8742322224001207, | |
| "learning_rate": 9.85524359962064e-06, | |
| "loss": 0.4803, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.10468072379243308, | |
| "grad_norm": 1.858692042760981, | |
| "learning_rate": 9.854664500615041e-06, | |
| "loss": 0.2273, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.10483026768356513, | |
| "grad_norm": 1.1355721780236596, | |
| "learning_rate": 9.854084262658365e-06, | |
| "loss": 0.1947, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.10497981157469717, | |
| "grad_norm": 1.3464195395769243, | |
| "learning_rate": 9.853502885886738e-06, | |
| "loss": 0.1988, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.10512935546582922, | |
| "grad_norm": 1.204875080370136, | |
| "learning_rate": 9.852920370436561e-06, | |
| "loss": 0.3027, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.10527889935696126, | |
| "grad_norm": 1.3557124537174092, | |
| "learning_rate": 9.852336716444496e-06, | |
| "loss": 0.2158, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.10542844324809332, | |
| "grad_norm": 1.5752529363149261, | |
| "learning_rate": 9.851751924047472e-06, | |
| "loss": 0.3324, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.10557798713922537, | |
| "grad_norm": 1.7915590890665287, | |
| "learning_rate": 9.85116599338269e-06, | |
| "loss": 0.4936, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.10572753103035741, | |
| "grad_norm": 1.6842493918087815, | |
| "learning_rate": 9.850578924587614e-06, | |
| "loss": 0.4249, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.10587707492148946, | |
| "grad_norm": 1.781419189683173, | |
| "learning_rate": 9.849990717799975e-06, | |
| "loss": 0.2615, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.1060266188126215, | |
| "grad_norm": 1.4202393409091985, | |
| "learning_rate": 9.849401373157772e-06, | |
| "loss": 0.3256, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.10617616270375355, | |
| "grad_norm": 1.3714522045342281, | |
| "learning_rate": 9.84881089079927e-06, | |
| "loss": 0.219, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.1063257065948856, | |
| "grad_norm": 1.7391677942386203, | |
| "learning_rate": 9.848219270863005e-06, | |
| "loss": 0.2249, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.10647525048601765, | |
| "grad_norm": 1.3023890791191592, | |
| "learning_rate": 9.847626513487774e-06, | |
| "loss": 0.3693, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.1066247943771497, | |
| "grad_norm": 1.7969068078667318, | |
| "learning_rate": 9.847032618812647e-06, | |
| "loss": 0.2298, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.10677433826828174, | |
| "grad_norm": 2.102291030534645, | |
| "learning_rate": 9.846437586976952e-06, | |
| "loss": 0.4688, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.10692388215941379, | |
| "grad_norm": 1.072288463866959, | |
| "learning_rate": 9.845841418120295e-06, | |
| "loss": 0.2023, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.10707342605054583, | |
| "grad_norm": 1.3278088866624802, | |
| "learning_rate": 9.845244112382536e-06, | |
| "loss": 0.3492, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.10722296994167788, | |
| "grad_norm": 1.3771047197586663, | |
| "learning_rate": 9.844645669903816e-06, | |
| "loss": 0.2152, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.10737251383280993, | |
| "grad_norm": 1.243148446265919, | |
| "learning_rate": 9.844046090824533e-06, | |
| "loss": 0.2419, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.10752205772394198, | |
| "grad_norm": 1.3994827626329662, | |
| "learning_rate": 9.843445375285351e-06, | |
| "loss": 0.3578, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.10767160161507403, | |
| "grad_norm": 2.180600395588636, | |
| "learning_rate": 9.842843523427207e-06, | |
| "loss": 0.4159, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.10782114550620607, | |
| "grad_norm": 1.853639106134475, | |
| "learning_rate": 9.842240535391301e-06, | |
| "loss": 0.3929, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.10797068939733812, | |
| "grad_norm": 2.1662710209518306, | |
| "learning_rate": 9.841636411319098e-06, | |
| "loss": 0.298, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.10812023328847016, | |
| "grad_norm": 1.604340910092426, | |
| "learning_rate": 9.841031151352332e-06, | |
| "loss": 0.2175, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.10826977717960222, | |
| "grad_norm": 2.486345181702559, | |
| "learning_rate": 9.840424755633002e-06, | |
| "loss": 0.5179, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.10841932107073426, | |
| "grad_norm": 1.415864057650498, | |
| "learning_rate": 9.83981722430338e-06, | |
| "loss": 0.3539, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.10856886496186631, | |
| "grad_norm": 1.4949172725362427, | |
| "learning_rate": 9.839208557505989e-06, | |
| "loss": 0.382, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.10871840885299835, | |
| "grad_norm": 2.2920695398684576, | |
| "learning_rate": 9.838598755383636e-06, | |
| "loss": 0.5086, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.1088679527441304, | |
| "grad_norm": 1.6667328126020315, | |
| "learning_rate": 9.837987818079382e-06, | |
| "loss": 0.3736, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.10901749663526245, | |
| "grad_norm": 1.3398213843433537, | |
| "learning_rate": 9.837375745736562e-06, | |
| "loss": 0.1827, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.10916704052639449, | |
| "grad_norm": 1.9606925081810038, | |
| "learning_rate": 9.83676253849877e-06, | |
| "loss": 0.3992, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.10931658441752655, | |
| "grad_norm": 1.7774772468148792, | |
| "learning_rate": 9.836148196509875e-06, | |
| "loss": 0.4769, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.1094661283086586, | |
| "grad_norm": 1.9643639344581283, | |
| "learning_rate": 9.835532719914005e-06, | |
| "loss": 0.6049, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.10961567219979064, | |
| "grad_norm": 2.205274189695602, | |
| "learning_rate": 9.834916108855557e-06, | |
| "loss": 0.3679, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.10976521609092268, | |
| "grad_norm": 1.5293962633909763, | |
| "learning_rate": 9.834298363479193e-06, | |
| "loss": 0.3355, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.10991475998205473, | |
| "grad_norm": 1.985725165029928, | |
| "learning_rate": 9.833679483929846e-06, | |
| "loss": 0.357, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.11006430387318678, | |
| "grad_norm": 1.938156924090921, | |
| "learning_rate": 9.833059470352705e-06, | |
| "loss": 0.2667, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.11021384776431883, | |
| "grad_norm": 1.6208704990029696, | |
| "learning_rate": 9.832438322893235e-06, | |
| "loss": 0.2751, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.11036339165545088, | |
| "grad_norm": 1.6610841289834064, | |
| "learning_rate": 9.831816041697164e-06, | |
| "loss": 0.2319, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.11051293554658292, | |
| "grad_norm": 2.003195385581558, | |
| "learning_rate": 9.831192626910482e-06, | |
| "loss": 0.3799, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.11066247943771497, | |
| "grad_norm": 1.8861050802330894, | |
| "learning_rate": 9.83056807867945e-06, | |
| "loss": 0.4804, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.11081202332884701, | |
| "grad_norm": 1.6483143403386502, | |
| "learning_rate": 9.829942397150593e-06, | |
| "loss": 0.3658, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.11096156721997906, | |
| "grad_norm": 1.5438600790491723, | |
| "learning_rate": 9.829315582470702e-06, | |
| "loss": 0.2297, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 1.4215916414139778, | |
| "learning_rate": 9.828687634786834e-06, | |
| "loss": 0.2365, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.11126065500224316, | |
| "grad_norm": 1.7761192463313074, | |
| "learning_rate": 9.828058554246309e-06, | |
| "loss": 0.3052, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.11141019889337521, | |
| "grad_norm": 1.6148872971257344, | |
| "learning_rate": 9.82742834099672e-06, | |
| "loss": 0.2199, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.11155974278450725, | |
| "grad_norm": 2.0162005563823646, | |
| "learning_rate": 9.826796995185916e-06, | |
| "loss": 0.3839, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.1117092866756393, | |
| "grad_norm": 1.4737452330787222, | |
| "learning_rate": 9.826164516962022e-06, | |
| "loss": 0.1869, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.11185883056677134, | |
| "grad_norm": 2.102437337141145, | |
| "learning_rate": 9.82553090647342e-06, | |
| "loss": 0.2615, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.11200837445790339, | |
| "grad_norm": 1.7050095093194846, | |
| "learning_rate": 9.82489616386876e-06, | |
| "loss": 0.2518, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.11215791834903545, | |
| "grad_norm": 1.5205595562607015, | |
| "learning_rate": 9.824260289296963e-06, | |
| "loss": 0.1792, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.1123074622401675, | |
| "grad_norm": 1.7480859507023934, | |
| "learning_rate": 9.823623282907207e-06, | |
| "loss": 0.4179, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.11245700613129954, | |
| "grad_norm": 1.5498394808983003, | |
| "learning_rate": 9.822985144848944e-06, | |
| "loss": 0.3358, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.11260655002243158, | |
| "grad_norm": 1.6393482758244988, | |
| "learning_rate": 9.822345875271884e-06, | |
| "loss": 0.2149, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.11275609391356363, | |
| "grad_norm": 1.9754923234031054, | |
| "learning_rate": 9.821705474326006e-06, | |
| "loss": 0.434, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.11290563780469567, | |
| "grad_norm": 2.2364190645343154, | |
| "learning_rate": 9.821063942161558e-06, | |
| "loss": 0.2228, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.11305518169582772, | |
| "grad_norm": 1.4307479990009164, | |
| "learning_rate": 9.820421278929045e-06, | |
| "loss": 0.3547, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.11320472558695978, | |
| "grad_norm": 1.2078809494224174, | |
| "learning_rate": 9.819777484779242e-06, | |
| "loss": 0.2245, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.11335426947809182, | |
| "grad_norm": 2.6305960032740354, | |
| "learning_rate": 9.819132559863194e-06, | |
| "loss": 0.6771, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.11350381336922387, | |
| "grad_norm": 1.4792675137281683, | |
| "learning_rate": 9.818486504332203e-06, | |
| "loss": 0.318, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.11365335726035591, | |
| "grad_norm": 2.127350110714495, | |
| "learning_rate": 9.817839318337839e-06, | |
| "loss": 0.4925, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.11380290115148796, | |
| "grad_norm": 1.8233415508114148, | |
| "learning_rate": 9.81719100203194e-06, | |
| "loss": 0.2747, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.11395244504262, | |
| "grad_norm": 2.1187219443475156, | |
| "learning_rate": 9.81654155556661e-06, | |
| "loss": 0.4595, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.11410198893375205, | |
| "grad_norm": 1.5759792753813915, | |
| "learning_rate": 9.81589097909421e-06, | |
| "loss": 0.3553, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.11425153282488411, | |
| "grad_norm": 1.708776908270571, | |
| "learning_rate": 9.815239272767373e-06, | |
| "loss": 0.3091, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.11440107671601615, | |
| "grad_norm": 1.703775430420963, | |
| "learning_rate": 9.814586436738998e-06, | |
| "loss": 0.3728, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.1145506206071482, | |
| "grad_norm": 1.6198262441466886, | |
| "learning_rate": 9.813932471162245e-06, | |
| "loss": 0.2498, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.11470016449828024, | |
| "grad_norm": 1.4858642435718663, | |
| "learning_rate": 9.813277376190539e-06, | |
| "loss": 0.2299, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.11484970838941229, | |
| "grad_norm": 1.8052387881768808, | |
| "learning_rate": 9.812621151977574e-06, | |
| "loss": 0.3834, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.11499925228054433, | |
| "grad_norm": 1.7902664470941898, | |
| "learning_rate": 9.811963798677306e-06, | |
| "loss": 0.2282, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.11514879617167639, | |
| "grad_norm": 1.6316784429425562, | |
| "learning_rate": 9.811305316443956e-06, | |
| "loss": 0.2396, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.11529834006280844, | |
| "grad_norm": 1.414088700773603, | |
| "learning_rate": 9.81064570543201e-06, | |
| "loss": 0.2353, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.11544788395394048, | |
| "grad_norm": 1.9219176628835946, | |
| "learning_rate": 9.80998496579622e-06, | |
| "loss": 0.3379, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.11559742784507253, | |
| "grad_norm": 1.113085528787142, | |
| "learning_rate": 9.809323097691602e-06, | |
| "loss": 0.213, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.11574697173620457, | |
| "grad_norm": 1.6091723360768109, | |
| "learning_rate": 9.808660101273435e-06, | |
| "loss": 0.3457, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.11589651562733662, | |
| "grad_norm": 1.4626018681006754, | |
| "learning_rate": 9.807995976697267e-06, | |
| "loss": 0.1777, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.11604605951846866, | |
| "grad_norm": 2.3953869334660522, | |
| "learning_rate": 9.807330724118906e-06, | |
| "loss": 0.449, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.11619560340960072, | |
| "grad_norm": 1.595059614098865, | |
| "learning_rate": 9.806664343694425e-06, | |
| "loss": 0.3367, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.11634514730073277, | |
| "grad_norm": 1.1067814732956414, | |
| "learning_rate": 9.805996835580169e-06, | |
| "loss": 0.1828, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.11649469119186481, | |
| "grad_norm": 1.62643731894747, | |
| "learning_rate": 9.805328199932736e-06, | |
| "loss": 0.2005, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.11664423508299686, | |
| "grad_norm": 1.5535907543030336, | |
| "learning_rate": 9.804658436908996e-06, | |
| "loss": 0.2635, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1167937789741289, | |
| "grad_norm": 1.0891099881486959, | |
| "learning_rate": 9.803987546666083e-06, | |
| "loss": 0.2012, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.11694332286526095, | |
| "grad_norm": 1.9927493555965012, | |
| "learning_rate": 9.803315529361395e-06, | |
| "loss": 0.5297, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.11709286675639301, | |
| "grad_norm": 1.6333695435696685, | |
| "learning_rate": 9.802642385152593e-06, | |
| "loss": 0.2959, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.11724241064752505, | |
| "grad_norm": 2.0905230122366896, | |
| "learning_rate": 9.8019681141976e-06, | |
| "loss": 0.3662, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.1173919545386571, | |
| "grad_norm": 1.3626106000772258, | |
| "learning_rate": 9.80129271665461e-06, | |
| "loss": 0.2065, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.11754149842978914, | |
| "grad_norm": 1.7357589724302078, | |
| "learning_rate": 9.800616192682077e-06, | |
| "loss": 0.3269, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.11769104232092119, | |
| "grad_norm": 1.3095611640218061, | |
| "learning_rate": 9.79993854243872e-06, | |
| "loss": 0.1993, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.11784058621205323, | |
| "grad_norm": 2.008966146034465, | |
| "learning_rate": 9.799259766083522e-06, | |
| "loss": 0.2346, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.11799013010318528, | |
| "grad_norm": 1.4166616715548845, | |
| "learning_rate": 9.798579863775733e-06, | |
| "loss": 0.2053, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.11813967399431734, | |
| "grad_norm": 2.2231308106975742, | |
| "learning_rate": 9.79789883567486e-06, | |
| "loss": 0.3138, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.11828921788544938, | |
| "grad_norm": 2.104337845030462, | |
| "learning_rate": 9.79721668194068e-06, | |
| "loss": 0.5896, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.11843876177658143, | |
| "grad_norm": 1.3961164790172769, | |
| "learning_rate": 9.796533402733235e-06, | |
| "loss": 0.2023, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.11858830566771347, | |
| "grad_norm": 1.9644410315987328, | |
| "learning_rate": 9.79584899821283e-06, | |
| "loss": 0.2389, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.11873784955884552, | |
| "grad_norm": 1.911243493993425, | |
| "learning_rate": 9.795163468540028e-06, | |
| "loss": 0.2319, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.11888739344997756, | |
| "grad_norm": 1.5442762653540485, | |
| "learning_rate": 9.794476813875665e-06, | |
| "loss": 0.256, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.11903693734110962, | |
| "grad_norm": 1.6347185262551664, | |
| "learning_rate": 9.793789034380833e-06, | |
| "loss": 0.3659, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.11918648123224167, | |
| "grad_norm": 6.93121052791821, | |
| "learning_rate": 9.793100130216895e-06, | |
| "loss": 0.3348, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.11933602512337371, | |
| "grad_norm": 1.1914924746720745, | |
| "learning_rate": 9.792410101545475e-06, | |
| "loss": 0.2475, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.11948556901450576, | |
| "grad_norm": 1.4728413245474197, | |
| "learning_rate": 9.791718948528457e-06, | |
| "loss": 0.3569, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.1196351129056378, | |
| "grad_norm": 2.0173892018585113, | |
| "learning_rate": 9.791026671327996e-06, | |
| "loss": 0.3154, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.11978465679676985, | |
| "grad_norm": 1.863844432530015, | |
| "learning_rate": 9.790333270106505e-06, | |
| "loss": 0.349, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.11993420068790189, | |
| "grad_norm": 1.348620907056274, | |
| "learning_rate": 9.789638745026661e-06, | |
| "loss": 0.2553, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.12008374457903395, | |
| "grad_norm": 1.7207895705367349, | |
| "learning_rate": 9.78894309625141e-06, | |
| "loss": 0.3931, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.120233288470166, | |
| "grad_norm": 1.3131471894535065, | |
| "learning_rate": 9.788246323943954e-06, | |
| "loss": 0.1473, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.12038283236129804, | |
| "grad_norm": 1.054327043113717, | |
| "learning_rate": 9.787548428267766e-06, | |
| "loss": 0.1945, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.12053237625243009, | |
| "grad_norm": 1.9526157668969721, | |
| "learning_rate": 9.786849409386577e-06, | |
| "loss": 0.3906, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.12068192014356213, | |
| "grad_norm": 1.8267497285704608, | |
| "learning_rate": 9.786149267464382e-06, | |
| "loss": 0.4193, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.12083146403469418, | |
| "grad_norm": 0.8038770137897923, | |
| "learning_rate": 9.785448002665446e-06, | |
| "loss": 0.2392, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.12098100792582624, | |
| "grad_norm": 1.5253624272396114, | |
| "learning_rate": 9.784745615154286e-06, | |
| "loss": 0.3366, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.12113055181695828, | |
| "grad_norm": 2.093861559130543, | |
| "learning_rate": 9.784042105095694e-06, | |
| "loss": 0.4947, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.12128009570809033, | |
| "grad_norm": 1.4395999315986885, | |
| "learning_rate": 9.78333747265472e-06, | |
| "loss": 0.3721, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.12142963959922237, | |
| "grad_norm": 1.852326670020495, | |
| "learning_rate": 9.782631717996675e-06, | |
| "loss": 0.4779, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.12157918349035442, | |
| "grad_norm": 1.6061911802246367, | |
| "learning_rate": 9.781924841287136e-06, | |
| "loss": 0.3634, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.12172872738148646, | |
| "grad_norm": 1.6480313202927959, | |
| "learning_rate": 9.781216842691945e-06, | |
| "loss": 0.3486, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.12187827127261851, | |
| "grad_norm": 1.7352908193060639, | |
| "learning_rate": 9.780507722377205e-06, | |
| "loss": 0.2405, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.12202781516375057, | |
| "grad_norm": 1.819605505850209, | |
| "learning_rate": 9.779797480509281e-06, | |
| "loss": 0.2702, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.12217735905488261, | |
| "grad_norm": 5.030925574416197, | |
| "learning_rate": 9.779086117254804e-06, | |
| "loss": 0.4802, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.12232690294601466, | |
| "grad_norm": 1.4714728946101239, | |
| "learning_rate": 9.778373632780665e-06, | |
| "loss": 0.4002, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.1224764468371467, | |
| "grad_norm": 1.8961195589940971, | |
| "learning_rate": 9.777660027254022e-06, | |
| "loss": 0.5022, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.12262599072827875, | |
| "grad_norm": 2.1718036722230343, | |
| "learning_rate": 9.776945300842292e-06, | |
| "loss": 0.3274, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.12277553461941079, | |
| "grad_norm": 1.620505499680087, | |
| "learning_rate": 9.776229453713158e-06, | |
| "loss": 0.2316, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.12292507851054285, | |
| "grad_norm": 1.6978035349883904, | |
| "learning_rate": 9.775512486034564e-06, | |
| "loss": 0.3388, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.1230746224016749, | |
| "grad_norm": 1.8097210824887537, | |
| "learning_rate": 9.774794397974715e-06, | |
| "loss": 0.2658, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.12322416629280694, | |
| "grad_norm": 1.7832381045534218, | |
| "learning_rate": 9.774075189702085e-06, | |
| "loss": 0.236, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.12337371018393899, | |
| "grad_norm": 1.5481034516154306, | |
| "learning_rate": 9.773354861385408e-06, | |
| "loss": 0.2209, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.12352325407507103, | |
| "grad_norm": 1.808929914702085, | |
| "learning_rate": 9.772633413193677e-06, | |
| "loss": 0.3936, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.12367279796620308, | |
| "grad_norm": 1.4632324741175244, | |
| "learning_rate": 9.771910845296151e-06, | |
| "loss": 0.1809, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.12382234185733512, | |
| "grad_norm": 1.602480536861921, | |
| "learning_rate": 9.771187157862352e-06, | |
| "loss": 0.3631, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.12397188574846718, | |
| "grad_norm": 1.695314807275002, | |
| "learning_rate": 9.770462351062065e-06, | |
| "loss": 0.3419, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.12412142963959923, | |
| "grad_norm": 2.2542289621861262, | |
| "learning_rate": 9.769736425065333e-06, | |
| "loss": 0.4292, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.12427097353073127, | |
| "grad_norm": 1.7697982755032058, | |
| "learning_rate": 9.76900938004247e-06, | |
| "loss": 0.3735, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.12442051742186332, | |
| "grad_norm": 1.8120887881814032, | |
| "learning_rate": 9.768281216164045e-06, | |
| "loss": 0.3568, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.12457006131299536, | |
| "grad_norm": 1.581710048140236, | |
| "learning_rate": 9.767551933600896e-06, | |
| "loss": 0.1999, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.1247196052041274, | |
| "grad_norm": 1.8611636134135094, | |
| "learning_rate": 9.766821532524113e-06, | |
| "loss": 0.4111, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.12486914909525945, | |
| "grad_norm": 1.3596930734620556, | |
| "learning_rate": 9.76609001310506e-06, | |
| "loss": 0.1893, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.1250186929863915, | |
| "grad_norm": 1.7562268713789106, | |
| "learning_rate": 9.76535737551536e-06, | |
| "loss": 0.3948, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.12516823687752354, | |
| "grad_norm": 2.0138359003020136, | |
| "learning_rate": 9.764623619926891e-06, | |
| "loss": 0.2182, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.1253177807686556, | |
| "grad_norm": 1.447071144741178, | |
| "learning_rate": 9.763888746511804e-06, | |
| "loss": 0.2027, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.12546732465978766, | |
| "grad_norm": 2.1652568729944734, | |
| "learning_rate": 9.763152755442504e-06, | |
| "loss": 0.6314, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.1256168685509197, | |
| "grad_norm": 1.8038679466625127, | |
| "learning_rate": 9.762415646891665e-06, | |
| "loss": 0.3578, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.12576641244205175, | |
| "grad_norm": 1.6058605347195138, | |
| "learning_rate": 9.761677421032218e-06, | |
| "loss": 0.4411, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.12591595633318378, | |
| "grad_norm": 1.590419871100753, | |
| "learning_rate": 9.760938078037358e-06, | |
| "loss": 0.3562, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.12606550022431584, | |
| "grad_norm": 2.1851801492136267, | |
| "learning_rate": 9.76019761808054e-06, | |
| "loss": 0.5822, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.12621504411544787, | |
| "grad_norm": 1.5855983503039581, | |
| "learning_rate": 9.759456041335487e-06, | |
| "loss": 0.2229, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.12636458800657993, | |
| "grad_norm": 1.370525319712137, | |
| "learning_rate": 9.758713347976179e-06, | |
| "loss": 0.2126, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.126514131897712, | |
| "grad_norm": 6.394283226949693, | |
| "learning_rate": 9.757969538176856e-06, | |
| "loss": 0.5925, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.12666367578884402, | |
| "grad_norm": 1.6599084242802136, | |
| "learning_rate": 9.757224612112026e-06, | |
| "loss": 0.2939, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.12681321967997608, | |
| "grad_norm": 1.7907787465869436, | |
| "learning_rate": 9.756478569956455e-06, | |
| "loss": 0.222, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.1269627635711081, | |
| "grad_norm": 2.2366122735755707, | |
| "learning_rate": 9.755731411885172e-06, | |
| "loss": 0.6684, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.12711230746224017, | |
| "grad_norm": 1.2342377688986181, | |
| "learning_rate": 9.754983138073466e-06, | |
| "loss": 0.1731, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1272618513533722, | |
| "grad_norm": 1.890953555602396, | |
| "learning_rate": 9.75423374869689e-06, | |
| "loss": 0.3518, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.12741139524450426, | |
| "grad_norm": 1.6475639435427634, | |
| "learning_rate": 9.75348324393126e-06, | |
| "loss": 0.2398, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.12756093913563632, | |
| "grad_norm": 1.6270554683226957, | |
| "learning_rate": 9.752731623952647e-06, | |
| "loss": 0.4891, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.12771048302676835, | |
| "grad_norm": 1.26855312080081, | |
| "learning_rate": 9.751978888937394e-06, | |
| "loss": 0.256, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.1278600269179004, | |
| "grad_norm": 1.8185769247015318, | |
| "learning_rate": 9.751225039062096e-06, | |
| "loss": 0.4165, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.12800957080903244, | |
| "grad_norm": 1.751422967721224, | |
| "learning_rate": 9.750470074503616e-06, | |
| "loss": 0.4006, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.1281591147001645, | |
| "grad_norm": 1.520424463551602, | |
| "learning_rate": 9.749713995439072e-06, | |
| "loss": 0.221, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.12830865859129656, | |
| "grad_norm": 1.3721869164602227, | |
| "learning_rate": 9.74895680204585e-06, | |
| "loss": 0.2902, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.1284582024824286, | |
| "grad_norm": 0.9768480150555632, | |
| "learning_rate": 9.748198494501598e-06, | |
| "loss": 0.2115, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.12860774637356065, | |
| "grad_norm": 1.1411458146693625, | |
| "learning_rate": 9.747439072984217e-06, | |
| "loss": 0.2657, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.12875729026469268, | |
| "grad_norm": 1.351877109756975, | |
| "learning_rate": 9.746678537671876e-06, | |
| "loss": 0.1998, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.12890683415582474, | |
| "grad_norm": 1.735519954859839, | |
| "learning_rate": 9.745916888743006e-06, | |
| "loss": 0.3916, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.12905637804695677, | |
| "grad_norm": 1.5843585668319187, | |
| "learning_rate": 9.745154126376295e-06, | |
| "loss": 0.2412, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.12920592193808883, | |
| "grad_norm": 1.2682977552751018, | |
| "learning_rate": 9.744390250750694e-06, | |
| "loss": 0.2082, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.1293554658292209, | |
| "grad_norm": 1.972644277101951, | |
| "learning_rate": 9.74362526204542e-06, | |
| "loss": 0.5327, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.12950500972035292, | |
| "grad_norm": 1.9379037313358354, | |
| "learning_rate": 9.74285916043994e-06, | |
| "loss": 0.5184, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.12965455361148498, | |
| "grad_norm": 2.118494372996469, | |
| "learning_rate": 9.742091946113994e-06, | |
| "loss": 0.4367, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.129804097502617, | |
| "grad_norm": 3.0042960877566904, | |
| "learning_rate": 9.741323619247575e-06, | |
| "loss": 0.2971, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.12995364139374907, | |
| "grad_norm": 1.4822743195387478, | |
| "learning_rate": 9.740554180020944e-06, | |
| "loss": 0.2324, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.1301031852848811, | |
| "grad_norm": 1.629265135056018, | |
| "learning_rate": 9.739783628614614e-06, | |
| "loss": 0.3717, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.13025272917601316, | |
| "grad_norm": 2.663192450334603, | |
| "learning_rate": 9.739011965209366e-06, | |
| "loss": 0.4405, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.13040227306714522, | |
| "grad_norm": 1.942071044818614, | |
| "learning_rate": 9.738239189986239e-06, | |
| "loss": 0.2141, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.13055181695827725, | |
| "grad_norm": 3.5555352510097684, | |
| "learning_rate": 9.737465303126533e-06, | |
| "loss": 0.5084, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.1307013608494093, | |
| "grad_norm": 131.61785973638575, | |
| "learning_rate": 9.736690304811811e-06, | |
| "loss": 0.2431, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.13085090474054134, | |
| "grad_norm": 1.190310223040302, | |
| "learning_rate": 9.735914195223894e-06, | |
| "loss": 0.1586, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.1310004486316734, | |
| "grad_norm": 1.8346983968963104, | |
| "learning_rate": 9.735136974544866e-06, | |
| "loss": 0.5247, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.13114999252280543, | |
| "grad_norm": 1.7905067752668935, | |
| "learning_rate": 9.734358642957068e-06, | |
| "loss": 0.2645, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.1312995364139375, | |
| "grad_norm": 1.9545038391601572, | |
| "learning_rate": 9.733579200643108e-06, | |
| "loss": 0.3769, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.13144908030506955, | |
| "grad_norm": 2.1943279379956477, | |
| "learning_rate": 9.732798647785847e-06, | |
| "loss": 0.5142, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.13159862419620158, | |
| "grad_norm": 1.8055649624971999, | |
| "learning_rate": 9.73201698456841e-06, | |
| "loss": 0.1857, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.13174816808733364, | |
| "grad_norm": 1.8149442634221358, | |
| "learning_rate": 9.731234211174188e-06, | |
| "loss": 0.2233, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.13189771197846567, | |
| "grad_norm": 2.080170101944024, | |
| "learning_rate": 9.73045032778682e-06, | |
| "loss": 0.3904, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.13204725586959773, | |
| "grad_norm": 1.9510038015167501, | |
| "learning_rate": 9.729665334590217e-06, | |
| "loss": 0.3821, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.13219679976072976, | |
| "grad_norm": 2.1650257376887545, | |
| "learning_rate": 9.728879231768547e-06, | |
| "loss": 0.2357, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.13234634365186182, | |
| "grad_norm": 1.1711773735000268, | |
| "learning_rate": 9.728092019506233e-06, | |
| "loss": 0.181, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.13249588754299388, | |
| "grad_norm": 1.3626230838557185, | |
| "learning_rate": 9.727303697987965e-06, | |
| "loss": 0.2283, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.1326454314341259, | |
| "grad_norm": 1.9510781854659551, | |
| "learning_rate": 9.72651426739869e-06, | |
| "loss": 0.5154, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.13279497532525797, | |
| "grad_norm": 0.9715289377560907, | |
| "learning_rate": 9.72572372792362e-06, | |
| "loss": 0.248, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.13294451921639, | |
| "grad_norm": 2.146774938769699, | |
| "learning_rate": 9.724932079748218e-06, | |
| "loss": 0.5735, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.13309406310752206, | |
| "grad_norm": 1.9362171525305378, | |
| "learning_rate": 9.724139323058213e-06, | |
| "loss": 0.4643, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.13324360699865412, | |
| "grad_norm": 1.2101289634017103, | |
| "learning_rate": 9.723345458039595e-06, | |
| "loss": 0.2266, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.13339315088978615, | |
| "grad_norm": 1.5975435725539564, | |
| "learning_rate": 9.722550484878612e-06, | |
| "loss": 0.2212, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.1335426947809182, | |
| "grad_norm": 2.1131453338181783, | |
| "learning_rate": 9.721754403761773e-06, | |
| "loss": 0.5017, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.13369223867205024, | |
| "grad_norm": 1.8229015728915987, | |
| "learning_rate": 9.720957214875846e-06, | |
| "loss": 0.3833, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.1338417825631823, | |
| "grad_norm": 2.2277630697934416, | |
| "learning_rate": 9.720158918407859e-06, | |
| "loss": 0.2482, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.13399132645431433, | |
| "grad_norm": 2.7493650830526413, | |
| "learning_rate": 9.719359514545097e-06, | |
| "loss": 0.3227, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.1341408703454464, | |
| "grad_norm": 1.326522393176365, | |
| "learning_rate": 9.718559003475114e-06, | |
| "loss": 0.3694, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.13429041423657845, | |
| "grad_norm": 2.031851235687534, | |
| "learning_rate": 9.717757385385713e-06, | |
| "loss": 0.4018, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.13443995812771048, | |
| "grad_norm": 1.8621806446966467, | |
| "learning_rate": 9.716954660464962e-06, | |
| "loss": 0.4906, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.13458950201884254, | |
| "grad_norm": 1.2770259017691152, | |
| "learning_rate": 9.716150828901189e-06, | |
| "loss": 0.1858, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.13473904590997457, | |
| "grad_norm": 1.5913545450304718, | |
| "learning_rate": 9.71534589088298e-06, | |
| "loss": 0.197, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.13488858980110663, | |
| "grad_norm": 1.2063585747949084, | |
| "learning_rate": 9.714539846599183e-06, | |
| "loss": 0.2366, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.13503813369223866, | |
| "grad_norm": 2.5200659605885036, | |
| "learning_rate": 9.713732696238901e-06, | |
| "loss": 0.5999, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.13518767758337072, | |
| "grad_norm": 1.9024340551353864, | |
| "learning_rate": 9.7129244399915e-06, | |
| "loss": 0.3618, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.13533722147450278, | |
| "grad_norm": 2.069806998096416, | |
| "learning_rate": 9.712115078046606e-06, | |
| "loss": 0.2392, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.1354867653656348, | |
| "grad_norm": 1.9190350188819136, | |
| "learning_rate": 9.711304610594104e-06, | |
| "loss": 0.4096, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.13563630925676687, | |
| "grad_norm": 1.0348199460008871, | |
| "learning_rate": 9.710493037824133e-06, | |
| "loss": 0.2071, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.1357858531478989, | |
| "grad_norm": 2.014537240536291, | |
| "learning_rate": 9.709680359927101e-06, | |
| "loss": 0.4374, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.13593539703903096, | |
| "grad_norm": 1.7241079783150106, | |
| "learning_rate": 9.708866577093665e-06, | |
| "loss": 0.4161, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.136084940930163, | |
| "grad_norm": 1.3280448342419884, | |
| "learning_rate": 9.70805168951475e-06, | |
| "loss": 0.1967, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.13623448482129505, | |
| "grad_norm": 1.6364992809413539, | |
| "learning_rate": 9.707235697381536e-06, | |
| "loss": 0.3394, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.1363840287124271, | |
| "grad_norm": 1.1211253304635729, | |
| "learning_rate": 9.706418600885462e-06, | |
| "loss": 0.3542, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.13653357260355914, | |
| "grad_norm": 4.417441150249539, | |
| "learning_rate": 9.705600400218227e-06, | |
| "loss": 0.2605, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.1366831164946912, | |
| "grad_norm": 1.6849430545358892, | |
| "learning_rate": 9.704781095571788e-06, | |
| "loss": 0.3434, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.13683266038582323, | |
| "grad_norm": 1.0419590452262997, | |
| "learning_rate": 9.703960687138363e-06, | |
| "loss": 0.1759, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.1369822042769553, | |
| "grad_norm": 2.3605687929632286, | |
| "learning_rate": 9.703139175110425e-06, | |
| "loss": 0.6175, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.13713174816808735, | |
| "grad_norm": 1.6017722806543409, | |
| "learning_rate": 9.702316559680714e-06, | |
| "loss": 0.2687, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.13728129205921938, | |
| "grad_norm": 1.3442020598442603, | |
| "learning_rate": 9.701492841042217e-06, | |
| "loss": 0.3801, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.13743083595035144, | |
| "grad_norm": 1.6758219573938795, | |
| "learning_rate": 9.70066801938819e-06, | |
| "loss": 0.3869, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.13758037984148347, | |
| "grad_norm": 1.787015495544375, | |
| "learning_rate": 9.699842094912146e-06, | |
| "loss": 0.319, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.13772992373261553, | |
| "grad_norm": 1.8015526879631494, | |
| "learning_rate": 9.699015067807851e-06, | |
| "loss": 0.493, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.13787946762374756, | |
| "grad_norm": 0.9887387021286004, | |
| "learning_rate": 9.698186938269334e-06, | |
| "loss": 0.1724, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.13802901151487962, | |
| "grad_norm": 1.7253102296559673, | |
| "learning_rate": 9.697357706490885e-06, | |
| "loss": 0.5363, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.13817855540601168, | |
| "grad_norm": 1.5558864293295054, | |
| "learning_rate": 9.696527372667046e-06, | |
| "loss": 0.2863, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.1383280992971437, | |
| "grad_norm": 1.8210322672031793, | |
| "learning_rate": 9.695695936992624e-06, | |
| "loss": 0.4107, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.13847764318827577, | |
| "grad_norm": 1.6117992353983686, | |
| "learning_rate": 9.69486339966268e-06, | |
| "loss": 0.2162, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.1386271870794078, | |
| "grad_norm": 1.7018476473220923, | |
| "learning_rate": 9.694029760872539e-06, | |
| "loss": 0.3609, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.13877673097053986, | |
| "grad_norm": 1.5673565467226127, | |
| "learning_rate": 9.693195020817776e-06, | |
| "loss": 0.3164, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.1389262748616719, | |
| "grad_norm": 1.6536061805316273, | |
| "learning_rate": 9.69235917969423e-06, | |
| "loss": 0.5039, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.13907581875280395, | |
| "grad_norm": 1.4953772716061529, | |
| "learning_rate": 9.691522237698001e-06, | |
| "loss": 0.2073, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.139225362643936, | |
| "grad_norm": 1.0372555974478648, | |
| "learning_rate": 9.69068419502544e-06, | |
| "loss": 0.1904, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.13937490653506804, | |
| "grad_norm": 1.2803091164977878, | |
| "learning_rate": 9.689845051873161e-06, | |
| "loss": 0.2085, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.1395244504262001, | |
| "grad_norm": 1.4758036204854348, | |
| "learning_rate": 9.689004808438036e-06, | |
| "loss": 0.2012, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.13967399431733213, | |
| "grad_norm": 1.6660973952553224, | |
| "learning_rate": 9.688163464917191e-06, | |
| "loss": 0.3286, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.1398235382084642, | |
| "grad_norm": 1.1549059427655604, | |
| "learning_rate": 9.687321021508018e-06, | |
| "loss": 0.2267, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.13997308209959622, | |
| "grad_norm": 1.272574916603474, | |
| "learning_rate": 9.686477478408159e-06, | |
| "loss": 0.1829, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.14012262599072828, | |
| "grad_norm": 2.754918857840336, | |
| "learning_rate": 9.685632835815519e-06, | |
| "loss": 0.4481, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.14027216988186034, | |
| "grad_norm": 1.1790985103907, | |
| "learning_rate": 9.684787093928256e-06, | |
| "loss": 0.1814, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.14042171377299237, | |
| "grad_norm": 1.011660485817637, | |
| "learning_rate": 9.683940252944794e-06, | |
| "loss": 0.1863, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.14057125766412443, | |
| "grad_norm": 1.3525074345715755, | |
| "learning_rate": 9.68309231306381e-06, | |
| "loss": 0.2084, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.14072080155525646, | |
| "grad_norm": 1.6719478297190948, | |
| "learning_rate": 9.682243274484231e-06, | |
| "loss": 0.3459, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.14087034544638852, | |
| "grad_norm": 1.5225980842484328, | |
| "learning_rate": 9.681393137405259e-06, | |
| "loss": 0.3082, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.14101988933752055, | |
| "grad_norm": 1.4403779528104341, | |
| "learning_rate": 9.680541902026342e-06, | |
| "loss": 0.1952, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.1411694332286526, | |
| "grad_norm": 1.7704358094140293, | |
| "learning_rate": 9.679689568547184e-06, | |
| "loss": 0.2925, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.14131897711978467, | |
| "grad_norm": 1.8325825494125016, | |
| "learning_rate": 9.678836137167753e-06, | |
| "loss": 0.2354, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.1414685210109167, | |
| "grad_norm": 8.228207444568621, | |
| "learning_rate": 9.677981608088274e-06, | |
| "loss": 0.1945, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.14161806490204876, | |
| "grad_norm": 1.9420821742118657, | |
| "learning_rate": 9.677125981509227e-06, | |
| "loss": 0.3745, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.1417676087931808, | |
| "grad_norm": 1.4287526091354055, | |
| "learning_rate": 9.676269257631348e-06, | |
| "loss": 0.159, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.14191715268431285, | |
| "grad_norm": 2.2979804382628726, | |
| "learning_rate": 9.675411436655636e-06, | |
| "loss": 0.6715, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.1420666965754449, | |
| "grad_norm": 1.2906292785961546, | |
| "learning_rate": 9.67455251878334e-06, | |
| "loss": 0.1863, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.14221624046657694, | |
| "grad_norm": 1.542985394545003, | |
| "learning_rate": 9.673692504215974e-06, | |
| "loss": 0.276, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.142365784357709, | |
| "grad_norm": 2.076155049712511, | |
| "learning_rate": 9.672831393155304e-06, | |
| "loss": 0.2878, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.14251532824884103, | |
| "grad_norm": 3.2389836303118265, | |
| "learning_rate": 9.671969185803357e-06, | |
| "loss": 0.4539, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.1426648721399731, | |
| "grad_norm": 0.8145950923200616, | |
| "learning_rate": 9.671105882362412e-06, | |
| "loss": 0.1916, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.14281441603110512, | |
| "grad_norm": 1.7529614161433102, | |
| "learning_rate": 9.67024148303501e-06, | |
| "loss": 0.3852, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.14296395992223718, | |
| "grad_norm": 1.6140653815156045, | |
| "learning_rate": 9.669375988023947e-06, | |
| "loss": 0.3317, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.14311350381336924, | |
| "grad_norm": 1.5917328640289674, | |
| "learning_rate": 9.668509397532278e-06, | |
| "loss": 0.205, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.14326304770450127, | |
| "grad_norm": 1.9053910041720175, | |
| "learning_rate": 9.667641711763311e-06, | |
| "loss": 0.2016, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.14341259159563333, | |
| "grad_norm": 1.2223818916012819, | |
| "learning_rate": 9.666772930920614e-06, | |
| "loss": 0.1818, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.14356213548676536, | |
| "grad_norm": 1.4130639929342779, | |
| "learning_rate": 9.665903055208013e-06, | |
| "loss": 0.1776, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.14371167937789742, | |
| "grad_norm": 2.515402250912616, | |
| "learning_rate": 9.665032084829588e-06, | |
| "loss": 0.7429, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.14386122326902945, | |
| "grad_norm": 2.118211041321287, | |
| "learning_rate": 9.66416001998968e-06, | |
| "loss": 0.5489, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.1440107671601615, | |
| "grad_norm": 1.6323921378905693, | |
| "learning_rate": 9.663286860892877e-06, | |
| "loss": 0.3446, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.14416031105129357, | |
| "grad_norm": 2.4057165427715335, | |
| "learning_rate": 9.662412607744036e-06, | |
| "loss": 0.2152, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.1443098549424256, | |
| "grad_norm": 1.5942384074329368, | |
| "learning_rate": 9.661537260748264e-06, | |
| "loss": 0.3746, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.14445939883355766, | |
| "grad_norm": 1.3815463338875527, | |
| "learning_rate": 9.660660820110926e-06, | |
| "loss": 0.2255, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.1446089427246897, | |
| "grad_norm": 2.071453535788066, | |
| "learning_rate": 9.659783286037643e-06, | |
| "loss": 0.5075, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.14475848661582175, | |
| "grad_norm": 1.3265840791490535, | |
| "learning_rate": 9.658904658734293e-06, | |
| "loss": 0.2295, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.14490803050695378, | |
| "grad_norm": 1.5352887928749521, | |
| "learning_rate": 9.658024938407011e-06, | |
| "loss": 0.3484, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.14505757439808584, | |
| "grad_norm": 1.5831033158953907, | |
| "learning_rate": 9.657144125262186e-06, | |
| "loss": 0.4039, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.1452071182892179, | |
| "grad_norm": 1.6289190913913172, | |
| "learning_rate": 9.65626221950647e-06, | |
| "loss": 0.2471, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.14535666218034993, | |
| "grad_norm": 1.6042830876012686, | |
| "learning_rate": 9.655379221346758e-06, | |
| "loss": 0.4886, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.14550620607148199, | |
| "grad_norm": 1.2790362471165744, | |
| "learning_rate": 9.654495130990218e-06, | |
| "loss": 0.2065, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.14565574996261402, | |
| "grad_norm": 1.72777953773445, | |
| "learning_rate": 9.653609948644263e-06, | |
| "loss": 0.4006, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.14580529385374608, | |
| "grad_norm": 1.7629618868465593, | |
| "learning_rate": 9.652723674516566e-06, | |
| "loss": 0.2142, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.14595483774487814, | |
| "grad_norm": 2.0277477662995174, | |
| "learning_rate": 9.651836308815055e-06, | |
| "loss": 0.5248, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.14610438163601017, | |
| "grad_norm": 1.972708159415252, | |
| "learning_rate": 9.650947851747913e-06, | |
| "loss": 0.6236, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.14625392552714223, | |
| "grad_norm": 1.760683770673202, | |
| "learning_rate": 9.650058303523583e-06, | |
| "loss": 0.3418, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.14640346941827426, | |
| "grad_norm": 1.8429433949445744, | |
| "learning_rate": 9.649167664350762e-06, | |
| "loss": 0.475, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.14655301330940632, | |
| "grad_norm": 1.9248727992532182, | |
| "learning_rate": 9.6482759344384e-06, | |
| "loss": 0.3368, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.14670255720053835, | |
| "grad_norm": 1.9573379093599792, | |
| "learning_rate": 9.647383113995707e-06, | |
| "loss": 0.4203, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.1468521010916704, | |
| "grad_norm": 2.7789135237612803, | |
| "learning_rate": 9.646489203232145e-06, | |
| "loss": 0.4581, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.14700164498280247, | |
| "grad_norm": 2.5119171064254724, | |
| "learning_rate": 9.645594202357438e-06, | |
| "loss": 0.8202, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.1471511888739345, | |
| "grad_norm": 1.4269567118604072, | |
| "learning_rate": 9.644698111581562e-06, | |
| "loss": 0.1954, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.14730073276506656, | |
| "grad_norm": 1.5211432396508893, | |
| "learning_rate": 9.643800931114742e-06, | |
| "loss": 0.207, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.1474502766561986, | |
| "grad_norm": 1.607901993958729, | |
| "learning_rate": 9.642902661167472e-06, | |
| "loss": 0.3046, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.14759982054733065, | |
| "grad_norm": 2.0792720647718776, | |
| "learning_rate": 9.642003301950491e-06, | |
| "loss": 0.4314, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.14774936443846268, | |
| "grad_norm": 1.6967562403841654, | |
| "learning_rate": 9.641102853674799e-06, | |
| "loss": 0.2142, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.14789890832959474, | |
| "grad_norm": 1.8666018241841429, | |
| "learning_rate": 9.640201316551651e-06, | |
| "loss": 0.4817, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.1480484522207268, | |
| "grad_norm": 1.6231253521213436, | |
| "learning_rate": 9.639298690792554e-06, | |
| "loss": 0.304, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.14819799611185883, | |
| "grad_norm": 0.9588780548142521, | |
| "learning_rate": 9.638394976609274e-06, | |
| "loss": 0.1709, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.14834754000299089, | |
| "grad_norm": 1.7153802262812925, | |
| "learning_rate": 9.637490174213828e-06, | |
| "loss": 0.1959, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.14849708389412292, | |
| "grad_norm": 1.8524843327507126, | |
| "learning_rate": 9.636584283818496e-06, | |
| "loss": 0.3957, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.14864662778525498, | |
| "grad_norm": 1.8045411979525945, | |
| "learning_rate": 9.635677305635807e-06, | |
| "loss": 0.2565, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.148796171676387, | |
| "grad_norm": 2.1506980932575175, | |
| "learning_rate": 9.634769239878545e-06, | |
| "loss": 0.3777, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.14894571556751907, | |
| "grad_norm": 2.1465696781275563, | |
| "learning_rate": 9.633860086759753e-06, | |
| "loss": 0.6056, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.14909525945865112, | |
| "grad_norm": 1.3341555017873934, | |
| "learning_rate": 9.632949846492728e-06, | |
| "loss": 0.2219, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.14924480334978316, | |
| "grad_norm": 1.2276798431588063, | |
| "learning_rate": 9.632038519291017e-06, | |
| "loss": 0.2074, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.14939434724091522, | |
| "grad_norm": 1.53279693471178, | |
| "learning_rate": 9.63112610536843e-06, | |
| "loss": 0.4373, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.14954389113204725, | |
| "grad_norm": 1.770529951492653, | |
| "learning_rate": 9.630212604939026e-06, | |
| "loss": 0.2495, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1496934350231793, | |
| "grad_norm": 1.6948398115693124, | |
| "learning_rate": 9.629298018217123e-06, | |
| "loss": 0.467, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.14984297891431134, | |
| "grad_norm": 2.0292952301635534, | |
| "learning_rate": 9.628382345417291e-06, | |
| "loss": 0.5509, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.1499925228054434, | |
| "grad_norm": 2.4704877435341186, | |
| "learning_rate": 9.627465586754354e-06, | |
| "loss": 0.2214, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.15014206669657545, | |
| "grad_norm": 1.6257900509098846, | |
| "learning_rate": 9.626547742443394e-06, | |
| "loss": 0.3365, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.15029161058770749, | |
| "grad_norm": 1.8331565532381362, | |
| "learning_rate": 9.625628812699747e-06, | |
| "loss": 0.4435, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.15044115447883954, | |
| "grad_norm": 2.977053464112802, | |
| "learning_rate": 9.624708797739002e-06, | |
| "loss": 0.3177, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.15059069836997158, | |
| "grad_norm": 1.756417225108672, | |
| "learning_rate": 9.623787697777001e-06, | |
| "loss": 0.3878, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.15074024226110364, | |
| "grad_norm": 1.6276234348428167, | |
| "learning_rate": 9.622865513029846e-06, | |
| "loss": 0.1901, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.1508897861522357, | |
| "grad_norm": 3.3786401544653253, | |
| "learning_rate": 9.62194224371389e-06, | |
| "loss": 0.2454, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.15103933004336773, | |
| "grad_norm": 2.0516682586662456, | |
| "learning_rate": 9.621017890045739e-06, | |
| "loss": 0.7163, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.15118887393449978, | |
| "grad_norm": 1.0885572394332461, | |
| "learning_rate": 9.620092452242257e-06, | |
| "loss": 0.2232, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.15133841782563182, | |
| "grad_norm": 1.287954715641653, | |
| "learning_rate": 9.61916593052056e-06, | |
| "loss": 0.2124, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.15148796171676387, | |
| "grad_norm": 2.089649291216167, | |
| "learning_rate": 9.618238325098021e-06, | |
| "loss": 0.5129, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.1516375056078959, | |
| "grad_norm": 1.512994828933595, | |
| "learning_rate": 9.617309636192262e-06, | |
| "loss": 0.2986, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.15178704949902797, | |
| "grad_norm": 1.6901350993268085, | |
| "learning_rate": 9.616379864021163e-06, | |
| "loss": 0.1893, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.15193659339016002, | |
| "grad_norm": 1.6240769043159409, | |
| "learning_rate": 9.615449008802858e-06, | |
| "loss": 0.1984, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.15208613728129206, | |
| "grad_norm": 2.2110380817085815, | |
| "learning_rate": 9.614517070755736e-06, | |
| "loss": 0.4573, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.15223568117242411, | |
| "grad_norm": 2.3590384489615452, | |
| "learning_rate": 9.613584050098436e-06, | |
| "loss": 0.536, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.15238522506355615, | |
| "grad_norm": 1.1021064926756596, | |
| "learning_rate": 9.612649947049856e-06, | |
| "loss": 0.224, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.1525347689546882, | |
| "grad_norm": 1.73759279762283, | |
| "learning_rate": 9.611714761829146e-06, | |
| "loss": 0.2581, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.15268431284582024, | |
| "grad_norm": 1.9739121848543864, | |
| "learning_rate": 9.610778494655706e-06, | |
| "loss": 0.2116, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.1528338567369523, | |
| "grad_norm": 1.7608676639305771, | |
| "learning_rate": 9.609841145749196e-06, | |
| "loss": 0.2343, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.15298340062808435, | |
| "grad_norm": 1.8973842729049986, | |
| "learning_rate": 9.608902715329527e-06, | |
| "loss": 0.416, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.15313294451921639, | |
| "grad_norm": 1.232118282013805, | |
| "learning_rate": 9.607963203616862e-06, | |
| "loss": 0.2428, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.15328248841034844, | |
| "grad_norm": 1.65077206870404, | |
| "learning_rate": 9.607022610831623e-06, | |
| "loss": 0.395, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.15343203230148048, | |
| "grad_norm": 1.8519962462671438, | |
| "learning_rate": 9.606080937194478e-06, | |
| "loss": 0.3225, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.15358157619261253, | |
| "grad_norm": 1.5406605876000279, | |
| "learning_rate": 9.605138182926355e-06, | |
| "loss": 0.1962, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.15373112008374457, | |
| "grad_norm": 1.68724090741763, | |
| "learning_rate": 9.604194348248432e-06, | |
| "loss": 0.3412, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.15388066397487662, | |
| "grad_norm": 1.3196225832605089, | |
| "learning_rate": 9.603249433382145e-06, | |
| "loss": 0.203, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.15403020786600868, | |
| "grad_norm": 1.4552647720547889, | |
| "learning_rate": 9.602303438549177e-06, | |
| "loss": 0.2683, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.15417975175714072, | |
| "grad_norm": 1.9356496958747662, | |
| "learning_rate": 9.601356363971467e-06, | |
| "loss": 0.4085, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.15432929564827277, | |
| "grad_norm": 1.5727583829762162, | |
| "learning_rate": 9.60040820987121e-06, | |
| "loss": 0.2702, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.1544788395394048, | |
| "grad_norm": 1.9130911912208222, | |
| "learning_rate": 9.59945897647085e-06, | |
| "loss": 0.3831, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.15462838343053686, | |
| "grad_norm": 1.8240341076741502, | |
| "learning_rate": 9.59850866399309e-06, | |
| "loss": 0.4938, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.15477792732166892, | |
| "grad_norm": 3.1510439843833953, | |
| "learning_rate": 9.597557272660878e-06, | |
| "loss": 0.4082, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.15492747121280095, | |
| "grad_norm": 2.7952488908518998, | |
| "learning_rate": 9.596604802697422e-06, | |
| "loss": 1.0219, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.155077015103933, | |
| "grad_norm": 1.5968867223126475, | |
| "learning_rate": 9.595651254326179e-06, | |
| "loss": 0.3575, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.15522655899506504, | |
| "grad_norm": 1.7270374421609462, | |
| "learning_rate": 9.594696627770863e-06, | |
| "loss": 0.4184, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.1553761028861971, | |
| "grad_norm": 1.623026584906972, | |
| "learning_rate": 9.593740923255437e-06, | |
| "loss": 0.3364, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.15552564677732914, | |
| "grad_norm": 1.724702934068898, | |
| "learning_rate": 9.592784141004118e-06, | |
| "loss": 0.5197, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.1556751906684612, | |
| "grad_norm": 1.2087417414530044, | |
| "learning_rate": 9.591826281241379e-06, | |
| "loss": 0.3232, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.15582473455959325, | |
| "grad_norm": 2.0110371073704716, | |
| "learning_rate": 9.590867344191941e-06, | |
| "loss": 0.4617, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.15597427845072528, | |
| "grad_norm": 2.300355596627081, | |
| "learning_rate": 9.58990733008078e-06, | |
| "loss": 0.3483, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.15612382234185734, | |
| "grad_norm": 1.9228199791045963, | |
| "learning_rate": 9.588946239133123e-06, | |
| "loss": 0.4723, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.15627336623298937, | |
| "grad_norm": 1.3725700472934328, | |
| "learning_rate": 9.587984071574455e-06, | |
| "loss": 0.212, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.15642291012412143, | |
| "grad_norm": 1.9304969649682522, | |
| "learning_rate": 9.587020827630507e-06, | |
| "loss": 0.2317, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.15657245401525346, | |
| "grad_norm": 1.6204729661048964, | |
| "learning_rate": 9.586056507527266e-06, | |
| "loss": 0.2135, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.15672199790638552, | |
| "grad_norm": 1.2120441425361188, | |
| "learning_rate": 9.58509111149097e-06, | |
| "loss": 0.2785, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.15687154179751758, | |
| "grad_norm": 2.1856010368938135, | |
| "learning_rate": 9.584124639748114e-06, | |
| "loss": 0.4117, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.15702108568864961, | |
| "grad_norm": 1.4004860796245104, | |
| "learning_rate": 9.583157092525435e-06, | |
| "loss": 0.375, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.15717062957978167, | |
| "grad_norm": 1.1728395243844014, | |
| "learning_rate": 9.582188470049935e-06, | |
| "loss": 0.2286, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.1573201734709137, | |
| "grad_norm": 1.3417244382182179, | |
| "learning_rate": 9.58121877254886e-06, | |
| "loss": 0.2105, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.15746971736204576, | |
| "grad_norm": 1.3092141915312672, | |
| "learning_rate": 9.580248000249709e-06, | |
| "loss": 0.253, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.1576192612531778, | |
| "grad_norm": 0.8768020427524035, | |
| "learning_rate": 9.579276153380234e-06, | |
| "loss": 0.199, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.15776880514430985, | |
| "grad_norm": 1.8273743204900599, | |
| "learning_rate": 9.578303232168442e-06, | |
| "loss": 0.5377, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.1579183490354419, | |
| "grad_norm": 1.4617178204858172, | |
| "learning_rate": 9.57732923684259e-06, | |
| "loss": 0.2318, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.15806789292657394, | |
| "grad_norm": 1.7897112328181541, | |
| "learning_rate": 9.576354167631186e-06, | |
| "loss": 0.2312, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.158217436817706, | |
| "grad_norm": 2.055748147218842, | |
| "learning_rate": 9.575378024762991e-06, | |
| "loss": 0.208, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.15836698070883803, | |
| "grad_norm": 1.9825123132080376, | |
| "learning_rate": 9.574400808467015e-06, | |
| "loss": 0.4415, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.1585165245999701, | |
| "grad_norm": 1.203644734681587, | |
| "learning_rate": 9.573422518972524e-06, | |
| "loss": 0.1651, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.15866606849110212, | |
| "grad_norm": 2.143121544386558, | |
| "learning_rate": 9.572443156509035e-06, | |
| "loss": 0.2813, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.15881561238223418, | |
| "grad_norm": 1.7645898839865752, | |
| "learning_rate": 9.571462721306315e-06, | |
| "loss": 0.314, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.15896515627336624, | |
| "grad_norm": 1.6678516650591295, | |
| "learning_rate": 9.570481213594385e-06, | |
| "loss": 0.289, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.15911470016449827, | |
| "grad_norm": 1.2850170647115653, | |
| "learning_rate": 9.569498633603513e-06, | |
| "loss": 0.2024, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.15926424405563033, | |
| "grad_norm": 1.814229267711803, | |
| "learning_rate": 9.568514981564226e-06, | |
| "loss": 0.4606, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.15941378794676236, | |
| "grad_norm": 1.243462382180521, | |
| "learning_rate": 9.567530257707294e-06, | |
| "loss": 0.204, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.15956333183789442, | |
| "grad_norm": 1.2909371933651599, | |
| "learning_rate": 9.566544462263744e-06, | |
| "loss": 0.2209, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.15971287572902648, | |
| "grad_norm": 1.6817484277248902, | |
| "learning_rate": 9.565557595464854e-06, | |
| "loss": 0.2266, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.1598624196201585, | |
| "grad_norm": 1.6783737367668008, | |
| "learning_rate": 9.564569657542153e-06, | |
| "loss": 0.3126, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.16001196351129057, | |
| "grad_norm": 1.7121174458116146, | |
| "learning_rate": 9.56358064872742e-06, | |
| "loss": 0.4938, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.1601615074024226, | |
| "grad_norm": 1.4168299418446093, | |
| "learning_rate": 9.562590569252685e-06, | |
| "loss": 0.1859, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.16031105129355466, | |
| "grad_norm": 1.1574720210896832, | |
| "learning_rate": 9.561599419350233e-06, | |
| "loss": 0.2076, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.1604605951846867, | |
| "grad_norm": 1.88010212201121, | |
| "learning_rate": 9.560607199252594e-06, | |
| "loss": 0.489, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.16061013907581875, | |
| "grad_norm": 1.7557755514977327, | |
| "learning_rate": 9.559613909192553e-06, | |
| "loss": 0.2593, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.1607596829669508, | |
| "grad_norm": 2.0354173981462864, | |
| "learning_rate": 9.558619549403148e-06, | |
| "loss": 0.266, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.16090922685808284, | |
| "grad_norm": 1.984771028183608, | |
| "learning_rate": 9.557624120117663e-06, | |
| "loss": 0.1823, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.1610587707492149, | |
| "grad_norm": 1.2435802855207878, | |
| "learning_rate": 9.556627621569636e-06, | |
| "loss": 0.2133, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.16120831464034693, | |
| "grad_norm": 2.1628484109014603, | |
| "learning_rate": 9.555630053992854e-06, | |
| "loss": 0.6313, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.161357858531479, | |
| "grad_norm": 1.3025632600056798, | |
| "learning_rate": 9.554631417621358e-06, | |
| "loss": 0.1861, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.16150740242261102, | |
| "grad_norm": 1.2033276727813813, | |
| "learning_rate": 9.553631712689437e-06, | |
| "loss": 0.1411, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.16165694631374308, | |
| "grad_norm": 1.7192459951334418, | |
| "learning_rate": 9.55263093943163e-06, | |
| "loss": 0.3415, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.16180649020487514, | |
| "grad_norm": 2.195446146090599, | |
| "learning_rate": 9.55162909808273e-06, | |
| "loss": 0.5552, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.16195603409600717, | |
| "grad_norm": 1.2518961650965623, | |
| "learning_rate": 9.550626188877779e-06, | |
| "loss": 0.1376, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.16210557798713923, | |
| "grad_norm": 1.621475542449237, | |
| "learning_rate": 9.549622212052067e-06, | |
| "loss": 0.1918, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.16225512187827126, | |
| "grad_norm": 1.6022576009951304, | |
| "learning_rate": 9.548617167841139e-06, | |
| "loss": 0.4754, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.16240466576940332, | |
| "grad_norm": 2.140796057625746, | |
| "learning_rate": 9.547611056480785e-06, | |
| "loss": 0.3365, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.16255420966053535, | |
| "grad_norm": 2.1352436057574415, | |
| "learning_rate": 9.54660387820705e-06, | |
| "loss": 0.2548, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.1627037535516674, | |
| "grad_norm": 1.533857631085752, | |
| "learning_rate": 9.54559563325623e-06, | |
| "loss": 0.2295, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.16285329744279947, | |
| "grad_norm": 1.3212471086223223, | |
| "learning_rate": 9.544586321864865e-06, | |
| "loss": 0.2047, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.1630028413339315, | |
| "grad_norm": 1.0794095044455165, | |
| "learning_rate": 9.543575944269752e-06, | |
| "loss": 0.2824, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.16315238522506356, | |
| "grad_norm": 1.7780256493338729, | |
| "learning_rate": 9.542564500707934e-06, | |
| "loss": 0.3763, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.1633019291161956, | |
| "grad_norm": 1.1347608812400427, | |
| "learning_rate": 9.541551991416704e-06, | |
| "loss": 0.2046, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.16345147300732765, | |
| "grad_norm": 1.7298153575861623, | |
| "learning_rate": 9.540538416633611e-06, | |
| "loss": 0.3394, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.1636010168984597, | |
| "grad_norm": 1.822850678248627, | |
| "learning_rate": 9.539523776596446e-06, | |
| "loss": 0.4254, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.16375056078959174, | |
| "grad_norm": 1.762563084427053, | |
| "learning_rate": 9.538508071543253e-06, | |
| "loss": 0.4083, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.1639001046807238, | |
| "grad_norm": 1.653109483519857, | |
| "learning_rate": 9.537491301712328e-06, | |
| "loss": 0.3344, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.16404964857185583, | |
| "grad_norm": 2.0381344440794713, | |
| "learning_rate": 9.536473467342213e-06, | |
| "loss": 0.6724, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.1641991924629879, | |
| "grad_norm": 1.9754898794658875, | |
| "learning_rate": 9.535454568671705e-06, | |
| "loss": 0.5666, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.16434873635411992, | |
| "grad_norm": 2.3447226731261344, | |
| "learning_rate": 9.534434605939845e-06, | |
| "loss": 0.3964, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.16449828024525198, | |
| "grad_norm": 1.773763244389934, | |
| "learning_rate": 9.533413579385925e-06, | |
| "loss": 0.3883, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.16464782413638404, | |
| "grad_norm": 1.8563640201434743, | |
| "learning_rate": 9.532391489249489e-06, | |
| "loss": 0.5295, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.16479736802751607, | |
| "grad_norm": 1.610026424473214, | |
| "learning_rate": 9.53136833577033e-06, | |
| "loss": 0.2045, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.16494691191864813, | |
| "grad_norm": 1.1595609057345122, | |
| "learning_rate": 9.530344119188489e-06, | |
| "loss": 0.196, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.16509645580978016, | |
| "grad_norm": 1.380807492186586, | |
| "learning_rate": 9.529318839744257e-06, | |
| "loss": 0.1665, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.16524599970091222, | |
| "grad_norm": 1.224405894645481, | |
| "learning_rate": 9.528292497678175e-06, | |
| "loss": 0.1904, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.16539554359204425, | |
| "grad_norm": 1.6321539600343615, | |
| "learning_rate": 9.527265093231031e-06, | |
| "loss": 0.3776, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.1655450874831763, | |
| "grad_norm": 1.8242961573055154, | |
| "learning_rate": 9.526236626643867e-06, | |
| "loss": 0.2581, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.16569463137430837, | |
| "grad_norm": 1.3475980511137085, | |
| "learning_rate": 9.525207098157968e-06, | |
| "loss": 0.3415, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.1658441752654404, | |
| "grad_norm": 1.9882440245015858, | |
| "learning_rate": 9.524176508014873e-06, | |
| "loss": 0.5573, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.16599371915657246, | |
| "grad_norm": 1.5816342314828584, | |
| "learning_rate": 9.523144856456367e-06, | |
| "loss": 0.2691, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.1661432630477045, | |
| "grad_norm": 1.3850164199795663, | |
| "learning_rate": 9.522112143724489e-06, | |
| "loss": 0.3378, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.16629280693883655, | |
| "grad_norm": 1.4909627841603759, | |
| "learning_rate": 9.52107837006152e-06, | |
| "loss": 0.4023, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.16644235082996858, | |
| "grad_norm": 1.338411558324243, | |
| "learning_rate": 9.520043535709994e-06, | |
| "loss": 0.225, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.16659189472110064, | |
| "grad_norm": 1.5004852001929436, | |
| "learning_rate": 9.519007640912691e-06, | |
| "loss": 0.367, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.1667414386122327, | |
| "grad_norm": 1.3522433759924148, | |
| "learning_rate": 9.517970685912648e-06, | |
| "loss": 0.3267, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.16689098250336473, | |
| "grad_norm": 1.2771009278550414, | |
| "learning_rate": 9.516932670953137e-06, | |
| "loss": 0.2343, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.1670405263944968, | |
| "grad_norm": 1.2558718520533958, | |
| "learning_rate": 9.515893596277692e-06, | |
| "loss": 0.2146, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.16719007028562882, | |
| "grad_norm": 1.747427684722942, | |
| "learning_rate": 9.514853462130087e-06, | |
| "loss": 0.3897, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.16733961417676088, | |
| "grad_norm": 1.350710739291567, | |
| "learning_rate": 9.51381226875435e-06, | |
| "loss": 0.1962, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.16748915806789294, | |
| "grad_norm": 1.7048566484317351, | |
| "learning_rate": 9.512770016394754e-06, | |
| "loss": 0.4704, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.16763870195902497, | |
| "grad_norm": 1.1820262059156426, | |
| "learning_rate": 9.51172670529582e-06, | |
| "loss": 0.2269, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.16778824585015703, | |
| "grad_norm": 1.4968174934291172, | |
| "learning_rate": 9.51068233570232e-06, | |
| "loss": 0.2122, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.16793778974128906, | |
| "grad_norm": 1.7978736217167008, | |
| "learning_rate": 9.50963690785927e-06, | |
| "loss": 0.4423, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.16808733363242112, | |
| "grad_norm": 1.9086737935680094, | |
| "learning_rate": 9.508590422011943e-06, | |
| "loss": 0.5837, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.16823687752355315, | |
| "grad_norm": 1.809798628211579, | |
| "learning_rate": 9.507542878405853e-06, | |
| "loss": 0.4956, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.1683864214146852, | |
| "grad_norm": 2.256680269043329, | |
| "learning_rate": 9.506494277286762e-06, | |
| "loss": 0.3622, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.16853596530581727, | |
| "grad_norm": 1.7846176870962676, | |
| "learning_rate": 9.505444618900682e-06, | |
| "loss": 0.3712, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.1686855091969493, | |
| "grad_norm": 2.142681380142415, | |
| "learning_rate": 9.504393903493874e-06, | |
| "loss": 0.2523, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.16883505308808136, | |
| "grad_norm": 1.993049487456574, | |
| "learning_rate": 9.503342131312847e-06, | |
| "loss": 0.4667, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.1689845969792134, | |
| "grad_norm": 1.4929357757100867, | |
| "learning_rate": 9.502289302604355e-06, | |
| "loss": 0.2124, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.16913414087034545, | |
| "grad_norm": 1.763381368002964, | |
| "learning_rate": 9.501235417615402e-06, | |
| "loss": 0.2563, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.16928368476147748, | |
| "grad_norm": 1.879192084699487, | |
| "learning_rate": 9.50018047659324e-06, | |
| "loss": 0.6, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.16943322865260954, | |
| "grad_norm": 1.7482598713089221, | |
| "learning_rate": 9.49912447978537e-06, | |
| "loss": 0.4743, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.1695827725437416, | |
| "grad_norm": 2.413100609605461, | |
| "learning_rate": 9.498067427439535e-06, | |
| "loss": 0.5021, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.16973231643487363, | |
| "grad_norm": 1.6675332056868586, | |
| "learning_rate": 9.497009319803732e-06, | |
| "loss": 0.3704, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.1698818603260057, | |
| "grad_norm": 1.4103198339625334, | |
| "learning_rate": 9.495950157126204e-06, | |
| "loss": 0.3525, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.17003140421713772, | |
| "grad_norm": 1.7592756830906924, | |
| "learning_rate": 9.49488993965544e-06, | |
| "loss": 0.2118, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.17018094810826978, | |
| "grad_norm": 1.209600322516803, | |
| "learning_rate": 9.493828667640179e-06, | |
| "loss": 0.2156, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.1703304919994018, | |
| "grad_norm": 1.8747303848334145, | |
| "learning_rate": 9.492766341329402e-06, | |
| "loss": 0.2998, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.17048003589053387, | |
| "grad_norm": 1.9498503355521368, | |
| "learning_rate": 9.491702960972343e-06, | |
| "loss": 0.3723, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.17062957978166593, | |
| "grad_norm": 1.6726039919291162, | |
| "learning_rate": 9.490638526818482e-06, | |
| "loss": 0.408, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.17077912367279796, | |
| "grad_norm": 3.7262669891973386, | |
| "learning_rate": 9.489573039117543e-06, | |
| "loss": 0.4009, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.17092866756393002, | |
| "grad_norm": 1.8934485182919263, | |
| "learning_rate": 9.488506498119502e-06, | |
| "loss": 0.5141, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.17107821145506205, | |
| "grad_norm": 1.4710896960429016, | |
| "learning_rate": 9.487438904074581e-06, | |
| "loss": 0.3149, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.1712277553461941, | |
| "grad_norm": 0.7650448318548279, | |
| "learning_rate": 9.486370257233244e-06, | |
| "loss": 0.1787, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.17137729923732614, | |
| "grad_norm": 1.110992686058532, | |
| "learning_rate": 9.48530055784621e-06, | |
| "loss": 0.2058, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.1715268431284582, | |
| "grad_norm": 1.7079056261573908, | |
| "learning_rate": 9.484229806164435e-06, | |
| "loss": 0.2944, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.17167638701959026, | |
| "grad_norm": 1.5033153679810882, | |
| "learning_rate": 9.483158002439134e-06, | |
| "loss": 0.2221, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.1718259309107223, | |
| "grad_norm": 1.672220461796242, | |
| "learning_rate": 9.48208514692176e-06, | |
| "loss": 0.3671, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.17197547480185435, | |
| "grad_norm": 2.0626867491532885, | |
| "learning_rate": 9.481011239864014e-06, | |
| "loss": 0.3915, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.17212501869298638, | |
| "grad_norm": 1.6104087564965828, | |
| "learning_rate": 9.479936281517848e-06, | |
| "loss": 0.3084, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.17227456258411844, | |
| "grad_norm": 1.2158699033191027, | |
| "learning_rate": 9.478860272135452e-06, | |
| "loss": 0.2074, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.1724241064752505, | |
| "grad_norm": 1.5670367135087604, | |
| "learning_rate": 9.477783211969273e-06, | |
| "loss": 0.3216, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.17257365036638253, | |
| "grad_norm": 1.1456620290612602, | |
| "learning_rate": 9.476705101272e-06, | |
| "loss": 0.1631, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.1727231942575146, | |
| "grad_norm": 2.240627806354505, | |
| "learning_rate": 9.475625940296567e-06, | |
| "loss": 0.3657, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.17287273814864662, | |
| "grad_norm": 1.5745880507830639, | |
| "learning_rate": 9.474545729296152e-06, | |
| "loss": 0.2223, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.17302228203977868, | |
| "grad_norm": 2.112259044674356, | |
| "learning_rate": 9.47346446852419e-06, | |
| "loss": 0.6911, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.1731718259309107, | |
| "grad_norm": 1.5810443743533733, | |
| "learning_rate": 9.472382158234349e-06, | |
| "loss": 0.3099, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.17332136982204277, | |
| "grad_norm": 1.2778440299252458, | |
| "learning_rate": 9.47129879868055e-06, | |
| "loss": 0.2136, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.17347091371317483, | |
| "grad_norm": 1.4299544830347497, | |
| "learning_rate": 9.470214390116965e-06, | |
| "loss": 0.2145, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.17362045760430686, | |
| "grad_norm": 1.3698311545901527, | |
| "learning_rate": 9.469128932798e-06, | |
| "loss": 0.2377, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.17377000149543892, | |
| "grad_norm": 1.2952413351897127, | |
| "learning_rate": 9.468042426978319e-06, | |
| "loss": 0.2452, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.17391954538657095, | |
| "grad_norm": 1.3794054233492332, | |
| "learning_rate": 9.466954872912823e-06, | |
| "loss": 0.1923, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.174069089277703, | |
| "grad_norm": 1.1390407334512478, | |
| "learning_rate": 9.465866270856665e-06, | |
| "loss": 0.251, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.17421863316883504, | |
| "grad_norm": 2.280807269259536, | |
| "learning_rate": 9.46477662106524e-06, | |
| "loss": 0.5433, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.1743681770599671, | |
| "grad_norm": 1.4481272121275899, | |
| "learning_rate": 9.463685923794191e-06, | |
| "loss": 0.3209, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.17451772095109916, | |
| "grad_norm": 1.3194380931708358, | |
| "learning_rate": 9.462594179299408e-06, | |
| "loss": 0.2208, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.1746672648422312, | |
| "grad_norm": 2.0036222277986, | |
| "learning_rate": 9.46150138783702e-06, | |
| "loss": 0.2654, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.17481680873336325, | |
| "grad_norm": 0.8359301847512149, | |
| "learning_rate": 9.460407549663411e-06, | |
| "loss": 0.2131, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.17496635262449528, | |
| "grad_norm": 1.8746705630094216, | |
| "learning_rate": 9.459312665035203e-06, | |
| "loss": 0.2415, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.17511589651562734, | |
| "grad_norm": 2.187475783350278, | |
| "learning_rate": 9.458216734209269e-06, | |
| "loss": 0.6378, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.17526544040675937, | |
| "grad_norm": 1.3984177897039258, | |
| "learning_rate": 9.457119757442723e-06, | |
| "loss": 0.2034, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.17541498429789143, | |
| "grad_norm": 1.776218225609686, | |
| "learning_rate": 9.456021734992928e-06, | |
| "loss": 0.3717, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.1755645281890235, | |
| "grad_norm": 1.360440620305923, | |
| "learning_rate": 9.45492266711749e-06, | |
| "loss": 0.2499, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.17571407208015552, | |
| "grad_norm": 1.11686981125786, | |
| "learning_rate": 9.453822554074259e-06, | |
| "loss": 0.1718, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.17586361597128758, | |
| "grad_norm": 1.7801052355130151, | |
| "learning_rate": 9.452721396121333e-06, | |
| "loss": 0.392, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.1760131598624196, | |
| "grad_norm": 1.4238435171417776, | |
| "learning_rate": 9.451619193517057e-06, | |
| "loss": 0.4248, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.17616270375355167, | |
| "grad_norm": 1.4639661830539468, | |
| "learning_rate": 9.450515946520016e-06, | |
| "loss": 0.2049, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.17631224764468373, | |
| "grad_norm": 1.5499038616483287, | |
| "learning_rate": 9.449411655389042e-06, | |
| "loss": 0.4062, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.17646179153581576, | |
| "grad_norm": 1.7136605249554677, | |
| "learning_rate": 9.448306320383215e-06, | |
| "loss": 0.3911, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.17661133542694782, | |
| "grad_norm": 1.4594560853866743, | |
| "learning_rate": 9.447199941761852e-06, | |
| "loss": 0.2117, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.17676087931807985, | |
| "grad_norm": 1.6116818115006617, | |
| "learning_rate": 9.446092519784525e-06, | |
| "loss": 0.19, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.1769104232092119, | |
| "grad_norm": 1.5420971786965234, | |
| "learning_rate": 9.444984054711044e-06, | |
| "loss": 0.3199, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.17705996710034394, | |
| "grad_norm": 1.6968645734800356, | |
| "learning_rate": 9.443874546801465e-06, | |
| "loss": 0.3796, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.177209510991476, | |
| "grad_norm": 1.3181328359871092, | |
| "learning_rate": 9.442763996316093e-06, | |
| "loss": 0.3955, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.17735905488260806, | |
| "grad_norm": 2.8227798204695733, | |
| "learning_rate": 9.44165240351547e-06, | |
| "loss": 0.5894, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.1775085987737401, | |
| "grad_norm": 1.9231832633740291, | |
| "learning_rate": 9.440539768660386e-06, | |
| "loss": 0.3844, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.17765814266487215, | |
| "grad_norm": 1.4508084994855301, | |
| "learning_rate": 9.439426092011877e-06, | |
| "loss": 0.1983, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.17780768655600418, | |
| "grad_norm": 1.838910674129457, | |
| "learning_rate": 9.438311373831224e-06, | |
| "loss": 0.5758, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.17795723044713624, | |
| "grad_norm": 1.878745494476723, | |
| "learning_rate": 9.437195614379947e-06, | |
| "loss": 0.4892, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.17810677433826827, | |
| "grad_norm": 1.5763295601740275, | |
| "learning_rate": 9.436078813919818e-06, | |
| "loss": 0.4209, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.17825631822940033, | |
| "grad_norm": 1.939965363898736, | |
| "learning_rate": 9.434960972712846e-06, | |
| "loss": 0.4915, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.1784058621205324, | |
| "grad_norm": 1.777010297411083, | |
| "learning_rate": 9.433842091021287e-06, | |
| "loss": 0.4445, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.17855540601166442, | |
| "grad_norm": 1.8576886519979177, | |
| "learning_rate": 9.432722169107647e-06, | |
| "loss": 0.2065, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.17870494990279648, | |
| "grad_norm": 1.6414559576032928, | |
| "learning_rate": 9.431601207234663e-06, | |
| "loss": 0.2878, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.1788544937939285, | |
| "grad_norm": 1.6433452397947506, | |
| "learning_rate": 9.430479205665329e-06, | |
| "loss": 0.3933, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.17900403768506057, | |
| "grad_norm": 1.3219617553631218, | |
| "learning_rate": 9.429356164662872e-06, | |
| "loss": 0.1886, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.1791535815761926, | |
| "grad_norm": 1.3673182854086454, | |
| "learning_rate": 9.428232084490774e-06, | |
| "loss": 0.2098, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.17930312546732466, | |
| "grad_norm": 1.4932716672657123, | |
| "learning_rate": 9.427106965412752e-06, | |
| "loss": 0.1868, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.17945266935845672, | |
| "grad_norm": 1.2835655324809725, | |
| "learning_rate": 9.425980807692771e-06, | |
| "loss": 0.2841, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.17960221324958875, | |
| "grad_norm": 1.5229676329003083, | |
| "learning_rate": 9.424853611595037e-06, | |
| "loss": 0.429, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.1797517571407208, | |
| "grad_norm": 1.3834763754455093, | |
| "learning_rate": 9.423725377384e-06, | |
| "loss": 0.199, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.17990130103185284, | |
| "grad_norm": 1.172574987366, | |
| "learning_rate": 9.42259610532436e-06, | |
| "loss": 0.2422, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.1800508449229849, | |
| "grad_norm": 2.3677332220742753, | |
| "learning_rate": 9.421465795681048e-06, | |
| "loss": 0.6703, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.18020038881411693, | |
| "grad_norm": 1.743670576433428, | |
| "learning_rate": 9.420334448719251e-06, | |
| "loss": 0.3879, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.180349932705249, | |
| "grad_norm": 1.4354052350500734, | |
| "learning_rate": 9.419202064704393e-06, | |
| "loss": 0.2261, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.18049947659638105, | |
| "grad_norm": 1.5355684537494616, | |
| "learning_rate": 9.41806864390214e-06, | |
| "loss": 0.2323, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.18064902048751308, | |
| "grad_norm": 2.7749706919729067, | |
| "learning_rate": 9.416934186578403e-06, | |
| "loss": 0.2457, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.18079856437864514, | |
| "grad_norm": 1.7998786782731084, | |
| "learning_rate": 9.41579869299934e-06, | |
| "loss": 0.5115, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.18094810826977717, | |
| "grad_norm": 1.722249547477117, | |
| "learning_rate": 9.414662163431347e-06, | |
| "loss": 0.3978, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.18109765216090923, | |
| "grad_norm": 1.470878100530038, | |
| "learning_rate": 9.413524598141065e-06, | |
| "loss": 0.3655, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.1812471960520413, | |
| "grad_norm": 1.2321837235938764, | |
| "learning_rate": 9.412385997395377e-06, | |
| "loss": 0.206, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.18139673994317332, | |
| "grad_norm": 1.71695513424398, | |
| "learning_rate": 9.41124636146141e-06, | |
| "loss": 0.2988, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.18154628383430538, | |
| "grad_norm": 1.5665377801862033, | |
| "learning_rate": 9.410105690606533e-06, | |
| "loss": 0.435, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.1816958277254374, | |
| "grad_norm": 1.9574571743722469, | |
| "learning_rate": 9.40896398509836e-06, | |
| "loss": 0.3844, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.18184537161656947, | |
| "grad_norm": 1.732474617655161, | |
| "learning_rate": 9.407821245204746e-06, | |
| "loss": 0.4532, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.1819949155077015, | |
| "grad_norm": 1.6282505343946028, | |
| "learning_rate": 9.406677471193788e-06, | |
| "loss": 0.346, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.18214445939883356, | |
| "grad_norm": 1.7687288700904007, | |
| "learning_rate": 9.405532663333826e-06, | |
| "loss": 0.2398, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.18229400328996562, | |
| "grad_norm": 1.710427334466053, | |
| "learning_rate": 9.404386821893442e-06, | |
| "loss": 0.2851, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.18244354718109765, | |
| "grad_norm": 1.017825559673437, | |
| "learning_rate": 9.403239947141467e-06, | |
| "loss": 0.1898, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.1825930910722297, | |
| "grad_norm": 1.7058191164095473, | |
| "learning_rate": 9.402092039346961e-06, | |
| "loss": 0.3391, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.18274263496336174, | |
| "grad_norm": 1.2429292971081916, | |
| "learning_rate": 9.40094309877924e-06, | |
| "loss": 0.247, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.1828921788544938, | |
| "grad_norm": 1.26527696992994, | |
| "learning_rate": 9.399793125707853e-06, | |
| "loss": 0.2229, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.18304172274562583, | |
| "grad_norm": 1.150282472600963, | |
| "learning_rate": 9.398642120402596e-06, | |
| "loss": 0.2145, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.1831912666367579, | |
| "grad_norm": 1.3914149403501497, | |
| "learning_rate": 9.39749008313351e-06, | |
| "loss": 0.231, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.18334081052788995, | |
| "grad_norm": 1.3685090802839712, | |
| "learning_rate": 9.396337014170866e-06, | |
| "loss": 0.1872, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.18349035441902198, | |
| "grad_norm": 1.6709772065779387, | |
| "learning_rate": 9.395182913785192e-06, | |
| "loss": 0.2055, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.18363989831015404, | |
| "grad_norm": 2.0418194880673783, | |
| "learning_rate": 9.394027782247247e-06, | |
| "loss": 0.4888, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.18378944220128607, | |
| "grad_norm": 1.5794839342981186, | |
| "learning_rate": 9.392871619828036e-06, | |
| "loss": 0.3355, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.18393898609241813, | |
| "grad_norm": 2.365767436986478, | |
| "learning_rate": 9.39171442679881e-06, | |
| "loss": 0.4306, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.18408852998355016, | |
| "grad_norm": 1.535735557296357, | |
| "learning_rate": 9.390556203431053e-06, | |
| "loss": 0.3454, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.18423807387468222, | |
| "grad_norm": 2.0146640105762, | |
| "learning_rate": 9.3893969499965e-06, | |
| "loss": 0.5002, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.18438761776581428, | |
| "grad_norm": 1.0888630229716356, | |
| "learning_rate": 9.388236666767119e-06, | |
| "loss": 0.1717, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.1845371616569463, | |
| "grad_norm": 1.471926551369625, | |
| "learning_rate": 9.387075354015125e-06, | |
| "loss": 0.2728, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.18468670554807837, | |
| "grad_norm": 1.2418392055984802, | |
| "learning_rate": 9.385913012012972e-06, | |
| "loss": 0.2338, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.1848362494392104, | |
| "grad_norm": 1.1326547586847213, | |
| "learning_rate": 9.384749641033358e-06, | |
| "loss": 0.2014, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.18498579333034246, | |
| "grad_norm": 1.2625669973249032, | |
| "learning_rate": 9.383585241349223e-06, | |
| "loss": 0.2257, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.18513533722147452, | |
| "grad_norm": 2.0231610702160494, | |
| "learning_rate": 9.382419813233741e-06, | |
| "loss": 0.6136, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.18528488111260655, | |
| "grad_norm": 2.2265632194384035, | |
| "learning_rate": 9.381253356960339e-06, | |
| "loss": 0.379, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.1854344250037386, | |
| "grad_norm": 1.7519589257117685, | |
| "learning_rate": 9.380085872802672e-06, | |
| "loss": 0.4481, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.18558396889487064, | |
| "grad_norm": 1.8894247538731719, | |
| "learning_rate": 9.37891736103465e-06, | |
| "loss": 0.2349, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.1857335127860027, | |
| "grad_norm": 2.0840730120928153, | |
| "learning_rate": 9.377747821930411e-06, | |
| "loss": 0.386, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.18588305667713473, | |
| "grad_norm": 1.2303960642463392, | |
| "learning_rate": 9.376577255764346e-06, | |
| "loss": 0.2138, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.1860326005682668, | |
| "grad_norm": 1.0736052443136495, | |
| "learning_rate": 9.375405662811076e-06, | |
| "loss": 0.2919, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.18618214445939885, | |
| "grad_norm": 2.1694546083973236, | |
| "learning_rate": 9.37423304334547e-06, | |
| "loss": 0.4716, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.18633168835053088, | |
| "grad_norm": 1.7953994969561728, | |
| "learning_rate": 9.373059397642637e-06, | |
| "loss": 0.2303, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.18648123224166294, | |
| "grad_norm": 1.1331346308690267, | |
| "learning_rate": 9.371884725977924e-06, | |
| "loss": 0.1681, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.18663077613279497, | |
| "grad_norm": 1.8818511328803789, | |
| "learning_rate": 9.370709028626921e-06, | |
| "loss": 0.3736, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.18678032002392703, | |
| "grad_norm": 1.080778831083804, | |
| "learning_rate": 9.369532305865459e-06, | |
| "loss": 0.2155, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.18692986391505906, | |
| "grad_norm": 2.005915788639095, | |
| "learning_rate": 9.368354557969606e-06, | |
| "loss": 0.4026, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.18707940780619112, | |
| "grad_norm": 0.8756048068588032, | |
| "learning_rate": 9.367175785215674e-06, | |
| "loss": 0.183, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.18722895169732318, | |
| "grad_norm": 2.100099144522435, | |
| "learning_rate": 9.365995987880216e-06, | |
| "loss": 0.182, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.1873784955884552, | |
| "grad_norm": 1.9441741117291806, | |
| "learning_rate": 9.364815166240023e-06, | |
| "loss": 0.3865, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.18752803947958727, | |
| "grad_norm": 1.1762687046527927, | |
| "learning_rate": 9.363633320572124e-06, | |
| "loss": 0.2105, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.1876775833707193, | |
| "grad_norm": 1.170057500642311, | |
| "learning_rate": 9.362450451153795e-06, | |
| "loss": 0.201, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.18782712726185136, | |
| "grad_norm": 1.774823231357248, | |
| "learning_rate": 9.36126655826255e-06, | |
| "loss": 0.2958, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.1879766711529834, | |
| "grad_norm": 1.847386943393164, | |
| "learning_rate": 9.360081642176137e-06, | |
| "loss": 0.2783, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.18812621504411545, | |
| "grad_norm": 1.2317043700510546, | |
| "learning_rate": 9.358895703172552e-06, | |
| "loss": 0.2237, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.1882757589352475, | |
| "grad_norm": 1.4962863598933458, | |
| "learning_rate": 9.357708741530025e-06, | |
| "loss": 0.208, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.18842530282637954, | |
| "grad_norm": 1.753807685308467, | |
| "learning_rate": 9.356520757527032e-06, | |
| "loss": 0.513, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.1885748467175116, | |
| "grad_norm": 2.457507671022133, | |
| "learning_rate": 9.355331751442284e-06, | |
| "loss": 0.8743, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.18872439060864363, | |
| "grad_norm": 1.7444181168119555, | |
| "learning_rate": 9.354141723554734e-06, | |
| "loss": 0.3346, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.1888739344997757, | |
| "grad_norm": 1.971056965005781, | |
| "learning_rate": 9.35295067414357e-06, | |
| "loss": 0.2297, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.18902347839090772, | |
| "grad_norm": 1.5052086349314306, | |
| "learning_rate": 9.35175860348823e-06, | |
| "loss": 0.2149, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.18917302228203978, | |
| "grad_norm": 1.4361302390685748, | |
| "learning_rate": 9.35056551186838e-06, | |
| "loss": 0.3298, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.18932256617317184, | |
| "grad_norm": 1.4206462492110938, | |
| "learning_rate": 9.349371399563935e-06, | |
| "loss": 0.1929, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.18947211006430387, | |
| "grad_norm": 1.4913953281160535, | |
| "learning_rate": 9.348176266855042e-06, | |
| "loss": 0.2526, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.18962165395543593, | |
| "grad_norm": 2.365270322972236, | |
| "learning_rate": 9.346980114022092e-06, | |
| "loss": 0.5066, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.18977119784656796, | |
| "grad_norm": 1.8955954404187068, | |
| "learning_rate": 9.345782941345714e-06, | |
| "loss": 0.4404, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.18992074173770002, | |
| "grad_norm": 1.1285554099802715, | |
| "learning_rate": 9.344584749106775e-06, | |
| "loss": 0.2001, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.19007028562883208, | |
| "grad_norm": 1.6210163029014748, | |
| "learning_rate": 9.343385537586385e-06, | |
| "loss": 0.3274, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.1902198295199641, | |
| "grad_norm": 1.4031028340124463, | |
| "learning_rate": 9.342185307065888e-06, | |
| "loss": 0.1922, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.19036937341109617, | |
| "grad_norm": 1.5025368685887945, | |
| "learning_rate": 9.340984057826872e-06, | |
| "loss": 0.4106, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.1905189173022282, | |
| "grad_norm": 1.3363573267962257, | |
| "learning_rate": 9.339781790151159e-06, | |
| "loss": 0.2906, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.19066846119336026, | |
| "grad_norm": 2.2033082601743263, | |
| "learning_rate": 9.338578504320815e-06, | |
| "loss": 0.3913, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.1908180050844923, | |
| "grad_norm": 1.5703985946217345, | |
| "learning_rate": 9.337374200618141e-06, | |
| "loss": 0.2363, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.19096754897562435, | |
| "grad_norm": 1.8441964019229968, | |
| "learning_rate": 9.336168879325678e-06, | |
| "loss": 0.2193, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.1911170928667564, | |
| "grad_norm": 1.9461041413852502, | |
| "learning_rate": 9.334962540726208e-06, | |
| "loss": 0.3327, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.19126663675788844, | |
| "grad_norm": 1.987695873853033, | |
| "learning_rate": 9.333755185102747e-06, | |
| "loss": 0.5218, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.1914161806490205, | |
| "grad_norm": 1.9784687710756435, | |
| "learning_rate": 9.332546812738555e-06, | |
| "loss": 0.4903, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.19156572454015253, | |
| "grad_norm": 1.6276484190964966, | |
| "learning_rate": 9.331337423917126e-06, | |
| "loss": 0.3464, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.19171526843128459, | |
| "grad_norm": 1.2032949092333924, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 0.1529, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.19186481232241662, | |
| "grad_norm": 1.8200067255136916, | |
| "learning_rate": 9.328915598037733e-06, | |
| "loss": 0.4354, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.19201435621354868, | |
| "grad_norm": 1.4794611609702433, | |
| "learning_rate": 9.327703161547952e-06, | |
| "loss": 0.2071, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.19216390010468073, | |
| "grad_norm": 1.8032485542741101, | |
| "learning_rate": 9.326489709737303e-06, | |
| "loss": 0.3813, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.19231344399581277, | |
| "grad_norm": 1.3455083426481262, | |
| "learning_rate": 9.325275242890472e-06, | |
| "loss": 0.1853, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.19246298788694483, | |
| "grad_norm": 2.125860082250447, | |
| "learning_rate": 9.324059761292385e-06, | |
| "loss": 0.71, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.19261253177807686, | |
| "grad_norm": 1.8089276946794224, | |
| "learning_rate": 9.322843265228206e-06, | |
| "loss": 0.3672, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.19276207566920892, | |
| "grad_norm": 1.8361099385383872, | |
| "learning_rate": 9.321625754983335e-06, | |
| "loss": 0.3484, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.19291161956034095, | |
| "grad_norm": 1.6363642315445044, | |
| "learning_rate": 9.320407230843413e-06, | |
| "loss": 0.3042, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.193061163451473, | |
| "grad_norm": 2.021061654973304, | |
| "learning_rate": 9.319187693094318e-06, | |
| "loss": 0.5033, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.19321070734260506, | |
| "grad_norm": 2.62527535830696, | |
| "learning_rate": 9.317967142022163e-06, | |
| "loss": 0.275, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.1933602512337371, | |
| "grad_norm": 1.3806620083144838, | |
| "learning_rate": 9.316745577913304e-06, | |
| "loss": 0.2855, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.19350979512486916, | |
| "grad_norm": 1.7655797800670596, | |
| "learning_rate": 9.31552300105433e-06, | |
| "loss": 0.3915, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.1936593390160012, | |
| "grad_norm": 1.3465049850252158, | |
| "learning_rate": 9.314299411732069e-06, | |
| "loss": 0.249, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.19380888290713325, | |
| "grad_norm": 0.8256201784583667, | |
| "learning_rate": 9.313074810233589e-06, | |
| "loss": 0.1543, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.1939584267982653, | |
| "grad_norm": 1.9261313307855723, | |
| "learning_rate": 9.31184919684619e-06, | |
| "loss": 0.6008, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.19410797068939734, | |
| "grad_norm": 1.519619673139573, | |
| "learning_rate": 9.310622571857417e-06, | |
| "loss": 0.239, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.1942575145805294, | |
| "grad_norm": 2.0122490785681717, | |
| "learning_rate": 9.309394935555042e-06, | |
| "loss": 0.33, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.19440705847166143, | |
| "grad_norm": 1.5990433489122537, | |
| "learning_rate": 9.308166288227088e-06, | |
| "loss": 0.4012, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.19455660236279348, | |
| "grad_norm": 1.4956175342537672, | |
| "learning_rate": 9.3069366301618e-06, | |
| "loss": 0.5736, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.19470614625392552, | |
| "grad_norm": 1.2896015261249874, | |
| "learning_rate": 9.305705961647672e-06, | |
| "loss": 0.1798, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.19485569014505758, | |
| "grad_norm": 1.3006798401099697, | |
| "learning_rate": 9.304474282973432e-06, | |
| "loss": 0.3653, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.19500523403618963, | |
| "grad_norm": 2.1249741454515054, | |
| "learning_rate": 9.30324159442804e-06, | |
| "loss": 0.7342, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.19515477792732167, | |
| "grad_norm": 1.486234854981151, | |
| "learning_rate": 9.302007896300697e-06, | |
| "loss": 0.2874, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.19530432181845372, | |
| "grad_norm": 1.486723968689139, | |
| "learning_rate": 9.300773188880843e-06, | |
| "loss": 0.2301, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.19545386570958576, | |
| "grad_norm": 1.697087725237096, | |
| "learning_rate": 9.29953747245815e-06, | |
| "loss": 0.3191, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.19560340960071781, | |
| "grad_norm": 1.9495679450825656, | |
| "learning_rate": 9.29830074732253e-06, | |
| "loss": 0.3954, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.19575295349184985, | |
| "grad_norm": 1.2105140299371033, | |
| "learning_rate": 9.29706301376413e-06, | |
| "loss": 0.2557, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.1959024973829819, | |
| "grad_norm": 1.547760007057872, | |
| "learning_rate": 9.295824272073334e-06, | |
| "loss": 0.2865, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.19605204127411396, | |
| "grad_norm": 1.6181428409490188, | |
| "learning_rate": 9.294584522540766e-06, | |
| "loss": 0.3332, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.196201585165246, | |
| "grad_norm": 1.664852192256293, | |
| "learning_rate": 9.293343765457278e-06, | |
| "loss": 0.3058, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.19635112905637805, | |
| "grad_norm": 1.8608018825705885, | |
| "learning_rate": 9.292102001113968e-06, | |
| "loss": 0.3048, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.19650067294751009, | |
| "grad_norm": 1.420503009424543, | |
| "learning_rate": 9.290859229802162e-06, | |
| "loss": 0.2283, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.19665021683864214, | |
| "grad_norm": 1.996771180524021, | |
| "learning_rate": 9.289615451813428e-06, | |
| "loss": 0.4804, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.19679976072977418, | |
| "grad_norm": 1.8295883207210475, | |
| "learning_rate": 9.28837066743957e-06, | |
| "loss": 0.3065, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.19694930462090623, | |
| "grad_norm": 1.6508456488855519, | |
| "learning_rate": 9.287124876972625e-06, | |
| "loss": 0.2617, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.1970988485120383, | |
| "grad_norm": 1.7646395203323395, | |
| "learning_rate": 9.285878080704866e-06, | |
| "loss": 0.3484, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.19724839240317033, | |
| "grad_norm": 1.6976643786387164, | |
| "learning_rate": 9.284630278928805e-06, | |
| "loss": 0.4485, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.19739793629430238, | |
| "grad_norm": 1.6867112197107144, | |
| "learning_rate": 9.283381471937188e-06, | |
| "loss": 0.381, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.19754748018543442, | |
| "grad_norm": 1.650278888960391, | |
| "learning_rate": 9.282131660022997e-06, | |
| "loss": 0.2289, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.19769702407656647, | |
| "grad_norm": 2.0028177208667977, | |
| "learning_rate": 9.28088084347945e-06, | |
| "loss": 0.5132, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.19784656796769853, | |
| "grad_norm": 1.030996633782416, | |
| "learning_rate": 9.279629022600002e-06, | |
| "loss": 0.1764, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.19799611185883056, | |
| "grad_norm": 1.448960209222983, | |
| "learning_rate": 9.27837619767834e-06, | |
| "loss": 0.2575, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.19814565574996262, | |
| "grad_norm": 2.099657510604881, | |
| "learning_rate": 9.27712236900839e-06, | |
| "loss": 0.4117, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.19829519964109465, | |
| "grad_norm": 1.1059240433107884, | |
| "learning_rate": 9.27586753688431e-06, | |
| "loss": 0.2064, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.19844474353222671, | |
| "grad_norm": 0.9819176984170996, | |
| "learning_rate": 9.274611701600502e-06, | |
| "loss": 0.2357, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.19859428742335875, | |
| "grad_norm": 1.5836465571763443, | |
| "learning_rate": 9.273354863451589e-06, | |
| "loss": 0.2478, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.1987438313144908, | |
| "grad_norm": 1.8864795778055325, | |
| "learning_rate": 9.272097022732444e-06, | |
| "loss": 0.3705, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.19889337520562286, | |
| "grad_norm": 1.5474710953559745, | |
| "learning_rate": 9.270838179738164e-06, | |
| "loss": 0.1888, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.1990429190967549, | |
| "grad_norm": 1.6239105754270915, | |
| "learning_rate": 9.269578334764087e-06, | |
| "loss": 0.3698, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.19919246298788695, | |
| "grad_norm": 1.033656840947032, | |
| "learning_rate": 9.268317488105787e-06, | |
| "loss": 0.1741, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.19934200687901898, | |
| "grad_norm": 1.2644053752133695, | |
| "learning_rate": 9.267055640059068e-06, | |
| "loss": 0.3292, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.19949155077015104, | |
| "grad_norm": 2.0362957566742224, | |
| "learning_rate": 9.265792790919972e-06, | |
| "loss": 0.3348, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.19964109466128308, | |
| "grad_norm": 1.528100233715919, | |
| "learning_rate": 9.264528940984777e-06, | |
| "loss": 0.2456, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.19979063855241513, | |
| "grad_norm": 1.420463259410139, | |
| "learning_rate": 9.263264090549992e-06, | |
| "loss": 0.3396, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.1999401824435472, | |
| "grad_norm": 1.7091828149298964, | |
| "learning_rate": 9.261998239912367e-06, | |
| "loss": 0.2596, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.20008972633467922, | |
| "grad_norm": 2.259480708170105, | |
| "learning_rate": 9.26073138936888e-06, | |
| "loss": 0.4451, | |
| "step": 1338 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 6687, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 669, | |
| "total_flos": 83353485963264.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |