| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1806, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005537098560354374, |
| "grad_norm": 5.9498114585876465, |
| "learning_rate": 5.524861878453039e-08, |
| "loss": 0.862, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0011074197120708748, |
| "grad_norm": 5.8652262687683105, |
| "learning_rate": 1.1049723756906078e-07, |
| "loss": 0.8523, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0016611295681063123, |
| "grad_norm": 5.964611053466797, |
| "learning_rate": 1.6574585635359117e-07, |
| "loss": 0.8463, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0022148394241417496, |
| "grad_norm": 6.093450546264648, |
| "learning_rate": 2.2099447513812156e-07, |
| "loss": 0.8815, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0027685492801771874, |
| "grad_norm": 5.786026477813721, |
| "learning_rate": 2.7624309392265196e-07, |
| "loss": 0.8466, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0033222591362126247, |
| "grad_norm": 6.09655237197876, |
| "learning_rate": 3.3149171270718233e-07, |
| "loss": 0.8834, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.003875968992248062, |
| "grad_norm": 6.1001691818237305, |
| "learning_rate": 3.867403314917127e-07, |
| "loss": 0.8818, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004429678848283499, |
| "grad_norm": 5.940769672393799, |
| "learning_rate": 4.419889502762431e-07, |
| "loss": 0.8718, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0049833887043189366, |
| "grad_norm": 6.211287021636963, |
| "learning_rate": 4.972375690607735e-07, |
| "loss": 0.9087, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.005537098560354375, |
| "grad_norm": 5.576071262359619, |
| "learning_rate": 5.524861878453039e-07, |
| "loss": 0.8313, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006090808416389812, |
| "grad_norm": 5.358067512512207, |
| "learning_rate": 6.077348066298343e-07, |
| "loss": 0.8397, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006644518272425249, |
| "grad_norm": 5.2589569091796875, |
| "learning_rate": 6.629834254143647e-07, |
| "loss": 0.8471, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007198228128460687, |
| "grad_norm": 5.519755840301514, |
| "learning_rate": 7.18232044198895e-07, |
| "loss": 0.8814, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007751937984496124, |
| "grad_norm": 4.506252765655518, |
| "learning_rate": 7.734806629834254e-07, |
| "loss": 0.8166, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.008305647840531562, |
| "grad_norm": 4.339894771575928, |
| "learning_rate": 8.287292817679559e-07, |
| "loss": 0.8434, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008859357696566999, |
| "grad_norm": 4.21922492980957, |
| "learning_rate": 8.839779005524863e-07, |
| "loss": 0.8234, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.009413067552602437, |
| "grad_norm": 4.036139011383057, |
| "learning_rate": 9.392265193370166e-07, |
| "loss": 0.8213, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009966777408637873, |
| "grad_norm": 3.8818323612213135, |
| "learning_rate": 9.94475138121547e-07, |
| "loss": 0.8183, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.010520487264673311, |
| "grad_norm": 2.2787609100341797, |
| "learning_rate": 1.0497237569060774e-06, |
| "loss": 0.8084, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01107419712070875, |
| "grad_norm": 2.223111867904663, |
| "learning_rate": 1.1049723756906078e-06, |
| "loss": 0.7873, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011627906976744186, |
| "grad_norm": 2.168884515762329, |
| "learning_rate": 1.160220994475138e-06, |
| "loss": 0.7801, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.012181616832779624, |
| "grad_norm": 1.910096287727356, |
| "learning_rate": 1.2154696132596686e-06, |
| "loss": 0.7352, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01273532668881506, |
| "grad_norm": 1.8659913539886475, |
| "learning_rate": 1.270718232044199e-06, |
| "loss": 0.7379, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.013289036544850499, |
| "grad_norm": 1.8087621927261353, |
| "learning_rate": 1.3259668508287293e-06, |
| "loss": 0.752, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.013842746400885935, |
| "grad_norm": 1.5981807708740234, |
| "learning_rate": 1.3812154696132598e-06, |
| "loss": 0.731, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.014396456256921373, |
| "grad_norm": 2.118313789367676, |
| "learning_rate": 1.43646408839779e-06, |
| "loss": 0.69, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.014950166112956811, |
| "grad_norm": 3.023369789123535, |
| "learning_rate": 1.4917127071823205e-06, |
| "loss": 0.7747, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.015503875968992248, |
| "grad_norm": 2.6562323570251465, |
| "learning_rate": 1.5469613259668508e-06, |
| "loss": 0.7159, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.016057585825027684, |
| "grad_norm": 2.647136926651001, |
| "learning_rate": 1.6022099447513815e-06, |
| "loss": 0.7343, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.016611295681063124, |
| "grad_norm": 2.517387866973877, |
| "learning_rate": 1.6574585635359118e-06, |
| "loss": 0.7349, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01716500553709856, |
| "grad_norm": 2.214327335357666, |
| "learning_rate": 1.7127071823204422e-06, |
| "loss": 0.6861, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.017718715393133997, |
| "grad_norm": 2.009155511856079, |
| "learning_rate": 1.7679558011049725e-06, |
| "loss": 0.7274, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.018272425249169437, |
| "grad_norm": 1.5480848550796509, |
| "learning_rate": 1.823204419889503e-06, |
| "loss": 0.7111, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.018826135105204873, |
| "grad_norm": 1.0842509269714355, |
| "learning_rate": 1.8784530386740332e-06, |
| "loss": 0.7029, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01937984496124031, |
| "grad_norm": 0.9915339350700378, |
| "learning_rate": 1.933701657458564e-06, |
| "loss": 0.6761, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.019933554817275746, |
| "grad_norm": 1.0703707933425903, |
| "learning_rate": 1.988950276243094e-06, |
| "loss": 0.6833, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.020487264673311186, |
| "grad_norm": 1.0729620456695557, |
| "learning_rate": 2.0441988950276245e-06, |
| "loss": 0.6164, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.021040974529346623, |
| "grad_norm": 1.251114010810852, |
| "learning_rate": 2.0994475138121547e-06, |
| "loss": 0.6802, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.02159468438538206, |
| "grad_norm": 0.9531381726264954, |
| "learning_rate": 2.1546961325966854e-06, |
| "loss": 0.6638, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0221483942414175, |
| "grad_norm": 0.8726658225059509, |
| "learning_rate": 2.2099447513812157e-06, |
| "loss": 0.641, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.022702104097452935, |
| "grad_norm": 0.8520472049713135, |
| "learning_rate": 2.265193370165746e-06, |
| "loss": 0.663, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.023255813953488372, |
| "grad_norm": 0.9407281279563904, |
| "learning_rate": 2.320441988950276e-06, |
| "loss": 0.6627, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.023809523809523808, |
| "grad_norm": 0.7615970373153687, |
| "learning_rate": 2.375690607734807e-06, |
| "loss": 0.6401, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.024363233665559248, |
| "grad_norm": 0.7133028507232666, |
| "learning_rate": 2.430939226519337e-06, |
| "loss": 0.6575, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.024916943521594685, |
| "grad_norm": 0.7690572142601013, |
| "learning_rate": 2.486187845303868e-06, |
| "loss": 0.6733, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02547065337763012, |
| "grad_norm": 0.705028235912323, |
| "learning_rate": 2.541436464088398e-06, |
| "loss": 0.6162, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02602436323366556, |
| "grad_norm": 0.7950240969657898, |
| "learning_rate": 2.5966850828729284e-06, |
| "loss": 0.656, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.026578073089700997, |
| "grad_norm": 0.7641728520393372, |
| "learning_rate": 2.6519337016574586e-06, |
| "loss": 0.6353, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.027131782945736434, |
| "grad_norm": 0.7284824252128601, |
| "learning_rate": 2.707182320441989e-06, |
| "loss": 0.6285, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.02768549280177187, |
| "grad_norm": 0.5608909726142883, |
| "learning_rate": 2.7624309392265196e-06, |
| "loss": 0.6057, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02823920265780731, |
| "grad_norm": 0.5988432168960571, |
| "learning_rate": 2.81767955801105e-06, |
| "loss": 0.6032, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.028792912513842746, |
| "grad_norm": 0.7558192610740662, |
| "learning_rate": 2.87292817679558e-06, |
| "loss": 0.5942, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.029346622369878183, |
| "grad_norm": 0.7168074250221252, |
| "learning_rate": 2.9281767955801104e-06, |
| "loss": 0.5914, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.029900332225913623, |
| "grad_norm": 0.7384585738182068, |
| "learning_rate": 2.983425414364641e-06, |
| "loss": 0.6502, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03045404208194906, |
| "grad_norm": 0.5286744832992554, |
| "learning_rate": 3.0386740331491713e-06, |
| "loss": 0.5973, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.031007751937984496, |
| "grad_norm": 0.6040074229240417, |
| "learning_rate": 3.0939226519337016e-06, |
| "loss": 0.6212, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03156146179401993, |
| "grad_norm": 0.6607239842414856, |
| "learning_rate": 3.149171270718232e-06, |
| "loss": 0.5962, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03211517165005537, |
| "grad_norm": 0.5292708873748779, |
| "learning_rate": 3.204419889502763e-06, |
| "loss": 0.5785, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.032668881506090805, |
| "grad_norm": 0.5248478055000305, |
| "learning_rate": 3.2596685082872933e-06, |
| "loss": 0.643, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03322259136212625, |
| "grad_norm": 0.5045427680015564, |
| "learning_rate": 3.3149171270718235e-06, |
| "loss": 0.5906, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.033776301218161685, |
| "grad_norm": 0.5321183800697327, |
| "learning_rate": 3.370165745856354e-06, |
| "loss": 0.5901, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03433001107419712, |
| "grad_norm": 0.5732381939888, |
| "learning_rate": 3.4254143646408845e-06, |
| "loss": 0.6214, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03488372093023256, |
| "grad_norm": 0.4849403202533722, |
| "learning_rate": 3.4806629834254147e-06, |
| "loss": 0.5658, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.035437430786267994, |
| "grad_norm": 0.5135958790779114, |
| "learning_rate": 3.535911602209945e-06, |
| "loss": 0.5828, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03599114064230343, |
| "grad_norm": 0.5140406489372253, |
| "learning_rate": 3.5911602209944757e-06, |
| "loss": 0.5816, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.036544850498338874, |
| "grad_norm": 0.47636738419532776, |
| "learning_rate": 3.646408839779006e-06, |
| "loss": 0.5968, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03709856035437431, |
| "grad_norm": 0.44486942887306213, |
| "learning_rate": 3.7016574585635362e-06, |
| "loss": 0.573, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03765227021040975, |
| "grad_norm": 0.45608875155448914, |
| "learning_rate": 3.7569060773480665e-06, |
| "loss": 0.5612, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03820598006644518, |
| "grad_norm": 0.4425041377544403, |
| "learning_rate": 3.812154696132597e-06, |
| "loss": 0.5806, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03875968992248062, |
| "grad_norm": 0.5609722137451172, |
| "learning_rate": 3.867403314917128e-06, |
| "loss": 0.5642, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.039313399778516056, |
| "grad_norm": 0.4668285548686981, |
| "learning_rate": 3.922651933701658e-06, |
| "loss": 0.5704, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03986710963455149, |
| "grad_norm": 0.4177052676677704, |
| "learning_rate": 3.977900552486188e-06, |
| "loss": 0.5787, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.040420819490586936, |
| "grad_norm": 0.4882914423942566, |
| "learning_rate": 4.033149171270719e-06, |
| "loss": 0.5835, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.04097452934662237, |
| "grad_norm": 0.44587841629981995, |
| "learning_rate": 4.088397790055249e-06, |
| "loss": 0.561, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04152823920265781, |
| "grad_norm": 0.46285197138786316, |
| "learning_rate": 4.143646408839779e-06, |
| "loss": 0.5867, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.042081949058693245, |
| "grad_norm": 0.44081801176071167, |
| "learning_rate": 4.1988950276243095e-06, |
| "loss": 0.5788, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04263565891472868, |
| "grad_norm": 0.4478652775287628, |
| "learning_rate": 4.2541436464088406e-06, |
| "loss": 0.5631, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04318936877076412, |
| "grad_norm": 0.4420766830444336, |
| "learning_rate": 4.309392265193371e-06, |
| "loss": 0.5746, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.043743078626799554, |
| "grad_norm": 0.440927654504776, |
| "learning_rate": 4.364640883977901e-06, |
| "loss": 0.5907, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.044296788482835, |
| "grad_norm": 0.4461442828178406, |
| "learning_rate": 4.419889502762431e-06, |
| "loss": 0.5495, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.044850498338870434, |
| "grad_norm": 0.477222740650177, |
| "learning_rate": 4.475138121546962e-06, |
| "loss": 0.6103, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04540420819490587, |
| "grad_norm": 0.4078069031238556, |
| "learning_rate": 4.530386740331492e-06, |
| "loss": 0.5715, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04595791805094131, |
| "grad_norm": 0.4012727737426758, |
| "learning_rate": 4.585635359116022e-06, |
| "loss": 0.5388, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.046511627906976744, |
| "grad_norm": 0.41609224677085876, |
| "learning_rate": 4.640883977900552e-06, |
| "loss": 0.5713, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04706533776301218, |
| "grad_norm": 0.45814239978790283, |
| "learning_rate": 4.6961325966850835e-06, |
| "loss": 0.6032, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.047619047619047616, |
| "grad_norm": 0.4698704183101654, |
| "learning_rate": 4.751381215469614e-06, |
| "loss": 0.5671, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04817275747508306, |
| "grad_norm": 0.4096158742904663, |
| "learning_rate": 4.806629834254144e-06, |
| "loss": 0.5474, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.048726467331118496, |
| "grad_norm": 0.4519617557525635, |
| "learning_rate": 4.861878453038674e-06, |
| "loss": 0.5641, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04928017718715393, |
| "grad_norm": 0.3691461980342865, |
| "learning_rate": 4.9171270718232054e-06, |
| "loss": 0.5661, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04983388704318937, |
| "grad_norm": 0.417376309633255, |
| "learning_rate": 4.972375690607736e-06, |
| "loss": 0.5554, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.050387596899224806, |
| "grad_norm": 0.42060375213623047, |
| "learning_rate": 5.027624309392266e-06, |
| "loss": 0.5667, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05094130675526024, |
| "grad_norm": 0.41582152247428894, |
| "learning_rate": 5.082872928176796e-06, |
| "loss": 0.5524, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05149501661129568, |
| "grad_norm": 0.39075276255607605, |
| "learning_rate": 5.1381215469613265e-06, |
| "loss": 0.5698, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05204872646733112, |
| "grad_norm": 0.42373329401016235, |
| "learning_rate": 5.193370165745857e-06, |
| "loss": 0.564, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.05260243632336656, |
| "grad_norm": 0.433353066444397, |
| "learning_rate": 5.248618784530387e-06, |
| "loss": 0.5763, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.053156146179401995, |
| "grad_norm": 0.4021036922931671, |
| "learning_rate": 5.303867403314917e-06, |
| "loss": 0.5425, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05370985603543743, |
| "grad_norm": 0.3890472948551178, |
| "learning_rate": 5.3591160220994476e-06, |
| "loss": 0.5486, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.05426356589147287, |
| "grad_norm": 0.3637540638446808, |
| "learning_rate": 5.414364640883978e-06, |
| "loss": 0.5249, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.054817275747508304, |
| "grad_norm": 0.40060943365097046, |
| "learning_rate": 5.469613259668509e-06, |
| "loss": 0.582, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.05537098560354374, |
| "grad_norm": 0.41719919443130493, |
| "learning_rate": 5.524861878453039e-06, |
| "loss": 0.5838, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.055924695459579184, |
| "grad_norm": 0.46253830194473267, |
| "learning_rate": 5.5801104972375695e-06, |
| "loss": 0.5752, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.05647840531561462, |
| "grad_norm": 0.4139224588871002, |
| "learning_rate": 5.6353591160221e-06, |
| "loss": 0.5517, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.05703211517165006, |
| "grad_norm": 0.5012995004653931, |
| "learning_rate": 5.69060773480663e-06, |
| "loss": 0.5253, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05758582502768549, |
| "grad_norm": 0.4264964163303375, |
| "learning_rate": 5.74585635359116e-06, |
| "loss": 0.5499, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.05813953488372093, |
| "grad_norm": 0.4827825725078583, |
| "learning_rate": 5.8011049723756905e-06, |
| "loss": 0.5679, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.058693244739756366, |
| "grad_norm": 0.4288899600505829, |
| "learning_rate": 5.856353591160221e-06, |
| "loss": 0.5534, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0592469545957918, |
| "grad_norm": 0.5517690181732178, |
| "learning_rate": 5.911602209944752e-06, |
| "loss": 0.5809, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.059800664451827246, |
| "grad_norm": 0.41885989904403687, |
| "learning_rate": 5.966850828729282e-06, |
| "loss": 0.513, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.06035437430786268, |
| "grad_norm": 0.4665631055831909, |
| "learning_rate": 6.0220994475138124e-06, |
| "loss": 0.5555, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.06090808416389812, |
| "grad_norm": 0.46911975741386414, |
| "learning_rate": 6.077348066298343e-06, |
| "loss": 0.5769, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.061461794019933555, |
| "grad_norm": 0.5048715472221375, |
| "learning_rate": 6.132596685082873e-06, |
| "loss": 0.5394, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06201550387596899, |
| "grad_norm": 0.49173814058303833, |
| "learning_rate": 6.187845303867403e-06, |
| "loss": 0.5256, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.06256921373200443, |
| "grad_norm": 0.43289491534233093, |
| "learning_rate": 6.2430939226519335e-06, |
| "loss": 0.5481, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06312292358803986, |
| "grad_norm": 0.5766280889511108, |
| "learning_rate": 6.298342541436464e-06, |
| "loss": 0.5448, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0636766334440753, |
| "grad_norm": 0.4479210674762726, |
| "learning_rate": 6.353591160220996e-06, |
| "loss": 0.5419, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06423034330011074, |
| "grad_norm": 0.46152201294898987, |
| "learning_rate": 6.408839779005526e-06, |
| "loss": 0.5534, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.06478405315614617, |
| "grad_norm": 0.36882197856903076, |
| "learning_rate": 6.464088397790056e-06, |
| "loss": 0.527, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.06533776301218161, |
| "grad_norm": 0.46747303009033203, |
| "learning_rate": 6.5193370165745865e-06, |
| "loss": 0.5454, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.06589147286821706, |
| "grad_norm": 0.44827598333358765, |
| "learning_rate": 6.574585635359117e-06, |
| "loss": 0.5337, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0664451827242525, |
| "grad_norm": 0.468741238117218, |
| "learning_rate": 6.629834254143647e-06, |
| "loss": 0.5405, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06699889258028793, |
| "grad_norm": 0.41223788261413574, |
| "learning_rate": 6.685082872928177e-06, |
| "loss": 0.5283, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06755260243632337, |
| "grad_norm": 0.49077704548835754, |
| "learning_rate": 6.740331491712708e-06, |
| "loss": 0.5487, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0681063122923588, |
| "grad_norm": 0.4232911765575409, |
| "learning_rate": 6.795580110497239e-06, |
| "loss": 0.5511, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.06866002214839424, |
| "grad_norm": 0.4715983271598816, |
| "learning_rate": 6.850828729281769e-06, |
| "loss": 0.5556, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06921373200442968, |
| "grad_norm": 0.4605570435523987, |
| "learning_rate": 6.906077348066299e-06, |
| "loss": 0.5431, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06976744186046512, |
| "grad_norm": 0.44163256883621216, |
| "learning_rate": 6.9613259668508295e-06, |
| "loss": 0.5496, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07032115171650055, |
| "grad_norm": 0.4515061676502228, |
| "learning_rate": 7.01657458563536e-06, |
| "loss": 0.5589, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07087486157253599, |
| "grad_norm": 0.42739376425743103, |
| "learning_rate": 7.07182320441989e-06, |
| "loss": 0.4992, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 0.4851279854774475, |
| "learning_rate": 7.12707182320442e-06, |
| "loss": 0.5672, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.07198228128460686, |
| "grad_norm": 0.4500119984149933, |
| "learning_rate": 7.182320441988951e-06, |
| "loss": 0.5416, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0725359911406423, |
| "grad_norm": 0.4973263144493103, |
| "learning_rate": 7.237569060773482e-06, |
| "loss": 0.5557, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.07308970099667775, |
| "grad_norm": 0.44461292028427124, |
| "learning_rate": 7.292817679558012e-06, |
| "loss": 0.5227, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.07364341085271318, |
| "grad_norm": 0.47895634174346924, |
| "learning_rate": 7.348066298342542e-06, |
| "loss": 0.5235, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.07419712070874862, |
| "grad_norm": 0.4273644685745239, |
| "learning_rate": 7.4033149171270724e-06, |
| "loss": 0.5454, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.07475083056478406, |
| "grad_norm": 0.4457617998123169, |
| "learning_rate": 7.458563535911603e-06, |
| "loss": 0.5361, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0753045404208195, |
| "grad_norm": 0.48914194107055664, |
| "learning_rate": 7.513812154696133e-06, |
| "loss": 0.563, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.07585825027685493, |
| "grad_norm": 0.4490724503993988, |
| "learning_rate": 7.569060773480663e-06, |
| "loss": 0.532, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07641196013289037, |
| "grad_norm": 0.5025115013122559, |
| "learning_rate": 7.624309392265194e-06, |
| "loss": 0.546, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0769656699889258, |
| "grad_norm": 0.5027139186859131, |
| "learning_rate": 7.679558011049725e-06, |
| "loss": 0.5729, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07751937984496124, |
| "grad_norm": 0.46010562777519226, |
| "learning_rate": 7.734806629834256e-06, |
| "loss": 0.5299, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07807308970099668, |
| "grad_norm": 0.46475037932395935, |
| "learning_rate": 7.790055248618785e-06, |
| "loss": 0.5403, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.07862679955703211, |
| "grad_norm": 0.48647457361221313, |
| "learning_rate": 7.845303867403316e-06, |
| "loss": 0.5281, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.07918050941306755, |
| "grad_norm": 0.5236656069755554, |
| "learning_rate": 7.900552486187846e-06, |
| "loss": 0.5555, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07973421926910298, |
| "grad_norm": 0.5494616031646729, |
| "learning_rate": 7.955801104972377e-06, |
| "loss": 0.5621, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08028792912513842, |
| "grad_norm": 0.4531148076057434, |
| "learning_rate": 8.011049723756906e-06, |
| "loss": 0.5576, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.08084163898117387, |
| "grad_norm": 0.45845550298690796, |
| "learning_rate": 8.066298342541437e-06, |
| "loss": 0.554, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08139534883720931, |
| "grad_norm": 0.6133643388748169, |
| "learning_rate": 8.121546961325968e-06, |
| "loss": 0.5496, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08194905869324474, |
| "grad_norm": 0.43506351113319397, |
| "learning_rate": 8.176795580110498e-06, |
| "loss": 0.548, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.08250276854928018, |
| "grad_norm": 0.4651344120502472, |
| "learning_rate": 8.232044198895029e-06, |
| "loss": 0.527, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.08305647840531562, |
| "grad_norm": 0.43981248140335083, |
| "learning_rate": 8.287292817679558e-06, |
| "loss": 0.5379, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08361018826135105, |
| "grad_norm": 0.5043104290962219, |
| "learning_rate": 8.34254143646409e-06, |
| "loss": 0.5298, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.08416389811738649, |
| "grad_norm": 0.3996557891368866, |
| "learning_rate": 8.397790055248619e-06, |
| "loss": 0.5308, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.08471760797342193, |
| "grad_norm": 0.5180063843727112, |
| "learning_rate": 8.45303867403315e-06, |
| "loss": 0.5278, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.08527131782945736, |
| "grad_norm": 0.48788321018218994, |
| "learning_rate": 8.508287292817681e-06, |
| "loss": 0.5215, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0858250276854928, |
| "grad_norm": 0.4473685026168823, |
| "learning_rate": 8.56353591160221e-06, |
| "loss": 0.5346, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08637873754152824, |
| "grad_norm": 0.5907010436058044, |
| "learning_rate": 8.618784530386742e-06, |
| "loss": 0.5278, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.08693244739756367, |
| "grad_norm": 0.495604932308197, |
| "learning_rate": 8.674033149171271e-06, |
| "loss": 0.5076, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.08748615725359911, |
| "grad_norm": 0.5206358432769775, |
| "learning_rate": 8.729281767955802e-06, |
| "loss": 0.5517, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.08803986710963455, |
| "grad_norm": 0.5317306518554688, |
| "learning_rate": 8.784530386740332e-06, |
| "loss": 0.5221, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.08859357696567, |
| "grad_norm": 0.5278891921043396, |
| "learning_rate": 8.839779005524863e-06, |
| "loss": 0.5531, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08914728682170543, |
| "grad_norm": 0.4803391695022583, |
| "learning_rate": 8.895027624309392e-06, |
| "loss": 0.5359, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.08970099667774087, |
| "grad_norm": 0.6480128169059753, |
| "learning_rate": 8.950276243093923e-06, |
| "loss": 0.5393, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0902547065337763, |
| "grad_norm": 0.4067532420158386, |
| "learning_rate": 9.005524861878454e-06, |
| "loss": 0.5344, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09080841638981174, |
| "grad_norm": 0.4894218444824219, |
| "learning_rate": 9.060773480662984e-06, |
| "loss": 0.5316, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.09136212624584718, |
| "grad_norm": 0.6006916761398315, |
| "learning_rate": 9.116022099447515e-06, |
| "loss": 0.5531, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.09191583610188261, |
| "grad_norm": 0.5255333185195923, |
| "learning_rate": 9.171270718232044e-06, |
| "loss": 0.5253, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.09246954595791805, |
| "grad_norm": 0.5868107676506042, |
| "learning_rate": 9.226519337016575e-06, |
| "loss": 0.5451, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 0.5775595307350159, |
| "learning_rate": 9.281767955801105e-06, |
| "loss": 0.5496, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.09357696566998892, |
| "grad_norm": 0.4332346022129059, |
| "learning_rate": 9.337016574585636e-06, |
| "loss": 0.4993, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.09413067552602436, |
| "grad_norm": 0.6276872158050537, |
| "learning_rate": 9.392265193370167e-06, |
| "loss": 0.5209, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0946843853820598, |
| "grad_norm": 0.4875026047229767, |
| "learning_rate": 9.447513812154696e-06, |
| "loss": 0.5293, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.4791780412197113, |
| "learning_rate": 9.502762430939228e-06, |
| "loss": 0.528, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.09579180509413067, |
| "grad_norm": 0.4861852526664734, |
| "learning_rate": 9.558011049723757e-06, |
| "loss": 0.498, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.09634551495016612, |
| "grad_norm": 0.44678786396980286, |
| "learning_rate": 9.613259668508288e-06, |
| "loss": 0.52, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.09689922480620156, |
| "grad_norm": 0.5340399146080017, |
| "learning_rate": 9.668508287292818e-06, |
| "loss": 0.5291, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.09745293466223699, |
| "grad_norm": 0.5051288604736328, |
| "learning_rate": 9.723756906077349e-06, |
| "loss": 0.5231, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.09800664451827243, |
| "grad_norm": 0.5003914833068848, |
| "learning_rate": 9.779005524861878e-06, |
| "loss": 0.4915, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.09856035437430787, |
| "grad_norm": 0.5183685421943665, |
| "learning_rate": 9.834254143646411e-06, |
| "loss": 0.5206, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0991140642303433, |
| "grad_norm": 0.5782157778739929, |
| "learning_rate": 9.88950276243094e-06, |
| "loss": 0.5309, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09966777408637874, |
| "grad_norm": 0.47796350717544556, |
| "learning_rate": 9.944751381215471e-06, |
| "loss": 0.5286, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10022148394241417, |
| "grad_norm": 0.5073690414428711, |
| "learning_rate": 1e-05, |
| "loss": 0.5304, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.10077519379844961, |
| "grad_norm": 0.49675506353378296, |
| "learning_rate": 9.999990655998744e-06, |
| "loss": 0.5272, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.10132890365448505, |
| "grad_norm": 0.4897266924381256, |
| "learning_rate": 9.9999626240299e-06, |
| "loss": 0.5136, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.10188261351052048, |
| "grad_norm": 0.4757635295391083, |
| "learning_rate": 9.999915904198239e-06, |
| "loss": 0.5275, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.10243632336655592, |
| "grad_norm": 0.5029352307319641, |
| "learning_rate": 9.99985049667838e-06, |
| "loss": 0.4964, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.10299003322259136, |
| "grad_norm": 0.45012396574020386, |
| "learning_rate": 9.999766401714795e-06, |
| "loss": 0.515, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1035437430786268, |
| "grad_norm": 0.500487744808197, |
| "learning_rate": 9.999663619621793e-06, |
| "loss": 0.5304, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.10409745293466224, |
| "grad_norm": 0.4716833233833313, |
| "learning_rate": 9.999542150783533e-06, |
| "loss": 0.504, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.10465116279069768, |
| "grad_norm": 0.48771926760673523, |
| "learning_rate": 9.999401995654018e-06, |
| "loss": 0.5434, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.10520487264673312, |
| "grad_norm": 0.49197521805763245, |
| "learning_rate": 9.999243154757092e-06, |
| "loss": 0.5254, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.10575858250276855, |
| "grad_norm": 0.443828284740448, |
| "learning_rate": 9.999065628686439e-06, |
| "loss": 0.5076, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.10631229235880399, |
| "grad_norm": 0.44356590509414673, |
| "learning_rate": 9.998869418105578e-06, |
| "loss": 0.5017, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.10686600221483943, |
| "grad_norm": 0.4854886829853058, |
| "learning_rate": 9.99865452374787e-06, |
| "loss": 0.5067, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.10741971207087486, |
| "grad_norm": 0.5027791857719421, |
| "learning_rate": 9.9984209464165e-06, |
| "loss": 0.5234, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1079734219269103, |
| "grad_norm": 0.49939441680908203, |
| "learning_rate": 9.99816868698449e-06, |
| "loss": 0.5005, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.10852713178294573, |
| "grad_norm": 0.5125188231468201, |
| "learning_rate": 9.997897746394684e-06, |
| "loss": 0.5197, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.10908084163898117, |
| "grad_norm": 0.508100152015686, |
| "learning_rate": 9.99760812565975e-06, |
| "loss": 0.5081, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.10963455149501661, |
| "grad_norm": 0.5313785076141357, |
| "learning_rate": 9.997299825862172e-06, |
| "loss": 0.4863, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.11018826135105204, |
| "grad_norm": 0.47651734948158264, |
| "learning_rate": 9.996972848154254e-06, |
| "loss": 0.504, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.11074197120708748, |
| "grad_norm": 0.5211071968078613, |
| "learning_rate": 9.996627193758108e-06, |
| "loss": 0.5289, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11129568106312292, |
| "grad_norm": 0.5157010555267334, |
| "learning_rate": 9.996262863965651e-06, |
| "loss": 0.5057, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.11184939091915837, |
| "grad_norm": 0.5023174285888672, |
| "learning_rate": 9.995879860138605e-06, |
| "loss": 0.5255, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1124031007751938, |
| "grad_norm": 0.5226082801818848, |
| "learning_rate": 9.99547818370848e-06, |
| "loss": 0.5076, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.11295681063122924, |
| "grad_norm": 0.5416733622550964, |
| "learning_rate": 9.995057836176588e-06, |
| "loss": 0.5243, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.11351052048726468, |
| "grad_norm": 0.5339394807815552, |
| "learning_rate": 9.994618819114015e-06, |
| "loss": 0.5288, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.11406423034330011, |
| "grad_norm": 0.5571205615997314, |
| "learning_rate": 9.994161134161635e-06, |
| "loss": 0.5187, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.11461794019933555, |
| "grad_norm": 0.48999401926994324, |
| "learning_rate": 9.99368478303009e-06, |
| "loss": 0.5229, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.11517165005537099, |
| "grad_norm": 0.5341947078704834, |
| "learning_rate": 9.993189767499789e-06, |
| "loss": 0.5136, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.11572535991140642, |
| "grad_norm": 0.43289002776145935, |
| "learning_rate": 9.992676089420903e-06, |
| "loss": 0.5227, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.11627906976744186, |
| "grad_norm": 0.5622230172157288, |
| "learning_rate": 9.99214375071336e-06, |
| "loss": 0.5189, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1168327796234773, |
| "grad_norm": 0.43735480308532715, |
| "learning_rate": 9.991592753366822e-06, |
| "loss": 0.5103, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.11738648947951273, |
| "grad_norm": 0.49578508734703064, |
| "learning_rate": 9.991023099440702e-06, |
| "loss": 0.5234, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.11794019933554817, |
| "grad_norm": 0.5016103982925415, |
| "learning_rate": 9.990434791064137e-06, |
| "loss": 0.5143, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.1184939091915836, |
| "grad_norm": 0.46542438864707947, |
| "learning_rate": 9.98982783043599e-06, |
| "loss": 0.5073, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.11904761904761904, |
| "grad_norm": 0.47478312253952026, |
| "learning_rate": 9.989202219824834e-06, |
| "loss": 0.5102, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.11960132890365449, |
| "grad_norm": 0.48327192664146423, |
| "learning_rate": 9.988557961568956e-06, |
| "loss": 0.4817, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.12015503875968993, |
| "grad_norm": 0.4620007574558258, |
| "learning_rate": 9.987895058076334e-06, |
| "loss": 0.5077, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.12070874861572536, |
| "grad_norm": 0.4638729393482208, |
| "learning_rate": 9.987213511824634e-06, |
| "loss": 0.5415, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1212624584717608, |
| "grad_norm": 0.4861536920070648, |
| "learning_rate": 9.986513325361209e-06, |
| "loss": 0.4944, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.12181616832779624, |
| "grad_norm": 0.4344837963581085, |
| "learning_rate": 9.98579450130307e-06, |
| "loss": 0.4911, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12236987818383167, |
| "grad_norm": 0.46759846806526184, |
| "learning_rate": 9.985057042336898e-06, |
| "loss": 0.5114, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.12292358803986711, |
| "grad_norm": 0.5901568531990051, |
| "learning_rate": 9.984300951219022e-06, |
| "loss": 0.5624, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.12347729789590255, |
| "grad_norm": 0.45916667580604553, |
| "learning_rate": 9.983526230775405e-06, |
| "loss": 0.508, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.12403100775193798, |
| "grad_norm": 0.45065367221832275, |
| "learning_rate": 9.982732883901641e-06, |
| "loss": 0.5452, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.12458471760797342, |
| "grad_norm": 0.4651075601577759, |
| "learning_rate": 9.981920913562948e-06, |
| "loss": 0.5167, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.12513842746400886, |
| "grad_norm": 0.4300501346588135, |
| "learning_rate": 9.981090322794145e-06, |
| "loss": 0.5086, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1256921373200443, |
| "grad_norm": 0.5401392579078674, |
| "learning_rate": 9.980241114699647e-06, |
| "loss": 0.5161, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.12624584717607973, |
| "grad_norm": 0.5935031771659851, |
| "learning_rate": 9.979373292453457e-06, |
| "loss": 0.5069, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.12679955703211518, |
| "grad_norm": 0.45174965262413025, |
| "learning_rate": 9.978486859299146e-06, |
| "loss": 0.4954, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.1273532668881506, |
| "grad_norm": 0.535507082939148, |
| "learning_rate": 9.977581818549849e-06, |
| "loss": 0.5104, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.12790697674418605, |
| "grad_norm": 0.5454712510108948, |
| "learning_rate": 9.976658173588244e-06, |
| "loss": 0.5423, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.12846068660022147, |
| "grad_norm": 0.5051414370536804, |
| "learning_rate": 9.97571592786655e-06, |
| "loss": 0.5128, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.12901439645625692, |
| "grad_norm": 0.449792742729187, |
| "learning_rate": 9.974755084906503e-06, |
| "loss": 0.5228, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.12956810631229235, |
| "grad_norm": 0.44292494654655457, |
| "learning_rate": 9.973775648299349e-06, |
| "loss": 0.5058, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1301218161683278, |
| "grad_norm": 0.4995569884777069, |
| "learning_rate": 9.972777621705833e-06, |
| "loss": 0.5044, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.13067552602436322, |
| "grad_norm": 0.46285441517829895, |
| "learning_rate": 9.97176100885618e-06, |
| "loss": 0.5111, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.13122923588039867, |
| "grad_norm": 0.5752739906311035, |
| "learning_rate": 9.970725813550081e-06, |
| "loss": 0.5184, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.13178294573643412, |
| "grad_norm": 0.5005012154579163, |
| "learning_rate": 9.969672039656684e-06, |
| "loss": 0.5095, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.13233665559246954, |
| "grad_norm": 0.5429736375808716, |
| "learning_rate": 9.968599691114573e-06, |
| "loss": 0.4957, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.132890365448505, |
| "grad_norm": 0.4731188118457794, |
| "learning_rate": 9.967508771931761e-06, |
| "loss": 0.5231, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.13344407530454042, |
| "grad_norm": 0.4413149356842041, |
| "learning_rate": 9.966399286185666e-06, |
| "loss": 0.4852, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.13399778516057587, |
| "grad_norm": 0.5604010224342346, |
| "learning_rate": 9.965271238023102e-06, |
| "loss": 0.4953, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1345514950166113, |
| "grad_norm": 0.4779647886753082, |
| "learning_rate": 9.964124631660266e-06, |
| "loss": 0.4806, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.13510520487264674, |
| "grad_norm": 0.4577624201774597, |
| "learning_rate": 9.96295947138271e-06, |
| "loss": 0.5214, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.13565891472868216, |
| "grad_norm": 0.5134978890419006, |
| "learning_rate": 9.96177576154534e-06, |
| "loss": 0.5123, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.1362126245847176, |
| "grad_norm": 0.4479624032974243, |
| "learning_rate": 9.960573506572391e-06, |
| "loss": 0.4983, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.13676633444075303, |
| "grad_norm": 0.4445071518421173, |
| "learning_rate": 9.959352710957408e-06, |
| "loss": 0.4886, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.13732004429678848, |
| "grad_norm": 0.4781419634819031, |
| "learning_rate": 9.958113379263243e-06, |
| "loss": 0.5194, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1378737541528239, |
| "grad_norm": 0.46459653973579407, |
| "learning_rate": 9.95685551612202e-06, |
| "loss": 0.5139, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.13842746400885936, |
| "grad_norm": 0.491256445646286, |
| "learning_rate": 9.955579126235129e-06, |
| "loss": 0.518, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1389811738648948, |
| "grad_norm": 0.5470618009567261, |
| "learning_rate": 9.954284214373204e-06, |
| "loss": 0.5461, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.13953488372093023, |
| "grad_norm": 0.5898417830467224, |
| "learning_rate": 9.95297078537611e-06, |
| "loss": 0.523, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.14008859357696568, |
| "grad_norm": 0.48347923159599304, |
| "learning_rate": 9.95163884415292e-06, |
| "loss": 0.51, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.1406423034330011, |
| "grad_norm": 0.5616808533668518, |
| "learning_rate": 9.950288395681898e-06, |
| "loss": 0.5021, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.14119601328903655, |
| "grad_norm": 0.6873836517333984, |
| "learning_rate": 9.94891944501048e-06, |
| "loss": 0.5247, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.14174972314507198, |
| "grad_norm": 0.6375631093978882, |
| "learning_rate": 9.947531997255256e-06, |
| "loss": 0.5126, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.14230343300110743, |
| "grad_norm": 0.6284964084625244, |
| "learning_rate": 9.946126057601954e-06, |
| "loss": 0.514, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.6849797368049622, |
| "learning_rate": 9.944701631305413e-06, |
| "loss": 0.5186, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1434108527131783, |
| "grad_norm": 0.5404208898544312, |
| "learning_rate": 9.94325872368957e-06, |
| "loss": 0.5004, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.14396456256921372, |
| "grad_norm": 0.6033118367195129, |
| "learning_rate": 9.941797340147439e-06, |
| "loss": 0.5213, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.14451827242524917, |
| "grad_norm": 0.5408035516738892, |
| "learning_rate": 9.940317486141084e-06, |
| "loss": 0.545, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1450719822812846, |
| "grad_norm": 0.5264489650726318, |
| "learning_rate": 9.93881916720161e-06, |
| "loss": 0.5101, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.14562569213732005, |
| "grad_norm": 0.5625776648521423, |
| "learning_rate": 9.937302388929136e-06, |
| "loss": 0.5039, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1461794019933555, |
| "grad_norm": 0.5865488052368164, |
| "learning_rate": 9.93576715699277e-06, |
| "loss": 0.5297, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.14673311184939092, |
| "grad_norm": 0.5614110231399536, |
| "learning_rate": 9.934213477130599e-06, |
| "loss": 0.4956, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.14728682170542637, |
| "grad_norm": 0.5261831879615784, |
| "learning_rate": 9.932641355149655e-06, |
| "loss": 0.5429, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1478405315614618, |
| "grad_norm": 0.7056304812431335, |
| "learning_rate": 9.931050796925906e-06, |
| "loss": 0.5356, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.14839424141749724, |
| "grad_norm": 0.5166594386100769, |
| "learning_rate": 9.929441808404217e-06, |
| "loss": 0.5076, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.14894795127353266, |
| "grad_norm": 0.5735101103782654, |
| "learning_rate": 9.927814395598349e-06, |
| "loss": 0.5305, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.14950166112956811, |
| "grad_norm": 0.5775477886199951, |
| "learning_rate": 9.92616856459092e-06, |
| "loss": 0.5089, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.15005537098560354, |
| "grad_norm": 0.48364558815956116, |
| "learning_rate": 9.924504321533387e-06, |
| "loss": 0.484, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.150609080841639, |
| "grad_norm": 0.5991694331169128, |
| "learning_rate": 9.922821672646028e-06, |
| "loss": 0.5232, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1511627906976744, |
| "grad_norm": 0.5030311942100525, |
| "learning_rate": 9.92112062421791e-06, |
| "loss": 0.5018, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.15171650055370986, |
| "grad_norm": 0.5942660570144653, |
| "learning_rate": 9.919401182606876e-06, |
| "loss": 0.501, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.15227021040974528, |
| "grad_norm": 0.4828101396560669, |
| "learning_rate": 9.917663354239508e-06, |
| "loss": 0.501, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.15282392026578073, |
| "grad_norm": 0.6254435181617737, |
| "learning_rate": 9.915907145611117e-06, |
| "loss": 0.5147, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.15337763012181616, |
| "grad_norm": 0.510230541229248, |
| "learning_rate": 9.914132563285707e-06, |
| "loss": 0.5085, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1539313399778516, |
| "grad_norm": 0.5653350949287415, |
| "learning_rate": 9.91233961389596e-06, |
| "loss": 0.5024, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.15448504983388706, |
| "grad_norm": 0.5247920751571655, |
| "learning_rate": 9.910528304143203e-06, |
| "loss": 0.4716, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.15503875968992248, |
| "grad_norm": 0.624584972858429, |
| "learning_rate": 9.908698640797389e-06, |
| "loss": 0.5145, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.15559246954595793, |
| "grad_norm": 0.521739661693573, |
| "learning_rate": 9.906850630697068e-06, |
| "loss": 0.5145, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.15614617940199335, |
| "grad_norm": 0.5611006617546082, |
| "learning_rate": 9.904984280749364e-06, |
| "loss": 0.4909, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1566998892580288, |
| "grad_norm": 0.5463786721229553, |
| "learning_rate": 9.903099597929947e-06, |
| "loss": 0.5127, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.15725359911406422, |
| "grad_norm": 0.5769833326339722, |
| "learning_rate": 9.901196589283009e-06, |
| "loss": 0.4759, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.15780730897009967, |
| "grad_norm": 0.5909711122512817, |
| "learning_rate": 9.899275261921236e-06, |
| "loss": 0.4908, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.1583610188261351, |
| "grad_norm": 0.47673356533050537, |
| "learning_rate": 9.89733562302578e-06, |
| "loss": 0.494, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.15891472868217055, |
| "grad_norm": 0.5346972346305847, |
| "learning_rate": 9.89537767984624e-06, |
| "loss": 0.5071, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.15946843853820597, |
| "grad_norm": 0.5338662266731262, |
| "learning_rate": 9.893401439700624e-06, |
| "loss": 0.491, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.16002214839424142, |
| "grad_norm": 0.4213932156562805, |
| "learning_rate": 9.891406909975328e-06, |
| "loss": 0.5105, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.16057585825027684, |
| "grad_norm": 0.4888423979282379, |
| "learning_rate": 9.889394098125105e-06, |
| "loss": 0.4887, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1611295681063123, |
| "grad_norm": 0.4586566686630249, |
| "learning_rate": 9.887363011673046e-06, |
| "loss": 0.5191, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.16168327796234774, |
| "grad_norm": 0.41070297360420227, |
| "learning_rate": 9.885313658210537e-06, |
| "loss": 0.5102, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.16223698781838317, |
| "grad_norm": 0.46849679946899414, |
| "learning_rate": 9.883246045397244e-06, |
| "loss": 0.522, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.16279069767441862, |
| "grad_norm": 0.4664832353591919, |
| "learning_rate": 9.881160180961078e-06, |
| "loss": 0.4865, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.16334440753045404, |
| "grad_norm": 0.49740853905677795, |
| "learning_rate": 9.879056072698168e-06, |
| "loss": 0.4989, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1638981173864895, |
| "grad_norm": 0.5010776519775391, |
| "learning_rate": 9.876933728472826e-06, |
| "loss": 0.4947, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1644518272425249, |
| "grad_norm": 0.6259229183197021, |
| "learning_rate": 9.874793156217533e-06, |
| "loss": 0.5134, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.16500553709856036, |
| "grad_norm": 0.5744408965110779, |
| "learning_rate": 9.872634363932887e-06, |
| "loss": 0.5049, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.16555924695459578, |
| "grad_norm": 0.5046638250350952, |
| "learning_rate": 9.870457359687593e-06, |
| "loss": 0.4824, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.16611295681063123, |
| "grad_norm": 0.4570577144622803, |
| "learning_rate": 9.868262151618426e-06, |
| "loss": 0.5228, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.45581790804862976, |
| "learning_rate": 9.866048747930194e-06, |
| "loss": 0.5099, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1672203765227021, |
| "grad_norm": 0.4104360044002533, |
| "learning_rate": 9.863817156895715e-06, |
| "loss": 0.4927, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.16777408637873753, |
| "grad_norm": 0.44734492897987366, |
| "learning_rate": 9.861567386855789e-06, |
| "loss": 0.4926, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.16832779623477298, |
| "grad_norm": 0.49278324842453003, |
| "learning_rate": 9.85929944621915e-06, |
| "loss": 0.5049, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1688815060908084, |
| "grad_norm": 0.48833972215652466, |
| "learning_rate": 9.857013343462463e-06, |
| "loss": 0.5319, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.16943521594684385, |
| "grad_norm": 0.41666170954704285, |
| "learning_rate": 9.854709087130261e-06, |
| "loss": 0.51, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1699889258028793, |
| "grad_norm": 0.4672697186470032, |
| "learning_rate": 9.852386685834936e-06, |
| "loss": 0.5044, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.17054263565891473, |
| "grad_norm": 0.5205575823783875, |
| "learning_rate": 9.850046148256693e-06, |
| "loss": 0.5341, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.17109634551495018, |
| "grad_norm": 0.4313163161277771, |
| "learning_rate": 9.847687483143532e-06, |
| "loss": 0.4979, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1716500553709856, |
| "grad_norm": 0.5207853317260742, |
| "learning_rate": 9.845310699311197e-06, |
| "loss": 0.5417, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17220376522702105, |
| "grad_norm": 0.4820159077644348, |
| "learning_rate": 9.842915805643156e-06, |
| "loss": 0.529, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.17275747508305647, |
| "grad_norm": 0.5075066685676575, |
| "learning_rate": 9.840502811090568e-06, |
| "loss": 0.4788, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.17331118493909192, |
| "grad_norm": 0.5771706104278564, |
| "learning_rate": 9.838071724672239e-06, |
| "loss": 0.4903, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.17386489479512734, |
| "grad_norm": 0.5288019776344299, |
| "learning_rate": 9.835622555474601e-06, |
| "loss": 0.4909, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1744186046511628, |
| "grad_norm": 0.5946136116981506, |
| "learning_rate": 9.83315531265167e-06, |
| "loss": 0.5153, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.17497231450719822, |
| "grad_norm": 0.5433559417724609, |
| "learning_rate": 9.830670005425012e-06, |
| "loss": 0.5281, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.17552602436323367, |
| "grad_norm": 0.419915109872818, |
| "learning_rate": 9.828166643083716e-06, |
| "loss": 0.4612, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.1760797342192691, |
| "grad_norm": 0.6224991679191589, |
| "learning_rate": 9.825645234984347e-06, |
| "loss": 0.5013, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.17663344407530454, |
| "grad_norm": 0.5434386134147644, |
| "learning_rate": 9.823105790550925e-06, |
| "loss": 0.4927, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.17718715393134, |
| "grad_norm": 0.5309631824493408, |
| "learning_rate": 9.820548319274875e-06, |
| "loss": 0.522, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1777408637873754, |
| "grad_norm": 0.6311752200126648, |
| "learning_rate": 9.817972830715003e-06, |
| "loss": 0.5171, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.17829457364341086, |
| "grad_norm": 0.5074132084846497, |
| "learning_rate": 9.81537933449746e-06, |
| "loss": 0.4971, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1788482834994463, |
| "grad_norm": 0.5580747723579407, |
| "learning_rate": 9.812767840315693e-06, |
| "loss": 0.4926, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.17940199335548174, |
| "grad_norm": 0.6106091737747192, |
| "learning_rate": 9.81013835793043e-06, |
| "loss": 0.5026, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.17995570321151716, |
| "grad_norm": 0.5311607122421265, |
| "learning_rate": 9.807490897169622e-06, |
| "loss": 0.4811, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1805094130675526, |
| "grad_norm": 0.5105768442153931, |
| "learning_rate": 9.804825467928423e-06, |
| "loss": 0.4779, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.18106312292358803, |
| "grad_norm": 0.558866024017334, |
| "learning_rate": 9.802142080169138e-06, |
| "loss": 0.5181, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.18161683277962348, |
| "grad_norm": 0.5689678192138672, |
| "learning_rate": 9.799440743921203e-06, |
| "loss": 0.4731, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1821705426356589, |
| "grad_norm": 0.4810315668582916, |
| "learning_rate": 9.79672146928113e-06, |
| "loss": 0.4948, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.18272425249169436, |
| "grad_norm": 0.5226458311080933, |
| "learning_rate": 9.793984266412481e-06, |
| "loss": 0.4862, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.18327796234772978, |
| "grad_norm": 0.4900760054588318, |
| "learning_rate": 9.791229145545832e-06, |
| "loss": 0.4869, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.18383167220376523, |
| "grad_norm": 0.5214505195617676, |
| "learning_rate": 9.78845611697872e-06, |
| "loss": 0.4987, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.18438538205980065, |
| "grad_norm": 0.46621766686439514, |
| "learning_rate": 9.785665191075618e-06, |
| "loss": 0.4897, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.1849390919158361, |
| "grad_norm": 0.45763033628463745, |
| "learning_rate": 9.782856378267893e-06, |
| "loss": 0.5123, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.18549280177187155, |
| "grad_norm": 0.4653575122356415, |
| "learning_rate": 9.780029689053765e-06, |
| "loss": 0.5129, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.18604651162790697, |
| "grad_norm": 0.43498504161834717, |
| "learning_rate": 9.777185133998268e-06, |
| "loss": 0.4994, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.18660022148394242, |
| "grad_norm": 0.5097606182098389, |
| "learning_rate": 9.774322723733216e-06, |
| "loss": 0.4974, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.18715393133997785, |
| "grad_norm": 0.4725209176540375, |
| "learning_rate": 9.771442468957151e-06, |
| "loss": 0.473, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1877076411960133, |
| "grad_norm": 0.4831293821334839, |
| "learning_rate": 9.768544380435316e-06, |
| "loss": 0.5038, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.18826135105204872, |
| "grad_norm": 0.5691421031951904, |
| "learning_rate": 9.765628468999609e-06, |
| "loss": 0.493, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.18881506090808417, |
| "grad_norm": 0.4951483905315399, |
| "learning_rate": 9.76269474554854e-06, |
| "loss": 0.5177, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1893687707641196, |
| "grad_norm": 0.5355369448661804, |
| "learning_rate": 9.759743221047198e-06, |
| "loss": 0.4999, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.18992248062015504, |
| "grad_norm": 0.5432518124580383, |
| "learning_rate": 9.756773906527199e-06, |
| "loss": 0.4963, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.5756740570068359, |
| "learning_rate": 9.753786813086658e-06, |
| "loss": 0.5119, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.19102990033222592, |
| "grad_norm": 0.6138593554496765, |
| "learning_rate": 9.750781951890135e-06, |
| "loss": 0.5123, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.19158361018826134, |
| "grad_norm": 0.4841378331184387, |
| "learning_rate": 9.747759334168602e-06, |
| "loss": 0.5032, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.1921373200442968, |
| "grad_norm": 0.5776627063751221, |
| "learning_rate": 9.744718971219395e-06, |
| "loss": 0.5259, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.19269102990033224, |
| "grad_norm": 0.4886176884174347, |
| "learning_rate": 9.741660874406176e-06, |
| "loss": 0.4962, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.19324473975636766, |
| "grad_norm": 0.6149806380271912, |
| "learning_rate": 9.738585055158891e-06, |
| "loss": 0.498, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1937984496124031, |
| "grad_norm": 0.43883392214775085, |
| "learning_rate": 9.735491524973723e-06, |
| "loss": 0.4827, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.19435215946843853, |
| "grad_norm": 0.5252010226249695, |
| "learning_rate": 9.73238029541305e-06, |
| "loss": 0.4993, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.19490586932447398, |
| "grad_norm": 0.5176405906677246, |
| "learning_rate": 9.729251378105407e-06, |
| "loss": 0.4976, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1954595791805094, |
| "grad_norm": 0.5629587769508362, |
| "learning_rate": 9.726104784745437e-06, |
| "loss": 0.5145, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.19601328903654486, |
| "grad_norm": 0.5694234371185303, |
| "learning_rate": 9.722940527093847e-06, |
| "loss": 0.516, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.19656699889258028, |
| "grad_norm": 0.48244956135749817, |
| "learning_rate": 9.719758616977372e-06, |
| "loss": 0.4862, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.19712070874861573, |
| "grad_norm": 0.5657898187637329, |
| "learning_rate": 9.716559066288716e-06, |
| "loss": 0.4904, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.19767441860465115, |
| "grad_norm": 0.5076687335968018, |
| "learning_rate": 9.713341886986525e-06, |
| "loss": 0.5049, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.1982281284606866, |
| "grad_norm": 0.5623184442520142, |
| "learning_rate": 9.710107091095328e-06, |
| "loss": 0.4778, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.19878183831672203, |
| "grad_norm": 0.5495237708091736, |
| "learning_rate": 9.706854690705501e-06, |
| "loss": 0.5049, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.19933554817275748, |
| "grad_norm": 0.5105648040771484, |
| "learning_rate": 9.703584697973213e-06, |
| "loss": 0.4879, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1998892580287929, |
| "grad_norm": 0.46286460757255554, |
| "learning_rate": 9.7002971251204e-06, |
| "loss": 0.4927, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.20044296788482835, |
| "grad_norm": 0.48685401678085327, |
| "learning_rate": 9.696991984434686e-06, |
| "loss": 0.51, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.2009966777408638, |
| "grad_norm": 0.4311748743057251, |
| "learning_rate": 9.693669288269371e-06, |
| "loss": 0.5145, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.20155038759689922, |
| "grad_norm": 0.4911429286003113, |
| "learning_rate": 9.690329049043366e-06, |
| "loss": 0.514, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.20210409745293467, |
| "grad_norm": 0.4419792890548706, |
| "learning_rate": 9.686971279241151e-06, |
| "loss": 0.5039, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2026578073089701, |
| "grad_norm": 0.5263059735298157, |
| "learning_rate": 9.683595991412725e-06, |
| "loss": 0.4971, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.20321151716500555, |
| "grad_norm": 0.40632978081703186, |
| "learning_rate": 9.68020319817357e-06, |
| "loss": 0.4705, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.20376522702104097, |
| "grad_norm": 0.4889412820339203, |
| "learning_rate": 9.676792912204589e-06, |
| "loss": 0.4831, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.20431893687707642, |
| "grad_norm": 0.44732460379600525, |
| "learning_rate": 9.673365146252067e-06, |
| "loss": 0.5058, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.20487264673311184, |
| "grad_norm": 0.43679168820381165, |
| "learning_rate": 9.669919913127628e-06, |
| "loss": 0.4924, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2054263565891473, |
| "grad_norm": 0.4535861909389496, |
| "learning_rate": 9.666457225708175e-06, |
| "loss": 0.4978, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2059800664451827, |
| "grad_norm": 0.4649087190628052, |
| "learning_rate": 9.662977096935849e-06, |
| "loss": 0.5243, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.20653377630121816, |
| "grad_norm": 0.4369707405567169, |
| "learning_rate": 9.659479539817982e-06, |
| "loss": 0.4962, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.2070874861572536, |
| "grad_norm": 0.45305919647216797, |
| "learning_rate": 9.655964567427046e-06, |
| "loss": 0.4802, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.20764119601328904, |
| "grad_norm": 0.47477301955223083, |
| "learning_rate": 9.652432192900602e-06, |
| "loss": 0.5024, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.2081949058693245, |
| "grad_norm": 0.420511394739151, |
| "learning_rate": 9.648882429441258e-06, |
| "loss": 0.4961, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2087486157253599, |
| "grad_norm": 0.46836888790130615, |
| "learning_rate": 9.645315290316607e-06, |
| "loss": 0.4913, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.20930232558139536, |
| "grad_norm": 0.48647841811180115, |
| "learning_rate": 9.641730788859194e-06, |
| "loss": 0.4992, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.20985603543743078, |
| "grad_norm": 0.48303425312042236, |
| "learning_rate": 9.63812893846645e-06, |
| "loss": 0.4811, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.21040974529346623, |
| "grad_norm": 0.5188234448432922, |
| "learning_rate": 9.634509752600658e-06, |
| "loss": 0.4814, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21096345514950166, |
| "grad_norm": 0.5916765928268433, |
| "learning_rate": 9.630873244788884e-06, |
| "loss": 0.4971, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2115171650055371, |
| "grad_norm": 0.47264301776885986, |
| "learning_rate": 9.627219428622942e-06, |
| "loss": 0.4978, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.21207087486157253, |
| "grad_norm": 0.5251865386962891, |
| "learning_rate": 9.62354831775934e-06, |
| "loss": 0.5099, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.21262458471760798, |
| "grad_norm": 0.5563480854034424, |
| "learning_rate": 9.61985992591922e-06, |
| "loss": 0.5136, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2131782945736434, |
| "grad_norm": 0.490176796913147, |
| "learning_rate": 9.61615426688832e-06, |
| "loss": 0.4963, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.21373200442967885, |
| "grad_norm": 0.46513262391090393, |
| "learning_rate": 9.612431354516912e-06, |
| "loss": 0.4945, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.4832211136817932, |
| "learning_rate": 9.608691202719755e-06, |
| "loss": 0.4794, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.21483942414174972, |
| "grad_norm": 0.4905065894126892, |
| "learning_rate": 9.604933825476044e-06, |
| "loss": 0.5094, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.21539313399778517, |
| "grad_norm": 0.45242419838905334, |
| "learning_rate": 9.601159236829353e-06, |
| "loss": 0.4969, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.2159468438538206, |
| "grad_norm": 0.5123769640922546, |
| "learning_rate": 9.597367450887585e-06, |
| "loss": 0.489, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.21650055370985605, |
| "grad_norm": 0.43457096815109253, |
| "learning_rate": 9.593558481822923e-06, |
| "loss": 0.5024, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.21705426356589147, |
| "grad_norm": 0.4841693937778473, |
| "learning_rate": 9.58973234387177e-06, |
| "loss": 0.5089, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.21760797342192692, |
| "grad_norm": 0.4351038336753845, |
| "learning_rate": 9.585889051334702e-06, |
| "loss": 0.5071, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.21816168327796234, |
| "grad_norm": 0.46998244524002075, |
| "learning_rate": 9.58202861857641e-06, |
| "loss": 0.513, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2187153931339978, |
| "grad_norm": 0.46563470363616943, |
| "learning_rate": 9.578151060025654e-06, |
| "loss": 0.4747, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.21926910299003322, |
| "grad_norm": 0.475676566362381, |
| "learning_rate": 9.574256390175192e-06, |
| "loss": 0.498, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.21982281284606867, |
| "grad_norm": 0.49711453914642334, |
| "learning_rate": 9.570344623581748e-06, |
| "loss": 0.4831, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2203765227021041, |
| "grad_norm": 0.48777928948402405, |
| "learning_rate": 9.566415774865943e-06, |
| "loss": 0.4876, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.22093023255813954, |
| "grad_norm": 0.4752987325191498, |
| "learning_rate": 9.562469858712243e-06, |
| "loss": 0.4664, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.22148394241417496, |
| "grad_norm": 0.5489518046379089, |
| "learning_rate": 9.558506889868906e-06, |
| "loss": 0.4946, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2220376522702104, |
| "grad_norm": 0.5207077264785767, |
| "learning_rate": 9.554526883147926e-06, |
| "loss": 0.4835, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.22259136212624583, |
| "grad_norm": 0.6723493933677673, |
| "learning_rate": 9.550529853424979e-06, |
| "loss": 0.4743, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.22314507198228128, |
| "grad_norm": 0.44945645332336426, |
| "learning_rate": 9.546515815639365e-06, |
| "loss": 0.4774, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.22369878183831673, |
| "grad_norm": 0.49513328075408936, |
| "learning_rate": 9.542484784793954e-06, |
| "loss": 0.5039, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.22425249169435216, |
| "grad_norm": 0.5728231072425842, |
| "learning_rate": 9.538436775955128e-06, |
| "loss": 0.5207, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2248062015503876, |
| "grad_norm": 0.48615574836730957, |
| "learning_rate": 9.534371804252727e-06, |
| "loss": 0.4949, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.22535991140642303, |
| "grad_norm": 0.4927036166191101, |
| "learning_rate": 9.530289884879993e-06, |
| "loss": 0.4657, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.22591362126245848, |
| "grad_norm": 0.5298251509666443, |
| "learning_rate": 9.526191033093509e-06, |
| "loss": 0.4885, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2264673311184939, |
| "grad_norm": 0.49086251854896545, |
| "learning_rate": 9.522075264213144e-06, |
| "loss": 0.4696, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.22702104097452935, |
| "grad_norm": 0.5596606135368347, |
| "learning_rate": 9.517942593621998e-06, |
| "loss": 0.534, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.22757475083056478, |
| "grad_norm": 0.5208204388618469, |
| "learning_rate": 9.513793036766345e-06, |
| "loss": 0.4666, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.22812846068660023, |
| "grad_norm": 0.49649903178215027, |
| "learning_rate": 9.50962660915557e-06, |
| "loss": 0.4396, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.22868217054263565, |
| "grad_norm": 0.6174575090408325, |
| "learning_rate": 9.505443326362113e-06, |
| "loss": 0.5177, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2292358803986711, |
| "grad_norm": 0.4835375249385834, |
| "learning_rate": 9.501243204021415e-06, |
| "loss": 0.4791, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.22978959025470652, |
| "grad_norm": 0.509118378162384, |
| "learning_rate": 9.497026257831856e-06, |
| "loss": 0.4943, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.23034330011074197, |
| "grad_norm": 0.5107400417327881, |
| "learning_rate": 9.492792503554695e-06, |
| "loss": 0.4875, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.23089700996677742, |
| "grad_norm": 0.45292192697525024, |
| "learning_rate": 9.488541957014017e-06, |
| "loss": 0.5137, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.23145071982281284, |
| "grad_norm": 0.5446093678474426, |
| "learning_rate": 9.484274634096663e-06, |
| "loss": 0.4871, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2320044296788483, |
| "grad_norm": 0.4633888304233551, |
| "learning_rate": 9.479990550752184e-06, |
| "loss": 0.4926, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 0.455936461687088, |
| "learning_rate": 9.47568972299277e-06, |
| "loss": 0.4705, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.23311184939091917, |
| "grad_norm": 0.4749588072299957, |
| "learning_rate": 9.4713721668932e-06, |
| "loss": 0.4988, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2336655592469546, |
| "grad_norm": 0.4403315782546997, |
| "learning_rate": 9.46703789859077e-06, |
| "loss": 0.4785, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.23421926910299004, |
| "grad_norm": 0.4634638726711273, |
| "learning_rate": 9.462686934285245e-06, |
| "loss": 0.5134, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.23477297895902546, |
| "grad_norm": 0.47634756565093994, |
| "learning_rate": 9.458319290238793e-06, |
| "loss": 0.4953, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2353266888150609, |
| "grad_norm": 0.4883749186992645, |
| "learning_rate": 9.45393498277592e-06, |
| "loss": 0.5086, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.23588039867109634, |
| "grad_norm": 0.4272972345352173, |
| "learning_rate": 9.44953402828342e-06, |
| "loss": 0.4852, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2364341085271318, |
| "grad_norm": 0.45259276032447815, |
| "learning_rate": 9.445116443210299e-06, |
| "loss": 0.4689, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2369878183831672, |
| "grad_norm": 0.48061129450798035, |
| "learning_rate": 9.440682244067724e-06, |
| "loss": 0.5048, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.23754152823920266, |
| "grad_norm": 0.4182446300983429, |
| "learning_rate": 9.436231447428964e-06, |
| "loss": 0.5004, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 0.4484826624393463, |
| "learning_rate": 9.431764069929314e-06, |
| "loss": 0.4719, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.23864894795127353, |
| "grad_norm": 0.4638371467590332, |
| "learning_rate": 9.427280128266049e-06, |
| "loss": 0.5041, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.23920265780730898, |
| "grad_norm": 0.4376615881919861, |
| "learning_rate": 9.422779639198353e-06, |
| "loss": 0.4749, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2397563676633444, |
| "grad_norm": 0.426476389169693, |
| "learning_rate": 9.418262619547255e-06, |
| "loss": 0.4676, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.24031007751937986, |
| "grad_norm": 0.46875908970832825, |
| "learning_rate": 9.41372908619557e-06, |
| "loss": 0.4663, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.24086378737541528, |
| "grad_norm": 0.40445950627326965, |
| "learning_rate": 9.409179056087836e-06, |
| "loss": 0.5113, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.24141749723145073, |
| "grad_norm": 0.44816190004348755, |
| "learning_rate": 9.404612546230244e-06, |
| "loss": 0.4909, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.24197120708748615, |
| "grad_norm": 0.4637437164783478, |
| "learning_rate": 9.400029573690586e-06, |
| "loss": 0.5023, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2425249169435216, |
| "grad_norm": 0.43382760882377625, |
| "learning_rate": 9.395430155598182e-06, |
| "loss": 0.4956, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.24307862679955702, |
| "grad_norm": 0.4382372796535492, |
| "learning_rate": 9.39081430914382e-06, |
| "loss": 0.4634, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.24363233665559247, |
| "grad_norm": 0.40292054414749146, |
| "learning_rate": 9.38618205157969e-06, |
| "loss": 0.48, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2441860465116279, |
| "grad_norm": 0.5044105648994446, |
| "learning_rate": 9.381533400219319e-06, |
| "loss": 0.4981, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.24473975636766335, |
| "grad_norm": 0.4372206926345825, |
| "learning_rate": 9.37686837243751e-06, |
| "loss": 0.4665, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.24529346622369877, |
| "grad_norm": 0.5021594166755676, |
| "learning_rate": 9.37218698567027e-06, |
| "loss": 0.5061, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.24584717607973422, |
| "grad_norm": 0.45043492317199707, |
| "learning_rate": 9.367489257414759e-06, |
| "loss": 0.4939, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.24640088593576967, |
| "grad_norm": 0.4383167028427124, |
| "learning_rate": 9.362775205229201e-06, |
| "loss": 0.458, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2469545957918051, |
| "grad_norm": 0.4411497712135315, |
| "learning_rate": 9.358044846732848e-06, |
| "loss": 0.4899, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.24750830564784054, |
| "grad_norm": 0.5189668536186218, |
| "learning_rate": 9.353298199605882e-06, |
| "loss": 0.4853, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.24806201550387597, |
| "grad_norm": 0.530316174030304, |
| "learning_rate": 9.348535281589379e-06, |
| "loss": 0.4822, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.24861572535991142, |
| "grad_norm": 0.48705223202705383, |
| "learning_rate": 9.34375611048522e-06, |
| "loss": 0.4794, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.24916943521594684, |
| "grad_norm": 0.5219366550445557, |
| "learning_rate": 9.338960704156042e-06, |
| "loss": 0.4532, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2497231450719823, |
| "grad_norm": 0.4505665898323059, |
| "learning_rate": 9.334149080525154e-06, |
| "loss": 0.5145, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2502768549280177, |
| "grad_norm": 0.4620222747325897, |
| "learning_rate": 9.329321257576487e-06, |
| "loss": 0.4703, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.25083056478405313, |
| "grad_norm": 0.44555017352104187, |
| "learning_rate": 9.32447725335451e-06, |
| "loss": 0.5018, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2513842746400886, |
| "grad_norm": 0.4511682689189911, |
| "learning_rate": 9.319617085964177e-06, |
| "loss": 0.4998, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.25193798449612403, |
| "grad_norm": 0.4824512004852295, |
| "learning_rate": 9.314740773570854e-06, |
| "loss": 0.4958, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.25249169435215946, |
| "grad_norm": 0.42782482504844666, |
| "learning_rate": 9.309848334400247e-06, |
| "loss": 0.4762, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2530454042081949, |
| "grad_norm": 0.5117877125740051, |
| "learning_rate": 9.30493978673834e-06, |
| "loss": 0.4906, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.25359911406423036, |
| "grad_norm": 0.48038557171821594, |
| "learning_rate": 9.300015148931321e-06, |
| "loss": 0.4771, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2541528239202658, |
| "grad_norm": 0.4039292335510254, |
| "learning_rate": 9.295074439385521e-06, |
| "loss": 0.4614, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2547065337763012, |
| "grad_norm": 0.44665515422821045, |
| "learning_rate": 9.290117676567339e-06, |
| "loss": 0.4749, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2552602436323367, |
| "grad_norm": 0.44022291898727417, |
| "learning_rate": 9.285144879003173e-06, |
| "loss": 0.4935, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.2558139534883721, |
| "grad_norm": 0.4289686381816864, |
| "learning_rate": 9.280156065279353e-06, |
| "loss": 0.4812, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2563676633444075, |
| "grad_norm": 0.5331130027770996, |
| "learning_rate": 9.275151254042072e-06, |
| "loss": 0.4951, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.25692137320044295, |
| "grad_norm": 0.43817734718322754, |
| "learning_rate": 9.270130463997317e-06, |
| "loss": 0.4943, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2574750830564784, |
| "grad_norm": 0.5689101815223694, |
| "learning_rate": 9.265093713910792e-06, |
| "loss": 0.4549, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.25802879291251385, |
| "grad_norm": 0.48502880334854126, |
| "learning_rate": 9.26004102260786e-06, |
| "loss": 0.4844, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.25858250276854927, |
| "grad_norm": 0.5620638728141785, |
| "learning_rate": 9.25497240897346e-06, |
| "loss": 0.4903, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2591362126245847, |
| "grad_norm": 0.5323494076728821, |
| "learning_rate": 9.249887891952047e-06, |
| "loss": 0.4995, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2596899224806202, |
| "grad_norm": 0.4310426414012909, |
| "learning_rate": 9.244787490547513e-06, |
| "loss": 0.4763, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2602436323366556, |
| "grad_norm": 0.5547283291816711, |
| "learning_rate": 9.23967122382312e-06, |
| "loss": 0.4784, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.260797342192691, |
| "grad_norm": 0.6222092509269714, |
| "learning_rate": 9.23453911090143e-06, |
| "loss": 0.4986, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.26135105204872644, |
| "grad_norm": 0.495560884475708, |
| "learning_rate": 9.229391170964233e-06, |
| "loss": 0.4729, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2619047619047619, |
| "grad_norm": 0.5985239744186401, |
| "learning_rate": 9.224227423252468e-06, |
| "loss": 0.4979, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.26245847176079734, |
| "grad_norm": 0.4933107793331146, |
| "learning_rate": 9.219047887066163e-06, |
| "loss": 0.4934, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.26301218161683276, |
| "grad_norm": 0.46847131848335266, |
| "learning_rate": 9.213852581764358e-06, |
| "loss": 0.4931, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.26356589147286824, |
| "grad_norm": 0.584668755531311, |
| "learning_rate": 9.208641526765024e-06, |
| "loss": 0.5173, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.26411960132890366, |
| "grad_norm": 0.48668819665908813, |
| "learning_rate": 9.203414741545003e-06, |
| "loss": 0.4867, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2646733111849391, |
| "grad_norm": 0.47595974802970886, |
| "learning_rate": 9.198172245639932e-06, |
| "loss": 0.4921, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2652270210409745, |
| "grad_norm": 0.4644550681114197, |
| "learning_rate": 9.192914058644167e-06, |
| "loss": 0.4832, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.26578073089701, |
| "grad_norm": 0.46028774976730347, |
| "learning_rate": 9.18764020021071e-06, |
| "loss": 0.4826, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2663344407530454, |
| "grad_norm": 0.4817364811897278, |
| "learning_rate": 9.182350690051134e-06, |
| "loss": 0.4847, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.26688815060908083, |
| "grad_norm": 0.4588821232318878, |
| "learning_rate": 9.177045547935519e-06, |
| "loss": 0.4507, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.26744186046511625, |
| "grad_norm": 0.49955952167510986, |
| "learning_rate": 9.171724793692363e-06, |
| "loss": 0.4958, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.26799557032115173, |
| "grad_norm": 0.46924176812171936, |
| "learning_rate": 9.166388447208524e-06, |
| "loss": 0.4977, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.26854928017718716, |
| "grad_norm": 0.44414854049682617, |
| "learning_rate": 9.161036528429128e-06, |
| "loss": 0.4989, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2691029900332226, |
| "grad_norm": 0.4286213219165802, |
| "learning_rate": 9.155669057357515e-06, |
| "loss": 0.4721, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.26965669988925806, |
| "grad_norm": 0.47305360436439514, |
| "learning_rate": 9.150286054055143e-06, |
| "loss": 0.4812, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2702104097452935, |
| "grad_norm": 0.49933749437332153, |
| "learning_rate": 9.144887538641532e-06, |
| "loss": 0.4762, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2707641196013289, |
| "grad_norm": 0.4837988018989563, |
| "learning_rate": 9.139473531294171e-06, |
| "loss": 0.4857, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2713178294573643, |
| "grad_norm": 0.5129116177558899, |
| "learning_rate": 9.13404405224846e-06, |
| "loss": 0.4954, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2718715393133998, |
| "grad_norm": 0.46823567152023315, |
| "learning_rate": 9.12859912179762e-06, |
| "loss": 0.4532, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.2724252491694352, |
| "grad_norm": 0.5194388628005981, |
| "learning_rate": 9.123138760292631e-06, |
| "loss": 0.4809, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.27297895902547065, |
| "grad_norm": 0.52385413646698, |
| "learning_rate": 9.117662988142138e-06, |
| "loss": 0.476, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.27353266888150607, |
| "grad_norm": 0.47071465849876404, |
| "learning_rate": 9.11217182581239e-06, |
| "loss": 0.498, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.27408637873754155, |
| "grad_norm": 0.44149866700172424, |
| "learning_rate": 9.106665293827162e-06, |
| "loss": 0.489, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.27464008859357697, |
| "grad_norm": 0.526095986366272, |
| "learning_rate": 9.101143412767665e-06, |
| "loss": 0.498, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2751937984496124, |
| "grad_norm": 0.5003870725631714, |
| "learning_rate": 9.09560620327249e-06, |
| "loss": 0.5074, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.2757475083056478, |
| "grad_norm": 0.4868812561035156, |
| "learning_rate": 9.09005368603751e-06, |
| "loss": 0.518, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2763012181616833, |
| "grad_norm": 0.46059319376945496, |
| "learning_rate": 9.084485881815818e-06, |
| "loss": 0.489, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.2768549280177187, |
| "grad_norm": 0.4523199200630188, |
| "learning_rate": 9.07890281141764e-06, |
| "loss": 0.4737, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.27740863787375414, |
| "grad_norm": 0.5640616416931152, |
| "learning_rate": 9.073304495710267e-06, |
| "loss": 0.4811, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2779623477297896, |
| "grad_norm": 0.4549531042575836, |
| "learning_rate": 9.067690955617962e-06, |
| "loss": 0.4714, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.27851605758582504, |
| "grad_norm": 0.49987301230430603, |
| "learning_rate": 9.062062212121897e-06, |
| "loss": 0.4923, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.27906976744186046, |
| "grad_norm": 0.5010316967964172, |
| "learning_rate": 9.056418286260066e-06, |
| "loss": 0.4672, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.2796234772978959, |
| "grad_norm": 0.47485628724098206, |
| "learning_rate": 9.050759199127211e-06, |
| "loss": 0.4889, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.28017718715393136, |
| "grad_norm": 0.4733990430831909, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.5024, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2807308970099668, |
| "grad_norm": 0.4807707965373993, |
| "learning_rate": 9.039395625710641e-06, |
| "loss": 0.4899, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2812846068660022, |
| "grad_norm": 0.49026361107826233, |
| "learning_rate": 9.033691181899422e-06, |
| "loss": 0.4824, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.28183831672203763, |
| "grad_norm": 0.46360817551612854, |
| "learning_rate": 9.027971661762016e-06, |
| "loss": 0.478, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2823920265780731, |
| "grad_norm": 0.47777828574180603, |
| "learning_rate": 9.022237086675702e-06, |
| "loss": 0.5157, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.28294573643410853, |
| "grad_norm": 0.4797463119029999, |
| "learning_rate": 9.016487478074032e-06, |
| "loss": 0.4767, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.28349944629014395, |
| "grad_norm": 0.5193693041801453, |
| "learning_rate": 9.010722857446745e-06, |
| "loss": 0.4857, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2840531561461794, |
| "grad_norm": 0.454267293214798, |
| "learning_rate": 9.00494324633969e-06, |
| "loss": 0.5032, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.28460686600221485, |
| "grad_norm": 0.4507046341896057, |
| "learning_rate": 8.999148666354746e-06, |
| "loss": 0.4872, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2851605758582503, |
| "grad_norm": 0.5065588355064392, |
| "learning_rate": 8.993339139149737e-06, |
| "loss": 0.4834, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.5654738545417786, |
| "learning_rate": 8.987514686438353e-06, |
| "loss": 0.496, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2862679955703212, |
| "grad_norm": 0.4761645197868347, |
| "learning_rate": 8.981675329990074e-06, |
| "loss": 0.4953, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2868217054263566, |
| "grad_norm": 0.5840123295783997, |
| "learning_rate": 8.975821091630082e-06, |
| "loss": 0.5041, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.287375415282392, |
| "grad_norm": 0.47698304057121277, |
| "learning_rate": 8.969951993239177e-06, |
| "loss": 0.4965, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.28792912513842744, |
| "grad_norm": 0.5020713806152344, |
| "learning_rate": 8.964068056753708e-06, |
| "loss": 0.5008, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2884828349944629, |
| "grad_norm": 0.5571221113204956, |
| "learning_rate": 8.95816930416548e-06, |
| "loss": 0.4879, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.28903654485049834, |
| "grad_norm": 0.4395380914211273, |
| "learning_rate": 8.952255757521669e-06, |
| "loss": 0.5181, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.28959025470653377, |
| "grad_norm": 0.5196503400802612, |
| "learning_rate": 8.946327438924755e-06, |
| "loss": 0.4755, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2901439645625692, |
| "grad_norm": 0.46694231033325195, |
| "learning_rate": 8.94038437053242e-06, |
| "loss": 0.4723, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.29069767441860467, |
| "grad_norm": 0.47375500202178955, |
| "learning_rate": 8.934426574557483e-06, |
| "loss": 0.4697, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.2912513842746401, |
| "grad_norm": 0.40896084904670715, |
| "learning_rate": 8.928454073267801e-06, |
| "loss": 0.4844, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2918050941306755, |
| "grad_norm": 0.5264450907707214, |
| "learning_rate": 8.922466888986203e-06, |
| "loss": 0.4682, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.292358803986711, |
| "grad_norm": 0.5325335264205933, |
| "learning_rate": 8.916465044090389e-06, |
| "loss": 0.4813, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.2929125138427464, |
| "grad_norm": 0.4511902630329132, |
| "learning_rate": 8.910448561012859e-06, |
| "loss": 0.489, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.29346622369878184, |
| "grad_norm": 0.5324682593345642, |
| "learning_rate": 8.90441746224082e-06, |
| "loss": 0.5032, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.29401993355481726, |
| "grad_norm": 0.4793945848941803, |
| "learning_rate": 8.898371770316113e-06, |
| "loss": 0.4649, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.29457364341085274, |
| "grad_norm": 0.419558048248291, |
| "learning_rate": 8.892311507835118e-06, |
| "loss": 0.5066, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.29512735326688816, |
| "grad_norm": 0.443803995847702, |
| "learning_rate": 8.886236697448675e-06, |
| "loss": 0.4939, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2956810631229236, |
| "grad_norm": 0.4384360909461975, |
| "learning_rate": 8.880147361862e-06, |
| "loss": 0.4853, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.296234772978959, |
| "grad_norm": 0.4051086902618408, |
| "learning_rate": 8.874043523834593e-06, |
| "loss": 0.4765, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2967884828349945, |
| "grad_norm": 0.4231143295764923, |
| "learning_rate": 8.867925206180166e-06, |
| "loss": 0.4914, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2973421926910299, |
| "grad_norm": 0.4417089521884918, |
| "learning_rate": 8.861792431766544e-06, |
| "loss": 0.5051, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2978959025470653, |
| "grad_norm": 0.4100184440612793, |
| "learning_rate": 8.85564522351559e-06, |
| "loss": 0.4933, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.29844961240310075, |
| "grad_norm": 0.4534027576446533, |
| "learning_rate": 8.849483604403108e-06, |
| "loss": 0.4824, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.29900332225913623, |
| "grad_norm": 0.47366371750831604, |
| "learning_rate": 8.843307597458775e-06, |
| "loss": 0.4855, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.29955703211517165, |
| "grad_norm": 0.5023186206817627, |
| "learning_rate": 8.837117225766033e-06, |
| "loss": 0.5054, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.3001107419712071, |
| "grad_norm": 0.42479321360588074, |
| "learning_rate": 8.830912512462018e-06, |
| "loss": 0.4959, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.30066445182724255, |
| "grad_norm": 0.5450031161308289, |
| "learning_rate": 8.824693480737472e-06, |
| "loss": 0.4906, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.301218161683278, |
| "grad_norm": 0.416326642036438, |
| "learning_rate": 8.818460153836653e-06, |
| "loss": 0.504, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3017718715393134, |
| "grad_norm": 0.4415566623210907, |
| "learning_rate": 8.81221255505724e-06, |
| "loss": 0.5013, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.3023255813953488, |
| "grad_norm": 0.405342698097229, |
| "learning_rate": 8.805950707750268e-06, |
| "loss": 0.4886, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3028792912513843, |
| "grad_norm": 0.43602555990219116, |
| "learning_rate": 8.799674635320017e-06, |
| "loss": 0.4953, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3034330011074197, |
| "grad_norm": 0.4456259608268738, |
| "learning_rate": 8.79338436122394e-06, |
| "loss": 0.4985, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.30398671096345514, |
| "grad_norm": 0.40702182054519653, |
| "learning_rate": 8.787079908972567e-06, |
| "loss": 0.4674, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.30454042081949056, |
| "grad_norm": 0.5078481435775757, |
| "learning_rate": 8.780761302129424e-06, |
| "loss": 0.506, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.30509413067552604, |
| "grad_norm": 0.4316215217113495, |
| "learning_rate": 8.774428564310939e-06, |
| "loss": 0.4771, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.30564784053156147, |
| "grad_norm": 0.40345603227615356, |
| "learning_rate": 8.768081719186354e-06, |
| "loss": 0.5055, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.3062015503875969, |
| "grad_norm": 0.5201309323310852, |
| "learning_rate": 8.76172079047764e-06, |
| "loss": 0.5051, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3067552602436323, |
| "grad_norm": 0.4317896366119385, |
| "learning_rate": 8.755345801959412e-06, |
| "loss": 0.4869, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.3073089700996678, |
| "grad_norm": 0.41571563482284546, |
| "learning_rate": 8.748956777458828e-06, |
| "loss": 0.4908, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.3078626799557032, |
| "grad_norm": 0.46132898330688477, |
| "learning_rate": 8.742553740855507e-06, |
| "loss": 0.4869, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.30841638981173863, |
| "grad_norm": 0.4685657322406769, |
| "learning_rate": 8.736136716081443e-06, |
| "loss": 0.4896, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3089700996677741, |
| "grad_norm": 0.43309783935546875, |
| "learning_rate": 8.729705727120911e-06, |
| "loss": 0.4873, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.30952380952380953, |
| "grad_norm": 0.4360080063343048, |
| "learning_rate": 8.72326079801038e-06, |
| "loss": 0.4486, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.31007751937984496, |
| "grad_norm": 0.4630446434020996, |
| "learning_rate": 8.71680195283842e-06, |
| "loss": 0.4756, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3106312292358804, |
| "grad_norm": 0.441049724817276, |
| "learning_rate": 8.710329215745612e-06, |
| "loss": 0.4804, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.31118493909191586, |
| "grad_norm": 0.4433397650718689, |
| "learning_rate": 8.703842610924463e-06, |
| "loss": 0.4681, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3117386489479513, |
| "grad_norm": 0.4608594477176666, |
| "learning_rate": 8.697342162619308e-06, |
| "loss": 0.5161, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.3122923588039867, |
| "grad_norm": 0.4613628089427948, |
| "learning_rate": 8.69082789512623e-06, |
| "loss": 0.4891, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.3128460686600221, |
| "grad_norm": 0.41381844878196716, |
| "learning_rate": 8.684299832792958e-06, |
| "loss": 0.4929, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3133997785160576, |
| "grad_norm": 0.46331459283828735, |
| "learning_rate": 8.677758000018777e-06, |
| "loss": 0.4786, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.313953488372093, |
| "grad_norm": 0.44575268030166626, |
| "learning_rate": 8.671202421254448e-06, |
| "loss": 0.5057, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.31450719822812845, |
| "grad_norm": 0.4823036193847656, |
| "learning_rate": 8.664633121002103e-06, |
| "loss": 0.4785, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.31506090808416387, |
| "grad_norm": 0.43519729375839233, |
| "learning_rate": 8.658050123815166e-06, |
| "loss": 0.4987, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.31561461794019935, |
| "grad_norm": 0.47774675488471985, |
| "learning_rate": 8.651453454298244e-06, |
| "loss": 0.4667, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.31616832779623477, |
| "grad_norm": 0.5016129612922668, |
| "learning_rate": 8.644843137107058e-06, |
| "loss": 0.4776, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.3167220376522702, |
| "grad_norm": 0.40757277607917786, |
| "learning_rate": 8.638219196948332e-06, |
| "loss": 0.4707, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.31727574750830567, |
| "grad_norm": 0.4288807213306427, |
| "learning_rate": 8.631581658579706e-06, |
| "loss": 0.4859, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3178294573643411, |
| "grad_norm": 0.4194124639034271, |
| "learning_rate": 8.624930546809649e-06, |
| "loss": 0.4499, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3183831672203765, |
| "grad_norm": 0.45525485277175903, |
| "learning_rate": 8.618265886497357e-06, |
| "loss": 0.4783, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.31893687707641194, |
| "grad_norm": 0.435594379901886, |
| "learning_rate": 8.61158770255267e-06, |
| "loss": 0.481, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3194905869324474, |
| "grad_norm": 0.4703901410102844, |
| "learning_rate": 8.604896019935971e-06, |
| "loss": 0.4903, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.32004429678848284, |
| "grad_norm": 0.40026766061782837, |
| "learning_rate": 8.598190863658096e-06, |
| "loss": 0.4995, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.32059800664451826, |
| "grad_norm": 0.4388001561164856, |
| "learning_rate": 8.591472258780242e-06, |
| "loss": 0.4662, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3211517165005537, |
| "grad_norm": 0.49517446756362915, |
| "learning_rate": 8.584740230413867e-06, |
| "loss": 0.4639, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.32170542635658916, |
| "grad_norm": 0.3800186216831207, |
| "learning_rate": 8.577994803720605e-06, |
| "loss": 0.4756, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3222591362126246, |
| "grad_norm": 0.5055624842643738, |
| "learning_rate": 8.57123600391217e-06, |
| "loss": 0.4823, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.32281284606866, |
| "grad_norm": 0.4609836935997009, |
| "learning_rate": 8.56446385625025e-06, |
| "loss": 0.4909, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3233665559246955, |
| "grad_norm": 0.49489548802375793, |
| "learning_rate": 8.557678386046429e-06, |
| "loss": 0.4568, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3239202657807309, |
| "grad_norm": 0.47447633743286133, |
| "learning_rate": 8.550879618662083e-06, |
| "loss": 0.488, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.32447397563676633, |
| "grad_norm": 0.44722503423690796, |
| "learning_rate": 8.544067579508292e-06, |
| "loss": 0.4867, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.32502768549280175, |
| "grad_norm": 0.5135213136672974, |
| "learning_rate": 8.537242294045733e-06, |
| "loss": 0.4976, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.32558139534883723, |
| "grad_norm": 0.4393743574619293, |
| "learning_rate": 8.5304037877846e-06, |
| "loss": 0.472, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.32613510520487266, |
| "grad_norm": 0.47811412811279297, |
| "learning_rate": 8.523552086284495e-06, |
| "loss": 0.486, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3266888150609081, |
| "grad_norm": 0.4685250520706177, |
| "learning_rate": 8.516687215154341e-06, |
| "loss": 0.4721, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3272425249169435, |
| "grad_norm": 0.4332244098186493, |
| "learning_rate": 8.509809200052286e-06, |
| "loss": 0.4712, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.327796234772979, |
| "grad_norm": 0.5710234045982361, |
| "learning_rate": 8.5029180666856e-06, |
| "loss": 0.5037, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.3283499446290144, |
| "grad_norm": 0.48631003499031067, |
| "learning_rate": 8.496013840810586e-06, |
| "loss": 0.4998, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3289036544850498, |
| "grad_norm": 0.5860962271690369, |
| "learning_rate": 8.489096548232485e-06, |
| "loss": 0.5009, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.32945736434108525, |
| "grad_norm": 0.4546487033367157, |
| "learning_rate": 8.482166214805374e-06, |
| "loss": 0.4571, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.3300110741971207, |
| "grad_norm": 0.5256265997886658, |
| "learning_rate": 8.475222866432065e-06, |
| "loss": 0.4758, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.33056478405315615, |
| "grad_norm": 0.4427199959754944, |
| "learning_rate": 8.468266529064025e-06, |
| "loss": 0.4751, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.33111849390919157, |
| "grad_norm": 0.51978600025177, |
| "learning_rate": 8.461297228701264e-06, |
| "loss": 0.4886, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.33167220376522705, |
| "grad_norm": 0.438180148601532, |
| "learning_rate": 8.45431499139224e-06, |
| "loss": 0.4514, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.33222591362126247, |
| "grad_norm": 0.38737887144088745, |
| "learning_rate": 8.44731984323377e-06, |
| "loss": 0.4647, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3327796234772979, |
| "grad_norm": 0.42485183477401733, |
| "learning_rate": 8.440311810370921e-06, |
| "loss": 0.4648, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.44904154539108276, |
| "learning_rate": 8.433290918996921e-06, |
| "loss": 0.5129, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3338870431893688, |
| "grad_norm": 0.43713030219078064, |
| "learning_rate": 8.426257195353055e-06, |
| "loss": 0.4506, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3344407530454042, |
| "grad_norm": 0.40078917145729065, |
| "learning_rate": 8.419210665728577e-06, |
| "loss": 0.4887, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.33499446290143964, |
| "grad_norm": 0.45686566829681396, |
| "learning_rate": 8.412151356460593e-06, |
| "loss": 0.46, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.33554817275747506, |
| "grad_norm": 0.4962942600250244, |
| "learning_rate": 8.405079293933986e-06, |
| "loss": 0.4685, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.33610188261351054, |
| "grad_norm": 0.38770344853401184, |
| "learning_rate": 8.3979945045813e-06, |
| "loss": 0.4919, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.33665559246954596, |
| "grad_norm": 0.5221530795097351, |
| "learning_rate": 8.390897014882645e-06, |
| "loss": 0.4757, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3372093023255814, |
| "grad_norm": 0.44620612263679504, |
| "learning_rate": 8.383786851365601e-06, |
| "loss": 0.489, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3377630121816168, |
| "grad_norm": 0.47988009452819824, |
| "learning_rate": 8.376664040605122e-06, |
| "loss": 0.474, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3383167220376523, |
| "grad_norm": 0.42253127694129944, |
| "learning_rate": 8.36952860922343e-06, |
| "loss": 0.4723, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.3388704318936877, |
| "grad_norm": 0.48353639245033264, |
| "learning_rate": 8.362380583889912e-06, |
| "loss": 0.4513, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.33942414174972313, |
| "grad_norm": 0.46105682849884033, |
| "learning_rate": 8.355219991321035e-06, |
| "loss": 0.5074, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.3399778516057586, |
| "grad_norm": 0.4596821367740631, |
| "learning_rate": 8.348046858280233e-06, |
| "loss": 0.505, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.34053156146179403, |
| "grad_norm": 0.3729395270347595, |
| "learning_rate": 8.34086121157781e-06, |
| "loss": 0.4834, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.34108527131782945, |
| "grad_norm": 0.511202871799469, |
| "learning_rate": 8.333663078070845e-06, |
| "loss": 0.4521, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3416389811738649, |
| "grad_norm": 0.4841696321964264, |
| "learning_rate": 8.326452484663083e-06, |
| "loss": 0.4761, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.34219269102990035, |
| "grad_norm": 0.3986373543739319, |
| "learning_rate": 8.319229458304843e-06, |
| "loss": 0.4655, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3427464008859358, |
| "grad_norm": 0.5737797021865845, |
| "learning_rate": 8.311994025992912e-06, |
| "loss": 0.5066, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3433001107419712, |
| "grad_norm": 0.44409626722335815, |
| "learning_rate": 8.304746214770445e-06, |
| "loss": 0.4903, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3438538205980066, |
| "grad_norm": 0.49516579508781433, |
| "learning_rate": 8.297486051726864e-06, |
| "loss": 0.4537, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.3444075304540421, |
| "grad_norm": 0.498826801776886, |
| "learning_rate": 8.290213563997758e-06, |
| "loss": 0.4901, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3449612403100775, |
| "grad_norm": 0.5155895948410034, |
| "learning_rate": 8.282928778764783e-06, |
| "loss": 0.456, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.34551495016611294, |
| "grad_norm": 0.4448562264442444, |
| "learning_rate": 8.275631723255556e-06, |
| "loss": 0.4912, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.34606866002214837, |
| "grad_norm": 0.4876919984817505, |
| "learning_rate": 8.268322424743552e-06, |
| "loss": 0.4935, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.34662236987818384, |
| "grad_norm": 0.46334221959114075, |
| "learning_rate": 8.26100091054801e-06, |
| "loss": 0.4767, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.34717607973421927, |
| "grad_norm": 0.4294886291027069, |
| "learning_rate": 8.253667208033828e-06, |
| "loss": 0.4636, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3477297895902547, |
| "grad_norm": 0.5191714763641357, |
| "learning_rate": 8.246321344611455e-06, |
| "loss": 0.482, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.34828349944629017, |
| "grad_norm": 0.5100964903831482, |
| "learning_rate": 8.23896334773679e-06, |
| "loss": 0.4838, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3488372093023256, |
| "grad_norm": 0.4342910349369049, |
| "learning_rate": 8.23159324491109e-06, |
| "loss": 0.5089, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.349390919158361, |
| "grad_norm": 0.44776540994644165, |
| "learning_rate": 8.224211063680854e-06, |
| "loss": 0.479, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.34994462901439644, |
| "grad_norm": 0.47771504521369934, |
| "learning_rate": 8.216816831637726e-06, |
| "loss": 0.4875, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3504983388704319, |
| "grad_norm": 0.44938650727272034, |
| "learning_rate": 8.209410576418391e-06, |
| "loss": 0.4789, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.35105204872646734, |
| "grad_norm": 0.4639085531234741, |
| "learning_rate": 8.201992325704473e-06, |
| "loss": 0.4756, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.35160575858250276, |
| "grad_norm": 0.47028908133506775, |
| "learning_rate": 8.19456210722243e-06, |
| "loss": 0.4708, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3521594684385382, |
| "grad_norm": 0.44380685687065125, |
| "learning_rate": 8.18711994874345e-06, |
| "loss": 0.4676, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.35271317829457366, |
| "grad_norm": 0.4652746021747589, |
| "learning_rate": 8.179665878083347e-06, |
| "loss": 0.4828, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3532668881506091, |
| "grad_norm": 0.4208925664424896, |
| "learning_rate": 8.172199923102459e-06, |
| "loss": 0.4877, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3538205980066445, |
| "grad_norm": 0.5734950304031372, |
| "learning_rate": 8.164722111705545e-06, |
| "loss": 0.5077, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.35437430786268, |
| "grad_norm": 0.42535200715065, |
| "learning_rate": 8.157232471841676e-06, |
| "loss": 0.4602, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3549280177187154, |
| "grad_norm": 0.4676467180252075, |
| "learning_rate": 8.149731031504136e-06, |
| "loss": 0.4651, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3554817275747508, |
| "grad_norm": 0.4505887031555176, |
| "learning_rate": 8.142217818730307e-06, |
| "loss": 0.4801, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.35603543743078625, |
| "grad_norm": 0.4688456952571869, |
| "learning_rate": 8.13469286160158e-06, |
| "loss": 0.4717, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.35658914728682173, |
| "grad_norm": 0.4681268334388733, |
| "learning_rate": 8.127156188243239e-06, |
| "loss": 0.4702, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.3870069086551666, |
| "learning_rate": 8.119607826824356e-06, |
| "loss": 0.4632, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3576965669988926, |
| "grad_norm": 0.5866634249687195, |
| "learning_rate": 8.112047805557693e-06, |
| "loss": 0.4879, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.358250276854928, |
| "grad_norm": 0.4573242664337158, |
| "learning_rate": 8.104476152699587e-06, |
| "loss": 0.5011, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3588039867109635, |
| "grad_norm": 0.4478485584259033, |
| "learning_rate": 8.096892896549853e-06, |
| "loss": 0.4865, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3593576965669989, |
| "grad_norm": 0.4456096291542053, |
| "learning_rate": 8.089298065451673e-06, |
| "loss": 0.4943, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.3599114064230343, |
| "grad_norm": 0.4899655878543854, |
| "learning_rate": 8.081691687791491e-06, |
| "loss": 0.4712, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.36046511627906974, |
| "grad_norm": 0.4818911850452423, |
| "learning_rate": 8.074073791998907e-06, |
| "loss": 0.4808, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3610188261351052, |
| "grad_norm": 0.4100266695022583, |
| "learning_rate": 8.066444406546573e-06, |
| "loss": 0.4944, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.36157253599114064, |
| "grad_norm": 0.43672144412994385, |
| "learning_rate": 8.058803559950086e-06, |
| "loss": 0.4906, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.36212624584717606, |
| "grad_norm": 0.45784133672714233, |
| "learning_rate": 8.051151280767874e-06, |
| "loss": 0.467, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.36267995570321154, |
| "grad_norm": 0.5059024691581726, |
| "learning_rate": 8.043487597601104e-06, |
| "loss": 0.482, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.36323366555924697, |
| "grad_norm": 0.391807496547699, |
| "learning_rate": 8.035812539093557e-06, |
| "loss": 0.4749, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3637873754152824, |
| "grad_norm": 0.49046629667282104, |
| "learning_rate": 8.02812613393154e-06, |
| "loss": 0.5014, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3643410852713178, |
| "grad_norm": 0.509397029876709, |
| "learning_rate": 8.020428410843762e-06, |
| "loss": 0.4889, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3648947951273533, |
| "grad_norm": 0.46450507640838623, |
| "learning_rate": 8.012719398601239e-06, |
| "loss": 0.49, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3654485049833887, |
| "grad_norm": 0.46102917194366455, |
| "learning_rate": 8.004999126017177e-06, |
| "loss": 0.4549, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.36600221483942413, |
| "grad_norm": 0.5289508104324341, |
| "learning_rate": 7.997267621946871e-06, |
| "loss": 0.4809, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.36655592469545956, |
| "grad_norm": 0.4355222284793854, |
| "learning_rate": 7.989524915287595e-06, |
| "loss": 0.4654, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.36710963455149503, |
| "grad_norm": 0.44085797667503357, |
| "learning_rate": 7.981771034978494e-06, |
| "loss": 0.5027, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.36766334440753046, |
| "grad_norm": 0.41785871982574463, |
| "learning_rate": 7.974006010000474e-06, |
| "loss": 0.4424, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3682170542635659, |
| "grad_norm": 0.46425795555114746, |
| "learning_rate": 7.966229869376097e-06, |
| "loss": 0.4901, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3687707641196013, |
| "grad_norm": 0.40710482001304626, |
| "learning_rate": 7.958442642169469e-06, |
| "loss": 0.4632, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3693244739756368, |
| "grad_norm": 0.39024198055267334, |
| "learning_rate": 7.950644357486134e-06, |
| "loss": 0.4792, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3698781838316722, |
| "grad_norm": 0.40468570590019226, |
| "learning_rate": 7.942835044472965e-06, |
| "loss": 0.4895, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3704318936877076, |
| "grad_norm": 0.41393882036209106, |
| "learning_rate": 7.935014732318057e-06, |
| "loss": 0.4763, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3709856035437431, |
| "grad_norm": 0.44807004928588867, |
| "learning_rate": 7.92718345025061e-06, |
| "loss": 0.4789, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3715393133997785, |
| "grad_norm": 0.4278004467487335, |
| "learning_rate": 7.919341227540828e-06, |
| "loss": 0.4627, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.37209302325581395, |
| "grad_norm": 0.40687111020088196, |
| "learning_rate": 7.911488093499806e-06, |
| "loss": 0.4686, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.37264673311184937, |
| "grad_norm": 0.4299582540988922, |
| "learning_rate": 7.903624077479424e-06, |
| "loss": 0.4911, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.37320044296788485, |
| "grad_norm": 0.40537703037261963, |
| "learning_rate": 7.895749208872232e-06, |
| "loss": 0.4676, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.37375415282392027, |
| "grad_norm": 0.3911261260509491, |
| "learning_rate": 7.887863517111337e-06, |
| "loss": 0.4721, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3743078626799557, |
| "grad_norm": 0.4901961088180542, |
| "learning_rate": 7.879967031670313e-06, |
| "loss": 0.5181, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.3748615725359911, |
| "grad_norm": 0.4302717447280884, |
| "learning_rate": 7.872059782063064e-06, |
| "loss": 0.4837, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.3754152823920266, |
| "grad_norm": 0.3713219165802002, |
| "learning_rate": 7.86414179784373e-06, |
| "loss": 0.4703, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.375968992248062, |
| "grad_norm": 0.47798439860343933, |
| "learning_rate": 7.856213108606571e-06, |
| "loss": 0.459, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.37652270210409744, |
| "grad_norm": 0.45034360885620117, |
| "learning_rate": 7.848273743985863e-06, |
| "loss": 0.4803, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3770764119601329, |
| "grad_norm": 0.45023995637893677, |
| "learning_rate": 7.84032373365578e-06, |
| "loss": 0.4545, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.37763012181616834, |
| "grad_norm": 0.4966435432434082, |
| "learning_rate": 7.832363107330281e-06, |
| "loss": 0.5031, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.37818383167220376, |
| "grad_norm": 0.6042452454566956, |
| "learning_rate": 7.824391894763008e-06, |
| "loss": 0.4875, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3787375415282392, |
| "grad_norm": 0.4374452531337738, |
| "learning_rate": 7.816410125747172e-06, |
| "loss": 0.4841, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.37929125138427466, |
| "grad_norm": 0.47047990560531616, |
| "learning_rate": 7.808417830115432e-06, |
| "loss": 0.4634, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3798449612403101, |
| "grad_norm": 0.5158135890960693, |
| "learning_rate": 7.800415037739802e-06, |
| "loss": 0.4796, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3803986710963455, |
| "grad_norm": 0.48473745584487915, |
| "learning_rate": 7.792401778531517e-06, |
| "loss": 0.4689, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.4797394871711731, |
| "learning_rate": 7.78437808244094e-06, |
| "loss": 0.4863, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3815060908084164, |
| "grad_norm": 0.4698885977268219, |
| "learning_rate": 7.776343979457446e-06, |
| "loss": 0.4736, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.38205980066445183, |
| "grad_norm": 0.5596975088119507, |
| "learning_rate": 7.768299499609296e-06, |
| "loss": 0.4818, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.38261351052048725, |
| "grad_norm": 0.4723421037197113, |
| "learning_rate": 7.760244672963548e-06, |
| "loss": 0.4832, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3831672203765227, |
| "grad_norm": 0.4778919816017151, |
| "learning_rate": 7.752179529625922e-06, |
| "loss": 0.487, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.38372093023255816, |
| "grad_norm": 0.46932482719421387, |
| "learning_rate": 7.744104099740703e-06, |
| "loss": 0.4793, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.3842746400885936, |
| "grad_norm": 0.4695429801940918, |
| "learning_rate": 7.736018413490622e-06, |
| "loss": 0.4824, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.384828349944629, |
| "grad_norm": 0.5957847833633423, |
| "learning_rate": 7.727922501096743e-06, |
| "loss": 0.4762, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3853820598006645, |
| "grad_norm": 0.4242111146450043, |
| "learning_rate": 7.719816392818354e-06, |
| "loss": 0.476, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3859357696566999, |
| "grad_norm": 0.5669819116592407, |
| "learning_rate": 7.711700118952848e-06, |
| "loss": 0.4537, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3864894795127353, |
| "grad_norm": 0.4524780809879303, |
| "learning_rate": 7.703573709835614e-06, |
| "loss": 0.4592, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.38704318936877075, |
| "grad_norm": 0.44671958684921265, |
| "learning_rate": 7.695437195839925e-06, |
| "loss": 0.472, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3875968992248062, |
| "grad_norm": 0.5542345643043518, |
| "learning_rate": 7.687290607376816e-06, |
| "loss": 0.4881, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.38815060908084165, |
| "grad_norm": 0.4053798317909241, |
| "learning_rate": 7.679133974894984e-06, |
| "loss": 0.4556, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.38870431893687707, |
| "grad_norm": 0.4634324014186859, |
| "learning_rate": 7.67096732888066e-06, |
| "loss": 0.4926, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3892580287929125, |
| "grad_norm": 0.40314167737960815, |
| "learning_rate": 7.662790699857506e-06, |
| "loss": 0.4779, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.38981173864894797, |
| "grad_norm": 0.40341395139694214, |
| "learning_rate": 7.654604118386494e-06, |
| "loss": 0.4719, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3903654485049834, |
| "grad_norm": 0.49669671058654785, |
| "learning_rate": 7.646407615065796e-06, |
| "loss": 0.4764, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3909191583610188, |
| "grad_norm": 0.38740184903144836, |
| "learning_rate": 7.638201220530664e-06, |
| "loss": 0.4801, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.39147286821705424, |
| "grad_norm": 0.4930890202522278, |
| "learning_rate": 7.629984965453326e-06, |
| "loss": 0.4877, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3920265780730897, |
| "grad_norm": 0.39426112174987793, |
| "learning_rate": 7.621758880542859e-06, |
| "loss": 0.4567, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.39258028792912514, |
| "grad_norm": 0.502477765083313, |
| "learning_rate": 7.613522996545082e-06, |
| "loss": 0.4924, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.39313399778516056, |
| "grad_norm": 0.4422747492790222, |
| "learning_rate": 7.60527734424244e-06, |
| "loss": 0.4679, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.39368770764119604, |
| "grad_norm": 0.3748304843902588, |
| "learning_rate": 7.597021954453887e-06, |
| "loss": 0.5187, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.39424141749723146, |
| "grad_norm": 0.43841761350631714, |
| "learning_rate": 7.588756858034772e-06, |
| "loss": 0.4762, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.3947951273532669, |
| "grad_norm": 0.4414544105529785, |
| "learning_rate": 7.580482085876722e-06, |
| "loss": 0.4796, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3953488372093023, |
| "grad_norm": 0.42060527205467224, |
| "learning_rate": 7.572197668907533e-06, |
| "loss": 0.5126, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3959025470653378, |
| "grad_norm": 0.5003881454467773, |
| "learning_rate": 7.563903638091042e-06, |
| "loss": 0.4744, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3964562569213732, |
| "grad_norm": 0.4138297736644745, |
| "learning_rate": 7.555600024427028e-06, |
| "loss": 0.4629, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.39700996677740863, |
| "grad_norm": 0.5040468573570251, |
| "learning_rate": 7.547286858951075e-06, |
| "loss": 0.4491, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.39756367663344405, |
| "grad_norm": 0.5510542392730713, |
| "learning_rate": 7.538964172734479e-06, |
| "loss": 0.4804, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.39811738648947953, |
| "grad_norm": 0.39894410967826843, |
| "learning_rate": 7.530631996884117e-06, |
| "loss": 0.4842, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.39867109634551495, |
| "grad_norm": 0.5509627461433411, |
| "learning_rate": 7.522290362542329e-06, |
| "loss": 0.4684, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3992248062015504, |
| "grad_norm": 0.4482865035533905, |
| "learning_rate": 7.513939300886816e-06, |
| "loss": 0.4619, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3997785160575858, |
| "grad_norm": 0.442939817905426, |
| "learning_rate": 7.505578843130508e-06, |
| "loss": 0.4642, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.4003322259136213, |
| "grad_norm": 0.4382091760635376, |
| "learning_rate": 7.4972090205214564e-06, |
| "loss": 0.4803, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.4008859357696567, |
| "grad_norm": 0.44710400700569153, |
| "learning_rate": 7.488829864342717e-06, |
| "loss": 0.4651, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4014396456256921, |
| "grad_norm": 0.4985845386981964, |
| "learning_rate": 7.480441405912223e-06, |
| "loss": 0.477, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4019933554817276, |
| "grad_norm": 0.44586989283561707, |
| "learning_rate": 7.472043676582685e-06, |
| "loss": 0.4749, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.402547065337763, |
| "grad_norm": 0.5637676119804382, |
| "learning_rate": 7.463636707741458e-06, |
| "loss": 0.4686, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.40310077519379844, |
| "grad_norm": 0.5102462768554688, |
| "learning_rate": 7.455220530810436e-06, |
| "loss": 0.4895, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.40365448504983387, |
| "grad_norm": 0.4997349977493286, |
| "learning_rate": 7.446795177245923e-06, |
| "loss": 0.486, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.40420819490586934, |
| "grad_norm": 0.5639171600341797, |
| "learning_rate": 7.4383606785385254e-06, |
| "loss": 0.4731, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.40476190476190477, |
| "grad_norm": 0.448219895362854, |
| "learning_rate": 7.42991706621303e-06, |
| "loss": 0.4297, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4053156146179402, |
| "grad_norm": 0.6095700860023499, |
| "learning_rate": 7.4214643718282886e-06, |
| "loss": 0.4727, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.4058693244739756, |
| "grad_norm": 0.44827568531036377, |
| "learning_rate": 7.413002626977092e-06, |
| "loss": 0.4684, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.4064230343300111, |
| "grad_norm": 0.44243839383125305, |
| "learning_rate": 7.404531863286066e-06, |
| "loss": 0.4696, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.4069767441860465, |
| "grad_norm": 0.53190016746521, |
| "learning_rate": 7.396052112415539e-06, |
| "loss": 0.5035, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.40753045404208194, |
| "grad_norm": 0.4007076025009155, |
| "learning_rate": 7.387563406059433e-06, |
| "loss": 0.4649, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.4080841638981174, |
| "grad_norm": 0.4566161036491394, |
| "learning_rate": 7.37906577594514e-06, |
| "loss": 0.4703, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.40863787375415284, |
| "grad_norm": 0.4270188510417938, |
| "learning_rate": 7.370559253833407e-06, |
| "loss": 0.468, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.40919158361018826, |
| "grad_norm": 0.4487841725349426, |
| "learning_rate": 7.362043871518216e-06, |
| "loss": 0.4454, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4097452934662237, |
| "grad_norm": 0.4769379496574402, |
| "learning_rate": 7.353519660826665e-06, |
| "loss": 0.4815, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.41029900332225916, |
| "grad_norm": 0.4311200976371765, |
| "learning_rate": 7.344986653618844e-06, |
| "loss": 0.4951, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4108527131782946, |
| "grad_norm": 0.4666946828365326, |
| "learning_rate": 7.33644488178773e-06, |
| "loss": 0.4806, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.41140642303433, |
| "grad_norm": 0.4465944170951843, |
| "learning_rate": 7.327894377259051e-06, |
| "loss": 0.4892, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4119601328903654, |
| "grad_norm": 0.3847818076610565, |
| "learning_rate": 7.319335171991178e-06, |
| "loss": 0.4801, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4125138427464009, |
| "grad_norm": 0.3697161078453064, |
| "learning_rate": 7.310767297975e-06, |
| "loss": 0.4735, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4130675526024363, |
| "grad_norm": 0.44218286871910095, |
| "learning_rate": 7.302190787233808e-06, |
| "loss": 0.4794, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.41362126245847175, |
| "grad_norm": 0.40929174423217773, |
| "learning_rate": 7.293605671823173e-06, |
| "loss": 0.4502, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4141749723145072, |
| "grad_norm": 0.41390007734298706, |
| "learning_rate": 7.2850119838308255e-06, |
| "loss": 0.4673, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.41472868217054265, |
| "grad_norm": 0.3917429447174072, |
| "learning_rate": 7.27640975537654e-06, |
| "loss": 0.4625, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.4152823920265781, |
| "grad_norm": 0.44105830788612366, |
| "learning_rate": 7.267799018612008e-06, |
| "loss": 0.4546, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4158361018826135, |
| "grad_norm": 0.4174969494342804, |
| "learning_rate": 7.259179805720726e-06, |
| "loss": 0.4889, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.416389811738649, |
| "grad_norm": 0.452042818069458, |
| "learning_rate": 7.250552148917865e-06, |
| "loss": 0.4856, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.4169435215946844, |
| "grad_norm": 0.38706308603286743, |
| "learning_rate": 7.241916080450163e-06, |
| "loss": 0.5123, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4174972314507198, |
| "grad_norm": 0.39623257517814636, |
| "learning_rate": 7.2332716325957905e-06, |
| "loss": 0.4785, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.41805094130675524, |
| "grad_norm": 0.4757280945777893, |
| "learning_rate": 7.224618837664241e-06, |
| "loss": 0.4895, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.4186046511627907, |
| "grad_norm": 0.39697709679603577, |
| "learning_rate": 7.215957727996208e-06, |
| "loss": 0.4812, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.41915836101882614, |
| "grad_norm": 0.4762042760848999, |
| "learning_rate": 7.207288335963456e-06, |
| "loss": 0.4858, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.41971207087486156, |
| "grad_norm": 0.41127634048461914, |
| "learning_rate": 7.198610693968711e-06, |
| "loss": 0.4963, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.420265780730897, |
| "grad_norm": 0.4493284523487091, |
| "learning_rate": 7.18992483444553e-06, |
| "loss": 0.4685, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.42081949058693247, |
| "grad_norm": 0.3796096444129944, |
| "learning_rate": 7.181230789858186e-06, |
| "loss": 0.4608, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4213732004429679, |
| "grad_norm": 0.40398499369621277, |
| "learning_rate": 7.17252859270155e-06, |
| "loss": 0.4976, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.4219269102990033, |
| "grad_norm": 0.4124141335487366, |
| "learning_rate": 7.163818275500951e-06, |
| "loss": 0.4507, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.42248062015503873, |
| "grad_norm": 0.4746420979499817, |
| "learning_rate": 7.1550998708120785e-06, |
| "loss": 0.4811, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4230343300110742, |
| "grad_norm": 0.41351497173309326, |
| "learning_rate": 7.146373411220846e-06, |
| "loss": 0.4819, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.42358803986710963, |
| "grad_norm": 0.5182345509529114, |
| "learning_rate": 7.137638929343274e-06, |
| "loss": 0.4929, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.42414174972314506, |
| "grad_norm": 0.3741333782672882, |
| "learning_rate": 7.128896457825364e-06, |
| "loss": 0.4508, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.42469545957918053, |
| "grad_norm": 0.48205241560935974, |
| "learning_rate": 7.120146029342985e-06, |
| "loss": 0.469, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.42524916943521596, |
| "grad_norm": 0.4315168857574463, |
| "learning_rate": 7.11138767660174e-06, |
| "loss": 0.4873, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.4258028792912514, |
| "grad_norm": 0.4207547903060913, |
| "learning_rate": 7.102621432336853e-06, |
| "loss": 0.4851, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.4263565891472868, |
| "grad_norm": 0.4015323221683502, |
| "learning_rate": 7.093847329313046e-06, |
| "loss": 0.4694, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4269102990033223, |
| "grad_norm": 0.5271881818771362, |
| "learning_rate": 7.085065400324407e-06, |
| "loss": 0.5133, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4274640088593577, |
| "grad_norm": 0.46586957573890686, |
| "learning_rate": 7.07627567819428e-06, |
| "loss": 0.4873, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4280177187153931, |
| "grad_norm": 0.5215753316879272, |
| "learning_rate": 7.0674781957751346e-06, |
| "loss": 0.4536, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.4311859905719757, |
| "learning_rate": 7.058672985948447e-06, |
| "loss": 0.4794, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.429125138427464, |
| "grad_norm": 0.45404115319252014, |
| "learning_rate": 7.049860081624572e-06, |
| "loss": 0.4752, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.42967884828349945, |
| "grad_norm": 0.3787217140197754, |
| "learning_rate": 7.041039515742626e-06, |
| "loss": 0.4666, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.43023255813953487, |
| "grad_norm": 0.4381999373435974, |
| "learning_rate": 7.0322113212703594e-06, |
| "loss": 0.4722, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.43078626799557035, |
| "grad_norm": 0.46380746364593506, |
| "learning_rate": 7.023375531204038e-06, |
| "loss": 0.4953, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.43133997785160577, |
| "grad_norm": 0.42247340083122253, |
| "learning_rate": 7.014532178568314e-06, |
| "loss": 0.4864, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.4318936877076412, |
| "grad_norm": 0.4111511707305908, |
| "learning_rate": 7.005681296416107e-06, |
| "loss": 0.4859, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4324473975636766, |
| "grad_norm": 0.45308950543403625, |
| "learning_rate": 6.9968229178284775e-06, |
| "loss": 0.484, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.4330011074197121, |
| "grad_norm": 0.40989553928375244, |
| "learning_rate": 6.9879570759145085e-06, |
| "loss": 0.4985, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4335548172757475, |
| "grad_norm": 0.4102574288845062, |
| "learning_rate": 6.979083803811173e-06, |
| "loss": 0.4838, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.43410852713178294, |
| "grad_norm": 0.41136178374290466, |
| "learning_rate": 6.970203134683218e-06, |
| "loss": 0.4483, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.43466223698781836, |
| "grad_norm": 0.4319741129875183, |
| "learning_rate": 6.961315101723036e-06, |
| "loss": 0.4738, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.43521594684385384, |
| "grad_norm": 0.3880046010017395, |
| "learning_rate": 6.952419738150546e-06, |
| "loss": 0.4729, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.43576965669988926, |
| "grad_norm": 0.5079280734062195, |
| "learning_rate": 6.94351707721306e-06, |
| "loss": 0.4801, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.4363233665559247, |
| "grad_norm": 0.44152313470840454, |
| "learning_rate": 6.934607152185169e-06, |
| "loss": 0.4843, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4368770764119601, |
| "grad_norm": 0.40796777606010437, |
| "learning_rate": 6.9256899963686145e-06, |
| "loss": 0.4654, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4374307862679956, |
| "grad_norm": 0.5313437581062317, |
| "learning_rate": 6.916765643092162e-06, |
| "loss": 0.4652, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.437984496124031, |
| "grad_norm": 0.508833110332489, |
| "learning_rate": 6.9078341257114765e-06, |
| "loss": 0.4676, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.43853820598006643, |
| "grad_norm": 0.41130679845809937, |
| "learning_rate": 6.898895477609007e-06, |
| "loss": 0.4833, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.4390919158361019, |
| "grad_norm": 0.4427821636199951, |
| "learning_rate": 6.889949732193844e-06, |
| "loss": 0.4807, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.43964562569213733, |
| "grad_norm": 0.46690595149993896, |
| "learning_rate": 6.880996922901613e-06, |
| "loss": 0.5057, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.44019933554817275, |
| "grad_norm": 0.4449859857559204, |
| "learning_rate": 6.8720370831943385e-06, |
| "loss": 0.4806, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4407530454042082, |
| "grad_norm": 0.3911839425563812, |
| "learning_rate": 6.863070246560319e-06, |
| "loss": 0.4865, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.44130675526024365, |
| "grad_norm": 0.39620286226272583, |
| "learning_rate": 6.85409644651401e-06, |
| "loss": 0.4897, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.4418604651162791, |
| "grad_norm": 0.45482540130615234, |
| "learning_rate": 6.845115716595893e-06, |
| "loss": 0.455, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.4424141749723145, |
| "grad_norm": 0.4602965712547302, |
| "learning_rate": 6.836128090372345e-06, |
| "loss": 0.4692, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4429678848283499, |
| "grad_norm": 0.3748089075088501, |
| "learning_rate": 6.827133601435524e-06, |
| "loss": 0.4638, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4435215946843854, |
| "grad_norm": 0.3914950489997864, |
| "learning_rate": 6.818132283403236e-06, |
| "loss": 0.4794, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.4440753045404208, |
| "grad_norm": 0.4420660436153412, |
| "learning_rate": 6.80912416991881e-06, |
| "loss": 0.4861, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.44462901439645625, |
| "grad_norm": 0.47777289152145386, |
| "learning_rate": 6.800109294650981e-06, |
| "loss": 0.5003, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.44518272425249167, |
| "grad_norm": 0.49404582381248474, |
| "learning_rate": 6.7910876912937455e-06, |
| "loss": 0.4506, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.44573643410852715, |
| "grad_norm": 0.40919193625450134, |
| "learning_rate": 6.782059393566254e-06, |
| "loss": 0.4529, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.44629014396456257, |
| "grad_norm": 0.46079081296920776, |
| "learning_rate": 6.773024435212678e-06, |
| "loss": 0.4674, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.446843853820598, |
| "grad_norm": 0.43327951431274414, |
| "learning_rate": 6.763982850002084e-06, |
| "loss": 0.4565, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.44739756367663347, |
| "grad_norm": 0.4099007546901703, |
| "learning_rate": 6.754934671728301e-06, |
| "loss": 0.4714, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4479512735326689, |
| "grad_norm": 0.45909908413887024, |
| "learning_rate": 6.745879934209808e-06, |
| "loss": 0.4851, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.4485049833887043, |
| "grad_norm": 0.42515650391578674, |
| "learning_rate": 6.736818671289596e-06, |
| "loss": 0.4425, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.44905869324473974, |
| "grad_norm": 0.4645708501338959, |
| "learning_rate": 6.7277509168350445e-06, |
| "loss": 0.4846, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4496124031007752, |
| "grad_norm": 0.44562819600105286, |
| "learning_rate": 6.718676704737798e-06, |
| "loss": 0.4713, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.45016611295681064, |
| "grad_norm": 0.4501514434814453, |
| "learning_rate": 6.709596068913635e-06, |
| "loss": 0.4569, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.45071982281284606, |
| "grad_norm": 0.38734912872314453, |
| "learning_rate": 6.700509043302349e-06, |
| "loss": 0.4444, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4512735326688815, |
| "grad_norm": 0.44585832953453064, |
| "learning_rate": 6.6914156618676065e-06, |
| "loss": 0.4747, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.45182724252491696, |
| "grad_norm": 0.473246306180954, |
| "learning_rate": 6.6823159585968355e-06, |
| "loss": 0.4614, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4523809523809524, |
| "grad_norm": 0.44483834505081177, |
| "learning_rate": 6.673209967501093e-06, |
| "loss": 0.4819, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4529346622369878, |
| "grad_norm": 0.4596951901912689, |
| "learning_rate": 6.664097722614934e-06, |
| "loss": 0.4717, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.45348837209302323, |
| "grad_norm": 0.4508548080921173, |
| "learning_rate": 6.654979257996292e-06, |
| "loss": 0.5065, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4540420819490587, |
| "grad_norm": 0.41932225227355957, |
| "learning_rate": 6.645854607726343e-06, |
| "loss": 0.4982, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.45459579180509413, |
| "grad_norm": 0.4616346061229706, |
| "learning_rate": 6.636723805909384e-06, |
| "loss": 0.4782, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.45514950166112955, |
| "grad_norm": 0.41363874077796936, |
| "learning_rate": 6.627586886672707e-06, |
| "loss": 0.4667, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.45570321151716503, |
| "grad_norm": 0.4332622289657593, |
| "learning_rate": 6.6184438841664635e-06, |
| "loss": 0.4681, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.45625692137320045, |
| "grad_norm": 0.4488495886325836, |
| "learning_rate": 6.6092948325635466e-06, |
| "loss": 0.4591, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4568106312292359, |
| "grad_norm": 0.39039409160614014, |
| "learning_rate": 6.600139766059453e-06, |
| "loss": 0.4544, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.4573643410852713, |
| "grad_norm": 0.4736301898956299, |
| "learning_rate": 6.590978718872166e-06, |
| "loss": 0.469, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4579180509413068, |
| "grad_norm": 0.4278981685638428, |
| "learning_rate": 6.58181172524202e-06, |
| "loss": 0.5094, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4584717607973422, |
| "grad_norm": 0.4077831506729126, |
| "learning_rate": 6.572638819431576e-06, |
| "loss": 0.4631, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4590254706533776, |
| "grad_norm": 0.37462881207466125, |
| "learning_rate": 6.563460035725489e-06, |
| "loss": 0.4709, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.45957918050941304, |
| "grad_norm": 0.3800598978996277, |
| "learning_rate": 6.554275408430388e-06, |
| "loss": 0.4896, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4601328903654485, |
| "grad_norm": 0.4581829011440277, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.4839, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.46068660022148394, |
| "grad_norm": 0.3906550705432892, |
| "learning_rate": 6.535888760408722e-06, |
| "loss": 0.4923, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.46124031007751937, |
| "grad_norm": 0.3892533481121063, |
| "learning_rate": 6.526686808404101e-06, |
| "loss": 0.476, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.46179401993355484, |
| "grad_norm": 0.38538169860839844, |
| "learning_rate": 6.517479150254099e-06, |
| "loss": 0.4492, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.46234772978959027, |
| "grad_norm": 0.4162067770957947, |
| "learning_rate": 6.508265820373262e-06, |
| "loss": 0.4496, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.4629014396456257, |
| "grad_norm": 0.4131470322608948, |
| "learning_rate": 6.499046853197338e-06, |
| "loss": 0.4703, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4634551495016611, |
| "grad_norm": 0.425884485244751, |
| "learning_rate": 6.489822283183142e-06, |
| "loss": 0.4682, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4640088593576966, |
| "grad_norm": 0.40070706605911255, |
| "learning_rate": 6.48059214480843e-06, |
| "loss": 0.4764, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.464562569213732, |
| "grad_norm": 0.43763288855552673, |
| "learning_rate": 6.4713564725717736e-06, |
| "loss": 0.4711, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.4114384651184082, |
| "learning_rate": 6.462115300992427e-06, |
| "loss": 0.4701, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.46566998892580286, |
| "grad_norm": 0.44266989827156067, |
| "learning_rate": 6.452868664610197e-06, |
| "loss": 0.4989, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.46622369878183834, |
| "grad_norm": 0.4306333065032959, |
| "learning_rate": 6.443616597985315e-06, |
| "loss": 0.4924, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.46677740863787376, |
| "grad_norm": 0.4200533926486969, |
| "learning_rate": 6.434359135698311e-06, |
| "loss": 0.4782, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4673311184939092, |
| "grad_norm": 0.4041113257408142, |
| "learning_rate": 6.425096312349881e-06, |
| "loss": 0.4602, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4678848283499446, |
| "grad_norm": 0.46134626865386963, |
| "learning_rate": 6.415828162560758e-06, |
| "loss": 0.452, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.4684385382059801, |
| "grad_norm": 0.4863552451133728, |
| "learning_rate": 6.406554720971583e-06, |
| "loss": 0.486, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4689922480620155, |
| "grad_norm": 0.4371113181114197, |
| "learning_rate": 6.397276022242775e-06, |
| "loss": 0.4846, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.4695459579180509, |
| "grad_norm": 0.42974036931991577, |
| "learning_rate": 6.3879921010544055e-06, |
| "loss": 0.4689, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4700996677740864, |
| "grad_norm": 0.44053035974502563, |
| "learning_rate": 6.3787029921060615e-06, |
| "loss": 0.4669, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.4706533776301218, |
| "grad_norm": 0.40121155977249146, |
| "learning_rate": 6.369408730116721e-06, |
| "loss": 0.4883, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.47120708748615725, |
| "grad_norm": 0.42475467920303345, |
| "learning_rate": 6.3601093498246215e-06, |
| "loss": 0.4653, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4717607973421927, |
| "grad_norm": 0.42221271991729736, |
| "learning_rate": 6.350804885987133e-06, |
| "loss": 0.4889, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.47231450719822815, |
| "grad_norm": 0.4220812916755676, |
| "learning_rate": 6.341495373380625e-06, |
| "loss": 0.4519, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.4728682170542636, |
| "grad_norm": 0.42883118987083435, |
| "learning_rate": 6.332180846800335e-06, |
| "loss": 0.4625, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.473421926910299, |
| "grad_norm": 0.40138551592826843, |
| "learning_rate": 6.322861341060241e-06, |
| "loss": 0.4538, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4739756367663344, |
| "grad_norm": 0.4246816635131836, |
| "learning_rate": 6.313536890992935e-06, |
| "loss": 0.4815, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4745293466223699, |
| "grad_norm": 0.39402779936790466, |
| "learning_rate": 6.304207531449486e-06, |
| "loss": 0.4705, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4750830564784053, |
| "grad_norm": 0.40957632660865784, |
| "learning_rate": 6.29487329729931e-06, |
| "loss": 0.478, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.47563676633444074, |
| "grad_norm": 0.39527779817581177, |
| "learning_rate": 6.2855342234300475e-06, |
| "loss": 0.4632, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.40325671434402466, |
| "learning_rate": 6.2761903447474285e-06, |
| "loss": 0.4812, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.47674418604651164, |
| "grad_norm": 0.3848477900028229, |
| "learning_rate": 6.266841696175132e-06, |
| "loss": 0.4764, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.47729789590254706, |
| "grad_norm": 0.36117005348205566, |
| "learning_rate": 6.257488312654678e-06, |
| "loss": 0.4682, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.4778516057585825, |
| "grad_norm": 0.4402804970741272, |
| "learning_rate": 6.248130229145273e-06, |
| "loss": 0.4768, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.47840531561461797, |
| "grad_norm": 0.415265291929245, |
| "learning_rate": 6.238767480623697e-06, |
| "loss": 0.4896, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4789590254706534, |
| "grad_norm": 0.35998308658599854, |
| "learning_rate": 6.229400102084162e-06, |
| "loss": 0.4617, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.4795127353266888, |
| "grad_norm": 0.5202546119689941, |
| "learning_rate": 6.220028128538188e-06, |
| "loss": 0.4704, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.48006644518272423, |
| "grad_norm": 0.3583022654056549, |
| "learning_rate": 6.210651595014468e-06, |
| "loss": 0.4531, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4806201550387597, |
| "grad_norm": 0.40789127349853516, |
| "learning_rate": 6.201270536558738e-06, |
| "loss": 0.471, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.48117386489479513, |
| "grad_norm": 0.4366607367992401, |
| "learning_rate": 6.191884988233647e-06, |
| "loss": 0.478, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.48172757475083056, |
| "grad_norm": 0.36813703179359436, |
| "learning_rate": 6.182494985118625e-06, |
| "loss": 0.459, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.482281284606866, |
| "grad_norm": 0.3980870842933655, |
| "learning_rate": 6.173100562309751e-06, |
| "loss": 0.4494, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.48283499446290146, |
| "grad_norm": 0.3553970456123352, |
| "learning_rate": 6.163701754919626e-06, |
| "loss": 0.4728, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4833887043189369, |
| "grad_norm": 0.38380372524261475, |
| "learning_rate": 6.15429859807724e-06, |
| "loss": 0.4675, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.4839424141749723, |
| "grad_norm": 0.4252444803714752, |
| "learning_rate": 6.14489112692783e-06, |
| "loss": 0.4878, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4844961240310077, |
| "grad_norm": 0.39901813864707947, |
| "learning_rate": 6.1354793766327706e-06, |
| "loss": 0.4662, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.4850498338870432, |
| "grad_norm": 0.398833304643631, |
| "learning_rate": 6.1260633823694224e-06, |
| "loss": 0.4907, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4856035437430786, |
| "grad_norm": 0.39341622591018677, |
| "learning_rate": 6.1166431793310095e-06, |
| "loss": 0.491, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.48615725359911405, |
| "grad_norm": 0.3746095299720764, |
| "learning_rate": 6.10721880272649e-06, |
| "loss": 0.4732, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4867109634551495, |
| "grad_norm": 0.384216845035553, |
| "learning_rate": 6.097790287780417e-06, |
| "loss": 0.4574, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.48726467331118495, |
| "grad_norm": 0.39541691541671753, |
| "learning_rate": 6.08835766973281e-06, |
| "loss": 0.4686, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.48781838316722037, |
| "grad_norm": 0.4029048681259155, |
| "learning_rate": 6.078920983839032e-06, |
| "loss": 0.4843, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4883720930232558, |
| "grad_norm": 0.37557727098464966, |
| "learning_rate": 6.069480265369642e-06, |
| "loss": 0.4515, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.48892580287929127, |
| "grad_norm": 0.469387948513031, |
| "learning_rate": 6.060035549610275e-06, |
| "loss": 0.4749, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4894795127353267, |
| "grad_norm": 0.4006125032901764, |
| "learning_rate": 6.050586871861503e-06, |
| "loss": 0.462, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4900332225913621, |
| "grad_norm": 0.36115720868110657, |
| "learning_rate": 6.041134267438713e-06, |
| "loss": 0.4589, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.49058693244739754, |
| "grad_norm": 0.39920949935913086, |
| "learning_rate": 6.031677771671962e-06, |
| "loss": 0.4508, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.491140642303433, |
| "grad_norm": 0.36050114035606384, |
| "learning_rate": 6.022217419905851e-06, |
| "loss": 0.4697, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.49169435215946844, |
| "grad_norm": 0.36328694224357605, |
| "learning_rate": 6.0127532474993985e-06, |
| "loss": 0.475, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.49224806201550386, |
| "grad_norm": 0.3833206593990326, |
| "learning_rate": 6.0032852898258996e-06, |
| "loss": 0.475, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.49280177187153934, |
| "grad_norm": 0.41371577978134155, |
| "learning_rate": 5.9938135822727984e-06, |
| "loss": 0.4687, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.49335548172757476, |
| "grad_norm": 0.3939271867275238, |
| "learning_rate": 5.984338160241552e-06, |
| "loss": 0.4724, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.4939091915836102, |
| "grad_norm": 0.37482693791389465, |
| "learning_rate": 5.974859059147503e-06, |
| "loss": 0.486, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4944629014396456, |
| "grad_norm": 0.3931341767311096, |
| "learning_rate": 5.965376314419744e-06, |
| "loss": 0.4768, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4950166112956811, |
| "grad_norm": 0.40611469745635986, |
| "learning_rate": 5.955889961500988e-06, |
| "loss": 0.4655, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4955703211517165, |
| "grad_norm": 0.39005982875823975, |
| "learning_rate": 5.946400035847431e-06, |
| "loss": 0.4787, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.49612403100775193, |
| "grad_norm": 0.43622326850891113, |
| "learning_rate": 5.936906572928625e-06, |
| "loss": 0.4943, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.49667774086378735, |
| "grad_norm": 0.3975144624710083, |
| "learning_rate": 5.927409608227339e-06, |
| "loss": 0.4595, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.49723145071982283, |
| "grad_norm": 0.3896026909351349, |
| "learning_rate": 5.917909177239438e-06, |
| "loss": 0.5006, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.49778516057585825, |
| "grad_norm": 0.4361266791820526, |
| "learning_rate": 5.908405315473733e-06, |
| "loss": 0.4685, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4983388704318937, |
| "grad_norm": 0.4157734513282776, |
| "learning_rate": 5.898898058451865e-06, |
| "loss": 0.479, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4988925802879291, |
| "grad_norm": 0.4577910602092743, |
| "learning_rate": 5.889387441708162e-06, |
| "loss": 0.4809, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4994462901439646, |
| "grad_norm": 0.458420991897583, |
| "learning_rate": 5.8798735007895095e-06, |
| "loss": 0.4635, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.3840048015117645, |
| "learning_rate": 5.8703562712552195e-06, |
| "loss": 0.4587, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5005537098560354, |
| "grad_norm": 0.4343316853046417, |
| "learning_rate": 5.860835788676892e-06, |
| "loss": 0.5033, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5011074197120708, |
| "grad_norm": 0.4067619740962982, |
| "learning_rate": 5.851312088638287e-06, |
| "loss": 0.4762, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5016611295681063, |
| "grad_norm": 0.4486081302165985, |
| "learning_rate": 5.841785206735192e-06, |
| "loss": 0.4712, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5022148394241418, |
| "grad_norm": 0.5145954489707947, |
| "learning_rate": 5.832255178575288e-06, |
| "loss": 0.4599, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5027685492801772, |
| "grad_norm": 0.4490329921245575, |
| "learning_rate": 5.822722039778008e-06, |
| "loss": 0.4578, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5033222591362126, |
| "grad_norm": 0.39363399147987366, |
| "learning_rate": 5.813185825974419e-06, |
| "loss": 0.478, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5038759689922481, |
| "grad_norm": 0.4732396602630615, |
| "learning_rate": 5.803646572807078e-06, |
| "loss": 0.4729, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5044296788482835, |
| "grad_norm": 0.43937402963638306, |
| "learning_rate": 5.794104315929904e-06, |
| "loss": 0.4867, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5049833887043189, |
| "grad_norm": 0.38331788778305054, |
| "learning_rate": 5.784559091008037e-06, |
| "loss": 0.4994, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5055370985603543, |
| "grad_norm": 0.36274176836013794, |
| "learning_rate": 5.7750109337177185e-06, |
| "loss": 0.4517, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5060908084163898, |
| "grad_norm": 0.4266386330127716, |
| "learning_rate": 5.7654598797461445e-06, |
| "loss": 0.491, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5066445182724253, |
| "grad_norm": 0.42704376578330994, |
| "learning_rate": 5.755905964791341e-06, |
| "loss": 0.4807, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5071982281284607, |
| "grad_norm": 0.4034692943096161, |
| "learning_rate": 5.746349224562021e-06, |
| "loss": 0.4639, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5077519379844961, |
| "grad_norm": 0.39037638902664185, |
| "learning_rate": 5.736789694777465e-06, |
| "loss": 0.4515, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5083056478405316, |
| "grad_norm": 0.45550546050071716, |
| "learning_rate": 5.727227411167377e-06, |
| "loss": 0.471, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.508859357696567, |
| "grad_norm": 0.4011116325855255, |
| "learning_rate": 5.717662409471751e-06, |
| "loss": 0.4617, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5094130675526024, |
| "grad_norm": 0.4205820560455322, |
| "learning_rate": 5.708094725440742e-06, |
| "loss": 0.4531, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5099667774086378, |
| "grad_norm": 0.5211153626441956, |
| "learning_rate": 5.698524394834531e-06, |
| "loss": 0.5083, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5105204872646734, |
| "grad_norm": 0.40547680854797363, |
| "learning_rate": 5.68895145342319e-06, |
| "loss": 0.4626, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5110741971207088, |
| "grad_norm": 0.4124176502227783, |
| "learning_rate": 5.679375936986553e-06, |
| "loss": 0.4612, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5116279069767442, |
| "grad_norm": 0.413004606962204, |
| "learning_rate": 5.669797881314072e-06, |
| "loss": 0.4644, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5121816168327796, |
| "grad_norm": 0.42170557379722595, |
| "learning_rate": 5.660217322204692e-06, |
| "loss": 0.4743, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.512735326688815, |
| "grad_norm": 0.4040520191192627, |
| "learning_rate": 5.650634295466717e-06, |
| "loss": 0.4898, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5132890365448505, |
| "grad_norm": 0.3825586140155792, |
| "learning_rate": 5.641048836917672e-06, |
| "loss": 0.4778, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5138427464008859, |
| "grad_norm": 0.3947638273239136, |
| "learning_rate": 5.631460982384174e-06, |
| "loss": 0.4488, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5143964562569213, |
| "grad_norm": 0.4007805287837982, |
| "learning_rate": 5.621870767701788e-06, |
| "loss": 0.5048, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5149501661129569, |
| "grad_norm": 0.4218864142894745, |
| "learning_rate": 5.612278228714909e-06, |
| "loss": 0.4668, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5155038759689923, |
| "grad_norm": 0.3780379593372345, |
| "learning_rate": 5.6026834012766155e-06, |
| "loss": 0.433, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5160575858250277, |
| "grad_norm": 0.37311238050460815, |
| "learning_rate": 5.593086321248539e-06, |
| "loss": 0.4871, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5166112956810631, |
| "grad_norm": 0.4193789064884186, |
| "learning_rate": 5.583487024500729e-06, |
| "loss": 0.482, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5171650055370985, |
| "grad_norm": 0.43569833040237427, |
| "learning_rate": 5.573885546911523e-06, |
| "loss": 0.4499, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.517718715393134, |
| "grad_norm": 0.3610110580921173, |
| "learning_rate": 5.5642819243674085e-06, |
| "loss": 0.4838, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5182724252491694, |
| "grad_norm": 0.4621051549911499, |
| "learning_rate": 5.554676192762891e-06, |
| "loss": 0.4646, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5188261351052049, |
| "grad_norm": 0.4081271290779114, |
| "learning_rate": 5.5450683880003555e-06, |
| "loss": 0.4675, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5193798449612403, |
| "grad_norm": 0.4200206696987152, |
| "learning_rate": 5.535458545989939e-06, |
| "loss": 0.4545, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5199335548172758, |
| "grad_norm": 0.44750723242759705, |
| "learning_rate": 5.525846702649394e-06, |
| "loss": 0.5031, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5204872646733112, |
| "grad_norm": 0.4381738305091858, |
| "learning_rate": 5.516232893903946e-06, |
| "loss": 0.4738, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5210409745293466, |
| "grad_norm": 0.39572015404701233, |
| "learning_rate": 5.506617155686177e-06, |
| "loss": 0.4856, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.521594684385382, |
| "grad_norm": 0.38003742694854736, |
| "learning_rate": 5.49699952393587e-06, |
| "loss": 0.4857, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5221483942414175, |
| "grad_norm": 0.364629328250885, |
| "learning_rate": 5.487380034599893e-06, |
| "loss": 0.4649, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5227021040974529, |
| "grad_norm": 0.40337440371513367, |
| "learning_rate": 5.477758723632055e-06, |
| "loss": 0.5055, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5232558139534884, |
| "grad_norm": 0.40286746621131897, |
| "learning_rate": 5.4681356269929704e-06, |
| "loss": 0.4466, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5238095238095238, |
| "grad_norm": 0.368254154920578, |
| "learning_rate": 5.458510780649932e-06, |
| "loss": 0.4984, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5243632336655593, |
| "grad_norm": 0.38113436102867126, |
| "learning_rate": 5.448884220576768e-06, |
| "loss": 0.4902, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5249169435215947, |
| "grad_norm": 0.40411466360092163, |
| "learning_rate": 5.439255982753717e-06, |
| "loss": 0.473, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5254706533776301, |
| "grad_norm": 0.32906490564346313, |
| "learning_rate": 5.429626103167284e-06, |
| "loss": 0.4664, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5260243632336655, |
| "grad_norm": 0.3590736389160156, |
| "learning_rate": 5.41999461781011e-06, |
| "loss": 0.4544, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.526578073089701, |
| "grad_norm": 0.3264465630054474, |
| "learning_rate": 5.4103615626808426e-06, |
| "loss": 0.4455, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5271317829457365, |
| "grad_norm": 0.39324450492858887, |
| "learning_rate": 5.400726973783993e-06, |
| "loss": 0.4935, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5276854928017719, |
| "grad_norm": 0.3779219388961792, |
| "learning_rate": 5.391090887129804e-06, |
| "loss": 0.4859, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5282392026578073, |
| "grad_norm": 0.37633198499679565, |
| "learning_rate": 5.381453338734119e-06, |
| "loss": 0.4607, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5287929125138427, |
| "grad_norm": 0.4018500745296478, |
| "learning_rate": 5.371814364618244e-06, |
| "loss": 0.4624, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5293466223698782, |
| "grad_norm": 0.40497103333473206, |
| "learning_rate": 5.362174000808813e-06, |
| "loss": 0.4885, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5299003322259136, |
| "grad_norm": 0.3769908547401428, |
| "learning_rate": 5.352532283337655e-06, |
| "loss": 0.4967, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.530454042081949, |
| "grad_norm": 0.36260783672332764, |
| "learning_rate": 5.342889248241656e-06, |
| "loss": 0.5198, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5310077519379846, |
| "grad_norm": 0.44563496112823486, |
| "learning_rate": 5.33324493156263e-06, |
| "loss": 0.4935, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.53156146179402, |
| "grad_norm": 0.36103156208992004, |
| "learning_rate": 5.323599369347181e-06, |
| "loss": 0.4701, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5321151716500554, |
| "grad_norm": 0.37980565428733826, |
| "learning_rate": 5.3139525976465675e-06, |
| "loss": 0.4718, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5326688815060908, |
| "grad_norm": 0.4384520351886749, |
| "learning_rate": 5.304304652516566e-06, |
| "loss": 0.4595, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5332225913621262, |
| "grad_norm": 0.3531224727630615, |
| "learning_rate": 5.294655570017344e-06, |
| "loss": 0.4469, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5337763012181617, |
| "grad_norm": 0.3683699071407318, |
| "learning_rate": 5.2850053862133135e-06, |
| "loss": 0.4648, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5343300110741971, |
| "grad_norm": 0.4316402077674866, |
| "learning_rate": 5.27535413717301e-06, |
| "loss": 0.4605, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5348837209302325, |
| "grad_norm": 0.4019063115119934, |
| "learning_rate": 5.265701858968944e-06, |
| "loss": 0.4468, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.535437430786268, |
| "grad_norm": 0.4216720163822174, |
| "learning_rate": 5.256048587677476e-06, |
| "loss": 0.5113, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5359911406423035, |
| "grad_norm": 0.3609980046749115, |
| "learning_rate": 5.246394359378678e-06, |
| "loss": 0.4999, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5365448504983389, |
| "grad_norm": 0.3771976828575134, |
| "learning_rate": 5.236739210156201e-06, |
| "loss": 0.4787, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5370985603543743, |
| "grad_norm": 0.3443913757801056, |
| "learning_rate": 5.22708317609713e-06, |
| "loss": 0.4793, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5376522702104097, |
| "grad_norm": 0.3635353147983551, |
| "learning_rate": 5.217426293291869e-06, |
| "loss": 0.4803, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5382059800664452, |
| "grad_norm": 0.37993019819259644, |
| "learning_rate": 5.207768597833982e-06, |
| "loss": 0.4866, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5387596899224806, |
| "grad_norm": 0.32572728395462036, |
| "learning_rate": 5.198110125820082e-06, |
| "loss": 0.4793, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5393133997785161, |
| "grad_norm": 0.3797398805618286, |
| "learning_rate": 5.188450913349674e-06, |
| "loss": 0.456, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5398671096345515, |
| "grad_norm": 0.40099626779556274, |
| "learning_rate": 5.178790996525038e-06, |
| "loss": 0.4947, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.540420819490587, |
| "grad_norm": 0.35796770453453064, |
| "learning_rate": 5.169130411451083e-06, |
| "loss": 0.4529, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5409745293466224, |
| "grad_norm": 0.39428651332855225, |
| "learning_rate": 5.1594691942352195e-06, |
| "loss": 0.4799, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5415282392026578, |
| "grad_norm": 0.4011034369468689, |
| "learning_rate": 5.149807380987213e-06, |
| "loss": 0.4938, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5420819490586932, |
| "grad_norm": 0.3786684572696686, |
| "learning_rate": 5.140145007819064e-06, |
| "loss": 0.4474, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5426356589147286, |
| "grad_norm": 0.3715454041957855, |
| "learning_rate": 5.1304821108448645e-06, |
| "loss": 0.4317, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5431893687707641, |
| "grad_norm": 0.4249853789806366, |
| "learning_rate": 5.120818726180662e-06, |
| "loss": 0.5195, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5437430786267996, |
| "grad_norm": 0.38960522413253784, |
| "learning_rate": 5.111154889944328e-06, |
| "loss": 0.4787, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.544296788482835, |
| "grad_norm": 0.4187440574169159, |
| "learning_rate": 5.1014906382554206e-06, |
| "loss": 0.4718, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5448504983388704, |
| "grad_norm": 0.4285171627998352, |
| "learning_rate": 5.091826007235053e-06, |
| "loss": 0.48, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5454042081949059, |
| "grad_norm": 0.37511613965034485, |
| "learning_rate": 5.0821610330057545e-06, |
| "loss": 0.4623, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5459579180509413, |
| "grad_norm": 0.40002259612083435, |
| "learning_rate": 5.072495751691338e-06, |
| "loss": 0.4527, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5465116279069767, |
| "grad_norm": 0.3954589366912842, |
| "learning_rate": 5.062830199416764e-06, |
| "loss": 0.4515, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5470653377630121, |
| "grad_norm": 0.40525105595588684, |
| "learning_rate": 5.053164412308005e-06, |
| "loss": 0.4916, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5476190476190477, |
| "grad_norm": 0.3854787051677704, |
| "learning_rate": 5.043498426491911e-06, |
| "loss": 0.5072, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5481727574750831, |
| "grad_norm": 0.3894112706184387, |
| "learning_rate": 5.033832278096077e-06, |
| "loss": 0.4542, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5487264673311185, |
| "grad_norm": 0.4283125400543213, |
| "learning_rate": 5.024166003248703e-06, |
| "loss": 0.463, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5492801771871539, |
| "grad_norm": 0.39848053455352783, |
| "learning_rate": 5.014499638078463e-06, |
| "loss": 0.4835, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5498338870431894, |
| "grad_norm": 0.40187305212020874, |
| "learning_rate": 5.004833218714368e-06, |
| "loss": 0.4613, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5503875968992248, |
| "grad_norm": 0.38503843545913696, |
| "learning_rate": 4.995166781285633e-06, |
| "loss": 0.4754, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.5509413067552602, |
| "grad_norm": 0.388287752866745, |
| "learning_rate": 4.985500361921539e-06, |
| "loss": 0.4695, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5514950166112956, |
| "grad_norm": 0.39484068751335144, |
| "learning_rate": 4.9758339967512995e-06, |
| "loss": 0.4676, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5520487264673312, |
| "grad_norm": 0.4063197076320648, |
| "learning_rate": 4.966167721903925e-06, |
| "loss": 0.4418, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.5526024363233666, |
| "grad_norm": 0.3962213099002838, |
| "learning_rate": 4.956501573508091e-06, |
| "loss": 0.476, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.553156146179402, |
| "grad_norm": 0.4217541515827179, |
| "learning_rate": 4.946835587691997e-06, |
| "loss": 0.4653, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5537098560354374, |
| "grad_norm": 0.3963378369808197, |
| "learning_rate": 4.937169800583237e-06, |
| "loss": 0.4785, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5542635658914729, |
| "grad_norm": 0.40990614891052246, |
| "learning_rate": 4.927504248308663e-06, |
| "loss": 0.4887, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5548172757475083, |
| "grad_norm": 0.41964200139045715, |
| "learning_rate": 4.917838966994246e-06, |
| "loss": 0.4649, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.5553709856035437, |
| "grad_norm": 0.4327523410320282, |
| "learning_rate": 4.908173992764949e-06, |
| "loss": 0.4834, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5559246954595792, |
| "grad_norm": 0.39587798714637756, |
| "learning_rate": 4.898509361744581e-06, |
| "loss": 0.4874, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5564784053156147, |
| "grad_norm": 0.4436945617198944, |
| "learning_rate": 4.888845110055674e-06, |
| "loss": 0.457, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.5570321151716501, |
| "grad_norm": 0.40062418580055237, |
| "learning_rate": 4.87918127381934e-06, |
| "loss": 0.4727, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.5575858250276855, |
| "grad_norm": 0.41501954197883606, |
| "learning_rate": 4.869517889155136e-06, |
| "loss": 0.4644, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5581395348837209, |
| "grad_norm": 0.41733792424201965, |
| "learning_rate": 4.8598549921809364e-06, |
| "loss": 0.4644, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5586932447397563, |
| "grad_norm": 0.3771803677082062, |
| "learning_rate": 4.8501926190127895e-06, |
| "loss": 0.4728, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5592469545957918, |
| "grad_norm": 0.3525100648403168, |
| "learning_rate": 4.840530805764783e-06, |
| "loss": 0.4761, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5598006644518272, |
| "grad_norm": 0.37297025322914124, |
| "learning_rate": 4.830869588548918e-06, |
| "loss": 0.4827, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.5603543743078627, |
| "grad_norm": 0.41562744975090027, |
| "learning_rate": 4.821209003474963e-06, |
| "loss": 0.4667, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.5609080841638981, |
| "grad_norm": 0.39811256527900696, |
| "learning_rate": 4.811549086650327e-06, |
| "loss": 0.459, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5614617940199336, |
| "grad_norm": 0.3600512742996216, |
| "learning_rate": 4.801889874179921e-06, |
| "loss": 0.4695, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.562015503875969, |
| "grad_norm": 0.3680749833583832, |
| "learning_rate": 4.792231402166019e-06, |
| "loss": 0.4491, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5625692137320044, |
| "grad_norm": 0.44280192255973816, |
| "learning_rate": 4.782573706708133e-06, |
| "loss": 0.4828, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5631229235880398, |
| "grad_norm": 0.37224942445755005, |
| "learning_rate": 4.772916823902871e-06, |
| "loss": 0.45, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5636766334440753, |
| "grad_norm": 0.3616679906845093, |
| "learning_rate": 4.763260789843801e-06, |
| "loss": 0.4882, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.5642303433001108, |
| "grad_norm": 0.47011885046958923, |
| "learning_rate": 4.753605640621323e-06, |
| "loss": 0.4584, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.5647840531561462, |
| "grad_norm": 0.4231053292751312, |
| "learning_rate": 4.743951412322524e-06, |
| "loss": 0.4806, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5653377630121816, |
| "grad_norm": 0.38794267177581787, |
| "learning_rate": 4.734298141031057e-06, |
| "loss": 0.4364, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5658914728682171, |
| "grad_norm": 0.44208985567092896, |
| "learning_rate": 4.724645862826992e-06, |
| "loss": 0.5135, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.5664451827242525, |
| "grad_norm": 0.36872124671936035, |
| "learning_rate": 4.7149946137866865e-06, |
| "loss": 0.456, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5669988925802879, |
| "grad_norm": 0.47403469681739807, |
| "learning_rate": 4.705344429982658e-06, |
| "loss": 0.4622, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5675526024363233, |
| "grad_norm": 0.39829424023628235, |
| "learning_rate": 4.6956953474834355e-06, |
| "loss": 0.4562, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5681063122923588, |
| "grad_norm": 0.36361780762672424, |
| "learning_rate": 4.686047402353433e-06, |
| "loss": 0.4603, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.5686600221483943, |
| "grad_norm": 0.37249621748924255, |
| "learning_rate": 4.67640063065282e-06, |
| "loss": 0.4335, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.5692137320044297, |
| "grad_norm": 0.43237707018852234, |
| "learning_rate": 4.6667550684373705e-06, |
| "loss": 0.4742, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5697674418604651, |
| "grad_norm": 0.4022100865840912, |
| "learning_rate": 4.657110751758346e-06, |
| "loss": 0.4659, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5703211517165006, |
| "grad_norm": 0.3795289099216461, |
| "learning_rate": 4.647467716662349e-06, |
| "loss": 0.4469, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.570874861572536, |
| "grad_norm": 0.43125849962234497, |
| "learning_rate": 4.637825999191189e-06, |
| "loss": 0.4573, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.3857414126396179, |
| "learning_rate": 4.628185635381757e-06, |
| "loss": 0.46, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5719822812846068, |
| "grad_norm": 0.3959072530269623, |
| "learning_rate": 4.6185466612658825e-06, |
| "loss": 0.4626, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5725359911406424, |
| "grad_norm": 0.39055493474006653, |
| "learning_rate": 4.608909112870197e-06, |
| "loss": 0.4705, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5730897009966778, |
| "grad_norm": 0.40661776065826416, |
| "learning_rate": 4.599273026216009e-06, |
| "loss": 0.4673, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5736434108527132, |
| "grad_norm": 0.36414268612861633, |
| "learning_rate": 4.589638437319157e-06, |
| "loss": 0.4659, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5741971207087486, |
| "grad_norm": 0.407930463552475, |
| "learning_rate": 4.580005382189891e-06, |
| "loss": 0.4689, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.574750830564784, |
| "grad_norm": 0.34159794449806213, |
| "learning_rate": 4.5703738968327194e-06, |
| "loss": 0.4716, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5753045404208195, |
| "grad_norm": 0.3685348927974701, |
| "learning_rate": 4.560744017246284e-06, |
| "loss": 0.473, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5758582502768549, |
| "grad_norm": 0.37837618589401245, |
| "learning_rate": 4.551115779423234e-06, |
| "loss": 0.4688, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5764119601328903, |
| "grad_norm": 0.352651983499527, |
| "learning_rate": 4.541489219350069e-06, |
| "loss": 0.4649, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5769656699889258, |
| "grad_norm": 0.33390361070632935, |
| "learning_rate": 4.53186437300703e-06, |
| "loss": 0.4572, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5775193798449613, |
| "grad_norm": 0.3548870086669922, |
| "learning_rate": 4.522241276367948e-06, |
| "loss": 0.4641, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.5780730897009967, |
| "grad_norm": 0.33972635865211487, |
| "learning_rate": 4.512619965400107e-06, |
| "loss": 0.457, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5786267995570321, |
| "grad_norm": 0.34692293405532837, |
| "learning_rate": 4.503000476064131e-06, |
| "loss": 0.4372, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5791805094130675, |
| "grad_norm": 0.3577854335308075, |
| "learning_rate": 4.493382844313826e-06, |
| "loss": 0.467, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.579734219269103, |
| "grad_norm": 0.3625636696815491, |
| "learning_rate": 4.483767106096055e-06, |
| "loss": 0.46, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5802879291251384, |
| "grad_norm": 0.3549644947052002, |
| "learning_rate": 4.474153297350608e-06, |
| "loss": 0.4898, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5808416389811739, |
| "grad_norm": 0.35848698019981384, |
| "learning_rate": 4.464541454010061e-06, |
| "loss": 0.4918, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5813953488372093, |
| "grad_norm": 0.3760617971420288, |
| "learning_rate": 4.454931611999646e-06, |
| "loss": 0.4626, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5819490586932448, |
| "grad_norm": 0.3809838891029358, |
| "learning_rate": 4.445323807237112e-06, |
| "loss": 0.4471, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5825027685492802, |
| "grad_norm": 0.3663243353366852, |
| "learning_rate": 4.4357180756325915e-06, |
| "loss": 0.4865, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5830564784053156, |
| "grad_norm": 0.3606342077255249, |
| "learning_rate": 4.426114453088479e-06, |
| "loss": 0.4627, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.583610188261351, |
| "grad_norm": 0.36999428272247314, |
| "learning_rate": 4.4165129754992736e-06, |
| "loss": 0.4499, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5841638981173864, |
| "grad_norm": 0.3812705874443054, |
| "learning_rate": 4.406913678751463e-06, |
| "loss": 0.486, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.584717607973422, |
| "grad_norm": 0.3783071041107178, |
| "learning_rate": 4.397316598723385e-06, |
| "loss": 0.4596, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5852713178294574, |
| "grad_norm": 0.473897248506546, |
| "learning_rate": 4.387721771285091e-06, |
| "loss": 0.4448, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5858250276854928, |
| "grad_norm": 0.390671968460083, |
| "learning_rate": 4.378129232298213e-06, |
| "loss": 0.4495, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5863787375415282, |
| "grad_norm": 0.4029969871044159, |
| "learning_rate": 4.3685390176158295e-06, |
| "loss": 0.4644, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5869324473975637, |
| "grad_norm": 0.4267359972000122, |
| "learning_rate": 4.358951163082328e-06, |
| "loss": 0.4639, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5874861572535991, |
| "grad_norm": 0.40942493081092834, |
| "learning_rate": 4.349365704533285e-06, |
| "loss": 0.455, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5880398671096345, |
| "grad_norm": 0.40282365679740906, |
| "learning_rate": 4.33978267779531e-06, |
| "loss": 0.4696, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5885935769656699, |
| "grad_norm": 0.4315662086009979, |
| "learning_rate": 4.33020211868593e-06, |
| "loss": 0.4714, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5891472868217055, |
| "grad_norm": 0.38608118891716003, |
| "learning_rate": 4.320624063013449e-06, |
| "loss": 0.4901, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5897009966777409, |
| "grad_norm": 0.3713338375091553, |
| "learning_rate": 4.31104854657681e-06, |
| "loss": 0.4566, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5902547065337763, |
| "grad_norm": 0.37049630284309387, |
| "learning_rate": 4.301475605165471e-06, |
| "loss": 0.4652, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5908084163898117, |
| "grad_norm": 0.4406527578830719, |
| "learning_rate": 4.291905274559262e-06, |
| "loss": 0.4783, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5913621262458472, |
| "grad_norm": 0.385221928358078, |
| "learning_rate": 4.282337590528251e-06, |
| "loss": 0.4625, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5919158361018826, |
| "grad_norm": 0.3984861969947815, |
| "learning_rate": 4.272772588832626e-06, |
| "loss": 0.4652, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.592469545957918, |
| "grad_norm": 0.3912111222743988, |
| "learning_rate": 4.263210305222535e-06, |
| "loss": 0.4624, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5930232558139535, |
| "grad_norm": 0.4461668133735657, |
| "learning_rate": 4.25365077543798e-06, |
| "loss": 0.4763, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.593576965669989, |
| "grad_norm": 0.37593454122543335, |
| "learning_rate": 4.244094035208662e-06, |
| "loss": 0.4716, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5941306755260244, |
| "grad_norm": 0.39943602681159973, |
| "learning_rate": 4.2345401202538555e-06, |
| "loss": 0.4556, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5946843853820598, |
| "grad_norm": 0.3936353027820587, |
| "learning_rate": 4.224989066282282e-06, |
| "loss": 0.4495, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5952380952380952, |
| "grad_norm": 0.359379380941391, |
| "learning_rate": 4.2154409089919654e-06, |
| "loss": 0.4216, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5957918050941307, |
| "grad_norm": 0.37251192331314087, |
| "learning_rate": 4.205895684070099e-06, |
| "loss": 0.4679, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5963455149501661, |
| "grad_norm": 0.41129574179649353, |
| "learning_rate": 4.1963534271929235e-06, |
| "loss": 0.4697, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5968992248062015, |
| "grad_norm": 0.39789658784866333, |
| "learning_rate": 4.186814174025582e-06, |
| "loss": 0.485, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.597452934662237, |
| "grad_norm": 0.40488293766975403, |
| "learning_rate": 4.177277960221993e-06, |
| "loss": 0.4886, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5980066445182725, |
| "grad_norm": 0.4230530261993408, |
| "learning_rate": 4.167744821424714e-06, |
| "loss": 0.4796, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5985603543743079, |
| "grad_norm": 0.3796553611755371, |
| "learning_rate": 4.158214793264808e-06, |
| "loss": 0.4483, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5991140642303433, |
| "grad_norm": 0.36902010440826416, |
| "learning_rate": 4.148687911361714e-06, |
| "loss": 0.461, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5996677740863787, |
| "grad_norm": 0.42175090312957764, |
| "learning_rate": 4.139164211323111e-06, |
| "loss": 0.4723, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6002214839424141, |
| "grad_norm": 0.38418591022491455, |
| "learning_rate": 4.129643728744782e-06, |
| "loss": 0.492, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6007751937984496, |
| "grad_norm": 0.39179977774620056, |
| "learning_rate": 4.120126499210491e-06, |
| "loss": 0.4718, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6013289036544851, |
| "grad_norm": 0.3454154431819916, |
| "learning_rate": 4.1106125582918385e-06, |
| "loss": 0.4647, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6018826135105205, |
| "grad_norm": 0.3575115203857422, |
| "learning_rate": 4.101101941548136e-06, |
| "loss": 0.4789, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.602436323366556, |
| "grad_norm": 0.43414831161499023, |
| "learning_rate": 4.091594684526269e-06, |
| "loss": 0.4646, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6029900332225914, |
| "grad_norm": 0.37361279129981995, |
| "learning_rate": 4.082090822760563e-06, |
| "loss": 0.475, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.6035437430786268, |
| "grad_norm": 0.3831091821193695, |
| "learning_rate": 4.072590391772662e-06, |
| "loss": 0.4601, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6040974529346622, |
| "grad_norm": 0.35764801502227783, |
| "learning_rate": 4.063093427071376e-06, |
| "loss": 0.4461, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6046511627906976, |
| "grad_norm": 0.35605666041374207, |
| "learning_rate": 4.05359996415257e-06, |
| "loss": 0.4778, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6052048726467331, |
| "grad_norm": 0.3591611981391907, |
| "learning_rate": 4.044110038499014e-06, |
| "loss": 0.4526, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6057585825027686, |
| "grad_norm": 0.3578726649284363, |
| "learning_rate": 4.034623685580257e-06, |
| "loss": 0.4478, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.606312292358804, |
| "grad_norm": 0.35668474435806274, |
| "learning_rate": 4.0251409408524985e-06, |
| "loss": 0.4653, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6068660022148394, |
| "grad_norm": 0.35329243540763855, |
| "learning_rate": 4.01566183975845e-06, |
| "loss": 0.4801, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6074197120708749, |
| "grad_norm": 0.354142963886261, |
| "learning_rate": 4.006186417727203e-06, |
| "loss": 0.4598, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6079734219269103, |
| "grad_norm": 0.3247867226600647, |
| "learning_rate": 3.996714710174101e-06, |
| "loss": 0.4464, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6085271317829457, |
| "grad_norm": 0.3490568697452545, |
| "learning_rate": 3.987246752500601e-06, |
| "loss": 0.4573, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6090808416389811, |
| "grad_norm": 0.38571274280548096, |
| "learning_rate": 3.97778258009415e-06, |
| "loss": 0.4771, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6096345514950167, |
| "grad_norm": 0.39078643918037415, |
| "learning_rate": 3.968322228328041e-06, |
| "loss": 0.4766, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6101882613510521, |
| "grad_norm": 0.351081520318985, |
| "learning_rate": 3.958865732561288e-06, |
| "loss": 0.4777, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6107419712070875, |
| "grad_norm": 0.3306477665901184, |
| "learning_rate": 3.9494131281384975e-06, |
| "loss": 0.4573, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6112956810631229, |
| "grad_norm": 0.3765094578266144, |
| "learning_rate": 3.939964450389728e-06, |
| "loss": 0.478, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6118493909191584, |
| "grad_norm": 0.3690263032913208, |
| "learning_rate": 3.93051973463036e-06, |
| "loss": 0.4546, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6124031007751938, |
| "grad_norm": 0.40970906615257263, |
| "learning_rate": 3.92107901616097e-06, |
| "loss": 0.5092, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6129568106312292, |
| "grad_norm": 0.35802197456359863, |
| "learning_rate": 3.911642330267191e-06, |
| "loss": 0.4453, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6135105204872646, |
| "grad_norm": 0.35536572337150574, |
| "learning_rate": 3.902209712219586e-06, |
| "loss": 0.4637, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6140642303433002, |
| "grad_norm": 0.3452344834804535, |
| "learning_rate": 3.892781197273512e-06, |
| "loss": 0.4587, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6146179401993356, |
| "grad_norm": 0.3386285901069641, |
| "learning_rate": 3.883356820668991e-06, |
| "loss": 0.4518, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.615171650055371, |
| "grad_norm": 0.3814370632171631, |
| "learning_rate": 3.873936617630578e-06, |
| "loss": 0.4759, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6157253599114064, |
| "grad_norm": 0.40263649821281433, |
| "learning_rate": 3.864520623367231e-06, |
| "loss": 0.4768, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.6162790697674418, |
| "grad_norm": 0.3794068694114685, |
| "learning_rate": 3.855108873072171e-06, |
| "loss": 0.471, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6168327796234773, |
| "grad_norm": 0.36389413475990295, |
| "learning_rate": 3.845701401922763e-06, |
| "loss": 0.4668, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6173864894795127, |
| "grad_norm": 0.3553192913532257, |
| "learning_rate": 3.836298245080374e-06, |
| "loss": 0.4709, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.6179401993355482, |
| "grad_norm": 0.4235493242740631, |
| "learning_rate": 3.82689943769025e-06, |
| "loss": 0.4916, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6184939091915836, |
| "grad_norm": 0.3584069311618805, |
| "learning_rate": 3.817505014881378e-06, |
| "loss": 0.4455, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6190476190476191, |
| "grad_norm": 0.36167699098587036, |
| "learning_rate": 3.8081150117663547e-06, |
| "loss": 0.4902, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6196013289036545, |
| "grad_norm": 0.38572049140930176, |
| "learning_rate": 3.7987294634412643e-06, |
| "loss": 0.4788, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6201550387596899, |
| "grad_norm": 0.35073527693748474, |
| "learning_rate": 3.7893484049855323e-06, |
| "loss": 0.483, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6207087486157253, |
| "grad_norm": 0.3707284927368164, |
| "learning_rate": 3.779971871461813e-06, |
| "loss": 0.4665, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6212624584717608, |
| "grad_norm": 0.3623058497905731, |
| "learning_rate": 3.77059989791584e-06, |
| "loss": 0.4945, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6218161683277962, |
| "grad_norm": 0.36620059609413147, |
| "learning_rate": 3.7612325193763045e-06, |
| "loss": 0.4728, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6223698781838317, |
| "grad_norm": 0.3564867079257965, |
| "learning_rate": 3.7518697708547285e-06, |
| "loss": 0.498, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6229235880398671, |
| "grad_norm": 0.3427715599536896, |
| "learning_rate": 3.742511687345325e-06, |
| "loss": 0.4391, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6234772978959026, |
| "grad_norm": 0.34288641810417175, |
| "learning_rate": 3.7331583038248688e-06, |
| "loss": 0.4517, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.624031007751938, |
| "grad_norm": 0.32349908351898193, |
| "learning_rate": 3.7238096552525736e-06, |
| "loss": 0.4588, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6245847176079734, |
| "grad_norm": 0.3646223247051239, |
| "learning_rate": 3.714465776569952e-06, |
| "loss": 0.4508, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6251384274640088, |
| "grad_norm": 0.40397608280181885, |
| "learning_rate": 3.705126702700691e-06, |
| "loss": 0.4988, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6256921373200443, |
| "grad_norm": 0.419245183467865, |
| "learning_rate": 3.695792468550517e-06, |
| "loss": 0.4731, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6262458471760798, |
| "grad_norm": 0.3632652461528778, |
| "learning_rate": 3.6864631090070656e-06, |
| "loss": 0.482, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6267995570321152, |
| "grad_norm": 0.38198965787887573, |
| "learning_rate": 3.6771386589397608e-06, |
| "loss": 0.4964, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6273532668881506, |
| "grad_norm": 0.38686636090278625, |
| "learning_rate": 3.6678191531996683e-06, |
| "loss": 0.4447, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.627906976744186, |
| "grad_norm": 0.4080636501312256, |
| "learning_rate": 3.658504626619376e-06, |
| "loss": 0.4557, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6284606866002215, |
| "grad_norm": 0.3649447560310364, |
| "learning_rate": 3.6491951140128685e-06, |
| "loss": 0.4828, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6290143964562569, |
| "grad_norm": 0.45332175493240356, |
| "learning_rate": 3.639890650175379e-06, |
| "loss": 0.4802, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6295681063122923, |
| "grad_norm": 0.361508846282959, |
| "learning_rate": 3.6305912698832813e-06, |
| "loss": 0.4757, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6301218161683277, |
| "grad_norm": 0.3899231255054474, |
| "learning_rate": 3.6212970078939414e-06, |
| "loss": 0.4664, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6306755260243633, |
| "grad_norm": 0.424633651971817, |
| "learning_rate": 3.6120078989455953e-06, |
| "loss": 0.4905, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6312292358803987, |
| "grad_norm": 0.37771520018577576, |
| "learning_rate": 3.6027239777572253e-06, |
| "loss": 0.4464, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6317829457364341, |
| "grad_norm": 0.3850044012069702, |
| "learning_rate": 3.593445279028418e-06, |
| "loss": 0.4665, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6323366555924695, |
| "grad_norm": 0.44365182518959045, |
| "learning_rate": 3.5841718374392435e-06, |
| "loss": 0.4637, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.632890365448505, |
| "grad_norm": 0.4106205403804779, |
| "learning_rate": 3.5749036876501196e-06, |
| "loss": 0.4544, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6334440753045404, |
| "grad_norm": 0.32994240522384644, |
| "learning_rate": 3.5656408643016892e-06, |
| "loss": 0.4729, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6339977851605758, |
| "grad_norm": 0.38055476546287537, |
| "learning_rate": 3.5563834020146864e-06, |
| "loss": 0.4486, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6345514950166113, |
| "grad_norm": 0.48720213770866394, |
| "learning_rate": 3.5471313353898056e-06, |
| "loss": 0.4491, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6351052048726468, |
| "grad_norm": 0.43611204624176025, |
| "learning_rate": 3.5378846990075734e-06, |
| "loss": 0.5003, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6356589147286822, |
| "grad_norm": 0.38427719473838806, |
| "learning_rate": 3.5286435274282277e-06, |
| "loss": 0.4511, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6362126245847176, |
| "grad_norm": 0.39842531085014343, |
| "learning_rate": 3.5194078551915704e-06, |
| "loss": 0.4951, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.636766334440753, |
| "grad_norm": 0.4029242694377899, |
| "learning_rate": 3.5101777168168603e-06, |
| "loss": 0.482, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6373200442967885, |
| "grad_norm": 0.35558003187179565, |
| "learning_rate": 3.5009531468026646e-06, |
| "loss": 0.4548, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6378737541528239, |
| "grad_norm": 0.36913105845451355, |
| "learning_rate": 3.491734179626738e-06, |
| "loss": 0.4639, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6384274640088593, |
| "grad_norm": 0.3489953279495239, |
| "learning_rate": 3.482520849745902e-06, |
| "loss": 0.4699, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6389811738648948, |
| "grad_norm": 0.3615221679210663, |
| "learning_rate": 3.4733131915959008e-06, |
| "loss": 0.4629, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6395348837209303, |
| "grad_norm": 0.4305569529533386, |
| "learning_rate": 3.46411123959128e-06, |
| "loss": 0.4781, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6400885935769657, |
| "grad_norm": 0.3422296643257141, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.453, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.6406423034330011, |
| "grad_norm": 0.4306909143924713, |
| "learning_rate": 3.4457245915696134e-06, |
| "loss": 0.4936, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6411960132890365, |
| "grad_norm": 0.3581586480140686, |
| "learning_rate": 3.436539964274512e-06, |
| "loss": 0.4652, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.641749723145072, |
| "grad_norm": 0.37111344933509827, |
| "learning_rate": 3.4273611805684254e-06, |
| "loss": 0.4532, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6423034330011074, |
| "grad_norm": 0.4637528955936432, |
| "learning_rate": 3.41818827475798e-06, |
| "loss": 0.4633, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.3899680972099304, |
| "learning_rate": 3.409021281127835e-06, |
| "loss": 0.448, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6434108527131783, |
| "grad_norm": 0.3871779441833496, |
| "learning_rate": 3.3998602339405495e-06, |
| "loss": 0.4699, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6439645625692137, |
| "grad_norm": 0.39854106307029724, |
| "learning_rate": 3.3907051674364555e-06, |
| "loss": 0.4591, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.6445182724252492, |
| "grad_norm": 0.41372933983802795, |
| "learning_rate": 3.381556115833538e-06, |
| "loss": 0.4825, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6450719822812846, |
| "grad_norm": 0.35258594155311584, |
| "learning_rate": 3.3724131133272937e-06, |
| "loss": 0.4702, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.64562569213732, |
| "grad_norm": 0.3785751461982727, |
| "learning_rate": 3.3632761940906167e-06, |
| "loss": 0.4598, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.6461794019933554, |
| "grad_norm": 0.36929285526275635, |
| "learning_rate": 3.35414539227366e-06, |
| "loss": 0.4626, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.646733111849391, |
| "grad_norm": 0.3928925693035126, |
| "learning_rate": 3.3450207420037094e-06, |
| "loss": 0.4503, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.6472868217054264, |
| "grad_norm": 0.43884503841400146, |
| "learning_rate": 3.3359022773850673e-06, |
| "loss": 0.4991, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6478405315614618, |
| "grad_norm": 0.40776365995407104, |
| "learning_rate": 3.3267900324989087e-06, |
| "loss": 0.4883, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6483942414174972, |
| "grad_norm": 0.3640215992927551, |
| "learning_rate": 3.3176840414031653e-06, |
| "loss": 0.4856, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.6489479512735327, |
| "grad_norm": 0.3607640862464905, |
| "learning_rate": 3.3085843381323956e-06, |
| "loss": 0.4526, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6495016611295681, |
| "grad_norm": 0.40391892194747925, |
| "learning_rate": 3.299490956697653e-06, |
| "loss": 0.4709, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.6500553709856035, |
| "grad_norm": 0.3893533945083618, |
| "learning_rate": 3.2904039310863654e-06, |
| "loss": 0.4507, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6506090808416389, |
| "grad_norm": 0.34658515453338623, |
| "learning_rate": 3.281323295262203e-06, |
| "loss": 0.4437, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6511627906976745, |
| "grad_norm": 0.38739728927612305, |
| "learning_rate": 3.2722490831649568e-06, |
| "loss": 0.4797, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6517165005537099, |
| "grad_norm": 0.38036441802978516, |
| "learning_rate": 3.2631813287104065e-06, |
| "loss": 0.4714, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.6522702104097453, |
| "grad_norm": 0.3743520677089691, |
| "learning_rate": 3.254120065790193e-06, |
| "loss": 0.4658, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6528239202657807, |
| "grad_norm": 0.346492737531662, |
| "learning_rate": 3.2450653282717003e-06, |
| "loss": 0.4731, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6533776301218162, |
| "grad_norm": 0.3950633704662323, |
| "learning_rate": 3.2360171499979186e-06, |
| "loss": 0.4668, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6539313399778516, |
| "grad_norm": 0.36540117859840393, |
| "learning_rate": 3.226975564787322e-06, |
| "loss": 0.4664, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.654485049833887, |
| "grad_norm": 0.3785662055015564, |
| "learning_rate": 3.217940606433747e-06, |
| "loss": 0.4797, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.6550387596899225, |
| "grad_norm": 0.36079704761505127, |
| "learning_rate": 3.2089123087062574e-06, |
| "loss": 0.4706, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.655592469545958, |
| "grad_norm": 0.4160771667957306, |
| "learning_rate": 3.199890705349021e-06, |
| "loss": 0.4633, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6561461794019934, |
| "grad_norm": 0.3826664984226227, |
| "learning_rate": 3.1908758300811902e-06, |
| "loss": 0.4436, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6566998892580288, |
| "grad_norm": 0.3960643410682678, |
| "learning_rate": 3.181867716596765e-06, |
| "loss": 0.47, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.6572535991140642, |
| "grad_norm": 0.37728485465049744, |
| "learning_rate": 3.172866398564477e-06, |
| "loss": 0.462, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.6578073089700996, |
| "grad_norm": 0.3493827283382416, |
| "learning_rate": 3.1638719096276565e-06, |
| "loss": 0.4714, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.6583610188261351, |
| "grad_norm": 0.4141739308834076, |
| "learning_rate": 3.1548842834041083e-06, |
| "loss": 0.4814, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6589147286821705, |
| "grad_norm": 0.3749389052391052, |
| "learning_rate": 3.1459035534859906e-06, |
| "loss": 0.4969, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.659468438538206, |
| "grad_norm": 0.3280302882194519, |
| "learning_rate": 3.1369297534396823e-06, |
| "loss": 0.4745, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6600221483942414, |
| "grad_norm": 0.368042916059494, |
| "learning_rate": 3.1279629168056635e-06, |
| "loss": 0.4589, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.6605758582502769, |
| "grad_norm": 0.3801417052745819, |
| "learning_rate": 3.1190030770983894e-06, |
| "loss": 0.4485, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.6611295681063123, |
| "grad_norm": 0.36523929238319397, |
| "learning_rate": 3.1100502678061566e-06, |
| "loss": 0.4749, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6616832779623477, |
| "grad_norm": 0.3876314163208008, |
| "learning_rate": 3.1011045223909954e-06, |
| "loss": 0.4407, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.6622369878183831, |
| "grad_norm": 0.3626687526702881, |
| "learning_rate": 3.092165874288525e-06, |
| "loss": 0.4769, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6627906976744186, |
| "grad_norm": 0.38137850165367126, |
| "learning_rate": 3.08323435690784e-06, |
| "loss": 0.4747, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6633444075304541, |
| "grad_norm": 0.3258206844329834, |
| "learning_rate": 3.0743100036313876e-06, |
| "loss": 0.4757, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6638981173864895, |
| "grad_norm": 0.3893488049507141, |
| "learning_rate": 3.065392847814832e-06, |
| "loss": 0.4631, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.6644518272425249, |
| "grad_norm": 0.4116152226924896, |
| "learning_rate": 3.056482922786942e-06, |
| "loss": 0.4965, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6650055370985604, |
| "grad_norm": 0.3937761187553406, |
| "learning_rate": 3.0475802618494564e-06, |
| "loss": 0.4452, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6655592469545958, |
| "grad_norm": 0.38089385628700256, |
| "learning_rate": 3.038684898276964e-06, |
| "loss": 0.4912, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.6661129568106312, |
| "grad_norm": 0.36229807138442993, |
| "learning_rate": 3.0297968653167833e-06, |
| "loss": 0.4743, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.374954491853714, |
| "learning_rate": 3.0209161961888283e-06, |
| "loss": 0.4553, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.667220376522702, |
| "grad_norm": 0.39966997504234314, |
| "learning_rate": 3.0120429240854927e-06, |
| "loss": 0.4504, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.6677740863787376, |
| "grad_norm": 0.3550257682800293, |
| "learning_rate": 3.0031770821715233e-06, |
| "loss": 0.4563, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.668327796234773, |
| "grad_norm": 0.38138848543167114, |
| "learning_rate": 2.9943187035838937e-06, |
| "loss": 0.4545, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.6688815060908084, |
| "grad_norm": 0.3662111759185791, |
| "learning_rate": 2.9854678214316875e-06, |
| "loss": 0.4671, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6694352159468439, |
| "grad_norm": 0.3831300735473633, |
| "learning_rate": 2.9766244687959643e-06, |
| "loss": 0.4651, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.6699889258028793, |
| "grad_norm": 0.3436860144138336, |
| "learning_rate": 2.967788678729641e-06, |
| "loss": 0.4455, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6705426356589147, |
| "grad_norm": 0.40770378708839417, |
| "learning_rate": 2.9589604842573762e-06, |
| "loss": 0.4374, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.6710963455149501, |
| "grad_norm": 0.3695485591888428, |
| "learning_rate": 2.9501399183754297e-06, |
| "loss": 0.4902, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6716500553709857, |
| "grad_norm": 0.3176988363265991, |
| "learning_rate": 2.941327014051554e-06, |
| "loss": 0.464, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.6722037652270211, |
| "grad_norm": 0.3670944273471832, |
| "learning_rate": 2.932521804224866e-06, |
| "loss": 0.4565, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.6727574750830565, |
| "grad_norm": 0.3454347252845764, |
| "learning_rate": 2.92372432180572e-06, |
| "loss": 0.471, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.6733111849390919, |
| "grad_norm": 0.36517590284347534, |
| "learning_rate": 2.914934599675594e-06, |
| "loss": 0.4668, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.6738648947951273, |
| "grad_norm": 0.3948805034160614, |
| "learning_rate": 2.906152670686957e-06, |
| "loss": 0.4839, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.6744186046511628, |
| "grad_norm": 0.39370593428611755, |
| "learning_rate": 2.897378567663147e-06, |
| "loss": 0.4666, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.6749723145071982, |
| "grad_norm": 0.33612701296806335, |
| "learning_rate": 2.8886123233982623e-06, |
| "loss": 0.4575, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.6755260243632336, |
| "grad_norm": 0.3216228485107422, |
| "learning_rate": 2.879853970657016e-06, |
| "loss": 0.4569, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6760797342192691, |
| "grad_norm": 0.35591426491737366, |
| "learning_rate": 2.871103542174637e-06, |
| "loss": 0.4555, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.6766334440753046, |
| "grad_norm": 0.3582440912723541, |
| "learning_rate": 2.862361070656728e-06, |
| "loss": 0.4594, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.67718715393134, |
| "grad_norm": 0.34880882501602173, |
| "learning_rate": 2.853626588779154e-06, |
| "loss": 0.4193, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.6777408637873754, |
| "grad_norm": 0.3358144462108612, |
| "learning_rate": 2.844900129187922e-06, |
| "loss": 0.4843, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.6782945736434108, |
| "grad_norm": 0.3304033875465393, |
| "learning_rate": 2.836181724499051e-06, |
| "loss": 0.4528, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.6788482834994463, |
| "grad_norm": 0.33431103825569153, |
| "learning_rate": 2.827471407298451e-06, |
| "loss": 0.4593, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.6794019933554817, |
| "grad_norm": 0.3479402959346771, |
| "learning_rate": 2.8187692101418127e-06, |
| "loss": 0.4689, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.6799557032115172, |
| "grad_norm": 0.3931315243244171, |
| "learning_rate": 2.8100751655544716e-06, |
| "loss": 0.4749, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.6805094130675526, |
| "grad_norm": 0.36747539043426514, |
| "learning_rate": 2.8013893060312923e-06, |
| "loss": 0.4823, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.6810631229235881, |
| "grad_norm": 0.3489404618740082, |
| "learning_rate": 2.792711664036547e-06, |
| "loss": 0.467, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6816168327796235, |
| "grad_norm": 0.318730890750885, |
| "learning_rate": 2.7840422720037943e-06, |
| "loss": 0.4383, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.6821705426356589, |
| "grad_norm": 0.37444746494293213, |
| "learning_rate": 2.7753811623357607e-06, |
| "loss": 0.4608, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.6827242524916943, |
| "grad_norm": 0.3400449752807617, |
| "learning_rate": 2.7667283674042132e-06, |
| "loss": 0.4401, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.6832779623477298, |
| "grad_norm": 0.3871501684188843, |
| "learning_rate": 2.7580839195498397e-06, |
| "loss": 0.4544, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.6838316722037652, |
| "grad_norm": 0.34454110264778137, |
| "learning_rate": 2.749447851082137e-06, |
| "loss": 0.4701, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.6843853820598007, |
| "grad_norm": 0.3465222716331482, |
| "learning_rate": 2.7408201942792755e-06, |
| "loss": 0.4815, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.6849390919158361, |
| "grad_norm": 0.3567723333835602, |
| "learning_rate": 2.732200981387993e-06, |
| "loss": 0.4555, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.6854928017718716, |
| "grad_norm": 0.30830711126327515, |
| "learning_rate": 2.723590244623462e-06, |
| "loss": 0.4738, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.686046511627907, |
| "grad_norm": 0.3953273296356201, |
| "learning_rate": 2.714988016169175e-06, |
| "loss": 0.4785, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.6866002214839424, |
| "grad_norm": 0.36648789048194885, |
| "learning_rate": 2.706394328176829e-06, |
| "loss": 0.4632, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6871539313399778, |
| "grad_norm": 0.3629242479801178, |
| "learning_rate": 2.697809212766195e-06, |
| "loss": 0.4333, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6877076411960132, |
| "grad_norm": 0.3562483489513397, |
| "learning_rate": 2.6892327020250013e-06, |
| "loss": 0.4497, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.6882613510520488, |
| "grad_norm": 0.3277631103992462, |
| "learning_rate": 2.6806648280088243e-06, |
| "loss": 0.4761, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.6888150609080842, |
| "grad_norm": 0.3348289132118225, |
| "learning_rate": 2.67210562274095e-06, |
| "loss": 0.4475, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.6893687707641196, |
| "grad_norm": 0.34823429584503174, |
| "learning_rate": 2.663555118212272e-06, |
| "loss": 0.4468, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.689922480620155, |
| "grad_norm": 0.362206369638443, |
| "learning_rate": 2.655013346381158e-06, |
| "loss": 0.4937, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6904761904761905, |
| "grad_norm": 0.36554214358329773, |
| "learning_rate": 2.646480339173337e-06, |
| "loss": 0.4469, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6910299003322259, |
| "grad_norm": 0.35671401023864746, |
| "learning_rate": 2.6379561284817856e-06, |
| "loss": 0.4761, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6915836101882613, |
| "grad_norm": 0.38154640793800354, |
| "learning_rate": 2.6294407461665927e-06, |
| "loss": 0.4695, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6921373200442967, |
| "grad_norm": 0.3522584140300751, |
| "learning_rate": 2.620934224054861e-06, |
| "loss": 0.4794, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6926910299003323, |
| "grad_norm": 0.3220556974411011, |
| "learning_rate": 2.612436593940568e-06, |
| "loss": 0.4845, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6932447397563677, |
| "grad_norm": 0.3791353702545166, |
| "learning_rate": 2.6039478875844603e-06, |
| "loss": 0.4887, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6937984496124031, |
| "grad_norm": 0.34861162304878235, |
| "learning_rate": 2.595468136713934e-06, |
| "loss": 0.4857, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6943521594684385, |
| "grad_norm": 0.3978254199028015, |
| "learning_rate": 2.586997373022908e-06, |
| "loss": 0.4762, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.694905869324474, |
| "grad_norm": 0.375564843416214, |
| "learning_rate": 2.578535628171711e-06, |
| "loss": 0.4591, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6954595791805094, |
| "grad_norm": 0.4125734567642212, |
| "learning_rate": 2.57008293378697e-06, |
| "loss": 0.453, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6960132890365448, |
| "grad_norm": 0.3337445855140686, |
| "learning_rate": 2.561639321461476e-06, |
| "loss": 0.4481, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6965669988925803, |
| "grad_norm": 0.38525938987731934, |
| "learning_rate": 2.5532048227540773e-06, |
| "loss": 0.4791, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6971207087486158, |
| "grad_norm": 0.35480937361717224, |
| "learning_rate": 2.5447794691895657e-06, |
| "loss": 0.4436, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 0.34399178624153137, |
| "learning_rate": 2.536363292258543e-06, |
| "loss": 0.4605, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.6982281284606866, |
| "grad_norm": 0.3646879196166992, |
| "learning_rate": 2.5279563234173177e-06, |
| "loss": 0.4759, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.698781838316722, |
| "grad_norm": 0.36042407155036926, |
| "learning_rate": 2.519558594087778e-06, |
| "loss": 0.4642, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6993355481727574, |
| "grad_norm": 0.35599207878112793, |
| "learning_rate": 2.511170135657286e-06, |
| "loss": 0.4768, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6998892580287929, |
| "grad_norm": 0.3732999265193939, |
| "learning_rate": 2.5027909794785452e-06, |
| "loss": 0.4819, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7004429678848284, |
| "grad_norm": 0.37701067328453064, |
| "learning_rate": 2.494421156869493e-06, |
| "loss": 0.4843, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7009966777408638, |
| "grad_norm": 0.35381215810775757, |
| "learning_rate": 2.4860606991131857e-06, |
| "loss": 0.4837, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7015503875968992, |
| "grad_norm": 0.3333852291107178, |
| "learning_rate": 2.4777096374576724e-06, |
| "loss": 0.4719, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.7021040974529347, |
| "grad_norm": 0.3859536647796631, |
| "learning_rate": 2.4693680031158844e-06, |
| "loss": 0.479, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7026578073089701, |
| "grad_norm": 0.4252912402153015, |
| "learning_rate": 2.4610358272655214e-06, |
| "loss": 0.4704, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7032115171650055, |
| "grad_norm": 0.37061989307403564, |
| "learning_rate": 2.4527131410489267e-06, |
| "loss": 0.4977, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7037652270210409, |
| "grad_norm": 0.32284095883369446, |
| "learning_rate": 2.444399975572974e-06, |
| "loss": 0.4469, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7043189368770764, |
| "grad_norm": 0.3401859700679779, |
| "learning_rate": 2.4360963619089584e-06, |
| "loss": 0.4563, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7048726467331119, |
| "grad_norm": 0.3887861371040344, |
| "learning_rate": 2.4278023310924676e-06, |
| "loss": 0.4489, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.7054263565891473, |
| "grad_norm": 0.3525770902633667, |
| "learning_rate": 2.4195179141232787e-06, |
| "loss": 0.4727, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7059800664451827, |
| "grad_norm": 0.3447239398956299, |
| "learning_rate": 2.4112431419652305e-06, |
| "loss": 0.4821, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7065337763012182, |
| "grad_norm": 0.37898221611976624, |
| "learning_rate": 2.402978045546114e-06, |
| "loss": 0.4754, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7070874861572536, |
| "grad_norm": 0.3542633056640625, |
| "learning_rate": 2.3947226557575615e-06, |
| "loss": 0.5011, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.707641196013289, |
| "grad_norm": 0.33939436078071594, |
| "learning_rate": 2.3864770034549186e-06, |
| "loss": 0.4461, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7081949058693244, |
| "grad_norm": 0.34798309206962585, |
| "learning_rate": 2.3782411194571425e-06, |
| "loss": 0.4591, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.70874861572536, |
| "grad_norm": 0.3668760061264038, |
| "learning_rate": 2.3700150345466754e-06, |
| "loss": 0.4584, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7093023255813954, |
| "grad_norm": 0.37262019515037537, |
| "learning_rate": 2.3617987794693358e-06, |
| "loss": 0.478, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7098560354374308, |
| "grad_norm": 0.3444177508354187, |
| "learning_rate": 2.353592384934206e-06, |
| "loss": 0.488, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7104097452934662, |
| "grad_norm": 0.3594864010810852, |
| "learning_rate": 2.345395881613507e-06, |
| "loss": 0.4681, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.7109634551495017, |
| "grad_norm": 0.3391314744949341, |
| "learning_rate": 2.337209300142494e-06, |
| "loss": 0.4594, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7115171650055371, |
| "grad_norm": 0.35806921124458313, |
| "learning_rate": 2.3290326711193407e-06, |
| "loss": 0.4779, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7120708748615725, |
| "grad_norm": 0.39370259642601013, |
| "learning_rate": 2.320866025105016e-06, |
| "loss": 0.4686, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7126245847176079, |
| "grad_norm": 0.3590087592601776, |
| "learning_rate": 2.3127093926231842e-06, |
| "loss": 0.4743, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7131782945736435, |
| "grad_norm": 0.3464341163635254, |
| "learning_rate": 2.304562804160077e-06, |
| "loss": 0.4739, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7137320044296789, |
| "grad_norm": 0.31786057353019714, |
| "learning_rate": 2.2964262901643875e-06, |
| "loss": 0.4318, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.3480754792690277, |
| "learning_rate": 2.288299881047153e-06, |
| "loss": 0.4591, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7148394241417497, |
| "grad_norm": 0.344590961933136, |
| "learning_rate": 2.2801836071816476e-06, |
| "loss": 0.458, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7153931339977851, |
| "grad_norm": 0.3685104250907898, |
| "learning_rate": 2.2720774989032583e-06, |
| "loss": 0.4689, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7159468438538206, |
| "grad_norm": 0.3524629473686218, |
| "learning_rate": 2.263981586509381e-06, |
| "loss": 0.4528, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.716500553709856, |
| "grad_norm": 0.31772321462631226, |
| "learning_rate": 2.255895900259298e-06, |
| "loss": 0.4476, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7170542635658915, |
| "grad_norm": 0.3279765844345093, |
| "learning_rate": 2.2478204703740796e-06, |
| "loss": 0.4553, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.717607973421927, |
| "grad_norm": 0.3653477430343628, |
| "learning_rate": 2.2397553270364546e-06, |
| "loss": 0.4362, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7181616832779624, |
| "grad_norm": 0.3631284534931183, |
| "learning_rate": 2.2317005003907044e-06, |
| "loss": 0.4776, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.7187153931339978, |
| "grad_norm": 0.33382081985473633, |
| "learning_rate": 2.2236560205425565e-06, |
| "loss": 0.4563, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7192691029900332, |
| "grad_norm": 0.3235117793083191, |
| "learning_rate": 2.2156219175590623e-06, |
| "loss": 0.4498, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.7198228128460686, |
| "grad_norm": 0.3693845570087433, |
| "learning_rate": 2.2075982214684855e-06, |
| "loss": 0.4643, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7203765227021041, |
| "grad_norm": 0.36044248938560486, |
| "learning_rate": 2.1995849622602017e-06, |
| "loss": 0.4705, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7209302325581395, |
| "grad_norm": 0.3060952425003052, |
| "learning_rate": 2.1915821698845687e-06, |
| "loss": 0.443, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.721483942414175, |
| "grad_norm": 0.365112841129303, |
| "learning_rate": 2.18358987425283e-06, |
| "loss": 0.4414, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7220376522702104, |
| "grad_norm": 0.35041436553001404, |
| "learning_rate": 2.175608105236993e-06, |
| "loss": 0.4599, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7225913621262459, |
| "grad_norm": 0.3773949444293976, |
| "learning_rate": 2.16763689266972e-06, |
| "loss": 0.4568, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7231450719822813, |
| "grad_norm": 0.3188624978065491, |
| "learning_rate": 2.159676266344222e-06, |
| "loss": 0.4283, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7236987818383167, |
| "grad_norm": 0.35047447681427, |
| "learning_rate": 2.151726256014136e-06, |
| "loss": 0.4658, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.7242524916943521, |
| "grad_norm": 0.33076420426368713, |
| "learning_rate": 2.143786891393429e-06, |
| "loss": 0.457, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7248062015503876, |
| "grad_norm": 0.32635653018951416, |
| "learning_rate": 2.1358582021562724e-06, |
| "loss": 0.4376, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7253599114064231, |
| "grad_norm": 0.35420915484428406, |
| "learning_rate": 2.1279402179369363e-06, |
| "loss": 0.4753, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7259136212624585, |
| "grad_norm": 0.330625981092453, |
| "learning_rate": 2.120032968329687e-06, |
| "loss": 0.4578, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7264673311184939, |
| "grad_norm": 0.3413280248641968, |
| "learning_rate": 2.112136482888663e-06, |
| "loss": 0.483, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7270210409745294, |
| "grad_norm": 0.32706838846206665, |
| "learning_rate": 2.10425079112777e-06, |
| "loss": 0.4465, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.7275747508305648, |
| "grad_norm": 0.300112247467041, |
| "learning_rate": 2.0963759225205764e-06, |
| "loss": 0.4377, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.7281284606866002, |
| "grad_norm": 0.3354182243347168, |
| "learning_rate": 2.088511906500193e-06, |
| "loss": 0.4747, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.7286821705426356, |
| "grad_norm": 0.3380527198314667, |
| "learning_rate": 2.0806587724591725e-06, |
| "loss": 0.4821, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.729235880398671, |
| "grad_norm": 0.35922887921333313, |
| "learning_rate": 2.0728165497493913e-06, |
| "loss": 0.4804, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.7297895902547066, |
| "grad_norm": 0.3212301433086395, |
| "learning_rate": 2.0649852676819426e-06, |
| "loss": 0.4503, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.730343300110742, |
| "grad_norm": 0.3472149074077606, |
| "learning_rate": 2.0571649555270345e-06, |
| "loss": 0.4654, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.7308970099667774, |
| "grad_norm": 0.3622226119041443, |
| "learning_rate": 2.049355642513868e-06, |
| "loss": 0.4337, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7314507198228128, |
| "grad_norm": 0.353891521692276, |
| "learning_rate": 2.0415573578305343e-06, |
| "loss": 0.4485, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.7320044296788483, |
| "grad_norm": 0.3391493558883667, |
| "learning_rate": 2.0337701306239048e-06, |
| "loss": 0.4843, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.7325581395348837, |
| "grad_norm": 0.32824766635894775, |
| "learning_rate": 2.025993989999528e-06, |
| "loss": 0.4853, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.7331118493909191, |
| "grad_norm": 0.33276161551475525, |
| "learning_rate": 2.0182289650215082e-06, |
| "loss": 0.4489, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.7336655592469546, |
| "grad_norm": 0.3398074209690094, |
| "learning_rate": 2.0104750847124075e-06, |
| "loss": 0.4525, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.7342192691029901, |
| "grad_norm": 0.34588155150413513, |
| "learning_rate": 2.0027323780531312e-06, |
| "loss": 0.4582, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.7347729789590255, |
| "grad_norm": 0.333475798368454, |
| "learning_rate": 1.995000873982826e-06, |
| "loss": 0.4527, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.7353266888150609, |
| "grad_norm": 0.3277673125267029, |
| "learning_rate": 1.9872806013987626e-06, |
| "loss": 0.453, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.7358803986710963, |
| "grad_norm": 0.329764187335968, |
| "learning_rate": 1.9795715891562393e-06, |
| "loss": 0.4547, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.7364341085271318, |
| "grad_norm": 0.3777225911617279, |
| "learning_rate": 1.9718738660684627e-06, |
| "loss": 0.4953, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7369878183831672, |
| "grad_norm": 0.35038667917251587, |
| "learning_rate": 1.9641874609064443e-06, |
| "loss": 0.4764, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.7375415282392026, |
| "grad_norm": 0.3255554437637329, |
| "learning_rate": 1.956512402398899e-06, |
| "loss": 0.4557, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.7380952380952381, |
| "grad_norm": 0.32248249650001526, |
| "learning_rate": 1.948848719232128e-06, |
| "loss": 0.4455, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.7386489479512736, |
| "grad_norm": 0.34824302792549133, |
| "learning_rate": 1.941196440049916e-06, |
| "loss": 0.4608, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.739202657807309, |
| "grad_norm": 0.36187997460365295, |
| "learning_rate": 1.9335555934534283e-06, |
| "loss": 0.4614, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.7397563676633444, |
| "grad_norm": 0.32338353991508484, |
| "learning_rate": 1.9259262080010938e-06, |
| "loss": 0.4761, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.7403100775193798, |
| "grad_norm": 0.32866108417510986, |
| "learning_rate": 1.918308312208511e-06, |
| "loss": 0.461, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.7408637873754153, |
| "grad_norm": 0.3138168454170227, |
| "learning_rate": 1.910701934548329e-06, |
| "loss": 0.4486, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.7414174972314507, |
| "grad_norm": 0.34086328744888306, |
| "learning_rate": 1.9031071034501475e-06, |
| "loss": 0.4873, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.7419712070874862, |
| "grad_norm": 0.34671589732170105, |
| "learning_rate": 1.895523847300414e-06, |
| "loss": 0.4529, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7425249169435216, |
| "grad_norm": 0.34147655963897705, |
| "learning_rate": 1.887952194442309e-06, |
| "loss": 0.4652, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.743078626799557, |
| "grad_norm": 0.32080572843551636, |
| "learning_rate": 1.8803921731756447e-06, |
| "loss": 0.4613, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.7436323366555925, |
| "grad_norm": 0.3686572015285492, |
| "learning_rate": 1.8728438117567626e-06, |
| "loss": 0.48, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.7441860465116279, |
| "grad_norm": 0.35076168179512024, |
| "learning_rate": 1.86530713839842e-06, |
| "loss": 0.4854, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.7447397563676633, |
| "grad_norm": 0.3071606457233429, |
| "learning_rate": 1.8577821812696939e-06, |
| "loss": 0.4453, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.7452934662236987, |
| "grad_norm": 0.36736711859703064, |
| "learning_rate": 1.8502689684958664e-06, |
| "loss": 0.4646, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.7458471760797342, |
| "grad_norm": 0.3428582549095154, |
| "learning_rate": 1.8427675281583229e-06, |
| "loss": 0.4709, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.7464008859357697, |
| "grad_norm": 0.3570258617401123, |
| "learning_rate": 1.835277888294455e-06, |
| "loss": 0.4632, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.7469545957918051, |
| "grad_norm": 0.3332633078098297, |
| "learning_rate": 1.827800076897542e-06, |
| "loss": 0.4798, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.7475083056478405, |
| "grad_norm": 0.31923386454582214, |
| "learning_rate": 1.8203341219166537e-06, |
| "loss": 0.449, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.748062015503876, |
| "grad_norm": 0.3124280273914337, |
| "learning_rate": 1.8128800512565514e-06, |
| "loss": 0.4416, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7486157253599114, |
| "grad_norm": 0.38136282563209534, |
| "learning_rate": 1.8054378927775713e-06, |
| "loss": 0.471, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7491694352159468, |
| "grad_norm": 0.3354772925376892, |
| "learning_rate": 1.7980076742955282e-06, |
| "loss": 0.4869, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7497231450719822, |
| "grad_norm": 0.35330212116241455, |
| "learning_rate": 1.7905894235816096e-06, |
| "loss": 0.4727, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.7502768549280178, |
| "grad_norm": 0.34706899523735046, |
| "learning_rate": 1.7831831683622758e-06, |
| "loss": 0.4707, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7508305647840532, |
| "grad_norm": 0.3170265257358551, |
| "learning_rate": 1.7757889363191484e-06, |
| "loss": 0.4745, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7513842746400886, |
| "grad_norm": 0.3163388669490814, |
| "learning_rate": 1.768406755088911e-06, |
| "loss": 0.4739, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.751937984496124, |
| "grad_norm": 0.39844390749931335, |
| "learning_rate": 1.7610366522632122e-06, |
| "loss": 0.4722, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7524916943521595, |
| "grad_norm": 0.3498047888278961, |
| "learning_rate": 1.7536786553885488e-06, |
| "loss": 0.4423, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.7530454042081949, |
| "grad_norm": 0.36837103962898254, |
| "learning_rate": 1.7463327919661732e-06, |
| "loss": 0.48, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7535991140642303, |
| "grad_norm": 0.3232395648956299, |
| "learning_rate": 1.738999089451991e-06, |
| "loss": 0.4858, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7541528239202658, |
| "grad_norm": 0.37191465497016907, |
| "learning_rate": 1.7316775752564512e-06, |
| "loss": 0.4691, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.7547065337763013, |
| "grad_norm": 0.36526915431022644, |
| "learning_rate": 1.7243682767444463e-06, |
| "loss": 0.4562, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7552602436323367, |
| "grad_norm": 0.3136215806007385, |
| "learning_rate": 1.7170712212352187e-06, |
| "loss": 0.4571, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.7558139534883721, |
| "grad_norm": 0.32871949672698975, |
| "learning_rate": 1.7097864360022426e-06, |
| "loss": 0.446, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.7563676633444075, |
| "grad_norm": 0.37790757417678833, |
| "learning_rate": 1.7025139482731385e-06, |
| "loss": 0.4768, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.756921373200443, |
| "grad_norm": 0.3020164370536804, |
| "learning_rate": 1.695253785229558e-06, |
| "loss": 0.45, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.7574750830564784, |
| "grad_norm": 0.3682115375995636, |
| "learning_rate": 1.6880059740070897e-06, |
| "loss": 0.4685, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7580287929125138, |
| "grad_norm": 0.36990371346473694, |
| "learning_rate": 1.6807705416951587e-06, |
| "loss": 0.46, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.7585825027685493, |
| "grad_norm": 0.3583049774169922, |
| "learning_rate": 1.673547515336919e-06, |
| "loss": 0.4788, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7591362126245847, |
| "grad_norm": 0.3231860399246216, |
| "learning_rate": 1.6663369219291558e-06, |
| "loss": 0.4708, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.7596899224806202, |
| "grad_norm": 0.32504814863204956, |
| "learning_rate": 1.6591387884221905e-06, |
| "loss": 0.4601, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.7602436323366556, |
| "grad_norm": 0.3197862207889557, |
| "learning_rate": 1.651953141719767e-06, |
| "loss": 0.4721, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.760797342192691, |
| "grad_norm": 0.3388617932796478, |
| "learning_rate": 1.6447800086789651e-06, |
| "loss": 0.4727, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.7613510520487264, |
| "grad_norm": 0.357910692691803, |
| "learning_rate": 1.637619416110089e-06, |
| "loss": 0.4763, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.32476869225502014, |
| "learning_rate": 1.6304713907765713e-06, |
| "loss": 0.4831, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.7624584717607974, |
| "grad_norm": 0.33310264348983765, |
| "learning_rate": 1.6233359593948777e-06, |
| "loss": 0.4373, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7630121816168328, |
| "grad_norm": 0.32121679186820984, |
| "learning_rate": 1.6162131486344e-06, |
| "loss": 0.4924, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7635658914728682, |
| "grad_norm": 0.38050976395606995, |
| "learning_rate": 1.6091029851173567e-06, |
| "loss": 0.4614, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7641196013289037, |
| "grad_norm": 0.3336823880672455, |
| "learning_rate": 1.602005495418702e-06, |
| "loss": 0.4379, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7646733111849391, |
| "grad_norm": 0.3727882206439972, |
| "learning_rate": 1.5949207060660138e-06, |
| "loss": 0.4645, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7652270210409745, |
| "grad_norm": 0.3008807897567749, |
| "learning_rate": 1.587848643539407e-06, |
| "loss": 0.442, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7657807308970099, |
| "grad_norm": 0.31580933928489685, |
| "learning_rate": 1.5807893342714247e-06, |
| "loss": 0.4631, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.7663344407530454, |
| "grad_norm": 0.32674816250801086, |
| "learning_rate": 1.5737428046469455e-06, |
| "loss": 0.4694, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.7668881506090809, |
| "grad_norm": 0.34736204147338867, |
| "learning_rate": 1.56670908100308e-06, |
| "loss": 0.4578, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7674418604651163, |
| "grad_norm": 0.3090921938419342, |
| "learning_rate": 1.55968818962908e-06, |
| "loss": 0.4608, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.7679955703211517, |
| "grad_norm": 0.3129696249961853, |
| "learning_rate": 1.5526801567662315e-06, |
| "loss": 0.4572, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.7685492801771872, |
| "grad_norm": 0.3233101963996887, |
| "learning_rate": 1.5456850086077613e-06, |
| "loss": 0.4508, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.7691029900332226, |
| "grad_norm": 0.33598968386650085, |
| "learning_rate": 1.5387027712987368e-06, |
| "loss": 0.4512, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.769656699889258, |
| "grad_norm": 0.32115107774734497, |
| "learning_rate": 1.531733470935976e-06, |
| "loss": 0.4532, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7702104097452934, |
| "grad_norm": 0.34383609890937805, |
| "learning_rate": 1.5247771335679372e-06, |
| "loss": 0.4579, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.770764119601329, |
| "grad_norm": 0.32946261763572693, |
| "learning_rate": 1.517833785194629e-06, |
| "loss": 0.4668, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.7713178294573644, |
| "grad_norm": 0.31901848316192627, |
| "learning_rate": 1.5109034517675164e-06, |
| "loss": 0.4625, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.7718715393133998, |
| "grad_norm": 0.33310940861701965, |
| "learning_rate": 1.5039861591894146e-06, |
| "loss": 0.4823, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.7724252491694352, |
| "grad_norm": 0.32345050573349, |
| "learning_rate": 1.4970819333144026e-06, |
| "loss": 0.4592, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.7729789590254706, |
| "grad_norm": 0.3341536819934845, |
| "learning_rate": 1.4901907999477167e-06, |
| "loss": 0.4517, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.7735326688815061, |
| "grad_norm": 0.3557744026184082, |
| "learning_rate": 1.4833127848456597e-06, |
| "loss": 0.4695, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.7740863787375415, |
| "grad_norm": 0.32580217719078064, |
| "learning_rate": 1.4764479137155063e-06, |
| "loss": 0.4751, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.7746400885935769, |
| "grad_norm": 0.31983494758605957, |
| "learning_rate": 1.4695962122154023e-06, |
| "loss": 0.4472, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.7751937984496124, |
| "grad_norm": 0.2909253239631653, |
| "learning_rate": 1.4627577059542675e-06, |
| "loss": 0.4353, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7757475083056479, |
| "grad_norm": 0.3315528929233551, |
| "learning_rate": 1.4559324204917102e-06, |
| "loss": 0.463, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.7763012181616833, |
| "grad_norm": 0.31947755813598633, |
| "learning_rate": 1.4491203813379174e-06, |
| "loss": 0.4607, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.7768549280177187, |
| "grad_norm": 0.3263440430164337, |
| "learning_rate": 1.4423216139535735e-06, |
| "loss": 0.4631, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.7774086378737541, |
| "grad_norm": 0.29621192812919617, |
| "learning_rate": 1.4355361437497533e-06, |
| "loss": 0.4324, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.7779623477297896, |
| "grad_norm": 0.3122243583202362, |
| "learning_rate": 1.4287639960878318e-06, |
| "loss": 0.4558, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.778516057585825, |
| "grad_norm": 0.30342355370521545, |
| "learning_rate": 1.4220051962793952e-06, |
| "loss": 0.4657, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.7790697674418605, |
| "grad_norm": 0.36989858746528625, |
| "learning_rate": 1.4152597695861331e-06, |
| "loss": 0.4808, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.7796234772978959, |
| "grad_norm": 0.34097298979759216, |
| "learning_rate": 1.408527741219759e-06, |
| "loss": 0.4524, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.7801771871539314, |
| "grad_norm": 0.30833345651626587, |
| "learning_rate": 1.4018091363419046e-06, |
| "loss": 0.4455, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.7807308970099668, |
| "grad_norm": 0.32465872168540955, |
| "learning_rate": 1.3951039800640292e-06, |
| "loss": 0.4355, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7812846068660022, |
| "grad_norm": 0.3296130299568176, |
| "learning_rate": 1.3884122974473307e-06, |
| "loss": 0.4696, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.7818383167220376, |
| "grad_norm": 0.3251802623271942, |
| "learning_rate": 1.381734113502644e-06, |
| "loss": 0.4431, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.782392026578073, |
| "grad_norm": 0.33919668197631836, |
| "learning_rate": 1.3750694531903518e-06, |
| "loss": 0.4759, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.7829457364341085, |
| "grad_norm": 0.3112054467201233, |
| "learning_rate": 1.3684183414202946e-06, |
| "loss": 0.4521, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.783499446290144, |
| "grad_norm": 0.32274097204208374, |
| "learning_rate": 1.3617808030516694e-06, |
| "loss": 0.4641, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.7840531561461794, |
| "grad_norm": 0.29862481355667114, |
| "learning_rate": 1.3551568628929434e-06, |
| "loss": 0.491, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.7846068660022149, |
| "grad_norm": 0.3179886043071747, |
| "learning_rate": 1.3485465457017567e-06, |
| "loss": 0.4605, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.7851605758582503, |
| "grad_norm": 0.3223581612110138, |
| "learning_rate": 1.341949876184837e-06, |
| "loss": 0.4829, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.34942424297332764, |
| "learning_rate": 1.3353668789978991e-06, |
| "loss": 0.4524, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.7862679955703211, |
| "grad_norm": 0.3048610985279083, |
| "learning_rate": 1.3287975787455554e-06, |
| "loss": 0.4788, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.7868217054263565, |
| "grad_norm": 0.32578060030937195, |
| "learning_rate": 1.3222419999812248e-06, |
| "loss": 0.492, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.7873754152823921, |
| "grad_norm": 0.29815563559532166, |
| "learning_rate": 1.3157001672070445e-06, |
| "loss": 0.4337, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.7879291251384275, |
| "grad_norm": 0.3292684853076935, |
| "learning_rate": 1.3091721048737699e-06, |
| "loss": 0.4438, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.7884828349944629, |
| "grad_norm": 0.3099350333213806, |
| "learning_rate": 1.3026578373806925e-06, |
| "loss": 0.4711, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.7890365448504983, |
| "grad_norm": 0.3220853805541992, |
| "learning_rate": 1.2961573890755398e-06, |
| "loss": 0.4717, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.7895902547065338, |
| "grad_norm": 0.31580111384391785, |
| "learning_rate": 1.2896707842543898e-06, |
| "loss": 0.4682, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.7901439645625692, |
| "grad_norm": 0.2921631932258606, |
| "learning_rate": 1.2831980471615824e-06, |
| "loss": 0.4445, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.7906976744186046, |
| "grad_norm": 0.3160112202167511, |
| "learning_rate": 1.2767392019896218e-06, |
| "loss": 0.4798, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.79125138427464, |
| "grad_norm": 0.3386807143688202, |
| "learning_rate": 1.2702942728790897e-06, |
| "loss": 0.4538, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.7918050941306756, |
| "grad_norm": 0.3259252905845642, |
| "learning_rate": 1.263863283918559e-06, |
| "loss": 0.4732, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.792358803986711, |
| "grad_norm": 0.34992191195487976, |
| "learning_rate": 1.257446259144494e-06, |
| "loss": 0.4719, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.7929125138427464, |
| "grad_norm": 0.3109996020793915, |
| "learning_rate": 1.2510432225411738e-06, |
| "loss": 0.4588, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.7934662236987818, |
| "grad_norm": 0.33270999789237976, |
| "learning_rate": 1.244654198040589e-06, |
| "loss": 0.4662, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.7940199335548173, |
| "grad_norm": 0.31173089146614075, |
| "learning_rate": 1.238279209522359e-06, |
| "loss": 0.4564, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.7945736434108527, |
| "grad_norm": 0.322552889585495, |
| "learning_rate": 1.2319182808136476e-06, |
| "loss": 0.4779, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.7951273532668881, |
| "grad_norm": 0.3441169261932373, |
| "learning_rate": 1.225571435689062e-06, |
| "loss": 0.4623, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.7956810631229236, |
| "grad_norm": 0.3150555491447449, |
| "learning_rate": 1.2192386978705766e-06, |
| "loss": 0.4659, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.7962347729789591, |
| "grad_norm": 0.33501121401786804, |
| "learning_rate": 1.2129200910274341e-06, |
| "loss": 0.4299, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.7967884828349945, |
| "grad_norm": 0.32314276695251465, |
| "learning_rate": 1.206615638776061e-06, |
| "loss": 0.4588, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.7973421926910299, |
| "grad_norm": 0.3319147825241089, |
| "learning_rate": 1.2003253646799846e-06, |
| "loss": 0.4456, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.7978959025470653, |
| "grad_norm": 0.3265640139579773, |
| "learning_rate": 1.1940492922497337e-06, |
| "loss": 0.4751, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.7984496124031008, |
| "grad_norm": 0.3368552029132843, |
| "learning_rate": 1.18778744494276e-06, |
| "loss": 0.4484, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.7990033222591362, |
| "grad_norm": 0.3024345636367798, |
| "learning_rate": 1.1815398461633498e-06, |
| "loss": 0.461, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.7995570321151716, |
| "grad_norm": 0.3282533288002014, |
| "learning_rate": 1.175306519262529e-06, |
| "loss": 0.4899, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.8001107419712071, |
| "grad_norm": 0.331606924533844, |
| "learning_rate": 1.1690874875379822e-06, |
| "loss": 0.4851, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.8006644518272426, |
| "grad_norm": 0.31335535645484924, |
| "learning_rate": 1.1628827742339688e-06, |
| "loss": 0.4681, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.801218161683278, |
| "grad_norm": 0.3058898150920868, |
| "learning_rate": 1.1566924025412268e-06, |
| "loss": 0.4361, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.8017718715393134, |
| "grad_norm": 0.3312506377696991, |
| "learning_rate": 1.1505163955968928e-06, |
| "loss": 0.4701, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8023255813953488, |
| "grad_norm": 0.3389173448085785, |
| "learning_rate": 1.1443547764844114e-06, |
| "loss": 0.4992, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.8028792912513842, |
| "grad_norm": 0.3187425136566162, |
| "learning_rate": 1.1382075682334566e-06, |
| "loss": 0.4621, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8034330011074197, |
| "grad_norm": 0.31678736209869385, |
| "learning_rate": 1.1320747938198356e-06, |
| "loss": 0.4828, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8039867109634552, |
| "grad_norm": 0.3524612784385681, |
| "learning_rate": 1.1259564761654073e-06, |
| "loss": 0.4691, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8045404208194906, |
| "grad_norm": 0.29647573828697205, |
| "learning_rate": 1.119852638138002e-06, |
| "loss": 0.4725, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.805094130675526, |
| "grad_norm": 0.30620694160461426, |
| "learning_rate": 1.1137633025513267e-06, |
| "loss": 0.4476, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8056478405315615, |
| "grad_norm": 0.33965837955474854, |
| "learning_rate": 1.1076884921648834e-06, |
| "loss": 0.4479, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.8062015503875969, |
| "grad_norm": 0.31289494037628174, |
| "learning_rate": 1.1016282296838887e-06, |
| "loss": 0.4877, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8067552602436323, |
| "grad_norm": 0.35544541478157043, |
| "learning_rate": 1.0955825377591823e-06, |
| "loss": 0.4788, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.8073089700996677, |
| "grad_norm": 0.3119059205055237, |
| "learning_rate": 1.0895514389871436e-06, |
| "loss": 0.4651, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8078626799557033, |
| "grad_norm": 0.3125481903553009, |
| "learning_rate": 1.0835349559096125e-06, |
| "loss": 0.4609, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.8084163898117387, |
| "grad_norm": 0.3452129662036896, |
| "learning_rate": 1.0775331110137977e-06, |
| "loss": 0.4821, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8089700996677741, |
| "grad_norm": 0.31825172901153564, |
| "learning_rate": 1.0715459267321998e-06, |
| "loss": 0.4664, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.8095238095238095, |
| "grad_norm": 0.3356800079345703, |
| "learning_rate": 1.06557342544252e-06, |
| "loss": 0.4714, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.810077519379845, |
| "grad_norm": 0.32622677087783813, |
| "learning_rate": 1.0596156294675813e-06, |
| "loss": 0.4664, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.8106312292358804, |
| "grad_norm": 0.28277918696403503, |
| "learning_rate": 1.0536725610752475e-06, |
| "loss": 0.4277, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8111849390919158, |
| "grad_norm": 0.34184351563453674, |
| "learning_rate": 1.0477442424783306e-06, |
| "loss": 0.4587, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.8117386489479512, |
| "grad_norm": 0.33527636528015137, |
| "learning_rate": 1.0418306958345214e-06, |
| "loss": 0.4489, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8122923588039868, |
| "grad_norm": 0.32856327295303345, |
| "learning_rate": 1.0359319432462922e-06, |
| "loss": 0.4883, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8128460686600222, |
| "grad_norm": 0.32732442021369934, |
| "learning_rate": 1.0300480067608232e-06, |
| "loss": 0.4688, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8133997785160576, |
| "grad_norm": 0.3294256925582886, |
| "learning_rate": 1.02417890836992e-06, |
| "loss": 0.4834, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.813953488372093, |
| "grad_norm": 0.2979672849178314, |
| "learning_rate": 1.018324670009927e-06, |
| "loss": 0.454, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8145071982281284, |
| "grad_norm": 0.3292641341686249, |
| "learning_rate": 1.0124853135616475e-06, |
| "loss": 0.4384, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8150609080841639, |
| "grad_norm": 0.33436697721481323, |
| "learning_rate": 1.0066608608502647e-06, |
| "loss": 0.4745, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8156146179401993, |
| "grad_norm": 0.30958297848701477, |
| "learning_rate": 1.000851333645254e-06, |
| "loss": 0.4507, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.8161683277962348, |
| "grad_norm": 0.3246972858905792, |
| "learning_rate": 9.9505675366031e-07, |
| "loss": 0.4799, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.8167220376522702, |
| "grad_norm": 0.3213115334510803, |
| "learning_rate": 9.89277142553256e-07, |
| "loss": 0.4831, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.8172757475083057, |
| "grad_norm": 0.29282888770103455, |
| "learning_rate": 9.835125219259694e-07, |
| "loss": 0.4532, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8178294573643411, |
| "grad_norm": 0.327378511428833, |
| "learning_rate": 9.777629133242982e-07, |
| "loss": 0.4562, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.8183831672203765, |
| "grad_norm": 0.3104061484336853, |
| "learning_rate": 9.720283382379852e-07, |
| "loss": 0.4666, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8189368770764119, |
| "grad_norm": 0.3109104037284851, |
| "learning_rate": 9.663088181005792e-07, |
| "loss": 0.4735, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8194905869324474, |
| "grad_norm": 0.34149253368377686, |
| "learning_rate": 9.606043742893616e-07, |
| "loss": 0.4735, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8200442967884828, |
| "grad_norm": 0.29113996028900146, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.4864, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.8205980066445183, |
| "grad_norm": 0.3134409785270691, |
| "learning_rate": 9.492408008727899e-07, |
| "loss": 0.4717, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.8211517165005537, |
| "grad_norm": 0.35266929864883423, |
| "learning_rate": 9.435817137399351e-07, |
| "loss": 0.4873, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.8217054263565892, |
| "grad_norm": 0.341102659702301, |
| "learning_rate": 9.379377878781044e-07, |
| "loss": 0.4468, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.8222591362126246, |
| "grad_norm": 0.3171820640563965, |
| "learning_rate": 9.323090443820404e-07, |
| "loss": 0.4445, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.82281284606866, |
| "grad_norm": 0.3287108540534973, |
| "learning_rate": 9.266955042897357e-07, |
| "loss": 0.4614, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.8233665559246954, |
| "grad_norm": 0.2895143926143646, |
| "learning_rate": 9.210971885823605e-07, |
| "loss": 0.4456, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.8239202657807309, |
| "grad_norm": 0.27934545278549194, |
| "learning_rate": 9.155141181841843e-07, |
| "loss": 0.4541, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.8244739756367664, |
| "grad_norm": 0.322287917137146, |
| "learning_rate": 9.099463139624914e-07, |
| "loss": 0.4707, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.8250276854928018, |
| "grad_norm": 0.31403809785842896, |
| "learning_rate": 9.043937967275119e-07, |
| "loss": 0.4629, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8255813953488372, |
| "grad_norm": 0.34129196405410767, |
| "learning_rate": 8.988565872323362e-07, |
| "loss": 0.4812, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.8261351052048727, |
| "grad_norm": 0.3059549927711487, |
| "learning_rate": 8.933347061728398e-07, |
| "loss": 0.4743, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.8266888150609081, |
| "grad_norm": 0.3288021981716156, |
| "learning_rate": 8.878281741876105e-07, |
| "loss": 0.4661, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.8272425249169435, |
| "grad_norm": 0.31026124954223633, |
| "learning_rate": 8.823370118578628e-07, |
| "loss": 0.4679, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.8277962347729789, |
| "grad_norm": 0.3222355544567108, |
| "learning_rate": 8.7686123970737e-07, |
| "loss": 0.4873, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.8283499446290143, |
| "grad_norm": 0.29187437891960144, |
| "learning_rate": 8.714008782023797e-07, |
| "loss": 0.4762, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.8289036544850499, |
| "grad_norm": 0.30391842126846313, |
| "learning_rate": 8.659559477515406e-07, |
| "loss": 0.4432, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.8294573643410853, |
| "grad_norm": 0.33190590143203735, |
| "learning_rate": 8.605264687058302e-07, |
| "loss": 0.4779, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.8300110741971207, |
| "grad_norm": 0.29238536953926086, |
| "learning_rate": 8.551124613584705e-07, |
| "loss": 0.4592, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.8305647840531561, |
| "grad_norm": 0.2889476418495178, |
| "learning_rate": 8.497139459448573e-07, |
| "loss": 0.4639, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8311184939091916, |
| "grad_norm": 0.28619977831840515, |
| "learning_rate": 8.443309426424862e-07, |
| "loss": 0.4684, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.831672203765227, |
| "grad_norm": 0.324690580368042, |
| "learning_rate": 8.389634715708711e-07, |
| "loss": 0.492, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.8322259136212624, |
| "grad_norm": 0.30507704615592957, |
| "learning_rate": 8.336115527914774e-07, |
| "loss": 0.4645, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.832779623477298, |
| "grad_norm": 0.307434618473053, |
| "learning_rate": 8.282752063076371e-07, |
| "loss": 0.4735, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.3095044791698456, |
| "learning_rate": 8.229544520644817e-07, |
| "loss": 0.4732, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.8338870431893688, |
| "grad_norm": 0.3193853199481964, |
| "learning_rate": 8.176493099488664e-07, |
| "loss": 0.4541, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.8344407530454042, |
| "grad_norm": 0.3113101124763489, |
| "learning_rate": 8.123597997892918e-07, |
| "loss": 0.4514, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.8349944629014396, |
| "grad_norm": 0.3148142695426941, |
| "learning_rate": 8.070859413558329e-07, |
| "loss": 0.4545, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.8355481727574751, |
| "grad_norm": 0.32536906003952026, |
| "learning_rate": 8.018277543600683e-07, |
| "loss": 0.4549, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.8361018826135105, |
| "grad_norm": 0.31320637464523315, |
| "learning_rate": 7.965852584549983e-07, |
| "loss": 0.4666, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8366555924695459, |
| "grad_norm": 0.3198712170124054, |
| "learning_rate": 7.913584732349788e-07, |
| "loss": 0.4612, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.8372093023255814, |
| "grad_norm": 0.32573768496513367, |
| "learning_rate": 7.861474182356449e-07, |
| "loss": 0.4404, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.8377630121816169, |
| "grad_norm": 0.3041425943374634, |
| "learning_rate": 7.809521129338371e-07, |
| "loss": 0.4841, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.8383167220376523, |
| "grad_norm": 0.30132317543029785, |
| "learning_rate": 7.757725767475332e-07, |
| "loss": 0.4708, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.8388704318936877, |
| "grad_norm": 0.32302555441856384, |
| "learning_rate": 7.706088290357683e-07, |
| "loss": 0.4544, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.8394241417497231, |
| "grad_norm": 0.2973669469356537, |
| "learning_rate": 7.654608890985709e-07, |
| "loss": 0.4586, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.8399778516057586, |
| "grad_norm": 0.30986711382865906, |
| "learning_rate": 7.603287761768824e-07, |
| "loss": 0.4888, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.840531561461794, |
| "grad_norm": 0.31494471430778503, |
| "learning_rate": 7.552125094524893e-07, |
| "loss": 0.4699, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.8410852713178295, |
| "grad_norm": 0.3044757843017578, |
| "learning_rate": 7.501121080479551e-07, |
| "loss": 0.446, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.8416389811738649, |
| "grad_norm": 0.2932833731174469, |
| "learning_rate": 7.450275910265415e-07, |
| "loss": 0.473, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8421926910299004, |
| "grad_norm": 0.294283002614975, |
| "learning_rate": 7.399589773921412e-07, |
| "loss": 0.4609, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.8427464008859358, |
| "grad_norm": 0.2805401384830475, |
| "learning_rate": 7.349062860892092e-07, |
| "loss": 0.4383, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.8433001107419712, |
| "grad_norm": 0.33512675762176514, |
| "learning_rate": 7.298695360026842e-07, |
| "loss": 0.4602, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.8438538205980066, |
| "grad_norm": 0.29946646094322205, |
| "learning_rate": 7.248487459579284e-07, |
| "loss": 0.4289, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.844407530454042, |
| "grad_norm": 0.30807411670684814, |
| "learning_rate": 7.198439347206487e-07, |
| "loss": 0.446, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.8449612403100775, |
| "grad_norm": 0.31810346245765686, |
| "learning_rate": 7.148551209968279e-07, |
| "loss": 0.477, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.845514950166113, |
| "grad_norm": 0.34772250056266785, |
| "learning_rate": 7.098823234326618e-07, |
| "loss": 0.4578, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.8460686600221484, |
| "grad_norm": 0.29736942052841187, |
| "learning_rate": 7.049255606144795e-07, |
| "loss": 0.4601, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.8466223698781838, |
| "grad_norm": 0.3065546452999115, |
| "learning_rate": 6.999848510686791e-07, |
| "loss": 0.4552, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.8471760797342193, |
| "grad_norm": 0.34162911772727966, |
| "learning_rate": 6.950602132616618e-07, |
| "loss": 0.4564, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8477297895902547, |
| "grad_norm": 0.3589327931404114, |
| "learning_rate": 6.901516655997536e-07, |
| "loss": 0.4817, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.8482834994462901, |
| "grad_norm": 0.310115784406662, |
| "learning_rate": 6.852592264291468e-07, |
| "loss": 0.4635, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.8488372093023255, |
| "grad_norm": 0.2983943223953247, |
| "learning_rate": 6.803829140358237e-07, |
| "loss": 0.4392, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.8493909191583611, |
| "grad_norm": 0.3104465901851654, |
| "learning_rate": 6.755227466454912e-07, |
| "loss": 0.4707, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.8499446290143965, |
| "grad_norm": 0.32061025500297546, |
| "learning_rate": 6.706787424235145e-07, |
| "loss": 0.4636, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.8504983388704319, |
| "grad_norm": 0.31521013379096985, |
| "learning_rate": 6.658509194748463e-07, |
| "loss": 0.4529, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8510520487264673, |
| "grad_norm": 0.3287814259529114, |
| "learning_rate": 6.610392958439582e-07, |
| "loss": 0.4682, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.8516057585825028, |
| "grad_norm": 0.3258051872253418, |
| "learning_rate": 6.562438895147799e-07, |
| "loss": 0.4673, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.8521594684385382, |
| "grad_norm": 0.2964293360710144, |
| "learning_rate": 6.514647184106232e-07, |
| "loss": 0.466, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.8527131782945736, |
| "grad_norm": 0.28835752606391907, |
| "learning_rate": 6.467018003941189e-07, |
| "loss": 0.4636, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.853266888150609, |
| "grad_norm": 0.3069021701812744, |
| "learning_rate": 6.419551532671542e-07, |
| "loss": 0.4921, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.8538205980066446, |
| "grad_norm": 0.303408682346344, |
| "learning_rate": 6.372247947707988e-07, |
| "loss": 0.4482, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.85437430786268, |
| "grad_norm": 0.3295760452747345, |
| "learning_rate": 6.325107425852433e-07, |
| "loss": 0.4899, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.8549280177187154, |
| "grad_norm": 0.30452761054039, |
| "learning_rate": 6.278130143297295e-07, |
| "loss": 0.455, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.8554817275747508, |
| "grad_norm": 0.33882924914360046, |
| "learning_rate": 6.231316275624921e-07, |
| "loss": 0.4454, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.8560354374307863, |
| "grad_norm": 0.3470396101474762, |
| "learning_rate": 6.184665997806832e-07, |
| "loss": 0.4663, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.8565891472868217, |
| "grad_norm": 0.33550286293029785, |
| "learning_rate": 6.138179484203117e-07, |
| "loss": 0.4469, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.29510611295700073, |
| "learning_rate": 6.091856908561811e-07, |
| "loss": 0.4452, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.8576965669988926, |
| "grad_norm": 0.30699166655540466, |
| "learning_rate": 6.045698444018194e-07, |
| "loss": 0.4325, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.858250276854928, |
| "grad_norm": 0.3179960548877716, |
| "learning_rate": 5.999704263094147e-07, |
| "loss": 0.4515, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8588039867109635, |
| "grad_norm": 0.37318259477615356, |
| "learning_rate": 5.953874537697573e-07, |
| "loss": 0.4755, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.8593576965669989, |
| "grad_norm": 0.29407113790512085, |
| "learning_rate": 5.908209439121648e-07, |
| "loss": 0.4582, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.8599114064230343, |
| "grad_norm": 0.30949074029922485, |
| "learning_rate": 5.862709138044298e-07, |
| "loss": 0.4515, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.8604651162790697, |
| "grad_norm": 0.3142622709274292, |
| "learning_rate": 5.817373804527449e-07, |
| "loss": 0.4545, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.8610188261351052, |
| "grad_norm": 0.28854456543922424, |
| "learning_rate": 5.772203608016464e-07, |
| "loss": 0.4563, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.8615725359911407, |
| "grad_norm": 0.3129185140132904, |
| "learning_rate": 5.727198717339511e-07, |
| "loss": 0.4407, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.8621262458471761, |
| "grad_norm": 0.29644128680229187, |
| "learning_rate": 5.68235930070688e-07, |
| "loss": 0.4401, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.8626799557032115, |
| "grad_norm": 0.3216518461704254, |
| "learning_rate": 5.637685525710384e-07, |
| "loss": 0.4764, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.863233665559247, |
| "grad_norm": 0.29683998227119446, |
| "learning_rate": 5.593177559322776e-07, |
| "loss": 0.4586, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.8637873754152824, |
| "grad_norm": 0.31808000802993774, |
| "learning_rate": 5.548835567897031e-07, |
| "loss": 0.4541, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.8643410852713178, |
| "grad_norm": 0.2945192754268646, |
| "learning_rate": 5.504659717165812e-07, |
| "loss": 0.4668, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.8648947951273532, |
| "grad_norm": 0.30420994758605957, |
| "learning_rate": 5.460650172240795e-07, |
| "loss": 0.4588, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.8654485049833887, |
| "grad_norm": 0.3046761751174927, |
| "learning_rate": 5.416807097612071e-07, |
| "loss": 0.447, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.8660022148394242, |
| "grad_norm": 0.2961207628250122, |
| "learning_rate": 5.373130657147552e-07, |
| "loss": 0.4357, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.8665559246954596, |
| "grad_norm": 0.3244534730911255, |
| "learning_rate": 5.329621014092318e-07, |
| "loss": 0.4773, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.867109634551495, |
| "grad_norm": 0.28872519731521606, |
| "learning_rate": 5.286278331068018e-07, |
| "loss": 0.4718, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.8676633444075305, |
| "grad_norm": 0.2985605299472809, |
| "learning_rate": 5.243102770072306e-07, |
| "loss": 0.4594, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.8682170542635659, |
| "grad_norm": 0.30403968691825867, |
| "learning_rate": 5.200094492478169e-07, |
| "loss": 0.4869, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.8687707641196013, |
| "grad_norm": 0.2938711941242218, |
| "learning_rate": 5.157253659033379e-07, |
| "loss": 0.4498, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.8693244739756367, |
| "grad_norm": 0.32247763872146606, |
| "learning_rate": 5.114580429859844e-07, |
| "loss": 0.4642, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8698781838316723, |
| "grad_norm": 0.3126090466976166, |
| "learning_rate": 5.072074964453055e-07, |
| "loss": 0.4743, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.8704318936877077, |
| "grad_norm": 0.29365554451942444, |
| "learning_rate": 5.029737421681446e-07, |
| "loss": 0.4649, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.8709856035437431, |
| "grad_norm": 0.32420745491981506, |
| "learning_rate": 4.98756795978586e-07, |
| "loss": 0.4653, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.8715393133997785, |
| "grad_norm": 0.3036859631538391, |
| "learning_rate": 4.945566736378887e-07, |
| "loss": 0.465, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.872093023255814, |
| "grad_norm": 0.2864364683628082, |
| "learning_rate": 4.903733908444325e-07, |
| "loss": 0.4465, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.8726467331118494, |
| "grad_norm": 0.30921804904937744, |
| "learning_rate": 4.862069632336558e-07, |
| "loss": 0.4459, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.8732004429678848, |
| "grad_norm": 0.3261505365371704, |
| "learning_rate": 4.820574063780031e-07, |
| "loss": 0.4849, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.8737541528239202, |
| "grad_norm": 0.3188358545303345, |
| "learning_rate": 4.779247357868583e-07, |
| "loss": 0.475, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.8743078626799557, |
| "grad_norm": 0.3161148726940155, |
| "learning_rate": 4.738089669064927e-07, |
| "loss": 0.4494, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.8748615725359912, |
| "grad_norm": 0.3190706670284271, |
| "learning_rate": 4.697101151200079e-07, |
| "loss": 0.459, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8754152823920266, |
| "grad_norm": 0.30358344316482544, |
| "learning_rate": 4.6562819574727304e-07, |
| "loss": 0.482, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.875968992248062, |
| "grad_norm": 0.29602691531181335, |
| "learning_rate": 4.6156322404487306e-07, |
| "loss": 0.4779, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.8765227021040974, |
| "grad_norm": 0.31430983543395996, |
| "learning_rate": 4.575152152060475e-07, |
| "loss": 0.4677, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.8770764119601329, |
| "grad_norm": 0.30270159244537354, |
| "learning_rate": 4.534841843606358e-07, |
| "loss": 0.4335, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.8776301218161683, |
| "grad_norm": 0.3131905794143677, |
| "learning_rate": 4.494701465750217e-07, |
| "loss": 0.4775, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.8781838316722038, |
| "grad_norm": 0.2916598916053772, |
| "learning_rate": 4.454731168520754e-07, |
| "loss": 0.4704, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.8787375415282392, |
| "grad_norm": 0.31324756145477295, |
| "learning_rate": 4.414931101310954e-07, |
| "loss": 0.4474, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.8792912513842747, |
| "grad_norm": 0.2947539687156677, |
| "learning_rate": 4.375301412877586e-07, |
| "loss": 0.4763, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.8798449612403101, |
| "grad_norm": 0.3165675401687622, |
| "learning_rate": 4.3358422513405776e-07, |
| "loss": 0.4715, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.8803986710963455, |
| "grad_norm": 0.2980518043041229, |
| "learning_rate": 4.296553764182526e-07, |
| "loss": 0.4567, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.8809523809523809, |
| "grad_norm": 0.31142324209213257, |
| "learning_rate": 4.257436098248091e-07, |
| "loss": 0.4584, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.8815060908084164, |
| "grad_norm": 0.31078335642814636, |
| "learning_rate": 4.218489399743481e-07, |
| "loss": 0.4914, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.8820598006644518, |
| "grad_norm": 0.30437183380126953, |
| "learning_rate": 4.179713814235903e-07, |
| "loss": 0.4673, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.8826135105204873, |
| "grad_norm": 0.3099023997783661, |
| "learning_rate": 4.141109486652989e-07, |
| "loss": 0.4551, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.8831672203765227, |
| "grad_norm": 0.3001941740512848, |
| "learning_rate": 4.1026765612823147e-07, |
| "loss": 0.448, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.8837209302325582, |
| "grad_norm": 0.29348331689834595, |
| "learning_rate": 4.064415181770787e-07, |
| "loss": 0.4466, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.8842746400885936, |
| "grad_norm": 0.3348081707954407, |
| "learning_rate": 4.0263254911241555e-07, |
| "loss": 0.4672, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.884828349944629, |
| "grad_norm": 0.31024014949798584, |
| "learning_rate": 3.9884076317064813e-07, |
| "loss": 0.4681, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.8853820598006644, |
| "grad_norm": 0.29897528886795044, |
| "learning_rate": 3.9506617452395647e-07, |
| "loss": 0.4818, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.8859357696566998, |
| "grad_norm": 0.27957743406295776, |
| "learning_rate": 3.913087972802443e-07, |
| "loss": 0.4321, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8864894795127354, |
| "grad_norm": 0.3344254195690155, |
| "learning_rate": 3.875686454830885e-07, |
| "loss": 0.4694, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.8870431893687708, |
| "grad_norm": 0.2915976345539093, |
| "learning_rate": 3.8384573311168126e-07, |
| "loss": 0.4728, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.8875968992248062, |
| "grad_norm": 0.32636356353759766, |
| "learning_rate": 3.8014007408078144e-07, |
| "loss": 0.4639, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.8881506090808416, |
| "grad_norm": 0.33974215388298035, |
| "learning_rate": 3.764516822406616e-07, |
| "loss": 0.4676, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.8887043189368771, |
| "grad_norm": 0.312438040971756, |
| "learning_rate": 3.727805713770588e-07, |
| "loss": 0.4828, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.8892580287929125, |
| "grad_norm": 0.30062320828437805, |
| "learning_rate": 3.691267552111183e-07, |
| "loss": 0.4655, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.8898117386489479, |
| "grad_norm": 0.2907237410545349, |
| "learning_rate": 3.654902473993438e-07, |
| "loss": 0.4711, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.8903654485049833, |
| "grad_norm": 0.701089084148407, |
| "learning_rate": 3.618710615335497e-07, |
| "loss": 0.4406, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.8909191583610189, |
| "grad_norm": 0.29503801465034485, |
| "learning_rate": 3.5826921114080704e-07, |
| "loss": 0.426, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.8914728682170543, |
| "grad_norm": 0.31222304701805115, |
| "learning_rate": 3.546847096833933e-07, |
| "loss": 0.4556, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.8920265780730897, |
| "grad_norm": 0.31220102310180664, |
| "learning_rate": 3.511175705587433e-07, |
| "loss": 0.4523, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.8925802879291251, |
| "grad_norm": 0.3270728588104248, |
| "learning_rate": 3.4756780709939817e-07, |
| "loss": 0.4634, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.8931339977851606, |
| "grad_norm": 0.3142661154270172, |
| "learning_rate": 3.440354325729545e-07, |
| "loss": 0.4797, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.893687707641196, |
| "grad_norm": 0.3256533145904541, |
| "learning_rate": 3.405204601820189e-07, |
| "loss": 0.5019, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.8942414174972314, |
| "grad_norm": 0.3360338509082794, |
| "learning_rate": 3.370229030641525e-07, |
| "loss": 0.4744, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.8947951273532669, |
| "grad_norm": 0.2963426113128662, |
| "learning_rate": 3.3354277429182626e-07, |
| "loss": 0.4325, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.8953488372093024, |
| "grad_norm": 0.2993135154247284, |
| "learning_rate": 3.3008008687237305e-07, |
| "loss": 0.45, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.8959025470653378, |
| "grad_norm": 0.3325082063674927, |
| "learning_rate": 3.2663485374793304e-07, |
| "loss": 0.4673, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.8964562569213732, |
| "grad_norm": 0.3224189281463623, |
| "learning_rate": 3.23207087795413e-07, |
| "loss": 0.4478, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.8970099667774086, |
| "grad_norm": 0.30114316940307617, |
| "learning_rate": 3.1979680182643134e-07, |
| "loss": 0.4535, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.897563676633444, |
| "grad_norm": 0.2878979742527008, |
| "learning_rate": 3.164040085872755e-07, |
| "loss": 0.4835, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.8981173864894795, |
| "grad_norm": 0.30875077843666077, |
| "learning_rate": 3.130287207588506e-07, |
| "loss": 0.4636, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.8986710963455149, |
| "grad_norm": 0.29143431782722473, |
| "learning_rate": 3.0967095095663424e-07, |
| "loss": 0.4674, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.8992248062015504, |
| "grad_norm": 0.278022825717926, |
| "learning_rate": 3.0633071173062966e-07, |
| "loss": 0.4511, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.8997785160575859, |
| "grad_norm": 0.3196825385093689, |
| "learning_rate": 3.0300801556531536e-07, |
| "loss": 0.4487, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.9003322259136213, |
| "grad_norm": 0.2895258069038391, |
| "learning_rate": 2.997028748796016e-07, |
| "loss": 0.4693, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.9008859357696567, |
| "grad_norm": 0.2995036542415619, |
| "learning_rate": 2.9641530202678626e-07, |
| "loss": 0.4484, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.9014396456256921, |
| "grad_norm": 0.29738712310791016, |
| "learning_rate": 2.9314530929450137e-07, |
| "loss": 0.443, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.9019933554817275, |
| "grad_norm": 0.3146614134311676, |
| "learning_rate": 2.8989290890467314e-07, |
| "loss": 0.4699, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.902547065337763, |
| "grad_norm": 0.2757892906665802, |
| "learning_rate": 2.86658113013476e-07, |
| "loss": 0.4328, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9031007751937985, |
| "grad_norm": 0.30202972888946533, |
| "learning_rate": 2.834409337112842e-07, |
| "loss": 0.4416, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.9036544850498339, |
| "grad_norm": 0.31334805488586426, |
| "learning_rate": 2.8024138302262913e-07, |
| "loss": 0.4592, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.9042081949058693, |
| "grad_norm": 0.3020130693912506, |
| "learning_rate": 2.770594729061532e-07, |
| "loss": 0.4496, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.9047619047619048, |
| "grad_norm": 0.31226208806037903, |
| "learning_rate": 2.738952152545643e-07, |
| "loss": 0.4484, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.9053156146179402, |
| "grad_norm": 0.3205219805240631, |
| "learning_rate": 2.7074862189459426e-07, |
| "loss": 0.4908, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.9058693244739756, |
| "grad_norm": 0.2950468063354492, |
| "learning_rate": 2.676197045869511e-07, |
| "loss": 0.4308, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.906423034330011, |
| "grad_norm": 0.2958923876285553, |
| "learning_rate": 2.6450847502627883e-07, |
| "loss": 0.4338, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.9069767441860465, |
| "grad_norm": 0.29932379722595215, |
| "learning_rate": 2.6141494484111017e-07, |
| "loss": 0.4521, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.907530454042082, |
| "grad_norm": 0.3040353059768677, |
| "learning_rate": 2.5833912559382444e-07, |
| "loss": 0.4603, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.9080841638981174, |
| "grad_norm": 0.28034743666648865, |
| "learning_rate": 2.5528102878060626e-07, |
| "loss": 0.4446, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9086378737541528, |
| "grad_norm": 0.35027840733528137, |
| "learning_rate": 2.522406658313997e-07, |
| "loss": 0.4989, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.9091915836101883, |
| "grad_norm": 0.3450046181678772, |
| "learning_rate": 2.492180481098655e-07, |
| "loss": 0.4531, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.9097452934662237, |
| "grad_norm": 0.3023633062839508, |
| "learning_rate": 2.462131869133427e-07, |
| "loss": 0.4493, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.9102990033222591, |
| "grad_norm": 0.2984960079193115, |
| "learning_rate": 2.4322609347280204e-07, |
| "loss": 0.439, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.9108527131782945, |
| "grad_norm": 0.2857573926448822, |
| "learning_rate": 2.4025677895280377e-07, |
| "loss": 0.4474, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.9114064230343301, |
| "grad_norm": 0.31294727325439453, |
| "learning_rate": 2.3730525445146146e-07, |
| "loss": 0.4427, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.9119601328903655, |
| "grad_norm": 0.29343825578689575, |
| "learning_rate": 2.3437153100039244e-07, |
| "loss": 0.4839, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.9125138427464009, |
| "grad_norm": 0.2941429615020752, |
| "learning_rate": 2.3145561956468555e-07, |
| "loss": 0.4744, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.9130675526024363, |
| "grad_norm": 0.2945179045200348, |
| "learning_rate": 2.2855753104285062e-07, |
| "loss": 0.4654, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.9136212624584718, |
| "grad_norm": 0.288025438785553, |
| "learning_rate": 2.2567727626678527e-07, |
| "loss": 0.4441, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9141749723145072, |
| "grad_norm": 0.2953549921512604, |
| "learning_rate": 2.2281486600173207e-07, |
| "loss": 0.4745, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.9147286821705426, |
| "grad_norm": 0.3306369483470917, |
| "learning_rate": 2.199703109462359e-07, |
| "loss": 0.4593, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.915282392026578, |
| "grad_norm": 0.31291428208351135, |
| "learning_rate": 2.1714362173210824e-07, |
| "loss": 0.4909, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.9158361018826136, |
| "grad_norm": 0.2797071635723114, |
| "learning_rate": 2.1433480892438353e-07, |
| "loss": 0.446, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.916389811738649, |
| "grad_norm": 0.2792530953884125, |
| "learning_rate": 2.1154388302128126e-07, |
| "loss": 0.4325, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.9169435215946844, |
| "grad_norm": 0.2993029057979584, |
| "learning_rate": 2.0877085445416889e-07, |
| "loss": 0.4564, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.9174972314507198, |
| "grad_norm": 0.2832440435886383, |
| "learning_rate": 2.0601573358751904e-07, |
| "loss": 0.4597, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.9180509413067552, |
| "grad_norm": 0.2859455943107605, |
| "learning_rate": 2.0327853071887172e-07, |
| "loss": 0.4505, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.9186046511627907, |
| "grad_norm": 0.2933015823364258, |
| "learning_rate": 2.0055925607879888e-07, |
| "loss": 0.4547, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.9191583610188261, |
| "grad_norm": 0.29843026399612427, |
| "learning_rate": 1.978579198308622e-07, |
| "loss": 0.444, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9197120708748616, |
| "grad_norm": 0.30015841126441956, |
| "learning_rate": 1.9517453207157865e-07, |
| "loss": 0.4544, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.920265780730897, |
| "grad_norm": 0.30778196454048157, |
| "learning_rate": 1.9250910283037826e-07, |
| "loss": 0.4556, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.9208194905869325, |
| "grad_norm": 0.3055875897407532, |
| "learning_rate": 1.8986164206957037e-07, |
| "loss": 0.452, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.9213732004429679, |
| "grad_norm": 0.33771249651908875, |
| "learning_rate": 1.8723215968430687e-07, |
| "loss": 0.4612, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.9219269102990033, |
| "grad_norm": 0.31438618898391724, |
| "learning_rate": 1.8462066550254232e-07, |
| "loss": 0.4467, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.9224806201550387, |
| "grad_norm": 0.298490971326828, |
| "learning_rate": 1.8202716928499842e-07, |
| "loss": 0.4786, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.9230343300110742, |
| "grad_norm": 0.293795108795166, |
| "learning_rate": 1.7945168072512732e-07, |
| "loss": 0.4509, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.9235880398671097, |
| "grad_norm": 0.31182658672332764, |
| "learning_rate": 1.7689420944907666e-07, |
| "loss": 0.4548, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.9241417497231451, |
| "grad_norm": 0.29164862632751465, |
| "learning_rate": 1.743547650156535e-07, |
| "loss": 0.4418, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.9246954595791805, |
| "grad_norm": 0.3175080716609955, |
| "learning_rate": 1.7183335691628556e-07, |
| "loss": 0.4611, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.925249169435216, |
| "grad_norm": 0.2677769958972931, |
| "learning_rate": 1.6932999457498823e-07, |
| "loss": 0.439, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.9258028792912514, |
| "grad_norm": 0.3190261423587799, |
| "learning_rate": 1.6684468734833149e-07, |
| "loss": 0.4751, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.9263565891472868, |
| "grad_norm": 0.28873810172080994, |
| "learning_rate": 1.6437744452539983e-07, |
| "loss": 0.4619, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.9269102990033222, |
| "grad_norm": 0.2862297594547272, |
| "learning_rate": 1.6192827532776235e-07, |
| "loss": 0.4522, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.9274640088593576, |
| "grad_norm": 0.26955926418304443, |
| "learning_rate": 1.5949718890943377e-07, |
| "loss": 0.4688, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.9280177187153932, |
| "grad_norm": 0.3056979477405548, |
| "learning_rate": 1.5708419435684463e-07, |
| "loss": 0.4674, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.2731686532497406, |
| "learning_rate": 1.546893006888045e-07, |
| "loss": 0.4383, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.929125138427464, |
| "grad_norm": 0.31720587611198425, |
| "learning_rate": 1.523125168564693e-07, |
| "loss": 0.4678, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.9296788482834994, |
| "grad_norm": 0.3040732145309448, |
| "learning_rate": 1.499538517433069e-07, |
| "loss": 0.4598, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.309842973947525, |
| "learning_rate": 1.4761331416506596e-07, |
| "loss": 0.4482, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9307862679955703, |
| "grad_norm": 0.2698879837989807, |
| "learning_rate": 1.4529091286973994e-07, |
| "loss": 0.4442, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.9313399778516057, |
| "grad_norm": 0.31583163142204285, |
| "learning_rate": 1.4298665653753818e-07, |
| "loss": 0.4895, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.9318936877076412, |
| "grad_norm": 0.3226719796657562, |
| "learning_rate": 1.407005537808498e-07, |
| "loss": 0.4477, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.9324473975636767, |
| "grad_norm": 0.30792638659477234, |
| "learning_rate": 1.384326131442132e-07, |
| "loss": 0.4736, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.9330011074197121, |
| "grad_norm": 0.3065618574619293, |
| "learning_rate": 1.3618284310428554e-07, |
| "loss": 0.4637, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.9335548172757475, |
| "grad_norm": 0.2993546724319458, |
| "learning_rate": 1.3395125206980774e-07, |
| "loss": 0.4272, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.9341085271317829, |
| "grad_norm": 0.3157908618450165, |
| "learning_rate": 1.317378483815751e-07, |
| "loss": 0.44, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.9346622369878184, |
| "grad_norm": 0.3331739604473114, |
| "learning_rate": 1.2954264031240727e-07, |
| "loss": 0.4629, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.9352159468438538, |
| "grad_norm": 0.31486618518829346, |
| "learning_rate": 1.2736563606711384e-07, |
| "loss": 0.4528, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.9357696566998892, |
| "grad_norm": 0.31547436118125916, |
| "learning_rate": 1.2520684378246884e-07, |
| "loss": 0.4645, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9363233665559247, |
| "grad_norm": 0.3062952160835266, |
| "learning_rate": 1.230662715271741e-07, |
| "loss": 0.4522, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.9368770764119602, |
| "grad_norm": 0.313684344291687, |
| "learning_rate": 1.2094392730183312e-07, |
| "loss": 0.4547, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.9374307862679956, |
| "grad_norm": 0.2886435389518738, |
| "learning_rate": 1.1883981903892228e-07, |
| "loss": 0.4552, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.937984496124031, |
| "grad_norm": 0.33046281337738037, |
| "learning_rate": 1.1675395460275629e-07, |
| "loss": 0.496, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.9385382059800664, |
| "grad_norm": 0.3314034044742584, |
| "learning_rate": 1.1468634178946392e-07, |
| "loss": 0.482, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.9390919158361019, |
| "grad_norm": 0.30235835909843445, |
| "learning_rate": 1.1263698832695513e-07, |
| "loss": 0.4503, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.9396456256921373, |
| "grad_norm": 0.3231419324874878, |
| "learning_rate": 1.1060590187489562e-07, |
| "loss": 0.475, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.9401993355481728, |
| "grad_norm": 0.2842075228691101, |
| "learning_rate": 1.08593090024674e-07, |
| "loss": 0.4566, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.9407530454042082, |
| "grad_norm": 0.3157922625541687, |
| "learning_rate": 1.0659856029937688e-07, |
| "loss": 0.4773, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.9413067552602437, |
| "grad_norm": 0.30469900369644165, |
| "learning_rate": 1.046223201537605e-07, |
| "loss": 0.4837, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9418604651162791, |
| "grad_norm": 0.3071087598800659, |
| "learning_rate": 1.0266437697422026e-07, |
| "loss": 0.4611, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.9424141749723145, |
| "grad_norm": 0.27550947666168213, |
| "learning_rate": 1.007247380787657e-07, |
| "loss": 0.4418, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.9429678848283499, |
| "grad_norm": 0.27691787481307983, |
| "learning_rate": 9.880341071699274e-08, |
| "loss": 0.4683, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.9435215946843853, |
| "grad_norm": 0.3100975751876831, |
| "learning_rate": 9.690040207005425e-08, |
| "loss": 0.4815, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.9440753045404208, |
| "grad_norm": 0.26599806547164917, |
| "learning_rate": 9.501571925063735e-08, |
| "loss": 0.4554, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.9446290143964563, |
| "grad_norm": 0.30481967329978943, |
| "learning_rate": 9.314936930293283e-08, |
| "loss": 0.4633, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.9451827242524917, |
| "grad_norm": 0.3184366226196289, |
| "learning_rate": 9.130135920261185e-08, |
| "loss": 0.4567, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.9457364341085271, |
| "grad_norm": 0.2845808267593384, |
| "learning_rate": 8.94716958567976e-08, |
| "loss": 0.4686, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.9462901439645626, |
| "grad_norm": 0.3194059729576111, |
| "learning_rate": 8.766038610404037e-08, |
| "loss": 0.4747, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.946843853820598, |
| "grad_norm": 0.2878986597061157, |
| "learning_rate": 8.58674367142931e-08, |
| "loss": 0.4453, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.9473975636766334, |
| "grad_norm": 0.30820009112358093, |
| "learning_rate": 8.40928543888836e-08, |
| "loss": 0.4616, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.9479512735326688, |
| "grad_norm": 0.34989163279533386, |
| "learning_rate": 8.233664576049239e-08, |
| "loss": 0.4737, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.9485049833887044, |
| "grad_norm": 0.2795168161392212, |
| "learning_rate": 8.059881739312492e-08, |
| "loss": 0.4454, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.9490586932447398, |
| "grad_norm": 0.28132033348083496, |
| "learning_rate": 7.887937578208992e-08, |
| "loss": 0.4487, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.9496124031007752, |
| "grad_norm": 0.2817052900791168, |
| "learning_rate": 7.717832735397335e-08, |
| "loss": 0.4594, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.9501661129568106, |
| "grad_norm": 0.3057515621185303, |
| "learning_rate": 7.549567846661388e-08, |
| "loss": 0.4434, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.9507198228128461, |
| "grad_norm": 0.2940051257610321, |
| "learning_rate": 7.383143540908189e-08, |
| "loss": 0.4566, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.9512735326688815, |
| "grad_norm": 0.30491095781326294, |
| "learning_rate": 7.218560440165223e-08, |
| "loss": 0.4598, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.9518272425249169, |
| "grad_norm": 0.2990501821041107, |
| "learning_rate": 7.055819159578425e-08, |
| "loss": 0.4486, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.29552608728408813, |
| "learning_rate": 6.894920307409624e-08, |
| "loss": 0.4769, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9529346622369879, |
| "grad_norm": 0.31582629680633545, |
| "learning_rate": 6.735864485034493e-08, |
| "loss": 0.4729, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.9534883720930233, |
| "grad_norm": 0.28552842140197754, |
| "learning_rate": 6.578652286940213e-08, |
| "loss": 0.486, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.9540420819490587, |
| "grad_norm": 0.2903539538383484, |
| "learning_rate": 6.423284300723087e-08, |
| "loss": 0.4664, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.9545957918050941, |
| "grad_norm": 0.2721926271915436, |
| "learning_rate": 6.269761107086548e-08, |
| "loss": 0.4703, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.9551495016611296, |
| "grad_norm": 0.2885482907295227, |
| "learning_rate": 6.118083279839094e-08, |
| "loss": 0.4686, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.955703211517165, |
| "grad_norm": 0.28239181637763977, |
| "learning_rate": 5.968251385891744e-08, |
| "loss": 0.4543, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.9562569213732004, |
| "grad_norm": 0.32002443075180054, |
| "learning_rate": 5.820265985256257e-08, |
| "loss": 0.4776, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.9568106312292359, |
| "grad_norm": 0.2961246967315674, |
| "learning_rate": 5.674127631043025e-08, |
| "loss": 0.4498, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.9573643410852714, |
| "grad_norm": 0.3170013129711151, |
| "learning_rate": 5.529836869458738e-08, |
| "loss": 0.4737, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.9579180509413068, |
| "grad_norm": 0.2816410958766937, |
| "learning_rate": 5.387394239804666e-08, |
| "loss": 0.4628, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9584717607973422, |
| "grad_norm": 0.3266596794128418, |
| "learning_rate": 5.246800274474439e-08, |
| "loss": 0.4755, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.9590254706533776, |
| "grad_norm": 0.30849379301071167, |
| "learning_rate": 5.1080554989522136e-08, |
| "loss": 0.4629, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.959579180509413, |
| "grad_norm": 0.31269577145576477, |
| "learning_rate": 4.971160431810396e-08, |
| "loss": 0.4689, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.9601328903654485, |
| "grad_norm": 0.28276434540748596, |
| "learning_rate": 4.8361155847080896e-08, |
| "loss": 0.4354, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.9606866002214839, |
| "grad_norm": 0.2990057170391083, |
| "learning_rate": 4.7029214623890984e-08, |
| "loss": 0.4491, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.9612403100775194, |
| "grad_norm": 0.2993354797363281, |
| "learning_rate": 4.571578562679757e-08, |
| "loss": 0.4614, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.9617940199335548, |
| "grad_norm": 0.2988957464694977, |
| "learning_rate": 4.44208737648727e-08, |
| "loss": 0.4296, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.9623477297895903, |
| "grad_norm": 0.2898132801055908, |
| "learning_rate": 4.314448387798098e-08, |
| "loss": 0.4654, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.9629014396456257, |
| "grad_norm": 0.2892858684062958, |
| "learning_rate": 4.1886620736757425e-08, |
| "loss": 0.4774, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.9634551495016611, |
| "grad_norm": 0.33196502923965454, |
| "learning_rate": 4.064728904259185e-08, |
| "loss": 0.4687, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9640088593576965, |
| "grad_norm": 0.30135905742645264, |
| "learning_rate": 3.9426493427611177e-08, |
| "loss": 0.468, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.964562569213732, |
| "grad_norm": 0.295409619808197, |
| "learning_rate": 3.8224238454661056e-08, |
| "loss": 0.4378, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.9651162790697675, |
| "grad_norm": 0.3131703734397888, |
| "learning_rate": 3.704052861729146e-08, |
| "loss": 0.4601, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.9656699889258029, |
| "grad_norm": 0.3141990602016449, |
| "learning_rate": 3.587536833973559e-08, |
| "loss": 0.4844, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.9662236987818383, |
| "grad_norm": 0.32879433035850525, |
| "learning_rate": 3.472876197689823e-08, |
| "loss": 0.4773, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.9667774086378738, |
| "grad_norm": 0.29594501852989197, |
| "learning_rate": 3.360071381433516e-08, |
| "loss": 0.4628, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.9673311184939092, |
| "grad_norm": 0.29919105768203735, |
| "learning_rate": 3.24912280682399e-08, |
| "loss": 0.437, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.9678848283499446, |
| "grad_norm": 0.3089485764503479, |
| "learning_rate": 3.1400308885427e-08, |
| "loss": 0.4666, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.96843853820598, |
| "grad_norm": 0.2961753308773041, |
| "learning_rate": 3.0327960343317084e-08, |
| "loss": 0.4576, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.9689922480620154, |
| "grad_norm": 0.2940447926521301, |
| "learning_rate": 2.927418644991964e-08, |
| "loss": 0.4601, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.969545957918051, |
| "grad_norm": 0.28923407196998596, |
| "learning_rate": 2.823899114382078e-08, |
| "loss": 0.4709, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.9700996677740864, |
| "grad_norm": 0.33245790004730225, |
| "learning_rate": 2.722237829416774e-08, |
| "loss": 0.4832, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.9706533776301218, |
| "grad_norm": 0.3206743001937866, |
| "learning_rate": 2.6224351700652184e-08, |
| "loss": 0.4695, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.9712070874861573, |
| "grad_norm": 0.30548450350761414, |
| "learning_rate": 2.5244915093499134e-08, |
| "loss": 0.4379, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.9717607973421927, |
| "grad_norm": 0.30433905124664307, |
| "learning_rate": 2.4284072133451408e-08, |
| "loss": 0.4377, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.9723145071982281, |
| "grad_norm": 0.29879283905029297, |
| "learning_rate": 2.3341826411756863e-08, |
| "loss": 0.4641, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.9728682170542635, |
| "grad_norm": 0.3096272647380829, |
| "learning_rate": 2.241818145015284e-08, |
| "loss": 0.4694, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.973421926910299, |
| "grad_norm": 0.29536715149879456, |
| "learning_rate": 2.1513140700855085e-08, |
| "loss": 0.4439, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.9739756367663345, |
| "grad_norm": 0.31942957639694214, |
| "learning_rate": 2.0626707546543833e-08, |
| "loss": 0.4351, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.9745293466223699, |
| "grad_norm": 0.29551762342453003, |
| "learning_rate": 1.9758885300353858e-08, |
| "loss": 0.4833, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9750830564784053, |
| "grad_norm": 0.3155532479286194, |
| "learning_rate": 1.8909677205856682e-08, |
| "loss": 0.4722, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.9756367663344407, |
| "grad_norm": 0.30302658677101135, |
| "learning_rate": 1.8079086437052805e-08, |
| "loss": 0.4622, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.9761904761904762, |
| "grad_norm": 0.3077971041202545, |
| "learning_rate": 1.7267116098359516e-08, |
| "loss": 0.4336, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.9767441860465116, |
| "grad_norm": 0.28718769550323486, |
| "learning_rate": 1.647376922459698e-08, |
| "loss": 0.4413, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.9772978959025471, |
| "grad_norm": 0.3143884837627411, |
| "learning_rate": 1.5699048780979388e-08, |
| "loss": 0.4567, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.9778516057585825, |
| "grad_norm": 0.30350565910339355, |
| "learning_rate": 1.494295766310161e-08, |
| "loss": 0.4637, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.978405315614618, |
| "grad_norm": 0.30483105778694153, |
| "learning_rate": 1.4205498696930332e-08, |
| "loss": 0.4617, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.9789590254706534, |
| "grad_norm": 0.30251219868659973, |
| "learning_rate": 1.348667463879294e-08, |
| "loss": 0.4829, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.9795127353266888, |
| "grad_norm": 0.3110658824443817, |
| "learning_rate": 1.2786488175366429e-08, |
| "loss": 0.4719, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.9800664451827242, |
| "grad_norm": 0.2927253544330597, |
| "learning_rate": 1.2104941923667956e-08, |
| "loss": 0.4556, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.9806201550387597, |
| "grad_norm": 0.2785692512989044, |
| "learning_rate": 1.1442038431044856e-08, |
| "loss": 0.4622, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.9811738648947951, |
| "grad_norm": 0.30565908551216125, |
| "learning_rate": 1.0797780175166305e-08, |
| "loss": 0.4356, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.9817275747508306, |
| "grad_norm": 0.28984615206718445, |
| "learning_rate": 1.017216956401168e-08, |
| "loss": 0.4377, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.982281284606866, |
| "grad_norm": 0.31095460057258606, |
| "learning_rate": 9.565208935863878e-09, |
| "loss": 0.453, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.9828349944629015, |
| "grad_norm": 0.3264619708061218, |
| "learning_rate": 8.97690055929934e-09, |
| "loss": 0.4563, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.9833887043189369, |
| "grad_norm": 0.309268057346344, |
| "learning_rate": 8.407246633178601e-09, |
| "loss": 0.4777, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.9839424141749723, |
| "grad_norm": 0.30327308177948, |
| "learning_rate": 7.856249286642414e-09, |
| "loss": 0.465, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.9844961240310077, |
| "grad_norm": 0.31233885884284973, |
| "learning_rate": 7.32391057909676e-09, |
| "loss": 0.4541, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.9850498338870431, |
| "grad_norm": 0.2879256010055542, |
| "learning_rate": 6.810232500212288e-09, |
| "loss": 0.4271, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.9856035437430787, |
| "grad_norm": 0.3132525086402893, |
| "learning_rate": 6.315216969912663e-09, |
| "loss": 0.4738, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.9861572535991141, |
| "grad_norm": 0.31130507588386536, |
| "learning_rate": 5.838865838366792e-09, |
| "loss": 0.4787, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.9867109634551495, |
| "grad_norm": 0.2845606803894043, |
| "learning_rate": 5.3811808859866035e-09, |
| "loss": 0.4547, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.987264673311185, |
| "grad_norm": 0.3038474917411804, |
| "learning_rate": 4.942163823414281e-09, |
| "loss": 0.4498, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.9878183831672204, |
| "grad_norm": 0.2789425253868103, |
| "learning_rate": 4.521816291520597e-09, |
| "loss": 0.4579, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.9883720930232558, |
| "grad_norm": 0.29215115308761597, |
| "learning_rate": 4.120139861397143e-09, |
| "loss": 0.491, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.9889258028792912, |
| "grad_norm": 0.3025710880756378, |
| "learning_rate": 3.737136034349109e-09, |
| "loss": 0.4745, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.9894795127353266, |
| "grad_norm": 0.2971054017543793, |
| "learning_rate": 3.372806241892512e-09, |
| "loss": 0.49, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.9900332225913622, |
| "grad_norm": 0.2840878963470459, |
| "learning_rate": 3.0271518457464235e-09, |
| "loss": 0.4816, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.9905869324473976, |
| "grad_norm": 0.3151835501194, |
| "learning_rate": 2.7001741378290815e-09, |
| "loss": 0.4736, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.991140642303433, |
| "grad_norm": 0.3166757822036743, |
| "learning_rate": 2.3918743402517874e-09, |
| "loss": 0.4668, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.9916943521594684, |
| "grad_norm": 0.295709490776062, |
| "learning_rate": 2.102253605316684e-09, |
| "loss": 0.4622, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.9922480620155039, |
| "grad_norm": 0.31266582012176514, |
| "learning_rate": 1.8313130155100944e-09, |
| "loss": 0.4752, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.9928017718715393, |
| "grad_norm": 0.31861746311187744, |
| "learning_rate": 1.5790535835003006e-09, |
| "loss": 0.4417, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.9933554817275747, |
| "grad_norm": 0.30592185258865356, |
| "learning_rate": 1.3454762521314391e-09, |
| "loss": 0.4751, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.9939091915836102, |
| "grad_norm": 0.29663553833961487, |
| "learning_rate": 1.130581894422389e-09, |
| "loss": 0.4939, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.9944629014396457, |
| "grad_norm": 0.30990320444107056, |
| "learning_rate": 9.343713135623323e-10, |
| "loss": 0.4533, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.9950166112956811, |
| "grad_norm": 0.2900349795818329, |
| "learning_rate": 7.568452429090877e-10, |
| "loss": 0.4802, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.9955703211517165, |
| "grad_norm": 0.2999788224697113, |
| "learning_rate": 5.980043459830054e-10, |
| "loss": 0.4733, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.9961240310077519, |
| "grad_norm": 0.3328549265861511, |
| "learning_rate": 4.578492164680759e-10, |
| "loss": 0.4518, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.9966777408637874, |
| "grad_norm": 0.28633761405944824, |
| "learning_rate": 3.363803782086006e-10, |
| "loss": 0.4466, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9972314507198228, |
| "grad_norm": 0.3402009606361389, |
| "learning_rate": 2.335982852064156e-10, |
| "loss": 0.45, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.9977851605758582, |
| "grad_norm": 0.2974521517753601, |
| "learning_rate": 1.4950332161978164e-10, |
| "loss": 0.4425, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.9983388704318937, |
| "grad_norm": 0.29301029443740845, |
| "learning_rate": 8.409580176282905e-11, |
| "loss": 0.4514, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.9988925802879292, |
| "grad_norm": 0.2983539402484894, |
| "learning_rate": 3.737597010111671e-11, |
| "loss": 0.4824, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.9994462901439646, |
| "grad_norm": 0.2854422628879547, |
| "learning_rate": 9.344001256628154e-12, |
| "loss": 0.4563, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2957075536251068, |
| "learning_rate": 0.0, |
| "loss": 0.4749, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1806, |
| "total_flos": 2727819468472320.0, |
| "train_loss": 0.4883633825107006, |
| "train_runtime": 63393.9102, |
| "train_samples_per_second": 2.734, |
| "train_steps_per_second": 0.028 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1806, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2727819468472320.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |