| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.2, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005, |
| "grad_norm": 430.7713928222656, |
| "learning_rate": 9.996e-06, |
| "loss": 9.3758, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 115.31803894042969, |
| "learning_rate": 9.991000000000001e-06, |
| "loss": 7.1112, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 107.39344787597656, |
| "learning_rate": 9.986e-06, |
| "loss": 7.7463, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 81.79249572753906, |
| "learning_rate": 9.981000000000002e-06, |
| "loss": 6.9878, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 81.8421859741211, |
| "learning_rate": 9.976e-06, |
| "loss": 5.8778, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 97.48958587646484, |
| "learning_rate": 9.971e-06, |
| "loss": 6.7617, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 67.10735321044922, |
| "learning_rate": 9.966e-06, |
| "loss": 6.6815, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 91.79243469238281, |
| "learning_rate": 9.961e-06, |
| "loss": 5.5256, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 73.99298858642578, |
| "learning_rate": 9.956000000000001e-06, |
| "loss": 6.301, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 82.86575317382812, |
| "learning_rate": 9.951e-06, |
| "loss": 6.0791, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 79.45413208007812, |
| "learning_rate": 9.946000000000001e-06, |
| "loss": 6.4806, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 93.1746597290039, |
| "learning_rate": 9.941e-06, |
| "loss": 6.0065, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 80.89791107177734, |
| "learning_rate": 9.936000000000001e-06, |
| "loss": 7.1122, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 71.02462005615234, |
| "learning_rate": 9.931e-06, |
| "loss": 5.8262, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 88.24502563476562, |
| "learning_rate": 9.926000000000001e-06, |
| "loss": 6.1168, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 85.9001693725586, |
| "learning_rate": 9.921e-06, |
| "loss": 6.6863, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 86.51998138427734, |
| "learning_rate": 9.916000000000001e-06, |
| "loss": 6.4471, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 66.9721450805664, |
| "learning_rate": 9.911e-06, |
| "loss": 6.1214, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 81.62995147705078, |
| "learning_rate": 9.906000000000001e-06, |
| "loss": 7.0519, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 76.39604949951172, |
| "learning_rate": 9.901e-06, |
| "loss": 5.5445, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0105, |
| "grad_norm": 84.44085693359375, |
| "learning_rate": 9.896000000000001e-06, |
| "loss": 6.6272, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 82.27601623535156, |
| "learning_rate": 9.891e-06, |
| "loss": 6.0362, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0115, |
| "grad_norm": 83.45719146728516, |
| "learning_rate": 9.886000000000002e-06, |
| "loss": 6.0516, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 69.73688507080078, |
| "learning_rate": 9.881e-06, |
| "loss": 5.4801, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 61.0240364074707, |
| "learning_rate": 9.876000000000002e-06, |
| "loss": 4.7676, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 110.38282775878906, |
| "learning_rate": 9.871000000000001e-06, |
| "loss": 6.1181, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0135, |
| "grad_norm": 68.89127349853516, |
| "learning_rate": 9.866000000000002e-06, |
| "loss": 5.1128, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 120.67707061767578, |
| "learning_rate": 9.861000000000001e-06, |
| "loss": 5.8327, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0145, |
| "grad_norm": 84.3951644897461, |
| "learning_rate": 9.856000000000002e-06, |
| "loss": 6.9349, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 73.58941650390625, |
| "learning_rate": 9.851000000000001e-06, |
| "loss": 5.4627, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0155, |
| "grad_norm": 72.00989532470703, |
| "learning_rate": 9.846000000000002e-06, |
| "loss": 5.6341, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 78.54756927490234, |
| "learning_rate": 9.841000000000001e-06, |
| "loss": 5.5729, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0165, |
| "grad_norm": 94.46847534179688, |
| "learning_rate": 9.836e-06, |
| "loss": 6.7456, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 68.8740463256836, |
| "learning_rate": 9.831000000000001e-06, |
| "loss": 6.0283, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 67.59810638427734, |
| "learning_rate": 9.826e-06, |
| "loss": 5.9708, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 82.12776184082031, |
| "learning_rate": 9.821000000000001e-06, |
| "loss": 6.137, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0185, |
| "grad_norm": 86.02208709716797, |
| "learning_rate": 9.816e-06, |
| "loss": 5.9061, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 68.40846252441406, |
| "learning_rate": 9.811e-06, |
| "loss": 5.9535, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0195, |
| "grad_norm": 71.01241302490234, |
| "learning_rate": 9.806e-06, |
| "loss": 5.3895, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 69.2813720703125, |
| "learning_rate": 9.801e-06, |
| "loss": 5.8592, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0205, |
| "grad_norm": 91.32173156738281, |
| "learning_rate": 9.796e-06, |
| "loss": 6.4962, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 72.96836853027344, |
| "learning_rate": 9.791e-06, |
| "loss": 5.7814, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0215, |
| "grad_norm": 67.66992950439453, |
| "learning_rate": 9.786e-06, |
| "loss": 5.7865, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 66.08545684814453, |
| "learning_rate": 9.781e-06, |
| "loss": 5.8699, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 88.2244873046875, |
| "learning_rate": 9.776000000000001e-06, |
| "loss": 6.0525, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 77.0357666015625, |
| "learning_rate": 9.771e-06, |
| "loss": 5.8639, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0235, |
| "grad_norm": 67.81790161132812, |
| "learning_rate": 9.766000000000001e-06, |
| "loss": 5.2133, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 86.99850463867188, |
| "learning_rate": 9.761e-06, |
| "loss": 5.5895, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0245, |
| "grad_norm": 75.6593017578125, |
| "learning_rate": 9.756000000000001e-06, |
| "loss": 5.8113, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 68.56720733642578, |
| "learning_rate": 9.751e-06, |
| "loss": 5.2984, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0255, |
| "grad_norm": 66.91255187988281, |
| "learning_rate": 9.746000000000001e-06, |
| "loss": 5.224, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 98.80131530761719, |
| "learning_rate": 9.741e-06, |
| "loss": 5.8157, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0265, |
| "grad_norm": 74.00731658935547, |
| "learning_rate": 9.736000000000001e-06, |
| "loss": 5.6423, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 66.43975830078125, |
| "learning_rate": 9.731e-06, |
| "loss": 4.4168, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0275, |
| "grad_norm": 78.20140838623047, |
| "learning_rate": 9.726000000000001e-06, |
| "loss": 6.0293, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 76.42058563232422, |
| "learning_rate": 9.721e-06, |
| "loss": 6.1047, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0285, |
| "grad_norm": 69.54177856445312, |
| "learning_rate": 9.716000000000002e-06, |
| "loss": 5.6279, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 71.5013656616211, |
| "learning_rate": 9.711e-06, |
| "loss": 5.3354, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0295, |
| "grad_norm": 69.59915161132812, |
| "learning_rate": 9.706000000000002e-06, |
| "loss": 4.9435, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 68.15699005126953, |
| "learning_rate": 9.701e-06, |
| "loss": 6.8088, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0305, |
| "grad_norm": 83.83426666259766, |
| "learning_rate": 9.696000000000002e-06, |
| "loss": 6.3353, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 100.34281921386719, |
| "learning_rate": 9.691000000000001e-06, |
| "loss": 6.3657, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0315, |
| "grad_norm": 60.91353225708008, |
| "learning_rate": 9.686000000000002e-06, |
| "loss": 5.495, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 68.33106231689453, |
| "learning_rate": 9.681000000000001e-06, |
| "loss": 6.231, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.0325, |
| "grad_norm": 56.71453094482422, |
| "learning_rate": 9.676e-06, |
| "loss": 5.9889, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 68.593505859375, |
| "learning_rate": 9.671000000000001e-06, |
| "loss": 6.3372, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0335, |
| "grad_norm": 66.93341064453125, |
| "learning_rate": 9.666e-06, |
| "loss": 5.4936, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 70.41497802734375, |
| "learning_rate": 9.661000000000001e-06, |
| "loss": 6.5437, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0345, |
| "grad_norm": 55.9371452331543, |
| "learning_rate": 9.656e-06, |
| "loss": 6.0847, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 72.59474182128906, |
| "learning_rate": 9.651e-06, |
| "loss": 5.2704, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0355, |
| "grad_norm": 65.90139770507812, |
| "learning_rate": 9.646e-06, |
| "loss": 5.6678, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 55.99635696411133, |
| "learning_rate": 9.641e-06, |
| "loss": 5.2667, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0365, |
| "grad_norm": 61.77568817138672, |
| "learning_rate": 9.636e-06, |
| "loss": 4.7788, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 62.45391845703125, |
| "learning_rate": 9.631e-06, |
| "loss": 5.3709, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0375, |
| "grad_norm": 77.93061828613281, |
| "learning_rate": 9.626e-06, |
| "loss": 6.911, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 77.62232208251953, |
| "learning_rate": 9.621e-06, |
| "loss": 6.6592, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0385, |
| "grad_norm": 67.21658325195312, |
| "learning_rate": 9.616e-06, |
| "loss": 5.5068, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 61.39186477661133, |
| "learning_rate": 9.611e-06, |
| "loss": 6.015, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0395, |
| "grad_norm": 68.21427154541016, |
| "learning_rate": 9.606000000000001e-06, |
| "loss": 5.2284, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 70.14752197265625, |
| "learning_rate": 9.601e-06, |
| "loss": 6.014, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0405, |
| "grad_norm": 70.03300476074219, |
| "learning_rate": 9.596000000000001e-06, |
| "loss": 5.2755, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 71.4956283569336, |
| "learning_rate": 9.591e-06, |
| "loss": 4.7642, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0415, |
| "grad_norm": 65.81194305419922, |
| "learning_rate": 9.586000000000001e-06, |
| "loss": 5.8357, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 80.47106170654297, |
| "learning_rate": 9.581e-06, |
| "loss": 5.1515, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0425, |
| "grad_norm": 78.7089614868164, |
| "learning_rate": 9.576000000000001e-06, |
| "loss": 5.3348, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 75.13068389892578, |
| "learning_rate": 9.571e-06, |
| "loss": 5.3623, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0435, |
| "grad_norm": 69.73881530761719, |
| "learning_rate": 9.566000000000001e-06, |
| "loss": 5.8583, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 60.87718963623047, |
| "learning_rate": 9.561e-06, |
| "loss": 5.3451, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0445, |
| "grad_norm": 68.39297485351562, |
| "learning_rate": 9.556000000000001e-06, |
| "loss": 5.2354, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 80.71501159667969, |
| "learning_rate": 9.551e-06, |
| "loss": 5.6821, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0455, |
| "grad_norm": 75.60702514648438, |
| "learning_rate": 9.546000000000001e-06, |
| "loss": 6.4379, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 65.37830352783203, |
| "learning_rate": 9.541e-06, |
| "loss": 5.7551, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0465, |
| "grad_norm": 63.455440521240234, |
| "learning_rate": 9.536000000000002e-06, |
| "loss": 5.6863, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 66.560546875, |
| "learning_rate": 9.531e-06, |
| "loss": 5.3967, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.0475, |
| "grad_norm": 66.29322052001953, |
| "learning_rate": 9.526000000000002e-06, |
| "loss": 6.5743, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 73.60453033447266, |
| "learning_rate": 9.521e-06, |
| "loss": 5.4218, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.0485, |
| "grad_norm": 63.51287841796875, |
| "learning_rate": 9.516e-06, |
| "loss": 5.9678, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 55.5192985534668, |
| "learning_rate": 9.511000000000001e-06, |
| "loss": 5.7875, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0495, |
| "grad_norm": 62.022682189941406, |
| "learning_rate": 9.506e-06, |
| "loss": 7.1336, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 57.97554397583008, |
| "learning_rate": 9.501000000000001e-06, |
| "loss": 5.1286, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0505, |
| "grad_norm": 58.80134582519531, |
| "learning_rate": 9.496e-06, |
| "loss": 5.0009, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 57.95747756958008, |
| "learning_rate": 9.491000000000001e-06, |
| "loss": 4.9228, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0515, |
| "grad_norm": 60.494632720947266, |
| "learning_rate": 9.486e-06, |
| "loss": 5.7521, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 53.30976104736328, |
| "learning_rate": 9.481000000000001e-06, |
| "loss": 5.8064, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.0525, |
| "grad_norm": 71.98876190185547, |
| "learning_rate": 9.476e-06, |
| "loss": 4.4395, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 59.24486541748047, |
| "learning_rate": 9.471000000000001e-06, |
| "loss": 5.1754, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0535, |
| "grad_norm": 83.90294647216797, |
| "learning_rate": 9.466e-06, |
| "loss": 5.4962, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 60.665740966796875, |
| "learning_rate": 9.461000000000001e-06, |
| "loss": 5.3369, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0545, |
| "grad_norm": 74.39241790771484, |
| "learning_rate": 9.456e-06, |
| "loss": 5.2247, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 66.4027328491211, |
| "learning_rate": 9.451000000000002e-06, |
| "loss": 5.2229, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0555, |
| "grad_norm": 61.06802749633789, |
| "learning_rate": 9.446e-06, |
| "loss": 5.8417, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 57.60587692260742, |
| "learning_rate": 9.441000000000002e-06, |
| "loss": 4.9819, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0565, |
| "grad_norm": 73.18106079101562, |
| "learning_rate": 9.436e-06, |
| "loss": 6.4829, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 70.94274139404297, |
| "learning_rate": 9.431000000000002e-06, |
| "loss": 5.8026, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0575, |
| "grad_norm": 72.48558044433594, |
| "learning_rate": 9.426000000000001e-06, |
| "loss": 5.5023, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 46.47978591918945, |
| "learning_rate": 9.421000000000002e-06, |
| "loss": 4.4358, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0585, |
| "grad_norm": 59.145790100097656, |
| "learning_rate": 9.416000000000001e-06, |
| "loss": 5.5065, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 59.508522033691406, |
| "learning_rate": 9.411000000000002e-06, |
| "loss": 4.9478, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.0595, |
| "grad_norm": 83.52436065673828, |
| "learning_rate": 9.406000000000001e-06, |
| "loss": 5.4239, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 63.06760025024414, |
| "learning_rate": 9.401000000000002e-06, |
| "loss": 5.2395, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0605, |
| "grad_norm": 77.39131927490234, |
| "learning_rate": 9.396000000000001e-06, |
| "loss": 4.9941, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 72.02144622802734, |
| "learning_rate": 9.391e-06, |
| "loss": 6.3654, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0615, |
| "grad_norm": 78.32617950439453, |
| "learning_rate": 9.386000000000001e-06, |
| "loss": 5.6073, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 64.85309600830078, |
| "learning_rate": 9.381e-06, |
| "loss": 5.1284, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 65.42955017089844, |
| "learning_rate": 9.376000000000001e-06, |
| "loss": 5.4364, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 72.40003204345703, |
| "learning_rate": 9.371e-06, |
| "loss": 4.9955, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0635, |
| "grad_norm": 85.38143920898438, |
| "learning_rate": 9.366000000000001e-06, |
| "loss": 6.3228, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 63.02680587768555, |
| "learning_rate": 9.361e-06, |
| "loss": 5.5719, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.0645, |
| "grad_norm": 71.41946411132812, |
| "learning_rate": 9.356e-06, |
| "loss": 6.0309, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 61.37795639038086, |
| "learning_rate": 9.351e-06, |
| "loss": 5.2103, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0655, |
| "grad_norm": 49.167747497558594, |
| "learning_rate": 9.346e-06, |
| "loss": 4.906, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 61.07039260864258, |
| "learning_rate": 9.341000000000001e-06, |
| "loss": 4.6488, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0665, |
| "grad_norm": 57.769874572753906, |
| "learning_rate": 9.336e-06, |
| "loss": 6.048, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 60.57929229736328, |
| "learning_rate": 9.331000000000001e-06, |
| "loss": 5.1988, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0675, |
| "grad_norm": 51.742740631103516, |
| "learning_rate": 9.326e-06, |
| "loss": 4.782, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 66.58326721191406, |
| "learning_rate": 9.321000000000001e-06, |
| "loss": 5.2994, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.0685, |
| "grad_norm": 51.75037384033203, |
| "learning_rate": 9.316e-06, |
| "loss": 6.0222, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 74.25454711914062, |
| "learning_rate": 9.311000000000001e-06, |
| "loss": 5.7173, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0695, |
| "grad_norm": 77.25601959228516, |
| "learning_rate": 9.306e-06, |
| "loss": 5.3204, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 83.57421875, |
| "learning_rate": 9.301000000000001e-06, |
| "loss": 4.3073, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0705, |
| "grad_norm": 72.01386260986328, |
| "learning_rate": 9.296e-06, |
| "loss": 6.278, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 58.889522552490234, |
| "learning_rate": 9.291000000000001e-06, |
| "loss": 5.3862, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.0715, |
| "grad_norm": 55.614715576171875, |
| "learning_rate": 9.286e-06, |
| "loss": 5.1135, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 66.86405181884766, |
| "learning_rate": 9.281000000000001e-06, |
| "loss": 6.0137, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.0725, |
| "grad_norm": 73.3634033203125, |
| "learning_rate": 9.276e-06, |
| "loss": 5.3904, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 65.3800277709961, |
| "learning_rate": 9.271000000000002e-06, |
| "loss": 5.104, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0735, |
| "grad_norm": 57.99812316894531, |
| "learning_rate": 9.266e-06, |
| "loss": 6.0557, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 72.21461486816406, |
| "learning_rate": 9.261000000000002e-06, |
| "loss": 5.2916, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0745, |
| "grad_norm": 46.72555160522461, |
| "learning_rate": 9.256e-06, |
| "loss": 4.765, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 56.59814453125, |
| "learning_rate": 9.251000000000002e-06, |
| "loss": 5.1524, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0755, |
| "grad_norm": 51.47397994995117, |
| "learning_rate": 9.246000000000001e-06, |
| "loss": 4.679, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 66.05585479736328, |
| "learning_rate": 9.241000000000002e-06, |
| "loss": 5.2957, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.0765, |
| "grad_norm": 95.89804077148438, |
| "learning_rate": 9.236000000000001e-06, |
| "loss": 6.7028, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 55.590572357177734, |
| "learning_rate": 9.231000000000002e-06, |
| "loss": 5.4986, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0775, |
| "grad_norm": 64.92922973632812, |
| "learning_rate": 9.226000000000001e-06, |
| "loss": 4.4751, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 77.49915313720703, |
| "learning_rate": 9.221e-06, |
| "loss": 5.5307, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0785, |
| "grad_norm": 71.59280395507812, |
| "learning_rate": 9.216000000000001e-06, |
| "loss": 5.7804, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.079, |
| "grad_norm": 47.804176330566406, |
| "learning_rate": 9.211e-06, |
| "loss": 4.3967, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0795, |
| "grad_norm": 70.14527130126953, |
| "learning_rate": 9.206000000000001e-06, |
| "loss": 5.6562, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 65.86378479003906, |
| "learning_rate": 9.201e-06, |
| "loss": 4.1849, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0805, |
| "grad_norm": 75.2146987915039, |
| "learning_rate": 9.196e-06, |
| "loss": 4.4274, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.081, |
| "grad_norm": 69.72351837158203, |
| "learning_rate": 9.191e-06, |
| "loss": 5.9283, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0815, |
| "grad_norm": 70.36526489257812, |
| "learning_rate": 9.186e-06, |
| "loss": 5.6442, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 61.410221099853516, |
| "learning_rate": 9.181e-06, |
| "loss": 4.9186, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0825, |
| "grad_norm": 62.3479118347168, |
| "learning_rate": 9.176e-06, |
| "loss": 4.9193, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.083, |
| "grad_norm": 77.28399658203125, |
| "learning_rate": 9.171e-06, |
| "loss": 4.8504, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0835, |
| "grad_norm": 57.84767532348633, |
| "learning_rate": 9.166e-06, |
| "loss": 6.5448, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 70.60618591308594, |
| "learning_rate": 9.161000000000001e-06, |
| "loss": 4.4438, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.0845, |
| "grad_norm": 60.7105827331543, |
| "learning_rate": 9.156e-06, |
| "loss": 5.6495, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 129.35887145996094, |
| "learning_rate": 9.151000000000001e-06, |
| "loss": 5.5979, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0855, |
| "grad_norm": 91.48816680908203, |
| "learning_rate": 9.146e-06, |
| "loss": 5.0048, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 52.0786018371582, |
| "learning_rate": 9.141000000000001e-06, |
| "loss": 5.2717, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0865, |
| "grad_norm": 65.6240005493164, |
| "learning_rate": 9.136e-06, |
| "loss": 5.4725, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.087, |
| "grad_norm": 72.78899383544922, |
| "learning_rate": 9.131000000000001e-06, |
| "loss": 5.6896, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0875, |
| "grad_norm": 59.107357025146484, |
| "learning_rate": 9.126e-06, |
| "loss": 5.9547, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 74.16963958740234, |
| "learning_rate": 9.121000000000001e-06, |
| "loss": 5.3027, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.0885, |
| "grad_norm": 70.86022186279297, |
| "learning_rate": 9.116e-06, |
| "loss": 5.8291, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.089, |
| "grad_norm": 52.165042877197266, |
| "learning_rate": 9.111000000000001e-06, |
| "loss": 5.4198, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.0895, |
| "grad_norm": 63.6991081237793, |
| "learning_rate": 9.106e-06, |
| "loss": 5.4296, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 50.96390914916992, |
| "learning_rate": 9.101000000000001e-06, |
| "loss": 4.4431, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0905, |
| "grad_norm": 82.38723754882812, |
| "learning_rate": 9.096e-06, |
| "loss": 5.2341, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.091, |
| "grad_norm": 53.057498931884766, |
| "learning_rate": 9.091000000000002e-06, |
| "loss": 5.6911, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0915, |
| "grad_norm": 61.33327102661133, |
| "learning_rate": 9.086e-06, |
| "loss": 5.1844, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 82.66905975341797, |
| "learning_rate": 9.081000000000002e-06, |
| "loss": 5.5432, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.0925, |
| "grad_norm": 58.46932601928711, |
| "learning_rate": 9.076000000000001e-06, |
| "loss": 4.5893, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.093, |
| "grad_norm": 49.836265563964844, |
| "learning_rate": 9.071000000000002e-06, |
| "loss": 5.752, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.0935, |
| "grad_norm": 58.416526794433594, |
| "learning_rate": 9.066000000000001e-06, |
| "loss": 4.6827, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 65.9136962890625, |
| "learning_rate": 9.061e-06, |
| "loss": 5.8126, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.0945, |
| "grad_norm": 52.34477233886719, |
| "learning_rate": 9.056000000000001e-06, |
| "loss": 5.0553, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 54.70780563354492, |
| "learning_rate": 9.051e-06, |
| "loss": 4.8854, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.0955, |
| "grad_norm": 51.67803192138672, |
| "learning_rate": 9.046000000000001e-06, |
| "loss": 4.9881, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 52.68552017211914, |
| "learning_rate": 9.041e-06, |
| "loss": 5.656, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.0965, |
| "grad_norm": 80.78492736816406, |
| "learning_rate": 9.036e-06, |
| "loss": 6.34, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.097, |
| "grad_norm": 59.34943389892578, |
| "learning_rate": 9.031e-06, |
| "loss": 5.2822, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.0975, |
| "grad_norm": 62.7556037902832, |
| "learning_rate": 9.026e-06, |
| "loss": 4.8106, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 50.799312591552734, |
| "learning_rate": 9.021e-06, |
| "loss": 5.4295, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.0985, |
| "grad_norm": 57.404300689697266, |
| "learning_rate": 9.016e-06, |
| "loss": 5.6952, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.099, |
| "grad_norm": 74.7780532836914, |
| "learning_rate": 9.011e-06, |
| "loss": 3.9573, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0995, |
| "grad_norm": 59.960636138916016, |
| "learning_rate": 9.006e-06, |
| "loss": 5.8612, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 79.70378112792969, |
| "learning_rate": 9.001e-06, |
| "loss": 5.2286, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1005, |
| "grad_norm": 69.96094512939453, |
| "learning_rate": 8.996e-06, |
| "loss": 5.5803, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.101, |
| "grad_norm": 69.7931900024414, |
| "learning_rate": 8.991e-06, |
| "loss": 5.2732, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1015, |
| "grad_norm": 96.09075164794922, |
| "learning_rate": 8.986e-06, |
| "loss": 5.2639, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 51.90883255004883, |
| "learning_rate": 8.981000000000001e-06, |
| "loss": 4.0375, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.1025, |
| "grad_norm": 62.40837478637695, |
| "learning_rate": 8.976e-06, |
| "loss": 5.5941, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.103, |
| "grad_norm": 57.33722686767578, |
| "learning_rate": 8.971000000000001e-06, |
| "loss": 4.6877, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.1035, |
| "grad_norm": 69.11624145507812, |
| "learning_rate": 8.966e-06, |
| "loss": 4.684, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 61.289737701416016, |
| "learning_rate": 8.961000000000001e-06, |
| "loss": 5.9492, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.1045, |
| "grad_norm": 61.19724655151367, |
| "learning_rate": 8.956e-06, |
| "loss": 5.7873, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 70.41716766357422, |
| "learning_rate": 8.951000000000001e-06, |
| "loss": 4.8212, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1055, |
| "grad_norm": 63.66280746459961, |
| "learning_rate": 8.946e-06, |
| "loss": 4.8835, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 88.53239440917969, |
| "learning_rate": 8.941000000000001e-06, |
| "loss": 6.0866, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.1065, |
| "grad_norm": 59.35116958618164, |
| "learning_rate": 8.936e-06, |
| "loss": 5.0788, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.107, |
| "grad_norm": 56.121864318847656, |
| "learning_rate": 8.931000000000001e-06, |
| "loss": 5.2856, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.1075, |
| "grad_norm": 55.64980697631836, |
| "learning_rate": 8.926e-06, |
| "loss": 5.9382, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 65.16487121582031, |
| "learning_rate": 8.921000000000001e-06, |
| "loss": 4.5096, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.1085, |
| "grad_norm": 55.02686309814453, |
| "learning_rate": 8.916e-06, |
| "loss": 5.0421, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.109, |
| "grad_norm": 65.21896362304688, |
| "learning_rate": 8.911000000000002e-06, |
| "loss": 4.7471, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.1095, |
| "grad_norm": 84.38319396972656, |
| "learning_rate": 8.906e-06, |
| "loss": 5.9924, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 84.8278579711914, |
| "learning_rate": 8.901e-06, |
| "loss": 4.4728, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1105, |
| "grad_norm": 55.53620910644531, |
| "learning_rate": 8.896000000000001e-06, |
| "loss": 4.709, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.111, |
| "grad_norm": 65.74577331542969, |
| "learning_rate": 8.891e-06, |
| "loss": 5.1826, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.1115, |
| "grad_norm": 68.25616455078125, |
| "learning_rate": 8.886000000000001e-06, |
| "loss": 7.1784, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 51.782901763916016, |
| "learning_rate": 8.881e-06, |
| "loss": 5.2915, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.1125, |
| "grad_norm": 61.892391204833984, |
| "learning_rate": 8.876e-06, |
| "loss": 6.0908, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.113, |
| "grad_norm": 54.9797248840332, |
| "learning_rate": 8.871e-06, |
| "loss": 4.4188, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1135, |
| "grad_norm": 78.96302795410156, |
| "learning_rate": 8.866000000000001e-06, |
| "loss": 4.8732, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 67.2728500366211, |
| "learning_rate": 8.861e-06, |
| "loss": 5.6544, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.1145, |
| "grad_norm": 53.67226028442383, |
| "learning_rate": 8.856000000000001e-06, |
| "loss": 4.7992, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 63.462032318115234, |
| "learning_rate": 8.851e-06, |
| "loss": 5.5585, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.1155, |
| "grad_norm": 85.72888946533203, |
| "learning_rate": 8.846000000000001e-06, |
| "loss": 5.7742, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 60.94392013549805, |
| "learning_rate": 8.841e-06, |
| "loss": 5.0155, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.1165, |
| "grad_norm": 56.96271896362305, |
| "learning_rate": 8.836000000000001e-06, |
| "loss": 4.5992, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.117, |
| "grad_norm": 50.74510192871094, |
| "learning_rate": 8.831e-06, |
| "loss": 4.5616, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.1175, |
| "grad_norm": 90.70756530761719, |
| "learning_rate": 8.826000000000002e-06, |
| "loss": 6.4825, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 59.96350860595703, |
| "learning_rate": 8.821e-06, |
| "loss": 4.4625, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.1185, |
| "grad_norm": 89.02561950683594, |
| "learning_rate": 8.816000000000002e-06, |
| "loss": 5.7029, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.119, |
| "grad_norm": 61.081298828125, |
| "learning_rate": 8.811000000000001e-06, |
| "loss": 5.1897, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.1195, |
| "grad_norm": 75.82427978515625, |
| "learning_rate": 8.806000000000002e-06, |
| "loss": 5.4992, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 53.01462936401367, |
| "learning_rate": 8.801000000000001e-06, |
| "loss": 6.01, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1205, |
| "grad_norm": 68.6491928100586, |
| "learning_rate": 8.796000000000002e-06, |
| "loss": 5.4257, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.121, |
| "grad_norm": 67.61992645263672, |
| "learning_rate": 8.791000000000001e-06, |
| "loss": 5.0935, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.1215, |
| "grad_norm": 75.9515609741211, |
| "learning_rate": 8.786000000000002e-06, |
| "loss": 4.7788, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 55.77494812011719, |
| "learning_rate": 8.781000000000001e-06, |
| "loss": 5.2895, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.1225, |
| "grad_norm": 78.90836334228516, |
| "learning_rate": 8.776e-06, |
| "loss": 5.7502, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.123, |
| "grad_norm": 54.38423156738281, |
| "learning_rate": 8.771000000000001e-06, |
| "loss": 5.2534, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.1235, |
| "grad_norm": 48.77484130859375, |
| "learning_rate": 8.766e-06, |
| "loss": 5.0617, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 73.08646392822266, |
| "learning_rate": 8.761000000000001e-06, |
| "loss": 5.6851, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.1245, |
| "grad_norm": 49.47875213623047, |
| "learning_rate": 8.756e-06, |
| "loss": 4.7621, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 42.435768127441406, |
| "learning_rate": 8.751000000000001e-06, |
| "loss": 4.2181, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.1255, |
| "grad_norm": 59.38146209716797, |
| "learning_rate": 8.746e-06, |
| "loss": 5.838, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 60.928714752197266, |
| "learning_rate": 8.741e-06, |
| "loss": 4.903, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.1265, |
| "grad_norm": 62.644805908203125, |
| "learning_rate": 8.736e-06, |
| "loss": 6.1991, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.127, |
| "grad_norm": 62.247047424316406, |
| "learning_rate": 8.731e-06, |
| "loss": 5.3907, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.1275, |
| "grad_norm": 61.00815200805664, |
| "learning_rate": 8.726e-06, |
| "loss": 5.2265, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 57.970314025878906, |
| "learning_rate": 8.721e-06, |
| "loss": 5.0272, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.1285, |
| "grad_norm": 78.71075439453125, |
| "learning_rate": 8.716000000000001e-06, |
| "loss": 5.3924, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.129, |
| "grad_norm": 45.33870315551758, |
| "learning_rate": 8.711e-06, |
| "loss": 4.1664, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1295, |
| "grad_norm": 59.856685638427734, |
| "learning_rate": 8.706000000000001e-06, |
| "loss": 4.9717, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 70.93965911865234, |
| "learning_rate": 8.701e-06, |
| "loss": 4.1831, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1305, |
| "grad_norm": 54.16468811035156, |
| "learning_rate": 8.696000000000001e-06, |
| "loss": 5.3112, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.131, |
| "grad_norm": 77.19535064697266, |
| "learning_rate": 8.691e-06, |
| "loss": 5.0046, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.1315, |
| "grad_norm": 73.0103988647461, |
| "learning_rate": 8.686000000000001e-06, |
| "loss": 5.0891, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 79.43903350830078, |
| "learning_rate": 8.681e-06, |
| "loss": 5.621, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.1325, |
| "grad_norm": 69.93892669677734, |
| "learning_rate": 8.676000000000001e-06, |
| "loss": 4.6726, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.133, |
| "grad_norm": 56.596439361572266, |
| "learning_rate": 8.671e-06, |
| "loss": 4.7146, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1335, |
| "grad_norm": 59.12910079956055, |
| "learning_rate": 8.666000000000001e-06, |
| "loss": 5.683, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 65.23446655273438, |
| "learning_rate": 8.661e-06, |
| "loss": 5.9475, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.1345, |
| "grad_norm": 60.68814468383789, |
| "learning_rate": 8.656000000000001e-06, |
| "loss": 4.8507, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 58.06928634643555, |
| "learning_rate": 8.651e-06, |
| "loss": 5.7808, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1355, |
| "grad_norm": 58.640655517578125, |
| "learning_rate": 8.646000000000002e-06, |
| "loss": 5.1012, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 65.93994140625, |
| "learning_rate": 8.641e-06, |
| "loss": 4.8662, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.1365, |
| "grad_norm": 66.67878723144531, |
| "learning_rate": 8.636000000000002e-06, |
| "loss": 5.6589, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.137, |
| "grad_norm": 68.66671752929688, |
| "learning_rate": 8.631000000000001e-06, |
| "loss": 4.7309, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.1375, |
| "grad_norm": 64.89599609375, |
| "learning_rate": 8.626000000000002e-06, |
| "loss": 4.832, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 70.13557434082031, |
| "learning_rate": 8.621000000000001e-06, |
| "loss": 5.6079, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.1385, |
| "grad_norm": 61.200347900390625, |
| "learning_rate": 8.616000000000002e-06, |
| "loss": 5.5559, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.139, |
| "grad_norm": 64.06344604492188, |
| "learning_rate": 8.611000000000001e-06, |
| "loss": 4.7863, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.1395, |
| "grad_norm": 81.44152069091797, |
| "learning_rate": 8.606e-06, |
| "loss": 6.071, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 52.7180290222168, |
| "learning_rate": 8.601000000000001e-06, |
| "loss": 5.2613, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.1405, |
| "grad_norm": 72.11871337890625, |
| "learning_rate": 8.596e-06, |
| "loss": 4.4381, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.141, |
| "grad_norm": 85.8184814453125, |
| "learning_rate": 8.591000000000001e-06, |
| "loss": 6.2463, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.1415, |
| "grad_norm": 59.726993560791016, |
| "learning_rate": 8.586e-06, |
| "loss": 4.9436, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 55.018795013427734, |
| "learning_rate": 8.581e-06, |
| "loss": 4.7814, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.1425, |
| "grad_norm": 56.400421142578125, |
| "learning_rate": 8.576e-06, |
| "loss": 4.6833, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.143, |
| "grad_norm": 46.20795822143555, |
| "learning_rate": 8.571e-06, |
| "loss": 3.7731, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.1435, |
| "grad_norm": 57.5872917175293, |
| "learning_rate": 8.566e-06, |
| "loss": 5.7952, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 60.95462417602539, |
| "learning_rate": 8.561e-06, |
| "loss": 6.742, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.1445, |
| "grad_norm": 80.12348175048828, |
| "learning_rate": 8.556e-06, |
| "loss": 5.3024, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 59.4682731628418, |
| "learning_rate": 8.551e-06, |
| "loss": 3.4684, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.1455, |
| "grad_norm": 87.38482666015625, |
| "learning_rate": 8.546000000000001e-06, |
| "loss": 5.9045, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 56.61424255371094, |
| "learning_rate": 8.541e-06, |
| "loss": 5.1945, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.1465, |
| "grad_norm": 51.18035125732422, |
| "learning_rate": 8.536000000000001e-06, |
| "loss": 4.7717, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.147, |
| "grad_norm": 71.49605560302734, |
| "learning_rate": 8.531e-06, |
| "loss": 5.6623, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.1475, |
| "grad_norm": 70.1756362915039, |
| "learning_rate": 8.526000000000001e-06, |
| "loss": 4.667, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 52.19149398803711, |
| "learning_rate": 8.521e-06, |
| "loss": 4.177, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.1485, |
| "grad_norm": 71.5694351196289, |
| "learning_rate": 8.516000000000001e-06, |
| "loss": 4.8019, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.149, |
| "grad_norm": 62.04697036743164, |
| "learning_rate": 8.511e-06, |
| "loss": 5.1555, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.1495, |
| "grad_norm": 79.89663696289062, |
| "learning_rate": 8.506000000000001e-06, |
| "loss": 5.8517, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 93.36337280273438, |
| "learning_rate": 8.501e-06, |
| "loss": 6.1533, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1505, |
| "grad_norm": 54.96908187866211, |
| "learning_rate": 8.496000000000001e-06, |
| "loss": 4.6604, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.151, |
| "grad_norm": 61.35822296142578, |
| "learning_rate": 8.491e-06, |
| "loss": 5.0536, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.1515, |
| "grad_norm": 66.18203735351562, |
| "learning_rate": 8.486000000000001e-06, |
| "loss": 4.9661, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 50.83629608154297, |
| "learning_rate": 8.481e-06, |
| "loss": 5.2605, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.1525, |
| "grad_norm": 68.134033203125, |
| "learning_rate": 8.476000000000002e-06, |
| "loss": 5.9831, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.153, |
| "grad_norm": 55.83713150024414, |
| "learning_rate": 8.471e-06, |
| "loss": 5.2597, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.1535, |
| "grad_norm": 49.900516510009766, |
| "learning_rate": 8.466000000000002e-06, |
| "loss": 5.0297, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 55.53334045410156, |
| "learning_rate": 8.461e-06, |
| "loss": 4.7405, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.1545, |
| "grad_norm": 51.795074462890625, |
| "learning_rate": 8.456000000000002e-06, |
| "loss": 4.7491, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 43.57197570800781, |
| "learning_rate": 8.451000000000001e-06, |
| "loss": 5.0043, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.1555, |
| "grad_norm": 51.24105453491211, |
| "learning_rate": 8.446e-06, |
| "loss": 4.9249, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 65.57850646972656, |
| "learning_rate": 8.441000000000001e-06, |
| "loss": 4.7407, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.1565, |
| "grad_norm": 64.2021255493164, |
| "learning_rate": 8.436e-06, |
| "loss": 5.7441, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.157, |
| "grad_norm": 65.62643432617188, |
| "learning_rate": 8.431000000000001e-06, |
| "loss": 5.1524, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.1575, |
| "grad_norm": 60.66287612915039, |
| "learning_rate": 8.426e-06, |
| "loss": 6.0503, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 59.155853271484375, |
| "learning_rate": 8.421e-06, |
| "loss": 6.0681, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.1585, |
| "grad_norm": 54.823699951171875, |
| "learning_rate": 8.416e-06, |
| "loss": 4.6115, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.159, |
| "grad_norm": 58.49845504760742, |
| "learning_rate": 8.411e-06, |
| "loss": 4.8539, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.1595, |
| "grad_norm": 63.808223724365234, |
| "learning_rate": 8.406e-06, |
| "loss": 4.931, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 49.438392639160156, |
| "learning_rate": 8.401e-06, |
| "loss": 4.449, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1605, |
| "grad_norm": 58.23267364501953, |
| "learning_rate": 8.396e-06, |
| "loss": 5.3667, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.161, |
| "grad_norm": 64.92132568359375, |
| "learning_rate": 8.391e-06, |
| "loss": 5.3969, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.1615, |
| "grad_norm": 76.33795928955078, |
| "learning_rate": 8.386e-06, |
| "loss": 5.0351, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 68.49414825439453, |
| "learning_rate": 8.381e-06, |
| "loss": 6.1171, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.1625, |
| "grad_norm": 77.73714447021484, |
| "learning_rate": 8.376e-06, |
| "loss": 4.633, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.163, |
| "grad_norm": 73.00968933105469, |
| "learning_rate": 8.371e-06, |
| "loss": 5.813, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.1635, |
| "grad_norm": 71.55262756347656, |
| "learning_rate": 8.366000000000001e-06, |
| "loss": 5.2287, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 67.30160522460938, |
| "learning_rate": 8.361e-06, |
| "loss": 5.2435, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.1645, |
| "grad_norm": 59.34938430786133, |
| "learning_rate": 8.356000000000001e-06, |
| "loss": 5.6554, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 50.35622024536133, |
| "learning_rate": 8.351e-06, |
| "loss": 5.1153, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.1655, |
| "grad_norm": 70.94762420654297, |
| "learning_rate": 8.346000000000001e-06, |
| "loss": 4.9395, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 71.47914123535156, |
| "learning_rate": 8.341e-06, |
| "loss": 4.832, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.1665, |
| "grad_norm": 67.92778015136719, |
| "learning_rate": 8.336000000000001e-06, |
| "loss": 4.3402, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.167, |
| "grad_norm": 67.71968841552734, |
| "learning_rate": 8.331e-06, |
| "loss": 5.1193, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.1675, |
| "grad_norm": 48.13265609741211, |
| "learning_rate": 8.326000000000001e-06, |
| "loss": 5.1677, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 57.75625991821289, |
| "learning_rate": 8.321e-06, |
| "loss": 5.2929, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.1685, |
| "grad_norm": 59.2353630065918, |
| "learning_rate": 8.316000000000001e-06, |
| "loss": 4.6238, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.169, |
| "grad_norm": 72.31878662109375, |
| "learning_rate": 8.311e-06, |
| "loss": 4.9348, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.1695, |
| "grad_norm": 66.85990905761719, |
| "learning_rate": 8.306000000000001e-06, |
| "loss": 5.2399, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 65.82227325439453, |
| "learning_rate": 8.301e-06, |
| "loss": 3.9255, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.1705, |
| "grad_norm": 64.09220886230469, |
| "learning_rate": 8.296000000000002e-06, |
| "loss": 5.0496, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.171, |
| "grad_norm": 70.31346893310547, |
| "learning_rate": 8.291e-06, |
| "loss": 5.5094, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.1715, |
| "grad_norm": 58.26755142211914, |
| "learning_rate": 8.286e-06, |
| "loss": 4.0126, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 53.582611083984375, |
| "learning_rate": 8.281e-06, |
| "loss": 4.79, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1725, |
| "grad_norm": 56.40678405761719, |
| "learning_rate": 8.276e-06, |
| "loss": 5.1313, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.173, |
| "grad_norm": 54.7972526550293, |
| "learning_rate": 8.271000000000001e-06, |
| "loss": 5.046, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.1735, |
| "grad_norm": 52.5406379699707, |
| "learning_rate": 8.266e-06, |
| "loss": 4.7231, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 48.16586685180664, |
| "learning_rate": 8.261e-06, |
| "loss": 5.1313, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.1745, |
| "grad_norm": 47.292720794677734, |
| "learning_rate": 8.256e-06, |
| "loss": 4.6004, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 60.821266174316406, |
| "learning_rate": 8.251e-06, |
| "loss": 5.5364, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.1755, |
| "grad_norm": 93.91549682617188, |
| "learning_rate": 8.246e-06, |
| "loss": 5.5144, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 114.97464752197266, |
| "learning_rate": 8.241000000000001e-06, |
| "loss": 5.0595, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.1765, |
| "grad_norm": 64.73751831054688, |
| "learning_rate": 8.236e-06, |
| "loss": 4.3854, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.177, |
| "grad_norm": 64.93399810791016, |
| "learning_rate": 8.231000000000001e-06, |
| "loss": 5.1058, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.1775, |
| "grad_norm": 67.42242431640625, |
| "learning_rate": 8.226e-06, |
| "loss": 4.8133, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 52.42443084716797, |
| "learning_rate": 8.221000000000001e-06, |
| "loss": 5.0636, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.1785, |
| "grad_norm": 79.13347625732422, |
| "learning_rate": 8.216e-06, |
| "loss": 5.2212, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.179, |
| "grad_norm": 67.81745910644531, |
| "learning_rate": 8.211000000000002e-06, |
| "loss": 5.2105, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.1795, |
| "grad_norm": 62.96812057495117, |
| "learning_rate": 8.206e-06, |
| "loss": 5.3702, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 76.28816223144531, |
| "learning_rate": 8.201000000000002e-06, |
| "loss": 4.6774, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.1805, |
| "grad_norm": 129.661865234375, |
| "learning_rate": 8.196e-06, |
| "loss": 6.0545, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.181, |
| "grad_norm": 68.19229888916016, |
| "learning_rate": 8.191000000000002e-06, |
| "loss": 5.403, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.1815, |
| "grad_norm": 65.75102996826172, |
| "learning_rate": 8.186000000000001e-06, |
| "loss": 4.7501, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 61.759761810302734, |
| "learning_rate": 8.181000000000002e-06, |
| "loss": 4.7871, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.1825, |
| "grad_norm": 66.68859100341797, |
| "learning_rate": 8.176000000000001e-06, |
| "loss": 4.5813, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.183, |
| "grad_norm": 71.10906219482422, |
| "learning_rate": 8.171000000000002e-06, |
| "loss": 4.9447, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.1835, |
| "grad_norm": 68.92731475830078, |
| "learning_rate": 8.166000000000001e-06, |
| "loss": 5.2777, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 68.01397705078125, |
| "learning_rate": 8.161e-06, |
| "loss": 5.3131, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.1845, |
| "grad_norm": 63.41286849975586, |
| "learning_rate": 8.156000000000001e-06, |
| "loss": 4.3475, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 70.69223022460938, |
| "learning_rate": 8.151e-06, |
| "loss": 5.0628, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1855, |
| "grad_norm": 84.73713684082031, |
| "learning_rate": 8.146000000000001e-06, |
| "loss": 6.1366, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 73.59104919433594, |
| "learning_rate": 8.141e-06, |
| "loss": 4.4199, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.1865, |
| "grad_norm": 71.16675567626953, |
| "learning_rate": 8.136000000000001e-06, |
| "loss": 4.4485, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.187, |
| "grad_norm": 58.05774688720703, |
| "learning_rate": 8.131e-06, |
| "loss": 5.5044, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 55.99666976928711, |
| "learning_rate": 8.126e-06, |
| "loss": 3.8585, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 69.0136947631836, |
| "learning_rate": 8.121e-06, |
| "loss": 4.2551, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1885, |
| "grad_norm": 43.44075012207031, |
| "learning_rate": 8.116e-06, |
| "loss": 4.511, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.189, |
| "grad_norm": 60.94331741333008, |
| "learning_rate": 8.111e-06, |
| "loss": 5.4013, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.1895, |
| "grad_norm": 64.44482421875, |
| "learning_rate": 8.106e-06, |
| "loss": 4.2638, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 52.195465087890625, |
| "learning_rate": 8.101000000000001e-06, |
| "loss": 3.9795, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.1905, |
| "grad_norm": 76.83987426757812, |
| "learning_rate": 8.096e-06, |
| "loss": 4.3378, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.191, |
| "grad_norm": 71.9560317993164, |
| "learning_rate": 8.091000000000001e-06, |
| "loss": 5.4242, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.1915, |
| "grad_norm": 66.99281311035156, |
| "learning_rate": 8.086e-06, |
| "loss": 5.5902, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 56.23039245605469, |
| "learning_rate": 8.081000000000001e-06, |
| "loss": 5.7197, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.1925, |
| "grad_norm": 80.09093475341797, |
| "learning_rate": 8.076e-06, |
| "loss": 5.1211, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.193, |
| "grad_norm": 65.79684448242188, |
| "learning_rate": 8.071000000000001e-06, |
| "loss": 4.8171, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.1935, |
| "grad_norm": 69.53257751464844, |
| "learning_rate": 8.066e-06, |
| "loss": 4.4696, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 61.77272033691406, |
| "learning_rate": 8.061000000000001e-06, |
| "loss": 4.1851, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.1945, |
| "grad_norm": 77.31800842285156, |
| "learning_rate": 8.056e-06, |
| "loss": 5.2775, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 56.944435119628906, |
| "learning_rate": 8.051000000000001e-06, |
| "loss": 4.62, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.1955, |
| "grad_norm": 88.08844757080078, |
| "learning_rate": 8.046e-06, |
| "loss": 6.7407, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 69.57597351074219, |
| "learning_rate": 8.041000000000001e-06, |
| "loss": 4.6671, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.1965, |
| "grad_norm": 65.85086822509766, |
| "learning_rate": 8.036e-06, |
| "loss": 4.2913, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.197, |
| "grad_norm": 71.38687133789062, |
| "learning_rate": 8.031000000000002e-06, |
| "loss": 4.5964, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.1975, |
| "grad_norm": 72.68925476074219, |
| "learning_rate": 8.026e-06, |
| "loss": 4.7652, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 56.921199798583984, |
| "learning_rate": 8.021000000000002e-06, |
| "loss": 5.8899, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.1985, |
| "grad_norm": 60.6448860168457, |
| "learning_rate": 8.016e-06, |
| "loss": 4.4663, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.199, |
| "grad_norm": 77.55535888671875, |
| "learning_rate": 8.011000000000002e-06, |
| "loss": 4.9308, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.1995, |
| "grad_norm": 73.72390747070312, |
| "learning_rate": 8.006000000000001e-06, |
| "loss": 5.127, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 80.28380584716797, |
| "learning_rate": 8.001000000000002e-06, |
| "loss": 5.1619, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |