| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005, |
| "grad_norm": 6.1962890625, |
| "learning_rate": 9.995e-07, |
| "loss": -0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 6.744086742401123, |
| "learning_rate": 9.989999999999999e-07, |
| "loss": -0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 6.945072174072266, |
| "learning_rate": 9.985e-07, |
| "loss": 0.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 6.354312419891357, |
| "learning_rate": 9.98e-07, |
| "loss": -0.0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 5.802479267120361, |
| "learning_rate": 9.975e-07, |
| "loss": 0.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 4.5852274894714355, |
| "learning_rate": 9.97e-07, |
| "loss": 0.0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 7.049472332000732, |
| "learning_rate": 9.965e-07, |
| "loss": 0.0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 21.362648010253906, |
| "learning_rate": 9.959999999999999e-07, |
| "loss": -0.0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 5.594510555267334, |
| "learning_rate": 9.955e-07, |
| "loss": 0.0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 5.9653730392456055, |
| "learning_rate": 9.95e-07, |
| "loss": 0.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 5.095400333404541, |
| "learning_rate": 9.945e-07, |
| "loss": -0.0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 0.0, |
| "learning_rate": 9.94e-07, |
| "loss": 0.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 10.911425590515137, |
| "learning_rate": 9.935e-07, |
| "loss": -0.0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 9.652170181274414, |
| "learning_rate": 9.929999999999999e-07, |
| "loss": 0.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 6.956664562225342, |
| "learning_rate": 9.925e-07, |
| "loss": 0.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 12.070667266845703, |
| "learning_rate": 9.92e-07, |
| "loss": 0.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 14.007853507995605, |
| "learning_rate": 9.915e-07, |
| "loss": 0.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 4.017375469207764, |
| "learning_rate": 9.91e-07, |
| "loss": 0.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 0.0, |
| "learning_rate": 9.905e-07, |
| "loss": 0.0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.546974182128906, |
| "learning_rate": 9.9e-07, |
| "loss": 0.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0105, |
| "grad_norm": 7.551206588745117, |
| "learning_rate": 9.895e-07, |
| "loss": -0.0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 0.0, |
| "learning_rate": 9.89e-07, |
| "loss": 0.0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0115, |
| "grad_norm": 6.233001232147217, |
| "learning_rate": 9.885e-07, |
| "loss": -0.0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 0.0, |
| "learning_rate": 9.88e-07, |
| "loss": 0.0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 7.307622909545898, |
| "learning_rate": 9.875e-07, |
| "loss": -0.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 5.898115158081055, |
| "learning_rate": 9.87e-07, |
| "loss": -0.0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0135, |
| "grad_norm": 8.286269187927246, |
| "learning_rate": 9.865e-07, |
| "loss": 0.0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 9.178420066833496, |
| "learning_rate": 9.86e-07, |
| "loss": 0.0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0145, |
| "grad_norm": 7.090274810791016, |
| "learning_rate": 9.855e-07, |
| "loss": 0.0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 10.001739501953125, |
| "learning_rate": 9.849999999999999e-07, |
| "loss": 0.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0155, |
| "grad_norm": 8.978482246398926, |
| "learning_rate": 9.845e-07, |
| "loss": 0.0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 8.083369255065918, |
| "learning_rate": 9.84e-07, |
| "loss": -0.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0165, |
| "grad_norm": 9.646997451782227, |
| "learning_rate": 9.835e-07, |
| "loss": 0.0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 6.892234802246094, |
| "learning_rate": 9.83e-07, |
| "loss": 0.0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 0.0, |
| "learning_rate": 9.825e-07, |
| "loss": 0.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 6.182197570800781, |
| "learning_rate": 9.819999999999999e-07, |
| "loss": 0.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0185, |
| "grad_norm": 5.895266532897949, |
| "learning_rate": 9.815e-07, |
| "loss": -0.0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 11.212841033935547, |
| "learning_rate": 9.81e-07, |
| "loss": -0.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0195, |
| "grad_norm": 7.982095241546631, |
| "learning_rate": 9.805e-07, |
| "loss": 0.0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.73940896987915, |
| "learning_rate": 9.8e-07, |
| "loss": 0.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0205, |
| "grad_norm": 8.540511131286621, |
| "learning_rate": 9.795e-07, |
| "loss": -0.0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 0.0, |
| "learning_rate": 9.789999999999999e-07, |
| "loss": 0.0, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0215, |
| "grad_norm": 8.709277153015137, |
| "learning_rate": 9.785e-07, |
| "loss": -0.0, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 6.68982458114624, |
| "learning_rate": 9.78e-07, |
| "loss": 0.0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 6.988176345825195, |
| "learning_rate": 9.775e-07, |
| "loss": 0.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 7.0302910804748535, |
| "learning_rate": 9.77e-07, |
| "loss": 0.0, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0235, |
| "grad_norm": 8.396454811096191, |
| "learning_rate": 9.765e-07, |
| "loss": -0.0, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 4.7376227378845215, |
| "learning_rate": 9.759999999999998e-07, |
| "loss": -0.0, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0245, |
| "grad_norm": 0.0, |
| "learning_rate": 9.755e-07, |
| "loss": 0.0, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 6.381641387939453, |
| "learning_rate": 9.75e-07, |
| "loss": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0255, |
| "grad_norm": 0.0, |
| "learning_rate": 9.745e-07, |
| "loss": 0.0, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 8.140380859375, |
| "learning_rate": 9.74e-07, |
| "loss": -0.0, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0265, |
| "grad_norm": 4.727418899536133, |
| "learning_rate": 9.735e-07, |
| "loss": 0.0, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 6.386085510253906, |
| "learning_rate": 9.729999999999998e-07, |
| "loss": -0.0, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0275, |
| "grad_norm": 6.39836311340332, |
| "learning_rate": 9.725e-07, |
| "loss": 0.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 5.749513149261475, |
| "learning_rate": 9.72e-07, |
| "loss": -0.0, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0285, |
| "grad_norm": 4.699296474456787, |
| "learning_rate": 9.715e-07, |
| "loss": -0.0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 8.458806037902832, |
| "learning_rate": 9.709999999999999e-07, |
| "loss": -0.0, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0295, |
| "grad_norm": 9.1854248046875, |
| "learning_rate": 9.705e-07, |
| "loss": -0.0, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 6.844909191131592, |
| "learning_rate": 9.7e-07, |
| "loss": 0.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0305, |
| "grad_norm": 33.0734977722168, |
| "learning_rate": 9.695e-07, |
| "loss": 0.0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 0.0, |
| "learning_rate": 9.69e-07, |
| "loss": 0.0, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0315, |
| "grad_norm": 7.425229072570801, |
| "learning_rate": 9.685e-07, |
| "loss": 0.0, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 9.169403076171875, |
| "learning_rate": 9.679999999999999e-07, |
| "loss": -0.0, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0325, |
| "grad_norm": 13.490100860595703, |
| "learning_rate": 9.675e-07, |
| "loss": 0.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 7.570629596710205, |
| "learning_rate": 9.67e-07, |
| "loss": -0.0, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0335, |
| "grad_norm": 5.252549648284912, |
| "learning_rate": 9.665e-07, |
| "loss": 0.0, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 5.543639183044434, |
| "learning_rate": 9.66e-07, |
| "loss": -0.0, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0345, |
| "grad_norm": 0.0, |
| "learning_rate": 9.655e-07, |
| "loss": 0.0, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 5.360587120056152, |
| "learning_rate": 9.649999999999999e-07, |
| "loss": 0.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0355, |
| "grad_norm": 7.327621936798096, |
| "learning_rate": 9.645e-07, |
| "loss": 0.0, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 9.594143867492676, |
| "learning_rate": 9.64e-07, |
| "loss": 0.0, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0365, |
| "grad_norm": 5.346116065979004, |
| "learning_rate": 9.635e-07, |
| "loss": 0.0, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 5.963859558105469, |
| "learning_rate": 9.63e-07, |
| "loss": 0.0, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0375, |
| "grad_norm": 7.078248023986816, |
| "learning_rate": 9.624999999999999e-07, |
| "loss": 0.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 5.854560375213623, |
| "learning_rate": 9.619999999999999e-07, |
| "loss": 0.0, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0385, |
| "grad_norm": 8.13651180267334, |
| "learning_rate": 9.615e-07, |
| "loss": -0.0, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 8.167058944702148, |
| "learning_rate": 9.61e-07, |
| "loss": -0.0, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0395, |
| "grad_norm": 5.878276348114014, |
| "learning_rate": 9.605e-07, |
| "loss": 0.0, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 12.290175437927246, |
| "learning_rate": 9.6e-07, |
| "loss": 0.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0405, |
| "grad_norm": 4.8677496910095215, |
| "learning_rate": 9.594999999999999e-07, |
| "loss": 0.0, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 9.993011474609375, |
| "learning_rate": 9.589999999999998e-07, |
| "loss": 0.0, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0415, |
| "grad_norm": 7.9544477462768555, |
| "learning_rate": 9.585e-07, |
| "loss": 0.0, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 8.334663391113281, |
| "learning_rate": 9.58e-07, |
| "loss": -0.0, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0425, |
| "grad_norm": 21.026262283325195, |
| "learning_rate": 9.575e-07, |
| "loss": -0.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 13.211177825927734, |
| "learning_rate": 9.57e-07, |
| "loss": 0.0, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0435, |
| "grad_norm": 9.141230583190918, |
| "learning_rate": 9.565e-07, |
| "loss": 0.0, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 7.934508800506592, |
| "learning_rate": 9.559999999999998e-07, |
| "loss": -0.0, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0445, |
| "grad_norm": 8.56117057800293, |
| "learning_rate": 9.555e-07, |
| "loss": 0.0, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 0.0, |
| "learning_rate": 9.55e-07, |
| "loss": 0.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0455, |
| "grad_norm": 15.598448753356934, |
| "learning_rate": 9.545e-07, |
| "loss": 0.0, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 9.095897674560547, |
| "learning_rate": 9.539999999999999e-07, |
| "loss": -0.0, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0465, |
| "grad_norm": 4.865746974945068, |
| "learning_rate": 9.535e-07, |
| "loss": -0.0, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 0.0, |
| "learning_rate": 9.529999999999999e-07, |
| "loss": 0.0, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0475, |
| "grad_norm": 5.1494951248168945, |
| "learning_rate": 9.525e-07, |
| "loss": 0.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 11.34716510772705, |
| "learning_rate": 9.52e-07, |
| "loss": 0.0, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0485, |
| "grad_norm": 11.986861228942871, |
| "learning_rate": 9.515e-07, |
| "loss": 0.0, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 7.944230079650879, |
| "learning_rate": 9.509999999999999e-07, |
| "loss": 0.0, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0495, |
| "grad_norm": 7.5184783935546875, |
| "learning_rate": 9.504999999999999e-07, |
| "loss": -0.0, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 4.20994758605957, |
| "learning_rate": 9.499999999999999e-07, |
| "loss": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0505, |
| "grad_norm": 0.0, |
| "learning_rate": 9.495e-07, |
| "loss": 0.0, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 0.0, |
| "learning_rate": 9.489999999999999e-07, |
| "loss": 0.0, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.0515, |
| "grad_norm": 7.179519176483154, |
| "learning_rate": 9.485e-07, |
| "loss": -0.0, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 8.312400817871094, |
| "learning_rate": 9.479999999999999e-07, |
| "loss": 0.0, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0525, |
| "grad_norm": 0.0, |
| "learning_rate": 9.474999999999999e-07, |
| "loss": 0.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 6.276727676391602, |
| "learning_rate": 9.469999999999999e-07, |
| "loss": 0.0, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0535, |
| "grad_norm": 6.952809810638428, |
| "learning_rate": 9.465e-07, |
| "loss": 0.0, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 12.95068645477295, |
| "learning_rate": 9.459999999999999e-07, |
| "loss": -0.0, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0545, |
| "grad_norm": 0.0, |
| "learning_rate": 9.455e-07, |
| "loss": 0.0, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 13.65576457977295, |
| "learning_rate": 9.45e-07, |
| "loss": 0.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0555, |
| "grad_norm": 8.414222717285156, |
| "learning_rate": 9.444999999999999e-07, |
| "loss": 0.0, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 7.828263759613037, |
| "learning_rate": 9.439999999999999e-07, |
| "loss": 0.0, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0565, |
| "grad_norm": 0.0, |
| "learning_rate": 9.434999999999999e-07, |
| "loss": 0.0, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 7.849336624145508, |
| "learning_rate": 9.429999999999999e-07, |
| "loss": 0.0, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0575, |
| "grad_norm": 13.594552993774414, |
| "learning_rate": 9.425e-07, |
| "loss": 0.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 6.633617877960205, |
| "learning_rate": 9.419999999999999e-07, |
| "loss": 0.0, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0585, |
| "grad_norm": 7.893250942230225, |
| "learning_rate": 9.415e-07, |
| "loss": -0.0, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 7.897842884063721, |
| "learning_rate": 9.409999999999999e-07, |
| "loss": -0.0, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0595, |
| "grad_norm": 7.738225936889648, |
| "learning_rate": 9.404999999999999e-07, |
| "loss": 0.0, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 10.054285049438477, |
| "learning_rate": 9.399999999999999e-07, |
| "loss": 0.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0605, |
| "grad_norm": 6.2317328453063965, |
| "learning_rate": 9.395e-07, |
| "loss": 0.0, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 7.4707207679748535, |
| "learning_rate": 9.389999999999999e-07, |
| "loss": -0.0, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0615, |
| "grad_norm": 0.0, |
| "learning_rate": 9.385e-07, |
| "loss": 0.0, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 6.883451461791992, |
| "learning_rate": 9.379999999999998e-07, |
| "loss": 0.0, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 5.7558274269104, |
| "learning_rate": 9.374999999999999e-07, |
| "loss": 0.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 4.654928207397461, |
| "learning_rate": 9.37e-07, |
| "loss": -0.0, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0635, |
| "grad_norm": 13.459746360778809, |
| "learning_rate": 9.365e-07, |
| "loss": 0.0, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 6.189227104187012, |
| "learning_rate": 9.36e-07, |
| "loss": -0.0, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0645, |
| "grad_norm": 15.807933807373047, |
| "learning_rate": 9.355e-07, |
| "loss": -0.0, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 8.20335865020752, |
| "learning_rate": 9.35e-07, |
| "loss": -0.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0655, |
| "grad_norm": 7.410068511962891, |
| "learning_rate": 9.344999999999999e-07, |
| "loss": 0.0, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 5.982290744781494, |
| "learning_rate": 9.34e-07, |
| "loss": 0.0, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0665, |
| "grad_norm": 7.302867889404297, |
| "learning_rate": 9.334999999999999e-07, |
| "loss": 0.0, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 7.16635799407959, |
| "learning_rate": 9.33e-07, |
| "loss": 0.0, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.0675, |
| "grad_norm": 0.0, |
| "learning_rate": 9.325e-07, |
| "loss": 0.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 5.66601037979126, |
| "learning_rate": 9.32e-07, |
| "loss": -0.0, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0685, |
| "grad_norm": 0.0, |
| "learning_rate": 9.315e-07, |
| "loss": 0.0, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 12.146499633789062, |
| "learning_rate": 9.31e-07, |
| "loss": -0.0, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0695, |
| "grad_norm": 6.333805084228516, |
| "learning_rate": 9.304999999999999e-07, |
| "loss": 0.0, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 17.41741943359375, |
| "learning_rate": 9.3e-07, |
| "loss": -0.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0705, |
| "grad_norm": 0.0, |
| "learning_rate": 9.295e-07, |
| "loss": 0.0, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 18.96269989013672, |
| "learning_rate": 9.29e-07, |
| "loss": 0.0, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0715, |
| "grad_norm": 30.19170570373535, |
| "learning_rate": 9.285e-07, |
| "loss": 0.0, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 12.67878532409668, |
| "learning_rate": 9.28e-07, |
| "loss": -0.0, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0725, |
| "grad_norm": 16.92245101928711, |
| "learning_rate": 9.274999999999999e-07, |
| "loss": 0.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 8.775379180908203, |
| "learning_rate": 9.27e-07, |
| "loss": 0.0, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0735, |
| "grad_norm": 0.0, |
| "learning_rate": 9.264999999999999e-07, |
| "loss": 0.0, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 12.122485160827637, |
| "learning_rate": 9.26e-07, |
| "loss": 0.0, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0745, |
| "grad_norm": 41.2854118347168, |
| "learning_rate": 9.255e-07, |
| "loss": 0.0, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.0, |
| "learning_rate": 9.25e-07, |
| "loss": 0.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0755, |
| "grad_norm": 12.417732238769531, |
| "learning_rate": 9.244999999999999e-07, |
| "loss": 0.0, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 23.242403030395508, |
| "learning_rate": 9.24e-07, |
| "loss": 0.0, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0765, |
| "grad_norm": 0.0, |
| "learning_rate": 9.234999999999999e-07, |
| "loss": 0.0, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 8.696711540222168, |
| "learning_rate": 9.23e-07, |
| "loss": -0.0, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0775, |
| "grad_norm": 0.0, |
| "learning_rate": 9.225e-07, |
| "loss": 0.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 0.0, |
| "learning_rate": 9.22e-07, |
| "loss": 0.0, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0785, |
| "grad_norm": 12.881440162658691, |
| "learning_rate": 9.215e-07, |
| "loss": -0.0, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.079, |
| "grad_norm": 0.0, |
| "learning_rate": 9.21e-07, |
| "loss": 0.0, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.0795, |
| "grad_norm": 21.86204719543457, |
| "learning_rate": 9.204999999999999e-07, |
| "loss": 0.0, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 16.32013702392578, |
| "learning_rate": 9.2e-07, |
| "loss": -0.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0805, |
| "grad_norm": 0.0, |
| "learning_rate": 9.194999999999999e-07, |
| "loss": 0.0, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.081, |
| "grad_norm": 0.0, |
| "learning_rate": 9.19e-07, |
| "loss": 0.0, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0815, |
| "grad_norm": 21.536087036132812, |
| "learning_rate": 9.185e-07, |
| "loss": 0.0, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 15.687423706054688, |
| "learning_rate": 9.18e-07, |
| "loss": 0.0, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0825, |
| "grad_norm": 0.0, |
| "learning_rate": 9.174999999999999e-07, |
| "loss": 0.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.083, |
| "grad_norm": 0.0, |
| "learning_rate": 9.17e-07, |
| "loss": 0.0, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.0835, |
| "grad_norm": 0.0, |
| "learning_rate": 9.164999999999999e-07, |
| "loss": 0.0, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 0.0, |
| "learning_rate": 9.16e-07, |
| "loss": 0.0, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0845, |
| "grad_norm": 0.0, |
| "learning_rate": 9.155e-07, |
| "loss": 0.0, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 0.0, |
| "learning_rate": 9.15e-07, |
| "loss": 0.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0855, |
| "grad_norm": 25.705774307250977, |
| "learning_rate": 9.145e-07, |
| "loss": -0.0, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 21.59645652770996, |
| "learning_rate": 9.14e-07, |
| "loss": -0.0, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.0865, |
| "grad_norm": 10.857905387878418, |
| "learning_rate": 9.134999999999999e-07, |
| "loss": -0.0, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.087, |
| "grad_norm": 0.0, |
| "learning_rate": 9.13e-07, |
| "loss": 0.0, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0875, |
| "grad_norm": 0.0, |
| "learning_rate": 9.124999999999999e-07, |
| "loss": 0.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.0, |
| "learning_rate": 9.12e-07, |
| "loss": 0.0, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0885, |
| "grad_norm": 0.0, |
| "learning_rate": 9.115e-07, |
| "loss": 0.0, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.089, |
| "grad_norm": 20.786745071411133, |
| "learning_rate": 9.109999999999999e-07, |
| "loss": 0.0, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0895, |
| "grad_norm": 8.460957527160645, |
| "learning_rate": 9.104999999999999e-07, |
| "loss": -0.0, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.0, |
| "learning_rate": 9.1e-07, |
| "loss": 0.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0905, |
| "grad_norm": 0.0, |
| "learning_rate": 9.094999999999999e-07, |
| "loss": 0.0, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.091, |
| "grad_norm": 0.0, |
| "learning_rate": 9.09e-07, |
| "loss": 0.0, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0915, |
| "grad_norm": 49.33989715576172, |
| "learning_rate": 9.085e-07, |
| "loss": -0.0, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 0.0, |
| "learning_rate": 9.08e-07, |
| "loss": 0.0, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0925, |
| "grad_norm": 0.0, |
| "learning_rate": 9.074999999999999e-07, |
| "loss": 0.0, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.093, |
| "grad_norm": 0.0, |
| "learning_rate": 9.07e-07, |
| "loss": 0.0, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0935, |
| "grad_norm": 0.0, |
| "learning_rate": 9.064999999999999e-07, |
| "loss": 0.0, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 16.010793685913086, |
| "learning_rate": 9.06e-07, |
| "loss": 0.0, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0945, |
| "grad_norm": 17.950115203857422, |
| "learning_rate": 9.055e-07, |
| "loss": 0.0, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 0.0, |
| "learning_rate": 9.05e-07, |
| "loss": 0.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0955, |
| "grad_norm": 0.0, |
| "learning_rate": 9.045e-07, |
| "loss": 0.0, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 8.419339179992676, |
| "learning_rate": 9.039999999999999e-07, |
| "loss": -0.0, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.0965, |
| "grad_norm": 0.0, |
| "learning_rate": 9.034999999999999e-07, |
| "loss": 0.0, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.097, |
| "grad_norm": 17.22492790222168, |
| "learning_rate": 9.03e-07, |
| "loss": -0.0, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.0975, |
| "grad_norm": 0.0, |
| "learning_rate": 9.024999999999999e-07, |
| "loss": 0.0, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 15.984553337097168, |
| "learning_rate": 9.02e-07, |
| "loss": 0.0, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.0985, |
| "grad_norm": 0.0, |
| "learning_rate": 9.015e-07, |
| "loss": 0.0, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.099, |
| "grad_norm": 11.981531143188477, |
| "learning_rate": 9.01e-07, |
| "loss": 0.0, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0995, |
| "grad_norm": 0.0, |
| "learning_rate": 9.004999999999999e-07, |
| "loss": 0.0, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 16.9019832611084, |
| "learning_rate": 9e-07, |
| "loss": -0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1005, |
| "grad_norm": 0.0, |
| "learning_rate": 8.994999999999999e-07, |
| "loss": 0.0, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.101, |
| "grad_norm": 10.651970863342285, |
| "learning_rate": 8.99e-07, |
| "loss": 0.0, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1015, |
| "grad_norm": 0.0, |
| "learning_rate": 8.985e-07, |
| "loss": 0.0, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 0.0, |
| "learning_rate": 8.98e-07, |
| "loss": 0.0, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.1025, |
| "grad_norm": 0.0, |
| "learning_rate": 8.974999999999999e-07, |
| "loss": 0.0, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.103, |
| "grad_norm": 33.05813980102539, |
| "learning_rate": 8.969999999999999e-07, |
| "loss": 0.0, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1035, |
| "grad_norm": 26.88140296936035, |
| "learning_rate": 8.964999999999999e-07, |
| "loss": 0.0, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 18.670848846435547, |
| "learning_rate": 8.96e-07, |
| "loss": -0.0, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1045, |
| "grad_norm": 18.841079711914062, |
| "learning_rate": 8.954999999999999e-07, |
| "loss": -0.0, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 0.0, |
| "learning_rate": 8.95e-07, |
| "loss": 0.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1055, |
| "grad_norm": 13.156370162963867, |
| "learning_rate": 8.945e-07, |
| "loss": 0.0, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 0.0, |
| "learning_rate": 8.939999999999999e-07, |
| "loss": 0.0, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1065, |
| "grad_norm": 0.0, |
| "learning_rate": 8.934999999999999e-07, |
| "loss": 0.0, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.107, |
| "grad_norm": 23.25225830078125, |
| "learning_rate": 8.93e-07, |
| "loss": 0.0, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1075, |
| "grad_norm": 0.0, |
| "learning_rate": 8.924999999999999e-07, |
| "loss": 0.0, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 0.0, |
| "learning_rate": 8.92e-07, |
| "loss": 0.0, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1085, |
| "grad_norm": 0.0, |
| "learning_rate": 8.915e-07, |
| "loss": 0.0, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.109, |
| "grad_norm": 0.0, |
| "learning_rate": 8.91e-07, |
| "loss": 0.0, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1095, |
| "grad_norm": 57.88274383544922, |
| "learning_rate": 8.904999999999999e-07, |
| "loss": 0.0, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 31.124988555908203, |
| "learning_rate": 8.9e-07, |
| "loss": 0.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1105, |
| "grad_norm": 0.0, |
| "learning_rate": 8.894999999999999e-07, |
| "loss": 0.0, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.111, |
| "grad_norm": 22.94927215576172, |
| "learning_rate": 8.89e-07, |
| "loss": -0.0, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.1115, |
| "grad_norm": 0.0, |
| "learning_rate": 8.884999999999999e-07, |
| "loss": 0.0, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.0, |
| "learning_rate": 8.88e-07, |
| "loss": 0.0, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1125, |
| "grad_norm": 22.883502960205078, |
| "learning_rate": 8.874999999999999e-07, |
| "loss": 0.0, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.113, |
| "grad_norm": 10.071247100830078, |
| "learning_rate": 8.869999999999999e-07, |
| "loss": 0.0, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1135, |
| "grad_norm": 0.0, |
| "learning_rate": 8.864999999999999e-07, |
| "loss": 0.0, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 231.0457305908203, |
| "learning_rate": 8.86e-07, |
| "loss": -0.0, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.1145, |
| "grad_norm": 0.0, |
| "learning_rate": 8.854999999999999e-07, |
| "loss": 0.0, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 23.97252655029297, |
| "learning_rate": 8.85e-07, |
| "loss": 0.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1155, |
| "grad_norm": 15.410896301269531, |
| "learning_rate": 8.845e-07, |
| "loss": 0.0, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 39.541412353515625, |
| "learning_rate": 8.839999999999999e-07, |
| "loss": 0.0, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.1165, |
| "grad_norm": 13.713851928710938, |
| "learning_rate": 8.834999999999999e-07, |
| "loss": 0.0, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.117, |
| "grad_norm": 35.34727096557617, |
| "learning_rate": 8.83e-07, |
| "loss": -0.0, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1175, |
| "grad_norm": 45.32273864746094, |
| "learning_rate": 8.824999999999999e-07, |
| "loss": 0.0, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.0, |
| "learning_rate": 8.82e-07, |
| "loss": 0.0, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1185, |
| "grad_norm": 0.0, |
| "learning_rate": 8.814999999999999e-07, |
| "loss": 0.0, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.119, |
| "grad_norm": 267.7450256347656, |
| "learning_rate": 8.81e-07, |
| "loss": 0.0, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1195, |
| "grad_norm": 143.29161071777344, |
| "learning_rate": 8.804999999999999e-07, |
| "loss": -0.0, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 52.909034729003906, |
| "learning_rate": 8.799999999999999e-07, |
| "loss": -0.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1205, |
| "grad_norm": 0.0, |
| "learning_rate": 8.794999999999999e-07, |
| "loss": 0.0, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.121, |
| "grad_norm": 37.857696533203125, |
| "learning_rate": 8.79e-07, |
| "loss": 0.0, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1215, |
| "grad_norm": 0.0, |
| "learning_rate": 8.784999999999999e-07, |
| "loss": 0.0, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 0.0, |
| "learning_rate": 8.78e-07, |
| "loss": 0.0, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1225, |
| "grad_norm": 0.0, |
| "learning_rate": 8.774999999999999e-07, |
| "loss": 0.0, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.123, |
| "grad_norm": 0.0, |
| "learning_rate": 8.769999999999999e-07, |
| "loss": 0.0, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.1235, |
| "grad_norm": 30.24044418334961, |
| "learning_rate": 8.764999999999999e-07, |
| "loss": 0.0, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 0.0, |
| "learning_rate": 8.76e-07, |
| "loss": 0.0, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1245, |
| "grad_norm": 33.06248092651367, |
| "learning_rate": 8.754999999999999e-07, |
| "loss": 0.0, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 20.05577278137207, |
| "learning_rate": 8.75e-07, |
| "loss": -0.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1255, |
| "grad_norm": 0.0, |
| "learning_rate": 8.745000000000001e-07, |
| "loss": 0.0, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 18.56123161315918, |
| "learning_rate": 8.739999999999999e-07, |
| "loss": 0.0, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1265, |
| "grad_norm": 0.0, |
| "learning_rate": 8.735e-07, |
| "loss": 0.0, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.127, |
| "grad_norm": 12.27500057220459, |
| "learning_rate": 8.729999999999999e-07, |
| "loss": 0.0, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1275, |
| "grad_norm": 0.0, |
| "learning_rate": 8.725e-07, |
| "loss": 0.0, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 53.35928726196289, |
| "learning_rate": 8.72e-07, |
| "loss": -0.0, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.1285, |
| "grad_norm": 0.0, |
| "learning_rate": 8.715e-07, |
| "loss": 0.0, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.129, |
| "grad_norm": 0.0, |
| "learning_rate": 8.71e-07, |
| "loss": 0.0, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1295, |
| "grad_norm": 0.0, |
| "learning_rate": 8.705e-07, |
| "loss": 0.0, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.0, |
| "learning_rate": 8.699999999999999e-07, |
| "loss": 0.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1305, |
| "grad_norm": 40.95280838012695, |
| "learning_rate": 8.695e-07, |
| "loss": 0.0, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.131, |
| "grad_norm": 0.0, |
| "learning_rate": 8.69e-07, |
| "loss": 0.0, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1315, |
| "grad_norm": 0.0, |
| "learning_rate": 8.685e-07, |
| "loss": 0.0, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 0.0, |
| "learning_rate": 8.68e-07, |
| "loss": 0.0, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1325, |
| "grad_norm": 0.0, |
| "learning_rate": 8.675000000000001e-07, |
| "loss": 0.0, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.133, |
| "grad_norm": 0.0, |
| "learning_rate": 8.669999999999999e-07, |
| "loss": 0.0, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1335, |
| "grad_norm": 0.0, |
| "learning_rate": 8.665e-07, |
| "loss": 0.0, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.0, |
| "learning_rate": 8.659999999999999e-07, |
| "loss": 0.0, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1345, |
| "grad_norm": 29.156984329223633, |
| "learning_rate": 8.655e-07, |
| "loss": -0.0, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 25.566734313964844, |
| "learning_rate": 8.65e-07, |
| "loss": 0.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1355, |
| "grad_norm": 90.18716430664062, |
| "learning_rate": 8.645e-07, |
| "loss": 0.0, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.0, |
| "learning_rate": 8.639999999999999e-07, |
| "loss": 0.0, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1365, |
| "grad_norm": 0.0, |
| "learning_rate": 8.635e-07, |
| "loss": 0.0, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.137, |
| "grad_norm": 0.0, |
| "learning_rate": 8.629999999999999e-07, |
| "loss": 0.0, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1375, |
| "grad_norm": 0.0, |
| "learning_rate": 8.625e-07, |
| "loss": 0.0, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 0.0, |
| "learning_rate": 8.62e-07, |
| "loss": 0.0, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1385, |
| "grad_norm": 0.0, |
| "learning_rate": 8.615e-07, |
| "loss": 0.0, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.139, |
| "grad_norm": 74.6231460571289, |
| "learning_rate": 8.61e-07, |
| "loss": 0.0, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1395, |
| "grad_norm": 0.0, |
| "learning_rate": 8.605e-07, |
| "loss": 0.0, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.0, |
| "learning_rate": 8.599999999999999e-07, |
| "loss": 0.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1405, |
| "grad_norm": 0.0, |
| "learning_rate": 8.595e-07, |
| "loss": 0.0, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.141, |
| "grad_norm": 0.0, |
| "learning_rate": 8.59e-07, |
| "loss": 0.0, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1415, |
| "grad_norm": 0.0, |
| "learning_rate": 8.585e-07, |
| "loss": 0.0, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 562.8270263671875, |
| "learning_rate": 8.58e-07, |
| "loss": 0.0, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1425, |
| "grad_norm": 0.0, |
| "learning_rate": 8.575e-07, |
| "loss": 0.0, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.143, |
| "grad_norm": 0.0, |
| "learning_rate": 8.569999999999999e-07, |
| "loss": 0.0, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1435, |
| "grad_norm": 0.0, |
| "learning_rate": 8.565e-07, |
| "loss": 0.0, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.0, |
| "learning_rate": 8.559999999999999e-07, |
| "loss": 0.0, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1445, |
| "grad_norm": 0.0, |
| "learning_rate": 8.555e-07, |
| "loss": 0.0, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 0.0, |
| "learning_rate": 8.55e-07, |
| "loss": 0.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1455, |
| "grad_norm": 0.0, |
| "learning_rate": 8.545e-07, |
| "loss": 0.0, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 0.0, |
| "learning_rate": 8.539999999999999e-07, |
| "loss": 0.0, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1465, |
| "grad_norm": 0.0, |
| "learning_rate": 8.535e-07, |
| "loss": 0.0, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.147, |
| "grad_norm": 0.0, |
| "learning_rate": 8.529999999999999e-07, |
| "loss": 0.0, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1475, |
| "grad_norm": 0.0, |
| "learning_rate": 8.525e-07, |
| "loss": 0.0, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 0.0, |
| "learning_rate": 8.52e-07, |
| "loss": 0.0, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1485, |
| "grad_norm": 0.0, |
| "learning_rate": 8.515e-07, |
| "loss": 0.0, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.149, |
| "grad_norm": 0.0, |
| "learning_rate": 8.51e-07, |
| "loss": 0.0, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1495, |
| "grad_norm": 0.0, |
| "learning_rate": 8.504999999999999e-07, |
| "loss": 0.0, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.0, |
| "learning_rate": 8.499999999999999e-07, |
| "loss": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1505, |
| "grad_norm": 0.0, |
| "learning_rate": 8.495e-07, |
| "loss": 0.0, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.151, |
| "grad_norm": 0.0, |
| "learning_rate": 8.489999999999999e-07, |
| "loss": 0.0, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1515, |
| "grad_norm": 0.0, |
| "learning_rate": 8.485e-07, |
| "loss": 0.0, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.0, |
| "learning_rate": 8.48e-07, |
| "loss": 0.0, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1525, |
| "grad_norm": 0.0, |
| "learning_rate": 8.475e-07, |
| "loss": 0.0, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.153, |
| "grad_norm": 0.0, |
| "learning_rate": 8.469999999999999e-07, |
| "loss": 0.0, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1535, |
| "grad_norm": 53.436363220214844, |
| "learning_rate": 8.465e-07, |
| "loss": 0.0, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.0, |
| "learning_rate": 8.459999999999999e-07, |
| "loss": 0.0, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1545, |
| "grad_norm": 0.0, |
| "learning_rate": 8.455e-07, |
| "loss": 0.0, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 45.34641647338867, |
| "learning_rate": 8.45e-07, |
| "loss": -0.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1555, |
| "grad_norm": 0.0, |
| "learning_rate": 8.445e-07, |
| "loss": 0.0, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.0, |
| "learning_rate": 8.439999999999999e-07, |
| "loss": 0.0, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.1565, |
| "grad_norm": 0.0, |
| "learning_rate": 8.435e-07, |
| "loss": 0.0, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.157, |
| "grad_norm": 0.0, |
| "learning_rate": 8.429999999999999e-07, |
| "loss": 0.0, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1575, |
| "grad_norm": 207.4761962890625, |
| "learning_rate": 8.425e-07, |
| "loss": -0.0, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 0.0, |
| "learning_rate": 8.419999999999999e-07, |
| "loss": 0.0, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1585, |
| "grad_norm": 49.840850830078125, |
| "learning_rate": 8.415e-07, |
| "loss": -0.0, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.159, |
| "grad_norm": 0.0, |
| "learning_rate": 8.41e-07, |
| "loss": 0.0, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1595, |
| "grad_norm": 0.0, |
| "learning_rate": 8.404999999999999e-07, |
| "loss": 0.0, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.0, |
| "learning_rate": 8.399999999999999e-07, |
| "loss": 0.0, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1605, |
| "grad_norm": 42.99878692626953, |
| "learning_rate": 8.395e-07, |
| "loss": -0.0, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.161, |
| "grad_norm": 0.0, |
| "learning_rate": 8.389999999999999e-07, |
| "loss": 0.0, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1615, |
| "grad_norm": 0.0, |
| "learning_rate": 8.385e-07, |
| "loss": 0.0, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 26.691635131835938, |
| "learning_rate": 8.38e-07, |
| "loss": 0.0, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1625, |
| "grad_norm": 0.0, |
| "learning_rate": 8.375e-07, |
| "loss": 0.0, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.163, |
| "grad_norm": 0.0, |
| "learning_rate": 8.369999999999999e-07, |
| "loss": 0.0, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1635, |
| "grad_norm": 0.0, |
| "learning_rate": 8.365e-07, |
| "loss": 0.0, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.0, |
| "learning_rate": 8.359999999999999e-07, |
| "loss": 0.0, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1645, |
| "grad_norm": 0.0, |
| "learning_rate": 8.355e-07, |
| "loss": 0.0, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 78.05026245117188, |
| "learning_rate": 8.349999999999999e-07, |
| "loss": -0.0, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1655, |
| "grad_norm": 0.0, |
| "learning_rate": 8.345e-07, |
| "loss": 0.0, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.0, |
| "learning_rate": 8.34e-07, |
| "loss": 0.0, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1665, |
| "grad_norm": 0.0, |
| "learning_rate": 8.334999999999999e-07, |
| "loss": 0.0, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.167, |
| "grad_norm": 0.0, |
| "learning_rate": 8.329999999999999e-07, |
| "loss": 0.0, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.1675, |
| "grad_norm": 0.0, |
| "learning_rate": 8.325e-07, |
| "loss": 0.0, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.0, |
| "learning_rate": 8.319999999999999e-07, |
| "loss": 0.0, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.1685, |
| "grad_norm": 0.0, |
| "learning_rate": 8.315e-07, |
| "loss": 0.0, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.169, |
| "grad_norm": 0.0, |
| "learning_rate": 8.31e-07, |
| "loss": 0.0, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1695, |
| "grad_norm": 54.89845657348633, |
| "learning_rate": 8.304999999999999e-07, |
| "loss": -0.0, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.0, |
| "learning_rate": 8.299999999999999e-07, |
| "loss": 0.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1705, |
| "grad_norm": 0.0, |
| "learning_rate": 8.295e-07, |
| "loss": 0.0, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.171, |
| "grad_norm": 0.0, |
| "learning_rate": 8.289999999999999e-07, |
| "loss": 0.0, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1715, |
| "grad_norm": 0.0, |
| "learning_rate": 8.285e-07, |
| "loss": 0.0, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 0.0, |
| "learning_rate": 8.28e-07, |
| "loss": 0.0, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1725, |
| "grad_norm": 0.0, |
| "learning_rate": 8.275e-07, |
| "loss": 0.0, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.173, |
| "grad_norm": 0.0, |
| "learning_rate": 8.269999999999999e-07, |
| "loss": 0.0, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.1735, |
| "grad_norm": 0.0, |
| "learning_rate": 8.264999999999999e-07, |
| "loss": 0.0, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.0, |
| "learning_rate": 8.259999999999999e-07, |
| "loss": 0.0, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.1745, |
| "grad_norm": 0.0, |
| "learning_rate": 8.255e-07, |
| "loss": 0.0, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.0, |
| "learning_rate": 8.249999999999999e-07, |
| "loss": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1755, |
| "grad_norm": 0.0, |
| "learning_rate": 8.245e-07, |
| "loss": 0.0, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.0, |
| "learning_rate": 8.24e-07, |
| "loss": 0.0, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1765, |
| "grad_norm": 0.0, |
| "learning_rate": 8.234999999999999e-07, |
| "loss": 0.0, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.177, |
| "grad_norm": 0.0, |
| "learning_rate": 8.229999999999999e-07, |
| "loss": 0.0, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.1775, |
| "grad_norm": 0.0, |
| "learning_rate": 8.225e-07, |
| "loss": 0.0, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.0, |
| "learning_rate": 8.219999999999999e-07, |
| "loss": 0.0, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1785, |
| "grad_norm": 0.0, |
| "learning_rate": 8.215e-07, |
| "loss": 0.0, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.179, |
| "grad_norm": 95.88402557373047, |
| "learning_rate": 8.21e-07, |
| "loss": 0.0, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1795, |
| "grad_norm": 0.0, |
| "learning_rate": 8.205e-07, |
| "loss": 0.0, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.0, |
| "learning_rate": 8.199999999999999e-07, |
| "loss": 0.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1805, |
| "grad_norm": 0.0, |
| "learning_rate": 8.194999999999999e-07, |
| "loss": 0.0, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.181, |
| "grad_norm": 16.117612838745117, |
| "learning_rate": 8.189999999999999e-07, |
| "loss": 0.0, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1815, |
| "grad_norm": 0.0, |
| "learning_rate": 8.185e-07, |
| "loss": 0.0, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.0, |
| "learning_rate": 8.179999999999999e-07, |
| "loss": 0.0, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.1825, |
| "grad_norm": 82.06559753417969, |
| "learning_rate": 8.175e-07, |
| "loss": 0.0, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.183, |
| "grad_norm": 0.0, |
| "learning_rate": 8.169999999999999e-07, |
| "loss": 0.0, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1835, |
| "grad_norm": 0.0, |
| "learning_rate": 8.164999999999999e-07, |
| "loss": 0.0, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.0, |
| "learning_rate": 8.159999999999999e-07, |
| "loss": 0.0, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1845, |
| "grad_norm": 134.08810424804688, |
| "learning_rate": 8.155e-07, |
| "loss": 0.0, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.0, |
| "learning_rate": 8.149999999999999e-07, |
| "loss": 0.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1855, |
| "grad_norm": 0.0, |
| "learning_rate": 8.145e-07, |
| "loss": 0.0, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.0, |
| "learning_rate": 8.14e-07, |
| "loss": 0.0, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1865, |
| "grad_norm": 0.0, |
| "learning_rate": 8.134999999999999e-07, |
| "loss": 0.0, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.187, |
| "grad_norm": 0.0, |
| "learning_rate": 8.129999999999999e-07, |
| "loss": 0.0, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.0, |
| "learning_rate": 8.125e-07, |
| "loss": 0.0, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.0, |
| "learning_rate": 8.12e-07, |
| "loss": 0.0, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1885, |
| "grad_norm": 0.0, |
| "learning_rate": 8.115e-07, |
| "loss": 0.0, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.189, |
| "grad_norm": 0.0, |
| "learning_rate": 8.11e-07, |
| "loss": 0.0, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1895, |
| "grad_norm": 0.0, |
| "learning_rate": 8.105e-07, |
| "loss": 0.0, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.0, |
| "learning_rate": 8.1e-07, |
| "loss": 0.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1905, |
| "grad_norm": 0.0, |
| "learning_rate": 8.094999999999999e-07, |
| "loss": 0.0, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.191, |
| "grad_norm": 0.0, |
| "learning_rate": 8.09e-07, |
| "loss": 0.0, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1915, |
| "grad_norm": 0.0, |
| "learning_rate": 8.085e-07, |
| "loss": 0.0, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.0, |
| "learning_rate": 8.08e-07, |
| "loss": 0.0, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1925, |
| "grad_norm": 0.0, |
| "learning_rate": 8.075e-07, |
| "loss": 0.0, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.193, |
| "grad_norm": 0.0, |
| "learning_rate": 8.070000000000001e-07, |
| "loss": 0.0, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.1935, |
| "grad_norm": 0.0, |
| "learning_rate": 8.064999999999999e-07, |
| "loss": 0.0, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.0, |
| "learning_rate": 8.06e-07, |
| "loss": 0.0, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1945, |
| "grad_norm": 0.0, |
| "learning_rate": 8.055e-07, |
| "loss": 0.0, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.0, |
| "learning_rate": 8.05e-07, |
| "loss": 0.0, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1955, |
| "grad_norm": 15.130922317504883, |
| "learning_rate": 8.045e-07, |
| "loss": 0.0, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.0, |
| "learning_rate": 8.04e-07, |
| "loss": 0.0, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.1965, |
| "grad_norm": 0.0, |
| "learning_rate": 8.034999999999999e-07, |
| "loss": 0.0, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.197, |
| "grad_norm": 0.0, |
| "learning_rate": 8.03e-07, |
| "loss": 0.0, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1975, |
| "grad_norm": 0.0, |
| "learning_rate": 8.024999999999999e-07, |
| "loss": 0.0, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.0, |
| "learning_rate": 8.02e-07, |
| "loss": 0.0, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1985, |
| "grad_norm": 0.0, |
| "learning_rate": 8.015e-07, |
| "loss": 0.0, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.199, |
| "grad_norm": 0.0, |
| "learning_rate": 8.01e-07, |
| "loss": 0.0, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1995, |
| "grad_norm": 0.0, |
| "learning_rate": 8.005e-07, |
| "loss": 0.0, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.0, |
| "learning_rate": 8e-07, |
| "loss": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2005, |
| "grad_norm": 0.0, |
| "learning_rate": 7.994999999999999e-07, |
| "loss": 0.0, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.201, |
| "grad_norm": 0.0, |
| "learning_rate": 7.99e-07, |
| "loss": 0.0, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.2015, |
| "grad_norm": 0.0, |
| "learning_rate": 7.985e-07, |
| "loss": 0.0, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.0, |
| "learning_rate": 7.98e-07, |
| "loss": 0.0, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2025, |
| "grad_norm": 0.0, |
| "learning_rate": 7.975e-07, |
| "loss": 0.0, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.203, |
| "grad_norm": 0.0, |
| "learning_rate": 7.970000000000001e-07, |
| "loss": 0.0, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2035, |
| "grad_norm": 0.0, |
| "learning_rate": 7.964999999999999e-07, |
| "loss": 0.0, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 139.8319854736328, |
| "learning_rate": 7.96e-07, |
| "loss": 0.0, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2045, |
| "grad_norm": 0.0, |
| "learning_rate": 7.954999999999999e-07, |
| "loss": 0.0, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.0, |
| "learning_rate": 7.95e-07, |
| "loss": 0.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2055, |
| "grad_norm": 72.6037368774414, |
| "learning_rate": 7.945e-07, |
| "loss": 0.0, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.0, |
| "learning_rate": 7.94e-07, |
| "loss": 0.0, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2065, |
| "grad_norm": 0.0, |
| "learning_rate": 7.934999999999999e-07, |
| "loss": 0.0, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.207, |
| "grad_norm": 0.0, |
| "learning_rate": 7.93e-07, |
| "loss": 0.0, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2075, |
| "grad_norm": 0.0, |
| "learning_rate": 7.924999999999999e-07, |
| "loss": 0.0, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.0, |
| "learning_rate": 7.92e-07, |
| "loss": 0.0, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2085, |
| "grad_norm": 0.0, |
| "learning_rate": 7.915e-07, |
| "loss": 0.0, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.209, |
| "grad_norm": 0.0, |
| "learning_rate": 7.91e-07, |
| "loss": 0.0, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2095, |
| "grad_norm": 0.0, |
| "learning_rate": 7.905e-07, |
| "loss": 0.0, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.0, |
| "learning_rate": 7.9e-07, |
| "loss": 0.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2105, |
| "grad_norm": 0.0, |
| "learning_rate": 7.894999999999999e-07, |
| "loss": 0.0, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.211, |
| "grad_norm": 0.0, |
| "learning_rate": 7.89e-07, |
| "loss": 0.0, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2115, |
| "grad_norm": 0.0, |
| "learning_rate": 7.884999999999999e-07, |
| "loss": 0.0, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.0, |
| "learning_rate": 7.88e-07, |
| "loss": 0.0, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2125, |
| "grad_norm": 66.85465240478516, |
| "learning_rate": 7.875e-07, |
| "loss": 0.0, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.213, |
| "grad_norm": 108.80921936035156, |
| "learning_rate": 7.87e-07, |
| "loss": -0.0, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2135, |
| "grad_norm": 0.0, |
| "learning_rate": 7.864999999999999e-07, |
| "loss": 0.0, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 0.0, |
| "learning_rate": 7.86e-07, |
| "loss": 0.0, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.2145, |
| "grad_norm": 0.0, |
| "learning_rate": 7.854999999999999e-07, |
| "loss": 0.0, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 0.0, |
| "learning_rate": 7.85e-07, |
| "loss": 0.0, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2155, |
| "grad_norm": 107.53791046142578, |
| "learning_rate": 7.845e-07, |
| "loss": -0.0, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.0, |
| "learning_rate": 7.84e-07, |
| "loss": 0.0, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2165, |
| "grad_norm": 0.0, |
| "learning_rate": 7.834999999999999e-07, |
| "loss": 0.0, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.217, |
| "grad_norm": 0.0, |
| "learning_rate": 7.83e-07, |
| "loss": 0.0, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2175, |
| "grad_norm": 0.0, |
| "learning_rate": 7.824999999999999e-07, |
| "loss": 0.0, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 184.61976623535156, |
| "learning_rate": 7.82e-07, |
| "loss": 0.0, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2185, |
| "grad_norm": 0.0, |
| "learning_rate": 7.815e-07, |
| "loss": 0.0, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.219, |
| "grad_norm": 73.76115417480469, |
| "learning_rate": 7.81e-07, |
| "loss": 0.0, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2195, |
| "grad_norm": 0.0, |
| "learning_rate": 7.805e-07, |
| "loss": 0.0, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.0, |
| "learning_rate": 7.799999999999999e-07, |
| "loss": 0.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2205, |
| "grad_norm": 0.0, |
| "learning_rate": 7.794999999999999e-07, |
| "loss": 0.0, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.221, |
| "grad_norm": 0.0, |
| "learning_rate": 7.79e-07, |
| "loss": 0.0, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2215, |
| "grad_norm": 82.87494659423828, |
| "learning_rate": 7.784999999999999e-07, |
| "loss": 0.0, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.0, |
| "learning_rate": 7.78e-07, |
| "loss": 0.0, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2225, |
| "grad_norm": 126.44339752197266, |
| "learning_rate": 7.775e-07, |
| "loss": -0.0, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.223, |
| "grad_norm": 0.0, |
| "learning_rate": 7.77e-07, |
| "loss": 0.0, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.2235, |
| "grad_norm": 0.0, |
| "learning_rate": 7.764999999999999e-07, |
| "loss": 0.0, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.0, |
| "learning_rate": 7.76e-07, |
| "loss": 0.0, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2245, |
| "grad_norm": 0.0, |
| "learning_rate": 7.754999999999999e-07, |
| "loss": 0.0, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.0, |
| "learning_rate": 7.75e-07, |
| "loss": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2255, |
| "grad_norm": 0.0, |
| "learning_rate": 7.745e-07, |
| "loss": 0.0, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.0, |
| "learning_rate": 7.74e-07, |
| "loss": 0.0, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2265, |
| "grad_norm": 0.0, |
| "learning_rate": 7.734999999999999e-07, |
| "loss": 0.0, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.227, |
| "grad_norm": 37.326351165771484, |
| "learning_rate": 7.729999999999999e-07, |
| "loss": 0.0, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2275, |
| "grad_norm": 0.0, |
| "learning_rate": 7.724999999999999e-07, |
| "loss": 0.0, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.0, |
| "learning_rate": 7.72e-07, |
| "loss": 0.0, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2285, |
| "grad_norm": 0.0, |
| "learning_rate": 7.714999999999999e-07, |
| "loss": 0.0, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.229, |
| "grad_norm": 0.0, |
| "learning_rate": 7.71e-07, |
| "loss": 0.0, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2295, |
| "grad_norm": 0.0, |
| "learning_rate": 7.705e-07, |
| "loss": 0.0, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.0, |
| "learning_rate": 7.699999999999999e-07, |
| "loss": 0.0, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2305, |
| "grad_norm": 0.0, |
| "learning_rate": 7.694999999999999e-07, |
| "loss": 0.0, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.231, |
| "grad_norm": 0.0, |
| "learning_rate": 7.69e-07, |
| "loss": 0.0, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2315, |
| "grad_norm": 0.0, |
| "learning_rate": 7.684999999999999e-07, |
| "loss": 0.0, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.0, |
| "learning_rate": 7.68e-07, |
| "loss": 0.0, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2325, |
| "grad_norm": 0.0, |
| "learning_rate": 7.675e-07, |
| "loss": 0.0, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.233, |
| "grad_norm": 0.0, |
| "learning_rate": 7.67e-07, |
| "loss": 0.0, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2335, |
| "grad_norm": 0.0, |
| "learning_rate": 7.664999999999999e-07, |
| "loss": 0.0, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.0, |
| "learning_rate": 7.66e-07, |
| "loss": 0.0, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2345, |
| "grad_norm": 0.0, |
| "learning_rate": 7.654999999999999e-07, |
| "loss": 0.0, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.0, |
| "learning_rate": 7.65e-07, |
| "loss": 0.0, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2355, |
| "grad_norm": 0.0, |
| "learning_rate": 7.644999999999999e-07, |
| "loss": 0.0, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 67.02527618408203, |
| "learning_rate": 7.64e-07, |
| "loss": -0.0, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2365, |
| "grad_norm": 0.0, |
| "learning_rate": 7.635e-07, |
| "loss": 0.0, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.237, |
| "grad_norm": 0.0, |
| "learning_rate": 7.629999999999999e-07, |
| "loss": 0.0, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2375, |
| "grad_norm": 0.0, |
| "learning_rate": 7.624999999999999e-07, |
| "loss": 0.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.0, |
| "learning_rate": 7.62e-07, |
| "loss": 0.0, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2385, |
| "grad_norm": 0.0, |
| "learning_rate": 7.614999999999999e-07, |
| "loss": 0.0, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.239, |
| "grad_norm": 0.0, |
| "learning_rate": 7.61e-07, |
| "loss": 0.0, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2395, |
| "grad_norm": 0.0, |
| "learning_rate": 7.605e-07, |
| "loss": 0.0, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.0, |
| "learning_rate": 7.599999999999999e-07, |
| "loss": 0.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2405, |
| "grad_norm": 0.0, |
| "learning_rate": 7.594999999999999e-07, |
| "loss": 0.0, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.241, |
| "grad_norm": 0.0, |
| "learning_rate": 7.59e-07, |
| "loss": 0.0, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2415, |
| "grad_norm": 0.0, |
| "learning_rate": 7.584999999999999e-07, |
| "loss": 0.0, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 0.0, |
| "learning_rate": 7.58e-07, |
| "loss": 0.0, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.2425, |
| "grad_norm": 0.0, |
| "learning_rate": 7.575e-07, |
| "loss": 0.0, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.243, |
| "grad_norm": 0.0, |
| "learning_rate": 7.57e-07, |
| "loss": 0.0, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.2435, |
| "grad_norm": 0.0, |
| "learning_rate": 7.564999999999999e-07, |
| "loss": 0.0, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.0, |
| "learning_rate": 7.559999999999999e-07, |
| "loss": 0.0, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2445, |
| "grad_norm": 0.0, |
| "learning_rate": 7.554999999999999e-07, |
| "loss": 0.0, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.245, |
| "grad_norm": 0.0, |
| "learning_rate": 7.55e-07, |
| "loss": 0.0, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2455, |
| "grad_norm": 0.0, |
| "learning_rate": 7.544999999999999e-07, |
| "loss": 0.0, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.0, |
| "learning_rate": 7.54e-07, |
| "loss": 0.0, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2465, |
| "grad_norm": 0.0, |
| "learning_rate": 7.535e-07, |
| "loss": 0.0, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.247, |
| "grad_norm": 0.0, |
| "learning_rate": 7.529999999999999e-07, |
| "loss": 0.0, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2475, |
| "grad_norm": 0.0, |
| "learning_rate": 7.524999999999999e-07, |
| "loss": 0.0, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.0, |
| "learning_rate": 7.52e-07, |
| "loss": 0.0, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2485, |
| "grad_norm": 0.0, |
| "learning_rate": 7.514999999999999e-07, |
| "loss": 0.0, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.249, |
| "grad_norm": 59.718631744384766, |
| "learning_rate": 7.51e-07, |
| "loss": 0.0, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2495, |
| "grad_norm": 0.0, |
| "learning_rate": 7.505e-07, |
| "loss": 0.0, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.0, |
| "learning_rate": 7.5e-07, |
| "loss": 0.0, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|