VideoThinker-R1-Bias-3B / trainer_state.json
Falconss1's picture
Upload model and track tokenizer.json with LFS
5115dac
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.25,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005,
"grad_norm": 6.1962890625,
"learning_rate": 9.995e-07,
"loss": -0.0,
"step": 1
},
{
"epoch": 0.001,
"grad_norm": 6.744086742401123,
"learning_rate": 9.989999999999999e-07,
"loss": -0.0,
"step": 2
},
{
"epoch": 0.0015,
"grad_norm": 6.945072174072266,
"learning_rate": 9.985e-07,
"loss": 0.0,
"step": 3
},
{
"epoch": 0.002,
"grad_norm": 6.354312419891357,
"learning_rate": 9.98e-07,
"loss": -0.0,
"step": 4
},
{
"epoch": 0.0025,
"grad_norm": 5.802479267120361,
"learning_rate": 9.975e-07,
"loss": 0.0,
"step": 5
},
{
"epoch": 0.003,
"grad_norm": 4.5852274894714355,
"learning_rate": 9.97e-07,
"loss": 0.0,
"step": 6
},
{
"epoch": 0.0035,
"grad_norm": 7.049472332000732,
"learning_rate": 9.965e-07,
"loss": 0.0,
"step": 7
},
{
"epoch": 0.004,
"grad_norm": 21.362648010253906,
"learning_rate": 9.959999999999999e-07,
"loss": -0.0,
"step": 8
},
{
"epoch": 0.0045,
"grad_norm": 5.594510555267334,
"learning_rate": 9.955e-07,
"loss": 0.0,
"step": 9
},
{
"epoch": 0.005,
"grad_norm": 5.9653730392456055,
"learning_rate": 9.95e-07,
"loss": 0.0,
"step": 10
},
{
"epoch": 0.0055,
"grad_norm": 5.095400333404541,
"learning_rate": 9.945e-07,
"loss": -0.0,
"step": 11
},
{
"epoch": 0.006,
"grad_norm": 0.0,
"learning_rate": 9.94e-07,
"loss": 0.0,
"step": 12
},
{
"epoch": 0.0065,
"grad_norm": 10.911425590515137,
"learning_rate": 9.935e-07,
"loss": -0.0,
"step": 13
},
{
"epoch": 0.007,
"grad_norm": 9.652170181274414,
"learning_rate": 9.929999999999999e-07,
"loss": 0.0,
"step": 14
},
{
"epoch": 0.0075,
"grad_norm": 6.956664562225342,
"learning_rate": 9.925e-07,
"loss": 0.0,
"step": 15
},
{
"epoch": 0.008,
"grad_norm": 12.070667266845703,
"learning_rate": 9.92e-07,
"loss": 0.0,
"step": 16
},
{
"epoch": 0.0085,
"grad_norm": 14.007853507995605,
"learning_rate": 9.915e-07,
"loss": 0.0,
"step": 17
},
{
"epoch": 0.009,
"grad_norm": 4.017375469207764,
"learning_rate": 9.91e-07,
"loss": 0.0,
"step": 18
},
{
"epoch": 0.0095,
"grad_norm": 0.0,
"learning_rate": 9.905e-07,
"loss": 0.0,
"step": 19
},
{
"epoch": 0.01,
"grad_norm": 6.546974182128906,
"learning_rate": 9.9e-07,
"loss": 0.0,
"step": 20
},
{
"epoch": 0.0105,
"grad_norm": 7.551206588745117,
"learning_rate": 9.895e-07,
"loss": -0.0,
"step": 21
},
{
"epoch": 0.011,
"grad_norm": 0.0,
"learning_rate": 9.89e-07,
"loss": 0.0,
"step": 22
},
{
"epoch": 0.0115,
"grad_norm": 6.233001232147217,
"learning_rate": 9.885e-07,
"loss": -0.0,
"step": 23
},
{
"epoch": 0.012,
"grad_norm": 0.0,
"learning_rate": 9.88e-07,
"loss": 0.0,
"step": 24
},
{
"epoch": 0.0125,
"grad_norm": 7.307622909545898,
"learning_rate": 9.875e-07,
"loss": -0.0,
"step": 25
},
{
"epoch": 0.013,
"grad_norm": 5.898115158081055,
"learning_rate": 9.87e-07,
"loss": -0.0,
"step": 26
},
{
"epoch": 0.0135,
"grad_norm": 8.286269187927246,
"learning_rate": 9.865e-07,
"loss": 0.0,
"step": 27
},
{
"epoch": 0.014,
"grad_norm": 9.178420066833496,
"learning_rate": 9.86e-07,
"loss": 0.0,
"step": 28
},
{
"epoch": 0.0145,
"grad_norm": 7.090274810791016,
"learning_rate": 9.855e-07,
"loss": 0.0,
"step": 29
},
{
"epoch": 0.015,
"grad_norm": 10.001739501953125,
"learning_rate": 9.849999999999999e-07,
"loss": 0.0,
"step": 30
},
{
"epoch": 0.0155,
"grad_norm": 8.978482246398926,
"learning_rate": 9.845e-07,
"loss": 0.0,
"step": 31
},
{
"epoch": 0.016,
"grad_norm": 8.083369255065918,
"learning_rate": 9.84e-07,
"loss": -0.0,
"step": 32
},
{
"epoch": 0.0165,
"grad_norm": 9.646997451782227,
"learning_rate": 9.835e-07,
"loss": 0.0,
"step": 33
},
{
"epoch": 0.017,
"grad_norm": 6.892234802246094,
"learning_rate": 9.83e-07,
"loss": 0.0,
"step": 34
},
{
"epoch": 0.0175,
"grad_norm": 0.0,
"learning_rate": 9.825e-07,
"loss": 0.0,
"step": 35
},
{
"epoch": 0.018,
"grad_norm": 6.182197570800781,
"learning_rate": 9.819999999999999e-07,
"loss": 0.0,
"step": 36
},
{
"epoch": 0.0185,
"grad_norm": 5.895266532897949,
"learning_rate": 9.815e-07,
"loss": -0.0,
"step": 37
},
{
"epoch": 0.019,
"grad_norm": 11.212841033935547,
"learning_rate": 9.81e-07,
"loss": -0.0,
"step": 38
},
{
"epoch": 0.0195,
"grad_norm": 7.982095241546631,
"learning_rate": 9.805e-07,
"loss": 0.0,
"step": 39
},
{
"epoch": 0.02,
"grad_norm": 5.73940896987915,
"learning_rate": 9.8e-07,
"loss": 0.0,
"step": 40
},
{
"epoch": 0.0205,
"grad_norm": 8.540511131286621,
"learning_rate": 9.795e-07,
"loss": -0.0,
"step": 41
},
{
"epoch": 0.021,
"grad_norm": 0.0,
"learning_rate": 9.789999999999999e-07,
"loss": 0.0,
"step": 42
},
{
"epoch": 0.0215,
"grad_norm": 8.709277153015137,
"learning_rate": 9.785e-07,
"loss": -0.0,
"step": 43
},
{
"epoch": 0.022,
"grad_norm": 6.68982458114624,
"learning_rate": 9.78e-07,
"loss": 0.0,
"step": 44
},
{
"epoch": 0.0225,
"grad_norm": 6.988176345825195,
"learning_rate": 9.775e-07,
"loss": 0.0,
"step": 45
},
{
"epoch": 0.023,
"grad_norm": 7.0302910804748535,
"learning_rate": 9.77e-07,
"loss": 0.0,
"step": 46
},
{
"epoch": 0.0235,
"grad_norm": 8.396454811096191,
"learning_rate": 9.765e-07,
"loss": -0.0,
"step": 47
},
{
"epoch": 0.024,
"grad_norm": 4.7376227378845215,
"learning_rate": 9.759999999999998e-07,
"loss": -0.0,
"step": 48
},
{
"epoch": 0.0245,
"grad_norm": 0.0,
"learning_rate": 9.755e-07,
"loss": 0.0,
"step": 49
},
{
"epoch": 0.025,
"grad_norm": 6.381641387939453,
"learning_rate": 9.75e-07,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.0255,
"grad_norm": 0.0,
"learning_rate": 9.745e-07,
"loss": 0.0,
"step": 51
},
{
"epoch": 0.026,
"grad_norm": 8.140380859375,
"learning_rate": 9.74e-07,
"loss": -0.0,
"step": 52
},
{
"epoch": 0.0265,
"grad_norm": 4.727418899536133,
"learning_rate": 9.735e-07,
"loss": 0.0,
"step": 53
},
{
"epoch": 0.027,
"grad_norm": 6.386085510253906,
"learning_rate": 9.729999999999998e-07,
"loss": -0.0,
"step": 54
},
{
"epoch": 0.0275,
"grad_norm": 6.39836311340332,
"learning_rate": 9.725e-07,
"loss": 0.0,
"step": 55
},
{
"epoch": 0.028,
"grad_norm": 5.749513149261475,
"learning_rate": 9.72e-07,
"loss": -0.0,
"step": 56
},
{
"epoch": 0.0285,
"grad_norm": 4.699296474456787,
"learning_rate": 9.715e-07,
"loss": -0.0,
"step": 57
},
{
"epoch": 0.029,
"grad_norm": 8.458806037902832,
"learning_rate": 9.709999999999999e-07,
"loss": -0.0,
"step": 58
},
{
"epoch": 0.0295,
"grad_norm": 9.1854248046875,
"learning_rate": 9.705e-07,
"loss": -0.0,
"step": 59
},
{
"epoch": 0.03,
"grad_norm": 6.844909191131592,
"learning_rate": 9.7e-07,
"loss": 0.0,
"step": 60
},
{
"epoch": 0.0305,
"grad_norm": 33.0734977722168,
"learning_rate": 9.695e-07,
"loss": 0.0,
"step": 61
},
{
"epoch": 0.031,
"grad_norm": 0.0,
"learning_rate": 9.69e-07,
"loss": 0.0,
"step": 62
},
{
"epoch": 0.0315,
"grad_norm": 7.425229072570801,
"learning_rate": 9.685e-07,
"loss": 0.0,
"step": 63
},
{
"epoch": 0.032,
"grad_norm": 9.169403076171875,
"learning_rate": 9.679999999999999e-07,
"loss": -0.0,
"step": 64
},
{
"epoch": 0.0325,
"grad_norm": 13.490100860595703,
"learning_rate": 9.675e-07,
"loss": 0.0,
"step": 65
},
{
"epoch": 0.033,
"grad_norm": 7.570629596710205,
"learning_rate": 9.67e-07,
"loss": -0.0,
"step": 66
},
{
"epoch": 0.0335,
"grad_norm": 5.252549648284912,
"learning_rate": 9.665e-07,
"loss": 0.0,
"step": 67
},
{
"epoch": 0.034,
"grad_norm": 5.543639183044434,
"learning_rate": 9.66e-07,
"loss": -0.0,
"step": 68
},
{
"epoch": 0.0345,
"grad_norm": 0.0,
"learning_rate": 9.655e-07,
"loss": 0.0,
"step": 69
},
{
"epoch": 0.035,
"grad_norm": 5.360587120056152,
"learning_rate": 9.649999999999999e-07,
"loss": 0.0,
"step": 70
},
{
"epoch": 0.0355,
"grad_norm": 7.327621936798096,
"learning_rate": 9.645e-07,
"loss": 0.0,
"step": 71
},
{
"epoch": 0.036,
"grad_norm": 9.594143867492676,
"learning_rate": 9.64e-07,
"loss": 0.0,
"step": 72
},
{
"epoch": 0.0365,
"grad_norm": 5.346116065979004,
"learning_rate": 9.635e-07,
"loss": 0.0,
"step": 73
},
{
"epoch": 0.037,
"grad_norm": 5.963859558105469,
"learning_rate": 9.63e-07,
"loss": 0.0,
"step": 74
},
{
"epoch": 0.0375,
"grad_norm": 7.078248023986816,
"learning_rate": 9.624999999999999e-07,
"loss": 0.0,
"step": 75
},
{
"epoch": 0.038,
"grad_norm": 5.854560375213623,
"learning_rate": 9.619999999999999e-07,
"loss": 0.0,
"step": 76
},
{
"epoch": 0.0385,
"grad_norm": 8.13651180267334,
"learning_rate": 9.615e-07,
"loss": -0.0,
"step": 77
},
{
"epoch": 0.039,
"grad_norm": 8.167058944702148,
"learning_rate": 9.61e-07,
"loss": -0.0,
"step": 78
},
{
"epoch": 0.0395,
"grad_norm": 5.878276348114014,
"learning_rate": 9.605e-07,
"loss": 0.0,
"step": 79
},
{
"epoch": 0.04,
"grad_norm": 12.290175437927246,
"learning_rate": 9.6e-07,
"loss": 0.0,
"step": 80
},
{
"epoch": 0.0405,
"grad_norm": 4.8677496910095215,
"learning_rate": 9.594999999999999e-07,
"loss": 0.0,
"step": 81
},
{
"epoch": 0.041,
"grad_norm": 9.993011474609375,
"learning_rate": 9.589999999999998e-07,
"loss": 0.0,
"step": 82
},
{
"epoch": 0.0415,
"grad_norm": 7.9544477462768555,
"learning_rate": 9.585e-07,
"loss": 0.0,
"step": 83
},
{
"epoch": 0.042,
"grad_norm": 8.334663391113281,
"learning_rate": 9.58e-07,
"loss": -0.0,
"step": 84
},
{
"epoch": 0.0425,
"grad_norm": 21.026262283325195,
"learning_rate": 9.575e-07,
"loss": -0.0,
"step": 85
},
{
"epoch": 0.043,
"grad_norm": 13.211177825927734,
"learning_rate": 9.57e-07,
"loss": 0.0,
"step": 86
},
{
"epoch": 0.0435,
"grad_norm": 9.141230583190918,
"learning_rate": 9.565e-07,
"loss": 0.0,
"step": 87
},
{
"epoch": 0.044,
"grad_norm": 7.934508800506592,
"learning_rate": 9.559999999999998e-07,
"loss": -0.0,
"step": 88
},
{
"epoch": 0.0445,
"grad_norm": 8.56117057800293,
"learning_rate": 9.555e-07,
"loss": 0.0,
"step": 89
},
{
"epoch": 0.045,
"grad_norm": 0.0,
"learning_rate": 9.55e-07,
"loss": 0.0,
"step": 90
},
{
"epoch": 0.0455,
"grad_norm": 15.598448753356934,
"learning_rate": 9.545e-07,
"loss": 0.0,
"step": 91
},
{
"epoch": 0.046,
"grad_norm": 9.095897674560547,
"learning_rate": 9.539999999999999e-07,
"loss": -0.0,
"step": 92
},
{
"epoch": 0.0465,
"grad_norm": 4.865746974945068,
"learning_rate": 9.535e-07,
"loss": -0.0,
"step": 93
},
{
"epoch": 0.047,
"grad_norm": 0.0,
"learning_rate": 9.529999999999999e-07,
"loss": 0.0,
"step": 94
},
{
"epoch": 0.0475,
"grad_norm": 5.1494951248168945,
"learning_rate": 9.525e-07,
"loss": 0.0,
"step": 95
},
{
"epoch": 0.048,
"grad_norm": 11.34716510772705,
"learning_rate": 9.52e-07,
"loss": 0.0,
"step": 96
},
{
"epoch": 0.0485,
"grad_norm": 11.986861228942871,
"learning_rate": 9.515e-07,
"loss": 0.0,
"step": 97
},
{
"epoch": 0.049,
"grad_norm": 7.944230079650879,
"learning_rate": 9.509999999999999e-07,
"loss": 0.0,
"step": 98
},
{
"epoch": 0.0495,
"grad_norm": 7.5184783935546875,
"learning_rate": 9.504999999999999e-07,
"loss": -0.0,
"step": 99
},
{
"epoch": 0.05,
"grad_norm": 4.20994758605957,
"learning_rate": 9.499999999999999e-07,
"loss": 0.0,
"step": 100
},
{
"epoch": 0.0505,
"grad_norm": 0.0,
"learning_rate": 9.495e-07,
"loss": 0.0,
"step": 101
},
{
"epoch": 0.051,
"grad_norm": 0.0,
"learning_rate": 9.489999999999999e-07,
"loss": 0.0,
"step": 102
},
{
"epoch": 0.0515,
"grad_norm": 7.179519176483154,
"learning_rate": 9.485e-07,
"loss": -0.0,
"step": 103
},
{
"epoch": 0.052,
"grad_norm": 8.312400817871094,
"learning_rate": 9.479999999999999e-07,
"loss": 0.0,
"step": 104
},
{
"epoch": 0.0525,
"grad_norm": 0.0,
"learning_rate": 9.474999999999999e-07,
"loss": 0.0,
"step": 105
},
{
"epoch": 0.053,
"grad_norm": 6.276727676391602,
"learning_rate": 9.469999999999999e-07,
"loss": 0.0,
"step": 106
},
{
"epoch": 0.0535,
"grad_norm": 6.952809810638428,
"learning_rate": 9.465e-07,
"loss": 0.0,
"step": 107
},
{
"epoch": 0.054,
"grad_norm": 12.95068645477295,
"learning_rate": 9.459999999999999e-07,
"loss": -0.0,
"step": 108
},
{
"epoch": 0.0545,
"grad_norm": 0.0,
"learning_rate": 9.455e-07,
"loss": 0.0,
"step": 109
},
{
"epoch": 0.055,
"grad_norm": 13.65576457977295,
"learning_rate": 9.45e-07,
"loss": 0.0,
"step": 110
},
{
"epoch": 0.0555,
"grad_norm": 8.414222717285156,
"learning_rate": 9.444999999999999e-07,
"loss": 0.0,
"step": 111
},
{
"epoch": 0.056,
"grad_norm": 7.828263759613037,
"learning_rate": 9.439999999999999e-07,
"loss": 0.0,
"step": 112
},
{
"epoch": 0.0565,
"grad_norm": 0.0,
"learning_rate": 9.434999999999999e-07,
"loss": 0.0,
"step": 113
},
{
"epoch": 0.057,
"grad_norm": 7.849336624145508,
"learning_rate": 9.429999999999999e-07,
"loss": 0.0,
"step": 114
},
{
"epoch": 0.0575,
"grad_norm": 13.594552993774414,
"learning_rate": 9.425e-07,
"loss": 0.0,
"step": 115
},
{
"epoch": 0.058,
"grad_norm": 6.633617877960205,
"learning_rate": 9.419999999999999e-07,
"loss": 0.0,
"step": 116
},
{
"epoch": 0.0585,
"grad_norm": 7.893250942230225,
"learning_rate": 9.415e-07,
"loss": -0.0,
"step": 117
},
{
"epoch": 0.059,
"grad_norm": 7.897842884063721,
"learning_rate": 9.409999999999999e-07,
"loss": -0.0,
"step": 118
},
{
"epoch": 0.0595,
"grad_norm": 7.738225936889648,
"learning_rate": 9.404999999999999e-07,
"loss": 0.0,
"step": 119
},
{
"epoch": 0.06,
"grad_norm": 10.054285049438477,
"learning_rate": 9.399999999999999e-07,
"loss": 0.0,
"step": 120
},
{
"epoch": 0.0605,
"grad_norm": 6.2317328453063965,
"learning_rate": 9.395e-07,
"loss": 0.0,
"step": 121
},
{
"epoch": 0.061,
"grad_norm": 7.4707207679748535,
"learning_rate": 9.389999999999999e-07,
"loss": -0.0,
"step": 122
},
{
"epoch": 0.0615,
"grad_norm": 0.0,
"learning_rate": 9.385e-07,
"loss": 0.0,
"step": 123
},
{
"epoch": 0.062,
"grad_norm": 6.883451461791992,
"learning_rate": 9.379999999999998e-07,
"loss": 0.0,
"step": 124
},
{
"epoch": 0.0625,
"grad_norm": 5.7558274269104,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0,
"step": 125
},
{
"epoch": 0.063,
"grad_norm": 4.654928207397461,
"learning_rate": 9.37e-07,
"loss": -0.0,
"step": 126
},
{
"epoch": 0.0635,
"grad_norm": 13.459746360778809,
"learning_rate": 9.365e-07,
"loss": 0.0,
"step": 127
},
{
"epoch": 0.064,
"grad_norm": 6.189227104187012,
"learning_rate": 9.36e-07,
"loss": -0.0,
"step": 128
},
{
"epoch": 0.0645,
"grad_norm": 15.807933807373047,
"learning_rate": 9.355e-07,
"loss": -0.0,
"step": 129
},
{
"epoch": 0.065,
"grad_norm": 8.20335865020752,
"learning_rate": 9.35e-07,
"loss": -0.0,
"step": 130
},
{
"epoch": 0.0655,
"grad_norm": 7.410068511962891,
"learning_rate": 9.344999999999999e-07,
"loss": 0.0,
"step": 131
},
{
"epoch": 0.066,
"grad_norm": 5.982290744781494,
"learning_rate": 9.34e-07,
"loss": 0.0,
"step": 132
},
{
"epoch": 0.0665,
"grad_norm": 7.302867889404297,
"learning_rate": 9.334999999999999e-07,
"loss": 0.0,
"step": 133
},
{
"epoch": 0.067,
"grad_norm": 7.16635799407959,
"learning_rate": 9.33e-07,
"loss": 0.0,
"step": 134
},
{
"epoch": 0.0675,
"grad_norm": 0.0,
"learning_rate": 9.325e-07,
"loss": 0.0,
"step": 135
},
{
"epoch": 0.068,
"grad_norm": 5.66601037979126,
"learning_rate": 9.32e-07,
"loss": -0.0,
"step": 136
},
{
"epoch": 0.0685,
"grad_norm": 0.0,
"learning_rate": 9.315e-07,
"loss": 0.0,
"step": 137
},
{
"epoch": 0.069,
"grad_norm": 12.146499633789062,
"learning_rate": 9.31e-07,
"loss": -0.0,
"step": 138
},
{
"epoch": 0.0695,
"grad_norm": 6.333805084228516,
"learning_rate": 9.304999999999999e-07,
"loss": 0.0,
"step": 139
},
{
"epoch": 0.07,
"grad_norm": 17.41741943359375,
"learning_rate": 9.3e-07,
"loss": -0.0,
"step": 140
},
{
"epoch": 0.0705,
"grad_norm": 0.0,
"learning_rate": 9.295e-07,
"loss": 0.0,
"step": 141
},
{
"epoch": 0.071,
"grad_norm": 18.96269989013672,
"learning_rate": 9.29e-07,
"loss": 0.0,
"step": 142
},
{
"epoch": 0.0715,
"grad_norm": 30.19170570373535,
"learning_rate": 9.285e-07,
"loss": 0.0,
"step": 143
},
{
"epoch": 0.072,
"grad_norm": 12.67878532409668,
"learning_rate": 9.28e-07,
"loss": -0.0,
"step": 144
},
{
"epoch": 0.0725,
"grad_norm": 16.92245101928711,
"learning_rate": 9.274999999999999e-07,
"loss": 0.0,
"step": 145
},
{
"epoch": 0.073,
"grad_norm": 8.775379180908203,
"learning_rate": 9.27e-07,
"loss": 0.0,
"step": 146
},
{
"epoch": 0.0735,
"grad_norm": 0.0,
"learning_rate": 9.264999999999999e-07,
"loss": 0.0,
"step": 147
},
{
"epoch": 0.074,
"grad_norm": 12.122485160827637,
"learning_rate": 9.26e-07,
"loss": 0.0,
"step": 148
},
{
"epoch": 0.0745,
"grad_norm": 41.2854118347168,
"learning_rate": 9.255e-07,
"loss": 0.0,
"step": 149
},
{
"epoch": 0.075,
"grad_norm": 0.0,
"learning_rate": 9.25e-07,
"loss": 0.0,
"step": 150
},
{
"epoch": 0.0755,
"grad_norm": 12.417732238769531,
"learning_rate": 9.244999999999999e-07,
"loss": 0.0,
"step": 151
},
{
"epoch": 0.076,
"grad_norm": 23.242403030395508,
"learning_rate": 9.24e-07,
"loss": 0.0,
"step": 152
},
{
"epoch": 0.0765,
"grad_norm": 0.0,
"learning_rate": 9.234999999999999e-07,
"loss": 0.0,
"step": 153
},
{
"epoch": 0.077,
"grad_norm": 8.696711540222168,
"learning_rate": 9.23e-07,
"loss": -0.0,
"step": 154
},
{
"epoch": 0.0775,
"grad_norm": 0.0,
"learning_rate": 9.225e-07,
"loss": 0.0,
"step": 155
},
{
"epoch": 0.078,
"grad_norm": 0.0,
"learning_rate": 9.22e-07,
"loss": 0.0,
"step": 156
},
{
"epoch": 0.0785,
"grad_norm": 12.881440162658691,
"learning_rate": 9.215e-07,
"loss": -0.0,
"step": 157
},
{
"epoch": 0.079,
"grad_norm": 0.0,
"learning_rate": 9.21e-07,
"loss": 0.0,
"step": 158
},
{
"epoch": 0.0795,
"grad_norm": 21.86204719543457,
"learning_rate": 9.204999999999999e-07,
"loss": 0.0,
"step": 159
},
{
"epoch": 0.08,
"grad_norm": 16.32013702392578,
"learning_rate": 9.2e-07,
"loss": -0.0,
"step": 160
},
{
"epoch": 0.0805,
"grad_norm": 0.0,
"learning_rate": 9.194999999999999e-07,
"loss": 0.0,
"step": 161
},
{
"epoch": 0.081,
"grad_norm": 0.0,
"learning_rate": 9.19e-07,
"loss": 0.0,
"step": 162
},
{
"epoch": 0.0815,
"grad_norm": 21.536087036132812,
"learning_rate": 9.185e-07,
"loss": 0.0,
"step": 163
},
{
"epoch": 0.082,
"grad_norm": 15.687423706054688,
"learning_rate": 9.18e-07,
"loss": 0.0,
"step": 164
},
{
"epoch": 0.0825,
"grad_norm": 0.0,
"learning_rate": 9.174999999999999e-07,
"loss": 0.0,
"step": 165
},
{
"epoch": 0.083,
"grad_norm": 0.0,
"learning_rate": 9.17e-07,
"loss": 0.0,
"step": 166
},
{
"epoch": 0.0835,
"grad_norm": 0.0,
"learning_rate": 9.164999999999999e-07,
"loss": 0.0,
"step": 167
},
{
"epoch": 0.084,
"grad_norm": 0.0,
"learning_rate": 9.16e-07,
"loss": 0.0,
"step": 168
},
{
"epoch": 0.0845,
"grad_norm": 0.0,
"learning_rate": 9.155e-07,
"loss": 0.0,
"step": 169
},
{
"epoch": 0.085,
"grad_norm": 0.0,
"learning_rate": 9.15e-07,
"loss": 0.0,
"step": 170
},
{
"epoch": 0.0855,
"grad_norm": 25.705774307250977,
"learning_rate": 9.145e-07,
"loss": -0.0,
"step": 171
},
{
"epoch": 0.086,
"grad_norm": 21.59645652770996,
"learning_rate": 9.14e-07,
"loss": -0.0,
"step": 172
},
{
"epoch": 0.0865,
"grad_norm": 10.857905387878418,
"learning_rate": 9.134999999999999e-07,
"loss": -0.0,
"step": 173
},
{
"epoch": 0.087,
"grad_norm": 0.0,
"learning_rate": 9.13e-07,
"loss": 0.0,
"step": 174
},
{
"epoch": 0.0875,
"grad_norm": 0.0,
"learning_rate": 9.124999999999999e-07,
"loss": 0.0,
"step": 175
},
{
"epoch": 0.088,
"grad_norm": 0.0,
"learning_rate": 9.12e-07,
"loss": 0.0,
"step": 176
},
{
"epoch": 0.0885,
"grad_norm": 0.0,
"learning_rate": 9.115e-07,
"loss": 0.0,
"step": 177
},
{
"epoch": 0.089,
"grad_norm": 20.786745071411133,
"learning_rate": 9.109999999999999e-07,
"loss": 0.0,
"step": 178
},
{
"epoch": 0.0895,
"grad_norm": 8.460957527160645,
"learning_rate": 9.104999999999999e-07,
"loss": -0.0,
"step": 179
},
{
"epoch": 0.09,
"grad_norm": 0.0,
"learning_rate": 9.1e-07,
"loss": 0.0,
"step": 180
},
{
"epoch": 0.0905,
"grad_norm": 0.0,
"learning_rate": 9.094999999999999e-07,
"loss": 0.0,
"step": 181
},
{
"epoch": 0.091,
"grad_norm": 0.0,
"learning_rate": 9.09e-07,
"loss": 0.0,
"step": 182
},
{
"epoch": 0.0915,
"grad_norm": 49.33989715576172,
"learning_rate": 9.085e-07,
"loss": -0.0,
"step": 183
},
{
"epoch": 0.092,
"grad_norm": 0.0,
"learning_rate": 9.08e-07,
"loss": 0.0,
"step": 184
},
{
"epoch": 0.0925,
"grad_norm": 0.0,
"learning_rate": 9.074999999999999e-07,
"loss": 0.0,
"step": 185
},
{
"epoch": 0.093,
"grad_norm": 0.0,
"learning_rate": 9.07e-07,
"loss": 0.0,
"step": 186
},
{
"epoch": 0.0935,
"grad_norm": 0.0,
"learning_rate": 9.064999999999999e-07,
"loss": 0.0,
"step": 187
},
{
"epoch": 0.094,
"grad_norm": 16.010793685913086,
"learning_rate": 9.06e-07,
"loss": 0.0,
"step": 188
},
{
"epoch": 0.0945,
"grad_norm": 17.950115203857422,
"learning_rate": 9.055e-07,
"loss": 0.0,
"step": 189
},
{
"epoch": 0.095,
"grad_norm": 0.0,
"learning_rate": 9.05e-07,
"loss": 0.0,
"step": 190
},
{
"epoch": 0.0955,
"grad_norm": 0.0,
"learning_rate": 9.045e-07,
"loss": 0.0,
"step": 191
},
{
"epoch": 0.096,
"grad_norm": 8.419339179992676,
"learning_rate": 9.039999999999999e-07,
"loss": -0.0,
"step": 192
},
{
"epoch": 0.0965,
"grad_norm": 0.0,
"learning_rate": 9.034999999999999e-07,
"loss": 0.0,
"step": 193
},
{
"epoch": 0.097,
"grad_norm": 17.22492790222168,
"learning_rate": 9.03e-07,
"loss": -0.0,
"step": 194
},
{
"epoch": 0.0975,
"grad_norm": 0.0,
"learning_rate": 9.024999999999999e-07,
"loss": 0.0,
"step": 195
},
{
"epoch": 0.098,
"grad_norm": 15.984553337097168,
"learning_rate": 9.02e-07,
"loss": 0.0,
"step": 196
},
{
"epoch": 0.0985,
"grad_norm": 0.0,
"learning_rate": 9.015e-07,
"loss": 0.0,
"step": 197
},
{
"epoch": 0.099,
"grad_norm": 11.981531143188477,
"learning_rate": 9.01e-07,
"loss": 0.0,
"step": 198
},
{
"epoch": 0.0995,
"grad_norm": 0.0,
"learning_rate": 9.004999999999999e-07,
"loss": 0.0,
"step": 199
},
{
"epoch": 0.1,
"grad_norm": 16.9019832611084,
"learning_rate": 9e-07,
"loss": -0.0,
"step": 200
},
{
"epoch": 0.1005,
"grad_norm": 0.0,
"learning_rate": 8.994999999999999e-07,
"loss": 0.0,
"step": 201
},
{
"epoch": 0.101,
"grad_norm": 10.651970863342285,
"learning_rate": 8.99e-07,
"loss": 0.0,
"step": 202
},
{
"epoch": 0.1015,
"grad_norm": 0.0,
"learning_rate": 8.985e-07,
"loss": 0.0,
"step": 203
},
{
"epoch": 0.102,
"grad_norm": 0.0,
"learning_rate": 8.98e-07,
"loss": 0.0,
"step": 204
},
{
"epoch": 0.1025,
"grad_norm": 0.0,
"learning_rate": 8.974999999999999e-07,
"loss": 0.0,
"step": 205
},
{
"epoch": 0.103,
"grad_norm": 33.05813980102539,
"learning_rate": 8.969999999999999e-07,
"loss": 0.0,
"step": 206
},
{
"epoch": 0.1035,
"grad_norm": 26.88140296936035,
"learning_rate": 8.964999999999999e-07,
"loss": 0.0,
"step": 207
},
{
"epoch": 0.104,
"grad_norm": 18.670848846435547,
"learning_rate": 8.96e-07,
"loss": -0.0,
"step": 208
},
{
"epoch": 0.1045,
"grad_norm": 18.841079711914062,
"learning_rate": 8.954999999999999e-07,
"loss": -0.0,
"step": 209
},
{
"epoch": 0.105,
"grad_norm": 0.0,
"learning_rate": 8.95e-07,
"loss": 0.0,
"step": 210
},
{
"epoch": 0.1055,
"grad_norm": 13.156370162963867,
"learning_rate": 8.945e-07,
"loss": 0.0,
"step": 211
},
{
"epoch": 0.106,
"grad_norm": 0.0,
"learning_rate": 8.939999999999999e-07,
"loss": 0.0,
"step": 212
},
{
"epoch": 0.1065,
"grad_norm": 0.0,
"learning_rate": 8.934999999999999e-07,
"loss": 0.0,
"step": 213
},
{
"epoch": 0.107,
"grad_norm": 23.25225830078125,
"learning_rate": 8.93e-07,
"loss": 0.0,
"step": 214
},
{
"epoch": 0.1075,
"grad_norm": 0.0,
"learning_rate": 8.924999999999999e-07,
"loss": 0.0,
"step": 215
},
{
"epoch": 0.108,
"grad_norm": 0.0,
"learning_rate": 8.92e-07,
"loss": 0.0,
"step": 216
},
{
"epoch": 0.1085,
"grad_norm": 0.0,
"learning_rate": 8.915e-07,
"loss": 0.0,
"step": 217
},
{
"epoch": 0.109,
"grad_norm": 0.0,
"learning_rate": 8.91e-07,
"loss": 0.0,
"step": 218
},
{
"epoch": 0.1095,
"grad_norm": 57.88274383544922,
"learning_rate": 8.904999999999999e-07,
"loss": 0.0,
"step": 219
},
{
"epoch": 0.11,
"grad_norm": 31.124988555908203,
"learning_rate": 8.9e-07,
"loss": 0.0,
"step": 220
},
{
"epoch": 0.1105,
"grad_norm": 0.0,
"learning_rate": 8.894999999999999e-07,
"loss": 0.0,
"step": 221
},
{
"epoch": 0.111,
"grad_norm": 22.94927215576172,
"learning_rate": 8.89e-07,
"loss": -0.0,
"step": 222
},
{
"epoch": 0.1115,
"grad_norm": 0.0,
"learning_rate": 8.884999999999999e-07,
"loss": 0.0,
"step": 223
},
{
"epoch": 0.112,
"grad_norm": 0.0,
"learning_rate": 8.88e-07,
"loss": 0.0,
"step": 224
},
{
"epoch": 0.1125,
"grad_norm": 22.883502960205078,
"learning_rate": 8.874999999999999e-07,
"loss": 0.0,
"step": 225
},
{
"epoch": 0.113,
"grad_norm": 10.071247100830078,
"learning_rate": 8.869999999999999e-07,
"loss": 0.0,
"step": 226
},
{
"epoch": 0.1135,
"grad_norm": 0.0,
"learning_rate": 8.864999999999999e-07,
"loss": 0.0,
"step": 227
},
{
"epoch": 0.114,
"grad_norm": 231.0457305908203,
"learning_rate": 8.86e-07,
"loss": -0.0,
"step": 228
},
{
"epoch": 0.1145,
"grad_norm": 0.0,
"learning_rate": 8.854999999999999e-07,
"loss": 0.0,
"step": 229
},
{
"epoch": 0.115,
"grad_norm": 23.97252655029297,
"learning_rate": 8.85e-07,
"loss": 0.0,
"step": 230
},
{
"epoch": 0.1155,
"grad_norm": 15.410896301269531,
"learning_rate": 8.845e-07,
"loss": 0.0,
"step": 231
},
{
"epoch": 0.116,
"grad_norm": 39.541412353515625,
"learning_rate": 8.839999999999999e-07,
"loss": 0.0,
"step": 232
},
{
"epoch": 0.1165,
"grad_norm": 13.713851928710938,
"learning_rate": 8.834999999999999e-07,
"loss": 0.0,
"step": 233
},
{
"epoch": 0.117,
"grad_norm": 35.34727096557617,
"learning_rate": 8.83e-07,
"loss": -0.0,
"step": 234
},
{
"epoch": 0.1175,
"grad_norm": 45.32273864746094,
"learning_rate": 8.824999999999999e-07,
"loss": 0.0,
"step": 235
},
{
"epoch": 0.118,
"grad_norm": 0.0,
"learning_rate": 8.82e-07,
"loss": 0.0,
"step": 236
},
{
"epoch": 0.1185,
"grad_norm": 0.0,
"learning_rate": 8.814999999999999e-07,
"loss": 0.0,
"step": 237
},
{
"epoch": 0.119,
"grad_norm": 267.7450256347656,
"learning_rate": 8.81e-07,
"loss": 0.0,
"step": 238
},
{
"epoch": 0.1195,
"grad_norm": 143.29161071777344,
"learning_rate": 8.804999999999999e-07,
"loss": -0.0,
"step": 239
},
{
"epoch": 0.12,
"grad_norm": 52.909034729003906,
"learning_rate": 8.799999999999999e-07,
"loss": -0.0,
"step": 240
},
{
"epoch": 0.1205,
"grad_norm": 0.0,
"learning_rate": 8.794999999999999e-07,
"loss": 0.0,
"step": 241
},
{
"epoch": 0.121,
"grad_norm": 37.857696533203125,
"learning_rate": 8.79e-07,
"loss": 0.0,
"step": 242
},
{
"epoch": 0.1215,
"grad_norm": 0.0,
"learning_rate": 8.784999999999999e-07,
"loss": 0.0,
"step": 243
},
{
"epoch": 0.122,
"grad_norm": 0.0,
"learning_rate": 8.78e-07,
"loss": 0.0,
"step": 244
},
{
"epoch": 0.1225,
"grad_norm": 0.0,
"learning_rate": 8.774999999999999e-07,
"loss": 0.0,
"step": 245
},
{
"epoch": 0.123,
"grad_norm": 0.0,
"learning_rate": 8.769999999999999e-07,
"loss": 0.0,
"step": 246
},
{
"epoch": 0.1235,
"grad_norm": 30.24044418334961,
"learning_rate": 8.764999999999999e-07,
"loss": 0.0,
"step": 247
},
{
"epoch": 0.124,
"grad_norm": 0.0,
"learning_rate": 8.76e-07,
"loss": 0.0,
"step": 248
},
{
"epoch": 0.1245,
"grad_norm": 33.06248092651367,
"learning_rate": 8.754999999999999e-07,
"loss": 0.0,
"step": 249
},
{
"epoch": 0.125,
"grad_norm": 20.05577278137207,
"learning_rate": 8.75e-07,
"loss": -0.0,
"step": 250
},
{
"epoch": 0.1255,
"grad_norm": 0.0,
"learning_rate": 8.745000000000001e-07,
"loss": 0.0,
"step": 251
},
{
"epoch": 0.126,
"grad_norm": 18.56123161315918,
"learning_rate": 8.739999999999999e-07,
"loss": 0.0,
"step": 252
},
{
"epoch": 0.1265,
"grad_norm": 0.0,
"learning_rate": 8.735e-07,
"loss": 0.0,
"step": 253
},
{
"epoch": 0.127,
"grad_norm": 12.27500057220459,
"learning_rate": 8.729999999999999e-07,
"loss": 0.0,
"step": 254
},
{
"epoch": 0.1275,
"grad_norm": 0.0,
"learning_rate": 8.725e-07,
"loss": 0.0,
"step": 255
},
{
"epoch": 0.128,
"grad_norm": 53.35928726196289,
"learning_rate": 8.72e-07,
"loss": -0.0,
"step": 256
},
{
"epoch": 0.1285,
"grad_norm": 0.0,
"learning_rate": 8.715e-07,
"loss": 0.0,
"step": 257
},
{
"epoch": 0.129,
"grad_norm": 0.0,
"learning_rate": 8.71e-07,
"loss": 0.0,
"step": 258
},
{
"epoch": 0.1295,
"grad_norm": 0.0,
"learning_rate": 8.705e-07,
"loss": 0.0,
"step": 259
},
{
"epoch": 0.13,
"grad_norm": 0.0,
"learning_rate": 8.699999999999999e-07,
"loss": 0.0,
"step": 260
},
{
"epoch": 0.1305,
"grad_norm": 40.95280838012695,
"learning_rate": 8.695e-07,
"loss": 0.0,
"step": 261
},
{
"epoch": 0.131,
"grad_norm": 0.0,
"learning_rate": 8.69e-07,
"loss": 0.0,
"step": 262
},
{
"epoch": 0.1315,
"grad_norm": 0.0,
"learning_rate": 8.685e-07,
"loss": 0.0,
"step": 263
},
{
"epoch": 0.132,
"grad_norm": 0.0,
"learning_rate": 8.68e-07,
"loss": 0.0,
"step": 264
},
{
"epoch": 0.1325,
"grad_norm": 0.0,
"learning_rate": 8.675000000000001e-07,
"loss": 0.0,
"step": 265
},
{
"epoch": 0.133,
"grad_norm": 0.0,
"learning_rate": 8.669999999999999e-07,
"loss": 0.0,
"step": 266
},
{
"epoch": 0.1335,
"grad_norm": 0.0,
"learning_rate": 8.665e-07,
"loss": 0.0,
"step": 267
},
{
"epoch": 0.134,
"grad_norm": 0.0,
"learning_rate": 8.659999999999999e-07,
"loss": 0.0,
"step": 268
},
{
"epoch": 0.1345,
"grad_norm": 29.156984329223633,
"learning_rate": 8.655e-07,
"loss": -0.0,
"step": 269
},
{
"epoch": 0.135,
"grad_norm": 25.566734313964844,
"learning_rate": 8.65e-07,
"loss": 0.0,
"step": 270
},
{
"epoch": 0.1355,
"grad_norm": 90.18716430664062,
"learning_rate": 8.645e-07,
"loss": 0.0,
"step": 271
},
{
"epoch": 0.136,
"grad_norm": 0.0,
"learning_rate": 8.639999999999999e-07,
"loss": 0.0,
"step": 272
},
{
"epoch": 0.1365,
"grad_norm": 0.0,
"learning_rate": 8.635e-07,
"loss": 0.0,
"step": 273
},
{
"epoch": 0.137,
"grad_norm": 0.0,
"learning_rate": 8.629999999999999e-07,
"loss": 0.0,
"step": 274
},
{
"epoch": 0.1375,
"grad_norm": 0.0,
"learning_rate": 8.625e-07,
"loss": 0.0,
"step": 275
},
{
"epoch": 0.138,
"grad_norm": 0.0,
"learning_rate": 8.62e-07,
"loss": 0.0,
"step": 276
},
{
"epoch": 0.1385,
"grad_norm": 0.0,
"learning_rate": 8.615e-07,
"loss": 0.0,
"step": 277
},
{
"epoch": 0.139,
"grad_norm": 74.6231460571289,
"learning_rate": 8.61e-07,
"loss": 0.0,
"step": 278
},
{
"epoch": 0.1395,
"grad_norm": 0.0,
"learning_rate": 8.605e-07,
"loss": 0.0,
"step": 279
},
{
"epoch": 0.14,
"grad_norm": 0.0,
"learning_rate": 8.599999999999999e-07,
"loss": 0.0,
"step": 280
},
{
"epoch": 0.1405,
"grad_norm": 0.0,
"learning_rate": 8.595e-07,
"loss": 0.0,
"step": 281
},
{
"epoch": 0.141,
"grad_norm": 0.0,
"learning_rate": 8.59e-07,
"loss": 0.0,
"step": 282
},
{
"epoch": 0.1415,
"grad_norm": 0.0,
"learning_rate": 8.585e-07,
"loss": 0.0,
"step": 283
},
{
"epoch": 0.142,
"grad_norm": 562.8270263671875,
"learning_rate": 8.58e-07,
"loss": 0.0,
"step": 284
},
{
"epoch": 0.1425,
"grad_norm": 0.0,
"learning_rate": 8.575e-07,
"loss": 0.0,
"step": 285
},
{
"epoch": 0.143,
"grad_norm": 0.0,
"learning_rate": 8.569999999999999e-07,
"loss": 0.0,
"step": 286
},
{
"epoch": 0.1435,
"grad_norm": 0.0,
"learning_rate": 8.565e-07,
"loss": 0.0,
"step": 287
},
{
"epoch": 0.144,
"grad_norm": 0.0,
"learning_rate": 8.559999999999999e-07,
"loss": 0.0,
"step": 288
},
{
"epoch": 0.1445,
"grad_norm": 0.0,
"learning_rate": 8.555e-07,
"loss": 0.0,
"step": 289
},
{
"epoch": 0.145,
"grad_norm": 0.0,
"learning_rate": 8.55e-07,
"loss": 0.0,
"step": 290
},
{
"epoch": 0.1455,
"grad_norm": 0.0,
"learning_rate": 8.545e-07,
"loss": 0.0,
"step": 291
},
{
"epoch": 0.146,
"grad_norm": 0.0,
"learning_rate": 8.539999999999999e-07,
"loss": 0.0,
"step": 292
},
{
"epoch": 0.1465,
"grad_norm": 0.0,
"learning_rate": 8.535e-07,
"loss": 0.0,
"step": 293
},
{
"epoch": 0.147,
"grad_norm": 0.0,
"learning_rate": 8.529999999999999e-07,
"loss": 0.0,
"step": 294
},
{
"epoch": 0.1475,
"grad_norm": 0.0,
"learning_rate": 8.525e-07,
"loss": 0.0,
"step": 295
},
{
"epoch": 0.148,
"grad_norm": 0.0,
"learning_rate": 8.52e-07,
"loss": 0.0,
"step": 296
},
{
"epoch": 0.1485,
"grad_norm": 0.0,
"learning_rate": 8.515e-07,
"loss": 0.0,
"step": 297
},
{
"epoch": 0.149,
"grad_norm": 0.0,
"learning_rate": 8.51e-07,
"loss": 0.0,
"step": 298
},
{
"epoch": 0.1495,
"grad_norm": 0.0,
"learning_rate": 8.504999999999999e-07,
"loss": 0.0,
"step": 299
},
{
"epoch": 0.15,
"grad_norm": 0.0,
"learning_rate": 8.499999999999999e-07,
"loss": 0.0,
"step": 300
},
{
"epoch": 0.1505,
"grad_norm": 0.0,
"learning_rate": 8.495e-07,
"loss": 0.0,
"step": 301
},
{
"epoch": 0.151,
"grad_norm": 0.0,
"learning_rate": 8.489999999999999e-07,
"loss": 0.0,
"step": 302
},
{
"epoch": 0.1515,
"grad_norm": 0.0,
"learning_rate": 8.485e-07,
"loss": 0.0,
"step": 303
},
{
"epoch": 0.152,
"grad_norm": 0.0,
"learning_rate": 8.48e-07,
"loss": 0.0,
"step": 304
},
{
"epoch": 0.1525,
"grad_norm": 0.0,
"learning_rate": 8.475e-07,
"loss": 0.0,
"step": 305
},
{
"epoch": 0.153,
"grad_norm": 0.0,
"learning_rate": 8.469999999999999e-07,
"loss": 0.0,
"step": 306
},
{
"epoch": 0.1535,
"grad_norm": 53.436363220214844,
"learning_rate": 8.465e-07,
"loss": 0.0,
"step": 307
},
{
"epoch": 0.154,
"grad_norm": 0.0,
"learning_rate": 8.459999999999999e-07,
"loss": 0.0,
"step": 308
},
{
"epoch": 0.1545,
"grad_norm": 0.0,
"learning_rate": 8.455e-07,
"loss": 0.0,
"step": 309
},
{
"epoch": 0.155,
"grad_norm": 45.34641647338867,
"learning_rate": 8.45e-07,
"loss": -0.0,
"step": 310
},
{
"epoch": 0.1555,
"grad_norm": 0.0,
"learning_rate": 8.445e-07,
"loss": 0.0,
"step": 311
},
{
"epoch": 0.156,
"grad_norm": 0.0,
"learning_rate": 8.439999999999999e-07,
"loss": 0.0,
"step": 312
},
{
"epoch": 0.1565,
"grad_norm": 0.0,
"learning_rate": 8.435e-07,
"loss": 0.0,
"step": 313
},
{
"epoch": 0.157,
"grad_norm": 0.0,
"learning_rate": 8.429999999999999e-07,
"loss": 0.0,
"step": 314
},
{
"epoch": 0.1575,
"grad_norm": 207.4761962890625,
"learning_rate": 8.425e-07,
"loss": -0.0,
"step": 315
},
{
"epoch": 0.158,
"grad_norm": 0.0,
"learning_rate": 8.419999999999999e-07,
"loss": 0.0,
"step": 316
},
{
"epoch": 0.1585,
"grad_norm": 49.840850830078125,
"learning_rate": 8.415e-07,
"loss": -0.0,
"step": 317
},
{
"epoch": 0.159,
"grad_norm": 0.0,
"learning_rate": 8.41e-07,
"loss": 0.0,
"step": 318
},
{
"epoch": 0.1595,
"grad_norm": 0.0,
"learning_rate": 8.404999999999999e-07,
"loss": 0.0,
"step": 319
},
{
"epoch": 0.16,
"grad_norm": 0.0,
"learning_rate": 8.399999999999999e-07,
"loss": 0.0,
"step": 320
},
{
"epoch": 0.1605,
"grad_norm": 42.99878692626953,
"learning_rate": 8.395e-07,
"loss": -0.0,
"step": 321
},
{
"epoch": 0.161,
"grad_norm": 0.0,
"learning_rate": 8.389999999999999e-07,
"loss": 0.0,
"step": 322
},
{
"epoch": 0.1615,
"grad_norm": 0.0,
"learning_rate": 8.385e-07,
"loss": 0.0,
"step": 323
},
{
"epoch": 0.162,
"grad_norm": 26.691635131835938,
"learning_rate": 8.38e-07,
"loss": 0.0,
"step": 324
},
{
"epoch": 0.1625,
"grad_norm": 0.0,
"learning_rate": 8.375e-07,
"loss": 0.0,
"step": 325
},
{
"epoch": 0.163,
"grad_norm": 0.0,
"learning_rate": 8.369999999999999e-07,
"loss": 0.0,
"step": 326
},
{
"epoch": 0.1635,
"grad_norm": 0.0,
"learning_rate": 8.365e-07,
"loss": 0.0,
"step": 327
},
{
"epoch": 0.164,
"grad_norm": 0.0,
"learning_rate": 8.359999999999999e-07,
"loss": 0.0,
"step": 328
},
{
"epoch": 0.1645,
"grad_norm": 0.0,
"learning_rate": 8.355e-07,
"loss": 0.0,
"step": 329
},
{
"epoch": 0.165,
"grad_norm": 78.05026245117188,
"learning_rate": 8.349999999999999e-07,
"loss": -0.0,
"step": 330
},
{
"epoch": 0.1655,
"grad_norm": 0.0,
"learning_rate": 8.345e-07,
"loss": 0.0,
"step": 331
},
{
"epoch": 0.166,
"grad_norm": 0.0,
"learning_rate": 8.34e-07,
"loss": 0.0,
"step": 332
},
{
"epoch": 0.1665,
"grad_norm": 0.0,
"learning_rate": 8.334999999999999e-07,
"loss": 0.0,
"step": 333
},
{
"epoch": 0.167,
"grad_norm": 0.0,
"learning_rate": 8.329999999999999e-07,
"loss": 0.0,
"step": 334
},
{
"epoch": 0.1675,
"grad_norm": 0.0,
"learning_rate": 8.325e-07,
"loss": 0.0,
"step": 335
},
{
"epoch": 0.168,
"grad_norm": 0.0,
"learning_rate": 8.319999999999999e-07,
"loss": 0.0,
"step": 336
},
{
"epoch": 0.1685,
"grad_norm": 0.0,
"learning_rate": 8.315e-07,
"loss": 0.0,
"step": 337
},
{
"epoch": 0.169,
"grad_norm": 0.0,
"learning_rate": 8.31e-07,
"loss": 0.0,
"step": 338
},
{
"epoch": 0.1695,
"grad_norm": 54.89845657348633,
"learning_rate": 8.304999999999999e-07,
"loss": -0.0,
"step": 339
},
{
"epoch": 0.17,
"grad_norm": 0.0,
"learning_rate": 8.299999999999999e-07,
"loss": 0.0,
"step": 340
},
{
"epoch": 0.1705,
"grad_norm": 0.0,
"learning_rate": 8.295e-07,
"loss": 0.0,
"step": 341
},
{
"epoch": 0.171,
"grad_norm": 0.0,
"learning_rate": 8.289999999999999e-07,
"loss": 0.0,
"step": 342
},
{
"epoch": 0.1715,
"grad_norm": 0.0,
"learning_rate": 8.285e-07,
"loss": 0.0,
"step": 343
},
{
"epoch": 0.172,
"grad_norm": 0.0,
"learning_rate": 8.28e-07,
"loss": 0.0,
"step": 344
},
{
"epoch": 0.1725,
"grad_norm": 0.0,
"learning_rate": 8.275e-07,
"loss": 0.0,
"step": 345
},
{
"epoch": 0.173,
"grad_norm": 0.0,
"learning_rate": 8.269999999999999e-07,
"loss": 0.0,
"step": 346
},
{
"epoch": 0.1735,
"grad_norm": 0.0,
"learning_rate": 8.264999999999999e-07,
"loss": 0.0,
"step": 347
},
{
"epoch": 0.174,
"grad_norm": 0.0,
"learning_rate": 8.259999999999999e-07,
"loss": 0.0,
"step": 348
},
{
"epoch": 0.1745,
"grad_norm": 0.0,
"learning_rate": 8.255e-07,
"loss": 0.0,
"step": 349
},
{
"epoch": 0.175,
"grad_norm": 0.0,
"learning_rate": 8.249999999999999e-07,
"loss": 0.0,
"step": 350
},
{
"epoch": 0.1755,
"grad_norm": 0.0,
"learning_rate": 8.245e-07,
"loss": 0.0,
"step": 351
},
{
"epoch": 0.176,
"grad_norm": 0.0,
"learning_rate": 8.24e-07,
"loss": 0.0,
"step": 352
},
{
"epoch": 0.1765,
"grad_norm": 0.0,
"learning_rate": 8.234999999999999e-07,
"loss": 0.0,
"step": 353
},
{
"epoch": 0.177,
"grad_norm": 0.0,
"learning_rate": 8.229999999999999e-07,
"loss": 0.0,
"step": 354
},
{
"epoch": 0.1775,
"grad_norm": 0.0,
"learning_rate": 8.225e-07,
"loss": 0.0,
"step": 355
},
{
"epoch": 0.178,
"grad_norm": 0.0,
"learning_rate": 8.219999999999999e-07,
"loss": 0.0,
"step": 356
},
{
"epoch": 0.1785,
"grad_norm": 0.0,
"learning_rate": 8.215e-07,
"loss": 0.0,
"step": 357
},
{
"epoch": 0.179,
"grad_norm": 95.88402557373047,
"learning_rate": 8.21e-07,
"loss": 0.0,
"step": 358
},
{
"epoch": 0.1795,
"grad_norm": 0.0,
"learning_rate": 8.205e-07,
"loss": 0.0,
"step": 359
},
{
"epoch": 0.18,
"grad_norm": 0.0,
"learning_rate": 8.199999999999999e-07,
"loss": 0.0,
"step": 360
},
{
"epoch": 0.1805,
"grad_norm": 0.0,
"learning_rate": 8.194999999999999e-07,
"loss": 0.0,
"step": 361
},
{
"epoch": 0.181,
"grad_norm": 16.117612838745117,
"learning_rate": 8.189999999999999e-07,
"loss": 0.0,
"step": 362
},
{
"epoch": 0.1815,
"grad_norm": 0.0,
"learning_rate": 8.185e-07,
"loss": 0.0,
"step": 363
},
{
"epoch": 0.182,
"grad_norm": 0.0,
"learning_rate": 8.179999999999999e-07,
"loss": 0.0,
"step": 364
},
{
"epoch": 0.1825,
"grad_norm": 82.06559753417969,
"learning_rate": 8.175e-07,
"loss": 0.0,
"step": 365
},
{
"epoch": 0.183,
"grad_norm": 0.0,
"learning_rate": 8.169999999999999e-07,
"loss": 0.0,
"step": 366
},
{
"epoch": 0.1835,
"grad_norm": 0.0,
"learning_rate": 8.164999999999999e-07,
"loss": 0.0,
"step": 367
},
{
"epoch": 0.184,
"grad_norm": 0.0,
"learning_rate": 8.159999999999999e-07,
"loss": 0.0,
"step": 368
},
{
"epoch": 0.1845,
"grad_norm": 134.08810424804688,
"learning_rate": 8.155e-07,
"loss": 0.0,
"step": 369
},
{
"epoch": 0.185,
"grad_norm": 0.0,
"learning_rate": 8.149999999999999e-07,
"loss": 0.0,
"step": 370
},
{
"epoch": 0.1855,
"grad_norm": 0.0,
"learning_rate": 8.145e-07,
"loss": 0.0,
"step": 371
},
{
"epoch": 0.186,
"grad_norm": 0.0,
"learning_rate": 8.14e-07,
"loss": 0.0,
"step": 372
},
{
"epoch": 0.1865,
"grad_norm": 0.0,
"learning_rate": 8.134999999999999e-07,
"loss": 0.0,
"step": 373
},
{
"epoch": 0.187,
"grad_norm": 0.0,
"learning_rate": 8.129999999999999e-07,
"loss": 0.0,
"step": 374
},
{
"epoch": 0.1875,
"grad_norm": 0.0,
"learning_rate": 8.125e-07,
"loss": 0.0,
"step": 375
},
{
"epoch": 0.188,
"grad_norm": 0.0,
"learning_rate": 8.12e-07,
"loss": 0.0,
"step": 376
},
{
"epoch": 0.1885,
"grad_norm": 0.0,
"learning_rate": 8.115e-07,
"loss": 0.0,
"step": 377
},
{
"epoch": 0.189,
"grad_norm": 0.0,
"learning_rate": 8.11e-07,
"loss": 0.0,
"step": 378
},
{
"epoch": 0.1895,
"grad_norm": 0.0,
"learning_rate": 8.105e-07,
"loss": 0.0,
"step": 379
},
{
"epoch": 0.19,
"grad_norm": 0.0,
"learning_rate": 8.1e-07,
"loss": 0.0,
"step": 380
},
{
"epoch": 0.1905,
"grad_norm": 0.0,
"learning_rate": 8.094999999999999e-07,
"loss": 0.0,
"step": 381
},
{
"epoch": 0.191,
"grad_norm": 0.0,
"learning_rate": 8.09e-07,
"loss": 0.0,
"step": 382
},
{
"epoch": 0.1915,
"grad_norm": 0.0,
"learning_rate": 8.085e-07,
"loss": 0.0,
"step": 383
},
{
"epoch": 0.192,
"grad_norm": 0.0,
"learning_rate": 8.08e-07,
"loss": 0.0,
"step": 384
},
{
"epoch": 0.1925,
"grad_norm": 0.0,
"learning_rate": 8.075e-07,
"loss": 0.0,
"step": 385
},
{
"epoch": 0.193,
"grad_norm": 0.0,
"learning_rate": 8.070000000000001e-07,
"loss": 0.0,
"step": 386
},
{
"epoch": 0.1935,
"grad_norm": 0.0,
"learning_rate": 8.064999999999999e-07,
"loss": 0.0,
"step": 387
},
{
"epoch": 0.194,
"grad_norm": 0.0,
"learning_rate": 8.06e-07,
"loss": 0.0,
"step": 388
},
{
"epoch": 0.1945,
"grad_norm": 0.0,
"learning_rate": 8.055e-07,
"loss": 0.0,
"step": 389
},
{
"epoch": 0.195,
"grad_norm": 0.0,
"learning_rate": 8.05e-07,
"loss": 0.0,
"step": 390
},
{
"epoch": 0.1955,
"grad_norm": 15.130922317504883,
"learning_rate": 8.045e-07,
"loss": 0.0,
"step": 391
},
{
"epoch": 0.196,
"grad_norm": 0.0,
"learning_rate": 8.04e-07,
"loss": 0.0,
"step": 392
},
{
"epoch": 0.1965,
"grad_norm": 0.0,
"learning_rate": 8.034999999999999e-07,
"loss": 0.0,
"step": 393
},
{
"epoch": 0.197,
"grad_norm": 0.0,
"learning_rate": 8.03e-07,
"loss": 0.0,
"step": 394
},
{
"epoch": 0.1975,
"grad_norm": 0.0,
"learning_rate": 8.024999999999999e-07,
"loss": 0.0,
"step": 395
},
{
"epoch": 0.198,
"grad_norm": 0.0,
"learning_rate": 8.02e-07,
"loss": 0.0,
"step": 396
},
{
"epoch": 0.1985,
"grad_norm": 0.0,
"learning_rate": 8.015e-07,
"loss": 0.0,
"step": 397
},
{
"epoch": 0.199,
"grad_norm": 0.0,
"learning_rate": 8.01e-07,
"loss": 0.0,
"step": 398
},
{
"epoch": 0.1995,
"grad_norm": 0.0,
"learning_rate": 8.005e-07,
"loss": 0.0,
"step": 399
},
{
"epoch": 0.2,
"grad_norm": 0.0,
"learning_rate": 8e-07,
"loss": 0.0,
"step": 400
},
{
"epoch": 0.2005,
"grad_norm": 0.0,
"learning_rate": 7.994999999999999e-07,
"loss": 0.0,
"step": 401
},
{
"epoch": 0.201,
"grad_norm": 0.0,
"learning_rate": 7.99e-07,
"loss": 0.0,
"step": 402
},
{
"epoch": 0.2015,
"grad_norm": 0.0,
"learning_rate": 7.985e-07,
"loss": 0.0,
"step": 403
},
{
"epoch": 0.202,
"grad_norm": 0.0,
"learning_rate": 7.98e-07,
"loss": 0.0,
"step": 404
},
{
"epoch": 0.2025,
"grad_norm": 0.0,
"learning_rate": 7.975e-07,
"loss": 0.0,
"step": 405
},
{
"epoch": 0.203,
"grad_norm": 0.0,
"learning_rate": 7.970000000000001e-07,
"loss": 0.0,
"step": 406
},
{
"epoch": 0.2035,
"grad_norm": 0.0,
"learning_rate": 7.964999999999999e-07,
"loss": 0.0,
"step": 407
},
{
"epoch": 0.204,
"grad_norm": 139.8319854736328,
"learning_rate": 7.96e-07,
"loss": 0.0,
"step": 408
},
{
"epoch": 0.2045,
"grad_norm": 0.0,
"learning_rate": 7.954999999999999e-07,
"loss": 0.0,
"step": 409
},
{
"epoch": 0.205,
"grad_norm": 0.0,
"learning_rate": 7.95e-07,
"loss": 0.0,
"step": 410
},
{
"epoch": 0.2055,
"grad_norm": 72.6037368774414,
"learning_rate": 7.945e-07,
"loss": 0.0,
"step": 411
},
{
"epoch": 0.206,
"grad_norm": 0.0,
"learning_rate": 7.94e-07,
"loss": 0.0,
"step": 412
},
{
"epoch": 0.2065,
"grad_norm": 0.0,
"learning_rate": 7.934999999999999e-07,
"loss": 0.0,
"step": 413
},
{
"epoch": 0.207,
"grad_norm": 0.0,
"learning_rate": 7.93e-07,
"loss": 0.0,
"step": 414
},
{
"epoch": 0.2075,
"grad_norm": 0.0,
"learning_rate": 7.924999999999999e-07,
"loss": 0.0,
"step": 415
},
{
"epoch": 0.208,
"grad_norm": 0.0,
"learning_rate": 7.92e-07,
"loss": 0.0,
"step": 416
},
{
"epoch": 0.2085,
"grad_norm": 0.0,
"learning_rate": 7.915e-07,
"loss": 0.0,
"step": 417
},
{
"epoch": 0.209,
"grad_norm": 0.0,
"learning_rate": 7.91e-07,
"loss": 0.0,
"step": 418
},
{
"epoch": 0.2095,
"grad_norm": 0.0,
"learning_rate": 7.905e-07,
"loss": 0.0,
"step": 419
},
{
"epoch": 0.21,
"grad_norm": 0.0,
"learning_rate": 7.9e-07,
"loss": 0.0,
"step": 420
},
{
"epoch": 0.2105,
"grad_norm": 0.0,
"learning_rate": 7.894999999999999e-07,
"loss": 0.0,
"step": 421
},
{
"epoch": 0.211,
"grad_norm": 0.0,
"learning_rate": 7.89e-07,
"loss": 0.0,
"step": 422
},
{
"epoch": 0.2115,
"grad_norm": 0.0,
"learning_rate": 7.884999999999999e-07,
"loss": 0.0,
"step": 423
},
{
"epoch": 0.212,
"grad_norm": 0.0,
"learning_rate": 7.88e-07,
"loss": 0.0,
"step": 424
},
{
"epoch": 0.2125,
"grad_norm": 66.85465240478516,
"learning_rate": 7.875e-07,
"loss": 0.0,
"step": 425
},
{
"epoch": 0.213,
"grad_norm": 108.80921936035156,
"learning_rate": 7.87e-07,
"loss": -0.0,
"step": 426
},
{
"epoch": 0.2135,
"grad_norm": 0.0,
"learning_rate": 7.864999999999999e-07,
"loss": 0.0,
"step": 427
},
{
"epoch": 0.214,
"grad_norm": 0.0,
"learning_rate": 7.86e-07,
"loss": 0.0,
"step": 428
},
{
"epoch": 0.2145,
"grad_norm": 0.0,
"learning_rate": 7.854999999999999e-07,
"loss": 0.0,
"step": 429
},
{
"epoch": 0.215,
"grad_norm": 0.0,
"learning_rate": 7.85e-07,
"loss": 0.0,
"step": 430
},
{
"epoch": 0.2155,
"grad_norm": 107.53791046142578,
"learning_rate": 7.845e-07,
"loss": -0.0,
"step": 431
},
{
"epoch": 0.216,
"grad_norm": 0.0,
"learning_rate": 7.84e-07,
"loss": 0.0,
"step": 432
},
{
"epoch": 0.2165,
"grad_norm": 0.0,
"learning_rate": 7.834999999999999e-07,
"loss": 0.0,
"step": 433
},
{
"epoch": 0.217,
"grad_norm": 0.0,
"learning_rate": 7.83e-07,
"loss": 0.0,
"step": 434
},
{
"epoch": 0.2175,
"grad_norm": 0.0,
"learning_rate": 7.824999999999999e-07,
"loss": 0.0,
"step": 435
},
{
"epoch": 0.218,
"grad_norm": 184.61976623535156,
"learning_rate": 7.82e-07,
"loss": 0.0,
"step": 436
},
{
"epoch": 0.2185,
"grad_norm": 0.0,
"learning_rate": 7.815e-07,
"loss": 0.0,
"step": 437
},
{
"epoch": 0.219,
"grad_norm": 73.76115417480469,
"learning_rate": 7.81e-07,
"loss": 0.0,
"step": 438
},
{
"epoch": 0.2195,
"grad_norm": 0.0,
"learning_rate": 7.805e-07,
"loss": 0.0,
"step": 439
},
{
"epoch": 0.22,
"grad_norm": 0.0,
"learning_rate": 7.799999999999999e-07,
"loss": 0.0,
"step": 440
},
{
"epoch": 0.2205,
"grad_norm": 0.0,
"learning_rate": 7.794999999999999e-07,
"loss": 0.0,
"step": 441
},
{
"epoch": 0.221,
"grad_norm": 0.0,
"learning_rate": 7.79e-07,
"loss": 0.0,
"step": 442
},
{
"epoch": 0.2215,
"grad_norm": 82.87494659423828,
"learning_rate": 7.784999999999999e-07,
"loss": 0.0,
"step": 443
},
{
"epoch": 0.222,
"grad_norm": 0.0,
"learning_rate": 7.78e-07,
"loss": 0.0,
"step": 444
},
{
"epoch": 0.2225,
"grad_norm": 126.44339752197266,
"learning_rate": 7.775e-07,
"loss": -0.0,
"step": 445
},
{
"epoch": 0.223,
"grad_norm": 0.0,
"learning_rate": 7.77e-07,
"loss": 0.0,
"step": 446
},
{
"epoch": 0.2235,
"grad_norm": 0.0,
"learning_rate": 7.764999999999999e-07,
"loss": 0.0,
"step": 447
},
{
"epoch": 0.224,
"grad_norm": 0.0,
"learning_rate": 7.76e-07,
"loss": 0.0,
"step": 448
},
{
"epoch": 0.2245,
"grad_norm": 0.0,
"learning_rate": 7.754999999999999e-07,
"loss": 0.0,
"step": 449
},
{
"epoch": 0.225,
"grad_norm": 0.0,
"learning_rate": 7.75e-07,
"loss": 0.0,
"step": 450
},
{
"epoch": 0.2255,
"grad_norm": 0.0,
"learning_rate": 7.745e-07,
"loss": 0.0,
"step": 451
},
{
"epoch": 0.226,
"grad_norm": 0.0,
"learning_rate": 7.74e-07,
"loss": 0.0,
"step": 452
},
{
"epoch": 0.2265,
"grad_norm": 0.0,
"learning_rate": 7.734999999999999e-07,
"loss": 0.0,
"step": 453
},
{
"epoch": 0.227,
"grad_norm": 37.326351165771484,
"learning_rate": 7.729999999999999e-07,
"loss": 0.0,
"step": 454
},
{
"epoch": 0.2275,
"grad_norm": 0.0,
"learning_rate": 7.724999999999999e-07,
"loss": 0.0,
"step": 455
},
{
"epoch": 0.228,
"grad_norm": 0.0,
"learning_rate": 7.72e-07,
"loss": 0.0,
"step": 456
},
{
"epoch": 0.2285,
"grad_norm": 0.0,
"learning_rate": 7.714999999999999e-07,
"loss": 0.0,
"step": 457
},
{
"epoch": 0.229,
"grad_norm": 0.0,
"learning_rate": 7.71e-07,
"loss": 0.0,
"step": 458
},
{
"epoch": 0.2295,
"grad_norm": 0.0,
"learning_rate": 7.705e-07,
"loss": 0.0,
"step": 459
},
{
"epoch": 0.23,
"grad_norm": 0.0,
"learning_rate": 7.699999999999999e-07,
"loss": 0.0,
"step": 460
},
{
"epoch": 0.2305,
"grad_norm": 0.0,
"learning_rate": 7.694999999999999e-07,
"loss": 0.0,
"step": 461
},
{
"epoch": 0.231,
"grad_norm": 0.0,
"learning_rate": 7.69e-07,
"loss": 0.0,
"step": 462
},
{
"epoch": 0.2315,
"grad_norm": 0.0,
"learning_rate": 7.684999999999999e-07,
"loss": 0.0,
"step": 463
},
{
"epoch": 0.232,
"grad_norm": 0.0,
"learning_rate": 7.68e-07,
"loss": 0.0,
"step": 464
},
{
"epoch": 0.2325,
"grad_norm": 0.0,
"learning_rate": 7.675e-07,
"loss": 0.0,
"step": 465
},
{
"epoch": 0.233,
"grad_norm": 0.0,
"learning_rate": 7.67e-07,
"loss": 0.0,
"step": 466
},
{
"epoch": 0.2335,
"grad_norm": 0.0,
"learning_rate": 7.664999999999999e-07,
"loss": 0.0,
"step": 467
},
{
"epoch": 0.234,
"grad_norm": 0.0,
"learning_rate": 7.66e-07,
"loss": 0.0,
"step": 468
},
{
"epoch": 0.2345,
"grad_norm": 0.0,
"learning_rate": 7.654999999999999e-07,
"loss": 0.0,
"step": 469
},
{
"epoch": 0.235,
"grad_norm": 0.0,
"learning_rate": 7.65e-07,
"loss": 0.0,
"step": 470
},
{
"epoch": 0.2355,
"grad_norm": 0.0,
"learning_rate": 7.644999999999999e-07,
"loss": 0.0,
"step": 471
},
{
"epoch": 0.236,
"grad_norm": 67.02527618408203,
"learning_rate": 7.64e-07,
"loss": -0.0,
"step": 472
},
{
"epoch": 0.2365,
"grad_norm": 0.0,
"learning_rate": 7.635e-07,
"loss": 0.0,
"step": 473
},
{
"epoch": 0.237,
"grad_norm": 0.0,
"learning_rate": 7.629999999999999e-07,
"loss": 0.0,
"step": 474
},
{
"epoch": 0.2375,
"grad_norm": 0.0,
"learning_rate": 7.624999999999999e-07,
"loss": 0.0,
"step": 475
},
{
"epoch": 0.238,
"grad_norm": 0.0,
"learning_rate": 7.62e-07,
"loss": 0.0,
"step": 476
},
{
"epoch": 0.2385,
"grad_norm": 0.0,
"learning_rate": 7.614999999999999e-07,
"loss": 0.0,
"step": 477
},
{
"epoch": 0.239,
"grad_norm": 0.0,
"learning_rate": 7.61e-07,
"loss": 0.0,
"step": 478
},
{
"epoch": 0.2395,
"grad_norm": 0.0,
"learning_rate": 7.605e-07,
"loss": 0.0,
"step": 479
},
{
"epoch": 0.24,
"grad_norm": 0.0,
"learning_rate": 7.599999999999999e-07,
"loss": 0.0,
"step": 480
},
{
"epoch": 0.2405,
"grad_norm": 0.0,
"learning_rate": 7.594999999999999e-07,
"loss": 0.0,
"step": 481
},
{
"epoch": 0.241,
"grad_norm": 0.0,
"learning_rate": 7.59e-07,
"loss": 0.0,
"step": 482
},
{
"epoch": 0.2415,
"grad_norm": 0.0,
"learning_rate": 7.584999999999999e-07,
"loss": 0.0,
"step": 483
},
{
"epoch": 0.242,
"grad_norm": 0.0,
"learning_rate": 7.58e-07,
"loss": 0.0,
"step": 484
},
{
"epoch": 0.2425,
"grad_norm": 0.0,
"learning_rate": 7.575e-07,
"loss": 0.0,
"step": 485
},
{
"epoch": 0.243,
"grad_norm": 0.0,
"learning_rate": 7.57e-07,
"loss": 0.0,
"step": 486
},
{
"epoch": 0.2435,
"grad_norm": 0.0,
"learning_rate": 7.564999999999999e-07,
"loss": 0.0,
"step": 487
},
{
"epoch": 0.244,
"grad_norm": 0.0,
"learning_rate": 7.559999999999999e-07,
"loss": 0.0,
"step": 488
},
{
"epoch": 0.2445,
"grad_norm": 0.0,
"learning_rate": 7.554999999999999e-07,
"loss": 0.0,
"step": 489
},
{
"epoch": 0.245,
"grad_norm": 0.0,
"learning_rate": 7.55e-07,
"loss": 0.0,
"step": 490
},
{
"epoch": 0.2455,
"grad_norm": 0.0,
"learning_rate": 7.544999999999999e-07,
"loss": 0.0,
"step": 491
},
{
"epoch": 0.246,
"grad_norm": 0.0,
"learning_rate": 7.54e-07,
"loss": 0.0,
"step": 492
},
{
"epoch": 0.2465,
"grad_norm": 0.0,
"learning_rate": 7.535e-07,
"loss": 0.0,
"step": 493
},
{
"epoch": 0.247,
"grad_norm": 0.0,
"learning_rate": 7.529999999999999e-07,
"loss": 0.0,
"step": 494
},
{
"epoch": 0.2475,
"grad_norm": 0.0,
"learning_rate": 7.524999999999999e-07,
"loss": 0.0,
"step": 495
},
{
"epoch": 0.248,
"grad_norm": 0.0,
"learning_rate": 7.52e-07,
"loss": 0.0,
"step": 496
},
{
"epoch": 0.2485,
"grad_norm": 0.0,
"learning_rate": 7.514999999999999e-07,
"loss": 0.0,
"step": 497
},
{
"epoch": 0.249,
"grad_norm": 59.718631744384766,
"learning_rate": 7.51e-07,
"loss": 0.0,
"step": 498
},
{
"epoch": 0.2495,
"grad_norm": 0.0,
"learning_rate": 7.505e-07,
"loss": 0.0,
"step": 499
},
{
"epoch": 0.25,
"grad_norm": 0.0,
"learning_rate": 7.5e-07,
"loss": 0.0,
"step": 500
}
],
"logging_steps": 1.0,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}