Grover_Pretrained / trainer_state.json
nancyH's picture
Upload folder using huggingface_hub
76bffa0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.120586678763136,
"global_step": 200000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 4.000000000000001e-06,
"loss": 5.7461,
"step": 1000
},
{
"epoch": 0.15,
"learning_rate": 8.000000000000001e-06,
"loss": 5.2435,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 1.2e-05,
"loss": 5.1697,
"step": 3000
},
{
"epoch": 0.3,
"learning_rate": 1.6000000000000003e-05,
"loss": 5.1422,
"step": 4000
},
{
"epoch": 0.38,
"learning_rate": 2e-05,
"loss": 5.1265,
"step": 5000
},
{
"epoch": 0.45,
"learning_rate": 2.4e-05,
"loss": 5.1186,
"step": 6000
},
{
"epoch": 0.53,
"learning_rate": 2.8e-05,
"loss": 5.1159,
"step": 7000
},
{
"epoch": 0.6,
"learning_rate": 3.2000000000000005e-05,
"loss": 5.11,
"step": 8000
},
{
"epoch": 0.68,
"learning_rate": 3.6e-05,
"loss": 5.1084,
"step": 9000
},
{
"epoch": 0.76,
"learning_rate": 4e-05,
"loss": 5.1073,
"step": 10000
},
{
"epoch": 0.83,
"learning_rate": 3.978947368421053e-05,
"loss": 5.1033,
"step": 11000
},
{
"epoch": 0.91,
"learning_rate": 3.9578947368421056e-05,
"loss": 5.103,
"step": 12000
},
{
"epoch": 0.98,
"learning_rate": 3.936842105263158e-05,
"loss": 5.0998,
"step": 13000
},
{
"epoch": 1.06,
"learning_rate": 3.915789473684211e-05,
"loss": 5.099,
"step": 14000
},
{
"epoch": 1.13,
"learning_rate": 3.8947368421052636e-05,
"loss": 5.0958,
"step": 15000
},
{
"epoch": 1.21,
"learning_rate": 3.873684210526316e-05,
"loss": 5.0956,
"step": 16000
},
{
"epoch": 1.29,
"learning_rate": 3.852631578947369e-05,
"loss": 5.0925,
"step": 17000
},
{
"epoch": 1.36,
"learning_rate": 3.8315789473684216e-05,
"loss": 5.0947,
"step": 18000
},
{
"epoch": 1.44,
"learning_rate": 3.8105263157894735e-05,
"loss": 5.0925,
"step": 19000
},
{
"epoch": 1.51,
"learning_rate": 3.789473684210526e-05,
"loss": 5.093,
"step": 20000
},
{
"epoch": 1.59,
"learning_rate": 3.7684210526315795e-05,
"loss": 5.0922,
"step": 21000
},
{
"epoch": 1.66,
"learning_rate": 3.747368421052632e-05,
"loss": 5.0894,
"step": 22000
},
{
"epoch": 1.74,
"learning_rate": 3.726315789473685e-05,
"loss": 5.0699,
"step": 23000
},
{
"epoch": 1.81,
"learning_rate": 3.705263157894737e-05,
"loss": 5.037,
"step": 24000
},
{
"epoch": 1.89,
"learning_rate": 3.6842105263157895e-05,
"loss": 5.0146,
"step": 25000
},
{
"epoch": 1.97,
"learning_rate": 3.663157894736842e-05,
"loss": 4.9928,
"step": 26000
},
{
"epoch": 2.04,
"learning_rate": 3.6421052631578955e-05,
"loss": 4.9687,
"step": 27000
},
{
"epoch": 2.12,
"learning_rate": 3.621052631578948e-05,
"loss": 4.9464,
"step": 28000
},
{
"epoch": 2.19,
"learning_rate": 3.6e-05,
"loss": 4.922,
"step": 29000
},
{
"epoch": 2.27,
"learning_rate": 3.578947368421053e-05,
"loss": 4.8983,
"step": 30000
},
{
"epoch": 2.34,
"learning_rate": 3.5578947368421054e-05,
"loss": 4.8721,
"step": 31000
},
{
"epoch": 2.42,
"learning_rate": 3.536842105263158e-05,
"loss": 4.8441,
"step": 32000
},
{
"epoch": 2.49,
"learning_rate": 3.515789473684211e-05,
"loss": 4.8125,
"step": 33000
},
{
"epoch": 2.57,
"learning_rate": 3.4947368421052634e-05,
"loss": 4.7796,
"step": 34000
},
{
"epoch": 2.65,
"learning_rate": 3.473684210526316e-05,
"loss": 4.7264,
"step": 35000
},
{
"epoch": 2.72,
"learning_rate": 3.452631578947369e-05,
"loss": 4.6306,
"step": 36000
},
{
"epoch": 2.8,
"learning_rate": 3.431578947368421e-05,
"loss": 4.5571,
"step": 37000
},
{
"epoch": 2.87,
"learning_rate": 3.410526315789474e-05,
"loss": 4.4757,
"step": 38000
},
{
"epoch": 2.95,
"learning_rate": 3.3894736842105266e-05,
"loss": 4.3942,
"step": 39000
},
{
"epoch": 3.02,
"learning_rate": 3.368421052631579e-05,
"loss": 4.3376,
"step": 40000
},
{
"epoch": 3.1,
"learning_rate": 3.347368421052632e-05,
"loss": 4.2928,
"step": 41000
},
{
"epoch": 3.18,
"learning_rate": 3.3263157894736846e-05,
"loss": 4.2614,
"step": 42000
},
{
"epoch": 3.25,
"learning_rate": 3.305263157894737e-05,
"loss": 4.2338,
"step": 43000
},
{
"epoch": 3.33,
"learning_rate": 3.28421052631579e-05,
"loss": 4.2183,
"step": 44000
},
{
"epoch": 3.4,
"learning_rate": 3.2631578947368426e-05,
"loss": 4.194,
"step": 45000
},
{
"epoch": 3.48,
"learning_rate": 3.2421052631578945e-05,
"loss": 4.1761,
"step": 46000
},
{
"epoch": 3.55,
"learning_rate": 3.221052631578947e-05,
"loss": 4.1644,
"step": 47000
},
{
"epoch": 3.63,
"learning_rate": 3.2000000000000005e-05,
"loss": 4.1498,
"step": 48000
},
{
"epoch": 3.7,
"learning_rate": 3.178947368421053e-05,
"loss": 4.1397,
"step": 49000
},
{
"epoch": 3.78,
"learning_rate": 3.157894736842106e-05,
"loss": 4.1275,
"step": 50000
},
{
"epoch": 3.86,
"learning_rate": 3.136842105263158e-05,
"loss": 4.1146,
"step": 51000
},
{
"epoch": 3.93,
"learning_rate": 3.1157894736842105e-05,
"loss": 4.1074,
"step": 52000
},
{
"epoch": 4.01,
"learning_rate": 3.094736842105263e-05,
"loss": 4.1001,
"step": 53000
},
{
"epoch": 4.08,
"learning_rate": 3.073684210526316e-05,
"loss": 4.0887,
"step": 54000
},
{
"epoch": 4.16,
"learning_rate": 3.052631578947369e-05,
"loss": 4.0836,
"step": 55000
},
{
"epoch": 4.23,
"learning_rate": 3.0315789473684214e-05,
"loss": 4.0808,
"step": 56000
},
{
"epoch": 4.31,
"learning_rate": 3.010526315789474e-05,
"loss": 4.0691,
"step": 57000
},
{
"epoch": 4.38,
"learning_rate": 2.9894736842105264e-05,
"loss": 4.0624,
"step": 58000
},
{
"epoch": 4.46,
"learning_rate": 2.968421052631579e-05,
"loss": 4.0501,
"step": 59000
},
{
"epoch": 4.54,
"learning_rate": 2.9473684210526317e-05,
"loss": 4.0523,
"step": 60000
},
{
"epoch": 4.61,
"learning_rate": 2.9263157894736847e-05,
"loss": 4.0434,
"step": 61000
},
{
"epoch": 4.69,
"learning_rate": 2.9052631578947374e-05,
"loss": 4.0381,
"step": 62000
},
{
"epoch": 4.76,
"learning_rate": 2.8842105263157897e-05,
"loss": 4.0363,
"step": 63000
},
{
"epoch": 4.84,
"learning_rate": 2.8631578947368423e-05,
"loss": 4.0303,
"step": 64000
},
{
"epoch": 4.91,
"learning_rate": 2.842105263157895e-05,
"loss": 4.0211,
"step": 65000
},
{
"epoch": 4.99,
"learning_rate": 2.8210526315789476e-05,
"loss": 4.0168,
"step": 66000
},
{
"epoch": 5.07,
"learning_rate": 2.8e-05,
"loss": 4.0122,
"step": 67000
},
{
"epoch": 5.14,
"learning_rate": 2.778947368421053e-05,
"loss": 4.0091,
"step": 68000
},
{
"epoch": 5.22,
"learning_rate": 2.7578947368421056e-05,
"loss": 4.0019,
"step": 69000
},
{
"epoch": 5.29,
"learning_rate": 2.7368421052631583e-05,
"loss": 3.9973,
"step": 70000
},
{
"epoch": 5.37,
"learning_rate": 2.715789473684211e-05,
"loss": 3.9994,
"step": 71000
},
{
"epoch": 5.44,
"learning_rate": 2.6947368421052632e-05,
"loss": 3.9907,
"step": 72000
},
{
"epoch": 5.52,
"learning_rate": 2.673684210526316e-05,
"loss": 3.9873,
"step": 73000
},
{
"epoch": 5.59,
"learning_rate": 2.6526315789473685e-05,
"loss": 3.984,
"step": 74000
},
{
"epoch": 5.67,
"learning_rate": 2.6315789473684215e-05,
"loss": 3.9795,
"step": 75000
},
{
"epoch": 5.75,
"learning_rate": 2.610526315789474e-05,
"loss": 3.973,
"step": 76000
},
{
"epoch": 5.82,
"learning_rate": 2.5894736842105265e-05,
"loss": 3.9739,
"step": 77000
},
{
"epoch": 5.9,
"learning_rate": 2.568421052631579e-05,
"loss": 3.9637,
"step": 78000
},
{
"epoch": 5.97,
"learning_rate": 2.5473684210526318e-05,
"loss": 3.9668,
"step": 79000
},
{
"epoch": 6.05,
"learning_rate": 2.526315789473684e-05,
"loss": 3.9624,
"step": 80000
},
{
"epoch": 6.12,
"learning_rate": 2.5052631578947368e-05,
"loss": 3.9513,
"step": 81000
},
{
"epoch": 6.2,
"learning_rate": 2.4842105263157898e-05,
"loss": 3.9586,
"step": 82000
},
{
"epoch": 6.28,
"learning_rate": 2.4631578947368424e-05,
"loss": 3.9509,
"step": 83000
},
{
"epoch": 6.35,
"learning_rate": 2.442105263157895e-05,
"loss": 3.9526,
"step": 84000
},
{
"epoch": 6.43,
"learning_rate": 2.4210526315789474e-05,
"loss": 3.9427,
"step": 85000
},
{
"epoch": 6.5,
"learning_rate": 2.4e-05,
"loss": 3.9448,
"step": 86000
},
{
"epoch": 6.58,
"learning_rate": 2.3789473684210527e-05,
"loss": 3.9331,
"step": 87000
},
{
"epoch": 6.65,
"learning_rate": 2.3578947368421054e-05,
"loss": 3.9368,
"step": 88000
},
{
"epoch": 6.73,
"learning_rate": 2.3368421052631584e-05,
"loss": 3.9316,
"step": 89000
},
{
"epoch": 6.8,
"learning_rate": 2.3157894736842107e-05,
"loss": 3.932,
"step": 90000
},
{
"epoch": 6.88,
"learning_rate": 2.2947368421052633e-05,
"loss": 3.9249,
"step": 91000
},
{
"epoch": 6.96,
"learning_rate": 2.273684210526316e-05,
"loss": 3.9272,
"step": 92000
},
{
"epoch": 7.03,
"learning_rate": 2.2526315789473686e-05,
"loss": 3.9157,
"step": 93000
},
{
"epoch": 7.11,
"learning_rate": 2.231578947368421e-05,
"loss": 3.9139,
"step": 94000
},
{
"epoch": 7.18,
"learning_rate": 2.210526315789474e-05,
"loss": 3.9091,
"step": 95000
},
{
"epoch": 7.26,
"learning_rate": 2.1894736842105266e-05,
"loss": 3.9164,
"step": 96000
},
{
"epoch": 7.33,
"learning_rate": 2.1684210526315793e-05,
"loss": 3.9119,
"step": 97000
},
{
"epoch": 7.41,
"learning_rate": 2.147368421052632e-05,
"loss": 3.9058,
"step": 98000
},
{
"epoch": 7.48,
"learning_rate": 2.1263157894736842e-05,
"loss": 3.9131,
"step": 99000
},
{
"epoch": 7.56,
"learning_rate": 2.105263157894737e-05,
"loss": 3.9097,
"step": 100000
},
{
"epoch": 7.64,
"learning_rate": 2.0842105263157895e-05,
"loss": 3.9044,
"step": 101000
},
{
"epoch": 7.71,
"learning_rate": 2.0631578947368425e-05,
"loss": 3.9004,
"step": 102000
},
{
"epoch": 7.79,
"learning_rate": 2.0421052631578952e-05,
"loss": 3.8937,
"step": 103000
},
{
"epoch": 7.86,
"learning_rate": 2.0210526315789475e-05,
"loss": 3.9017,
"step": 104000
},
{
"epoch": 7.94,
"learning_rate": 2e-05,
"loss": 3.899,
"step": 105000
},
{
"epoch": 8.01,
"learning_rate": 1.9789473684210528e-05,
"loss": 3.8979,
"step": 106000
},
{
"epoch": 8.09,
"learning_rate": 1.9578947368421055e-05,
"loss": 3.8889,
"step": 107000
},
{
"epoch": 8.17,
"learning_rate": 1.936842105263158e-05,
"loss": 3.8857,
"step": 108000
},
{
"epoch": 8.24,
"learning_rate": 1.9157894736842108e-05,
"loss": 3.8892,
"step": 109000
},
{
"epoch": 8.32,
"learning_rate": 1.894736842105263e-05,
"loss": 3.887,
"step": 110000
},
{
"epoch": 8.39,
"learning_rate": 1.873684210526316e-05,
"loss": 3.8848,
"step": 111000
},
{
"epoch": 8.47,
"learning_rate": 1.8526315789473684e-05,
"loss": 3.8805,
"step": 112000
},
{
"epoch": 8.54,
"learning_rate": 1.831578947368421e-05,
"loss": 3.8819,
"step": 113000
},
{
"epoch": 8.62,
"learning_rate": 1.810526315789474e-05,
"loss": 3.8753,
"step": 114000
},
{
"epoch": 8.69,
"learning_rate": 1.7894736842105264e-05,
"loss": 3.881,
"step": 115000
},
{
"epoch": 8.77,
"learning_rate": 1.768421052631579e-05,
"loss": 3.8784,
"step": 116000
},
{
"epoch": 8.85,
"learning_rate": 1.7473684210526317e-05,
"loss": 3.8694,
"step": 117000
},
{
"epoch": 8.92,
"learning_rate": 1.7263157894736843e-05,
"loss": 3.8727,
"step": 118000
},
{
"epoch": 9.0,
"learning_rate": 1.705263157894737e-05,
"loss": 3.8701,
"step": 119000
},
{
"epoch": 9.07,
"learning_rate": 1.6842105263157896e-05,
"loss": 3.8676,
"step": 120000
},
{
"epoch": 9.15,
"learning_rate": 1.6631578947368423e-05,
"loss": 3.8632,
"step": 121000
},
{
"epoch": 9.22,
"learning_rate": 1.642105263157895e-05,
"loss": 3.8645,
"step": 122000
},
{
"epoch": 9.3,
"learning_rate": 1.6210526315789473e-05,
"loss": 3.8682,
"step": 123000
},
{
"epoch": 9.37,
"learning_rate": 1.6000000000000003e-05,
"loss": 3.8575,
"step": 124000
},
{
"epoch": 9.45,
"learning_rate": 1.578947368421053e-05,
"loss": 3.8633,
"step": 125000
},
{
"epoch": 9.53,
"learning_rate": 1.5578947368421052e-05,
"loss": 3.8594,
"step": 126000
},
{
"epoch": 9.6,
"learning_rate": 1.536842105263158e-05,
"loss": 3.8552,
"step": 127000
},
{
"epoch": 9.68,
"learning_rate": 1.5157894736842107e-05,
"loss": 3.8563,
"step": 128000
},
{
"epoch": 9.75,
"learning_rate": 1.4947368421052632e-05,
"loss": 3.8596,
"step": 129000
},
{
"epoch": 9.83,
"learning_rate": 1.4736842105263159e-05,
"loss": 3.8565,
"step": 130000
},
{
"epoch": 9.9,
"learning_rate": 1.4526315789473687e-05,
"loss": 3.8512,
"step": 131000
},
{
"epoch": 9.98,
"learning_rate": 1.4315789473684212e-05,
"loss": 3.8531,
"step": 132000
},
{
"epoch": 10.06,
"learning_rate": 1.4105263157894738e-05,
"loss": 3.8538,
"step": 133000
},
{
"epoch": 10.13,
"learning_rate": 1.3894736842105265e-05,
"loss": 3.8477,
"step": 134000
},
{
"epoch": 10.21,
"learning_rate": 1.3684210526315791e-05,
"loss": 3.8497,
"step": 135000
},
{
"epoch": 10.28,
"learning_rate": 1.3473684210526316e-05,
"loss": 3.8455,
"step": 136000
},
{
"epoch": 10.36,
"learning_rate": 1.3263157894736843e-05,
"loss": 3.8442,
"step": 137000
},
{
"epoch": 10.43,
"learning_rate": 1.305263157894737e-05,
"loss": 3.8466,
"step": 138000
},
{
"epoch": 10.51,
"learning_rate": 1.2842105263157896e-05,
"loss": 3.8465,
"step": 139000
},
{
"epoch": 10.58,
"learning_rate": 1.263157894736842e-05,
"loss": 3.8443,
"step": 140000
},
{
"epoch": 10.66,
"learning_rate": 1.2421052631578949e-05,
"loss": 3.8407,
"step": 141000
},
{
"epoch": 10.74,
"learning_rate": 1.2210526315789475e-05,
"loss": 3.8414,
"step": 142000
},
{
"epoch": 10.81,
"learning_rate": 1.2e-05,
"loss": 3.835,
"step": 143000
},
{
"epoch": 10.89,
"learning_rate": 1.1789473684210527e-05,
"loss": 3.8321,
"step": 144000
},
{
"epoch": 10.96,
"learning_rate": 1.1578947368421053e-05,
"loss": 3.8366,
"step": 145000
},
{
"epoch": 11.04,
"learning_rate": 1.136842105263158e-05,
"loss": 3.8353,
"step": 146000
},
{
"epoch": 11.11,
"learning_rate": 1.1157894736842105e-05,
"loss": 3.8347,
"step": 147000
},
{
"epoch": 11.19,
"learning_rate": 1.0947368421052633e-05,
"loss": 3.833,
"step": 148000
},
{
"epoch": 11.26,
"learning_rate": 1.073684210526316e-05,
"loss": 3.8337,
"step": 149000
},
{
"epoch": 11.34,
"learning_rate": 1.0526315789473684e-05,
"loss": 3.8342,
"step": 150000
},
{
"epoch": 11.42,
"learning_rate": 1.0315789473684213e-05,
"loss": 3.8302,
"step": 151000
},
{
"epoch": 11.49,
"learning_rate": 1.0105263157894738e-05,
"loss": 3.8326,
"step": 152000
},
{
"epoch": 11.57,
"learning_rate": 9.894736842105264e-06,
"loss": 3.827,
"step": 153000
},
{
"epoch": 11.64,
"learning_rate": 9.68421052631579e-06,
"loss": 3.8309,
"step": 154000
},
{
"epoch": 11.72,
"learning_rate": 9.473684210526315e-06,
"loss": 3.8293,
"step": 155000
},
{
"epoch": 11.79,
"learning_rate": 9.263157894736842e-06,
"loss": 3.8277,
"step": 156000
},
{
"epoch": 11.87,
"learning_rate": 9.05263157894737e-06,
"loss": 3.8273,
"step": 157000
},
{
"epoch": 11.95,
"learning_rate": 8.842105263157895e-06,
"loss": 3.8199,
"step": 158000
},
{
"epoch": 12.02,
"learning_rate": 8.631578947368422e-06,
"loss": 3.8253,
"step": 159000
},
{
"epoch": 12.1,
"learning_rate": 8.421052631578948e-06,
"loss": 3.8267,
"step": 160000
},
{
"epoch": 12.17,
"learning_rate": 8.210526315789475e-06,
"loss": 3.8287,
"step": 161000
},
{
"epoch": 12.25,
"learning_rate": 8.000000000000001e-06,
"loss": 3.8196,
"step": 162000
},
{
"epoch": 12.32,
"learning_rate": 7.789473684210526e-06,
"loss": 3.8208,
"step": 163000
},
{
"epoch": 12.4,
"learning_rate": 7.578947368421054e-06,
"loss": 3.8287,
"step": 164000
},
{
"epoch": 12.47,
"learning_rate": 7.368421052631579e-06,
"loss": 3.8223,
"step": 165000
},
{
"epoch": 12.55,
"learning_rate": 7.157894736842106e-06,
"loss": 3.8103,
"step": 166000
},
{
"epoch": 12.63,
"learning_rate": 6.947368421052632e-06,
"loss": 3.8183,
"step": 167000
},
{
"epoch": 12.7,
"learning_rate": 6.736842105263158e-06,
"loss": 3.8174,
"step": 168000
},
{
"epoch": 12.78,
"learning_rate": 6.526315789473685e-06,
"loss": 3.8154,
"step": 169000
},
{
"epoch": 12.85,
"learning_rate": 6.31578947368421e-06,
"loss": 3.8204,
"step": 170000
},
{
"epoch": 12.93,
"learning_rate": 6.105263157894738e-06,
"loss": 3.8104,
"step": 171000
},
{
"epoch": 13.0,
"learning_rate": 5.8947368421052634e-06,
"loss": 3.8173,
"step": 172000
},
{
"epoch": 13.08,
"learning_rate": 5.68421052631579e-06,
"loss": 3.8139,
"step": 173000
},
{
"epoch": 13.15,
"learning_rate": 5.4736842105263165e-06,
"loss": 3.809,
"step": 174000
},
{
"epoch": 13.23,
"learning_rate": 5.263157894736842e-06,
"loss": 3.8151,
"step": 175000
},
{
"epoch": 13.31,
"learning_rate": 5.052631578947369e-06,
"loss": 3.8129,
"step": 176000
},
{
"epoch": 13.38,
"learning_rate": 4.842105263157895e-06,
"loss": 3.815,
"step": 177000
},
{
"epoch": 13.46,
"learning_rate": 4.631578947368421e-06,
"loss": 3.8175,
"step": 178000
},
{
"epoch": 13.53,
"learning_rate": 4.4210526315789476e-06,
"loss": 3.8125,
"step": 179000
},
{
"epoch": 13.61,
"learning_rate": 4.210526315789474e-06,
"loss": 3.8101,
"step": 180000
},
{
"epoch": 13.68,
"learning_rate": 4.000000000000001e-06,
"loss": 3.8079,
"step": 181000
},
{
"epoch": 13.76,
"learning_rate": 3.789473684210527e-06,
"loss": 3.8124,
"step": 182000
},
{
"epoch": 13.84,
"learning_rate": 3.578947368421053e-06,
"loss": 3.8134,
"step": 183000
},
{
"epoch": 13.91,
"learning_rate": 3.368421052631579e-06,
"loss": 3.8048,
"step": 184000
},
{
"epoch": 13.99,
"learning_rate": 3.157894736842105e-06,
"loss": 3.8167,
"step": 185000
},
{
"epoch": 14.06,
"learning_rate": 2.9473684210526317e-06,
"loss": 3.812,
"step": 186000
},
{
"epoch": 14.14,
"learning_rate": 2.7368421052631583e-06,
"loss": 3.8089,
"step": 187000
},
{
"epoch": 14.21,
"learning_rate": 2.5263157894736844e-06,
"loss": 3.8119,
"step": 188000
},
{
"epoch": 14.29,
"learning_rate": 2.3157894736842105e-06,
"loss": 3.8058,
"step": 189000
},
{
"epoch": 14.36,
"learning_rate": 2.105263157894737e-06,
"loss": 3.8089,
"step": 190000
},
{
"epoch": 14.44,
"learning_rate": 1.8947368421052634e-06,
"loss": 3.8058,
"step": 191000
},
{
"epoch": 14.52,
"learning_rate": 1.6842105263157895e-06,
"loss": 3.8099,
"step": 192000
},
{
"epoch": 14.59,
"learning_rate": 1.4736842105263159e-06,
"loss": 3.7993,
"step": 193000
},
{
"epoch": 14.67,
"learning_rate": 1.2631578947368422e-06,
"loss": 3.8072,
"step": 194000
},
{
"epoch": 14.74,
"learning_rate": 1.0526315789473685e-06,
"loss": 3.806,
"step": 195000
},
{
"epoch": 14.82,
"learning_rate": 8.421052631578948e-07,
"loss": 3.8084,
"step": 196000
},
{
"epoch": 14.89,
"learning_rate": 6.315789473684211e-07,
"loss": 3.8074,
"step": 197000
},
{
"epoch": 14.97,
"learning_rate": 4.210526315789474e-07,
"loss": 3.8013,
"step": 198000
},
{
"epoch": 15.04,
"learning_rate": 2.105263157894737e-07,
"loss": 3.8081,
"step": 199000
},
{
"epoch": 15.12,
"learning_rate": 0.0,
"loss": 3.8009,
"step": 200000
}
],
"max_steps": 200000,
"num_train_epochs": 16,
"total_flos": 5.051724710578883e+18,
"trial_name": null,
"trial_params": null
}