interview-question-remake / trainer_state.json
hyechanj
First model version
a15191a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 162110,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.9845783727098886e-05,
"loss": 3.5716,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 4.969156745419777e-05,
"loss": 3.488,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 4.953735118129665e-05,
"loss": 3.4241,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 4.9383134908395535e-05,
"loss": 3.4241,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 4.922891863549442e-05,
"loss": 3.4084,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 4.907470236259331e-05,
"loss": 3.4088,
"step": 3000
},
{
"epoch": 0.22,
"learning_rate": 4.8920486089692185e-05,
"loss": 3.3556,
"step": 3500
},
{
"epoch": 0.25,
"learning_rate": 4.876626981679107e-05,
"loss": 3.3617,
"step": 4000
},
{
"epoch": 0.28,
"learning_rate": 4.861205354388995e-05,
"loss": 3.3387,
"step": 4500
},
{
"epoch": 0.31,
"learning_rate": 4.8457837270988835e-05,
"loss": 3.3603,
"step": 5000
},
{
"epoch": 0.34,
"learning_rate": 4.8303620998087725e-05,
"loss": 3.3323,
"step": 5500
},
{
"epoch": 0.37,
"learning_rate": 4.81494047251866e-05,
"loss": 3.3145,
"step": 6000
},
{
"epoch": 0.4,
"learning_rate": 4.799518845228549e-05,
"loss": 3.3182,
"step": 6500
},
{
"epoch": 0.43,
"learning_rate": 4.784097217938437e-05,
"loss": 3.3018,
"step": 7000
},
{
"epoch": 0.46,
"learning_rate": 4.768675590648325e-05,
"loss": 3.3106,
"step": 7500
},
{
"epoch": 0.49,
"learning_rate": 4.753253963358214e-05,
"loss": 3.3128,
"step": 8000
},
{
"epoch": 0.52,
"learning_rate": 4.737832336068102e-05,
"loss": 3.2902,
"step": 8500
},
{
"epoch": 0.56,
"learning_rate": 4.722410708777991e-05,
"loss": 3.2901,
"step": 9000
},
{
"epoch": 0.59,
"learning_rate": 4.7069890814878784e-05,
"loss": 3.2667,
"step": 9500
},
{
"epoch": 0.62,
"learning_rate": 4.6915674541977675e-05,
"loss": 3.252,
"step": 10000
},
{
"epoch": 0.65,
"learning_rate": 4.676145826907656e-05,
"loss": 3.2849,
"step": 10500
},
{
"epoch": 0.68,
"learning_rate": 4.660724199617544e-05,
"loss": 3.2668,
"step": 11000
},
{
"epoch": 0.71,
"learning_rate": 4.6453025723274324e-05,
"loss": 3.2758,
"step": 11500
},
{
"epoch": 0.74,
"learning_rate": 4.62988094503732e-05,
"loss": 3.2699,
"step": 12000
},
{
"epoch": 0.77,
"learning_rate": 4.614459317747209e-05,
"loss": 3.2566,
"step": 12500
},
{
"epoch": 0.8,
"learning_rate": 4.599037690457097e-05,
"loss": 3.2478,
"step": 13000
},
{
"epoch": 0.83,
"learning_rate": 4.583616063166986e-05,
"loss": 3.2408,
"step": 13500
},
{
"epoch": 0.86,
"learning_rate": 4.568194435876874e-05,
"loss": 3.2459,
"step": 14000
},
{
"epoch": 0.89,
"learning_rate": 4.5527728085867624e-05,
"loss": 3.2581,
"step": 14500
},
{
"epoch": 0.93,
"learning_rate": 4.537351181296651e-05,
"loss": 3.2349,
"step": 15000
},
{
"epoch": 0.96,
"learning_rate": 4.521929554006539e-05,
"loss": 3.2619,
"step": 15500
},
{
"epoch": 0.99,
"learning_rate": 4.5065079267164274e-05,
"loss": 3.2307,
"step": 16000
},
{
"epoch": 1.02,
"learning_rate": 4.491086299426316e-05,
"loss": 3.0671,
"step": 16500
},
{
"epoch": 1.05,
"learning_rate": 4.475664672136204e-05,
"loss": 2.9847,
"step": 17000
},
{
"epoch": 1.08,
"learning_rate": 4.4602430448460924e-05,
"loss": 2.969,
"step": 17500
},
{
"epoch": 1.11,
"learning_rate": 4.444821417555981e-05,
"loss": 2.9732,
"step": 18000
},
{
"epoch": 1.14,
"learning_rate": 4.429399790265869e-05,
"loss": 2.9817,
"step": 18500
},
{
"epoch": 1.17,
"learning_rate": 4.413978162975757e-05,
"loss": 3.0009,
"step": 19000
},
{
"epoch": 1.2,
"learning_rate": 4.398556535685646e-05,
"loss": 2.9889,
"step": 19500
},
{
"epoch": 1.23,
"learning_rate": 4.383134908395534e-05,
"loss": 2.9974,
"step": 20000
},
{
"epoch": 1.26,
"learning_rate": 4.367713281105422e-05,
"loss": 2.997,
"step": 20500
},
{
"epoch": 1.3,
"learning_rate": 4.3522916538153106e-05,
"loss": 2.9957,
"step": 21000
},
{
"epoch": 1.33,
"learning_rate": 4.3368700265251996e-05,
"loss": 3.0038,
"step": 21500
},
{
"epoch": 1.36,
"learning_rate": 4.321448399235087e-05,
"loss": 3.021,
"step": 22000
},
{
"epoch": 1.39,
"learning_rate": 4.3060267719449756e-05,
"loss": 3.0072,
"step": 22500
},
{
"epoch": 1.42,
"learning_rate": 4.290605144654864e-05,
"loss": 3.0103,
"step": 23000
},
{
"epoch": 1.45,
"learning_rate": 4.275183517364752e-05,
"loss": 3.0178,
"step": 23500
},
{
"epoch": 1.48,
"learning_rate": 4.259761890074641e-05,
"loss": 2.9975,
"step": 24000
},
{
"epoch": 1.51,
"learning_rate": 4.244340262784529e-05,
"loss": 2.9901,
"step": 24500
},
{
"epoch": 1.54,
"learning_rate": 4.228918635494418e-05,
"loss": 3.013,
"step": 25000
},
{
"epoch": 1.57,
"learning_rate": 4.2134970082043056e-05,
"loss": 3.0131,
"step": 25500
},
{
"epoch": 1.6,
"learning_rate": 4.1980753809141946e-05,
"loss": 3.0112,
"step": 26000
},
{
"epoch": 1.63,
"learning_rate": 4.182653753624083e-05,
"loss": 3.019,
"step": 26500
},
{
"epoch": 1.67,
"learning_rate": 4.1672321263339706e-05,
"loss": 3.0125,
"step": 27000
},
{
"epoch": 1.7,
"learning_rate": 4.1518104990438596e-05,
"loss": 3.0208,
"step": 27500
},
{
"epoch": 1.73,
"learning_rate": 4.136388871753747e-05,
"loss": 3.0321,
"step": 28000
},
{
"epoch": 1.76,
"learning_rate": 4.120967244463636e-05,
"loss": 3.0425,
"step": 28500
},
{
"epoch": 1.79,
"learning_rate": 4.1055456171735245e-05,
"loss": 3.0076,
"step": 29000
},
{
"epoch": 1.82,
"learning_rate": 4.090123989883413e-05,
"loss": 3.0243,
"step": 29500
},
{
"epoch": 1.85,
"learning_rate": 4.074702362593301e-05,
"loss": 3.038,
"step": 30000
},
{
"epoch": 1.88,
"learning_rate": 4.0592807353031895e-05,
"loss": 3.0264,
"step": 30500
},
{
"epoch": 1.91,
"learning_rate": 4.043859108013078e-05,
"loss": 3.0321,
"step": 31000
},
{
"epoch": 1.94,
"learning_rate": 4.028437480722966e-05,
"loss": 3.0399,
"step": 31500
},
{
"epoch": 1.97,
"learning_rate": 4.0130158534328545e-05,
"loss": 3.0232,
"step": 32000
},
{
"epoch": 2.0,
"learning_rate": 3.997594226142743e-05,
"loss": 2.9806,
"step": 32500
},
{
"epoch": 2.04,
"learning_rate": 3.982172598852631e-05,
"loss": 2.761,
"step": 33000
},
{
"epoch": 2.07,
"learning_rate": 3.9667509715625195e-05,
"loss": 2.7645,
"step": 33500
},
{
"epoch": 2.1,
"learning_rate": 3.951329344272408e-05,
"loss": 2.7481,
"step": 34000
},
{
"epoch": 2.13,
"learning_rate": 3.935907716982296e-05,
"loss": 2.7584,
"step": 34500
},
{
"epoch": 2.16,
"learning_rate": 3.9204860896921845e-05,
"loss": 2.7846,
"step": 35000
},
{
"epoch": 2.19,
"learning_rate": 3.905064462402073e-05,
"loss": 2.7609,
"step": 35500
},
{
"epoch": 2.22,
"learning_rate": 3.889642835111961e-05,
"loss": 2.7771,
"step": 36000
},
{
"epoch": 2.25,
"learning_rate": 3.8742212078218494e-05,
"loss": 2.8008,
"step": 36500
},
{
"epoch": 2.28,
"learning_rate": 3.858799580531738e-05,
"loss": 2.8029,
"step": 37000
},
{
"epoch": 2.31,
"learning_rate": 3.843377953241626e-05,
"loss": 2.7776,
"step": 37500
},
{
"epoch": 2.34,
"learning_rate": 3.8279563259515144e-05,
"loss": 2.7912,
"step": 38000
},
{
"epoch": 2.37,
"learning_rate": 3.812534698661403e-05,
"loss": 2.7857,
"step": 38500
},
{
"epoch": 2.41,
"learning_rate": 3.797113071371291e-05,
"loss": 2.8002,
"step": 39000
},
{
"epoch": 2.44,
"learning_rate": 3.7816914440811794e-05,
"loss": 2.8027,
"step": 39500
},
{
"epoch": 2.47,
"learning_rate": 3.7662698167910684e-05,
"loss": 2.8114,
"step": 40000
},
{
"epoch": 2.5,
"learning_rate": 3.750848189500956e-05,
"loss": 2.8027,
"step": 40500
},
{
"epoch": 2.53,
"learning_rate": 3.735426562210845e-05,
"loss": 2.8086,
"step": 41000
},
{
"epoch": 2.56,
"learning_rate": 3.720004934920733e-05,
"loss": 2.8143,
"step": 41500
},
{
"epoch": 2.59,
"learning_rate": 3.704583307630621e-05,
"loss": 2.8259,
"step": 42000
},
{
"epoch": 2.62,
"learning_rate": 3.68916168034051e-05,
"loss": 2.8417,
"step": 42500
},
{
"epoch": 2.65,
"learning_rate": 3.673740053050398e-05,
"loss": 2.8076,
"step": 43000
},
{
"epoch": 2.68,
"learning_rate": 3.658318425760287e-05,
"loss": 2.8147,
"step": 43500
},
{
"epoch": 2.71,
"learning_rate": 3.6428967984701744e-05,
"loss": 2.811,
"step": 44000
},
{
"epoch": 2.75,
"learning_rate": 3.6274751711800634e-05,
"loss": 2.8112,
"step": 44500
},
{
"epoch": 2.78,
"learning_rate": 3.612053543889952e-05,
"loss": 2.8338,
"step": 45000
},
{
"epoch": 2.81,
"learning_rate": 3.596631916599839e-05,
"loss": 2.8319,
"step": 45500
},
{
"epoch": 2.84,
"learning_rate": 3.581210289309728e-05,
"loss": 2.8619,
"step": 46000
},
{
"epoch": 2.87,
"learning_rate": 3.565788662019616e-05,
"loss": 2.837,
"step": 46500
},
{
"epoch": 2.9,
"learning_rate": 3.550367034729505e-05,
"loss": 2.8362,
"step": 47000
},
{
"epoch": 2.93,
"learning_rate": 3.534945407439393e-05,
"loss": 2.8691,
"step": 47500
},
{
"epoch": 2.96,
"learning_rate": 3.5195237801492816e-05,
"loss": 2.8708,
"step": 48000
},
{
"epoch": 2.99,
"learning_rate": 3.50410215285917e-05,
"loss": 2.8552,
"step": 48500
},
{
"epoch": 3.02,
"learning_rate": 3.488680525569058e-05,
"loss": 2.6518,
"step": 49000
},
{
"epoch": 3.05,
"learning_rate": 3.4732588982789466e-05,
"loss": 2.5816,
"step": 49500
},
{
"epoch": 3.08,
"learning_rate": 3.457837270988835e-05,
"loss": 2.5747,
"step": 50000
},
{
"epoch": 3.12,
"learning_rate": 3.442415643698723e-05,
"loss": 2.5805,
"step": 50500
},
{
"epoch": 3.15,
"learning_rate": 3.4269940164086116e-05,
"loss": 2.5997,
"step": 51000
},
{
"epoch": 3.18,
"learning_rate": 3.4115723891185e-05,
"loss": 2.5675,
"step": 51500
},
{
"epoch": 3.21,
"learning_rate": 3.396150761828388e-05,
"loss": 2.5793,
"step": 52000
},
{
"epoch": 3.24,
"learning_rate": 3.3807291345382766e-05,
"loss": 2.6122,
"step": 52500
},
{
"epoch": 3.27,
"learning_rate": 3.365307507248165e-05,
"loss": 2.6107,
"step": 53000
},
{
"epoch": 3.3,
"learning_rate": 3.349885879958053e-05,
"loss": 2.6113,
"step": 53500
},
{
"epoch": 3.33,
"learning_rate": 3.3344642526679416e-05,
"loss": 2.6212,
"step": 54000
},
{
"epoch": 3.36,
"learning_rate": 3.31904262537783e-05,
"loss": 2.6346,
"step": 54500
},
{
"epoch": 3.39,
"learning_rate": 3.303620998087719e-05,
"loss": 2.6517,
"step": 55000
},
{
"epoch": 3.42,
"learning_rate": 3.2881993707976065e-05,
"loss": 2.6164,
"step": 55500
},
{
"epoch": 3.45,
"learning_rate": 3.2727777435074955e-05,
"loss": 2.6346,
"step": 56000
},
{
"epoch": 3.49,
"learning_rate": 3.257356116217383e-05,
"loss": 2.6645,
"step": 56500
},
{
"epoch": 3.52,
"learning_rate": 3.2419344889272715e-05,
"loss": 2.658,
"step": 57000
},
{
"epoch": 3.55,
"learning_rate": 3.2265128616371605e-05,
"loss": 2.649,
"step": 57500
},
{
"epoch": 3.58,
"learning_rate": 3.211091234347048e-05,
"loss": 2.654,
"step": 58000
},
{
"epoch": 3.61,
"learning_rate": 3.195669607056937e-05,
"loss": 2.6365,
"step": 58500
},
{
"epoch": 3.64,
"learning_rate": 3.180247979766825e-05,
"loss": 2.6294,
"step": 59000
},
{
"epoch": 3.67,
"learning_rate": 3.164826352476714e-05,
"loss": 2.6531,
"step": 59500
},
{
"epoch": 3.7,
"learning_rate": 3.1494047251866015e-05,
"loss": 2.6313,
"step": 60000
},
{
"epoch": 3.73,
"learning_rate": 3.13398309789649e-05,
"loss": 2.668,
"step": 60500
},
{
"epoch": 3.76,
"learning_rate": 3.118561470606379e-05,
"loss": 2.6504,
"step": 61000
},
{
"epoch": 3.79,
"learning_rate": 3.1031398433162665e-05,
"loss": 2.6747,
"step": 61500
},
{
"epoch": 3.82,
"learning_rate": 3.0877182160261555e-05,
"loss": 2.6477,
"step": 62000
},
{
"epoch": 3.86,
"learning_rate": 3.072296588736043e-05,
"loss": 2.6616,
"step": 62500
},
{
"epoch": 3.89,
"learning_rate": 3.056874961445932e-05,
"loss": 2.6733,
"step": 63000
},
{
"epoch": 3.92,
"learning_rate": 3.0414533341558205e-05,
"loss": 2.6793,
"step": 63500
},
{
"epoch": 3.95,
"learning_rate": 3.0260317068657084e-05,
"loss": 2.6563,
"step": 64000
},
{
"epoch": 3.98,
"learning_rate": 3.010610079575597e-05,
"loss": 2.6447,
"step": 64500
},
{
"epoch": 4.01,
"learning_rate": 2.995188452285485e-05,
"loss": 2.6038,
"step": 65000
},
{
"epoch": 4.04,
"learning_rate": 2.9797668249953738e-05,
"loss": 2.4287,
"step": 65500
},
{
"epoch": 4.07,
"learning_rate": 2.964345197705262e-05,
"loss": 2.441,
"step": 66000
},
{
"epoch": 4.1,
"learning_rate": 2.94892357041515e-05,
"loss": 2.4128,
"step": 66500
},
{
"epoch": 4.13,
"learning_rate": 2.9335019431250387e-05,
"loss": 2.4571,
"step": 67000
},
{
"epoch": 4.16,
"learning_rate": 2.9180803158349267e-05,
"loss": 2.4115,
"step": 67500
},
{
"epoch": 4.19,
"learning_rate": 2.9026586885448154e-05,
"loss": 2.4405,
"step": 68000
},
{
"epoch": 4.23,
"learning_rate": 2.887237061254704e-05,
"loss": 2.4474,
"step": 68500
},
{
"epoch": 4.26,
"learning_rate": 2.871815433964592e-05,
"loss": 2.4561,
"step": 69000
},
{
"epoch": 4.29,
"learning_rate": 2.8563938066744804e-05,
"loss": 2.4527,
"step": 69500
},
{
"epoch": 4.32,
"learning_rate": 2.8409721793843687e-05,
"loss": 2.4407,
"step": 70000
},
{
"epoch": 4.35,
"learning_rate": 2.825550552094257e-05,
"loss": 2.4659,
"step": 70500
},
{
"epoch": 4.38,
"learning_rate": 2.8101289248041457e-05,
"loss": 2.4568,
"step": 71000
},
{
"epoch": 4.41,
"learning_rate": 2.7947072975140337e-05,
"loss": 2.4747,
"step": 71500
},
{
"epoch": 4.44,
"learning_rate": 2.7792856702239223e-05,
"loss": 2.475,
"step": 72000
},
{
"epoch": 4.47,
"learning_rate": 2.7638640429338103e-05,
"loss": 2.4684,
"step": 72500
},
{
"epoch": 4.5,
"learning_rate": 2.748442415643699e-05,
"loss": 2.5052,
"step": 73000
},
{
"epoch": 4.53,
"learning_rate": 2.7330207883535873e-05,
"loss": 2.4652,
"step": 73500
},
{
"epoch": 4.56,
"learning_rate": 2.7175991610634753e-05,
"loss": 2.4664,
"step": 74000
},
{
"epoch": 4.6,
"learning_rate": 2.702177533773364e-05,
"loss": 2.4976,
"step": 74500
},
{
"epoch": 4.63,
"learning_rate": 2.686755906483252e-05,
"loss": 2.4907,
"step": 75000
},
{
"epoch": 4.66,
"learning_rate": 2.6713342791931406e-05,
"loss": 2.5042,
"step": 75500
},
{
"epoch": 4.69,
"learning_rate": 2.6559126519030293e-05,
"loss": 2.4854,
"step": 76000
},
{
"epoch": 4.72,
"learning_rate": 2.6404910246129173e-05,
"loss": 2.4824,
"step": 76500
},
{
"epoch": 4.75,
"learning_rate": 2.6250693973228056e-05,
"loss": 2.5114,
"step": 77000
},
{
"epoch": 4.78,
"learning_rate": 2.609647770032694e-05,
"loss": 2.4997,
"step": 77500
},
{
"epoch": 4.81,
"learning_rate": 2.5942261427425823e-05,
"loss": 2.49,
"step": 78000
},
{
"epoch": 4.84,
"learning_rate": 2.578804515452471e-05,
"loss": 2.4856,
"step": 78500
},
{
"epoch": 4.87,
"learning_rate": 2.563382888162359e-05,
"loss": 2.5155,
"step": 79000
},
{
"epoch": 4.9,
"learning_rate": 2.5479612608722476e-05,
"loss": 2.4911,
"step": 79500
},
{
"epoch": 4.93,
"learning_rate": 2.5325396335821356e-05,
"loss": 2.498,
"step": 80000
},
{
"epoch": 4.97,
"learning_rate": 2.5171180062920242e-05,
"loss": 2.5189,
"step": 80500
},
{
"epoch": 5.0,
"learning_rate": 2.5016963790019126e-05,
"loss": 2.5012,
"step": 81000
},
{
"epoch": 5.03,
"learning_rate": 2.4862747517118006e-05,
"loss": 2.2808,
"step": 81500
},
{
"epoch": 5.06,
"learning_rate": 2.470853124421689e-05,
"loss": 2.2686,
"step": 82000
},
{
"epoch": 5.09,
"learning_rate": 2.4554314971315775e-05,
"loss": 2.2863,
"step": 82500
},
{
"epoch": 5.12,
"learning_rate": 2.440009869841466e-05,
"loss": 2.2829,
"step": 83000
},
{
"epoch": 5.15,
"learning_rate": 2.4245882425513542e-05,
"loss": 2.2903,
"step": 83500
},
{
"epoch": 5.18,
"learning_rate": 2.4091666152612425e-05,
"loss": 2.3022,
"step": 84000
},
{
"epoch": 5.21,
"learning_rate": 2.393744987971131e-05,
"loss": 2.3068,
"step": 84500
},
{
"epoch": 5.24,
"learning_rate": 2.3783233606810192e-05,
"loss": 2.3022,
"step": 85000
},
{
"epoch": 5.27,
"learning_rate": 2.3629017333909075e-05,
"loss": 2.3168,
"step": 85500
},
{
"epoch": 5.31,
"learning_rate": 2.347480106100796e-05,
"loss": 2.3058,
"step": 86000
},
{
"epoch": 5.34,
"learning_rate": 2.332058478810684e-05,
"loss": 2.3049,
"step": 86500
},
{
"epoch": 5.37,
"learning_rate": 2.3166368515205725e-05,
"loss": 2.2949,
"step": 87000
},
{
"epoch": 5.4,
"learning_rate": 2.301215224230461e-05,
"loss": 2.3447,
"step": 87500
},
{
"epoch": 5.43,
"learning_rate": 2.2857935969403495e-05,
"loss": 2.3235,
"step": 88000
},
{
"epoch": 5.46,
"learning_rate": 2.2703719696502375e-05,
"loss": 2.3185,
"step": 88500
},
{
"epoch": 5.49,
"learning_rate": 2.2549503423601258e-05,
"loss": 2.3146,
"step": 89000
},
{
"epoch": 5.52,
"learning_rate": 2.239528715070014e-05,
"loss": 2.3479,
"step": 89500
},
{
"epoch": 5.55,
"learning_rate": 2.2241070877799024e-05,
"loss": 2.3553,
"step": 90000
},
{
"epoch": 5.58,
"learning_rate": 2.208685460489791e-05,
"loss": 2.3444,
"step": 90500
},
{
"epoch": 5.61,
"learning_rate": 2.1932638331996794e-05,
"loss": 2.3379,
"step": 91000
},
{
"epoch": 5.64,
"learning_rate": 2.1778422059095678e-05,
"loss": 2.3675,
"step": 91500
},
{
"epoch": 5.68,
"learning_rate": 2.162420578619456e-05,
"loss": 2.346,
"step": 92000
},
{
"epoch": 5.71,
"learning_rate": 2.1469989513293444e-05,
"loss": 2.3542,
"step": 92500
},
{
"epoch": 5.74,
"learning_rate": 2.1315773240392327e-05,
"loss": 2.35,
"step": 93000
},
{
"epoch": 5.77,
"learning_rate": 2.116155696749121e-05,
"loss": 2.3474,
"step": 93500
},
{
"epoch": 5.8,
"learning_rate": 2.1007340694590094e-05,
"loss": 2.352,
"step": 94000
},
{
"epoch": 5.83,
"learning_rate": 2.0853124421688977e-05,
"loss": 2.3325,
"step": 94500
},
{
"epoch": 5.86,
"learning_rate": 2.069890814878786e-05,
"loss": 2.3424,
"step": 95000
},
{
"epoch": 5.89,
"learning_rate": 2.0544691875886747e-05,
"loss": 2.3506,
"step": 95500
},
{
"epoch": 5.92,
"learning_rate": 2.0390475602985627e-05,
"loss": 2.3756,
"step": 96000
},
{
"epoch": 5.95,
"learning_rate": 2.023625933008451e-05,
"loss": 2.3787,
"step": 96500
},
{
"epoch": 5.98,
"learning_rate": 2.0082043057183394e-05,
"loss": 2.3619,
"step": 97000
},
{
"epoch": 6.01,
"learning_rate": 1.9927826784282277e-05,
"loss": 2.2638,
"step": 97500
},
{
"epoch": 6.05,
"learning_rate": 1.9773610511381164e-05,
"loss": 2.1504,
"step": 98000
},
{
"epoch": 6.08,
"learning_rate": 1.9619394238480047e-05,
"loss": 2.141,
"step": 98500
},
{
"epoch": 6.11,
"learning_rate": 1.946517796557893e-05,
"loss": 2.1287,
"step": 99000
},
{
"epoch": 6.14,
"learning_rate": 1.9310961692677813e-05,
"loss": 2.1477,
"step": 99500
},
{
"epoch": 6.17,
"learning_rate": 1.9156745419776693e-05,
"loss": 2.1732,
"step": 100000
},
{
"epoch": 6.2,
"learning_rate": 1.9002529146875576e-05,
"loss": 2.1753,
"step": 100500
},
{
"epoch": 6.23,
"learning_rate": 1.8848312873974463e-05,
"loss": 2.1794,
"step": 101000
},
{
"epoch": 6.26,
"learning_rate": 1.8694096601073346e-05,
"loss": 2.1646,
"step": 101500
},
{
"epoch": 6.29,
"learning_rate": 1.853988032817223e-05,
"loss": 2.1717,
"step": 102000
},
{
"epoch": 6.32,
"learning_rate": 1.8385664055271113e-05,
"loss": 2.1759,
"step": 102500
},
{
"epoch": 6.35,
"learning_rate": 1.8231447782369996e-05,
"loss": 2.1716,
"step": 103000
},
{
"epoch": 6.38,
"learning_rate": 1.807723150946888e-05,
"loss": 2.1785,
"step": 103500
},
{
"epoch": 6.42,
"learning_rate": 1.7923015236567763e-05,
"loss": 2.1945,
"step": 104000
},
{
"epoch": 6.45,
"learning_rate": 1.7768798963666646e-05,
"loss": 2.2027,
"step": 104500
},
{
"epoch": 6.48,
"learning_rate": 1.761458269076553e-05,
"loss": 2.1936,
"step": 105000
},
{
"epoch": 6.51,
"learning_rate": 1.7460366417864413e-05,
"loss": 2.1835,
"step": 105500
},
{
"epoch": 6.54,
"learning_rate": 1.73061501449633e-05,
"loss": 2.2049,
"step": 106000
},
{
"epoch": 6.57,
"learning_rate": 1.7151933872062182e-05,
"loss": 2.2161,
"step": 106500
},
{
"epoch": 6.6,
"learning_rate": 1.6997717599161066e-05,
"loss": 2.2136,
"step": 107000
},
{
"epoch": 6.63,
"learning_rate": 1.6843501326259946e-05,
"loss": 2.1981,
"step": 107500
},
{
"epoch": 6.66,
"learning_rate": 1.668928505335883e-05,
"loss": 2.2208,
"step": 108000
},
{
"epoch": 6.69,
"learning_rate": 1.6535068780457716e-05,
"loss": 2.2148,
"step": 108500
},
{
"epoch": 6.72,
"learning_rate": 1.63808525075566e-05,
"loss": 2.2163,
"step": 109000
},
{
"epoch": 6.75,
"learning_rate": 1.6226636234655482e-05,
"loss": 2.202,
"step": 109500
},
{
"epoch": 6.79,
"learning_rate": 1.6072419961754365e-05,
"loss": 2.2127,
"step": 110000
},
{
"epoch": 6.82,
"learning_rate": 1.591820368885325e-05,
"loss": 2.2334,
"step": 110500
},
{
"epoch": 6.85,
"learning_rate": 1.5763987415952132e-05,
"loss": 2.2174,
"step": 111000
},
{
"epoch": 6.88,
"learning_rate": 1.5609771143051015e-05,
"loss": 2.2229,
"step": 111500
},
{
"epoch": 6.91,
"learning_rate": 1.54555548701499e-05,
"loss": 2.2225,
"step": 112000
},
{
"epoch": 6.94,
"learning_rate": 1.5301338597248782e-05,
"loss": 2.2266,
"step": 112500
},
{
"epoch": 6.97,
"learning_rate": 1.5147122324347665e-05,
"loss": 2.2197,
"step": 113000
},
{
"epoch": 7.0,
"learning_rate": 1.4992906051446548e-05,
"loss": 2.2349,
"step": 113500
},
{
"epoch": 7.03,
"learning_rate": 1.4838689778545433e-05,
"loss": 2.0295,
"step": 114000
},
{
"epoch": 7.06,
"learning_rate": 1.4684473505644316e-05,
"loss": 2.0302,
"step": 114500
},
{
"epoch": 7.09,
"learning_rate": 1.45302572327432e-05,
"loss": 2.0477,
"step": 115000
},
{
"epoch": 7.12,
"learning_rate": 1.4376040959842083e-05,
"loss": 2.0597,
"step": 115500
},
{
"epoch": 7.16,
"learning_rate": 1.4221824686940966e-05,
"loss": 2.046,
"step": 116000
},
{
"epoch": 7.19,
"learning_rate": 1.4067608414039851e-05,
"loss": 2.0449,
"step": 116500
},
{
"epoch": 7.22,
"learning_rate": 1.3913392141138735e-05,
"loss": 2.061,
"step": 117000
},
{
"epoch": 7.25,
"learning_rate": 1.3759175868237618e-05,
"loss": 2.0655,
"step": 117500
},
{
"epoch": 7.28,
"learning_rate": 1.36049595953365e-05,
"loss": 2.0623,
"step": 118000
},
{
"epoch": 7.31,
"learning_rate": 1.3450743322435383e-05,
"loss": 2.0526,
"step": 118500
},
{
"epoch": 7.34,
"learning_rate": 1.329652704953427e-05,
"loss": 2.06,
"step": 119000
},
{
"epoch": 7.37,
"learning_rate": 1.314231077663315e-05,
"loss": 2.0729,
"step": 119500
},
{
"epoch": 7.4,
"learning_rate": 1.2988094503732034e-05,
"loss": 2.0705,
"step": 120000
},
{
"epoch": 7.43,
"learning_rate": 1.2833878230830917e-05,
"loss": 2.0842,
"step": 120500
},
{
"epoch": 7.46,
"learning_rate": 1.26796619579298e-05,
"loss": 2.0781,
"step": 121000
},
{
"epoch": 7.49,
"learning_rate": 1.2525445685028686e-05,
"loss": 2.0875,
"step": 121500
},
{
"epoch": 7.53,
"learning_rate": 1.2371229412127567e-05,
"loss": 2.0942,
"step": 122000
},
{
"epoch": 7.56,
"learning_rate": 1.2217013139226452e-05,
"loss": 2.0806,
"step": 122500
},
{
"epoch": 7.59,
"learning_rate": 1.2062796866325335e-05,
"loss": 2.1183,
"step": 123000
},
{
"epoch": 7.62,
"learning_rate": 1.1908580593424219e-05,
"loss": 2.0795,
"step": 123500
},
{
"epoch": 7.65,
"learning_rate": 1.1754364320523102e-05,
"loss": 2.076,
"step": 124000
},
{
"epoch": 7.68,
"learning_rate": 1.1600148047621985e-05,
"loss": 2.0863,
"step": 124500
},
{
"epoch": 7.71,
"learning_rate": 1.144593177472087e-05,
"loss": 2.0943,
"step": 125000
},
{
"epoch": 7.74,
"learning_rate": 1.1291715501819752e-05,
"loss": 2.093,
"step": 125500
},
{
"epoch": 7.77,
"learning_rate": 1.1137499228918637e-05,
"loss": 2.1124,
"step": 126000
},
{
"epoch": 7.8,
"learning_rate": 1.098328295601752e-05,
"loss": 2.0669,
"step": 126500
},
{
"epoch": 7.83,
"learning_rate": 1.0829066683116403e-05,
"loss": 2.0848,
"step": 127000
},
{
"epoch": 7.87,
"learning_rate": 1.0674850410215287e-05,
"loss": 2.1032,
"step": 127500
},
{
"epoch": 7.9,
"learning_rate": 1.052063413731417e-05,
"loss": 2.0905,
"step": 128000
},
{
"epoch": 7.93,
"learning_rate": 1.0366417864413053e-05,
"loss": 2.0861,
"step": 128500
},
{
"epoch": 7.96,
"learning_rate": 1.0212201591511936e-05,
"loss": 2.1257,
"step": 129000
},
{
"epoch": 7.99,
"learning_rate": 1.005798531861082e-05,
"loss": 2.1051,
"step": 129500
},
{
"epoch": 8.02,
"learning_rate": 9.903769045709705e-06,
"loss": 2.0009,
"step": 130000
},
{
"epoch": 8.05,
"learning_rate": 9.749552772808588e-06,
"loss": 1.9365,
"step": 130500
},
{
"epoch": 8.08,
"learning_rate": 9.59533649990747e-06,
"loss": 1.9696,
"step": 131000
},
{
"epoch": 8.11,
"learning_rate": 9.441120227006354e-06,
"loss": 1.9647,
"step": 131500
},
{
"epoch": 8.14,
"learning_rate": 9.286903954105238e-06,
"loss": 1.969,
"step": 132000
},
{
"epoch": 8.17,
"learning_rate": 9.132687681204123e-06,
"loss": 1.9503,
"step": 132500
},
{
"epoch": 8.2,
"learning_rate": 8.978471408303004e-06,
"loss": 1.9559,
"step": 133000
},
{
"epoch": 8.24,
"learning_rate": 8.824255135401887e-06,
"loss": 1.9534,
"step": 133500
},
{
"epoch": 8.27,
"learning_rate": 8.670038862500772e-06,
"loss": 1.9658,
"step": 134000
},
{
"epoch": 8.3,
"learning_rate": 8.515822589599656e-06,
"loss": 1.9844,
"step": 134500
},
{
"epoch": 8.33,
"learning_rate": 8.361606316698537e-06,
"loss": 1.9706,
"step": 135000
},
{
"epoch": 8.36,
"learning_rate": 8.207390043797422e-06,
"loss": 1.9742,
"step": 135500
},
{
"epoch": 8.39,
"learning_rate": 8.053173770896305e-06,
"loss": 1.9711,
"step": 136000
},
{
"epoch": 8.42,
"learning_rate": 7.898957497995189e-06,
"loss": 1.9776,
"step": 136500
},
{
"epoch": 8.45,
"learning_rate": 7.744741225094072e-06,
"loss": 1.9803,
"step": 137000
},
{
"epoch": 8.48,
"learning_rate": 7.590524952192955e-06,
"loss": 1.994,
"step": 137500
},
{
"epoch": 8.51,
"learning_rate": 7.436308679291839e-06,
"loss": 1.9877,
"step": 138000
},
{
"epoch": 8.54,
"learning_rate": 7.282092406390723e-06,
"loss": 2.0027,
"step": 138500
},
{
"epoch": 8.57,
"learning_rate": 7.127876133489605e-06,
"loss": 1.9775,
"step": 139000
},
{
"epoch": 8.61,
"learning_rate": 6.97365986058849e-06,
"loss": 1.987,
"step": 139500
},
{
"epoch": 8.64,
"learning_rate": 6.8194435876873724e-06,
"loss": 2.0007,
"step": 140000
},
{
"epoch": 8.67,
"learning_rate": 6.665227314786257e-06,
"loss": 1.9846,
"step": 140500
},
{
"epoch": 8.7,
"learning_rate": 6.51101104188514e-06,
"loss": 1.9958,
"step": 141000
},
{
"epoch": 8.73,
"learning_rate": 6.356794768984023e-06,
"loss": 1.989,
"step": 141500
},
{
"epoch": 8.76,
"learning_rate": 6.202578496082907e-06,
"loss": 1.988,
"step": 142000
},
{
"epoch": 8.79,
"learning_rate": 6.0483622231817905e-06,
"loss": 1.9945,
"step": 142500
},
{
"epoch": 8.82,
"learning_rate": 5.894145950280674e-06,
"loss": 1.9978,
"step": 143000
},
{
"epoch": 8.85,
"learning_rate": 5.739929677379558e-06,
"loss": 1.9756,
"step": 143500
},
{
"epoch": 8.88,
"learning_rate": 5.585713404478441e-06,
"loss": 1.9904,
"step": 144000
},
{
"epoch": 8.91,
"learning_rate": 5.431497131577324e-06,
"loss": 2.0072,
"step": 144500
},
{
"epoch": 8.94,
"learning_rate": 5.277280858676208e-06,
"loss": 1.977,
"step": 145000
},
{
"epoch": 8.98,
"learning_rate": 5.123064585775091e-06,
"loss": 1.9875,
"step": 145500
},
{
"epoch": 9.01,
"learning_rate": 4.968848312873975e-06,
"loss": 1.9618,
"step": 146000
},
{
"epoch": 9.04,
"learning_rate": 4.8146320399728575e-06,
"loss": 1.8886,
"step": 146500
},
{
"epoch": 9.07,
"learning_rate": 4.660415767071742e-06,
"loss": 1.9177,
"step": 147000
},
{
"epoch": 9.1,
"learning_rate": 4.506199494170625e-06,
"loss": 1.8996,
"step": 147500
},
{
"epoch": 9.13,
"learning_rate": 4.351983221269509e-06,
"loss": 1.9055,
"step": 148000
},
{
"epoch": 9.16,
"learning_rate": 4.197766948368392e-06,
"loss": 1.9022,
"step": 148500
},
{
"epoch": 9.19,
"learning_rate": 4.0435506754672755e-06,
"loss": 1.8831,
"step": 149000
},
{
"epoch": 9.22,
"learning_rate": 3.889334402566159e-06,
"loss": 1.8946,
"step": 149500
},
{
"epoch": 9.25,
"learning_rate": 3.7351181296650425e-06,
"loss": 1.9204,
"step": 150000
},
{
"epoch": 9.28,
"learning_rate": 3.580901856763926e-06,
"loss": 1.9099,
"step": 150500
},
{
"epoch": 9.31,
"learning_rate": 3.426685583862809e-06,
"loss": 1.9149,
"step": 151000
},
{
"epoch": 9.35,
"learning_rate": 3.2724693109616927e-06,
"loss": 1.9105,
"step": 151500
},
{
"epoch": 9.38,
"learning_rate": 3.1182530380605764e-06,
"loss": 1.8916,
"step": 152000
},
{
"epoch": 9.41,
"learning_rate": 2.9640367651594596e-06,
"loss": 1.9104,
"step": 152500
},
{
"epoch": 9.44,
"learning_rate": 2.8098204922583433e-06,
"loss": 1.9142,
"step": 153000
},
{
"epoch": 9.47,
"learning_rate": 2.6556042193572266e-06,
"loss": 1.9066,
"step": 153500
},
{
"epoch": 9.5,
"learning_rate": 2.5013879464561103e-06,
"loss": 1.9052,
"step": 154000
},
{
"epoch": 9.53,
"learning_rate": 2.3471716735549936e-06,
"loss": 1.9173,
"step": 154500
},
{
"epoch": 9.56,
"learning_rate": 2.1929554006538773e-06,
"loss": 1.8909,
"step": 155000
},
{
"epoch": 9.59,
"learning_rate": 2.0387391277527605e-06,
"loss": 1.8904,
"step": 155500
},
{
"epoch": 9.62,
"learning_rate": 1.884522854851644e-06,
"loss": 1.897,
"step": 156000
},
{
"epoch": 9.65,
"learning_rate": 1.7303065819505277e-06,
"loss": 1.9324,
"step": 156500
},
{
"epoch": 9.68,
"learning_rate": 1.576090309049411e-06,
"loss": 1.8916,
"step": 157000
},
{
"epoch": 9.72,
"learning_rate": 1.4218740361482944e-06,
"loss": 1.9087,
"step": 157500
},
{
"epoch": 9.75,
"learning_rate": 1.267657763247178e-06,
"loss": 1.9107,
"step": 158000
},
{
"epoch": 9.78,
"learning_rate": 1.1134414903460612e-06,
"loss": 1.8954,
"step": 158500
},
{
"epoch": 9.81,
"learning_rate": 9.592252174449449e-07,
"loss": 1.9147,
"step": 159000
},
{
"epoch": 9.84,
"learning_rate": 8.050089445438284e-07,
"loss": 1.8947,
"step": 159500
},
{
"epoch": 9.87,
"learning_rate": 6.507926716427117e-07,
"loss": 1.92,
"step": 160000
},
{
"epoch": 9.9,
"learning_rate": 4.965763987415952e-07,
"loss": 1.8919,
"step": 160500
},
{
"epoch": 9.93,
"learning_rate": 3.423601258404787e-07,
"loss": 1.9054,
"step": 161000
},
{
"epoch": 9.96,
"learning_rate": 1.8814385293936217e-07,
"loss": 1.9198,
"step": 161500
},
{
"epoch": 9.99,
"learning_rate": 3.392758003824564e-08,
"loss": 1.9037,
"step": 162000
},
{
"epoch": 10.0,
"step": 162110,
"total_flos": 8.002959346916352e+16,
"train_loss": 2.4721177677010764,
"train_runtime": 17898.7927,
"train_samples_per_second": 36.226,
"train_steps_per_second": 9.057
}
],
"max_steps": 162110,
"num_train_epochs": 10,
"total_flos": 8.002959346916352e+16,
"trial_name": null,
"trial_params": null
}