VietAi-FinalProject-VIT5 / trainer_state.json
QyQy's picture
Upload trainer_state.json
02aa886
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.224483333333333,
"global_step": 60000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 0.002197932219132781,
"loss": 3.6508,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 0.004419300239533186,
"loss": 2.1932,
"step": 2000
},
{
"epoch": 0.13,
"learning_rate": 0.006713071372359991,
"loss": 1.9369,
"step": 3000
},
{
"epoch": 0.17,
"learning_rate": 0.009095062501728535,
"loss": 1.6844,
"step": 4000
},
{
"epoch": 0.22,
"learning_rate": 0.011655605398118496,
"loss": 1.557,
"step": 5000
},
{
"epoch": 0.26,
"learning_rate": 0.014309737831354141,
"loss": 1.4849,
"step": 6000
},
{
"epoch": 0.3,
"learning_rate": 0.016971396282315254,
"loss": 1.6145,
"step": 7000
},
{
"epoch": 0.35,
"learning_rate": 0.01942763663828373,
"loss": 1.6191,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 0.022188229486346245,
"loss": 1.5503,
"step": 9000
},
{
"epoch": 0.43,
"learning_rate": 0.025435281917452812,
"loss": 1.5535,
"step": 10000
},
{
"epoch": 0.43,
"eval_bleu": 11.554505529017694,
"eval_loss": 1.8505867719650269,
"eval_runtime": 81.2961,
"eval_samples_per_second": 230.257,
"eval_steps_per_second": 0.91,
"step": 10000
},
{
"epoch": 0.48,
"learning_rate": 0.024702614173293114,
"loss": 1.4991,
"step": 11000
},
{
"epoch": 0.52,
"learning_rate": 0.024255122989416122,
"loss": 1.4803,
"step": 12000
},
{
"epoch": 0.57,
"learning_rate": 0.02393551729619503,
"loss": 1.4415,
"step": 13000
},
{
"epoch": 0.61,
"learning_rate": 0.02371666394174099,
"loss": 1.4454,
"step": 14000
},
{
"epoch": 0.65,
"learning_rate": 0.02343085967004299,
"loss": 1.4106,
"step": 15000
},
{
"epoch": 0.7,
"learning_rate": 0.02309497445821762,
"loss": 1.3996,
"step": 16000
},
{
"epoch": 0.74,
"learning_rate": 0.02294100448489189,
"loss": 1.3815,
"step": 17000
},
{
"epoch": 0.78,
"learning_rate": 0.022620119154453278,
"loss": 1.3915,
"step": 18000
},
{
"epoch": 0.83,
"learning_rate": 0.022044328972697258,
"loss": 1.5557,
"step": 19000
},
{
"epoch": 0.87,
"learning_rate": 0.02174384333193302,
"loss": 1.2879,
"step": 20000
},
{
"epoch": 0.87,
"eval_bleu": 15.736323415434928,
"eval_loss": 1.4790531396865845,
"eval_runtime": 81.7862,
"eval_samples_per_second": 228.877,
"eval_steps_per_second": 0.905,
"step": 20000
},
{
"epoch": 0.91,
"learning_rate": 0.02188653126358986,
"loss": 1.0018,
"step": 21000
},
{
"epoch": 0.48,
"learning_rate": 0.02189297415316105,
"loss": 0.9382,
"step": 22000
},
{
"epoch": 0.5,
"learning_rate": 0.02184857614338398,
"loss": 0.9229,
"step": 23000
},
{
"epoch": 1.02,
"learning_rate": 0.021777568385004997,
"loss": 1.2887,
"step": 24000
},
{
"epoch": 1.04,
"learning_rate": 0.021719269454479218,
"loss": 1.3278,
"step": 24500
},
{
"epoch": 1.06,
"learning_rate": 0.021626409143209457,
"loss": 1.3052,
"step": 25000
},
{
"epoch": 1.06,
"eval_bleu": 15.689633530325342,
"eval_loss": 1.344283103942871,
"eval_runtime": 81.5254,
"eval_samples_per_second": 229.61,
"eval_steps_per_second": 0.908,
"step": 25000
},
{
"epoch": 1.07,
"learning_rate": 0.02152758091688156,
"loss": 1.2788,
"step": 25500
},
{
"epoch": 1.09,
"learning_rate": 0.021510712802410126,
"loss": 1.369,
"step": 26000
},
{
"epoch": 1.11,
"learning_rate": 0.021486839279532433,
"loss": 1.3663,
"step": 26500
},
{
"epoch": 1.12,
"learning_rate": 0.021426061168313026,
"loss": 1.2913,
"step": 27000
},
{
"epoch": 1.14,
"learning_rate": 0.021364226937294006,
"loss": 1.2785,
"step": 27500
},
{
"epoch": 1.16,
"learning_rate": 0.021301250904798508,
"loss": 1.2471,
"step": 28000
},
{
"epoch": 1.17,
"learning_rate": 0.021244272589683533,
"loss": 1.2191,
"step": 28500
},
{
"epoch": 1.19,
"learning_rate": 0.021167641505599022,
"loss": 1.2128,
"step": 29000
},
{
"epoch": 1.21,
"learning_rate": 0.021093450486660004,
"loss": 1.1959,
"step": 29500
},
{
"epoch": 1.22,
"learning_rate": 0.02111968770623207,
"loss": 1.3139,
"step": 30000
},
{
"epoch": 1.22,
"eval_bleu": 16.118544297247887,
"eval_loss": 1.2931314706802368,
"eval_runtime": 81.4634,
"eval_samples_per_second": 229.784,
"eval_steps_per_second": 0.908,
"step": 30000
},
{
"epoch": 1.18,
"learning_rate": 0.021165331825613976,
"loss": 1.3204,
"step": 30500
},
{
"epoch": 1.19,
"learning_rate": 0.021186990663409233,
"loss": 1.2925,
"step": 31000
},
{
"epoch": 1.21,
"learning_rate": 0.021194208413362503,
"loss": 1.3002,
"step": 31500
},
{
"epoch": 1.22,
"learning_rate": 0.021185804158449173,
"loss": 1.2473,
"step": 32000
},
{
"epoch": 1.23,
"learning_rate": 0.021163810044527054,
"loss": 1.2455,
"step": 32500
},
{
"epoch": 1.24,
"learning_rate": 0.021145079284906387,
"loss": 1.2411,
"step": 33000
},
{
"epoch": 1.26,
"learning_rate": 0.021122504025697708,
"loss": 1.2214,
"step": 33500
},
{
"epoch": 1.27,
"learning_rate": 0.021091610193252563,
"loss": 1.2239,
"step": 34000
},
{
"epoch": 1.28,
"learning_rate": 0.021048951894044876,
"loss": 1.228,
"step": 34500
},
{
"epoch": 1.29,
"learning_rate": 0.02102178893983364,
"loss": 1.2119,
"step": 35000
},
{
"epoch": 1.29,
"eval_bleu": 15.491337721835558,
"eval_loss": 1.3143055438995361,
"eval_runtime": 81.4828,
"eval_samples_per_second": 229.73,
"eval_steps_per_second": 0.908,
"step": 35000
},
{
"epoch": 1.31,
"learning_rate": 0.020987574011087418,
"loss": 1.1862,
"step": 35500
},
{
"epoch": 1.32,
"learning_rate": 0.02096775360405445,
"loss": 1.2127,
"step": 36000
},
{
"epoch": 1.33,
"learning_rate": 0.02094915322959423,
"loss": 1.228,
"step": 36500
},
{
"epoch": 1.34,
"learning_rate": 0.02091248333454132,
"loss": 1.2456,
"step": 37000
},
{
"epoch": 1.36,
"learning_rate": 0.020875241607427597,
"loss": 1.2494,
"step": 37500
},
{
"epoch": 1.37,
"learning_rate": 0.020842362195253372,
"loss": 1.1684,
"step": 38000
},
{
"epoch": 1.38,
"learning_rate": 0.02081664651632309,
"loss": 1.2389,
"step": 38500
},
{
"epoch": 1.39,
"learning_rate": 0.020784636959433556,
"loss": 1.1898,
"step": 39000
},
{
"epoch": 1.41,
"learning_rate": 0.020755469799041748,
"loss": 1.1915,
"step": 39500
},
{
"epoch": 1.42,
"learning_rate": 0.02073347009718418,
"loss": 1.1929,
"step": 40000
},
{
"epoch": 1.42,
"eval_bleu": 15.669781616522553,
"eval_loss": 1.3155781030654907,
"eval_runtime": 81.5967,
"eval_samples_per_second": 229.409,
"eval_steps_per_second": 0.907,
"step": 40000
},
{
"epoch": 1.29,
"learning_rate": 0.020710671320557594,
"loss": 1.2275,
"step": 40500
},
{
"epoch": 1.3,
"learning_rate": 0.020680107176303864,
"loss": 1.1966,
"step": 41000
},
{
"epoch": 1.3,
"learning_rate": 0.020645136013627052,
"loss": 1.2111,
"step": 41500
},
{
"epoch": 1.31,
"learning_rate": 0.020648332312703133,
"loss": 1.3908,
"step": 42000
},
{
"epoch": 1.32,
"learning_rate": 0.02063341997563839,
"loss": 1.3234,
"step": 42500
},
{
"epoch": 1.33,
"learning_rate": 0.02064467966556549,
"loss": 0.9668,
"step": 43000
},
{
"epoch": 1.34,
"learning_rate": 0.020663069561123848,
"loss": 0.8413,
"step": 43500
},
{
"epoch": 1.35,
"learning_rate": 0.0206832867115736,
"loss": 0.8273,
"step": 44000
},
{
"epoch": 1.35,
"learning_rate": 0.020704049617052078,
"loss": 0.7956,
"step": 44500
},
{
"epoch": 1.36,
"learning_rate": 0.020729372277855873,
"loss": 0.7875,
"step": 45000
},
{
"epoch": 1.36,
"eval_bleu": 17.782492667022247,
"eval_loss": 1.2672479152679443,
"eval_runtime": 81.5877,
"eval_samples_per_second": 229.434,
"eval_steps_per_second": 0.907,
"step": 45000
},
{
"epoch": 1.37,
"learning_rate": 0.020742451772093773,
"loss": 0.7773,
"step": 45500
},
{
"epoch": 1.38,
"learning_rate": 0.020756695419549942,
"loss": 0.7604,
"step": 46000
},
{
"epoch": 1.39,
"learning_rate": 0.020774947479367256,
"loss": 0.8933,
"step": 46500
},
{
"epoch": 2.01,
"learning_rate": 0.02075868472456932,
"loss": 1.2112,
"step": 47000
},
{
"epoch": 2.02,
"learning_rate": 0.020726632326841354,
"loss": 1.1824,
"step": 47500
},
{
"epoch": 2.02,
"learning_rate": 0.020695462822914124,
"loss": 1.1919,
"step": 48000
},
{
"epoch": 2.03,
"learning_rate": 0.020659752190113068,
"loss": 1.1771,
"step": 48500
},
{
"epoch": 2.04,
"learning_rate": 0.02061435766518116,
"loss": 1.1045,
"step": 49000
},
{
"epoch": 2.05,
"learning_rate": 0.02060552127659321,
"loss": 1.2627,
"step": 49500
},
{
"epoch": 2.06,
"learning_rate": 0.020578160881996155,
"loss": 1.1741,
"step": 50000
},
{
"epoch": 2.06,
"eval_bleu": 17.817437675612407,
"eval_loss": 1.2213643789291382,
"eval_runtime": 81.6429,
"eval_samples_per_second": 229.279,
"eval_steps_per_second": 0.906,
"step": 50000
},
{
"epoch": 2.07,
"learning_rate": 0.02054954506456852,
"loss": 1.1366,
"step": 50500
},
{
"epoch": 2.07,
"learning_rate": 0.020517559722065926,
"loss": 1.1321,
"step": 51000
},
{
"epoch": 2.08,
"learning_rate": 0.020486876368522644,
"loss": 1.1128,
"step": 51500
},
{
"epoch": 2.09,
"learning_rate": 0.020454248413443565,
"loss": 1.0992,
"step": 52000
},
{
"epoch": 2.1,
"learning_rate": 0.020409852266311646,
"loss": 1.0874,
"step": 52500
},
{
"epoch": 2.11,
"learning_rate": 0.020372800529003143,
"loss": 1.0964,
"step": 53000
},
{
"epoch": 2.12,
"learning_rate": 0.020410917699337006,
"loss": 1.2197,
"step": 53500
},
{
"epoch": 2.12,
"learning_rate": 0.020421497523784637,
"loss": 1.1628,
"step": 54000
},
{
"epoch": 2.13,
"learning_rate": 0.02043827436864376,
"loss": 1.1952,
"step": 54500
},
{
"epoch": 2.14,
"learning_rate": 0.020448317751288414,
"loss": 1.1603,
"step": 55000
},
{
"epoch": 2.14,
"eval_bleu": 17.473470393567123,
"eval_loss": 1.1755515336990356,
"eval_runtime": 81.7968,
"eval_samples_per_second": 228.848,
"eval_steps_per_second": 0.905,
"step": 55000
},
{
"epoch": 2.15,
"learning_rate": 0.020445365458726883,
"loss": 1.1201,
"step": 55500
},
{
"epoch": 2.16,
"learning_rate": 0.020447757095098495,
"loss": 1.1362,
"step": 56000
},
{
"epoch": 2.17,
"learning_rate": 0.020444199442863464,
"loss": 1.151,
"step": 56500
},
{
"epoch": 2.17,
"learning_rate": 0.020440472289919853,
"loss": 1.125,
"step": 57000
},
{
"epoch": 2.18,
"learning_rate": 0.020431550219655037,
"loss": 1.1041,
"step": 57500
},
{
"epoch": 2.19,
"learning_rate": 0.0204232819378376,
"loss": 1.1338,
"step": 58000
},
{
"epoch": 2.2,
"learning_rate": 0.02041398547589779,
"loss": 1.1279,
"step": 58500
},
{
"epoch": 2.21,
"learning_rate": 0.02040201798081398,
"loss": 1.0957,
"step": 59000
},
{
"epoch": 2.22,
"learning_rate": 0.020392674952745438,
"loss": 1.1315,
"step": 59500
},
{
"epoch": 2.22,
"learning_rate": 0.020384889096021652,
"loss": 1.146,
"step": 60000
},
{
"epoch": 2.22,
"eval_bleu": 17.10106003319275,
"eval_loss": 1.2006129026412964,
"eval_runtime": 81.8911,
"eval_samples_per_second": 228.584,
"eval_steps_per_second": 0.904,
"step": 60000
}
],
"max_steps": 60000,
"num_train_epochs": 9223372036854775807,
"total_flos": 4.6881481692662784e+17,
"trial_name": null,
"trial_params": null
}