rocbart-large-chinese-v3 / trainer_state.json
fenffef's picture
Upload 10 files
02cec5d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.999856278819091,
"global_step": 83492,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.9826330666411153e-06,
"loss": 1.4668,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 2.9652661332822306e-06,
"loss": 1.4671,
"step": 1000
},
{
"epoch": 0.07,
"learning_rate": 2.9478991999233463e-06,
"loss": 1.467,
"step": 1500
},
{
"epoch": 0.1,
"learning_rate": 2.930532266564461e-06,
"loss": 1.4669,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 2.913165333205577e-06,
"loss": 1.4671,
"step": 2500
},
{
"epoch": 0.14,
"learning_rate": 2.8957983998466917e-06,
"loss": 1.4675,
"step": 3000
},
{
"epoch": 0.17,
"learning_rate": 2.8784314664878074e-06,
"loss": 1.467,
"step": 3500
},
{
"epoch": 0.19,
"learning_rate": 2.8610645331289227e-06,
"loss": 1.4663,
"step": 4000
},
{
"epoch": 0.22,
"learning_rate": 2.843697599770038e-06,
"loss": 1.4664,
"step": 4500
},
{
"epoch": 0.24,
"learning_rate": 2.826330666411153e-06,
"loss": 1.4667,
"step": 5000
},
{
"epoch": 0.26,
"learning_rate": 2.8089637330522685e-06,
"loss": 1.467,
"step": 5500
},
{
"epoch": 0.29,
"learning_rate": 2.791596799693384e-06,
"loss": 1.4669,
"step": 6000
},
{
"epoch": 0.31,
"learning_rate": 2.774229866334499e-06,
"loss": 1.4668,
"step": 6500
},
{
"epoch": 0.34,
"learning_rate": 2.7568629329756147e-06,
"loss": 1.4663,
"step": 7000
},
{
"epoch": 0.36,
"learning_rate": 2.7394959996167295e-06,
"loss": 1.4668,
"step": 7500
},
{
"epoch": 0.38,
"learning_rate": 2.7221290662578452e-06,
"loss": 1.4665,
"step": 8000
},
{
"epoch": 0.41,
"learning_rate": 2.7047621328989605e-06,
"loss": 1.4665,
"step": 8500
},
{
"epoch": 0.43,
"learning_rate": 2.6873951995400758e-06,
"loss": 1.4663,
"step": 9000
},
{
"epoch": 0.46,
"learning_rate": 2.670028266181191e-06,
"loss": 1.4663,
"step": 9500
},
{
"epoch": 0.48,
"learning_rate": 2.6526613328223063e-06,
"loss": 1.4663,
"step": 10000
},
{
"epoch": 0.5,
"learning_rate": 2.635294399463422e-06,
"loss": 1.4658,
"step": 10500
},
{
"epoch": 0.53,
"learning_rate": 2.617927466104537e-06,
"loss": 1.4657,
"step": 11000
},
{
"epoch": 0.55,
"learning_rate": 2.6005605327456525e-06,
"loss": 1.4657,
"step": 11500
},
{
"epoch": 0.57,
"learning_rate": 2.5831935993867674e-06,
"loss": 1.4659,
"step": 12000
},
{
"epoch": 0.6,
"learning_rate": 2.565826666027883e-06,
"loss": 1.4661,
"step": 12500
},
{
"epoch": 0.62,
"learning_rate": 2.5484597326689983e-06,
"loss": 1.4657,
"step": 13000
},
{
"epoch": 0.65,
"learning_rate": 2.5310927993101136e-06,
"loss": 1.4661,
"step": 13500
},
{
"epoch": 0.67,
"learning_rate": 2.513725865951229e-06,
"loss": 1.4654,
"step": 14000
},
{
"epoch": 0.69,
"learning_rate": 2.496358932592344e-06,
"loss": 1.466,
"step": 14500
},
{
"epoch": 0.72,
"learning_rate": 2.4789919992334594e-06,
"loss": 1.4661,
"step": 15000
},
{
"epoch": 0.74,
"learning_rate": 2.4616250658745747e-06,
"loss": 1.4655,
"step": 15500
},
{
"epoch": 0.77,
"learning_rate": 2.4442581325156904e-06,
"loss": 1.4654,
"step": 16000
},
{
"epoch": 0.79,
"learning_rate": 2.4268911991568052e-06,
"loss": 1.4654,
"step": 16500
},
{
"epoch": 0.81,
"learning_rate": 2.409524265797921e-06,
"loss": 1.4647,
"step": 17000
},
{
"epoch": 0.84,
"learning_rate": 2.392157332439036e-06,
"loss": 1.4651,
"step": 17500
},
{
"epoch": 0.86,
"learning_rate": 2.3747903990801515e-06,
"loss": 1.4656,
"step": 18000
},
{
"epoch": 0.89,
"learning_rate": 2.3574234657212667e-06,
"loss": 1.4652,
"step": 18500
},
{
"epoch": 0.91,
"learning_rate": 2.340056532362382e-06,
"loss": 1.4649,
"step": 19000
},
{
"epoch": 0.93,
"learning_rate": 2.3226895990034977e-06,
"loss": 1.4654,
"step": 19500
},
{
"epoch": 0.96,
"learning_rate": 2.3053226656446125e-06,
"loss": 1.4649,
"step": 20000
},
{
"epoch": 0.98,
"learning_rate": 2.2879557322857282e-06,
"loss": 1.4648,
"step": 20500
},
{
"epoch": 1.0,
"eval_loss": 1.4683704376220703,
"eval_runtime": 319.8553,
"eval_samples_per_second": 93.792,
"eval_steps_per_second": 1.466,
"step": 20873
},
{
"epoch": 1.01,
"learning_rate": 2.270588798926843e-06,
"loss": 1.4643,
"step": 21000
},
{
"epoch": 1.03,
"learning_rate": 2.2532218655679588e-06,
"loss": 1.4633,
"step": 21500
},
{
"epoch": 1.05,
"learning_rate": 2.235854932209074e-06,
"loss": 1.4635,
"step": 22000
},
{
"epoch": 1.08,
"learning_rate": 2.2184879988501893e-06,
"loss": 1.4633,
"step": 22500
},
{
"epoch": 1.1,
"learning_rate": 2.2011210654913046e-06,
"loss": 1.4634,
"step": 23000
},
{
"epoch": 1.13,
"learning_rate": 2.18375413213242e-06,
"loss": 1.4632,
"step": 23500
},
{
"epoch": 1.15,
"learning_rate": 2.166387198773535e-06,
"loss": 1.4632,
"step": 24000
},
{
"epoch": 1.17,
"learning_rate": 2.1490202654146504e-06,
"loss": 1.4632,
"step": 24500
},
{
"epoch": 1.2,
"learning_rate": 2.131653332055766e-06,
"loss": 1.4634,
"step": 25000
},
{
"epoch": 1.22,
"learning_rate": 2.114286398696881e-06,
"loss": 1.463,
"step": 25500
},
{
"epoch": 1.25,
"learning_rate": 2.0969194653379966e-06,
"loss": 1.4635,
"step": 26000
},
{
"epoch": 1.27,
"learning_rate": 2.079552531979112e-06,
"loss": 1.4636,
"step": 26500
},
{
"epoch": 1.29,
"learning_rate": 2.062185598620227e-06,
"loss": 1.4629,
"step": 27000
},
{
"epoch": 1.32,
"learning_rate": 2.0448186652613424e-06,
"loss": 1.463,
"step": 27500
},
{
"epoch": 1.34,
"learning_rate": 2.0274517319024577e-06,
"loss": 1.4627,
"step": 28000
},
{
"epoch": 1.37,
"learning_rate": 2.0100847985435734e-06,
"loss": 1.4635,
"step": 28500
},
{
"epoch": 1.39,
"learning_rate": 1.9927178651846882e-06,
"loss": 1.463,
"step": 29000
},
{
"epoch": 1.41,
"learning_rate": 1.9753509318258035e-06,
"loss": 1.4631,
"step": 29500
},
{
"epoch": 1.44,
"learning_rate": 1.957983998466919e-06,
"loss": 1.463,
"step": 30000
},
{
"epoch": 1.46,
"learning_rate": 1.9406170651080345e-06,
"loss": 1.4632,
"step": 30500
},
{
"epoch": 1.49,
"learning_rate": 1.9232501317491497e-06,
"loss": 1.4626,
"step": 31000
},
{
"epoch": 1.51,
"learning_rate": 1.9058831983902652e-06,
"loss": 1.463,
"step": 31500
},
{
"epoch": 1.53,
"learning_rate": 1.8885162650313807e-06,
"loss": 1.4632,
"step": 32000
},
{
"epoch": 1.56,
"learning_rate": 1.8711493316724955e-06,
"loss": 1.4629,
"step": 32500
},
{
"epoch": 1.58,
"learning_rate": 1.853782398313611e-06,
"loss": 1.4628,
"step": 33000
},
{
"epoch": 1.6,
"learning_rate": 1.836415464954726e-06,
"loss": 1.463,
"step": 33500
},
{
"epoch": 1.63,
"learning_rate": 1.8190485315958416e-06,
"loss": 1.4627,
"step": 34000
},
{
"epoch": 1.65,
"learning_rate": 1.801681598236957e-06,
"loss": 1.4626,
"step": 34500
},
{
"epoch": 1.68,
"learning_rate": 1.7843146648780719e-06,
"loss": 1.4631,
"step": 35000
},
{
"epoch": 1.7,
"learning_rate": 1.766947731519188e-06,
"loss": 1.4628,
"step": 35500
},
{
"epoch": 1.72,
"learning_rate": 1.7495807981603028e-06,
"loss": 1.4625,
"step": 36000
},
{
"epoch": 1.75,
"learning_rate": 1.7322138648014181e-06,
"loss": 1.4627,
"step": 36500
},
{
"epoch": 1.77,
"learning_rate": 1.7148469314425332e-06,
"loss": 1.4628,
"step": 37000
},
{
"epoch": 1.8,
"learning_rate": 1.697479998083649e-06,
"loss": 1.4623,
"step": 37500
},
{
"epoch": 1.82,
"learning_rate": 1.680113064724764e-06,
"loss": 1.4626,
"step": 38000
},
{
"epoch": 1.84,
"learning_rate": 1.662746131365879e-06,
"loss": 1.4621,
"step": 38500
},
{
"epoch": 1.87,
"learning_rate": 1.6453791980069949e-06,
"loss": 1.4626,
"step": 39000
},
{
"epoch": 1.89,
"learning_rate": 1.62801226464811e-06,
"loss": 1.4623,
"step": 39500
},
{
"epoch": 1.92,
"learning_rate": 1.6106453312892254e-06,
"loss": 1.4622,
"step": 40000
},
{
"epoch": 1.94,
"learning_rate": 1.5932783979303407e-06,
"loss": 1.4624,
"step": 40500
},
{
"epoch": 1.96,
"learning_rate": 1.5759114645714566e-06,
"loss": 1.4626,
"step": 41000
},
{
"epoch": 1.99,
"learning_rate": 1.5585445312125714e-06,
"loss": 1.4619,
"step": 41500
},
{
"epoch": 2.0,
"eval_loss": 1.467372179031372,
"eval_runtime": 318.5344,
"eval_samples_per_second": 94.181,
"eval_steps_per_second": 1.472,
"step": 41747
},
{
"epoch": 2.01,
"learning_rate": 1.5411775978536865e-06,
"loss": 1.4618,
"step": 42000
},
{
"epoch": 2.04,
"learning_rate": 1.5238106644948016e-06,
"loss": 1.4609,
"step": 42500
},
{
"epoch": 2.06,
"learning_rate": 1.5064437311359175e-06,
"loss": 1.461,
"step": 43000
},
{
"epoch": 2.08,
"learning_rate": 1.4890767977770327e-06,
"loss": 1.4613,
"step": 43500
},
{
"epoch": 2.11,
"learning_rate": 1.471709864418148e-06,
"loss": 1.4607,
"step": 44000
},
{
"epoch": 2.13,
"learning_rate": 1.4543429310592633e-06,
"loss": 1.4611,
"step": 44500
},
{
"epoch": 2.16,
"learning_rate": 1.4369759977003785e-06,
"loss": 1.4614,
"step": 45000
},
{
"epoch": 2.18,
"learning_rate": 1.4196090643414938e-06,
"loss": 1.4608,
"step": 45500
},
{
"epoch": 2.2,
"learning_rate": 1.402242130982609e-06,
"loss": 1.4609,
"step": 46000
},
{
"epoch": 2.23,
"learning_rate": 1.3848751976237243e-06,
"loss": 1.4618,
"step": 46500
},
{
"epoch": 2.25,
"learning_rate": 1.3675082642648396e-06,
"loss": 1.4609,
"step": 47000
},
{
"epoch": 2.28,
"learning_rate": 1.3501413309059553e-06,
"loss": 1.4606,
"step": 47500
},
{
"epoch": 2.3,
"learning_rate": 1.3327743975470706e-06,
"loss": 1.461,
"step": 48000
},
{
"epoch": 2.32,
"learning_rate": 1.3154074641881858e-06,
"loss": 1.4609,
"step": 48500
},
{
"epoch": 2.35,
"learning_rate": 1.2980405308293011e-06,
"loss": 1.4611,
"step": 49000
},
{
"epoch": 2.37,
"learning_rate": 1.2806735974704164e-06,
"loss": 1.4608,
"step": 49500
},
{
"epoch": 2.4,
"learning_rate": 1.2633066641115317e-06,
"loss": 1.4604,
"step": 50000
},
{
"epoch": 2.42,
"learning_rate": 1.245939730752647e-06,
"loss": 1.4608,
"step": 50500
},
{
"epoch": 2.44,
"learning_rate": 1.2285727973937622e-06,
"loss": 1.4607,
"step": 51000
},
{
"epoch": 2.47,
"learning_rate": 1.2112058640348775e-06,
"loss": 1.4606,
"step": 51500
},
{
"epoch": 2.49,
"learning_rate": 1.1938389306759932e-06,
"loss": 1.4606,
"step": 52000
},
{
"epoch": 2.52,
"learning_rate": 1.1764719973171084e-06,
"loss": 1.4607,
"step": 52500
},
{
"epoch": 2.54,
"learning_rate": 1.1591050639582237e-06,
"loss": 1.4606,
"step": 53000
},
{
"epoch": 2.56,
"learning_rate": 1.141738130599339e-06,
"loss": 1.4609,
"step": 53500
},
{
"epoch": 2.59,
"learning_rate": 1.1243711972404542e-06,
"loss": 1.4609,
"step": 54000
},
{
"epoch": 2.61,
"learning_rate": 1.1070042638815695e-06,
"loss": 1.4604,
"step": 54500
},
{
"epoch": 2.63,
"learning_rate": 1.0896373305226848e-06,
"loss": 1.4608,
"step": 55000
},
{
"epoch": 2.66,
"learning_rate": 1.0722703971638e-06,
"loss": 1.4604,
"step": 55500
},
{
"epoch": 2.68,
"learning_rate": 1.0549034638049153e-06,
"loss": 1.4607,
"step": 56000
},
{
"epoch": 2.71,
"learning_rate": 1.037536530446031e-06,
"loss": 1.4607,
"step": 56500
},
{
"epoch": 2.73,
"learning_rate": 1.020169597087146e-06,
"loss": 1.4609,
"step": 57000
},
{
"epoch": 2.75,
"learning_rate": 1.0028026637282615e-06,
"loss": 1.461,
"step": 57500
},
{
"epoch": 2.78,
"learning_rate": 9.854357303693768e-07,
"loss": 1.4609,
"step": 58000
},
{
"epoch": 2.8,
"learning_rate": 9.68068797010492e-07,
"loss": 1.4608,
"step": 58500
},
{
"epoch": 2.83,
"learning_rate": 9.507018636516072e-07,
"loss": 1.4603,
"step": 59000
},
{
"epoch": 2.85,
"learning_rate": 9.333349302927227e-07,
"loss": 1.4606,
"step": 59500
},
{
"epoch": 2.87,
"learning_rate": 9.159679969338379e-07,
"loss": 1.4605,
"step": 60000
},
{
"epoch": 2.9,
"learning_rate": 8.986010635749534e-07,
"loss": 1.4603,
"step": 60500
},
{
"epoch": 2.92,
"learning_rate": 8.81234130216069e-07,
"loss": 1.4606,
"step": 61000
},
{
"epoch": 2.95,
"learning_rate": 8.638671968571839e-07,
"loss": 1.4611,
"step": 61500
},
{
"epoch": 2.97,
"learning_rate": 8.465002634982994e-07,
"loss": 1.4603,
"step": 62000
},
{
"epoch": 2.99,
"learning_rate": 8.291333301394146e-07,
"loss": 1.4606,
"step": 62500
},
{
"epoch": 3.0,
"eval_loss": 1.4666800498962402,
"eval_runtime": 320.1083,
"eval_samples_per_second": 93.718,
"eval_steps_per_second": 1.465,
"step": 62621
},
{
"epoch": 3.02,
"learning_rate": 8.1176639678053e-07,
"loss": 1.4591,
"step": 63000
},
{
"epoch": 3.04,
"learning_rate": 7.943994634216451e-07,
"loss": 1.46,
"step": 63500
},
{
"epoch": 3.07,
"learning_rate": 7.770325300627606e-07,
"loss": 1.4594,
"step": 64000
},
{
"epoch": 3.09,
"learning_rate": 7.596655967038757e-07,
"loss": 1.4603,
"step": 64500
},
{
"epoch": 3.11,
"learning_rate": 7.422986633449912e-07,
"loss": 1.4599,
"step": 65000
},
{
"epoch": 3.14,
"learning_rate": 7.249317299861067e-07,
"loss": 1.4596,
"step": 65500
},
{
"epoch": 3.16,
"learning_rate": 7.075647966272218e-07,
"loss": 1.4593,
"step": 66000
},
{
"epoch": 3.19,
"learning_rate": 6.901978632683372e-07,
"loss": 1.4594,
"step": 66500
},
{
"epoch": 3.21,
"learning_rate": 6.728309299094524e-07,
"loss": 1.4595,
"step": 67000
},
{
"epoch": 3.23,
"learning_rate": 6.554639965505679e-07,
"loss": 1.4602,
"step": 67500
},
{
"epoch": 3.26,
"learning_rate": 6.380970631916829e-07,
"loss": 1.4592,
"step": 68000
},
{
"epoch": 3.28,
"learning_rate": 6.207301298327984e-07,
"loss": 1.4592,
"step": 68500
},
{
"epoch": 3.31,
"learning_rate": 6.033631964739136e-07,
"loss": 1.46,
"step": 69000
},
{
"epoch": 3.33,
"learning_rate": 5.859962631150291e-07,
"loss": 1.4596,
"step": 69500
},
{
"epoch": 3.35,
"learning_rate": 5.686293297561441e-07,
"loss": 1.4597,
"step": 70000
},
{
"epoch": 3.38,
"learning_rate": 5.512623963972596e-07,
"loss": 1.4594,
"step": 70500
},
{
"epoch": 3.4,
"learning_rate": 5.338954630383751e-07,
"loss": 1.4598,
"step": 71000
},
{
"epoch": 3.43,
"learning_rate": 5.165285296794902e-07,
"loss": 1.4596,
"step": 71500
},
{
"epoch": 3.45,
"learning_rate": 4.991615963206056e-07,
"loss": 1.4596,
"step": 72000
},
{
"epoch": 3.47,
"learning_rate": 4.817946629617209e-07,
"loss": 1.4594,
"step": 72500
},
{
"epoch": 3.5,
"learning_rate": 4.6442772960283626e-07,
"loss": 1.4593,
"step": 73000
},
{
"epoch": 3.52,
"learning_rate": 4.4706079624395143e-07,
"loss": 1.4597,
"step": 73500
},
{
"epoch": 3.55,
"learning_rate": 4.296938628850668e-07,
"loss": 1.4601,
"step": 74000
},
{
"epoch": 3.57,
"learning_rate": 4.1232692952618197e-07,
"loss": 1.4592,
"step": 74500
},
{
"epoch": 3.59,
"learning_rate": 3.9495999616729745e-07,
"loss": 1.4599,
"step": 75000
},
{
"epoch": 3.62,
"learning_rate": 3.77593062808413e-07,
"loss": 1.4591,
"step": 75500
},
{
"epoch": 3.64,
"learning_rate": 3.602261294495281e-07,
"loss": 1.4598,
"step": 76000
},
{
"epoch": 3.66,
"learning_rate": 3.428591960906435e-07,
"loss": 1.4592,
"step": 76500
},
{
"epoch": 3.69,
"learning_rate": 3.2549226273175863e-07,
"loss": 1.4597,
"step": 77000
},
{
"epoch": 3.71,
"learning_rate": 3.081253293728741e-07,
"loss": 1.4598,
"step": 77500
},
{
"epoch": 3.74,
"learning_rate": 2.907583960139893e-07,
"loss": 1.4594,
"step": 78000
},
{
"epoch": 3.76,
"learning_rate": 2.7339146265510476e-07,
"loss": 1.4596,
"step": 78500
},
{
"epoch": 3.78,
"learning_rate": 2.5602452929621987e-07,
"loss": 1.4592,
"step": 79000
},
{
"epoch": 3.81,
"learning_rate": 2.386575959373353e-07,
"loss": 1.4597,
"step": 79500
},
{
"epoch": 3.83,
"learning_rate": 2.2129066257845077e-07,
"loss": 1.4593,
"step": 80000
},
{
"epoch": 3.86,
"learning_rate": 2.0392372921956589e-07,
"loss": 1.4598,
"step": 80500
},
{
"epoch": 3.88,
"learning_rate": 1.8655679586068137e-07,
"loss": 1.4593,
"step": 81000
},
{
"epoch": 3.9,
"learning_rate": 1.691898625017965e-07,
"loss": 1.4592,
"step": 81500
},
{
"epoch": 3.93,
"learning_rate": 1.5182292914291196e-07,
"loss": 1.4593,
"step": 82000
},
{
"epoch": 3.95,
"learning_rate": 1.344559957840271e-07,
"loss": 1.4594,
"step": 82500
},
{
"epoch": 3.98,
"learning_rate": 1.1708906242514258e-07,
"loss": 1.4596,
"step": 83000
},
{
"epoch": 4.0,
"eval_loss": 1.4663872718811035,
"eval_runtime": 321.2818,
"eval_samples_per_second": 93.376,
"eval_steps_per_second": 1.46,
"step": 83492
},
{
"epoch": 4.0,
"step": 83492,
"total_flos": 2.080820519714685e+18,
"train_loss": 1.4623237360947567,
"train_runtime": 299973.1617,
"train_samples_per_second": 71.255,
"train_steps_per_second": 0.278
}
],
"max_steps": 83492,
"num_train_epochs": 4,
"total_flos": 2.080820519714685e+18,
"trial_name": null,
"trial_params": null
}