T5Model100 / trainer_state.json
Saddammm's picture
Upload trainer_state.json
7a328eb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.4,
"global_step": 9940,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.00029699999999999996,
"loss": 2.3572,
"step": 100
},
{
"epoch": 1.0,
"eval_loss": 2.0208146572113037,
"eval_rouge1": 0.38044177163260395,
"eval_rouge2": 0.22166452709217227,
"eval_rougeL": 0.32812293401962783,
"eval_rougeLsum": 0.3362999547235502,
"eval_runtime": 101.9509,
"eval_samples_per_second": 2.619,
"eval_steps_per_second": 0.657,
"step": 100
},
{
"epoch": 2.0,
"learning_rate": 0.000294,
"loss": 1.8221,
"step": 200
},
{
"epoch": 2.0,
"eval_loss": 1.9186301231384277,
"eval_rouge1": 0.37852474610193687,
"eval_rouge2": 0.22935359793844923,
"eval_rougeL": 0.33206971678305885,
"eval_rougeLsum": 0.34036536115022115,
"eval_runtime": 88.4707,
"eval_samples_per_second": 3.018,
"eval_steps_per_second": 0.757,
"step": 200
},
{
"epoch": 3.0,
"learning_rate": 0.00029099999999999997,
"loss": 1.499,
"step": 300
},
{
"epoch": 3.0,
"eval_loss": 1.8877462148666382,
"eval_rouge1": 0.38418268500674113,
"eval_rouge2": 0.23776340434064813,
"eval_rougeL": 0.3334841887210931,
"eval_rougeLsum": 0.3427682236088454,
"eval_runtime": 84.7141,
"eval_samples_per_second": 3.152,
"eval_steps_per_second": 0.791,
"step": 300
},
{
"epoch": 4.0,
"learning_rate": 0.00028799999999999995,
"loss": 1.2417,
"step": 400
},
{
"epoch": 4.0,
"eval_loss": 1.9007868766784668,
"eval_rouge1": 0.38459815678190157,
"eval_rouge2": 0.2389841562885784,
"eval_rougeL": 0.3358997192110219,
"eval_rougeLsum": 0.3459186341182623,
"eval_runtime": 87.1466,
"eval_samples_per_second": 3.064,
"eval_steps_per_second": 0.769,
"step": 400
},
{
"epoch": 5.0,
"learning_rate": 0.000285,
"loss": 1.0291,
"step": 500
},
{
"epoch": 5.0,
"eval_loss": 1.9464083909988403,
"eval_rouge1": 0.3771057538216719,
"eval_rouge2": 0.23598990815679244,
"eval_rougeL": 0.33157674303979967,
"eval_rougeLsum": 0.341814656302837,
"eval_runtime": 84.8724,
"eval_samples_per_second": 3.146,
"eval_steps_per_second": 0.789,
"step": 500
},
{
"epoch": 6.0,
"learning_rate": 0.00028199999999999997,
"loss": 0.8654,
"step": 600
},
{
"epoch": 6.0,
"eval_loss": 2.028379201889038,
"eval_rouge1": 0.3765336053931574,
"eval_rouge2": 0.2346491097022591,
"eval_rougeL": 0.33228774930217964,
"eval_rougeLsum": 0.3417265321962749,
"eval_runtime": 81.6724,
"eval_samples_per_second": 3.269,
"eval_steps_per_second": 0.82,
"step": 600
},
{
"epoch": 7.0,
"learning_rate": 0.000279,
"loss": 0.7288,
"step": 700
},
{
"epoch": 7.0,
"eval_loss": 2.0554444789886475,
"eval_rouge1": 0.3800786333023515,
"eval_rouge2": 0.2409070123236104,
"eval_rougeL": 0.33682162100303614,
"eval_rougeLsum": 0.34734833881743354,
"eval_runtime": 73.6883,
"eval_samples_per_second": 3.623,
"eval_steps_per_second": 0.909,
"step": 700
},
{
"epoch": 8.0,
"learning_rate": 0.000276,
"loss": 0.6131,
"step": 800
},
{
"epoch": 8.0,
"eval_loss": 2.166259288787842,
"eval_rouge1": 0.38689909609996354,
"eval_rouge2": 0.24839504760949166,
"eval_rougeL": 0.3423017815734907,
"eval_rougeLsum": 0.35145300780053884,
"eval_runtime": 84.303,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 0.795,
"step": 800
},
{
"epoch": 9.0,
"learning_rate": 0.00027299999999999997,
"loss": 0.5199,
"step": 900
},
{
"epoch": 9.0,
"eval_loss": 2.285539388656616,
"eval_rouge1": 0.3789430717182254,
"eval_rouge2": 0.24104702341168543,
"eval_rougeL": 0.33609196315407464,
"eval_rougeLsum": 0.34419861929575424,
"eval_runtime": 87.3936,
"eval_samples_per_second": 3.055,
"eval_steps_per_second": 0.767,
"step": 900
},
{
"epoch": 10.0,
"learning_rate": 0.00027,
"loss": 0.4377,
"step": 1000
},
{
"epoch": 10.0,
"eval_loss": 2.3414063453674316,
"eval_rouge1": 0.3804299105262684,
"eval_rouge2": 0.24134979578604437,
"eval_rougeL": 0.33465308007661587,
"eval_rougeLsum": 0.34441539625221235,
"eval_runtime": 74.2399,
"eval_samples_per_second": 3.596,
"eval_steps_per_second": 0.902,
"step": 1000
},
{
"epoch": 11.0,
"learning_rate": 0.000267,
"loss": 0.3774,
"step": 1100
},
{
"epoch": 11.0,
"eval_loss": 2.5906524658203125,
"eval_rouge1": 0.37488053520037123,
"eval_rouge2": 0.2325364461265239,
"eval_rougeL": 0.3284555547843171,
"eval_rougeLsum": 0.33842206829201854,
"eval_runtime": 87.4814,
"eval_samples_per_second": 3.052,
"eval_steps_per_second": 0.766,
"step": 1100
},
{
"epoch": 12.0,
"learning_rate": 0.00026399999999999997,
"loss": 0.3245,
"step": 1200
},
{
"epoch": 12.0,
"eval_loss": 2.575906276702881,
"eval_rouge1": 0.3809104895798368,
"eval_rouge2": 0.2430694084434509,
"eval_rougeL": 0.339586891707485,
"eval_rougeLsum": 0.3479448496850858,
"eval_runtime": 86.2075,
"eval_samples_per_second": 3.097,
"eval_steps_per_second": 0.777,
"step": 1200
},
{
"epoch": 13.0,
"learning_rate": 0.000261,
"loss": 0.2838,
"step": 1300
},
{
"epoch": 13.0,
"eval_loss": 2.713919162750244,
"eval_rouge1": 0.3817118270434364,
"eval_rouge2": 0.24584042005918305,
"eval_rougeL": 0.3378501156668257,
"eval_rougeLsum": 0.34886771837118585,
"eval_runtime": 88.4266,
"eval_samples_per_second": 3.019,
"eval_steps_per_second": 0.758,
"step": 1300
},
{
"epoch": 14.0,
"learning_rate": 0.000258,
"loss": 0.2468,
"step": 1400
},
{
"epoch": 14.0,
"eval_loss": 2.8145313262939453,
"eval_rouge1": 0.379472522792231,
"eval_rouge2": 0.24368264109880694,
"eval_rougeL": 0.3402990243711923,
"eval_rougeLsum": 0.34789779861372483,
"eval_runtime": 87.9487,
"eval_samples_per_second": 3.036,
"eval_steps_per_second": 0.762,
"step": 1400
},
{
"epoch": 15.0,
"learning_rate": 0.00025499999999999996,
"loss": 0.2176,
"step": 1500
},
{
"epoch": 15.0,
"eval_loss": 2.9211342334747314,
"eval_rouge1": 0.38563833059926744,
"eval_rouge2": 0.25107254535227563,
"eval_rougeL": 0.3454046659035601,
"eval_rougeLsum": 0.3550077794346308,
"eval_runtime": 87.7555,
"eval_samples_per_second": 3.043,
"eval_steps_per_second": 0.763,
"step": 1500
},
{
"epoch": 16.0,
"learning_rate": 0.00025199999999999995,
"loss": 0.1971,
"step": 1600
},
{
"epoch": 16.0,
"eval_loss": 3.035965919494629,
"eval_rouge1": 0.3748339777411154,
"eval_rouge2": 0.24127754538743285,
"eval_rougeL": 0.33399204978041663,
"eval_rougeLsum": 0.3436267443506058,
"eval_runtime": 88.2167,
"eval_samples_per_second": 3.027,
"eval_steps_per_second": 0.759,
"step": 1600
},
{
"epoch": 17.0,
"learning_rate": 0.000249,
"loss": 0.1762,
"step": 1700
},
{
"epoch": 17.0,
"eval_loss": 3.1633944511413574,
"eval_rouge1": 0.3756943708883763,
"eval_rouge2": 0.2398374976205459,
"eval_rougeL": 0.3344204501634195,
"eval_rougeLsum": 0.34415067068109806,
"eval_runtime": 88.3785,
"eval_samples_per_second": 3.021,
"eval_steps_per_second": 0.758,
"step": 1700
},
{
"epoch": 18.0,
"learning_rate": 0.00024599999999999996,
"loss": 0.1586,
"step": 1800
},
{
"epoch": 18.0,
"eval_loss": 3.361802339553833,
"eval_rouge1": 0.3771231957637745,
"eval_rouge2": 0.24367019161252174,
"eval_rougeL": 0.33712070645495773,
"eval_rougeLsum": 0.3467573160545896,
"eval_runtime": 88.6648,
"eval_samples_per_second": 3.011,
"eval_steps_per_second": 0.756,
"step": 1800
},
{
"epoch": 19.0,
"learning_rate": 0.000243,
"loss": 0.1431,
"step": 1900
},
{
"epoch": 19.0,
"eval_loss": 3.339010000228882,
"eval_rouge1": 0.38323576739072185,
"eval_rouge2": 0.24826737117741765,
"eval_rougeL": 0.3410750002762108,
"eval_rougeLsum": 0.3503168292106818,
"eval_runtime": 88.2341,
"eval_samples_per_second": 3.026,
"eval_steps_per_second": 0.759,
"step": 1900
},
{
"epoch": 20.0,
"learning_rate": 0.00023999999999999998,
"loss": 0.1369,
"step": 2000
},
{
"epoch": 20.0,
"eval_loss": 3.3738720417022705,
"eval_rouge1": 0.3803519989007255,
"eval_rouge2": 0.24324683470771802,
"eval_rougeL": 0.3413320322258584,
"eval_rougeLsum": 0.3497926224765554,
"eval_runtime": 87.7456,
"eval_samples_per_second": 3.043,
"eval_steps_per_second": 0.764,
"step": 2000
},
{
"epoch": 21.0,
"learning_rate": 0.000237,
"loss": 0.1191,
"step": 2100
},
{
"epoch": 21.0,
"eval_loss": 3.4585511684417725,
"eval_rouge1": 0.3856774466701469,
"eval_rouge2": 0.24573309119665773,
"eval_rougeL": 0.340491594204651,
"eval_rougeLsum": 0.35036530615920203,
"eval_runtime": 84.6104,
"eval_samples_per_second": 3.156,
"eval_steps_per_second": 0.792,
"step": 2100
},
{
"epoch": 22.0,
"learning_rate": 0.000234,
"loss": 0.1123,
"step": 2200
},
{
"epoch": 22.0,
"eval_loss": 3.4547746181488037,
"eval_rouge1": 0.379890674444173,
"eval_rouge2": 0.23951241543587243,
"eval_rougeL": 0.3381674920029052,
"eval_rougeLsum": 0.34605025320246763,
"eval_runtime": 86.4576,
"eval_samples_per_second": 3.088,
"eval_steps_per_second": 0.775,
"step": 2200
},
{
"epoch": 23.0,
"learning_rate": 0.00023099999999999998,
"loss": 0.1006,
"step": 2300
},
{
"epoch": 23.0,
"eval_loss": 3.563164234161377,
"eval_rouge1": 0.383309277836589,
"eval_rouge2": 0.24671832804779853,
"eval_rougeL": 0.3418864003163312,
"eval_rougeLsum": 0.3510389909721277,
"eval_runtime": 86.7629,
"eval_samples_per_second": 3.077,
"eval_steps_per_second": 0.772,
"step": 2300
},
{
"epoch": 24.0,
"learning_rate": 0.00022799999999999999,
"loss": 0.0946,
"step": 2400
},
{
"epoch": 24.0,
"eval_loss": 3.452104091644287,
"eval_rouge1": 0.38778417244680397,
"eval_rouge2": 0.24864493001126428,
"eval_rougeL": 0.34531411329508677,
"eval_rougeLsum": 0.3544382672740283,
"eval_runtime": 81.1305,
"eval_samples_per_second": 3.291,
"eval_steps_per_second": 0.826,
"step": 2400
},
{
"epoch": 25.0,
"learning_rate": 0.000225,
"loss": 0.0852,
"step": 2500
},
{
"epoch": 25.0,
"eval_loss": 3.722707509994507,
"eval_rouge1": 0.38544627790159214,
"eval_rouge2": 0.24835200716378503,
"eval_rougeL": 0.34171087371739595,
"eval_rougeLsum": 0.35119063257011257,
"eval_runtime": 88.2083,
"eval_samples_per_second": 3.027,
"eval_steps_per_second": 0.76,
"step": 2500
},
{
"epoch": 26.0,
"learning_rate": 0.00022199999999999998,
"loss": 0.0819,
"step": 2600
},
{
"epoch": 26.0,
"eval_loss": 3.700070858001709,
"eval_rouge1": 0.3821803340000923,
"eval_rouge2": 0.24725796483857881,
"eval_rougeL": 0.34338516151002396,
"eval_rougeLsum": 0.35257138073899147,
"eval_runtime": 83.8321,
"eval_samples_per_second": 3.185,
"eval_steps_per_second": 0.799,
"step": 2600
},
{
"epoch": 27.0,
"learning_rate": 0.00021899999999999998,
"loss": 0.076,
"step": 2700
},
{
"epoch": 27.0,
"eval_loss": 3.705507278442383,
"eval_rouge1": 0.38289988903470296,
"eval_rouge2": 0.2435924255504497,
"eval_rougeL": 0.34095899111986594,
"eval_rougeLsum": 0.35027734448953907,
"eval_runtime": 84.7019,
"eval_samples_per_second": 3.152,
"eval_steps_per_second": 0.791,
"step": 2700
},
{
"epoch": 28.0,
"learning_rate": 0.00021599999999999996,
"loss": 0.0737,
"step": 2800
},
{
"epoch": 28.0,
"eval_loss": 3.8778345584869385,
"eval_rouge1": 0.3863238066090623,
"eval_rouge2": 0.24626416138837423,
"eval_rougeL": 0.33940239308122,
"eval_rougeLsum": 0.35008151797367537,
"eval_runtime": 88.4609,
"eval_samples_per_second": 3.018,
"eval_steps_per_second": 0.757,
"step": 2800
},
{
"epoch": 29.0,
"learning_rate": 0.00021299999999999997,
"loss": 0.071,
"step": 2900
},
{
"epoch": 29.0,
"eval_loss": 3.7274887561798096,
"eval_rouge1": 0.38452724844736197,
"eval_rouge2": 0.24655003737302858,
"eval_rougeL": 0.34600558721773045,
"eval_rougeLsum": 0.3548096862359722,
"eval_runtime": 87.9179,
"eval_samples_per_second": 3.037,
"eval_steps_per_second": 0.762,
"step": 2900
},
{
"epoch": 30.0,
"learning_rate": 0.00020999999999999998,
"loss": 0.0651,
"step": 3000
},
{
"epoch": 30.0,
"eval_loss": 3.9305973052978516,
"eval_rouge1": 0.38263859087914487,
"eval_rouge2": 0.24388219812962653,
"eval_rougeL": 0.33932726653150014,
"eval_rougeLsum": 0.350347980074978,
"eval_runtime": 86.4392,
"eval_samples_per_second": 3.089,
"eval_steps_per_second": 0.775,
"step": 3000
},
{
"epoch": 31.0,
"learning_rate": 0.00020699999999999996,
"loss": 0.059,
"step": 3100
},
{
"epoch": 31.0,
"eval_loss": 3.8749561309814453,
"eval_rouge1": 0.3838656202146266,
"eval_rouge2": 0.24797544855564618,
"eval_rougeL": 0.3455085604915118,
"eval_rougeLsum": 0.35485772003639693,
"eval_runtime": 80.6768,
"eval_samples_per_second": 3.31,
"eval_steps_per_second": 0.83,
"step": 3100
},
{
"epoch": 32.0,
"learning_rate": 0.000204,
"loss": 0.0575,
"step": 3200
},
{
"epoch": 32.0,
"eval_loss": 4.035264492034912,
"eval_rouge1": 0.3836411899648502,
"eval_rouge2": 0.25059245453298623,
"eval_rougeL": 0.3411469702531179,
"eval_rougeLsum": 0.3521664869788945,
"eval_runtime": 88.2658,
"eval_samples_per_second": 3.025,
"eval_steps_per_second": 0.759,
"step": 3200
},
{
"epoch": 33.0,
"learning_rate": 0.000201,
"loss": 0.0497,
"step": 3300
},
{
"epoch": 33.0,
"eval_loss": 4.112032890319824,
"eval_rouge1": 0.38502854106397544,
"eval_rouge2": 0.24923225337454086,
"eval_rougeL": 0.341407070704573,
"eval_rougeLsum": 0.35156091483395113,
"eval_runtime": 84.7807,
"eval_samples_per_second": 3.149,
"eval_steps_per_second": 0.79,
"step": 3300
},
{
"epoch": 34.0,
"learning_rate": 0.000198,
"loss": 0.0501,
"step": 3400
},
{
"epoch": 34.0,
"eval_loss": 3.95038104057312,
"eval_rouge1": 0.38531511762507886,
"eval_rouge2": 0.25053367172713387,
"eval_rougeL": 0.341979636574758,
"eval_rougeLsum": 0.3530873088985891,
"eval_runtime": 85.2338,
"eval_samples_per_second": 3.133,
"eval_steps_per_second": 0.786,
"step": 3400
},
{
"epoch": 35.0,
"learning_rate": 0.000195,
"loss": 0.047,
"step": 3500
},
{
"epoch": 35.0,
"eval_loss": 4.134089469909668,
"eval_rouge1": 0.38128230648241745,
"eval_rouge2": 0.2492424888975831,
"eval_rougeL": 0.34103290544972104,
"eval_rougeLsum": 0.3516319313437394,
"eval_runtime": 88.3744,
"eval_samples_per_second": 3.021,
"eval_steps_per_second": 0.758,
"step": 3500
},
{
"epoch": 36.0,
"learning_rate": 0.00019199999999999998,
"loss": 0.0453,
"step": 3600
},
{
"epoch": 36.0,
"eval_loss": 4.057723045349121,
"eval_rouge1": 0.3846704975418777,
"eval_rouge2": 0.2541194854411035,
"eval_rougeL": 0.3458972635607298,
"eval_rougeLsum": 0.3547659584174555,
"eval_runtime": 85.1914,
"eval_samples_per_second": 3.134,
"eval_steps_per_second": 0.786,
"step": 3600
},
{
"epoch": 37.0,
"learning_rate": 0.00018899999999999999,
"loss": 0.0462,
"step": 3700
},
{
"epoch": 37.0,
"eval_loss": 4.16575288772583,
"eval_rouge1": 0.38354434412263094,
"eval_rouge2": 0.2510921708756821,
"eval_rougeL": 0.34240824035427875,
"eval_rougeLsum": 0.35173537331908256,
"eval_runtime": 87.6215,
"eval_samples_per_second": 3.047,
"eval_steps_per_second": 0.765,
"step": 3700
},
{
"epoch": 38.0,
"learning_rate": 0.000186,
"loss": 0.0424,
"step": 3800
},
{
"epoch": 38.0,
"eval_loss": 4.315454483032227,
"eval_rouge1": 0.38423210902707555,
"eval_rouge2": 0.24894255969353363,
"eval_rougeL": 0.342343883008779,
"eval_rougeLsum": 0.3524699587337098,
"eval_runtime": 87.9865,
"eval_samples_per_second": 3.035,
"eval_steps_per_second": 0.761,
"step": 3800
},
{
"epoch": 39.0,
"learning_rate": 0.00018299999999999998,
"loss": 0.0412,
"step": 3900
},
{
"epoch": 39.0,
"eval_loss": 4.139455318450928,
"eval_rouge1": 0.3888396904741606,
"eval_rouge2": 0.25152424393630335,
"eval_rougeL": 0.3452689238846711,
"eval_rougeLsum": 0.3565387881051132,
"eval_runtime": 88.7657,
"eval_samples_per_second": 3.008,
"eval_steps_per_second": 0.755,
"step": 3900
},
{
"epoch": 40.0,
"learning_rate": 0.00017999999999999998,
"loss": 0.0405,
"step": 4000
},
{
"epoch": 40.0,
"eval_loss": 4.292513847351074,
"eval_rouge1": 0.3849173670043722,
"eval_rouge2": 0.2516518779916327,
"eval_rougeL": 0.34160286102286824,
"eval_rougeLsum": 0.35354307416998204,
"eval_runtime": 87.0473,
"eval_samples_per_second": 3.067,
"eval_steps_per_second": 0.77,
"step": 4000
},
{
"epoch": 41.0,
"learning_rate": 0.00017699999999999997,
"loss": 0.0337,
"step": 4100
},
{
"epoch": 41.0,
"eval_loss": 4.333091735839844,
"eval_rouge1": 0.38820912557719256,
"eval_rouge2": 0.2534094086585099,
"eval_rougeL": 0.3455980647738194,
"eval_rougeLsum": 0.3547951760533251,
"eval_runtime": 88.6931,
"eval_samples_per_second": 3.01,
"eval_steps_per_second": 0.755,
"step": 4100
},
{
"epoch": 42.0,
"learning_rate": 0.00017399999999999997,
"loss": 0.034,
"step": 4200
},
{
"epoch": 42.0,
"eval_loss": 4.243014335632324,
"eval_rouge1": 0.3856953885948385,
"eval_rouge2": 0.2513949176068939,
"eval_rougeL": 0.34297989861385025,
"eval_rougeLsum": 0.352859061364724,
"eval_runtime": 88.2977,
"eval_samples_per_second": 3.024,
"eval_steps_per_second": 0.759,
"step": 4200
},
{
"epoch": 43.0,
"learning_rate": 0.00017099999999999998,
"loss": 0.0352,
"step": 4300
},
{
"epoch": 43.0,
"eval_loss": 4.183932304382324,
"eval_rouge1": 0.3806969958693784,
"eval_rouge2": 0.2473825570846937,
"eval_rougeL": 0.33932671729837177,
"eval_rougeLsum": 0.34894151771308557,
"eval_runtime": 80.31,
"eval_samples_per_second": 3.325,
"eval_steps_per_second": 0.834,
"step": 4300
},
{
"epoch": 44.0,
"learning_rate": 0.000168,
"loss": 0.0324,
"step": 4400
},
{
"epoch": 44.0,
"eval_loss": 4.373414039611816,
"eval_rouge1": 0.3786215226813438,
"eval_rouge2": 0.2465198631844377,
"eval_rougeL": 0.33922712749886663,
"eval_rougeLsum": 0.34917815226425697,
"eval_runtime": 77.6446,
"eval_samples_per_second": 3.439,
"eval_steps_per_second": 0.863,
"step": 4400
},
{
"epoch": 45.0,
"learning_rate": 0.000165,
"loss": 0.0286,
"step": 4500
},
{
"epoch": 45.0,
"eval_loss": 4.281849384307861,
"eval_rouge1": 0.38349120769596134,
"eval_rouge2": 0.2492446761527376,
"eval_rougeL": 0.34069084890798684,
"eval_rougeLsum": 0.35114847864272203,
"eval_runtime": 88.42,
"eval_samples_per_second": 3.02,
"eval_steps_per_second": 0.758,
"step": 4500
},
{
"epoch": 46.0,
"learning_rate": 0.000162,
"loss": 0.0282,
"step": 4600
},
{
"epoch": 46.0,
"eval_loss": 4.2815632820129395,
"eval_rouge1": 0.3826976100476265,
"eval_rouge2": 0.2509544107400279,
"eval_rougeL": 0.34278980646197255,
"eval_rougeLsum": 0.3538927647811483,
"eval_runtime": 85.7947,
"eval_samples_per_second": 3.112,
"eval_steps_per_second": 0.781,
"step": 4600
},
{
"epoch": 47.0,
"learning_rate": 0.000159,
"loss": 0.028,
"step": 4700
},
{
"epoch": 47.0,
"eval_loss": 4.38587760925293,
"eval_rouge1": 0.3849127298046784,
"eval_rouge2": 0.24789381337766286,
"eval_rougeL": 0.34234361939069524,
"eval_rougeLsum": 0.35325038564459454,
"eval_runtime": 76.1287,
"eval_samples_per_second": 3.507,
"eval_steps_per_second": 0.88,
"step": 4700
},
{
"epoch": 48.0,
"learning_rate": 0.000156,
"loss": 0.0273,
"step": 4800
},
{
"epoch": 48.0,
"eval_loss": 4.371572017669678,
"eval_rouge1": 0.38145021792566747,
"eval_rouge2": 0.2455942994201823,
"eval_rougeL": 0.33902654428346374,
"eval_rougeLsum": 0.3506993238959008,
"eval_runtime": 85.6829,
"eval_samples_per_second": 3.116,
"eval_steps_per_second": 0.782,
"step": 4800
},
{
"epoch": 49.0,
"learning_rate": 0.00015299999999999998,
"loss": 0.0242,
"step": 4900
},
{
"epoch": 49.0,
"eval_loss": 4.31757116317749,
"eval_rouge1": 0.3823041816880143,
"eval_rouge2": 0.2482259591949097,
"eval_rougeL": 0.3423574705681002,
"eval_rougeLsum": 0.3529132375278098,
"eval_runtime": 85.4774,
"eval_samples_per_second": 3.124,
"eval_steps_per_second": 0.784,
"step": 4900
},
{
"epoch": 50.0,
"learning_rate": 0.00015,
"loss": 0.0245,
"step": 5000
},
{
"epoch": 50.0,
"eval_loss": 4.483373641967773,
"eval_rouge1": 0.38129727161097815,
"eval_rouge2": 0.24693927554416154,
"eval_rougeL": 0.3404625026445964,
"eval_rougeLsum": 0.3512449096988902,
"eval_runtime": 86.2171,
"eval_samples_per_second": 3.097,
"eval_steps_per_second": 0.777,
"step": 5000
},
{
"epoch": 51.0,
"learning_rate": 0.000147,
"loss": 0.0233,
"step": 5100
},
{
"epoch": 51.0,
"eval_loss": 4.312454700469971,
"eval_rouge1": 0.3858793124229507,
"eval_rouge2": 0.2506880394304446,
"eval_rougeL": 0.3432986381645482,
"eval_rougeLsum": 0.35437517778600014,
"eval_runtime": 85.0765,
"eval_samples_per_second": 3.138,
"eval_steps_per_second": 0.788,
"step": 5100
},
{
"epoch": 52.0,
"learning_rate": 0.00014399999999999998,
"loss": 0.0226,
"step": 5200
},
{
"epoch": 52.0,
"eval_loss": 4.3415327072143555,
"eval_rouge1": 0.3857470971280717,
"eval_rouge2": 0.24913722198218521,
"eval_rougeL": 0.3436699140258663,
"eval_rougeLsum": 0.3530913028701871,
"eval_runtime": 88.4089,
"eval_samples_per_second": 3.02,
"eval_steps_per_second": 0.758,
"step": 5200
},
{
"epoch": 53.0,
"learning_rate": 0.00014099999999999998,
"loss": 0.0229,
"step": 5300
},
{
"epoch": 53.0,
"eval_loss": 4.483485698699951,
"eval_rouge1": 0.38253789740936506,
"eval_rouge2": 0.2479324630750173,
"eval_rougeL": 0.34339596051490895,
"eval_rougeLsum": 0.3517531826655991,
"eval_runtime": 88.363,
"eval_samples_per_second": 3.022,
"eval_steps_per_second": 0.758,
"step": 5300
},
{
"epoch": 54.0,
"learning_rate": 0.000138,
"loss": 0.0205,
"step": 5400
},
{
"epoch": 54.0,
"eval_loss": 4.5731024742126465,
"eval_rouge1": 0.3844011329346221,
"eval_rouge2": 0.24959249662582936,
"eval_rougeL": 0.3437539050438877,
"eval_rougeLsum": 0.3524112747853908,
"eval_runtime": 87.6537,
"eval_samples_per_second": 3.046,
"eval_steps_per_second": 0.764,
"step": 5400
},
{
"epoch": 55.0,
"learning_rate": 0.000135,
"loss": 0.0194,
"step": 5500
},
{
"epoch": 55.0,
"eval_loss": 4.562352657318115,
"eval_rouge1": 0.37834930186879323,
"eval_rouge2": 0.24392421472693115,
"eval_rougeL": 0.33801495271361487,
"eval_rougeLsum": 0.34801929824525824,
"eval_runtime": 74.8854,
"eval_samples_per_second": 3.565,
"eval_steps_per_second": 0.895,
"step": 5500
},
{
"epoch": 56.0,
"learning_rate": 0.00013199999999999998,
"loss": 0.0195,
"step": 5600
},
{
"epoch": 56.0,
"eval_loss": 4.631711483001709,
"eval_rouge1": 0.38562125740872527,
"eval_rouge2": 0.25396377473858134,
"eval_rougeL": 0.34647449965497257,
"eval_rougeLsum": 0.3559086726171456,
"eval_runtime": 80.6509,
"eval_samples_per_second": 3.311,
"eval_steps_per_second": 0.831,
"step": 5600
},
{
"epoch": 57.0,
"learning_rate": 0.000129,
"loss": 0.0187,
"step": 5700
},
{
"epoch": 57.0,
"eval_loss": 4.58750581741333,
"eval_rouge1": 0.3810792743714226,
"eval_rouge2": 0.24677881916117747,
"eval_rougeL": 0.3413798314429182,
"eval_rougeLsum": 0.3512899517237632,
"eval_runtime": 87.4748,
"eval_samples_per_second": 3.052,
"eval_steps_per_second": 0.766,
"step": 5700
},
{
"epoch": 58.0,
"learning_rate": 0.00012599999999999997,
"loss": 0.0184,
"step": 5800
},
{
"epoch": 58.0,
"eval_loss": 4.566098213195801,
"eval_rouge1": 0.3816478906137458,
"eval_rouge2": 0.24638333085125486,
"eval_rougeL": 0.34077211167545884,
"eval_rougeLsum": 0.3515136659487619,
"eval_runtime": 88.8228,
"eval_samples_per_second": 3.006,
"eval_steps_per_second": 0.754,
"step": 5800
},
{
"epoch": 59.0,
"learning_rate": 0.00012299999999999998,
"loss": 0.0181,
"step": 5900
},
{
"epoch": 59.0,
"eval_loss": 4.4710798263549805,
"eval_rouge1": 0.38077163264424585,
"eval_rouge2": 0.24804038660323457,
"eval_rougeL": 0.3439666979964766,
"eval_rougeLsum": 0.3531348236604766,
"eval_runtime": 85.4002,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 0.785,
"step": 5900
},
{
"epoch": 60.0,
"learning_rate": 0.00011999999999999999,
"loss": 0.0191,
"step": 6000
},
{
"epoch": 60.0,
"eval_loss": 4.539062023162842,
"eval_rouge1": 0.38696808219881823,
"eval_rouge2": 0.25247148225241317,
"eval_rougeL": 0.34520470532094527,
"eval_rougeLsum": 0.3549651652722977,
"eval_runtime": 85.8906,
"eval_samples_per_second": 3.109,
"eval_steps_per_second": 0.78,
"step": 6000
},
{
"epoch": 61.0,
"learning_rate": 0.000117,
"loss": 0.0159,
"step": 6100
},
{
"epoch": 61.0,
"eval_loss": 4.586240768432617,
"eval_rouge1": 0.3859647235684582,
"eval_rouge2": 0.25007880823210193,
"eval_rougeL": 0.3429738967595123,
"eval_rougeLsum": 0.3537661741374731,
"eval_runtime": 85.7948,
"eval_samples_per_second": 3.112,
"eval_steps_per_second": 0.781,
"step": 6100
},
{
"epoch": 62.0,
"learning_rate": 0.00011399999999999999,
"loss": 0.015,
"step": 6200
},
{
"epoch": 62.0,
"eval_loss": 4.743378639221191,
"eval_rouge1": 0.3846367169172429,
"eval_rouge2": 0.24839404159206457,
"eval_rougeL": 0.34243506876400887,
"eval_rougeLsum": 0.3538437492007503,
"eval_runtime": 85.518,
"eval_samples_per_second": 3.122,
"eval_steps_per_second": 0.783,
"step": 6200
},
{
"epoch": 63.0,
"learning_rate": 0.00011099999999999999,
"loss": 0.0149,
"step": 6300
},
{
"epoch": 63.0,
"eval_loss": 4.629330635070801,
"eval_rouge1": 0.38030212626045157,
"eval_rouge2": 0.24751450469686587,
"eval_rougeL": 0.33809073704758,
"eval_rougeLsum": 0.3483896449835656,
"eval_runtime": 80.0249,
"eval_samples_per_second": 3.336,
"eval_steps_per_second": 0.837,
"step": 6300
},
{
"epoch": 64.0,
"learning_rate": 0.00010799999999999998,
"loss": 0.0126,
"step": 6400
},
{
"epoch": 64.0,
"eval_loss": 4.710384368896484,
"eval_rouge1": 0.3842642647245845,
"eval_rouge2": 0.2512060270485085,
"eval_rougeL": 0.34379327783470054,
"eval_rougeLsum": 0.3545941692511255,
"eval_runtime": 85.176,
"eval_samples_per_second": 3.135,
"eval_steps_per_second": 0.787,
"step": 6400
},
{
"epoch": 65.0,
"learning_rate": 0.00010499999999999999,
"loss": 0.0138,
"step": 6500
},
{
"epoch": 65.0,
"eval_loss": 4.689241886138916,
"eval_rouge1": 0.38298024943411063,
"eval_rouge2": 0.25101113690854776,
"eval_rougeL": 0.3456091975587352,
"eval_rougeLsum": 0.3550951514101951,
"eval_runtime": 88.4731,
"eval_samples_per_second": 3.018,
"eval_steps_per_second": 0.757,
"step": 6500
},
{
"epoch": 66.0,
"learning_rate": 0.000102,
"loss": 0.0145,
"step": 6600
},
{
"epoch": 66.0,
"eval_loss": 4.557338237762451,
"eval_rouge1": 0.380241659345879,
"eval_rouge2": 0.24617583106160457,
"eval_rougeL": 0.3415380685661965,
"eval_rougeLsum": 0.3528959521802908,
"eval_runtime": 88.0004,
"eval_samples_per_second": 3.034,
"eval_steps_per_second": 0.761,
"step": 6600
},
{
"epoch": 67.0,
"learning_rate": 9.9e-05,
"loss": 0.014,
"step": 6700
},
{
"epoch": 67.0,
"eval_loss": 4.767906188964844,
"eval_rouge1": 0.38450247499658086,
"eval_rouge2": 0.24867049264532454,
"eval_rougeL": 0.3433152946814418,
"eval_rougeLsum": 0.35393881134127675,
"eval_runtime": 86.9796,
"eval_samples_per_second": 3.07,
"eval_steps_per_second": 0.77,
"step": 6700
},
{
"epoch": 68.0,
"learning_rate": 9.599999999999999e-05,
"loss": 0.0115,
"step": 6800
},
{
"epoch": 68.0,
"eval_loss": 4.784436225891113,
"eval_rouge1": 0.38354762861806485,
"eval_rouge2": 0.24990989075753453,
"eval_rougeL": 0.342385020020958,
"eval_rougeLsum": 0.35342424612693624,
"eval_runtime": 84.5851,
"eval_samples_per_second": 3.157,
"eval_steps_per_second": 0.792,
"step": 6800
},
{
"epoch": 69.0,
"learning_rate": 9.3e-05,
"loss": 0.012,
"step": 6900
},
{
"epoch": 69.0,
"eval_loss": 4.792604923248291,
"eval_rouge1": 0.3781881725835631,
"eval_rouge2": 0.24531643415669946,
"eval_rougeL": 0.3374030505076982,
"eval_rougeLsum": 0.34856794790036694,
"eval_runtime": 83.9778,
"eval_samples_per_second": 3.179,
"eval_steps_per_second": 0.798,
"step": 6900
},
{
"epoch": 70.0,
"learning_rate": 8.999999999999999e-05,
"loss": 0.0112,
"step": 7000
},
{
"epoch": 70.0,
"eval_loss": 4.755610942840576,
"eval_rouge1": 0.3817160833408457,
"eval_rouge2": 0.24591914048878963,
"eval_rougeL": 0.3399489131887798,
"eval_rougeLsum": 0.34979297011876465,
"eval_runtime": 84.4798,
"eval_samples_per_second": 3.161,
"eval_steps_per_second": 0.793,
"step": 7000
},
{
"epoch": 71.0,
"learning_rate": 8.699999999999999e-05,
"loss": 0.0119,
"step": 7100
},
{
"epoch": 71.0,
"eval_loss": 4.798295974731445,
"eval_rouge1": 0.3851093231346726,
"eval_rouge2": 0.2537220090344301,
"eval_rougeL": 0.345936300548305,
"eval_rougeLsum": 0.35628312786353933,
"eval_runtime": 85.2857,
"eval_samples_per_second": 3.131,
"eval_steps_per_second": 0.786,
"step": 7100
},
{
"epoch": 72.0,
"learning_rate": 8.4e-05,
"loss": 0.0114,
"step": 7200
},
{
"epoch": 72.0,
"eval_loss": 4.856568813323975,
"eval_rouge1": 0.38245157631697796,
"eval_rouge2": 0.2494936430554049,
"eval_rougeL": 0.34129479377535843,
"eval_rougeLsum": 0.35318946273145274,
"eval_runtime": 86.7372,
"eval_samples_per_second": 3.078,
"eval_steps_per_second": 0.772,
"step": 7200
},
{
"epoch": 73.0,
"learning_rate": 8.1e-05,
"loss": 0.0098,
"step": 7300
},
{
"epoch": 73.0,
"eval_loss": 4.907617092132568,
"eval_rouge1": 0.38646161684013636,
"eval_rouge2": 0.24944761609509186,
"eval_rougeL": 0.3455163948277883,
"eval_rougeLsum": 0.35471516616705556,
"eval_runtime": 85.0789,
"eval_samples_per_second": 3.138,
"eval_steps_per_second": 0.788,
"step": 7300
},
{
"epoch": 74.0,
"learning_rate": 7.8e-05,
"loss": 0.0108,
"step": 7400
},
{
"epoch": 74.0,
"eval_loss": 4.841740608215332,
"eval_rouge1": 0.38215421507562997,
"eval_rouge2": 0.244320679535119,
"eval_rougeL": 0.33902569869492843,
"eval_rougeLsum": 0.34863305654053733,
"eval_runtime": 83.1089,
"eval_samples_per_second": 3.213,
"eval_steps_per_second": 0.806,
"step": 7400
},
{
"epoch": 75.0,
"learning_rate": 7.5e-05,
"loss": 0.0098,
"step": 7500
},
{
"epoch": 75.0,
"eval_loss": 4.904059886932373,
"eval_rouge1": 0.3843792931439769,
"eval_rouge2": 0.25004411752753497,
"eval_rougeL": 0.3433196711815563,
"eval_rougeLsum": 0.3532736703238627,
"eval_runtime": 76.8263,
"eval_samples_per_second": 3.475,
"eval_steps_per_second": 0.872,
"step": 7500
},
{
"epoch": 76.0,
"learning_rate": 7.199999999999999e-05,
"loss": 0.0107,
"step": 7600
},
{
"epoch": 76.0,
"eval_loss": 4.855184555053711,
"eval_rouge1": 0.3829128308316423,
"eval_rouge2": 0.2462514914494317,
"eval_rougeL": 0.34099347275539427,
"eval_rougeLsum": 0.35125207376644263,
"eval_runtime": 88.1794,
"eval_samples_per_second": 3.028,
"eval_steps_per_second": 0.76,
"step": 7600
},
{
"epoch": 77.0,
"learning_rate": 6.9e-05,
"loss": 0.0087,
"step": 7700
},
{
"epoch": 77.0,
"eval_loss": 4.914191246032715,
"eval_rouge1": 0.3858568840845371,
"eval_rouge2": 0.2502511174813851,
"eval_rougeL": 0.3440629332400241,
"eval_rougeLsum": 0.3544972438925257,
"eval_runtime": 76.5236,
"eval_samples_per_second": 3.489,
"eval_steps_per_second": 0.876,
"step": 7700
},
{
"epoch": 78.0,
"learning_rate": 6.599999999999999e-05,
"loss": 0.0083,
"step": 7800
},
{
"epoch": 78.0,
"eval_loss": 4.938214302062988,
"eval_rouge1": 0.38030330309287785,
"eval_rouge2": 0.24533353097305155,
"eval_rougeL": 0.33927399233566136,
"eval_rougeLsum": 0.34936840630758315,
"eval_runtime": 86.2001,
"eval_samples_per_second": 3.097,
"eval_steps_per_second": 0.777,
"step": 7800
},
{
"epoch": 79.0,
"learning_rate": 6.299999999999999e-05,
"loss": 0.0092,
"step": 7900
},
{
"epoch": 79.0,
"eval_loss": 4.884538173675537,
"eval_rouge1": 0.38434467710443954,
"eval_rouge2": 0.2497101085906894,
"eval_rougeL": 0.3426642731168459,
"eval_rougeLsum": 0.35354404579979554,
"eval_runtime": 83.0616,
"eval_samples_per_second": 3.214,
"eval_steps_per_second": 0.807,
"step": 7900
},
{
"epoch": 80.0,
"learning_rate": 5.9999999999999995e-05,
"loss": 0.0072,
"step": 8000
},
{
"epoch": 80.0,
"eval_loss": 4.894115924835205,
"eval_rouge1": 0.38544645384117493,
"eval_rouge2": 0.2508322384005437,
"eval_rougeL": 0.3447843356379286,
"eval_rougeLsum": 0.3554394657131237,
"eval_runtime": 83.5657,
"eval_samples_per_second": 3.195,
"eval_steps_per_second": 0.802,
"step": 8000
},
{
"epoch": 81.0,
"learning_rate": 5.6999999999999996e-05,
"loss": 0.0077,
"step": 8100
},
{
"epoch": 81.0,
"eval_loss": 4.973346710205078,
"eval_rouge1": 0.387446314618786,
"eval_rouge2": 0.2529628384702761,
"eval_rougeL": 0.3469758611038962,
"eval_rougeLsum": 0.3572774036661956,
"eval_runtime": 87.8578,
"eval_samples_per_second": 3.039,
"eval_steps_per_second": 0.763,
"step": 8100
},
{
"epoch": 82.0,
"learning_rate": 5.399999999999999e-05,
"loss": 0.008,
"step": 8200
},
{
"epoch": 82.0,
"eval_loss": 4.913274765014648,
"eval_rouge1": 0.3829560613774901,
"eval_rouge2": 0.25172789870975754,
"eval_rougeL": 0.34496794149173104,
"eval_rougeLsum": 0.35508158648356575,
"eval_runtime": 85.6734,
"eval_samples_per_second": 3.116,
"eval_steps_per_second": 0.782,
"step": 8200
},
{
"epoch": 83.0,
"learning_rate": 5.1e-05,
"loss": 0.0075,
"step": 8300
},
{
"epoch": 83.0,
"eval_loss": 4.925784587860107,
"eval_rouge1": 0.385854429603814,
"eval_rouge2": 0.2531988664068715,
"eval_rougeL": 0.346050946742826,
"eval_rougeLsum": 0.35696920925711373,
"eval_runtime": 75.2297,
"eval_samples_per_second": 3.549,
"eval_steps_per_second": 0.891,
"step": 8300
},
{
"epoch": 84.0,
"learning_rate": 4.7999999999999994e-05,
"loss": 0.0076,
"step": 8400
},
{
"epoch": 84.0,
"eval_loss": 4.87917423248291,
"eval_rouge1": 0.38643959051361326,
"eval_rouge2": 0.24825036815478088,
"eval_rougeL": 0.34543928091296466,
"eval_rougeLsum": 0.3560830652803086,
"eval_runtime": 82.6422,
"eval_samples_per_second": 3.231,
"eval_steps_per_second": 0.811,
"step": 8400
},
{
"epoch": 85.0,
"learning_rate": 4.4999999999999996e-05,
"loss": 0.0073,
"step": 8500
},
{
"epoch": 85.0,
"eval_loss": 4.9377264976501465,
"eval_rouge1": 0.38212230866847685,
"eval_rouge2": 0.24573634944784611,
"eval_rougeL": 0.34138760450808503,
"eval_rougeLsum": 0.3523812948696121,
"eval_runtime": 85.716,
"eval_samples_per_second": 3.115,
"eval_steps_per_second": 0.782,
"step": 8500
},
{
"epoch": 86.0,
"learning_rate": 4.2e-05,
"loss": 0.0062,
"step": 8600
},
{
"epoch": 86.0,
"eval_loss": 5.010465621948242,
"eval_rouge1": 0.38248533103068416,
"eval_rouge2": 0.24842178766177614,
"eval_rougeL": 0.3425108161941025,
"eval_rougeLsum": 0.35344680511965376,
"eval_runtime": 85.9789,
"eval_samples_per_second": 3.105,
"eval_steps_per_second": 0.779,
"step": 8600
},
{
"epoch": 87.0,
"learning_rate": 3.9e-05,
"loss": 0.0067,
"step": 8700
},
{
"epoch": 87.0,
"eval_loss": 5.000132083892822,
"eval_rouge1": 0.3844108716094221,
"eval_rouge2": 0.25183492085762804,
"eval_rougeL": 0.34541276301829477,
"eval_rougeLsum": 0.35582368167109873,
"eval_runtime": 85.4159,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 0.784,
"step": 8700
},
{
"epoch": 88.0,
"learning_rate": 3.5999999999999994e-05,
"loss": 0.0064,
"step": 8800
},
{
"epoch": 88.0,
"eval_loss": 4.985653877258301,
"eval_rouge1": 0.3842791176433939,
"eval_rouge2": 0.24890479606772847,
"eval_rougeL": 0.34373666785104745,
"eval_rougeLsum": 0.35402492917866446,
"eval_runtime": 88.491,
"eval_samples_per_second": 3.017,
"eval_steps_per_second": 0.757,
"step": 8800
},
{
"epoch": 89.0,
"learning_rate": 3.2999999999999996e-05,
"loss": 0.0064,
"step": 8900
},
{
"epoch": 89.0,
"eval_loss": 5.027814865112305,
"eval_rouge1": 0.38374734558183804,
"eval_rouge2": 0.24654112382289783,
"eval_rougeL": 0.3418827464509472,
"eval_rougeLsum": 0.35332526206489273,
"eval_runtime": 88.8366,
"eval_samples_per_second": 3.006,
"eval_steps_per_second": 0.754,
"step": 8900
},
{
"epoch": 90.0,
"learning_rate": 2.9999999999999997e-05,
"loss": 0.0061,
"step": 9000
},
{
"epoch": 90.0,
"eval_loss": 5.058474063873291,
"eval_rouge1": 0.3820717972330683,
"eval_rouge2": 0.2461904812224382,
"eval_rougeL": 0.340562678348688,
"eval_rougeLsum": 0.35149040221377614,
"eval_runtime": 87.2966,
"eval_samples_per_second": 3.059,
"eval_steps_per_second": 0.767,
"step": 9000
},
{
"epoch": 91.0,
"learning_rate": 2.6999999999999996e-05,
"loss": 0.007,
"step": 9100
},
{
"epoch": 91.0,
"eval_loss": 5.010220050811768,
"eval_rouge1": 0.3844908418040261,
"eval_rouge2": 0.25006688388196396,
"eval_rougeL": 0.3429817235545874,
"eval_rougeLsum": 0.35434878150140914,
"eval_runtime": 79.8527,
"eval_samples_per_second": 3.344,
"eval_steps_per_second": 0.839,
"step": 9100
},
{
"epoch": 92.0,
"learning_rate": 2.3999999999999997e-05,
"loss": 0.0059,
"step": 9200
},
{
"epoch": 92.0,
"eval_loss": 5.030458450317383,
"eval_rouge1": 0.38284739488530695,
"eval_rouge2": 0.2494251591428921,
"eval_rougeL": 0.34146682998770905,
"eval_rougeLsum": 0.3524988183011109,
"eval_runtime": 86.6859,
"eval_samples_per_second": 3.08,
"eval_steps_per_second": 0.773,
"step": 9200
},
{
"epoch": 93.0,
"learning_rate": 2.1e-05,
"loss": 0.0066,
"step": 9300
},
{
"epoch": 93.0,
"eval_loss": 4.984446048736572,
"eval_rouge1": 0.38152251971542894,
"eval_rouge2": 0.24878451374892646,
"eval_rougeL": 0.3406473614314545,
"eval_rougeLsum": 0.3513187114756059,
"eval_runtime": 88.5244,
"eval_samples_per_second": 3.016,
"eval_steps_per_second": 0.757,
"step": 9300
},
{
"epoch": 94.0,
"learning_rate": 1.7999999999999997e-05,
"loss": 0.0058,
"step": 9400
},
{
"epoch": 94.0,
"eval_loss": 4.990396022796631,
"eval_rouge1": 0.3825136945109377,
"eval_rouge2": 0.24843252471104438,
"eval_rougeL": 0.3422545305896276,
"eval_rougeLsum": 0.352581643511201,
"eval_runtime": 85.262,
"eval_samples_per_second": 3.132,
"eval_steps_per_second": 0.786,
"step": 9400
},
{
"epoch": 95.0,
"learning_rate": 1.4999999999999999e-05,
"loss": 0.006,
"step": 9500
},
{
"epoch": 95.0,
"eval_loss": 4.97911262512207,
"eval_rouge1": 0.38343787306408117,
"eval_rouge2": 0.24997418070032396,
"eval_rougeL": 0.3422221622019714,
"eval_rougeLsum": 0.35352360810152816,
"eval_runtime": 81.5407,
"eval_samples_per_second": 3.274,
"eval_steps_per_second": 0.822,
"step": 9500
},
{
"epoch": 96.0,
"learning_rate": 1.1999999999999999e-05,
"loss": 0.0049,
"step": 9600
},
{
"epoch": 96.0,
"eval_loss": 5.016595363616943,
"eval_rouge1": 0.3837960479091025,
"eval_rouge2": 0.24885786511102678,
"eval_rougeL": 0.3424026645718613,
"eval_rougeLsum": 0.35330084315799287,
"eval_runtime": 88.6796,
"eval_samples_per_second": 3.011,
"eval_steps_per_second": 0.756,
"step": 9600
},
{
"epoch": 97.0,
"learning_rate": 8.999999999999999e-06,
"loss": 0.0054,
"step": 9700
},
{
"epoch": 97.0,
"eval_loss": 5.0309343338012695,
"eval_rouge1": 0.383180407064846,
"eval_rouge2": 0.2500666839747965,
"eval_rougeL": 0.3424454114218375,
"eval_rougeLsum": 0.3533197372424365,
"eval_runtime": 87.6124,
"eval_samples_per_second": 3.048,
"eval_steps_per_second": 0.765,
"step": 9700
},
{
"epoch": 98.0,
"learning_rate": 5.999999999999999e-06,
"loss": 0.0058,
"step": 9800
},
{
"epoch": 98.0,
"eval_loss": 5.044477462768555,
"eval_rouge1": 0.3841857746723203,
"eval_rouge2": 0.2492668969413123,
"eval_rougeL": 0.34243325874354047,
"eval_rougeLsum": 0.3533788089951856,
"eval_runtime": 88.7303,
"eval_samples_per_second": 3.009,
"eval_steps_per_second": 0.755,
"step": 9800
},
{
"epoch": 99.0,
"learning_rate": 2.9999999999999997e-06,
"loss": 0.0059,
"step": 9900
},
{
"epoch": 99.0,
"eval_loss": 5.052584171295166,
"eval_rouge1": 0.38395960024638665,
"eval_rouge2": 0.2489015750324879,
"eval_rougeL": 0.3410612103718015,
"eval_rougeLsum": 0.3524355955865259,
"eval_runtime": 84.3598,
"eval_samples_per_second": 3.165,
"eval_steps_per_second": 0.794,
"step": 9900
}
],
"max_steps": 10000,
"num_train_epochs": 100,
"total_flos": 3683373994819584.0,
"trial_name": null,
"trial_params": null
}