| loss,grad_norm,learning_rate,epoch,step,eval_loss,eval_rouge1,eval_rouge2,eval_rougeL,eval_rougeLsum,eval_comp_ratio_mean,eval_comp_ratio_p90,eval_pct_violations,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss |
| 18.5739,3.9425203800201416,8.333792059450905e-05,1.0,8074,,,,,,,,,,,,,,,, |
| ,,,1.0,8074,8.819010734558105,0.021987322512042894,0.0005887691652662004,0.02174060574738519,0.021719102613531008,0.2519929871923932,0.4666666666666667,0.017019036757610316,933.0108,12.217,1.527,,,,, |
| 11.7884,4.045487880706787,5.555976054496663e-05,2.0,16148,,,,,,,,,,,,,,,, |
| ,,,2.0,16148,7.744182586669922,0.016309030444012006,0.00023154421174099724,0.016202583107010227,0.016175665319691692,0.16790600519758392,0.3333333333333333,0.006754978506886569,641.3589,17.773,2.222,,,,, |
| 10.14,3.389225959777832,2.778160049542421e-05,3.0,24222,,,,,,,,,,,,,,,, |
| ,,,3.0,24222,7.445714950561523,0.01499721425418326,0.00017586430448733397,0.014904505382945042,0.014870292300606297,0.13242051844247021,0.2631578947368421,0.0032458987630493903,535.3758,21.292,2.662,,,,, |
| ,,,3.0,24222,,,,,,,,,,,,4094.4479,63.102,7.888,1.239675130724352e+16,13.500753769816695 |
|
|