riss / riss_css_multitask /trainer_state.json
ZzZzzO0o's picture
Upload 10 files
e0d4e88 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 25.0,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.1593,
"eval_loss": 0.5045212507247925,
"eval_runtime": 26.2663,
"eval_samples_per_second": 14.581,
"eval_sari": 34.6844,
"eval_sari_char_max_mean": 34.6844,
"eval_sari_word": 33.5477,
"eval_sari_word_max_mean": 33.5477,
"eval_steps_per_second": 1.827,
"step": 36
},
{
"epoch": 2.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.4386,
"eval_loss": 0.5030020475387573,
"eval_runtime": 26.8006,
"eval_samples_per_second": 14.291,
"eval_sari": 35.5462,
"eval_sari_char_max_mean": 35.5462,
"eval_sari_word": 34.5936,
"eval_sari_word_max_mean": 34.5936,
"eval_steps_per_second": 1.791,
"step": 72
},
{
"epoch": 3.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.7911,
"eval_loss": 0.5270289182662964,
"eval_runtime": 27.8143,
"eval_samples_per_second": 13.77,
"eval_sari": 35.3103,
"eval_sari_char_max_mean": 35.5462,
"eval_sari_word": 34.2418,
"eval_sari_word_max_mean": 34.5936,
"eval_steps_per_second": 1.726,
"step": 108
},
{
"epoch": 4.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.718,
"eval_loss": 0.5468652248382568,
"eval_runtime": 28.7271,
"eval_samples_per_second": 13.332,
"eval_sari": 36.2393,
"eval_sari_char_max_mean": 36.2393,
"eval_sari_word": 35.3615,
"eval_sari_word_max_mean": 35.3615,
"eval_steps_per_second": 1.671,
"step": 144
},
{
"epoch": 5.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.624,
"eval_loss": 0.5979703664779663,
"eval_runtime": 26.6022,
"eval_samples_per_second": 14.397,
"eval_sari": 37.235,
"eval_sari_char_max_mean": 37.235,
"eval_sari_word": 36.2177,
"eval_sari_word_max_mean": 36.2177,
"eval_steps_per_second": 1.804,
"step": 180
},
{
"epoch": 6.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.4021,
"eval_loss": 0.6683712005615234,
"eval_runtime": 25.8843,
"eval_samples_per_second": 14.797,
"eval_sari": 37.3571,
"eval_sari_char_max_mean": 37.3571,
"eval_sari_word": 36.3451,
"eval_sari_word_max_mean": 36.3451,
"eval_steps_per_second": 1.854,
"step": 216
},
{
"epoch": 7.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.6449,
"eval_loss": 0.7256711721420288,
"eval_runtime": 24.4258,
"eval_samples_per_second": 15.68,
"eval_sari": 37.6591,
"eval_sari_char_max_mean": 37.6591,
"eval_sari_word": 36.7461,
"eval_sari_word_max_mean": 36.7461,
"eval_steps_per_second": 1.965,
"step": 252
},
{
"epoch": 8.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.4595,
"eval_loss": 0.7815266251564026,
"eval_runtime": 26.2721,
"eval_samples_per_second": 14.578,
"eval_sari": 36.8698,
"eval_sari_char_max_mean": 37.6591,
"eval_sari_word": 35.9533,
"eval_sari_word_max_mean": 36.7461,
"eval_steps_per_second": 1.827,
"step": 288
},
{
"epoch": 9.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.4439,
"eval_loss": 0.8019350171089172,
"eval_runtime": 25.6585,
"eval_samples_per_second": 14.927,
"eval_sari": 38.1738,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 37.3238,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.871,
"step": 324
},
{
"epoch": 10.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.8329,
"eval_loss": 0.8630470037460327,
"eval_runtime": 27.5977,
"eval_samples_per_second": 13.878,
"eval_sari": 37.1796,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.3533,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.739,
"step": 360
},
{
"epoch": 11.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.3499,
"eval_loss": 0.8761558532714844,
"eval_runtime": 27.5091,
"eval_samples_per_second": 13.923,
"eval_sari": 37.5882,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.5694,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.745,
"step": 396
},
{
"epoch": 12.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.6945,
"eval_loss": 0.9026113748550415,
"eval_runtime": 24.8089,
"eval_samples_per_second": 15.438,
"eval_sari": 36.9476,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 35.9499,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.935,
"step": 432
},
{
"epoch": 13.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.5483,
"eval_loss": 0.8756833672523499,
"eval_runtime": 25.8298,
"eval_samples_per_second": 14.828,
"eval_sari": 37.7806,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.8399,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.858,
"step": 468
},
{
"epoch": 14.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.8851,
"eval_loss": 0.9621314406394958,
"eval_runtime": 30.1521,
"eval_samples_per_second": 12.702,
"eval_sari": 36.7856,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 35.6671,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.592,
"step": 504
},
{
"epoch": 15.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.2089,
"eval_loss": 0.9188418984413147,
"eval_runtime": 26.8027,
"eval_samples_per_second": 14.29,
"eval_sari": 37.9733,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.9371,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.791,
"step": 540
},
{
"epoch": 16.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.5718,
"eval_loss": 0.9339790940284729,
"eval_runtime": 24.4232,
"eval_samples_per_second": 15.682,
"eval_sari": 37.079,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.1099,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.965,
"step": 576
},
{
"epoch": 17.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.4726,
"eval_loss": 0.9267087578773499,
"eval_runtime": 25.0217,
"eval_samples_per_second": 15.307,
"eval_sari": 37.4181,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.4381,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.918,
"step": 612
},
{
"epoch": 18.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.859,
"eval_loss": 0.9749819040298462,
"eval_runtime": 24.5911,
"eval_samples_per_second": 15.575,
"eval_sari": 36.817,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 35.6851,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.952,
"step": 648
},
{
"epoch": 19.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.8538,
"eval_loss": 0.940323531627655,
"eval_runtime": 24.7683,
"eval_samples_per_second": 15.463,
"eval_sari": 38.0236,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 37.0731,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.938,
"step": 684
},
{
"epoch": 20.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.7154,
"eval_loss": 1.0106703042984009,
"eval_runtime": 24.8174,
"eval_samples_per_second": 15.433,
"eval_sari": 36.8683,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 35.7375,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.934,
"step": 720
},
{
"epoch": 21.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.9008,
"eval_loss": 1.0132747888565063,
"eval_runtime": 24.8125,
"eval_samples_per_second": 15.436,
"eval_sari": 36.8178,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 35.671,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.935,
"step": 756
},
{
"epoch": 22.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.5692,
"eval_loss": 0.9648149013519287,
"eval_runtime": 24.6869,
"eval_samples_per_second": 15.514,
"eval_sari": 37.9477,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 37.019,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.944,
"step": 792
},
{
"epoch": 23.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.5718,
"eval_loss": 0.9694363474845886,
"eval_runtime": 24.9269,
"eval_samples_per_second": 15.365,
"eval_sari": 37.6716,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.4779,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.926,
"step": 828
},
{
"epoch": 24.0,
"eval_bleu": 0.0,
"eval_gen_len": 50.9687,
"eval_loss": 0.9936071038246155,
"eval_runtime": 24.1972,
"eval_samples_per_second": 15.828,
"eval_sari": 37.4252,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 36.3252,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.984,
"step": 864
},
{
"epoch": 25.0,
"eval_bleu": 0.0,
"eval_gen_len": 51.7598,
"eval_loss": 1.032216191291809,
"eval_runtime": 24.8777,
"eval_samples_per_second": 15.395,
"eval_sari": 36.8138,
"eval_sari_char_max_mean": 38.1738,
"eval_sari_word": 35.6419,
"eval_sari_word_max_mean": 37.3238,
"eval_steps_per_second": 1.929,
"step": 900
},
{
"epoch": 25.0,
"step": 900,
"total_flos": 576124735979520.0,
"train_loss": 0.1075565168592665,
"train_runtime": 728.2203,
"train_samples_per_second": 9.887,
"train_steps_per_second": 1.236
}
],
"max_steps": 900,
"num_train_epochs": 25,
"total_flos": 576124735979520.0,
"trial_name": null,
"trial_params": null
}