opus_big_lsp_AoN_wce / trainer_state.json
Ethan Sim
stage best lsp AoN adapt wce model
6f14f61
{
"best_metric": 43.7339,
"best_model_checkpoint": "opus_big_lsp_adapt_wce_prop_0.8_weight_1.75/checkpoint-80000",
"epoch": 1.9716574245224892,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753635243376465e-05,
"loss": 0.1872,
"step": 8000
},
{
"epoch": 0.2,
"eval_bleu": 42.0446,
"eval_gen_len": 33.721,
"eval_loss": 0.09888985753059387,
"eval_runtime": 130.4471,
"eval_samples_per_second": 7.996,
"eval_steps_per_second": 0.506,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 1.950723967960567e-05,
"loss": 0.1556,
"step": 16000
},
{
"epoch": 0.39,
"eval_bleu": 42.2512,
"eval_gen_len": 34.093,
"eval_loss": 0.09752173721790314,
"eval_runtime": 189.6683,
"eval_samples_per_second": 5.499,
"eval_steps_per_second": 0.348,
"step": 16000
},
{
"epoch": 0.59,
"learning_rate": 1.9260874922982132e-05,
"loss": 0.1511,
"step": 24000
},
{
"epoch": 0.59,
"eval_bleu": 42.4039,
"eval_gen_len": 33.7776,
"eval_loss": 0.09804832935333252,
"eval_runtime": 125.3237,
"eval_samples_per_second": 8.322,
"eval_steps_per_second": 0.527,
"step": 24000
},
{
"epoch": 0.79,
"learning_rate": 1.901447935921134e-05,
"loss": 0.1468,
"step": 32000
},
{
"epoch": 0.79,
"eval_bleu": 42.384,
"eval_gen_len": 34.0077,
"eval_loss": 0.09726251661777496,
"eval_runtime": 127.0486,
"eval_samples_per_second": 8.209,
"eval_steps_per_second": 0.519,
"step": 32000
},
{
"epoch": 0.99,
"learning_rate": 1.8768114602587803e-05,
"loss": 0.1453,
"step": 40000
},
{
"epoch": 0.99,
"eval_bleu": 42.5465,
"eval_gen_len": 34.2138,
"eval_loss": 0.09616752713918686,
"eval_runtime": 149.6762,
"eval_samples_per_second": 6.968,
"eval_steps_per_second": 0.441,
"step": 40000
},
{
"epoch": 1.18,
"learning_rate": 1.8521749845964266e-05,
"loss": 0.1292,
"step": 48000
},
{
"epoch": 1.18,
"eval_bleu": 42.7614,
"eval_gen_len": 33.6558,
"eval_loss": 0.09714562445878983,
"eval_runtime": 140.3139,
"eval_samples_per_second": 7.433,
"eval_steps_per_second": 0.47,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 1.827535428219347e-05,
"loss": 0.1296,
"step": 56000
},
{
"epoch": 1.38,
"eval_bleu": 42.2625,
"eval_gen_len": 34.7987,
"eval_loss": 0.09766771644353867,
"eval_runtime": 149.3991,
"eval_samples_per_second": 6.981,
"eval_steps_per_second": 0.442,
"step": 56000
},
{
"epoch": 1.58,
"learning_rate": 1.8029020332717192e-05,
"loss": 0.1301,
"step": 64000
},
{
"epoch": 1.58,
"eval_bleu": 42.8366,
"eval_gen_len": 34.4842,
"eval_loss": 0.0971095860004425,
"eval_runtime": 150.9733,
"eval_samples_per_second": 6.909,
"eval_steps_per_second": 0.437,
"step": 64000
},
{
"epoch": 1.77,
"learning_rate": 1.7782624768946397e-05,
"loss": 0.1295,
"step": 72000
},
{
"epoch": 1.77,
"eval_bleu": 42.763,
"eval_gen_len": 34.9012,
"eval_loss": 0.09676354378461838,
"eval_runtime": 156.8388,
"eval_samples_per_second": 6.65,
"eval_steps_per_second": 0.421,
"step": 72000
},
{
"epoch": 1.97,
"learning_rate": 1.753626001232286e-05,
"loss": 0.1293,
"step": 80000
},
{
"epoch": 1.97,
"eval_bleu": 43.7339,
"eval_gen_len": 33.8734,
"eval_loss": 0.09614978730678558,
"eval_runtime": 162.1547,
"eval_samples_per_second": 6.432,
"eval_steps_per_second": 0.407,
"step": 80000
}
],
"max_steps": 649200,
"num_train_epochs": 16,
"total_flos": 2.5844916958632346e+17,
"trial_name": null,
"trial_params": null
}