Ethan Sim
update model with 1.0 safeguard
e905ee4
{
"best_metric": 43.0883,
"best_model_checkpoint": "opus_big_ailem_adaptified/checkpoint-64000",
"epoch": 1.5773259396179915,
"global_step": 64000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753635243376465e-05,
"loss": 0.1745,
"step": 8000
},
{
"epoch": 0.2,
"eval_bleu": 42.0213,
"eval_gen_len": 33.5187,
"eval_loss": 0.0983896404504776,
"eval_runtime": 137.809,
"eval_samples_per_second": 7.568,
"eval_steps_per_second": 0.479,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 1.9507270486752928e-05,
"loss": 0.144,
"step": 16000
},
{
"epoch": 0.39,
"eval_bleu": 42.5956,
"eval_gen_len": 34.3337,
"eval_loss": 0.09711522608995438,
"eval_runtime": 202.6277,
"eval_samples_per_second": 5.147,
"eval_steps_per_second": 0.326,
"step": 16000
},
{
"epoch": 0.59,
"learning_rate": 1.9260874922982132e-05,
"loss": 0.14,
"step": 24000
},
{
"epoch": 0.59,
"eval_bleu": 42.4234,
"eval_gen_len": 33.6222,
"eval_loss": 0.09710835665464401,
"eval_runtime": 146.6065,
"eval_samples_per_second": 7.114,
"eval_steps_per_second": 0.45,
"step": 24000
},
{
"epoch": 0.79,
"learning_rate": 1.90145101663586e-05,
"loss": 0.1363,
"step": 32000
},
{
"epoch": 0.79,
"eval_bleu": 43.0677,
"eval_gen_len": 33.6654,
"eval_loss": 0.0967707633972168,
"eval_runtime": 133.705,
"eval_samples_per_second": 7.801,
"eval_steps_per_second": 0.494,
"step": 32000
},
{
"epoch": 0.99,
"learning_rate": 1.876814540973506e-05,
"loss": 0.1349,
"step": 40000
},
{
"epoch": 0.99,
"eval_bleu": 42.7333,
"eval_gen_len": 34.8533,
"eval_loss": 0.09592821449041367,
"eval_runtime": 131.6304,
"eval_samples_per_second": 7.924,
"eval_steps_per_second": 0.501,
"step": 40000
},
{
"epoch": 1.18,
"learning_rate": 1.8521749845964266e-05,
"loss": 0.1201,
"step": 48000
},
{
"epoch": 1.18,
"eval_bleu": 42.8479,
"eval_gen_len": 34.2771,
"eval_loss": 0.09712815284729004,
"eval_runtime": 129.1233,
"eval_samples_per_second": 8.078,
"eval_steps_per_second": 0.511,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 1.827535428219347e-05,
"loss": 0.1205,
"step": 56000
},
{
"epoch": 1.38,
"eval_bleu": 42.4623,
"eval_gen_len": 34.1122,
"eval_loss": 0.09739168733358383,
"eval_runtime": 183.6808,
"eval_samples_per_second": 5.678,
"eval_steps_per_second": 0.359,
"step": 56000
},
{
"epoch": 1.58,
"learning_rate": 1.8029020332717192e-05,
"loss": 0.1211,
"step": 64000
},
{
"epoch": 1.58,
"eval_bleu": 43.0883,
"eval_gen_len": 34.9319,
"eval_loss": 0.09650042653083801,
"eval_runtime": 173.7588,
"eval_samples_per_second": 6.003,
"eval_steps_per_second": 0.38,
"step": 64000
}
],
"max_steps": 649200,
"num_train_epochs": 16,
"total_flos": 2.0676742327723622e+17,
"trial_name": null,
"trial_params": null
}