Ethan Sim
update model with 1.0 safeguard
4895dae
{
"best_metric": 43.3155,
"best_model_checkpoint": "opus_big_ailem_random/checkpoint-80000",
"epoch": 1.9716574245224892,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753635243376465e-05,
"loss": 0.1794,
"step": 8000
},
{
"epoch": 0.2,
"eval_bleu": 42.1115,
"eval_gen_len": 33.8782,
"eval_loss": 0.09834092110395432,
"eval_runtime": 120.1201,
"eval_samples_per_second": 8.683,
"eval_steps_per_second": 0.549,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 1.950723967960567e-05,
"loss": 0.1487,
"step": 16000
},
{
"epoch": 0.39,
"eval_bleu": 42.5044,
"eval_gen_len": 34.0422,
"eval_loss": 0.09755747765302658,
"eval_runtime": 108.595,
"eval_samples_per_second": 9.604,
"eval_steps_per_second": 0.608,
"step": 16000
},
{
"epoch": 0.59,
"learning_rate": 1.926090573012939e-05,
"loss": 0.1445,
"step": 24000
},
{
"epoch": 0.59,
"eval_bleu": 42.1738,
"eval_gen_len": 33.3921,
"eval_loss": 0.09764789789915085,
"eval_runtime": 129.8931,
"eval_samples_per_second": 8.03,
"eval_steps_per_second": 0.508,
"step": 24000
},
{
"epoch": 0.79,
"learning_rate": 1.90145101663586e-05,
"loss": 0.1407,
"step": 32000
},
{
"epoch": 0.79,
"eval_bleu": 42.6761,
"eval_gen_len": 33.8092,
"eval_loss": 0.09669991582632065,
"eval_runtime": 115.0248,
"eval_samples_per_second": 9.068,
"eval_steps_per_second": 0.574,
"step": 32000
},
{
"epoch": 0.99,
"learning_rate": 1.876814540973506e-05,
"loss": 0.1392,
"step": 40000
},
{
"epoch": 0.99,
"eval_bleu": 42.5089,
"eval_gen_len": 33.814,
"eval_loss": 0.09582150727510452,
"eval_runtime": 115.0911,
"eval_samples_per_second": 9.062,
"eval_steps_per_second": 0.573,
"step": 40000
},
{
"epoch": 1.18,
"learning_rate": 1.852178065311152e-05,
"loss": 0.1241,
"step": 48000
},
{
"epoch": 1.18,
"eval_bleu": 42.6428,
"eval_gen_len": 33.954,
"eval_loss": 0.09729909896850586,
"eval_runtime": 116.307,
"eval_samples_per_second": 8.968,
"eval_steps_per_second": 0.567,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 1.8275446703635243e-05,
"loss": 0.1244,
"step": 56000
},
{
"epoch": 1.38,
"eval_bleu": 42.5021,
"eval_gen_len": 33.7661,
"eval_loss": 0.09766314178705215,
"eval_runtime": 141.066,
"eval_samples_per_second": 7.394,
"eval_steps_per_second": 0.468,
"step": 56000
},
{
"epoch": 1.58,
"learning_rate": 1.802908194701171e-05,
"loss": 0.125,
"step": 64000
},
{
"epoch": 1.58,
"eval_bleu": 42.8416,
"eval_gen_len": 33.999,
"eval_loss": 0.09677453339099884,
"eval_runtime": 152.4751,
"eval_samples_per_second": 6.84,
"eval_steps_per_second": 0.433,
"step": 64000
},
{
"epoch": 1.77,
"learning_rate": 1.7782717190388173e-05,
"loss": 0.1245,
"step": 72000
},
{
"epoch": 1.77,
"eval_bleu": 42.797,
"eval_gen_len": 34.5053,
"eval_loss": 0.09712178260087967,
"eval_runtime": 129.5634,
"eval_samples_per_second": 8.05,
"eval_steps_per_second": 0.509,
"step": 72000
},
{
"epoch": 1.97,
"learning_rate": 1.7536321626617377e-05,
"loss": 0.1242,
"step": 80000
},
{
"epoch": 1.97,
"eval_bleu": 43.3155,
"eval_gen_len": 34.0355,
"eval_loss": 0.09631923586130142,
"eval_runtime": 129.8003,
"eval_samples_per_second": 8.035,
"eval_steps_per_second": 0.508,
"step": 80000
}
],
"max_steps": 649200,
"num_train_epochs": 16,
"total_flos": 2.5844916958632346e+17,
"trial_name": null,
"trial_params": null
}