Ethan Sim
stage best unsampled glossary simple adapt wce model
21d0b63
{
"best_metric": 43.6441,
"best_model_checkpoint": "opus_big_adapt_wce_gloss_unsampled_bands_7_ubweight_1.5/checkpoint-80000",
"epoch": 1.9716574245224892,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753635243376465e-05,
"loss": 0.1779,
"step": 8000
},
{
"epoch": 0.2,
"eval_bleu": 42.3466,
"eval_gen_len": 34.1314,
"eval_loss": 0.09819881618022919,
"eval_runtime": 147.9956,
"eval_samples_per_second": 7.048,
"eval_steps_per_second": 0.446,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 1.9507270486752928e-05,
"loss": 0.1471,
"step": 16000
},
{
"epoch": 0.39,
"eval_bleu": 42.6127,
"eval_gen_len": 34.3682,
"eval_loss": 0.09763215482234955,
"eval_runtime": 92.3945,
"eval_samples_per_second": 11.289,
"eval_steps_per_second": 0.714,
"step": 16000
},
{
"epoch": 0.59,
"learning_rate": 1.9260844115834877e-05,
"loss": 0.143,
"step": 24000
},
{
"epoch": 0.59,
"eval_bleu": 42.3437,
"eval_gen_len": 33.6031,
"eval_loss": 0.09766430407762527,
"eval_runtime": 84.1375,
"eval_samples_per_second": 12.396,
"eval_steps_per_second": 0.784,
"step": 24000
},
{
"epoch": 0.79,
"learning_rate": 1.901447935921134e-05,
"loss": 0.1391,
"step": 32000
},
{
"epoch": 0.79,
"eval_bleu": 42.6785,
"eval_gen_len": 33.7076,
"eval_loss": 0.0967756137251854,
"eval_runtime": 80.1108,
"eval_samples_per_second": 13.019,
"eval_steps_per_second": 0.824,
"step": 32000
},
{
"epoch": 0.99,
"learning_rate": 1.8768083795440544e-05,
"loss": 0.1377,
"step": 40000
},
{
"epoch": 0.99,
"eval_bleu": 42.593,
"eval_gen_len": 34.2493,
"eval_loss": 0.09624858945608139,
"eval_runtime": 89.2577,
"eval_samples_per_second": 11.685,
"eval_steps_per_second": 0.739,
"step": 40000
},
{
"epoch": 1.18,
"learning_rate": 1.8521749845964266e-05,
"loss": 0.1226,
"step": 48000
},
{
"epoch": 1.18,
"eval_bleu": 43.0072,
"eval_gen_len": 33.9262,
"eval_loss": 0.09690112620592117,
"eval_runtime": 81.2861,
"eval_samples_per_second": 12.831,
"eval_steps_per_second": 0.812,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 1.827535428219347e-05,
"loss": 0.1229,
"step": 56000
},
{
"epoch": 1.38,
"eval_bleu": 42.0603,
"eval_gen_len": 34.1515,
"eval_loss": 0.09700236469507217,
"eval_runtime": 106.3162,
"eval_samples_per_second": 9.81,
"eval_steps_per_second": 0.621,
"step": 56000
},
{
"epoch": 1.58,
"learning_rate": 1.8028989525569933e-05,
"loss": 0.1235,
"step": 64000
},
{
"epoch": 1.58,
"eval_bleu": 43.1273,
"eval_gen_len": 34.5503,
"eval_loss": 0.09687265008687973,
"eval_runtime": 104.2899,
"eval_samples_per_second": 10.001,
"eval_steps_per_second": 0.633,
"step": 64000
},
{
"epoch": 1.77,
"learning_rate": 1.7782655576093655e-05,
"loss": 0.123,
"step": 72000
},
{
"epoch": 1.77,
"eval_bleu": 42.8922,
"eval_gen_len": 34.3643,
"eval_loss": 0.09673970192670822,
"eval_runtime": 85.0152,
"eval_samples_per_second": 12.268,
"eval_steps_per_second": 0.776,
"step": 72000
},
{
"epoch": 1.97,
"learning_rate": 1.753626001232286e-05,
"loss": 0.1227,
"step": 80000
},
{
"epoch": 1.97,
"eval_bleu": 43.6441,
"eval_gen_len": 33.999,
"eval_loss": 0.09602336585521698,
"eval_runtime": 92.3918,
"eval_samples_per_second": 11.289,
"eval_steps_per_second": 0.714,
"step": 80000
}
],
"max_steps": 649200,
"num_train_epochs": 16,
"total_flos": 2.5844916958632346e+17,
"trial_name": null,
"trial_params": null
}