opus_big_AoN_freq_wce / trainer_state.json
Ethan Sim
stage best AoN wce model
6a41f33
{
"best_metric": 43.6856,
"best_model_checkpoint": "opus_big_adapt_wce_gloss_train-sampled_prop_0.4_weight_1.25/checkpoint-80000",
"epoch": 1.9716574245224892,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753635243376465e-05,
"loss": 0.174,
"step": 8000
},
{
"epoch": 0.2,
"eval_bleu": 42.2597,
"eval_gen_len": 33.4382,
"eval_loss": 0.09827317297458649,
"eval_runtime": 183.2558,
"eval_samples_per_second": 5.691,
"eval_steps_per_second": 0.36,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 1.9507208872458414e-05,
"loss": 0.1435,
"step": 16000
},
{
"epoch": 0.39,
"eval_bleu": 42.5721,
"eval_gen_len": 33.8734,
"eval_loss": 0.09749287366867065,
"eval_runtime": 109.4359,
"eval_samples_per_second": 9.531,
"eval_steps_per_second": 0.603,
"step": 16000
},
{
"epoch": 0.59,
"learning_rate": 1.9260844115834877e-05,
"loss": 0.1396,
"step": 24000
},
{
"epoch": 0.59,
"eval_bleu": 42.6567,
"eval_gen_len": 33.2752,
"eval_loss": 0.09769554436206818,
"eval_runtime": 90.0599,
"eval_samples_per_second": 11.581,
"eval_steps_per_second": 0.733,
"step": 24000
},
{
"epoch": 0.79,
"learning_rate": 1.901447935921134e-05,
"loss": 0.1358,
"step": 32000
},
{
"epoch": 0.79,
"eval_bleu": 43.0527,
"eval_gen_len": 33.9578,
"eval_loss": 0.09660623222589493,
"eval_runtime": 89.9683,
"eval_samples_per_second": 11.593,
"eval_steps_per_second": 0.734,
"step": 32000
},
{
"epoch": 0.99,
"learning_rate": 1.8768114602587803e-05,
"loss": 0.1344,
"step": 40000
},
{
"epoch": 0.99,
"eval_bleu": 42.7129,
"eval_gen_len": 34.2579,
"eval_loss": 0.09575439989566803,
"eval_runtime": 127.2678,
"eval_samples_per_second": 8.195,
"eval_steps_per_second": 0.519,
"step": 40000
},
{
"epoch": 1.18,
"learning_rate": 1.8521749845964266e-05,
"loss": 0.1197,
"step": 48000
},
{
"epoch": 1.18,
"eval_bleu": 42.7917,
"eval_gen_len": 34.2752,
"eval_loss": 0.09715954214334488,
"eval_runtime": 88.6557,
"eval_samples_per_second": 11.765,
"eval_steps_per_second": 0.744,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 1.827538508934073e-05,
"loss": 0.1201,
"step": 56000
},
{
"epoch": 1.38,
"eval_bleu": 42.5384,
"eval_gen_len": 33.8543,
"eval_loss": 0.09737677872180939,
"eval_runtime": 113.0488,
"eval_samples_per_second": 9.226,
"eval_steps_per_second": 0.584,
"step": 56000
},
{
"epoch": 1.58,
"learning_rate": 1.8029020332717192e-05,
"loss": 0.1207,
"step": 64000
},
{
"epoch": 1.58,
"eval_bleu": 43.2782,
"eval_gen_len": 34.1745,
"eval_loss": 0.09611953049898148,
"eval_runtime": 87.1617,
"eval_samples_per_second": 11.966,
"eval_steps_per_second": 0.757,
"step": 64000
},
{
"epoch": 1.77,
"learning_rate": 1.778268638324091e-05,
"loss": 0.1202,
"step": 72000
},
{
"epoch": 1.77,
"eval_bleu": 43.0269,
"eval_gen_len": 34.255,
"eval_loss": 0.09703850746154785,
"eval_runtime": 86.3451,
"eval_samples_per_second": 12.079,
"eval_steps_per_second": 0.764,
"step": 72000
},
{
"epoch": 1.97,
"learning_rate": 1.7536321626617377e-05,
"loss": 0.1199,
"step": 80000
},
{
"epoch": 1.97,
"eval_bleu": 43.6856,
"eval_gen_len": 34.0153,
"eval_loss": 0.09606289863586426,
"eval_runtime": 81.7494,
"eval_samples_per_second": 12.759,
"eval_steps_per_second": 0.807,
"step": 80000
}
],
"max_steps": 649200,
"num_train_epochs": 16,
"total_flos": 2.5844916958632346e+17,
"trial_name": null,
"trial_params": null
}