opus_big_simple_freq_wce / trainer_state.json
Ethan Sim
restage best simple adapt wce model
996d860
{
"best_metric": 43.8722,
"best_model_checkpoint": "opus_big_adapt_wce_gloss_train-sampled_bands_5_ubweight_1.75/checkpoint-80000",
"epoch": 1.9716574245224892,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753635243376465e-05,
"loss": 0.1776,
"step": 8000
},
{
"epoch": 0.2,
"eval_bleu": 42.0642,
"eval_gen_len": 33.7881,
"eval_loss": 0.09869997948408127,
"eval_runtime": 84.9196,
"eval_samples_per_second": 12.282,
"eval_steps_per_second": 0.777,
"step": 8000
},
{
"epoch": 0.39,
"learning_rate": 1.950723967960567e-05,
"loss": 0.1467,
"step": 16000
},
{
"epoch": 0.39,
"eval_bleu": 41.7526,
"eval_gen_len": 34.7795,
"eval_loss": 0.09779118001461029,
"eval_runtime": 93.0132,
"eval_samples_per_second": 11.213,
"eval_steps_per_second": 0.71,
"step": 16000
},
{
"epoch": 0.59,
"learning_rate": 1.926090573012939e-05,
"loss": 0.1426,
"step": 24000
},
{
"epoch": 0.59,
"eval_bleu": 42.0584,
"eval_gen_len": 33.464,
"eval_loss": 0.09806442260742188,
"eval_runtime": 80.6234,
"eval_samples_per_second": 12.937,
"eval_steps_per_second": 0.819,
"step": 24000
},
{
"epoch": 0.79,
"learning_rate": 1.9014540973505854e-05,
"loss": 0.1387,
"step": 32000
},
{
"epoch": 0.79,
"eval_bleu": 42.6266,
"eval_gen_len": 33.5254,
"eval_loss": 0.09653611481189728,
"eval_runtime": 81.9234,
"eval_samples_per_second": 12.731,
"eval_steps_per_second": 0.806,
"step": 32000
},
{
"epoch": 0.99,
"learning_rate": 1.876814540973506e-05,
"loss": 0.1373,
"step": 40000
},
{
"epoch": 0.99,
"eval_bleu": 42.6264,
"eval_gen_len": 33.9195,
"eval_loss": 0.09597118198871613,
"eval_runtime": 81.6116,
"eval_samples_per_second": 12.78,
"eval_steps_per_second": 0.809,
"step": 40000
},
{
"epoch": 1.18,
"learning_rate": 1.852178065311152e-05,
"loss": 0.1222,
"step": 48000
},
{
"epoch": 1.18,
"eval_bleu": 42.7749,
"eval_gen_len": 34.1208,
"eval_loss": 0.09734106063842773,
"eval_runtime": 94.7556,
"eval_samples_per_second": 11.007,
"eval_steps_per_second": 0.697,
"step": 48000
},
{
"epoch": 1.38,
"learning_rate": 1.8275415896487988e-05,
"loss": 0.1226,
"step": 56000
},
{
"epoch": 1.38,
"eval_bleu": 42.687,
"eval_gen_len": 34.1697,
"eval_loss": 0.09724980592727661,
"eval_runtime": 191.9924,
"eval_samples_per_second": 5.433,
"eval_steps_per_second": 0.344,
"step": 56000
},
{
"epoch": 1.58,
"learning_rate": 1.802905113986445e-05,
"loss": 0.1233,
"step": 64000
},
{
"epoch": 1.58,
"eval_bleu": 42.9609,
"eval_gen_len": 34.3432,
"eval_loss": 0.09664924442768097,
"eval_runtime": 125.3853,
"eval_samples_per_second": 8.318,
"eval_steps_per_second": 0.526,
"step": 64000
},
{
"epoch": 1.77,
"learning_rate": 1.7782717190388173e-05,
"loss": 0.1227,
"step": 72000
},
{
"epoch": 1.77,
"eval_bleu": 42.7711,
"eval_gen_len": 34.1985,
"eval_loss": 0.09645862132310867,
"eval_runtime": 85.6207,
"eval_samples_per_second": 12.182,
"eval_steps_per_second": 0.771,
"step": 72000
},
{
"epoch": 1.97,
"learning_rate": 1.7536321626617377e-05,
"loss": 0.1224,
"step": 80000
},
{
"epoch": 1.97,
"eval_bleu": 43.8722,
"eval_gen_len": 33.5513,
"eval_loss": 0.09601087123155594,
"eval_runtime": 85.5385,
"eval_samples_per_second": 12.193,
"eval_steps_per_second": 0.772,
"step": 80000
}
],
"max_steps": 649200,
"num_train_epochs": 16,
"total_flos": 2.5844916958632346e+17,
"trial_name": null,
"trial_params": null
}