opus_base_fine_freq_wce_unsampled / trainer_state.json
Ethan Sim
stage best unsampled glossary fine adapt wce model
f4001dc
{
"best_metric": 42.7722,
"best_model_checkpoint": "opus_base_adapt_wce_gloss_unsampled_precision_3_ubweight_1.25/checkpoint-80000",
"epoch": 3.943217665615142,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753918572555207e-05,
"loss": 0.2076,
"step": 4000
},
{
"epoch": 0.2,
"eval_bleu": 35.6766,
"eval_gen_len": 39.8754,
"eval_loss": 0.10901036113500595,
"eval_runtime": 190.0619,
"eval_samples_per_second": 5.488,
"eval_steps_per_second": 0.174,
"step": 4000
},
{
"epoch": 0.39,
"learning_rate": 1.950746746845426e-05,
"loss": 0.1743,
"step": 8000
},
{
"epoch": 0.39,
"eval_bleu": 37.712,
"eval_gen_len": 37.931,
"eval_loss": 0.10654148459434509,
"eval_runtime": 187.1025,
"eval_samples_per_second": 5.574,
"eval_steps_per_second": 0.176,
"step": 8000
},
{
"epoch": 0.59,
"learning_rate": 1.9261016364353314e-05,
"loss": 0.1672,
"step": 12000
},
{
"epoch": 0.59,
"eval_bleu": 41.1339,
"eval_gen_len": 34.9118,
"eval_loss": 0.10531440377235413,
"eval_runtime": 148.6317,
"eval_samples_per_second": 7.017,
"eval_steps_per_second": 0.222,
"step": 12000
},
{
"epoch": 0.79,
"learning_rate": 1.9014565260252367e-05,
"loss": 0.1618,
"step": 16000
},
{
"epoch": 0.79,
"eval_bleu": 41.355,
"eval_gen_len": 35.2416,
"eval_loss": 0.10421621054410934,
"eval_runtime": 124.1237,
"eval_samples_per_second": 8.403,
"eval_steps_per_second": 0.266,
"step": 16000
},
{
"epoch": 0.99,
"learning_rate": 1.876811415615142e-05,
"loss": 0.1594,
"step": 20000
},
{
"epoch": 0.99,
"eval_bleu": 41.6651,
"eval_gen_len": 35.4228,
"eval_loss": 0.10366757214069366,
"eval_runtime": 159.7806,
"eval_samples_per_second": 6.528,
"eval_steps_per_second": 0.207,
"step": 20000
},
{
"epoch": 1.18,
"learning_rate": 1.8521786277602524e-05,
"loss": 0.1477,
"step": 24000
},
{
"epoch": 1.18,
"eval_bleu": 41.9522,
"eval_gen_len": 35.0451,
"eval_loss": 0.10360125452280045,
"eval_runtime": 138.0407,
"eval_samples_per_second": 7.556,
"eval_steps_per_second": 0.239,
"step": 24000
},
{
"epoch": 1.38,
"learning_rate": 1.827545839905363e-05,
"loss": 0.1475,
"step": 28000
},
{
"epoch": 1.38,
"eval_bleu": 40.2121,
"eval_gen_len": 36.4669,
"eval_loss": 0.10278471559286118,
"eval_runtime": 128.7397,
"eval_samples_per_second": 8.102,
"eval_steps_per_second": 0.256,
"step": 28000
},
{
"epoch": 1.58,
"learning_rate": 1.8029130520504733e-05,
"loss": 0.147,
"step": 32000
},
{
"epoch": 1.58,
"eval_bleu": 41.9375,
"eval_gen_len": 35.2301,
"eval_loss": 0.10222012549638748,
"eval_runtime": 132.4166,
"eval_samples_per_second": 7.877,
"eval_steps_per_second": 0.249,
"step": 32000
},
{
"epoch": 1.77,
"learning_rate": 1.7782802641955836e-05,
"loss": 0.1456,
"step": 36000
},
{
"epoch": 1.77,
"eval_bleu": 40.9742,
"eval_gen_len": 37.1965,
"eval_loss": 0.10183104127645493,
"eval_runtime": 119.5559,
"eval_samples_per_second": 8.724,
"eval_steps_per_second": 0.276,
"step": 36000
},
{
"epoch": 1.97,
"learning_rate": 1.7536474763406942e-05,
"loss": 0.1448,
"step": 40000
},
{
"epoch": 1.97,
"eval_bleu": 41.7528,
"eval_gen_len": 35.7248,
"eval_loss": 0.10195966809988022,
"eval_runtime": 123.0307,
"eval_samples_per_second": 8.478,
"eval_steps_per_second": 0.268,
"step": 40000
},
{
"epoch": 2.17,
"learning_rate": 1.7290146884858045e-05,
"loss": 0.1372,
"step": 44000
},
{
"epoch": 2.17,
"eval_bleu": 42.1909,
"eval_gen_len": 35.2924,
"eval_loss": 0.1022593304514885,
"eval_runtime": 159.0132,
"eval_samples_per_second": 6.559,
"eval_steps_per_second": 0.208,
"step": 44000
},
{
"epoch": 2.37,
"learning_rate": 1.7043757393533123e-05,
"loss": 0.1363,
"step": 48000
},
{
"epoch": 2.37,
"eval_bleu": 42.6162,
"eval_gen_len": 34.9866,
"eval_loss": 0.10216603428125381,
"eval_runtime": 116.2982,
"eval_samples_per_second": 8.968,
"eval_steps_per_second": 0.284,
"step": 48000
},
{
"epoch": 2.56,
"learning_rate": 1.679742951498423e-05,
"loss": 0.1352,
"step": 52000
},
{
"epoch": 2.56,
"eval_bleu": 42.4525,
"eval_gen_len": 34.8888,
"eval_loss": 0.10181364417076111,
"eval_runtime": 134.9256,
"eval_samples_per_second": 7.73,
"eval_steps_per_second": 0.245,
"step": 52000
},
{
"epoch": 2.76,
"learning_rate": 1.6551101636435333e-05,
"loss": 0.1355,
"step": 56000
},
{
"epoch": 2.76,
"eval_bleu": 41.9729,
"eval_gen_len": 35.9051,
"eval_loss": 0.10166899114847183,
"eval_runtime": 135.9018,
"eval_samples_per_second": 7.675,
"eval_steps_per_second": 0.243,
"step": 56000
},
{
"epoch": 2.96,
"learning_rate": 1.6304773757886436e-05,
"loss": 0.1358,
"step": 60000
},
{
"epoch": 2.96,
"eval_bleu": 42.3275,
"eval_gen_len": 34.8514,
"eval_loss": 0.10106752812862396,
"eval_runtime": 160.3679,
"eval_samples_per_second": 6.504,
"eval_steps_per_second": 0.206,
"step": 60000
},
{
"epoch": 3.15,
"learning_rate": 1.6058445879337542e-05,
"loss": 0.1294,
"step": 64000
},
{
"epoch": 3.15,
"eval_bleu": 42.2988,
"eval_gen_len": 34.8188,
"eval_loss": 0.10183101147413254,
"eval_runtime": 137.4561,
"eval_samples_per_second": 7.588,
"eval_steps_per_second": 0.24,
"step": 64000
},
{
"epoch": 3.35,
"learning_rate": 1.5812118000788645e-05,
"loss": 0.1289,
"step": 68000
},
{
"epoch": 3.35,
"eval_bleu": 42.7527,
"eval_gen_len": 34.7766,
"eval_loss": 0.10201847553253174,
"eval_runtime": 136.3856,
"eval_samples_per_second": 7.647,
"eval_steps_per_second": 0.242,
"step": 68000
},
{
"epoch": 3.55,
"learning_rate": 1.5565728509463723e-05,
"loss": 0.1277,
"step": 72000
},
{
"epoch": 3.55,
"eval_bleu": 42.3528,
"eval_gen_len": 35.2416,
"eval_loss": 0.10199479013681412,
"eval_runtime": 110.1197,
"eval_samples_per_second": 9.472,
"eval_steps_per_second": 0.3,
"step": 72000
},
{
"epoch": 3.75,
"learning_rate": 1.5319400630914826e-05,
"loss": 0.1282,
"step": 76000
},
{
"epoch": 3.75,
"eval_bleu": 42.4538,
"eval_gen_len": 35.4746,
"eval_loss": 0.10173474997282028,
"eval_runtime": 126.659,
"eval_samples_per_second": 8.235,
"eval_steps_per_second": 0.261,
"step": 76000
},
{
"epoch": 3.94,
"learning_rate": 1.5073072752365931e-05,
"loss": 0.129,
"step": 80000
},
{
"epoch": 3.94,
"eval_bleu": 42.7722,
"eval_gen_len": 34.8581,
"eval_loss": 0.10149160027503967,
"eval_runtime": 151.6781,
"eval_samples_per_second": 6.876,
"eval_steps_per_second": 0.218,
"step": 80000
}
],
"max_steps": 324608,
"num_train_epochs": 16,
"total_flos": 1.3796012702564352e+17,
"trial_name": null,
"trial_params": null
}