Ethan Sim
update model with 1.0 safeguard
0cbff79
{
"best_metric": 42.5239,
"best_model_checkpoint": "opus_base_ailem_random/checkpoint-68000",
"epoch": 3.3517350157728707,
"global_step": 68000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.9753918572555207e-05,
"loss": 0.209,
"step": 4000
},
{
"epoch": 0.2,
"eval_bleu": 35.7559,
"eval_gen_len": 39.7057,
"eval_loss": 0.10894892364740372,
"eval_runtime": 204.0367,
"eval_samples_per_second": 5.112,
"eval_steps_per_second": 0.162,
"step": 4000
},
{
"epoch": 0.39,
"learning_rate": 1.950746746845426e-05,
"loss": 0.1757,
"step": 8000
},
{
"epoch": 0.39,
"eval_bleu": 36.6369,
"eval_gen_len": 39.395,
"eval_loss": 0.10665024816989899,
"eval_runtime": 169.0161,
"eval_samples_per_second": 6.171,
"eval_steps_per_second": 0.195,
"step": 8000
},
{
"epoch": 0.59,
"learning_rate": 1.9261016364353314e-05,
"loss": 0.1686,
"step": 12000
},
{
"epoch": 0.59,
"eval_bleu": 41.2508,
"eval_gen_len": 35.1687,
"eval_loss": 0.10527685284614563,
"eval_runtime": 161.3856,
"eval_samples_per_second": 6.463,
"eval_steps_per_second": 0.204,
"step": 12000
},
{
"epoch": 0.79,
"learning_rate": 1.9014565260252367e-05,
"loss": 0.1631,
"step": 16000
},
{
"epoch": 0.79,
"eval_bleu": 41.4292,
"eval_gen_len": 35.1361,
"eval_loss": 0.104288749396801,
"eval_runtime": 136.8172,
"eval_samples_per_second": 7.623,
"eval_steps_per_second": 0.241,
"step": 16000
},
{
"epoch": 0.99,
"learning_rate": 1.876811415615142e-05,
"loss": 0.1608,
"step": 20000
},
{
"epoch": 0.99,
"eval_bleu": 41.7716,
"eval_gen_len": 35.5177,
"eval_loss": 0.10370208323001862,
"eval_runtime": 158.9983,
"eval_samples_per_second": 6.56,
"eval_steps_per_second": 0.208,
"step": 20000
},
{
"epoch": 1.18,
"learning_rate": 1.8521786277602524e-05,
"loss": 0.149,
"step": 24000
},
{
"epoch": 1.18,
"eval_bleu": 40.6317,
"eval_gen_len": 36.8993,
"eval_loss": 0.10362833738327026,
"eval_runtime": 143.5271,
"eval_samples_per_second": 7.267,
"eval_steps_per_second": 0.23,
"step": 24000
},
{
"epoch": 1.38,
"learning_rate": 1.827545839905363e-05,
"loss": 0.1488,
"step": 28000
},
{
"epoch": 1.38,
"eval_bleu": 41.8852,
"eval_gen_len": 35.3174,
"eval_loss": 0.10282401740550995,
"eval_runtime": 137.168,
"eval_samples_per_second": 7.604,
"eval_steps_per_second": 0.241,
"step": 28000
},
{
"epoch": 1.58,
"learning_rate": 1.8029130520504733e-05,
"loss": 0.1484,
"step": 32000
},
{
"epoch": 1.58,
"eval_bleu": 41.5392,
"eval_gen_len": 35.9501,
"eval_loss": 0.10223321616649628,
"eval_runtime": 133.1868,
"eval_samples_per_second": 7.831,
"eval_steps_per_second": 0.248,
"step": 32000
},
{
"epoch": 1.77,
"learning_rate": 1.7782802641955836e-05,
"loss": 0.147,
"step": 36000
},
{
"epoch": 1.77,
"eval_bleu": 40.5681,
"eval_gen_len": 37.5935,
"eval_loss": 0.10191329568624496,
"eval_runtime": 125.5047,
"eval_samples_per_second": 8.31,
"eval_steps_per_second": 0.263,
"step": 36000
},
{
"epoch": 1.97,
"learning_rate": 1.7536474763406942e-05,
"loss": 0.1461,
"step": 40000
},
{
"epoch": 1.97,
"eval_bleu": 42.3964,
"eval_gen_len": 34.768,
"eval_loss": 0.10204007476568222,
"eval_runtime": 124.2655,
"eval_samples_per_second": 8.393,
"eval_steps_per_second": 0.266,
"step": 40000
},
{
"epoch": 2.17,
"learning_rate": 1.7290146884858045e-05,
"loss": 0.1386,
"step": 44000
},
{
"epoch": 2.17,
"eval_bleu": 40.217,
"eval_gen_len": 37.1946,
"eval_loss": 0.10240339487791061,
"eval_runtime": 180.618,
"eval_samples_per_second": 5.775,
"eval_steps_per_second": 0.183,
"step": 44000
},
{
"epoch": 2.37,
"learning_rate": 1.7043757393533123e-05,
"loss": 0.1376,
"step": 48000
},
{
"epoch": 2.37,
"eval_bleu": 41.5097,
"eval_gen_len": 36.3797,
"eval_loss": 0.1021459624171257,
"eval_runtime": 149.0335,
"eval_samples_per_second": 6.998,
"eval_steps_per_second": 0.221,
"step": 48000
},
{
"epoch": 2.56,
"learning_rate": 1.6797491127760255e-05,
"loss": 0.1366,
"step": 52000
},
{
"epoch": 2.56,
"eval_bleu": 42.2181,
"eval_gen_len": 35.0019,
"eval_loss": 0.10173720866441727,
"eval_runtime": 168.836,
"eval_samples_per_second": 6.178,
"eval_steps_per_second": 0.195,
"step": 52000
},
{
"epoch": 2.76,
"learning_rate": 1.6551101636435333e-05,
"loss": 0.1368,
"step": 56000
},
{
"epoch": 2.76,
"eval_bleu": 41.65,
"eval_gen_len": 36.2694,
"eval_loss": 0.10165542364120483,
"eval_runtime": 181.2512,
"eval_samples_per_second": 5.754,
"eval_steps_per_second": 0.182,
"step": 56000
},
{
"epoch": 2.96,
"learning_rate": 1.630471214511041e-05,
"loss": 0.1371,
"step": 60000
},
{
"epoch": 2.96,
"eval_bleu": 42.5203,
"eval_gen_len": 34.1112,
"eval_loss": 0.10101941227912903,
"eval_runtime": 117.7958,
"eval_samples_per_second": 8.854,
"eval_steps_per_second": 0.28,
"step": 60000
},
{
"epoch": 3.15,
"learning_rate": 1.6058384266561514e-05,
"loss": 0.1308,
"step": 64000
},
{
"epoch": 3.15,
"eval_bleu": 42.2011,
"eval_gen_len": 35.1908,
"eval_loss": 0.10181207209825516,
"eval_runtime": 136.2775,
"eval_samples_per_second": 7.654,
"eval_steps_per_second": 0.242,
"step": 64000
},
{
"epoch": 3.35,
"learning_rate": 1.581205638801262e-05,
"loss": 0.1302,
"step": 68000
},
{
"epoch": 3.35,
"eval_bleu": 42.5239,
"eval_gen_len": 34.8715,
"eval_loss": 0.10198543220758438,
"eval_runtime": 134.6516,
"eval_samples_per_second": 7.746,
"eval_steps_per_second": 0.245,
"step": 68000
}
],
"max_steps": 324608,
"num_train_epochs": 16,
"total_flos": 1.1723875993622938e+17,
"trial_name": null,
"trial_params": null
}