{ "best_metric": 42.5239, "best_model_checkpoint": "opus_base_ailem_random/checkpoint-68000", "epoch": 3.3517350157728707, "global_step": 68000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.209, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 35.7559, "eval_gen_len": 39.7057, "eval_loss": 0.10894892364740372, "eval_runtime": 204.0367, "eval_samples_per_second": 5.112, "eval_steps_per_second": 0.162, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.1757, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 36.6369, "eval_gen_len": 39.395, "eval_loss": 0.10665024816989899, "eval_runtime": 169.0161, "eval_samples_per_second": 6.171, "eval_steps_per_second": 0.195, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1686, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.2508, "eval_gen_len": 35.1687, "eval_loss": 0.10527685284614563, "eval_runtime": 161.3856, "eval_samples_per_second": 6.463, "eval_steps_per_second": 0.204, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1631, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.4292, "eval_gen_len": 35.1361, "eval_loss": 0.104288749396801, "eval_runtime": 136.8172, "eval_samples_per_second": 7.623, "eval_steps_per_second": 0.241, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1608, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.7716, "eval_gen_len": 35.5177, "eval_loss": 0.10370208323001862, "eval_runtime": 158.9983, "eval_samples_per_second": 6.56, "eval_steps_per_second": 0.208, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.149, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 40.6317, "eval_gen_len": 36.8993, "eval_loss": 0.10362833738327026, "eval_runtime": 143.5271, "eval_samples_per_second": 7.267, "eval_steps_per_second": 0.23, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.827545839905363e-05, "loss": 0.1488, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 41.8852, "eval_gen_len": 35.3174, "eval_loss": 0.10282401740550995, "eval_runtime": 137.168, "eval_samples_per_second": 7.604, "eval_steps_per_second": 0.241, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1484, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 41.5392, "eval_gen_len": 35.9501, "eval_loss": 0.10223321616649628, "eval_runtime": 133.1868, "eval_samples_per_second": 7.831, "eval_steps_per_second": 0.248, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.7782802641955836e-05, "loss": 0.147, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 40.5681, "eval_gen_len": 37.5935, "eval_loss": 0.10191329568624496, "eval_runtime": 125.5047, "eval_samples_per_second": 8.31, "eval_steps_per_second": 0.263, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536474763406942e-05, "loss": 0.1461, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 42.3964, "eval_gen_len": 34.768, "eval_loss": 0.10204007476568222, "eval_runtime": 124.2655, "eval_samples_per_second": 8.393, "eval_steps_per_second": 0.266, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.7290146884858045e-05, "loss": 0.1386, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 40.217, "eval_gen_len": 37.1946, "eval_loss": 0.10240339487791061, "eval_runtime": 180.618, "eval_samples_per_second": 5.775, "eval_steps_per_second": 0.183, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.7043757393533123e-05, "loss": 0.1376, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 41.5097, "eval_gen_len": 36.3797, "eval_loss": 0.1021459624171257, "eval_runtime": 149.0335, "eval_samples_per_second": 6.998, "eval_steps_per_second": 0.221, "step": 48000 }, { "epoch": 2.56, "learning_rate": 1.6797491127760255e-05, "loss": 0.1366, "step": 52000 }, { "epoch": 2.56, "eval_bleu": 42.2181, "eval_gen_len": 35.0019, "eval_loss": 0.10173720866441727, "eval_runtime": 168.836, "eval_samples_per_second": 6.178, "eval_steps_per_second": 0.195, "step": 52000 }, { "epoch": 2.76, "learning_rate": 1.6551101636435333e-05, "loss": 0.1368, "step": 56000 }, { "epoch": 2.76, "eval_bleu": 41.65, "eval_gen_len": 36.2694, "eval_loss": 0.10165542364120483, "eval_runtime": 181.2512, "eval_samples_per_second": 5.754, "eval_steps_per_second": 0.182, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.630471214511041e-05, "loss": 0.1371, "step": 60000 }, { "epoch": 2.96, "eval_bleu": 42.5203, "eval_gen_len": 34.1112, "eval_loss": 0.10101941227912903, "eval_runtime": 117.7958, "eval_samples_per_second": 8.854, "eval_steps_per_second": 0.28, "step": 60000 }, { "epoch": 3.15, "learning_rate": 1.6058384266561514e-05, "loss": 0.1308, "step": 64000 }, { "epoch": 3.15, "eval_bleu": 42.2011, "eval_gen_len": 35.1908, "eval_loss": 0.10181207209825516, "eval_runtime": 136.2775, "eval_samples_per_second": 7.654, "eval_steps_per_second": 0.242, "step": 64000 }, { "epoch": 3.35, "learning_rate": 1.581205638801262e-05, "loss": 0.1302, "step": 68000 }, { "epoch": 3.35, "eval_bleu": 42.5239, "eval_gen_len": 34.8715, "eval_loss": 0.10198543220758438, "eval_runtime": 134.6516, "eval_samples_per_second": 7.746, "eval_steps_per_second": 0.245, "step": 68000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 1.1723875993622938e+17, "trial_name": null, "trial_params": null }