| { | |
| "best_metric": 42.5239, | |
| "best_model_checkpoint": "opus_base_ailem_random/checkpoint-68000", | |
| "epoch": 3.3517350157728707, | |
| "global_step": 68000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.9753918572555207e-05, | |
| "loss": 0.209, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_bleu": 35.7559, | |
| "eval_gen_len": 39.7057, | |
| "eval_loss": 0.10894892364740372, | |
| "eval_runtime": 204.0367, | |
| "eval_samples_per_second": 5.112, | |
| "eval_steps_per_second": 0.162, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.950746746845426e-05, | |
| "loss": 0.1757, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_bleu": 36.6369, | |
| "eval_gen_len": 39.395, | |
| "eval_loss": 0.10665024816989899, | |
| "eval_runtime": 169.0161, | |
| "eval_samples_per_second": 6.171, | |
| "eval_steps_per_second": 0.195, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.9261016364353314e-05, | |
| "loss": 0.1686, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_bleu": 41.2508, | |
| "eval_gen_len": 35.1687, | |
| "eval_loss": 0.10527685284614563, | |
| "eval_runtime": 161.3856, | |
| "eval_samples_per_second": 6.463, | |
| "eval_steps_per_second": 0.204, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9014565260252367e-05, | |
| "loss": 0.1631, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_bleu": 41.4292, | |
| "eval_gen_len": 35.1361, | |
| "eval_loss": 0.104288749396801, | |
| "eval_runtime": 136.8172, | |
| "eval_samples_per_second": 7.623, | |
| "eval_steps_per_second": 0.241, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.876811415615142e-05, | |
| "loss": 0.1608, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_bleu": 41.7716, | |
| "eval_gen_len": 35.5177, | |
| "eval_loss": 0.10370208323001862, | |
| "eval_runtime": 158.9983, | |
| "eval_samples_per_second": 6.56, | |
| "eval_steps_per_second": 0.208, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.8521786277602524e-05, | |
| "loss": 0.149, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_bleu": 40.6317, | |
| "eval_gen_len": 36.8993, | |
| "eval_loss": 0.10362833738327026, | |
| "eval_runtime": 143.5271, | |
| "eval_samples_per_second": 7.267, | |
| "eval_steps_per_second": 0.23, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.827545839905363e-05, | |
| "loss": 0.1488, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_bleu": 41.8852, | |
| "eval_gen_len": 35.3174, | |
| "eval_loss": 0.10282401740550995, | |
| "eval_runtime": 137.168, | |
| "eval_samples_per_second": 7.604, | |
| "eval_steps_per_second": 0.241, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.8029130520504733e-05, | |
| "loss": 0.1484, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_bleu": 41.5392, | |
| "eval_gen_len": 35.9501, | |
| "eval_loss": 0.10223321616649628, | |
| "eval_runtime": 133.1868, | |
| "eval_samples_per_second": 7.831, | |
| "eval_steps_per_second": 0.248, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.7782802641955836e-05, | |
| "loss": 0.147, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_bleu": 40.5681, | |
| "eval_gen_len": 37.5935, | |
| "eval_loss": 0.10191329568624496, | |
| "eval_runtime": 125.5047, | |
| "eval_samples_per_second": 8.31, | |
| "eval_steps_per_second": 0.263, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7536474763406942e-05, | |
| "loss": 0.1461, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_bleu": 42.3964, | |
| "eval_gen_len": 34.768, | |
| "eval_loss": 0.10204007476568222, | |
| "eval_runtime": 124.2655, | |
| "eval_samples_per_second": 8.393, | |
| "eval_steps_per_second": 0.266, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.7290146884858045e-05, | |
| "loss": 0.1386, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_bleu": 40.217, | |
| "eval_gen_len": 37.1946, | |
| "eval_loss": 0.10240339487791061, | |
| "eval_runtime": 180.618, | |
| "eval_samples_per_second": 5.775, | |
| "eval_steps_per_second": 0.183, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.7043757393533123e-05, | |
| "loss": 0.1376, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_bleu": 41.5097, | |
| "eval_gen_len": 36.3797, | |
| "eval_loss": 0.1021459624171257, | |
| "eval_runtime": 149.0335, | |
| "eval_samples_per_second": 6.998, | |
| "eval_steps_per_second": 0.221, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.6797491127760255e-05, | |
| "loss": 0.1366, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_bleu": 42.2181, | |
| "eval_gen_len": 35.0019, | |
| "eval_loss": 0.10173720866441727, | |
| "eval_runtime": 168.836, | |
| "eval_samples_per_second": 6.178, | |
| "eval_steps_per_second": 0.195, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 1.6551101636435333e-05, | |
| "loss": 0.1368, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_bleu": 41.65, | |
| "eval_gen_len": 36.2694, | |
| "eval_loss": 0.10165542364120483, | |
| "eval_runtime": 181.2512, | |
| "eval_samples_per_second": 5.754, | |
| "eval_steps_per_second": 0.182, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 1.630471214511041e-05, | |
| "loss": 0.1371, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_bleu": 42.5203, | |
| "eval_gen_len": 34.1112, | |
| "eval_loss": 0.10101941227912903, | |
| "eval_runtime": 117.7958, | |
| "eval_samples_per_second": 8.854, | |
| "eval_steps_per_second": 0.28, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.6058384266561514e-05, | |
| "loss": 0.1308, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_bleu": 42.2011, | |
| "eval_gen_len": 35.1908, | |
| "eval_loss": 0.10181207209825516, | |
| "eval_runtime": 136.2775, | |
| "eval_samples_per_second": 7.654, | |
| "eval_steps_per_second": 0.242, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 1.581205638801262e-05, | |
| "loss": 0.1302, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "eval_bleu": 42.5239, | |
| "eval_gen_len": 34.8715, | |
| "eval_loss": 0.10198543220758438, | |
| "eval_runtime": 134.6516, | |
| "eval_samples_per_second": 7.746, | |
| "eval_steps_per_second": 0.245, | |
| "step": 68000 | |
| } | |
| ], | |
| "max_steps": 324608, | |
| "num_train_epochs": 16, | |
| "total_flos": 1.1723875993622938e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |