| { |
| "best_metric": 50.2114, |
| "best_model_checkpoint": "./jako_13p_tokenie_run1/checkpoint-19200", |
| "epoch": 9.997403271877435, |
| "eval_steps": 1600, |
| "global_step": 19250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.9212598425196856e-05, |
| "loss": 1.7571, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.79002624671916e-05, |
| "loss": 1.3294, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.6587926509186354e-05, |
| "loss": 1.2125, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_bleu": 44.2732, |
| "eval_gen_len": 18.9394, |
| "eval_loss": 1.1356315612792969, |
| "eval_runtime": 557.6241, |
| "eval_samples_per_second": 13.81, |
| "eval_steps_per_second": 0.864, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 4.52755905511811e-05, |
| "loss": 1.1386, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 4.396325459317586e-05, |
| "loss": 0.9283, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 4.2650918635170604e-05, |
| "loss": 0.8519, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_bleu": 47.1622, |
| "eval_gen_len": 18.3936, |
| "eval_loss": 1.061800241470337, |
| "eval_runtime": 524.2089, |
| "eval_samples_per_second": 14.691, |
| "eval_steps_per_second": 0.919, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 4.133858267716536e-05, |
| "loss": 0.8109, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.00262467191601e-05, |
| "loss": 0.7727, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 3.871391076115486e-05, |
| "loss": 0.6394, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_bleu": 47.7818, |
| "eval_gen_len": 18.3397, |
| "eval_loss": 1.0923182964324951, |
| "eval_runtime": 516.7936, |
| "eval_samples_per_second": 14.902, |
| "eval_steps_per_second": 0.933, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 3.740157480314961e-05, |
| "loss": 0.5875, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 3.608923884514436e-05, |
| "loss": 0.5625, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 3.4776902887139105e-05, |
| "loss": 0.532, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.32, |
| "eval_bleu": 48.4283, |
| "eval_gen_len": 18.3375, |
| "eval_loss": 1.1293830871582031, |
| "eval_runtime": 519.3842, |
| "eval_samples_per_second": 14.827, |
| "eval_steps_per_second": 0.928, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 3.3464566929133864e-05, |
| "loss": 0.4299, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 3.215223097112861e-05, |
| "loss": 0.3984, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 3.083989501312336e-05, |
| "loss": 0.3857, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 2.952755905511811e-05, |
| "loss": 0.3543, |
| "step": 8000 |
| }, |
| { |
| "epoch": 4.15, |
| "eval_bleu": 47.7916, |
| "eval_gen_len": 18.4422, |
| "eval_loss": 1.176469087600708, |
| "eval_runtime": 519.0077, |
| "eval_samples_per_second": 14.838, |
| "eval_steps_per_second": 0.929, |
| "step": 8000 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 2.8215223097112863e-05, |
| "loss": 0.2836, |
| "step": 8500 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 2.6902887139107612e-05, |
| "loss": 0.2648, |
| "step": 9000 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 2.5590551181102364e-05, |
| "loss": 0.2569, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.99, |
| "eval_bleu": 48.1268, |
| "eval_gen_len": 18.5385, |
| "eval_loss": 1.2102879285812378, |
| "eval_runtime": 526.7602, |
| "eval_samples_per_second": 14.62, |
| "eval_steps_per_second": 0.915, |
| "step": 9600 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 2.4278215223097113e-05, |
| "loss": 0.2268, |
| "step": 10000 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 2.2965879265091865e-05, |
| "loss": 0.1854, |
| "step": 10500 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 2.1653543307086614e-05, |
| "loss": 0.1732, |
| "step": 11000 |
| }, |
| { |
| "epoch": 5.82, |
| "eval_bleu": 48.9329, |
| "eval_gen_len": 18.2085, |
| "eval_loss": 1.25494384765625, |
| "eval_runtime": 505.0437, |
| "eval_samples_per_second": 15.248, |
| "eval_steps_per_second": 0.954, |
| "step": 11200 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 2.0341207349081366e-05, |
| "loss": 0.1693, |
| "step": 11500 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 1.9028871391076115e-05, |
| "loss": 0.1453, |
| "step": 12000 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 1.7716535433070868e-05, |
| "loss": 0.1228, |
| "step": 12500 |
| }, |
| { |
| "epoch": 6.65, |
| "eval_bleu": 49.0248, |
| "eval_gen_len": 18.2133, |
| "eval_loss": 1.3022269010543823, |
| "eval_runtime": 504.7977, |
| "eval_samples_per_second": 15.256, |
| "eval_steps_per_second": 0.955, |
| "step": 12800 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 1.6404199475065617e-05, |
| "loss": 0.1158, |
| "step": 13000 |
| }, |
| { |
| "epoch": 7.01, |
| "learning_rate": 1.5091863517060367e-05, |
| "loss": 0.1144, |
| "step": 13500 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 1.377952755905512e-05, |
| "loss": 0.0937, |
| "step": 14000 |
| }, |
| { |
| "epoch": 7.48, |
| "eval_bleu": 49.3503, |
| "eval_gen_len": 18.1673, |
| "eval_loss": 1.317897081375122, |
| "eval_runtime": 503.3739, |
| "eval_samples_per_second": 15.299, |
| "eval_steps_per_second": 0.958, |
| "step": 14400 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 1.246719160104987e-05, |
| "loss": 0.0829, |
| "step": 14500 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 1.115485564304462e-05, |
| "loss": 0.0783, |
| "step": 15000 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 9.842519685039371e-06, |
| "loss": 0.0779, |
| "step": 15500 |
| }, |
| { |
| "epoch": 8.31, |
| "learning_rate": 8.530183727034122e-06, |
| "loss": 0.0627, |
| "step": 16000 |
| }, |
| { |
| "epoch": 8.31, |
| "eval_bleu": 49.5551, |
| "eval_gen_len": 18.2672, |
| "eval_loss": 1.3408894538879395, |
| "eval_runtime": 506.5726, |
| "eval_samples_per_second": 15.202, |
| "eval_steps_per_second": 0.951, |
| "step": 16000 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 7.2178477690288725e-06, |
| "loss": 0.0579, |
| "step": 16500 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 5.905511811023622e-06, |
| "loss": 0.0551, |
| "step": 17000 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 4.593175853018373e-06, |
| "loss": 0.0558, |
| "step": 17500 |
| }, |
| { |
| "epoch": 9.14, |
| "eval_bleu": 49.7808, |
| "eval_gen_len": 18.2815, |
| "eval_loss": 1.3544921875, |
| "eval_runtime": 505.3645, |
| "eval_samples_per_second": 15.239, |
| "eval_steps_per_second": 0.954, |
| "step": 17600 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 3.2808398950131235e-06, |
| "loss": 0.0456, |
| "step": 18000 |
| }, |
| { |
| "epoch": 9.61, |
| "learning_rate": 1.968503937007874e-06, |
| "loss": 0.0433, |
| "step": 18500 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 6.561679790026247e-07, |
| "loss": 0.0442, |
| "step": 19000 |
| }, |
| { |
| "epoch": 9.97, |
| "eval_bleu": 50.2114, |
| "eval_gen_len": 18.2159, |
| "eval_loss": 1.3559678792953491, |
| "eval_runtime": 503.7841, |
| "eval_samples_per_second": 15.286, |
| "eval_steps_per_second": 0.957, |
| "step": 19200 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 19250, |
| "total_flos": 1.334951937048576e+18, |
| "train_loss": 0.4018082245665711, |
| "train_runtime": 39148.2976, |
| "train_samples_per_second": 15.739, |
| "train_steps_per_second": 0.492 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 19250, |
| "num_train_epochs": 10, |
| "save_steps": 1600, |
| "total_flos": 1.334951937048576e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|