| { |
| "best_metric": 17.3273, |
| "best_model_checkpoint": "models/mt0-xl_russian_natprompt_adafactor_updated/checkpoint-6150", |
| "epoch": 14.999024390243903, |
| "eval_steps": 500, |
| "global_step": 7687, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.75e-05, |
| "loss": 2.0388, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 16.58484100877193, |
| "eval_loss": 1.6734575033187866, |
| "eval_rouge1": 14.1367, |
| "eval_rouge2": 7.0437, |
| "eval_rougeL": 14.0625, |
| "eval_rougeLsum": 14.0916, |
| "eval_runtime": 270.6111, |
| "eval_samples_per_second": 26.932, |
| "eval_steps_per_second": 0.843, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4.4995117187500005e-05, |
| "loss": 1.7098, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 16.68050986842105, |
| "eval_loss": 1.6203718185424805, |
| "eval_rouge1": 15.2619, |
| "eval_rouge2": 7.8124, |
| "eval_rougeL": 15.159, |
| "eval_rougeLsum": 15.2078, |
| "eval_runtime": 276.6842, |
| "eval_samples_per_second": 26.341, |
| "eval_steps_per_second": 0.824, |
| "step": 1025 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 4.24951171875e-05, |
| "loss": 1.539, |
| "step": 1537 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 16.61417214912281, |
| "eval_loss": 1.6058766841888428, |
| "eval_rouge1": 15.9942, |
| "eval_rouge2": 8.1827, |
| "eval_rougeL": 15.872, |
| "eval_rougeLsum": 15.9105, |
| "eval_runtime": 263.8074, |
| "eval_samples_per_second": 27.626, |
| "eval_steps_per_second": 0.864, |
| "step": 1537 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 3.9990234375e-05, |
| "loss": 1.403, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 16.26343201754386, |
| "eval_loss": 1.6041721105575562, |
| "eval_rouge1": 16.6383, |
| "eval_rouge2": 8.4603, |
| "eval_rougeL": 16.5096, |
| "eval_rougeLsum": 16.5635, |
| "eval_runtime": 251.4581, |
| "eval_samples_per_second": 28.983, |
| "eval_steps_per_second": 0.907, |
| "step": 2050 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 3.7490234375e-05, |
| "loss": 1.295, |
| "step": 2562 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 15.741365131578947, |
| "eval_loss": 1.6226089000701904, |
| "eval_rouge1": 16.9189, |
| "eval_rouge2": 8.8384, |
| "eval_rougeL": 16.7799, |
| "eval_rougeLsum": 16.8258, |
| "eval_runtime": 169.6881, |
| "eval_samples_per_second": 42.949, |
| "eval_steps_per_second": 1.344, |
| "step": 2562 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 3.49853515625e-05, |
| "loss": 1.1984, |
| "step": 3075 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_gen_len": 15.888157894736842, |
| "eval_loss": 1.6289030313491821, |
| "eval_rouge1": 16.9788, |
| "eval_rouge2": 8.7272, |
| "eval_rougeL": 16.8238, |
| "eval_rougeLsum": 16.8765, |
| "eval_runtime": 175.0677, |
| "eval_samples_per_second": 41.63, |
| "eval_steps_per_second": 1.302, |
| "step": 3075 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 3.2485351562499996e-05, |
| "loss": 1.1195, |
| "step": 3587 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_gen_len": 16.23519736842105, |
| "eval_loss": 1.6697918176651, |
| "eval_rouge1": 17.0912, |
| "eval_rouge2": 8.7061, |
| "eval_rougeL": 16.9084, |
| "eval_rougeLsum": 16.9633, |
| "eval_runtime": 171.9395, |
| "eval_samples_per_second": 42.387, |
| "eval_steps_per_second": 1.326, |
| "step": 3587 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 2.998046875e-05, |
| "loss": 1.0463, |
| "step": 4100 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_gen_len": 16.14761513157895, |
| "eval_loss": 1.6845269203186035, |
| "eval_rouge1": 17.201, |
| "eval_rouge2": 8.7395, |
| "eval_rougeL": 17.003, |
| "eval_rougeLsum": 17.052, |
| "eval_runtime": 252.7052, |
| "eval_samples_per_second": 28.84, |
| "eval_steps_per_second": 0.902, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 2.748046875e-05, |
| "loss": 0.9866, |
| "step": 4612 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_gen_len": 15.878837719298245, |
| "eval_loss": 1.726230502128601, |
| "eval_rouge1": 17.3223, |
| "eval_rouge2": 8.8289, |
| "eval_rougeL": 17.1413, |
| "eval_rougeLsum": 17.1756, |
| "eval_runtime": 182.5703, |
| "eval_samples_per_second": 39.919, |
| "eval_steps_per_second": 1.249, |
| "step": 4612 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 2.49755859375e-05, |
| "loss": 0.9326, |
| "step": 5125 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_gen_len": 15.797149122807017, |
| "eval_loss": 1.7532711029052734, |
| "eval_rouge1": 17.2655, |
| "eval_rouge2": 8.7512, |
| "eval_rougeL": 17.0508, |
| "eval_rougeLsum": 17.1055, |
| "eval_runtime": 168.7949, |
| "eval_samples_per_second": 43.177, |
| "eval_steps_per_second": 1.351, |
| "step": 5125 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 2.24755859375e-05, |
| "loss": 0.8844, |
| "step": 5637 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_gen_len": 16.32360197368421, |
| "eval_loss": 1.7794246673583984, |
| "eval_rouge1": 17.008, |
| "eval_rouge2": 8.5404, |
| "eval_rougeL": 16.8044, |
| "eval_rougeLsum": 16.848, |
| "eval_runtime": 168.6102, |
| "eval_samples_per_second": 43.224, |
| "eval_steps_per_second": 1.352, |
| "step": 5637 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 1.9970703125e-05, |
| "loss": 0.8393, |
| "step": 6150 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_gen_len": 16.143092105263158, |
| "eval_loss": 1.7995822429656982, |
| "eval_rouge1": 17.3273, |
| "eval_rouge2": 8.7829, |
| "eval_rougeL": 17.097, |
| "eval_rougeLsum": 17.1644, |
| "eval_runtime": 171.5723, |
| "eval_samples_per_second": 42.478, |
| "eval_steps_per_second": 1.329, |
| "step": 6150 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 1.7470703125000003e-05, |
| "loss": 0.8046, |
| "step": 6662 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_gen_len": 16.090597587719298, |
| "eval_loss": 1.8266295194625854, |
| "eval_rouge1": 17.1859, |
| "eval_rouge2": 8.6524, |
| "eval_rougeL": 16.9605, |
| "eval_rougeLsum": 17.0118, |
| "eval_runtime": 259.1646, |
| "eval_samples_per_second": 28.121, |
| "eval_steps_per_second": 0.88, |
| "step": 6662 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 1.49658203125e-05, |
| "loss": 0.7682, |
| "step": 7175 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_gen_len": 16.11239035087719, |
| "eval_loss": 1.8624775409698486, |
| "eval_rouge1": 17.0184, |
| "eval_rouge2": 8.5314, |
| "eval_rougeL": 16.8019, |
| "eval_rougeLsum": 16.847, |
| "eval_runtime": 170.9938, |
| "eval_samples_per_second": 42.621, |
| "eval_steps_per_second": 1.333, |
| "step": 7175 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 1.2465820312500002e-05, |
| "loss": 0.7419, |
| "step": 7687 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_gen_len": 15.95751096491228, |
| "eval_loss": 1.8779526948928833, |
| "eval_rouge1": 17.2742, |
| "eval_rouge2": 8.6795, |
| "eval_rougeL": 17.0699, |
| "eval_rougeLsum": 17.1118, |
| "eval_runtime": 177.9916, |
| "eval_samples_per_second": 40.946, |
| "eval_steps_per_second": 1.281, |
| "step": 7687 |
| }, |
| { |
| "epoch": 15.0, |
| "step": 7687, |
| "total_flos": 1.7085595424946913e+18, |
| "train_loss": 1.153788715837463, |
| "train_runtime": 20083.7121, |
| "train_samples_per_second": 65.311, |
| "train_steps_per_second": 0.51 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 10240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 1.7085595424946913e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|