| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "global_step": 35889, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0009860681545877568, | |
| "loss": 6.095, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0009721363091755134, | |
| "loss": 2.9616, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0009582044637632701, | |
| "loss": 2.666, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0009442726183510269, | |
| "loss": 2.5328, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0009303407729387835, | |
| "loss": 2.497, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0009164089275265401, | |
| "loss": 2.4232, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0009024770821142969, | |
| "loss": 2.4132, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0008885452367020536, | |
| "loss": 2.3761, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0008746133912898102, | |
| "loss": 2.3447, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.000860681545877567, | |
| "loss": 2.2711, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0008467497004653236, | |
| "loss": 2.2602, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0008328178550530803, | |
| "loss": 2.2645, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0008188860096408371, | |
| "loss": 2.2384, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0008049541642285937, | |
| "loss": 2.2474, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 91.9, | |
| "eval_loss": 1.914839744567871, | |
| "eval_rouge1": 31.9324, | |
| "eval_rouge2": 8.9596, | |
| "eval_rougeL": 18.5991, | |
| "eval_rougeLsum": 26.8462, | |
| "eval_runtime": 20609.4374, | |
| "eval_samples_per_second": 0.649, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0007910223188163504, | |
| "loss": 2.2039, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0007770904734041071, | |
| "loss": 2.189, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0007631586279918639, | |
| "loss": 2.1961, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0007492267825796205, | |
| "loss": 2.1764, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0007352949371673772, | |
| "loss": 2.1777, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0007213630917551339, | |
| "loss": 2.1412, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0007074312463428905, | |
| "loss": 2.1669, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0006934994009306474, | |
| "loss": 2.1426, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.000679567555518404, | |
| "loss": 2.1428, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0006656357101061606, | |
| "loss": 2.1108, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0006517038646939174, | |
| "loss": 2.0903, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.000637772019281674, | |
| "loss": 2.1057, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0006238401738694308, | |
| "loss": 2.0874, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0006099083284571875, | |
| "loss": 2.0877, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_gen_len": 99.3, | |
| "eval_loss": 1.8038697242736816, | |
| "eval_rouge1": 31.4652, | |
| "eval_rouge2": 8.6423, | |
| "eval_rougeL": 18.1808, | |
| "eval_rougeLsum": 26.2653, | |
| "eval_runtime": 21262.825, | |
| "eval_samples_per_second": 0.629, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0005959764830449441, | |
| "loss": 2.0903, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0005820446376327009, | |
| "loss": 2.0628, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0005681127922204575, | |
| "loss": 2.0909, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0005541809468082142, | |
| "loss": 2.0258, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.000540249101395971, | |
| "loss": 2.0156, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0005263172559837276, | |
| "loss": 2.0282, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0005123854105714843, | |
| "loss": 2.0559, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.000498453565159241, | |
| "loss": 2.0603, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0004845217197469977, | |
| "loss": 2.0077, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004705898743347544, | |
| "loss": 2.0119, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00045665802892251106, | |
| "loss": 1.9855, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0004427261835102678, | |
| "loss": 2.0089, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0004287943380980245, | |
| "loss": 2.0029, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00041486249268578117, | |
| "loss": 1.9773, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_gen_len": 101.6, | |
| "eval_loss": 1.73397696018219, | |
| "eval_rouge1": 31.1574, | |
| "eval_rouge2": 8.645, | |
| "eval_rougeL": 18.096, | |
| "eval_rougeLsum": 25.9822, | |
| "eval_runtime": 21624.4917, | |
| "eval_samples_per_second": 0.618, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00040093064727353785, | |
| "loss": 1.9632, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00038699880186129454, | |
| "loss": 1.9768, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0003730669564490513, | |
| "loss": 1.9755, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00035913511103680796, | |
| "loss": 2.0125, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0003452032656245646, | |
| "loss": 1.9427, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0003312714202123213, | |
| "loss": 1.9776, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.000317339574800078, | |
| "loss": 1.9966, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00030340772938783475, | |
| "loss": 1.9814, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00028947588397559143, | |
| "loss": 1.9585, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00027554403856334806, | |
| "loss": 1.9668, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0002616121931511048, | |
| "loss": 1.9564, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0002476803477388615, | |
| "loss": 1.9231, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00023374850232661817, | |
| "loss": 1.9078, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00021981665691437488, | |
| "loss": 1.9032, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_gen_len": 106.8, | |
| "eval_loss": 1.6808093786239624, | |
| "eval_rouge1": 31.4328, | |
| "eval_rouge2": 8.6241, | |
| "eval_rougeL": 18.0718, | |
| "eval_rougeLsum": 26.0718, | |
| "eval_runtime": 21979.649, | |
| "eval_samples_per_second": 0.608, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00020588481150213156, | |
| "loss": 1.931, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00019195296608988827, | |
| "loss": 1.9416, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00017802112067764498, | |
| "loss": 1.963, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00016408927526540167, | |
| "loss": 1.9066, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00015015742985315835, | |
| "loss": 1.9583, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00013622558444091503, | |
| "loss": 1.8948, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00012229373902867174, | |
| "loss": 1.9424, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010836189361642844, | |
| "loss": 1.9082, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 9.443004820418513e-05, | |
| "loss": 1.9195, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 8.049820279194182e-05, | |
| "loss": 1.8821, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6.656635737969852e-05, | |
| "loss": 1.9194, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 5.263451196745521e-05, | |
| "loss": 1.902, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.87026665552119e-05, | |
| "loss": 1.9154, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.4770821142968598e-05, | |
| "loss": 1.9181, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_gen_len": 107.9, | |
| "eval_loss": 1.6517904996871948, | |
| "eval_rouge1": 31.4185, | |
| "eval_rouge2": 8.601, | |
| "eval_rougeL": 17.9686, | |
| "eval_rougeLsum": 26.0844, | |
| "eval_runtime": 21981.6496, | |
| "eval_samples_per_second": 0.608, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.0838975730725292e-05, | |
| "loss": 1.9275, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 35889, | |
| "total_flos": 1069022126774016000, | |
| "train_runtime": 160064.9582, | |
| "train_samples_per_second": 0.224 | |
| } | |
| ], | |
| "max_steps": 35889, | |
| "num_train_epochs": 1, | |
| "total_flos": 1069022126774016000, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |