| { |
| "best_metric": 1.2869539260864258, |
| "best_model_checkpoint": "./checkpoints/pegasus-xsum/checkpoint-11412", |
| "epoch": 12.0, |
| "global_step": 11412, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.27, |
| "learning_rate": 5e-06, |
| "loss": 8.3438, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1e-05, |
| "loss": 7.5374, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.93178426774675e-06, |
| "loss": 7.0835, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 6.375093936920166, |
| "eval_runtime": 8.2682, |
| "eval_samples_per_second": 60.473, |
| "eval_steps_per_second": 7.62, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 9.863568535493498e-06, |
| "loss": 6.7408, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 9.795352803240248e-06, |
| "loss": 6.0547, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.727137070986997e-06, |
| "loss": 4.6247, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 9.658921338733747e-06, |
| "loss": 2.6363, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.4205403327941895, |
| "eval_runtime": 8.2748, |
| "eval_samples_per_second": 60.424, |
| "eval_steps_per_second": 7.613, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.590705606480494e-06, |
| "loss": 1.6679, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.522489874227244e-06, |
| "loss": 1.5104, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 9.454274141973993e-06, |
| "loss": 1.5184, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 9.386058409720743e-06, |
| "loss": 1.4953, |
| "step": 2816 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.3500312566757202, |
| "eval_runtime": 8.2581, |
| "eval_samples_per_second": 60.546, |
| "eval_steps_per_second": 7.629, |
| "step": 2853 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 9.31784267746749e-06, |
| "loss": 1.4483, |
| "step": 3072 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 9.24962694521424e-06, |
| "loss": 1.3758, |
| "step": 3328 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 9.18141121296099e-06, |
| "loss": 1.3732, |
| "step": 3584 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 1.3236401081085205, |
| "eval_runtime": 8.2531, |
| "eval_samples_per_second": 60.584, |
| "eval_steps_per_second": 7.634, |
| "step": 3804 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 9.11319548070774e-06, |
| "loss": 1.4249, |
| "step": 3840 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 9.044979748454487e-06, |
| "loss": 1.4052, |
| "step": 4096 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 8.976764016201236e-06, |
| "loss": 1.3323, |
| "step": 4352 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 8.908548283947986e-06, |
| "loss": 1.3573, |
| "step": 4608 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 1.311529517173767, |
| "eval_runtime": 8.2933, |
| "eval_samples_per_second": 60.29, |
| "eval_steps_per_second": 7.597, |
| "step": 4755 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 8.840332551694735e-06, |
| "loss": 1.3169, |
| "step": 4864 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 8.772116819441483e-06, |
| "loss": 1.3142, |
| "step": 5120 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 8.703901087188233e-06, |
| "loss": 1.3562, |
| "step": 5376 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 8.635685354934982e-06, |
| "loss": 1.3171, |
| "step": 5632 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 1.3023052215576172, |
| "eval_runtime": 8.3165, |
| "eval_samples_per_second": 60.122, |
| "eval_steps_per_second": 7.575, |
| "step": 5706 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 8.567469622681732e-06, |
| "loss": 1.3457, |
| "step": 5888 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 8.499253890428481e-06, |
| "loss": 1.2753, |
| "step": 6144 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 8.431038158175229e-06, |
| "loss": 1.2898, |
| "step": 6400 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 8.362822425921979e-06, |
| "loss": 1.2848, |
| "step": 6656 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 1.2965139150619507, |
| "eval_runtime": 8.2552, |
| "eval_samples_per_second": 60.568, |
| "eval_steps_per_second": 7.632, |
| "step": 6657 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 8.294606693668728e-06, |
| "loss": 1.272, |
| "step": 6912 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 8.226390961415478e-06, |
| "loss": 1.2554, |
| "step": 7168 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 8.158175229162227e-06, |
| "loss": 1.2676, |
| "step": 7424 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 1.2928757667541504, |
| "eval_runtime": 8.2765, |
| "eval_samples_per_second": 60.412, |
| "eval_steps_per_second": 7.612, |
| "step": 7608 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 8.089959496908975e-06, |
| "loss": 1.305, |
| "step": 7680 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 8.021743764655724e-06, |
| "loss": 1.2323, |
| "step": 7936 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 7.953528032402474e-06, |
| "loss": 1.2779, |
| "step": 8192 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 7.885312300149223e-06, |
| "loss": 1.2661, |
| "step": 8448 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 1.2911019325256348, |
| "eval_runtime": 8.2542, |
| "eval_samples_per_second": 60.575, |
| "eval_steps_per_second": 7.632, |
| "step": 8559 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 7.817096567895973e-06, |
| "loss": 1.2381, |
| "step": 8704 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 7.74888083564272e-06, |
| "loss": 1.2363, |
| "step": 8960 |
| }, |
| { |
| "epoch": 9.69, |
| "learning_rate": 7.68066510338947e-06, |
| "loss": 1.2129, |
| "step": 9216 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 7.612449371136219e-06, |
| "loss": 1.232, |
| "step": 9472 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 1.288682460784912, |
| "eval_runtime": 8.2411, |
| "eval_samples_per_second": 60.671, |
| "eval_steps_per_second": 7.645, |
| "step": 9510 |
| }, |
| { |
| "epoch": 10.23, |
| "learning_rate": 7.544233638882968e-06, |
| "loss": 1.2191, |
| "step": 9728 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 7.476017906629717e-06, |
| "loss": 1.2438, |
| "step": 9984 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 7.4078021743764664e-06, |
| "loss": 1.1995, |
| "step": 10240 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 1.2871848344802856, |
| "eval_runtime": 8.2533, |
| "eval_samples_per_second": 60.582, |
| "eval_steps_per_second": 7.633, |
| "step": 10461 |
| }, |
| { |
| "epoch": 11.04, |
| "learning_rate": 7.339586442123215e-06, |
| "loss": 1.223, |
| "step": 10496 |
| }, |
| { |
| "epoch": 11.31, |
| "learning_rate": 7.2713707098699646e-06, |
| "loss": 1.1895, |
| "step": 10752 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 7.203154977616713e-06, |
| "loss": 1.2133, |
| "step": 11008 |
| }, |
| { |
| "epoch": 11.84, |
| "learning_rate": 7.134939245363463e-06, |
| "loss": 1.1951, |
| "step": 11264 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 1.2869539260864258, |
| "eval_runtime": 8.2328, |
| "eval_samples_per_second": 60.733, |
| "eval_steps_per_second": 7.652, |
| "step": 11412 |
| } |
| ], |
| "max_steps": 38040, |
| "num_train_epochs": 40, |
| "total_flos": 1.3185127263515443e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|