{
  "best_metric": 2.588986396789551,
  "best_model_checkpoint": "output/radiohead/checkpoint-240",
  "epoch": 8.0,
  "global_step": 240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 0.0001280093426996125,
      "loss": 3.2073,
      "step": 5
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00010290000000000001,
      "loss": 2.9899,
      "step": 10
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.86e-05,
      "loss": 2.8292,
      "step": 15
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.4300000000000014e-05,
      "loss": 2.8291,
      "step": 20
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.190657300387505e-06,
      "loss": 2.8479,
      "step": 25
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 2.9261,
      "step": 30
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.8060529232025146,
      "eval_runtime": 2.0848,
      "eval_samples_per_second": 22.545,
      "eval_steps_per_second": 2.878,
      "step": 30
    },
    {
      "epoch": 1.17,
      "learning_rate": 9.190657300387513e-06,
      "loss": 2.6517,
      "step": 35
    },
    {
      "epoch": 1.33,
      "learning_rate": 3.429999999999997e-05,
      "loss": 2.7329,
      "step": 40
    },
    {
      "epoch": 1.5,
      "learning_rate": 6.859999999999999e-05,
      "loss": 2.7524,
      "step": 45
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00010290000000000001,
      "loss": 2.5591,
      "step": 50
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00012800934269961248,
      "loss": 2.6201,
      "step": 55
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.0001372,
      "loss": 2.5207,
      "step": 60
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.7088782787323,
      "eval_runtime": 2.1182,
      "eval_samples_per_second": 22.189,
      "eval_steps_per_second": 2.833,
      "step": 60
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.00012800934269961253,
      "loss": 2.3985,
      "step": 65
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.00010289999999999998,
      "loss": 2.3027,
      "step": 70
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.860000000000001e-05,
      "loss": 2.1903,
      "step": 75
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.4300000000000054e-05,
      "loss": 2.2458,
      "step": 80
    },
    {
      "epoch": 2.83,
      "learning_rate": 9.190657300387498e-06,
      "loss": 2.2898,
      "step": 85
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0,
      "loss": 2.2945,
      "step": 90
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.6637511253356934,
      "eval_runtime": 2.1338,
      "eval_samples_per_second": 22.027,
      "eval_steps_per_second": 2.812,
      "step": 90
    },
    {
      "epoch": 3.17,
      "learning_rate": 9.190657300387474e-06,
      "loss": 2.1611,
      "step": 95
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.4300000000000014e-05,
      "loss": 2.0392,
      "step": 100
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.859999999999997e-05,
      "loss": 2.3157,
      "step": 105
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.00010289999999999994,
      "loss": 2.1789,
      "step": 110
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.0001280093426996125,
      "loss": 2.1359,
      "step": 115
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0001372,
      "loss": 2.0964,
      "step": 120
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.617748737335205,
      "eval_runtime": 2.1438,
      "eval_samples_per_second": 21.924,
      "eval_steps_per_second": 2.799,
      "step": 120
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.00012800934269961248,
      "loss": 1.8717,
      "step": 125
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.00010290000000000009,
      "loss": 1.9838,
      "step": 130
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.860000000000003e-05,
      "loss": 2.0446,
      "step": 135
    },
    {
      "epoch": 4.67,
      "learning_rate": 3.429999999999996e-05,
      "loss": 1.8428,
      "step": 140
    },
    {
      "epoch": 4.83,
      "learning_rate": 9.190657300387574e-06,
      "loss": 1.9594,
      "step": 145
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 2.0192,
      "step": 150
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.6335015296936035,
      "eval_runtime": 2.1534,
      "eval_samples_per_second": 21.826,
      "eval_steps_per_second": 2.786,
      "step": 150
    },
    {
      "epoch": 5.17,
      "learning_rate": 9.190657300387467e-06,
      "loss": 1.7401,
      "step": 155
    },
    {
      "epoch": 5.33,
      "learning_rate": 3.429999999999989e-05,
      "loss": 1.8589,
      "step": 160
    },
    {
      "epoch": 5.5,
      "learning_rate": 6.859999999999984e-05,
      "loss": 1.7454,
      "step": 165
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.00010290000000000003,
      "loss": 1.8506,
      "step": 170
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.00012800934269961248,
      "loss": 1.7711,
      "step": 175
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.0001372,
      "loss": 1.6952,
      "step": 180
    },
    {
      "epoch": 6.0,
      "eval_loss": 2.604886293411255,
      "eval_runtime": 2.1277,
      "eval_samples_per_second": 22.09,
      "eval_steps_per_second": 2.82,
      "step": 180
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.00012800934269961253,
      "loss": 1.7717,
      "step": 185
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.00010290000000000012,
      "loss": 1.577,
      "step": 190
    },
    {
      "epoch": 6.5,
      "learning_rate": 6.859999999999993e-05,
      "loss": 1.5197,
      "step": 195
    },
    {
      "epoch": 6.67,
      "learning_rate": 3.429999999999998e-05,
      "loss": 1.6176,
      "step": 200
    },
    {
      "epoch": 6.83,
      "learning_rate": 9.19065730038752e-06,
      "loss": 1.562,
      "step": 205
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.0,
      "loss": 1.6157,
      "step": 210
    },
    {
      "epoch": 7.0,
      "eval_loss": 2.6068503856658936,
      "eval_runtime": 2.1309,
      "eval_samples_per_second": 22.056,
      "eval_steps_per_second": 2.816,
      "step": 210
    },
    {
      "epoch": 7.17,
      "learning_rate": 9.190657300387459e-06,
      "loss": 1.5445,
      "step": 215
    },
    {
      "epoch": 7.33,
      "learning_rate": 3.429999999999988e-05,
      "loss": 1.5187,
      "step": 220
    },
    {
      "epoch": 7.5,
      "learning_rate": 6.859999999999982e-05,
      "loss": 1.4775,
      "step": 225
    },
    {
      "epoch": 7.67,
      "learning_rate": 0.00010290000000000002,
      "loss": 1.3245,
      "step": 230
    },
    {
      "epoch": 7.83,
      "learning_rate": 0.00012800934269961248,
      "loss": 1.6077,
      "step": 235
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.0001372,
      "loss": 1.5085,
      "step": 240
    },
    {
      "epoch": 8.0,
      "eval_loss": 2.588986396789551,
      "eval_runtime": 2.1323,
      "eval_samples_per_second": 22.042,
      "eval_steps_per_second": 2.814,
      "step": 240
    }
  ],
  "max_steps": 300,
  "num_train_epochs": 10,
  "total_flos": 243524173824000.0,
  "trial_name": null,
  "trial_params": null
}