| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.962406015037594, | |
| "global_step": 160, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 6.25e-05, | |
| "loss": 10.8217, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 9.995728791936504e-05, | |
| "loss": 10.8213, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 9.947761466636014e-05, | |
| "loss": 10.8201, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 10.819129943847656, | |
| "eval_runtime": 6.4186, | |
| "eval_samples_per_second": 69.174, | |
| "eval_steps_per_second": 17.294, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 9.847001329696653e-05, | |
| "loss": 12.1716, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 9.694523495787149e-05, | |
| "loss": 10.8181, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 9.491954909459895e-05, | |
| "loss": 10.8167, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 10.81454086303711, | |
| "eval_runtime": 6.3633, | |
| "eval_samples_per_second": 69.775, | |
| "eval_steps_per_second": 17.444, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 9.241456985587868e-05, | |
| "loss": 12.1668, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.945702546981969e-05, | |
| "loss": 10.8133, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 8.60784730526531e-05, | |
| "loss": 10.8117, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 10.809527397155762, | |
| "eval_runtime": 6.247, | |
| "eval_samples_per_second": 71.074, | |
| "eval_steps_per_second": 17.769, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 8.231496189304704e-05, | |
| "loss": 12.1615, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 7.820664880476256e-05, | |
| "loss": 10.8084, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 7.379736965185368e-05, | |
| "loss": 10.8058, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "eval_loss": 10.802533149719238, | |
| "eval_runtime": 5.8936, | |
| "eval_samples_per_second": 75.335, | |
| "eval_steps_per_second": 18.834, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 6.91341716182545e-05, | |
| "loss": 12.1538, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 6.426681121245527e-05, | |
| "loss": 10.802, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 5.924722336357793e-05, | |
| "loss": 10.8007, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 5.4128967273616625e-05, | |
| "loss": 10.7997, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "eval_loss": 10.798870086669922, | |
| "eval_runtime": 6.2383, | |
| "eval_samples_per_second": 71.174, | |
| "eval_steps_per_second": 17.793, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 4.8966654938622295e-05, | |
| "loss": 12.148, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 4.381536843653262e-05, | |
| "loss": 10.7968, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 3.87300721992097e-05, | |
| "loss": 10.7959, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "eval_loss": 10.794721603393555, | |
| "eval_runtime": 6.3176, | |
| "eval_samples_per_second": 70.28, | |
| "eval_steps_per_second": 17.57, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 3.3765026539765834e-05, | |
| "loss": 12.1442, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 2.8973208692864624e-05, | |
| "loss": 10.7943, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 2.4405747545519963e-05, | |
| "loss": 10.7934, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "eval_loss": 10.792530059814453, | |
| "eval_runtime": 6.3054, | |
| "eval_samples_per_second": 70.416, | |
| "eval_steps_per_second": 17.604, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 2.0111378089837956e-05, | |
| "loss": 12.1419, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 1.6135921418712956e-05, | |
| "loss": 10.7926, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 1.2521795812943704e-05, | |
| "loss": 10.7924, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "eval_loss": 10.791926383972168, | |
| "eval_runtime": 5.785, | |
| "eval_samples_per_second": 76.75, | |
| "eval_steps_per_second": 19.187, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 9.307564136490254e-06, | |
| "loss": 12.1411, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 6.527522369181655e-06, | |
| "loss": 10.7921, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 4.2113336672471245e-06, | |
| "loss": 10.7921, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "eval_loss": 10.79179859161377, | |
| "eval_runtime": 6.2936, | |
| "eval_samples_per_second": 70.548, | |
| "eval_steps_per_second": 17.637, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 2.3837118562592797e-06, | |
| "loss": 12.1411, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 1.064157733632276e-06, | |
| "loss": 10.792, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 2.667509943378721e-07, | |
| "loss": 10.792, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 0.0, | |
| "loss": 10.792, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "eval_loss": 10.791767120361328, | |
| "eval_runtime": 5.8131, | |
| "eval_samples_per_second": 76.379, | |
| "eval_steps_per_second": 19.095, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "step": 160, | |
| "total_flos": 39596851200.0, | |
| "train_loss": 11.18234748840332, | |
| "train_runtime": 256.0921, | |
| "train_samples_per_second": 332.537, | |
| "train_steps_per_second": 0.625 | |
| } | |
| ], | |
| "max_steps": 160, | |
| "num_train_epochs": 10, | |
| "total_flos": 39596851200.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |