{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.38888888888889,
  "eval_steps": 500,
  "global_step": 140,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 2.4999999999999998e-05,
      "loss": 1.7264,
      "step": 3
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.9999999999999996e-05,
      "loss": 1.9502,
      "step": 6
    },
    {
      "epoch": 1.11,
      "learning_rate": 7.5e-05,
      "loss": 1.8229,
      "step": 9
    },
    {
      "epoch": 1.28,
      "learning_rate": 9.999999999999999e-05,
      "loss": 1.4868,
      "step": 12
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.000125,
      "loss": 1.6675,
      "step": 15
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.00015,
      "loss": 1.2277,
      "step": 18
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.000175,
      "loss": 1.4422,
      "step": 21
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.00019999999999999998,
      "loss": 1.3051,
      "step": 24
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.000225,
      "loss": 1.0676,
      "step": 27
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.00025,
      "loss": 1.1766,
      "step": 30
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.00027499999999999996,
      "loss": 0.9316,
      "step": 33
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.0003,
      "loss": 0.6064,
      "step": 36
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.00029722222222222216,
      "loss": 0.5883,
      "step": 39
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.00029444444444444445,
      "loss": 0.6243,
      "step": 42
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.00029166666666666664,
      "loss": 0.5,
      "step": 45
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.0002888888888888888,
      "loss": 0.4061,
      "step": 48
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.0002861111111111111,
      "loss": 0.3036,
      "step": 51
    },
    {
      "epoch": 7.28,
      "learning_rate": 0.0002833333333333333,
      "loss": 0.3437,
      "step": 54
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.00028055555555555554,
      "loss": 0.2798,
      "step": 57
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.0002777777777777778,
      "loss": 0.1906,
      "step": 60
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.00027499999999999996,
      "loss": 0.2051,
      "step": 63
    },
    {
      "epoch": 9.17,
      "learning_rate": 0.0002722222222222222,
      "loss": 0.1486,
      "step": 66
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.00026944444444444444,
      "loss": 0.1745,
      "step": 69
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.0002666666666666666,
      "loss": 0.1235,
      "step": 72
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.00026388888888888886,
      "loss": 0.1066,
      "step": 75
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.0002611111111111111,
      "loss": 0.0659,
      "step": 78
    },
    {
      "epoch": 11.22,
      "learning_rate": 0.00025833333333333334,
      "loss": 0.0865,
      "step": 81
    },
    {
      "epoch": 11.39,
      "learning_rate": 0.00025555555555555553,
      "loss": 0.0582,
      "step": 84
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.00025277777777777777,
      "loss": 0.0557,
      "step": 87
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.00025,
      "loss": 0.0528,
      "step": 90
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.0002472222222222222,
      "loss": 0.0384,
      "step": 93
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.00024444444444444443,
      "loss": 0.0341,
      "step": 96
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.00024166666666666664,
      "loss": 0.0294,
      "step": 99
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.00023888888888888885,
      "loss": 0.0193,
      "step": 102
    },
    {
      "epoch": 14.39,
      "learning_rate": 0.00023611111111111112,
      "loss": 0.0254,
      "step": 105
    },
    {
      "epoch": 15.17,
      "learning_rate": 0.0002333333333333333,
      "loss": 0.0192,
      "step": 108
    },
    {
      "epoch": 15.33,
      "learning_rate": 0.00023055555555555552,
      "loss": 0.0149,
      "step": 111
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.00022777777777777778,
      "loss": 0.0194,
      "step": 114
    },
    {
      "epoch": 16.28,
      "learning_rate": 0.000225,
      "loss": 0.0177,
      "step": 117
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.00022222222222222218,
      "loss": 0.0144,
      "step": 120
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.00021944444444444444,
      "loss": 0.013,
      "step": 123
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.00021666666666666666,
      "loss": 0.0165,
      "step": 126
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.00021388888888888884,
      "loss": 0.012,
      "step": 129
    },
    {
      "epoch": 18.33,
      "learning_rate": 0.0002111111111111111,
      "loss": 0.0137,
      "step": 132
    },
    {
      "epoch": 19.11,
      "learning_rate": 0.00020833333333333332,
      "loss": 0.0083,
      "step": 135
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.00020555555555555556,
      "loss": 0.0144,
      "step": 138
    }
  ],
  "logging_steps": 3,
  "max_steps": 360,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 1.13811835060224e+16,
  "trial_name": null,
  "trial_params": null
}