| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 24450, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.897750511247444e-05, |
| "loss": 1.7982, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.7955010224948876e-05, |
| "loss": 1.6538, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.693251533742332e-05, |
| "loss": 1.5864, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.591002044989775e-05, |
| "loss": 1.5518, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.488752556237219e-05, |
| "loss": 1.5189, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.386503067484663e-05, |
| "loss": 1.5078, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.2842535787321066e-05, |
| "loss": 1.4769, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.18200408997955e-05, |
| "loss": 1.4499, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.079754601226994e-05, |
| "loss": 1.4412, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.9775051124744376e-05, |
| "loss": 1.4154, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.875255623721882e-05, |
| "loss": 1.4142, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.773006134969325e-05, |
| "loss": 1.4084, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.670756646216769e-05, |
| "loss": 1.3804, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.568507157464213e-05, |
| "loss": 1.3813, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.4662576687116566e-05, |
| "loss": 1.3566, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.3640081799591e-05, |
| "loss": 1.3624, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7132048832822165, |
| "eval_loss": 1.3430438041687012, |
| "eval_runtime": 167.0718, |
| "eval_samples_per_second": 91.2, |
| "eval_steps_per_second": 11.402, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.261758691206544e-05, |
| "loss": 1.3519, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.159509202453988e-05, |
| "loss": 1.3203, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.057259713701431e-05, |
| "loss": 1.3242, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.9550102249488753e-05, |
| "loss": 1.3069, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.8527607361963193e-05, |
| "loss": 1.2915, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7505112474437626e-05, |
| "loss": 1.3011, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.6482617586912066e-05, |
| "loss": 1.299, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5460122699386503e-05, |
| "loss": 1.2907, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.4437627811860943e-05, |
| "loss": 1.2647, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.341513292433538e-05, |
| "loss": 1.2744, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.239263803680982e-05, |
| "loss": 1.2614, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1370143149284256e-05, |
| "loss": 1.2614, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.0347648261758693e-05, |
| "loss": 1.2551, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.932515337423313e-05, |
| "loss": 1.2401, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.8302658486707566e-05, |
| "loss": 1.2415, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.7280163599182006e-05, |
| "loss": 1.2328, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7264976637836763, |
| "eval_loss": 1.2673941850662231, |
| "eval_runtime": 167.9128, |
| "eval_samples_per_second": 90.744, |
| "eval_steps_per_second": 11.345, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.6257668711656443e-05, |
| "loss": 1.2239, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.523517382413088e-05, |
| "loss": 1.2146, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4212678936605318e-05, |
| "loss": 1.2142, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3190184049079754e-05, |
| "loss": 1.2113, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2167689161554193e-05, |
| "loss": 1.2011, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1145194274028631e-05, |
| "loss": 1.2008, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0122699386503068e-05, |
| "loss": 1.1976, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.100204498977506e-06, |
| "loss": 1.1867, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.077709611451943e-06, |
| "loss": 1.1797, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.05521472392638e-06, |
| "loss": 1.1877, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.032719836400819e-06, |
| "loss": 1.1828, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.0102249488752554e-06, |
| "loss": 1.175, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.987730061349693e-06, |
| "loss": 1.1786, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2.965235173824131e-06, |
| "loss": 1.1747, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.942740286298569e-06, |
| "loss": 1.1736, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 9.202453987730062e-07, |
| "loss": 1.1753, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7363739433117852, |
| "eval_loss": 1.2128783464431763, |
| "eval_runtime": 167.207, |
| "eval_samples_per_second": 91.127, |
| "eval_steps_per_second": 11.393, |
| "step": 24450 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 24450, |
| "total_flos": 1.0143417323453616e+17, |
| "train_loss": 1.316010577430023, |
| "train_runtime": 32228.2163, |
| "train_samples_per_second": 24.276, |
| "train_steps_per_second": 0.759 |
| } |
| ], |
| "max_steps": 24450, |
| "num_train_epochs": 3, |
| "total_flos": 1.0143417323453616e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|