| { |
| "best_metric": 0.029607130214571953, |
| "best_model_checkpoint": "model/checkpoint-6500", |
| "epoch": 2.138157894736842, |
| "eval_steps": 500, |
| "global_step": 6500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.7944078947368425e-05, |
| "loss": 0.1494, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 0.9837171052631579, |
| "eval_loss": 0.0790267065167427, |
| "eval_runtime": 113.0978, |
| "eval_samples_per_second": 53.759, |
| "eval_steps_per_second": 6.72, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.588815789473684e-05, |
| "loss": 0.1072, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.9824013157894737, |
| "eval_loss": 0.06455110013484955, |
| "eval_runtime": 113.235, |
| "eval_samples_per_second": 53.694, |
| "eval_steps_per_second": 6.712, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.383223684210527e-05, |
| "loss": 0.0765, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 0.9899671052631579, |
| "eval_loss": 0.04877911135554314, |
| "eval_runtime": 113.1524, |
| "eval_samples_per_second": 53.733, |
| "eval_steps_per_second": 6.717, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.177631578947369e-05, |
| "loss": 0.0742, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_accuracy": 0.9886513157894737, |
| "eval_loss": 0.054583221673965454, |
| "eval_runtime": 113.2769, |
| "eval_samples_per_second": 53.674, |
| "eval_steps_per_second": 6.709, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.972039473684211e-05, |
| "loss": 0.0748, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.82, |
| "eval_accuracy": 0.990625, |
| "eval_loss": 0.04383059963583946, |
| "eval_runtime": 113.3147, |
| "eval_samples_per_second": 53.656, |
| "eval_steps_per_second": 6.707, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.7664473684210526e-05, |
| "loss": 0.0437, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_accuracy": 0.9901315789473685, |
| "eval_loss": 0.05416030064225197, |
| "eval_runtime": 113.1123, |
| "eval_samples_per_second": 53.752, |
| "eval_steps_per_second": 6.719, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.560855263157895e-05, |
| "loss": 0.0134, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_accuracy": 0.990953947368421, |
| "eval_loss": 0.05749928951263428, |
| "eval_runtime": 113.4787, |
| "eval_samples_per_second": 53.578, |
| "eval_steps_per_second": 6.697, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 3.355263157894737e-05, |
| "loss": 0.0277, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_accuracy": 0.9904605263157895, |
| "eval_loss": 0.06314379721879959, |
| "eval_runtime": 113.4629, |
| "eval_samples_per_second": 53.586, |
| "eval_steps_per_second": 6.698, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 3.1496710526315794e-05, |
| "loss": 0.0231, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_accuracy": 0.9912828947368421, |
| "eval_loss": 0.04834901914000511, |
| "eval_runtime": 113.5144, |
| "eval_samples_per_second": 53.561, |
| "eval_steps_per_second": 6.695, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.944078947368421e-05, |
| "loss": 0.0243, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_accuracy": 0.9901315789473685, |
| "eval_loss": 0.06605446338653564, |
| "eval_runtime": 113.6185, |
| "eval_samples_per_second": 53.512, |
| "eval_steps_per_second": 6.689, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2.7384868421052633e-05, |
| "loss": 0.0232, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_accuracy": 0.9932565789473684, |
| "eval_loss": 0.037462268024683, |
| "eval_runtime": 112.9475, |
| "eval_samples_per_second": 53.83, |
| "eval_steps_per_second": 6.729, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 2.5328947368421052e-05, |
| "loss": 0.0198, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_accuracy": 0.9945723684210527, |
| "eval_loss": 0.034890029579401016, |
| "eval_runtime": 112.9353, |
| "eval_samples_per_second": 53.836, |
| "eval_steps_per_second": 6.73, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 2.3273026315789475e-05, |
| "loss": 0.0122, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.14, |
| "eval_accuracy": 0.9962171052631579, |
| "eval_loss": 0.029607130214571953, |
| "eval_runtime": 112.9987, |
| "eval_samples_per_second": 53.806, |
| "eval_steps_per_second": 6.726, |
| "step": 6500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 12160, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "total_flos": 1.368177487872e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|