| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.901960784313726, | |
| "global_step": 3000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.673202614379085e-05, | |
| "loss": 0.0803, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.9874985395490128, | |
| "eval_loss": 0.03710582107305527, | |
| "eval_runtime": 80.6999, | |
| "eval_samples_per_second": 106.06, | |
| "eval_steps_per_second": 2.528, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.3464052287581704e-05, | |
| "loss": 0.0393, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.9884332281808622, | |
| "eval_loss": 0.03642109036445618, | |
| "eval_runtime": 80.6008, | |
| "eval_samples_per_second": 106.19, | |
| "eval_steps_per_second": 2.531, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.0196078431372555e-05, | |
| "loss": 0.0372, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.986447014838182, | |
| "eval_loss": 0.04179869592189789, | |
| "eval_runtime": 80.6457, | |
| "eval_samples_per_second": 106.131, | |
| "eval_steps_per_second": 2.53, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.6928104575163405e-05, | |
| "loss": 0.0114, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_accuracy": 0.9896015889706742, | |
| "eval_loss": 0.0394107885658741, | |
| "eval_runtime": 80.6065, | |
| "eval_samples_per_second": 106.182, | |
| "eval_steps_per_second": 2.531, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.366013071895425e-05, | |
| "loss": 0.0138, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_accuracy": 0.9897184250496553, | |
| "eval_loss": 0.0397811159491539, | |
| "eval_runtime": 80.5677, | |
| "eval_samples_per_second": 106.234, | |
| "eval_steps_per_second": 2.532, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.0392156862745097e-05, | |
| "loss": 0.0127, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_accuracy": 0.9908867858394672, | |
| "eval_loss": 0.034337081015110016, | |
| "eval_runtime": 80.5302, | |
| "eval_samples_per_second": 106.283, | |
| "eval_steps_per_second": 2.533, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.7124183006535947e-05, | |
| "loss": 0.0052, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_accuracy": 0.9911204579974296, | |
| "eval_loss": 0.037403274327516556, | |
| "eval_runtime": 80.6258, | |
| "eval_samples_per_second": 106.157, | |
| "eval_steps_per_second": 2.53, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.38562091503268e-05, | |
| "loss": 0.0018, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_accuracy": 0.9906531136815049, | |
| "eval_loss": 0.04766124114394188, | |
| "eval_runtime": 80.6271, | |
| "eval_samples_per_second": 106.155, | |
| "eval_steps_per_second": 2.53, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.058823529411765e-05, | |
| "loss": 0.0009, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_accuracy": 0.9912372940764108, | |
| "eval_loss": 0.0513538159430027, | |
| "eval_runtime": 80.6405, | |
| "eval_samples_per_second": 106.138, | |
| "eval_steps_per_second": 2.53, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.7320261437908496e-05, | |
| "loss": 0.001, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "eval_accuracy": 0.9904194415235424, | |
| "eval_loss": 0.046223659068346024, | |
| "eval_runtime": 80.5163, | |
| "eval_samples_per_second": 106.301, | |
| "eval_steps_per_second": 2.534, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.4052287581699347e-05, | |
| "loss": 0.0002, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "eval_accuracy": 0.9911204579974296, | |
| "eval_loss": 0.04658184573054314, | |
| "eval_runtime": 80.6281, | |
| "eval_samples_per_second": 106.154, | |
| "eval_steps_per_second": 2.53, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 1.0784313725490197e-05, | |
| "loss": 0.0006, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "eval_accuracy": 0.9905362776025236, | |
| "eval_loss": 0.046631619334220886, | |
| "eval_runtime": 80.5126, | |
| "eval_samples_per_second": 106.306, | |
| "eval_steps_per_second": 2.534, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 7.5163398692810456e-06, | |
| "loss": 0.0001, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "eval_accuracy": 0.990769949760486, | |
| "eval_loss": 0.05143677070736885, | |
| "eval_runtime": 80.6039, | |
| "eval_samples_per_second": 106.186, | |
| "eval_steps_per_second": 2.531, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 4.2483660130718954e-06, | |
| "loss": 0.0005, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "eval_accuracy": 0.9908867858394672, | |
| "eval_loss": 0.04908544197678566, | |
| "eval_runtime": 80.4782, | |
| "eval_samples_per_second": 106.352, | |
| "eval_steps_per_second": 2.535, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 9.80392156862745e-07, | |
| "loss": 0.0004, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "eval_accuracy": 0.9910036219184484, | |
| "eval_loss": 0.049937766045331955, | |
| "eval_runtime": 80.6026, | |
| "eval_samples_per_second": 106.188, | |
| "eval_steps_per_second": 2.531, | |
| "step": 3000 | |
| } | |
| ], | |
| "max_steps": 3060, | |
| "num_train_epochs": 5, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |