| { |
| "best_metric": 0.001740535837598145, |
| "best_model_checkpoint": "./results/checkpoint-4400", |
| "epoch": 0.5499340079190497, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 4e-05, |
| "loss": 0.1162, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.5149715416975996, |
| "eval_loss": 0.03933835029602051, |
| "eval_runtime": 296.392, |
| "eval_samples_per_second": 13.634, |
| "eval_steps_per_second": 6.819, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9722489454599276e-05, |
| "loss": 0.0276, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_accuracy": 0.9866369710467706, |
| "eval_loss": 0.01901180110871792, |
| "eval_runtime": 296.5449, |
| "eval_samples_per_second": 13.627, |
| "eval_steps_per_second": 6.815, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.935247539406498e-05, |
| "loss": 0.0101, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 0.998020292006929, |
| "eval_loss": 0.005310355219990015, |
| "eval_runtime": 296.1395, |
| "eval_samples_per_second": 13.646, |
| "eval_steps_per_second": 6.824, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.8982461333530676e-05, |
| "loss": 0.1565, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 0.9965355110121257, |
| "eval_loss": 0.006798223592340946, |
| "eval_runtime": 295.8389, |
| "eval_samples_per_second": 13.659, |
| "eval_steps_per_second": 6.831, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.861244727299638e-05, |
| "loss": 0.0087, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_accuracy": 0.9965355110121257, |
| "eval_loss": 0.005225275177508593, |
| "eval_runtime": 295.8222, |
| "eval_samples_per_second": 13.66, |
| "eval_steps_per_second": 6.832, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.8242433212462075e-05, |
| "loss": 0.0075, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_accuracy": 0.9965355110121257, |
| "eval_loss": 0.0062185670249164104, |
| "eval_runtime": 295.9205, |
| "eval_samples_per_second": 13.656, |
| "eval_steps_per_second": 6.83, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.787241915192778e-05, |
| "loss": 0.009, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_accuracy": 0.9965355110121257, |
| "eval_loss": 0.006374023854732513, |
| "eval_runtime": 295.0119, |
| "eval_samples_per_second": 13.698, |
| "eval_steps_per_second": 6.851, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.7502405091393474e-05, |
| "loss": 0.0097, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_accuracy": 0.996040584013858, |
| "eval_loss": 0.004588003735989332, |
| "eval_runtime": 295.221, |
| "eval_samples_per_second": 13.688, |
| "eval_steps_per_second": 6.846, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.713239103085918e-05, |
| "loss": 0.0112, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 0.9928235585251175, |
| "eval_loss": 0.008035719394683838, |
| "eval_runtime": 295.0591, |
| "eval_samples_per_second": 13.696, |
| "eval_steps_per_second": 6.849, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.676237697032487e-05, |
| "loss": 0.012, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_accuracy": 0.46745854986389507, |
| "eval_loss": 0.004350500181317329, |
| "eval_runtime": 294.9657, |
| "eval_samples_per_second": 13.7, |
| "eval_steps_per_second": 6.852, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.6392362909790576e-05, |
| "loss": 0.0112, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_accuracy": 0.46745854986389507, |
| "eval_loss": 0.001740535837598145, |
| "eval_runtime": 295.095, |
| "eval_samples_per_second": 13.694, |
| "eval_steps_per_second": 6.849, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.602234884925627e-05, |
| "loss": 0.0054, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.006510263774544001, |
| "eval_runtime": 295.0687, |
| "eval_samples_per_second": 13.695, |
| "eval_steps_per_second": 6.849, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.5652334788721976e-05, |
| "loss": 0.0057, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0030648894608020782, |
| "eval_runtime": 295.3544, |
| "eval_samples_per_second": 13.682, |
| "eval_steps_per_second": 6.843, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.528232072818767e-05, |
| "loss": 0.01, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.004091416019946337, |
| "eval_runtime": 295.5003, |
| "eval_samples_per_second": 13.675, |
| "eval_steps_per_second": 6.839, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.4912306667653375e-05, |
| "loss": 0.0058, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.005346548743546009, |
| "eval_runtime": 295.3803, |
| "eval_samples_per_second": 13.681, |
| "eval_steps_per_second": 6.842, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.454229260711907e-05, |
| "loss": 0.0041, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.002612765645608306, |
| "eval_runtime": 295.3189, |
| "eval_samples_per_second": 13.684, |
| "eval_steps_per_second": 6.843, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.4172278546584774e-05, |
| "loss": 0.007, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0033588616643100977, |
| "eval_runtime": 295.3743, |
| "eval_samples_per_second": 13.681, |
| "eval_steps_per_second": 6.842, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.380226448605047e-05, |
| "loss": 0.005, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0025339156854897738, |
| "eval_runtime": 295.0446, |
| "eval_samples_per_second": 13.696, |
| "eval_steps_per_second": 6.85, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.3432250425516174e-05, |
| "loss": 0.0072, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0034889692906290293, |
| "eval_runtime": 295.1897, |
| "eval_samples_per_second": 13.69, |
| "eval_steps_per_second": 6.846, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.306223636498187e-05, |
| "loss": 0.0095, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.00457270722836256, |
| "eval_runtime": 295.4281, |
| "eval_samples_per_second": 13.678, |
| "eval_steps_per_second": 6.841, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.269222230444757e-05, |
| "loss": 0.0045, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0037365807220339775, |
| "eval_runtime": 295.4056, |
| "eval_samples_per_second": 13.679, |
| "eval_steps_per_second": 6.841, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.232220824391327e-05, |
| "loss": 0.0035, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0037374396342784166, |
| "eval_runtime": 295.8559, |
| "eval_samples_per_second": 13.659, |
| "eval_steps_per_second": 6.831, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.1952194183378965e-05, |
| "loss": 0.0058, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0036408177111297846, |
| "eval_runtime": 296.2986, |
| "eval_samples_per_second": 13.638, |
| "eval_steps_per_second": 6.821, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.158218012284467e-05, |
| "loss": 0.0043, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0026330926921218634, |
| "eval_runtime": 295.7809, |
| "eval_samples_per_second": 13.662, |
| "eval_steps_per_second": 6.833, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.121216606231037e-05, |
| "loss": 0.0026, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_accuracy": 0.9982677555060628, |
| "eval_loss": 0.0019218171946704388, |
| "eval_runtime": 296.1402, |
| "eval_samples_per_second": 13.646, |
| "eval_steps_per_second": 6.824, |
| "step": 10000 |
| } |
| ], |
| "max_steps": 54552, |
| "num_train_epochs": 3, |
| "total_flos": 5262173859840000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|