| { | |
| "best_metric": 0.18238767981529236, | |
| "best_model_checkpoint": "./beans_outputs/checkpoint-220", | |
| "epoch": 5.0, | |
| "global_step": 220, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.9090909090909094e-05, | |
| "loss": 1.0242, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.898, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7272727272727274e-05, | |
| "loss": 0.7761, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.6363636363636366e-05, | |
| "loss": 0.672, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9398496240601504, | |
| "eval_loss": 0.5671981573104858, | |
| "eval_runtime": 2.1561, | |
| "eval_samples_per_second": 61.686, | |
| "eval_steps_per_second": 2.783, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.5454545454545454e-05, | |
| "loss": 0.583, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.4545454545454546e-05, | |
| "loss": 0.5104, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 0.4104, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.2727272727272728e-05, | |
| "loss": 0.411, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9699248120300752, | |
| "eval_loss": 0.30274108052253723, | |
| "eval_runtime": 4.0989, | |
| "eval_samples_per_second": 32.448, | |
| "eval_steps_per_second": 1.464, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.181818181818182e-05, | |
| "loss": 0.3473, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.0909090909090909e-05, | |
| "loss": 0.3066, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2993, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.2402, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.181818181818183e-06, | |
| "loss": 0.2542, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9699248120300752, | |
| "eval_loss": 0.20783478021621704, | |
| "eval_runtime": 2.6143, | |
| "eval_samples_per_second": 50.874, | |
| "eval_steps_per_second": 2.295, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 7.272727272727273e-06, | |
| "loss": 0.257, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 6.363636363636364e-06, | |
| "loss": 0.2739, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 0.1899, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.1886, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9699248120300752, | |
| "eval_loss": 0.1881999671459198, | |
| "eval_runtime": 4.3782, | |
| "eval_samples_per_second": 30.378, | |
| "eval_steps_per_second": 1.37, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.6363636363636366e-06, | |
| "loss": 0.1929, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 0.1909, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 1.8181818181818183e-06, | |
| "loss": 0.1607, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 9.090909090909091e-07, | |
| "loss": 0.1953, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.1931, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9699248120300752, | |
| "eval_loss": 0.18238767981529236, | |
| "eval_runtime": 4.8076, | |
| "eval_samples_per_second": 27.664, | |
| "eval_steps_per_second": 1.248, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 220, | |
| "total_flos": 4.006371770595533e+17, | |
| "train_loss": 0.3897728649052707, | |
| "train_runtime": 301.7547, | |
| "train_samples_per_second": 17.133, | |
| "train_steps_per_second": 0.729 | |
| } | |
| ], | |
| "max_steps": 220, | |
| "num_train_epochs": 5, | |
| "total_flos": 4.006371770595533e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |