| { |
| "best_metric": 0.9774587154388428, |
| "best_model_checkpoint": "./cardio-alpaca/checkpoint-800", |
| "epoch": 23.947614593077642, |
| "global_step": 800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.3, |
| "learning_rate": 1e-05, |
| "loss": 2.5136, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 2e-05, |
| "loss": 2.5583, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3e-05, |
| "loss": 2.5279, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 4e-05, |
| "loss": 2.3068, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 5e-05, |
| "loss": 1.9235, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 6e-05, |
| "loss": 1.4189, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 7e-05, |
| "loss": 1.2615, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 8e-05, |
| "loss": 1.1391, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 9e-05, |
| "loss": 1.1018, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 0.0001, |
| "loss": 1.0779, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 9.862068965517242e-05, |
| "loss": 1.0879, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 9.724137931034482e-05, |
| "loss": 1.063, |
| "step": 120 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 9.586206896551725e-05, |
| "loss": 1.0524, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 9.448275862068966e-05, |
| "loss": 1.0422, |
| "step": 140 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 9.310344827586207e-05, |
| "loss": 1.0317, |
| "step": 150 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 9.172413793103448e-05, |
| "loss": 1.0593, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 9.034482758620691e-05, |
| "loss": 1.0195, |
| "step": 170 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 8.896551724137931e-05, |
| "loss": 1.0259, |
| "step": 180 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 8.758620689655173e-05, |
| "loss": 1.0097, |
| "step": 190 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 8.620689655172413e-05, |
| "loss": 1.023, |
| "step": 200 |
| }, |
| { |
| "epoch": 5.99, |
| "eval_loss": 1.0707768201828003, |
| "eval_runtime": 24.5679, |
| "eval_samples_per_second": 20.352, |
| "eval_steps_per_second": 2.564, |
| "step": 200 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 8.482758620689656e-05, |
| "loss": 1.0123, |
| "step": 210 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 8.344827586206896e-05, |
| "loss": 1.0256, |
| "step": 220 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 8.206896551724138e-05, |
| "loss": 1.0033, |
| "step": 230 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 8.06896551724138e-05, |
| "loss": 0.9767, |
| "step": 240 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 7.931034482758621e-05, |
| "loss": 0.9355, |
| "step": 250 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 7.793103448275862e-05, |
| "loss": 0.9688, |
| "step": 260 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 7.655172413793103e-05, |
| "loss": 0.9503, |
| "step": 270 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 7.517241379310345e-05, |
| "loss": 0.9323, |
| "step": 280 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 7.379310344827587e-05, |
| "loss": 0.9221, |
| "step": 290 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 7.241379310344828e-05, |
| "loss": 0.9221, |
| "step": 300 |
| }, |
| { |
| "epoch": 9.28, |
| "learning_rate": 7.103448275862069e-05, |
| "loss": 0.9513, |
| "step": 310 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 6.96551724137931e-05, |
| "loss": 0.9311, |
| "step": 320 |
| }, |
| { |
| "epoch": 9.88, |
| "learning_rate": 6.827586206896552e-05, |
| "loss": 0.9188, |
| "step": 330 |
| }, |
| { |
| "epoch": 10.18, |
| "learning_rate": 6.689655172413794e-05, |
| "loss": 0.9187, |
| "step": 340 |
| }, |
| { |
| "epoch": 10.48, |
| "learning_rate": 6.551724137931034e-05, |
| "loss": 0.8946, |
| "step": 350 |
| }, |
| { |
| "epoch": 10.78, |
| "learning_rate": 6.413793103448276e-05, |
| "loss": 0.9384, |
| "step": 360 |
| }, |
| { |
| "epoch": 11.08, |
| "learning_rate": 6.275862068965517e-05, |
| "loss": 0.9086, |
| "step": 370 |
| }, |
| { |
| "epoch": 11.38, |
| "learning_rate": 6.137931034482759e-05, |
| "loss": 0.9198, |
| "step": 380 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 6e-05, |
| "loss": 0.899, |
| "step": 390 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 5.862068965517241e-05, |
| "loss": 0.8919, |
| "step": 400 |
| }, |
| { |
| "epoch": 11.97, |
| "eval_loss": 0.9881613254547119, |
| "eval_runtime": 24.5256, |
| "eval_samples_per_second": 20.387, |
| "eval_steps_per_second": 2.569, |
| "step": 400 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 5.7241379310344835e-05, |
| "loss": 0.9097, |
| "step": 410 |
| }, |
| { |
| "epoch": 12.57, |
| "learning_rate": 5.5862068965517245e-05, |
| "loss": 0.917, |
| "step": 420 |
| }, |
| { |
| "epoch": 12.87, |
| "learning_rate": 5.4482758620689655e-05, |
| "loss": 0.9038, |
| "step": 430 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 5.3103448275862065e-05, |
| "loss": 0.897, |
| "step": 440 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 5.172413793103449e-05, |
| "loss": 0.8834, |
| "step": 450 |
| }, |
| { |
| "epoch": 13.77, |
| "learning_rate": 5.03448275862069e-05, |
| "loss": 0.9135, |
| "step": 460 |
| }, |
| { |
| "epoch": 14.07, |
| "learning_rate": 4.896551724137931e-05, |
| "loss": 0.9002, |
| "step": 470 |
| }, |
| { |
| "epoch": 14.37, |
| "learning_rate": 4.7586206896551725e-05, |
| "loss": 0.9042, |
| "step": 480 |
| }, |
| { |
| "epoch": 14.67, |
| "learning_rate": 4.6206896551724135e-05, |
| "loss": 0.8766, |
| "step": 490 |
| }, |
| { |
| "epoch": 14.97, |
| "learning_rate": 4.482758620689655e-05, |
| "loss": 0.8575, |
| "step": 500 |
| }, |
| { |
| "epoch": 15.27, |
| "learning_rate": 4.344827586206897e-05, |
| "loss": 0.8919, |
| "step": 510 |
| }, |
| { |
| "epoch": 15.57, |
| "learning_rate": 4.2068965517241385e-05, |
| "loss": 0.8938, |
| "step": 520 |
| }, |
| { |
| "epoch": 15.87, |
| "learning_rate": 4.0689655172413795e-05, |
| "loss": 0.8864, |
| "step": 530 |
| }, |
| { |
| "epoch": 16.16, |
| "learning_rate": 3.931034482758621e-05, |
| "loss": 0.88, |
| "step": 540 |
| }, |
| { |
| "epoch": 16.46, |
| "learning_rate": 3.793103448275862e-05, |
| "loss": 0.868, |
| "step": 550 |
| }, |
| { |
| "epoch": 16.76, |
| "learning_rate": 3.655172413793104e-05, |
| "loss": 0.8902, |
| "step": 560 |
| }, |
| { |
| "epoch": 17.06, |
| "learning_rate": 3.517241379310345e-05, |
| "loss": 0.8821, |
| "step": 570 |
| }, |
| { |
| "epoch": 17.36, |
| "learning_rate": 3.3793103448275865e-05, |
| "loss": 0.8836, |
| "step": 580 |
| }, |
| { |
| "epoch": 17.66, |
| "learning_rate": 3.2413793103448275e-05, |
| "loss": 0.8727, |
| "step": 590 |
| }, |
| { |
| "epoch": 17.96, |
| "learning_rate": 3.103448275862069e-05, |
| "loss": 0.8393, |
| "step": 600 |
| }, |
| { |
| "epoch": 17.96, |
| "eval_loss": 0.9780347943305969, |
| "eval_runtime": 24.5589, |
| "eval_samples_per_second": 20.359, |
| "eval_steps_per_second": 2.565, |
| "step": 600 |
| }, |
| { |
| "epoch": 18.26, |
| "learning_rate": 2.96551724137931e-05, |
| "loss": 0.8775, |
| "step": 610 |
| }, |
| { |
| "epoch": 18.56, |
| "learning_rate": 2.8275862068965518e-05, |
| "loss": 0.8866, |
| "step": 620 |
| }, |
| { |
| "epoch": 18.86, |
| "learning_rate": 2.689655172413793e-05, |
| "loss": 0.8682, |
| "step": 630 |
| }, |
| { |
| "epoch": 19.16, |
| "learning_rate": 2.551724137931035e-05, |
| "loss": 0.8755, |
| "step": 640 |
| }, |
| { |
| "epoch": 19.46, |
| "learning_rate": 2.413793103448276e-05, |
| "loss": 0.8382, |
| "step": 650 |
| }, |
| { |
| "epoch": 19.76, |
| "learning_rate": 2.2758620689655175e-05, |
| "loss": 0.8743, |
| "step": 660 |
| }, |
| { |
| "epoch": 20.06, |
| "learning_rate": 2.137931034482759e-05, |
| "loss": 0.873, |
| "step": 670 |
| }, |
| { |
| "epoch": 20.36, |
| "learning_rate": 2e-05, |
| "loss": 0.8768, |
| "step": 680 |
| }, |
| { |
| "epoch": 20.65, |
| "learning_rate": 1.8620689655172415e-05, |
| "loss": 0.8625, |
| "step": 690 |
| }, |
| { |
| "epoch": 20.95, |
| "learning_rate": 1.7241379310344828e-05, |
| "loss": 0.8362, |
| "step": 700 |
| }, |
| { |
| "epoch": 21.25, |
| "learning_rate": 1.586206896551724e-05, |
| "loss": 0.8592, |
| "step": 710 |
| }, |
| { |
| "epoch": 21.55, |
| "learning_rate": 1.4482758620689657e-05, |
| "loss": 0.8853, |
| "step": 720 |
| }, |
| { |
| "epoch": 21.85, |
| "learning_rate": 1.310344827586207e-05, |
| "loss": 0.8596, |
| "step": 730 |
| }, |
| { |
| "epoch": 22.15, |
| "learning_rate": 1.1724137931034483e-05, |
| "loss": 0.8543, |
| "step": 740 |
| }, |
| { |
| "epoch": 22.45, |
| "learning_rate": 1.0344827586206897e-05, |
| "loss": 0.8372, |
| "step": 750 |
| }, |
| { |
| "epoch": 22.75, |
| "learning_rate": 8.96551724137931e-06, |
| "loss": 0.8728, |
| "step": 760 |
| }, |
| { |
| "epoch": 23.05, |
| "learning_rate": 7.586206896551724e-06, |
| "loss": 0.8523, |
| "step": 770 |
| }, |
| { |
| "epoch": 23.35, |
| "learning_rate": 6.206896551724138e-06, |
| "loss": 0.8709, |
| "step": 780 |
| }, |
| { |
| "epoch": 23.65, |
| "learning_rate": 4.827586206896552e-06, |
| "loss": 0.8516, |
| "step": 790 |
| }, |
| { |
| "epoch": 23.95, |
| "learning_rate": 3.448275862068966e-06, |
| "loss": 0.8354, |
| "step": 800 |
| }, |
| { |
| "epoch": 23.95, |
| "eval_loss": 0.9774587154388428, |
| "eval_runtime": 24.473, |
| "eval_samples_per_second": 20.431, |
| "eval_steps_per_second": 2.574, |
| "step": 800 |
| } |
| ], |
| "max_steps": 825, |
| "num_train_epochs": 25, |
| "total_flos": 4.370818958917632e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|