| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 100.0, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.5, |
| "learning_rate": 1e-05, |
| "loss": 2.2814, |
| "step": 10 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 2e-05, |
| "loss": 2.2162, |
| "step": 20 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 3e-05, |
| "loss": 2.06, |
| "step": 30 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 4e-05, |
| "loss": 1.7632, |
| "step": 40 |
| }, |
| { |
| "epoch": 12.5, |
| "learning_rate": 5e-05, |
| "loss": 1.3906, |
| "step": 50 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 6e-05, |
| "loss": 1.0958, |
| "step": 60 |
| }, |
| { |
| "epoch": 17.5, |
| "learning_rate": 7e-05, |
| "loss": 0.8168, |
| "step": 70 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 8e-05, |
| "loss": 0.6212, |
| "step": 80 |
| }, |
| { |
| "epoch": 22.5, |
| "learning_rate": 9e-05, |
| "loss": 0.5377, |
| "step": 90 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 0.0001, |
| "loss": 0.4409, |
| "step": 100 |
| }, |
| { |
| "epoch": 27.5, |
| "learning_rate": 9.972609476841367e-05, |
| "loss": 0.4009, |
| "step": 110 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 9.890738003669029e-05, |
| "loss": 0.3598, |
| "step": 120 |
| }, |
| { |
| "epoch": 32.5, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 0.2784, |
| "step": 130 |
| }, |
| { |
| "epoch": 35.0, |
| "learning_rate": 9.567727288213005e-05, |
| "loss": 0.3299, |
| "step": 140 |
| }, |
| { |
| "epoch": 37.5, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 0.3236, |
| "step": 150 |
| }, |
| { |
| "epoch": 40.0, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 0.2714, |
| "step": 160 |
| }, |
| { |
| "epoch": 42.5, |
| "learning_rate": 8.715724127386972e-05, |
| "loss": 0.2654, |
| "step": 170 |
| }, |
| { |
| "epoch": 45.0, |
| "learning_rate": 8.345653031794292e-05, |
| "loss": 0.254, |
| "step": 180 |
| }, |
| { |
| "epoch": 47.5, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 0.2223, |
| "step": 190 |
| }, |
| { |
| "epoch": 50.0, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.2671, |
| "step": 200 |
| }, |
| { |
| "epoch": 52.5, |
| "learning_rate": 7.033683215379002e-05, |
| "loss": 0.2293, |
| "step": 210 |
| }, |
| { |
| "epoch": 55.0, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 0.2235, |
| "step": 220 |
| }, |
| { |
| "epoch": 57.5, |
| "learning_rate": 6.0395584540887963e-05, |
| "loss": 0.2031, |
| "step": 230 |
| }, |
| { |
| "epoch": 60.0, |
| "learning_rate": 5.522642316338268e-05, |
| "loss": 0.2034, |
| "step": 240 |
| }, |
| { |
| "epoch": 62.5, |
| "learning_rate": 5e-05, |
| "loss": 0.189, |
| "step": 250 |
| }, |
| { |
| "epoch": 65.0, |
| "learning_rate": 4.477357683661734e-05, |
| "loss": 0.1788, |
| "step": 260 |
| }, |
| { |
| "epoch": 67.5, |
| "learning_rate": 3.960441545911204e-05, |
| "loss": 0.1698, |
| "step": 270 |
| }, |
| { |
| "epoch": 70.0, |
| "learning_rate": 3.4549150281252636e-05, |
| "loss": 0.2309, |
| "step": 280 |
| }, |
| { |
| "epoch": 72.5, |
| "learning_rate": 2.9663167846209998e-05, |
| "loss": 0.1637, |
| "step": 290 |
| }, |
| { |
| "epoch": 75.0, |
| "learning_rate": 2.500000000000001e-05, |
| "loss": 0.1669, |
| "step": 300 |
| }, |
| { |
| "epoch": 77.5, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 0.1786, |
| "step": 310 |
| }, |
| { |
| "epoch": 80.0, |
| "learning_rate": 1.6543469682057106e-05, |
| "loss": 0.1792, |
| "step": 320 |
| }, |
| { |
| "epoch": 82.5, |
| "learning_rate": 1.2842758726130283e-05, |
| "loss": 0.1874, |
| "step": 330 |
| }, |
| { |
| "epoch": 85.0, |
| "learning_rate": 9.549150281252633e-06, |
| "loss": 0.1805, |
| "step": 340 |
| }, |
| { |
| "epoch": 87.5, |
| "learning_rate": 6.698729810778065e-06, |
| "loss": 0.196, |
| "step": 350 |
| }, |
| { |
| "epoch": 90.0, |
| "learning_rate": 4.322727117869951e-06, |
| "loss": 0.1615, |
| "step": 360 |
| }, |
| { |
| "epoch": 92.5, |
| "learning_rate": 2.4471741852423237e-06, |
| "loss": 0.1481, |
| "step": 370 |
| }, |
| { |
| "epoch": 95.0, |
| "learning_rate": 1.0926199633097157e-06, |
| "loss": 0.1633, |
| "step": 380 |
| }, |
| { |
| "epoch": 97.5, |
| "learning_rate": 2.7390523158633554e-07, |
| "loss": 0.1673, |
| "step": 390 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 0.0, |
| "loss": 0.1748, |
| "step": 400 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 400, |
| "total_flos": 1.3814551337974377e+23, |
| "train_loss": 0.4972930908203125, |
| "train_runtime": 820.6819, |
| "train_samples_per_second": 4242.326, |
| "train_steps_per_second": 0.487 |
| } |
| ], |
| "max_steps": 400, |
| "num_train_epochs": 100, |
| "start_time": 1656513160.5917685, |
| "total_flos": 1.3814551337974377e+23, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|