[
  {
    "loss": 4.6586,
    "grad_norm": 5.435790538787842,
    "learning_rate": 0.00018,
    "epoch": 0.5,
    "step": 10
  },
  {
    "loss": 0.6975,
    "grad_norm": 2.621659517288208,
    "learning_rate": 0.00018,
    "epoch": 1.0,
    "step": 20
  },
  {
    "loss": 0.2556,
    "grad_norm": 0.7023759484291077,
    "learning_rate": 0.0001577777777777778,
    "epoch": 1.5,
    "step": 30
  },
  {
    "loss": 0.1826,
    "grad_norm": 1.0450637340545654,
    "learning_rate": 0.00013555555555555556,
    "epoch": 2.0,
    "step": 40
  },
  {
    "loss": 0.1342,
    "grad_norm": 0.9902332425117493,
    "learning_rate": 0.00011333333333333334,
    "epoch": 2.5,
    "step": 50
  },
  {
    "loss": 0.1286,
    "grad_norm": 0.8150554299354553,
    "learning_rate": 9.111111111111112e-05,
    "epoch": 3.0,
    "step": 60
  },
  {
    "loss": 0.1012,
    "grad_norm": 0.5871483087539673,
    "learning_rate": 6.88888888888889e-05,
    "epoch": 3.5,
    "step": 70
  },
  {
    "loss": 0.0776,
    "grad_norm": 0.8369024991989136,
    "learning_rate": 4.666666666666667e-05,
    "epoch": 4.0,
    "step": 80
  },
  {
    "loss": 0.0656,
    "grad_norm": 0.7048304080963135,
    "learning_rate": 2.4444444444444445e-05,
    "epoch": 4.5,
    "step": 90
  },
  {
    "loss": 0.0654,
    "grad_norm": 1.0004146099090576,
    "learning_rate": 2.2222222222222225e-06,
    "epoch": 5.0,
    "step": 100
  },
  {
    "train_runtime": 493.2617,
    "train_samples_per_second": 12.732,
    "train_steps_per_second": 0.203,
    "total_flos": 2.585479150123776e+16,
    "train_loss": 0.6366996705532074,
    "epoch": 5.0,
    "step": 100
  }
]