[
  {
    "loss": 4.6796,
    "grad_norm": 5.284444808959961,
    "learning_rate": 0.00018,
    "epoch": 0.5,
    "step": 10
  },
  {
    "loss": 0.6981,
    "grad_norm": 2.575591564178467,
    "learning_rate": 0.00018,
    "epoch": 1.0,
    "step": 20
  },
  {
    "loss": 0.254,
    "grad_norm": 0.9394185543060303,
    "learning_rate": 0.0001577777777777778,
    "epoch": 1.5,
    "step": 30
  },
  {
    "loss": 0.1831,
    "grad_norm": 1.0311447381973267,
    "learning_rate": 0.00013555555555555556,
    "epoch": 2.0,
    "step": 40
  },
  {
    "loss": 0.1353,
    "grad_norm": 0.9932435154914856,
    "learning_rate": 0.00011333333333333334,
    "epoch": 2.5,
    "step": 50
  },
  {
    "loss": 0.132,
    "grad_norm": 0.7911968231201172,
    "learning_rate": 9.111111111111112e-05,
    "epoch": 3.0,
    "step": 60
  },
  {
    "loss": 0.1023,
    "grad_norm": 0.566677451133728,
    "learning_rate": 6.88888888888889e-05,
    "epoch": 3.5,
    "step": 70
  },
  {
    "loss": 0.0795,
    "grad_norm": 0.845538318157196,
    "learning_rate": 4.666666666666667e-05,
    "epoch": 4.0,
    "step": 80
  },
  {
    "loss": 0.0674,
    "grad_norm": 0.8802236318588257,
    "learning_rate": 2.4444444444444445e-05,
    "epoch": 4.5,
    "step": 90
  },
  {
    "loss": 0.0674,
    "grad_norm": 1.0313374996185303,
    "learning_rate": 2.2222222222222225e-06,
    "epoch": 5.0,
    "step": 100
  },
  {
    "train_runtime": 272.1601,
    "train_samples_per_second": 23.075,
    "train_steps_per_second": 0.367,
    "total_flos": 2.585479150123776e+16,
    "train_loss": 0.6398634892702103,
    "epoch": 5.0,
    "step": 100
  }
]