[
  {
    "loss": 4.6726,
    "grad_norm": 5.59173059463501,
    "learning_rate": 0.00018,
    "epoch": 0.5,
    "step": 10
  },
  {
    "loss": 0.6936,
    "grad_norm": 2.593961000442505,
    "learning_rate": 0.00018,
    "epoch": 1.0,
    "step": 20
  },
  {
    "loss": 0.2555,
    "grad_norm": 0.7065151929855347,
    "learning_rate": 0.0001577777777777778,
    "epoch": 1.5,
    "step": 30
  },
  {
    "loss": 0.1841,
    "grad_norm": 1.0202727317810059,
    "learning_rate": 0.00013555555555555556,
    "epoch": 2.0,
    "step": 40
  },
  {
    "loss": 0.1344,
    "grad_norm": 0.9817218780517578,
    "learning_rate": 0.00011333333333333334,
    "epoch": 2.5,
    "step": 50
  },
  {
    "loss": 0.1296,
    "grad_norm": 0.8052711486816406,
    "learning_rate": 9.111111111111112e-05,
    "epoch": 3.0,
    "step": 60
  },
  {
    "loss": 0.1017,
    "grad_norm": 0.5530523657798767,
    "learning_rate": 6.88888888888889e-05,
    "epoch": 3.5,
    "step": 70
  },
  {
    "loss": 0.0789,
    "grad_norm": 0.7885369062423706,
    "learning_rate": 4.666666666666667e-05,
    "epoch": 4.0,
    "step": 80
  },
  {
    "loss": 0.0671,
    "grad_norm": 0.7642138004302979,
    "learning_rate": 2.4444444444444445e-05,
    "epoch": 4.5,
    "step": 90
  },
  {
    "loss": 0.0659,
    "grad_norm": 0.976900041103363,
    "learning_rate": 2.2222222222222225e-06,
    "epoch": 5.0,
    "step": 100
  },
  {
    "train_runtime": 268.9097,
    "train_samples_per_second": 23.354,
    "train_steps_per_second": 0.372,
    "total_flos": 2.585479150123776e+16,
    "train_loss": 0.6383443474769592,
    "epoch": 5.0,
    "step": 100
  }
]
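
The structure above matches the `log_history` produced by a Hugging Face `Trainer` (per-step entries with `loss`, `grad_norm`, `learning_rate`, `epoch`, `step`, plus a final summary record). As a rough aid for inspecting such a log, here is a minimal plotting sketch; the filename `log_history.json` is an assumption for illustration, not something given above.

```python
# Minimal sketch: plot the loss curve and learning-rate schedule from a log
# shaped like the one above, assuming it is saved as "log_history.json"
# (hypothetical filename).
import json

import matplotlib.pyplot as plt

with open("log_history.json") as f:
    log_history = json.load(f)

# Keep only the per-step entries; the final summary record has no "loss" key
# (it reports "train_loss", runtime, and throughput instead).
entries = [e for e in log_history if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]
lrs = [e["learning_rate"] for e in entries]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, marker="o", label="loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

# Second y-axis for the learning rate, since its scale differs from the loss.
ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", linestyle="--", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.savefig("training_curve.png")
```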