[
  {
    "loss": 2.0368,
    "grad_norm": 6.189993858337402,
    "learning_rate": 0.00018,
    "epoch": 0.5,
    "step": 10
  },
  {
    "loss": 0.2678,
    "grad_norm": 1.55028235912323,
    "learning_rate": 0.00019510565162951537,
    "epoch": 1.0,
    "step": 20
  },
  {
    "loss": 0.1586,
    "grad_norm": 1.0364527702331543,
    "learning_rate": 0.00017880107536067218,
    "epoch": 1.5,
    "step": 30
  },
  {
    "loss": 0.1385,
    "grad_norm": 1.0755441188812256,
    "learning_rate": 0.0001529919264233205,
    "epoch": 2.0,
    "step": 40
  },
  {
    "loss": 0.0836,
    "grad_norm": 0.8263890743255615,
    "learning_rate": 0.00012079116908177593,
    "epoch": 2.5,
    "step": 50
  },
  {
    "loss": 0.0688,
    "grad_norm": 0.9091669321060181,
    "learning_rate": 8.608268990399349e-05,
    "epoch": 3.0,
    "step": 60
  },
  {
    "loss": 0.0358,
    "grad_norm": 0.4862966537475586,
    "learning_rate": 5.305284372141095e-05,
    "epoch": 3.5,
    "step": 70
  },
  {
    "loss": 0.0353,
    "grad_norm": 1.0672625303268433,
    "learning_rate": 2.5685517452260567e-05,
    "epoch": 4.0,
    "step": 80
  },
  {
    "loss": 0.0212,
    "grad_norm": 0.45949041843414307,
    "learning_rate": 7.281614543321269e-06,
    "epoch": 4.5,
    "step": 90
  },
  {
    "loss": 0.02,
    "grad_norm": 0.47117879986763,
    "learning_rate": 6.09172980904238e-08,
    "epoch": 5.0,
    "step": 100
  },
  {
    "train_runtime": 182.6488,
    "train_samples_per_second": 34.383,
    "train_steps_per_second": 0.547,
    "total_flos": 9716776772935680.0,
    "train_loss": 0.2866386969387531,
    "epoch": 5.0,
    "step": 100
  }
]