| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 31250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.0007822011830285192, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0016, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.0016089362325146794, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.0006, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.695078238612041e-05, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.0, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.0024884792510420084, |
| "learning_rate": 4.2e-05, |
| "loss": 0.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 5.6645851145731285e-05, |
| "learning_rate": 4e-05, |
| "loss": 0.0001, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.00015396725211758167, |
| "learning_rate": 3.8e-05, |
| "loss": 0.0001, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.0011548411566764116, |
| "learning_rate": 3.6e-05, |
| "loss": 0.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 7.002039637882262e-05, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.0001, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.00010721544094849378, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.0, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.72808642371092e-05, |
| "learning_rate": 3e-05, |
| "loss": 0.0001, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.643113137106411e-05, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.0, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.0432020644657314e-05, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.0, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 3.795513839577325e-05, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 4.7567787987645715e-05, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.0002, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.121076249750331e-05, |
| "learning_rate": 2e-05, |
| "loss": 0.0, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.4232242392608896e-05, |
| "learning_rate": 1.8e-05, |
| "loss": 0.0, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.8679733329918236e-05, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.0, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4709683455293998e-05, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.0, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.0004699587298091501, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.580141755170189e-06, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.317455644311849e-05, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.00012563263589981943, |
| "learning_rate": 6e-06, |
| "loss": 0.0, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 6.097168807173148e-06, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0, |
| "step": 28750 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 7.088618986017536e-06, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.0, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4892546460032463e-05, |
| "learning_rate": 0.0, |
| "loss": 0.0, |
| "step": 31250 |
| } |
| ], |
| "logging_steps": 1250, |
| "max_steps": 31250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1250, |
| "total_flos": 1.0770363130241352e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|