| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.4444444444444444, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.022222222222222223, |
| "grad_norm": 6.082106113433838, |
| "learning_rate": 4.89e-05, |
| "loss": 1.0349, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 4.056931972503662, |
| "learning_rate": 4.778888888888889e-05, |
| "loss": 0.8961, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 3.6765899658203125, |
| "learning_rate": 4.6677777777777785e-05, |
| "loss": 0.8295, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 6.516704082489014, |
| "learning_rate": 4.556666666666667e-05, |
| "loss": 0.7245, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 4.329707145690918, |
| "learning_rate": 4.445555555555555e-05, |
| "loss": 0.6863, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 3.380208730697632, |
| "learning_rate": 4.334444444444445e-05, |
| "loss": 0.6626, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.15555555555555556, |
| "grad_norm": 4.52927827835083, |
| "learning_rate": 4.2233333333333334e-05, |
| "loss": 0.6594, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 6.694725513458252, |
| "learning_rate": 4.112222222222222e-05, |
| "loss": 0.5613, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 5.4532623291015625, |
| "learning_rate": 4.001111111111111e-05, |
| "loss": 0.5501, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 4.265353679656982, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.515, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.24444444444444444, |
| "grad_norm": 3.881925344467163, |
| "learning_rate": 3.778888888888889e-05, |
| "loss": 0.4319, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 4.1854095458984375, |
| "learning_rate": 3.667777777777778e-05, |
| "loss": 0.4552, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.28888888888888886, |
| "grad_norm": 6.0221757888793945, |
| "learning_rate": 3.556666666666667e-05, |
| "loss": 0.4155, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 3.2372546195983887, |
| "learning_rate": 3.445555555555556e-05, |
| "loss": 0.3693, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 3.5173611640930176, |
| "learning_rate": 3.334444444444445e-05, |
| "loss": 0.3579, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 2.6714348793029785, |
| "learning_rate": 3.2233333333333335e-05, |
| "loss": 0.3441, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.37777777777777777, |
| "grad_norm": 2.825218439102173, |
| "learning_rate": 3.112222222222222e-05, |
| "loss": 0.2726, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.0160040855407715, |
| "learning_rate": 3.0011111111111114e-05, |
| "loss": 0.2673, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.4222222222222222, |
| "grad_norm": 2.8447208404541016, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.2423, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 4.038785934448242, |
| "learning_rate": 2.7788888888888892e-05, |
| "loss": 0.218, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 4500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 541776150528000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|