| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 125, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 62.0, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 3.2566, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 9.625, |
| "learning_rate": 4.996811065272715e-05, |
| "loss": 1.5876, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.28125, |
| "learning_rate": 4.9610392803331726e-05, |
| "loss": 1.2605, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 49.75, |
| "learning_rate": 4.8861446190538576e-05, |
| "loss": 1.1706, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 8.8125, |
| "learning_rate": 4.7734522928852436e-05, |
| "loss": 1.0933, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 9.75, |
| "learning_rate": 4.624956317935659e-05, |
| "loss": 1.0288, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 7.59375, |
| "learning_rate": 4.443284232176311e-05, |
| "loss": 0.9685, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 10.4375, |
| "learning_rate": 4.2316506028963374e-05, |
| "loss": 0.89, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 9.625, |
| "learning_rate": 3.993800147062685e-05, |
| "loss": 0.8948, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 16.25, |
| "learning_rate": 3.733941471032425e-05, |
| "loss": 0.85, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 59.25, |
| "learning_rate": 3.4566726020493854e-05, |
| "loss": 0.8486, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 78.5, |
| "learning_rate": 3.1668996291960073e-05, |
| "loss": 0.7863, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 14.5625, |
| "learning_rate": 2.869749893394902e-05, |
| "loss": 0.7641, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 9.5, |
| "learning_rate": 2.570481262505563e-05, |
| "loss": 0.7131, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 11.25, |
| "learning_rate": 2.2743890968333453e-05, |
| "loss": 0.6848, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 13.0625, |
| "learning_rate": 1.986712551234432e-05, |
| "loss": 0.6762, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 10.625, |
| "learning_rate": 1.7125418717390167e-05, |
| "loss": 0.643, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 13.375, |
| "learning_rate": 1.4567283270175847e-05, |
| "loss": 0.6211, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 15.375, |
| "learning_rate": 1.2237983683933638e-05, |
| "loss": 0.6132, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 11.9375, |
| "learning_rate": 1.0178735372827107e-05, |
| "loss": 0.6187, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 23.75, |
| "learning_rate": 8.425975372482405e-06, |
| "loss": 0.6055, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 13.1875, |
| "learning_rate": 7.010717610764453e-06, |
| "loss": 0.6133, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 12.125, |
| "learning_rate": 5.9580041368548775e-06, |
| "loss": 0.6015, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 10.1875, |
| "learning_rate": 5.286462018769748e-06, |
| "loss": 0.5994, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 12.9375, |
| "learning_rate": 5.007973749722316e-06, |
| "loss": 0.5922, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 125, |
| "total_flos": 3.486616950459597e+16, |
| "train_loss": 0.9192673854827881, |
| "train_runtime": 250.2316, |
| "train_samples_per_second": 3.996, |
| "train_steps_per_second": 0.5 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.486616950459597e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|