| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 226, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 1.5063320398330688, |
| "learning_rate": 3.6e-05, |
| "loss": 1.4051, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 1.7991224527359009, |
| "learning_rate": 7.6e-05, |
| "loss": 1.2966, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 1.6531014442443848, |
| "learning_rate": 0.000116, |
| "loss": 1.1773, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 1.5080221891403198, |
| "learning_rate": 0.00015600000000000002, |
| "loss": 1.0591, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 1.470992088317871, |
| "learning_rate": 0.000196, |
| "loss": 1.0119, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.616141438484192, |
| "learning_rate": 0.00019377162629757784, |
| "loss": 0.9798, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 1.362313985824585, |
| "learning_rate": 0.00018685121107266437, |
| "loss": 0.9647, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.2892446517944336, |
| "learning_rate": 0.00017993079584775087, |
| "loss": 0.9841, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0493370294570923, |
| "learning_rate": 0.0001730103806228374, |
| "loss": 0.9551, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.134190559387207, |
| "learning_rate": 0.00016608996539792386, |
| "loss": 1.0072, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 1.3157190084457397, |
| "learning_rate": 0.0001591695501730104, |
| "loss": 1.0067, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.9378752112388611, |
| "eval_runtime": 6.51, |
| "eval_samples_per_second": 15.361, |
| "eval_steps_per_second": 1.997, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.0622222222222222, |
| "grad_norm": 1.69685959815979, |
| "learning_rate": 0.00015224913494809689, |
| "loss": 0.8346, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.1511111111111112, |
| "grad_norm": 1.2508965730667114, |
| "learning_rate": 0.0001453287197231834, |
| "loss": 0.7803, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 1.5068080425262451, |
| "learning_rate": 0.00013840830449826988, |
| "loss": 0.7561, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.3288888888888888, |
| "grad_norm": 1.3432313203811646, |
| "learning_rate": 0.0001314878892733564, |
| "loss": 0.7636, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.4177777777777778, |
| "grad_norm": 1.2835805416107178, |
| "learning_rate": 0.0001245674740484429, |
| "loss": 0.7545, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.5066666666666668, |
| "grad_norm": 1.3430417776107788, |
| "learning_rate": 0.00011764705882352942, |
| "loss": 0.8386, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.5955555555555554, |
| "grad_norm": 1.8119564056396484, |
| "learning_rate": 0.00011072664359861593, |
| "loss": 0.7792, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.6844444444444444, |
| "grad_norm": 1.3746200799942017, |
| "learning_rate": 0.00010380622837370242, |
| "loss": 0.8469, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.7733333333333334, |
| "grad_norm": 1.5120090246200562, |
| "learning_rate": 9.688581314878892e-05, |
| "loss": 0.7332, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8622222222222222, |
| "grad_norm": 1.2306479215621948, |
| "learning_rate": 8.996539792387543e-05, |
| "loss": 0.7843, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.951111111111111, |
| "grad_norm": 1.219697117805481, |
| "learning_rate": 8.304498269896193e-05, |
| "loss": 0.7671, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.9279118180274963, |
| "eval_runtime": 6.5004, |
| "eval_samples_per_second": 15.384, |
| "eval_steps_per_second": 2.0, |
| "step": 226 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 339, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.92047891283968e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|