| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 105, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 3.311917304992676, |
| "learning_rate": 9.964234631709188e-06, |
| "loss": 0.8401, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 2.722205877304077, |
| "learning_rate": 9.819814303479268e-06, |
| "loss": 0.6069, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 2.2730047702789307, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.5425, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 2.044387102127075, |
| "learning_rate": 9.213604793270196e-06, |
| "loss": 0.5169, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 2.071448802947998, |
| "learning_rate": 8.765357330018056e-06, |
| "loss": 0.4866, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 2.1249446868896484, |
| "learning_rate": 8.232998006078998e-06, |
| "loss": 0.4732, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.818131923675537, |
| "learning_rate": 7.628418849052523e-06, |
| "loss": 0.4503, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 1.507593035697937, |
| "learning_rate": 6.965125158269619e-06, |
| "loss": 0.2837, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 1.6006555557250977, |
| "learning_rate": 6.257933818722544e-06, |
| "loss": 0.2649, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 1.6145007610321045, |
| "learning_rate": 5.522642316338268e-06, |
| "loss": 0.2675, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 2.141141891479492, |
| "learning_rate": 4.775675848247427e-06, |
| "loss": 0.2645, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 1.4643751382827759, |
| "learning_rate": 4.033720411022235e-06, |
| "loss": 0.2536, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 1.7821851968765259, |
| "learning_rate": 3.3133500630858507e-06, |
| "loss": 0.2423, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.416107416152954, |
| "learning_rate": 2.6306566876350072e-06, |
| "loss": 0.2274, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 1.1690683364868164, |
| "learning_rate": 2.0008905265604316e-06, |
| "loss": 0.1307, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 1.362992525100708, |
| "learning_rate": 1.438119515243277e-06, |
| "loss": 0.1202, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 1.4613163471221924, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.1143, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 1.4345860481262207, |
| "learning_rate": 5.620710549772295e-07, |
| "loss": 0.1121, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 1.3550963401794434, |
| "learning_rate": 2.6836308100417874e-07, |
| "loss": 0.1186, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 1.4644804000854492, |
| "learning_rate": 8.035205700685167e-08, |
| "loss": 0.1219, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.1941214799880981, |
| "learning_rate": 2.237838582483387e-09, |
| "loss": 0.1134, |
| "step": 105 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 105, |
| "total_flos": 183909281169408.0, |
| "train_loss": 0.31197824875513713, |
| "train_runtime": 8609.0009, |
| "train_samples_per_second": 0.389, |
| "train_steps_per_second": 0.012 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 105, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 183909281169408.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|