| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.571503404924044, |
| "eval_steps": 500, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13095861707700368, |
| "grad_norm": 9.667867660522461, |
| "learning_rate": 1.912694255281998e-05, |
| "loss": 2.334, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.26191723415400736, |
| "grad_norm": 8.759542465209961, |
| "learning_rate": 1.8253885105639954e-05, |
| "loss": 2.0831, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.392875851231011, |
| "grad_norm": 10.96821117401123, |
| "learning_rate": 1.738082765845993e-05, |
| "loss": 2.012, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5238344683080147, |
| "grad_norm": 7.138434886932373, |
| "learning_rate": 1.6507770211279903e-05, |
| "loss": 1.9943, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6547930853850183, |
| "grad_norm": 6.028991222381592, |
| "learning_rate": 1.563471276409988e-05, |
| "loss": 1.9657, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.785751702462022, |
| "grad_norm": 7.04899263381958, |
| "learning_rate": 1.4761655316919854e-05, |
| "loss": 1.952, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9167103195390257, |
| "grad_norm": 5.429271697998047, |
| "learning_rate": 1.388859786973983e-05, |
| "loss": 1.9391, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0476689366160294, |
| "grad_norm": 6.139547348022461, |
| "learning_rate": 1.3015540422559805e-05, |
| "loss": 1.9181, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.178627553693033, |
| "grad_norm": 4.769629001617432, |
| "learning_rate": 1.214248297537978e-05, |
| "loss": 1.8796, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.3095861707700367, |
| "grad_norm": 5.540957450866699, |
| "learning_rate": 1.1269425528199755e-05, |
| "loss": 1.8727, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.4405447878470403, |
| "grad_norm": 6.230819225311279, |
| "learning_rate": 1.0396368081019731e-05, |
| "loss": 1.8621, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.571503404924044, |
| "grad_norm": 5.886082172393799, |
| "learning_rate": 9.523310633839708e-06, |
| "loss": 1.862, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 11454, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 351073403136000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|