| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.00040280351244662856, |
| "eval_steps": 3, |
| "global_step": 10, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 4.028035124466285e-05, |
| "grad_norm": 8.708857536315918, |
| "learning_rate": 2e-05, |
| "loss": 5.2158, |
| "step": 1 |
| }, |
| { |
| "epoch": 4.028035124466285e-05, |
| "eval_loss": 1.1413770914077759, |
| "eval_runtime": 126.7757, |
| "eval_samples_per_second": 82.461, |
| "eval_steps_per_second": 41.23, |
| "step": 1 |
| }, |
| { |
| "epoch": 8.05607024893257e-05, |
| "grad_norm": 9.573758125305176, |
| "learning_rate": 4e-05, |
| "loss": 4.4704, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00012084105373398856, |
| "grad_norm": 9.162515640258789, |
| "learning_rate": 6e-05, |
| "loss": 4.0082, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00012084105373398856, |
| "eval_loss": 1.1266303062438965, |
| "eval_runtime": 127.5385, |
| "eval_samples_per_second": 81.967, |
| "eval_steps_per_second": 40.984, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0001611214049786514, |
| "grad_norm": 7.617592811584473, |
| "learning_rate": 8e-05, |
| "loss": 4.689, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00020140175622331428, |
| "grad_norm": 7.327009201049805, |
| "learning_rate": 0.0001, |
| "loss": 3.9992, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00024168210746797713, |
| "grad_norm": 7.231747150421143, |
| "learning_rate": 0.00012, |
| "loss": 4.3443, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00024168210746797713, |
| "eval_loss": 0.9814525842666626, |
| "eval_runtime": 126.3317, |
| "eval_samples_per_second": 82.75, |
| "eval_steps_per_second": 41.375, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00028196245871264, |
| "grad_norm": 6.902768611907959, |
| "learning_rate": 0.00014, |
| "loss": 3.3575, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0003222428099573028, |
| "grad_norm": 7.082916736602783, |
| "learning_rate": 0.00016, |
| "loss": 3.3855, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0003625231612019657, |
| "grad_norm": 7.2913899421691895, |
| "learning_rate": 0.00018, |
| "loss": 2.9172, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0003625231612019657, |
| "eval_loss": 0.6874945759773254, |
| "eval_runtime": 126.3353, |
| "eval_samples_per_second": 82.748, |
| "eval_steps_per_second": 41.374, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00040280351244662856, |
| "grad_norm": 7.292717456817627, |
| "learning_rate": 0.0002, |
| "loss": 2.9434, |
| "step": 10 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 3, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 75069652992000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|