| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1, |
| "global_step": 17, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 27.238601684570312, |
| "learning_rate": 3e-05, |
| "loss": 2.4516, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 10.70695972442627, |
| "learning_rate": 2.974459649525853e-05, |
| "loss": 1.5793, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 10.872797012329102, |
| "learning_rate": 2.8987083441065335e-05, |
| "loss": 1.2955, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 9.115215301513672, |
| "learning_rate": 2.7753257035944216e-05, |
| "loss": 1.2027, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 4.148832321166992, |
| "learning_rate": 2.6085133758309887e-05, |
| "loss": 1.1659, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 3.4040770530700684, |
| "learning_rate": 2.4039519545688848e-05, |
| "loss": 0.9029, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 5.416122913360596, |
| "learning_rate": 2.1686075336648075e-05, |
| "loss": 0.9759, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 2.2975521087646484, |
| "learning_rate": 1.9104944851081247e-05, |
| "loss": 0.8131, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "eval_loss": 0.9700676798820496, |
| "eval_runtime": 3.7846, |
| "eval_samples_per_second": 0.793, |
| "eval_steps_per_second": 0.264, |
| "step": 8 |
| }, |
| { |
| "checkpoint_runtime": 67.6562 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 4.633929252624512, |
| "learning_rate": 1.638402539194953e-05, |
| "loss": 1.0221, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "eval_loss": 0.9538013339042664, |
| "eval_runtime": 3.7737, |
| "eval_samples_per_second": 0.795, |
| "eval_steps_per_second": 0.265, |
| "step": 9 |
| }, |
| { |
| "checkpoint_runtime": 65.5661 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 2.2429122924804688, |
| "learning_rate": 1.3615974608050472e-05, |
| "loss": 0.8594, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "eval_loss": 0.9245920777320862, |
| "eval_runtime": 3.7733, |
| "eval_samples_per_second": 0.795, |
| "eval_steps_per_second": 0.265, |
| "step": 10 |
| }, |
| { |
| "checkpoint_runtime": 63.5691 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 1.361628532409668, |
| "learning_rate": 1.0895055148918758e-05, |
| "loss": 0.8205, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "eval_loss": 0.9057205319404602, |
| "eval_runtime": 3.7752, |
| "eval_samples_per_second": 0.795, |
| "eval_steps_per_second": 0.265, |
| "step": 11 |
| }, |
| { |
| "checkpoint_runtime": 62.9327 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 1.3343229293823242, |
| "learning_rate": 8.313924663351927e-06, |
| "loss": 0.7544, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "eval_loss": 0.8937684893608093, |
| "eval_runtime": 3.7825, |
| "eval_samples_per_second": 0.793, |
| "eval_steps_per_second": 0.264, |
| "step": 12 |
| }, |
| { |
| "checkpoint_runtime": 65.0649 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 1.254239797592163, |
| "learning_rate": 5.960480454311155e-06, |
| "loss": 0.6886, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "eval_loss": 0.8821093440055847, |
| "eval_runtime": 3.7709, |
| "eval_samples_per_second": 0.796, |
| "eval_steps_per_second": 0.265, |
| "step": 13 |
| }, |
| { |
| "checkpoint_runtime": 96.2251 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 0.9345831274986267, |
| "learning_rate": 3.914866241690115e-06, |
| "loss": 0.783, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "eval_loss": 0.8749709129333496, |
| "eval_runtime": 3.8098, |
| "eval_samples_per_second": 0.787, |
| "eval_steps_per_second": 0.262, |
| "step": 14 |
| }, |
| { |
| "checkpoint_runtime": 90.7137 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 0.916324257850647, |
| "learning_rate": 2.2467429640557903e-06, |
| "loss": 0.7929, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "eval_loss": 0.8703281879425049, |
| "eval_runtime": 3.8259, |
| "eval_samples_per_second": 0.784, |
| "eval_steps_per_second": 0.261, |
| "step": 15 |
| }, |
| { |
| "checkpoint_runtime": 76.2246 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.8817465901374817, |
| "learning_rate": 1.0129165589346644e-06, |
| "loss": 0.7776, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "eval_loss": 0.8680707812309265, |
| "eval_runtime": 4.2447, |
| "eval_samples_per_second": 0.707, |
| "eval_steps_per_second": 0.236, |
| "step": 16 |
| }, |
| { |
| "checkpoint_runtime": 82.257 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7789281010627747, |
| "learning_rate": 2.5540350474147324e-07, |
| "loss": 0.702, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.8674740791320801, |
| "eval_runtime": 4.1924, |
| "eval_samples_per_second": 0.716, |
| "eval_steps_per_second": 0.239, |
| "step": 17 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 17, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0006846020937318e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|