{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 17,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 1.1143922805786133,
      "learning_rate": 0.0,
      "loss": 0.1007,
      "step": 1
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 1.2392655611038208,
      "learning_rate": 1e-05,
      "loss": 0.1131,
      "step": 2
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 1.21766197681427,
      "learning_rate": 2e-05,
      "loss": 0.099,
      "step": 3
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 1.0714961290359497,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.0867,
      "step": 4
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 0.8589863777160645,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 0.0682,
      "step": 5
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 0.9731529951095581,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.1019,
      "step": 6
    },
    {
      "epoch": 0.4117647058823529,
      "grad_norm": 0.7355734705924988,
      "learning_rate": 1.4666666666666666e-05,
      "loss": 0.0618,
      "step": 7
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.70526123046875,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.0623,
      "step": 8
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 0.5791162848472595,
      "learning_rate": 1.2e-05,
      "loss": 0.0564,
      "step": 9
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.6825107336044312,
      "learning_rate": 1.0666666666666667e-05,
      "loss": 0.0521,
      "step": 10
    },
    {
      "epoch": 0.6470588235294118,
      "grad_norm": 0.5632421970367432,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.0545,
      "step": 11
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 0.4283078610897064,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0335,
      "step": 12
    },
    {
      "epoch": 0.7647058823529411,
      "grad_norm": 0.5495983958244324,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.0593,
      "step": 13
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 0.45312389731407166,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.0381,
      "step": 14
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 0.397487610578537,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0308,
      "step": 15
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 0.5438785552978516,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.0487,
      "step": 16
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.5302333235740662,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.0398,
      "step": 17
    }
  ],
  "logging_steps": 1,
  "max_steps": 17,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|