{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9869281045751634,
  "eval_steps": 500,
  "global_step": 228,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13071895424836602,
      "grad_norm": 13.640703803287584,
      "learning_rate": 5e-06,
      "loss": 0.9844,
      "step": 10
    },
    {
      "epoch": 0.26143790849673204,
      "grad_norm": 0.9447823765191696,
      "learning_rate": 5e-06,
      "loss": 0.8863,
      "step": 20
    },
    {
      "epoch": 0.39215686274509803,
      "grad_norm": 1.038467083575288,
      "learning_rate": 5e-06,
      "loss": 0.8449,
      "step": 30
    },
    {
      "epoch": 0.5228758169934641,
      "grad_norm": 1.6427820915875102,
      "learning_rate": 5e-06,
      "loss": 0.8257,
      "step": 40
    },
    {
      "epoch": 0.6535947712418301,
      "grad_norm": 1.7936488531165335,
      "learning_rate": 5e-06,
      "loss": 0.8169,
      "step": 50
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 0.7697816321414731,
      "learning_rate": 5e-06,
      "loss": 0.8056,
      "step": 60
    },
    {
      "epoch": 0.9150326797385621,
      "grad_norm": 1.250994290613137,
      "learning_rate": 5e-06,
      "loss": 0.7971,
      "step": 70
    },
    {
      "epoch": 1.0490196078431373,
      "grad_norm": 0.843433753244107,
      "learning_rate": 5e-06,
      "loss": 0.8367,
      "step": 80
    },
    {
      "epoch": 1.1797385620915033,
      "grad_norm": 0.6027656670101825,
      "learning_rate": 5e-06,
      "loss": 0.7483,
      "step": 90
    },
    {
      "epoch": 1.3104575163398693,
      "grad_norm": 0.8024576817469242,
      "learning_rate": 5e-06,
      "loss": 0.7428,
      "step": 100
    },
    {
      "epoch": 1.4411764705882353,
      "grad_norm": 0.7594730875779195,
      "learning_rate": 5e-06,
      "loss": 0.7401,
      "step": 110
    },
    {
      "epoch": 1.5718954248366013,
      "grad_norm": 0.5293998500966177,
      "learning_rate": 5e-06,
      "loss": 0.7426,
      "step": 120
    },
    {
      "epoch": 1.7026143790849673,
      "grad_norm": 0.9249042901353932,
      "learning_rate": 5e-06,
      "loss": 0.7406,
      "step": 130
    },
    {
      "epoch": 1.8333333333333335,
      "grad_norm": 0.5767932991870924,
      "learning_rate": 5e-06,
      "loss": 0.7379,
      "step": 140
    },
    {
      "epoch": 1.9640522875816995,
      "grad_norm": 0.5974379920218519,
      "learning_rate": 5e-06,
      "loss": 0.7337,
      "step": 150
    },
    {
      "epoch": 2.0980392156862746,
      "grad_norm": 1.3799060310730653,
      "learning_rate": 5e-06,
      "loss": 0.7541,
      "step": 160
    },
    {
      "epoch": 2.2287581699346406,
      "grad_norm": 0.8440697657265467,
      "learning_rate": 5e-06,
      "loss": 0.6849,
      "step": 170
    },
    {
      "epoch": 2.3594771241830066,
      "grad_norm": 0.6523664577578698,
      "learning_rate": 5e-06,
      "loss": 0.6828,
      "step": 180
    },
    {
      "epoch": 2.4901960784313726,
      "grad_norm": 0.5604368514967889,
      "learning_rate": 5e-06,
      "loss": 0.6833,
      "step": 190
    },
    {
      "epoch": 2.6209150326797386,
      "grad_norm": 0.651015676014187,
      "learning_rate": 5e-06,
      "loss": 0.6825,
      "step": 200
    },
    {
      "epoch": 2.7516339869281046,
      "grad_norm": 0.6331718263692562,
      "learning_rate": 5e-06,
      "loss": 0.6826,
      "step": 210
    },
    {
      "epoch": 2.8823529411764706,
      "grad_norm": 0.6748382635791591,
      "learning_rate": 5e-06,
      "loss": 0.6867,
      "step": 220
    },
    {
      "epoch": 2.9869281045751634,
      "step": 228,
      "total_flos": 381489732648960.0,
      "train_loss": 0.7633350188272041,
      "train_runtime": 3333.6415,
      "train_samples_per_second": 35.22,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 10,
  "max_steps": 228,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 381489732648960.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}