| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 200, |
| "global_step": 174, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05747126436781609, |
| "grad_norm": 179.0, |
| "learning_rate": 1.9411764705882355e-05, |
| "loss": 10.2864, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11494252873563218, |
| "grad_norm": 97.0, |
| "learning_rate": 1.823529411764706e-05, |
| "loss": 4.9771, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1724137931034483, |
| "grad_norm": 94.5, |
| "learning_rate": 1.7058823529411767e-05, |
| "loss": 3.8717, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.22988505747126436, |
| "grad_norm": 101.0, |
| "learning_rate": 1.5882352941176473e-05, |
| "loss": 3.0482, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.28735632183908044, |
| "grad_norm": 90.0, |
| "learning_rate": 1.4705882352941179e-05, |
| "loss": 2.2411, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3448275862068966, |
| "grad_norm": 77.0, |
| "learning_rate": 1.3529411764705885e-05, |
| "loss": 1.6519, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.40229885057471265, |
| "grad_norm": 66.5, |
| "learning_rate": 1.235294117647059e-05, |
| "loss": 1.1463, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.45977011494252873, |
| "grad_norm": 54.25, |
| "learning_rate": 1.1176470588235295e-05, |
| "loss": 0.7845, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 41.75, |
| "learning_rate": 1e-05, |
| "loss": 0.5763, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5747126436781609, |
| "grad_norm": 32.5, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 0.4313, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.632183908045977, |
| "grad_norm": 22.875, |
| "learning_rate": 7.647058823529411e-06, |
| "loss": 0.3371, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 21.375, |
| "learning_rate": 6.470588235294119e-06, |
| "loss": 0.3055, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7471264367816092, |
| "grad_norm": 18.375, |
| "learning_rate": 5.294117647058824e-06, |
| "loss": 0.2467, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.8045977011494253, |
| "grad_norm": 16.25, |
| "learning_rate": 4.11764705882353e-06, |
| "loss": 0.239, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8620689655172413, |
| "grad_norm": 14.3125, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 0.2174, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9195402298850575, |
| "grad_norm": 16.0, |
| "learning_rate": 1.7647058823529414e-06, |
| "loss": 0.1995, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9770114942528736, |
| "grad_norm": 12.1875, |
| "learning_rate": 5.882352941176471e-07, |
| "loss": 0.1996, |
| "step": 170 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 174, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5409736011717632e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|