| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.0128, |
| "eval_steps": 1000, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00064, |
| "grad_norm": 1.6144306659698486, |
| "learning_rate": 1.1520000000000002e-08, |
| "loss": 0.729, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 2.0952296257019043, |
| "learning_rate": 2.4320000000000002e-08, |
| "loss": 0.7295, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 1.3587689399719238, |
| "learning_rate": 3.7120000000000004e-08, |
| "loss": 0.73, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 1.2531732320785522, |
| "learning_rate": 4.9920000000000006e-08, |
| "loss": 0.7221, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.437932014465332, |
| "learning_rate": 6.272000000000001e-08, |
| "loss": 0.7209, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 1.418426752090454, |
| "learning_rate": 7.552e-08, |
| "loss": 0.729, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 1.9476298093795776, |
| "learning_rate": 8.832e-08, |
| "loss": 0.7242, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 1.7948051691055298, |
| "learning_rate": 1.0112000000000001e-07, |
| "loss": 0.7227, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 1.6534360647201538, |
| "learning_rate": 1.1392e-07, |
| "loss": 0.7234, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.0920158624649048, |
| "learning_rate": 1.2672e-07, |
| "loss": 0.7328, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 1.977837085723877, |
| "learning_rate": 1.3952000000000002e-07, |
| "loss": 0.7263, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 1.388983130455017, |
| "learning_rate": 1.5232000000000003e-07, |
| "loss": 0.7286, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 1.2956682443618774, |
| "learning_rate": 1.6512e-07, |
| "loss": 0.7251, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 1.8125052452087402, |
| "learning_rate": 1.7792e-07, |
| "loss": 0.7251, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 1.626846194267273, |
| "learning_rate": 1.9072e-07, |
| "loss": 0.727, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 2.3243086338043213, |
| "learning_rate": 2.0352e-07, |
| "loss": 0.726, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01088, |
| "grad_norm": 1.4734737873077393, |
| "learning_rate": 2.1632e-07, |
| "loss": 0.7252, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01152, |
| "grad_norm": 2.090498685836792, |
| "learning_rate": 2.2912e-07, |
| "loss": 0.7273, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01216, |
| "grad_norm": 1.7563093900680542, |
| "learning_rate": 2.4192000000000004e-07, |
| "loss": 0.719, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 1.449843168258667, |
| "learning_rate": 2.5472000000000005e-07, |
| "loss": 0.7237, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1683910754304000.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|