| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.0256, |
| "eval_steps": 1000, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00064, |
| "grad_norm": 1.6144306659698486, |
| "learning_rate": 1.1520000000000002e-08, |
| "loss": 0.729, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 2.0952296257019043, |
| "learning_rate": 2.4320000000000002e-08, |
| "loss": 0.7295, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 1.3587689399719238, |
| "learning_rate": 3.7120000000000004e-08, |
| "loss": 0.73, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 1.2531732320785522, |
| "learning_rate": 4.9920000000000006e-08, |
| "loss": 0.7221, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.437932014465332, |
| "learning_rate": 6.272000000000001e-08, |
| "loss": 0.7209, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 1.418426752090454, |
| "learning_rate": 7.552e-08, |
| "loss": 0.729, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 1.9476298093795776, |
| "learning_rate": 8.832e-08, |
| "loss": 0.7242, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 1.7948051691055298, |
| "learning_rate": 1.0112000000000001e-07, |
| "loss": 0.7227, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 1.6534360647201538, |
| "learning_rate": 1.1392e-07, |
| "loss": 0.7234, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.0920158624649048, |
| "learning_rate": 1.2672e-07, |
| "loss": 0.7328, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 1.977837085723877, |
| "learning_rate": 1.3952000000000002e-07, |
| "loss": 0.7263, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 1.388983130455017, |
| "learning_rate": 1.5232000000000003e-07, |
| "loss": 0.7286, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 1.2956682443618774, |
| "learning_rate": 1.6512e-07, |
| "loss": 0.7251, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 1.8125052452087402, |
| "learning_rate": 1.7792e-07, |
| "loss": 0.7251, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 1.626846194267273, |
| "learning_rate": 1.9072e-07, |
| "loss": 0.727, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 2.3243086338043213, |
| "learning_rate": 2.0352e-07, |
| "loss": 0.726, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01088, |
| "grad_norm": 1.4734737873077393, |
| "learning_rate": 2.1632e-07, |
| "loss": 0.7252, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01152, |
| "grad_norm": 2.090498685836792, |
| "learning_rate": 2.2912e-07, |
| "loss": 0.7273, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01216, |
| "grad_norm": 1.7563093900680542, |
| "learning_rate": 2.4192000000000004e-07, |
| "loss": 0.719, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 1.449843168258667, |
| "learning_rate": 2.5472000000000005e-07, |
| "loss": 0.7237, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01344, |
| "grad_norm": 141396.296875, |
| "learning_rate": 5.350742447516642e-07, |
| "loss": 0.7217, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.01408, |
| "grad_norm": 102339.1640625, |
| "learning_rate": 5.606758832565284e-07, |
| "loss": 0.7215, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01472, |
| "grad_norm": 134052.9375, |
| "learning_rate": 5.862775217613928e-07, |
| "loss": 0.7115, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.01536, |
| "grad_norm": 87181.984375, |
| "learning_rate": 6.118791602662571e-07, |
| "loss": 0.7241, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 100231.328125, |
| "learning_rate": 6.374807987711214e-07, |
| "loss": 0.71, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01664, |
| "grad_norm": 136721.484375, |
| "learning_rate": 6.630824372759858e-07, |
| "loss": 0.7188, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.01728, |
| "grad_norm": 115868.8125, |
| "learning_rate": 6.8868407578085e-07, |
| "loss": 0.7199, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.01792, |
| "grad_norm": 70205.1484375, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 0.7299, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.01856, |
| "grad_norm": 98926.4453125, |
| "learning_rate": 7.398873527905787e-07, |
| "loss": 0.7159, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 134108.140625, |
| "learning_rate": 7.65488991295443e-07, |
| "loss": 0.7122, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01984, |
| "grad_norm": 103719.140625, |
| "learning_rate": 7.910906298003073e-07, |
| "loss": 0.7185, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.02048, |
| "grad_norm": 85624.953125, |
| "learning_rate": 8.166922683051716e-07, |
| "loss": 0.718, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.02112, |
| "grad_norm": 138824.15625, |
| "learning_rate": 8.422939068100359e-07, |
| "loss": 0.713, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.02176, |
| "grad_norm": 73629.0859375, |
| "learning_rate": 8.678955453149002e-07, |
| "loss": 0.7186, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 132493.0, |
| "learning_rate": 8.934971838197646e-07, |
| "loss": 0.7133, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.02304, |
| "grad_norm": 85223.625, |
| "learning_rate": 9.190988223246289e-07, |
| "loss": 0.7124, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02368, |
| "grad_norm": 77868.78125, |
| "learning_rate": 9.447004608294931e-07, |
| "loss": 0.7058, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.02432, |
| "grad_norm": 75874.3046875, |
| "learning_rate": 9.703020993343575e-07, |
| "loss": 0.7139, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.02496, |
| "grad_norm": 151937.703125, |
| "learning_rate": 9.959037378392218e-07, |
| "loss": 0.713, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 161711.671875, |
| "learning_rate": 1.021505376344086e-06, |
| "loss": 0.7137, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 78125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3367295286497280.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|