| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 62.5, |
| "eval_steps": 500, |
| "global_step": 250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.271669626235962, |
| "learning_rate": 0.0001992114701314478, |
| "loss": 2.5012, |
| "step": 10 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.9371187686920166, |
| "learning_rate": 0.0001968583161128631, |
| "loss": 1.7466, |
| "step": 20 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 1.0007169246673584, |
| "learning_rate": 0.00019297764858882514, |
| "loss": 1.3847, |
| "step": 30 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.7307058572769165, |
| "learning_rate": 0.00018763066800438636, |
| "loss": 1.1869, |
| "step": 40 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.8612673282623291, |
| "learning_rate": 0.00018090169943749476, |
| "loss": 1.0578, |
| "step": 50 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 1.1447019577026367, |
| "learning_rate": 0.00017289686274214118, |
| "loss": 0.9319, |
| "step": 60 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 1.2556893825531006, |
| "learning_rate": 0.000163742398974869, |
| "loss": 0.8036, |
| "step": 70 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.5113087892532349, |
| "learning_rate": 0.00015358267949789966, |
| "loss": 0.6837, |
| "step": 80 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 1.7928667068481445, |
| "learning_rate": 0.00014257792915650728, |
| "loss": 0.5693, |
| "step": 90 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 2.2895917892456055, |
| "learning_rate": 0.00013090169943749476, |
| "loss": 0.4696, |
| "step": 100 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 2.3151087760925293, |
| "learning_rate": 0.00011873813145857249, |
| "loss": 0.3774, |
| "step": 110 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 2.5339276790618896, |
| "learning_rate": 0.00010627905195293135, |
| "loss": 0.301, |
| "step": 120 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 2.7135488986968994, |
| "learning_rate": 9.372094804706867e-05, |
| "loss": 0.2342, |
| "step": 130 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 2.314831256866455, |
| "learning_rate": 8.126186854142752e-05, |
| "loss": 0.1907, |
| "step": 140 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 2.233260154724121, |
| "learning_rate": 6.909830056250527e-05, |
| "loss": 0.1554, |
| "step": 150 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 2.4932591915130615, |
| "learning_rate": 5.7422070843492734e-05, |
| "loss": 0.1362, |
| "step": 160 |
| }, |
| { |
| "epoch": 42.5, |
| "grad_norm": 1.6581228971481323, |
| "learning_rate": 4.6417320502100316e-05, |
| "loss": 0.1201, |
| "step": 170 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 1.8939262628555298, |
| "learning_rate": 3.6257601025131026e-05, |
| "loss": 0.1123, |
| "step": 180 |
| }, |
| { |
| "epoch": 47.5, |
| "grad_norm": 1.9351285696029663, |
| "learning_rate": 2.7103137257858868e-05, |
| "loss": 0.1059, |
| "step": 190 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 1.6971222162246704, |
| "learning_rate": 1.9098300562505266e-05, |
| "loss": 0.1008, |
| "step": 200 |
| }, |
| { |
| "epoch": 52.5, |
| "grad_norm": 1.565306544303894, |
| "learning_rate": 1.2369331995613665e-05, |
| "loss": 0.0979, |
| "step": 210 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 1.6731207370758057, |
| "learning_rate": 7.022351411174866e-06, |
| "loss": 0.0967, |
| "step": 220 |
| }, |
| { |
| "epoch": 57.5, |
| "grad_norm": 1.9035307168960571, |
| "learning_rate": 3.1416838871368924e-06, |
| "loss": 0.0954, |
| "step": 230 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 1.338454008102417, |
| "learning_rate": 7.885298685522235e-07, |
| "loss": 0.0946, |
| "step": 240 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 2.8155736923217773, |
| "learning_rate": 0.0, |
| "loss": 0.0937, |
| "step": 250 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 63, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.617159544353587e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|