{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984924623115578, "eval_steps": 500, "global_step": 198, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010050251256281407, "grad_norm": 5.70887565612793, "learning_rate": 5e-09, "loss": 0.0615, "num_input_tokens_seen": 30216, "step": 1 }, { "epoch": 0.10050251256281408, "grad_norm": 20.16037940979004, "learning_rate": 5e-08, "loss": 0.2943, "num_input_tokens_seen": 302936, "step": 10 }, { "epoch": 0.20100502512562815, "grad_norm": 8.642094612121582, "learning_rate": 1e-07, "loss": 0.1826, "num_input_tokens_seen": 603952, "step": 20 }, { "epoch": 0.3015075376884422, "grad_norm": 15.085785865783691, "learning_rate": 9.922326639307917e-08, "loss": 0.1819, "num_input_tokens_seen": 910224, "step": 30 }, { "epoch": 0.4020100502512563, "grad_norm": 11.678885459899902, "learning_rate": 9.691719817616146e-08, "loss": 0.1965, "num_input_tokens_seen": 1216736, "step": 40 }, { "epoch": 0.5025125628140703, "grad_norm": 32.34952926635742, "learning_rate": 9.31534433766042e-08, "loss": 0.1411, "num_input_tokens_seen": 1520504, "step": 50 }, { "epoch": 0.6030150753768844, "grad_norm": 5.019775390625, "learning_rate": 8.804893938804839e-08, "loss": 0.3276, "num_input_tokens_seen": 1833328, "step": 60 }, { "epoch": 0.7035175879396985, "grad_norm": 7.245314598083496, "learning_rate": 8.176227980227692e-08, "loss": 0.209, "num_input_tokens_seen": 2137896, "step": 70 }, { "epoch": 0.8040201005025126, "grad_norm": 5.125178337097168, "learning_rate": 7.448878701031142e-08, "loss": 0.2118, "num_input_tokens_seen": 2443048, "step": 80 }, { "epoch": 0.9045226130653267, "grad_norm": 13.4613618850708, "learning_rate": 6.64544436638005e-08, "loss": 0.2609, "num_input_tokens_seen": 2743800, "step": 90 }, { "epoch": 1.0, "grad_norm": 7.303499698638916, "learning_rate": 5.79088715422152e-08, "loss": 0.2351, "num_input_tokens_seen": 3035624, "step": 100 }, { "epoch": 1.100502512562814, "grad_norm": 12.452404975891113, "learning_rate": 4.9117575967843566e-08, "loss": 0.2456, "num_input_tokens_seen": 3344432, "step": 110 }, { "epoch": 1.2010050251256281, "grad_norm": 4.935022830963135, "learning_rate": 4.035369672952515e-08, "loss": 0.1659, "num_input_tokens_seen": 3648640, "step": 120 }, { "epoch": 1.3015075376884422, "grad_norm": 27.987436294555664, "learning_rate": 3.1889521808515883e-08, "loss": 0.2033, "num_input_tokens_seen": 3952320, "step": 130 }, { "epoch": 1.4020100502512562, "grad_norm": 8.334047317504883, "learning_rate": 2.3988027569455894e-08, "loss": 0.2033, "num_input_tokens_seen": 4258032, "step": 140 }, { "epoch": 1.5025125628140703, "grad_norm": 6.84324312210083, "learning_rate": 1.689470825715998e-08, "loss": 0.1917, "num_input_tokens_seen": 4564368, "step": 150 }, { "epoch": 1.6030150753768844, "grad_norm": 6.922938823699951, "learning_rate": 1.0829948651407373e-08, "loss": 0.1825, "num_input_tokens_seen": 4869656, "step": 160 }, { "epoch": 1.7035175879396984, "grad_norm": 5.297750473022461, "learning_rate": 5.982176856345444e-09, "loss": 0.1895, "num_input_tokens_seen": 5170880, "step": 170 }, { "epoch": 1.8040201005025125, "grad_norm": 19.33493995666504, "learning_rate": 2.50200996285046e-09, "loss": 0.2759, "num_input_tokens_seen": 5482360, "step": 180 }, { "epoch": 1.9045226130653266, "grad_norm": 12.66413688659668, "learning_rate": 4.975744742772847e-10, "loss": 0.2818, "num_input_tokens_seen": 5783664, "step": 190 }, { "epoch": 1.984924623115578, "num_input_tokens_seen": 6025936, "step": 198, "total_flos": 32602670678016.0, "train_loss": 0.22024261590206262, "train_runtime": 12365.0204, "train_samples_per_second": 0.128, "train_steps_per_second": 0.016 } ], "logging_steps": 10, "max_steps": 198, "num_input_tokens_seen": 6025936, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 32602670678016.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }