{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100.0, "global_step": 35, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02857142857142857, "grad_norm": 0.04918956756591797, "learning_rate": 0.0, "loss": 0.5625, "step": 1 }, { "epoch": 0.05714285714285714, "grad_norm": 0.04941160976886749, "learning_rate": 5e-05, "loss": 0.5623, "step": 2 }, { "epoch": 0.08571428571428572, "grad_norm": 0.05087224394083023, "learning_rate": 0.0001, "loss": 0.5009, "step": 3 }, { "epoch": 0.11428571428571428, "grad_norm": 0.053648702800273895, "learning_rate": 0.00015000000000000001, "loss": 0.5467, "step": 4 }, { "epoch": 0.14285714285714285, "grad_norm": 0.056984659284353256, "learning_rate": 0.0002, "loss": 0.5003, "step": 5 }, { "epoch": 0.17142857142857143, "grad_norm": 0.06416453421115875, "learning_rate": 0.00019948693233918952, "loss": 0.4514, "step": 6 }, { "epoch": 0.2, "grad_norm": 0.07367783039808273, "learning_rate": 0.00019795299412524945, "loss": 0.4195, "step": 7 }, { "epoch": 0.22857142857142856, "grad_norm": 0.09809113293886185, "learning_rate": 0.00019541392564000488, "loss": 0.3534, "step": 8 }, { "epoch": 0.2571428571428571, "grad_norm": 0.2686680257320404, "learning_rate": 0.00019189578116202307, "loss": 0.3339, "step": 9 }, { "epoch": 0.2857142857142857, "grad_norm": 0.12331166863441467, "learning_rate": 0.00018743466161445823, "loss": 0.2941, "step": 10 }, { "epoch": 0.3142857142857143, "grad_norm": 0.13653112947940826, "learning_rate": 0.00018207634412072764, "loss": 0.2648, "step": 11 }, { "epoch": 0.34285714285714286, "grad_norm": 0.12992143630981445, "learning_rate": 0.0001758758122692791, "loss": 0.2015, "step": 12 }, { "epoch": 0.37142857142857144, "grad_norm": 0.13980498909950256, "learning_rate": 0.00016889669190756868, "loss": 0.1789, "step": 13 }, { "epoch": 0.4, "grad_norm": 0.1361037641763687, "learning_rate": 0.0001612105982547663, "loss": 0.1397, "step": 14 }, { "epoch": 0.42857142857142855, "grad_norm": 0.11609825491905212, "learning_rate": 0.00015289640103269625, "loss": 0.1201, "step": 15 }, { "epoch": 0.45714285714285713, "grad_norm": 0.099339060485363, "learning_rate": 0.00014403941515576344, "loss": 0.0973, "step": 16 }, { "epoch": 0.4857142857142857, "grad_norm": 0.09612716734409332, "learning_rate": 0.00013473052528448201, "loss": 0.0855, "step": 17 }, { "epoch": 0.5142857142857142, "grad_norm": 0.10055588185787201, "learning_rate": 0.00012506525322587207, "loss": 0.0849, "step": 18 }, { "epoch": 0.5428571428571428, "grad_norm": 0.08217591792345047, "learning_rate": 0.00011514277775045768, "loss": 0.0725, "step": 19 }, { "epoch": 0.5714285714285714, "grad_norm": 0.07315342128276825, "learning_rate": 0.00010506491688387127, "loss": 0.0684, "step": 20 }, { "epoch": 0.6, "grad_norm": 0.08092162758111954, "learning_rate": 9.493508311612874e-05, "loss": 0.0573, "step": 21 }, { "epoch": 0.6285714285714286, "grad_norm": 0.06385105103254318, "learning_rate": 8.485722224954237e-05, "loss": 0.0594, "step": 22 }, { "epoch": 0.6571428571428571, "grad_norm": 0.05351648107171059, "learning_rate": 7.493474677412794e-05, "loss": 0.0552, "step": 23 }, { "epoch": 0.6857142857142857, "grad_norm": 0.05012252926826477, "learning_rate": 6.526947471551798e-05, "loss": 0.0607, "step": 24 }, { "epoch": 0.7142857142857143, "grad_norm": 0.04566913843154907, "learning_rate": 5.596058484423656e-05, "loss": 0.0547, "step": 25 }, { "epoch": 0.7428571428571429, "grad_norm": 0.044832587242126465, "learning_rate": 4.710359896730379e-05, "loss": 0.0483, "step": 26 }, { "epoch": 0.7714285714285715, "grad_norm": 0.05177149549126625, "learning_rate": 3.878940174523371e-05, "loss": 0.0544, "step": 27 }, { "epoch": 0.8, "grad_norm": 0.04932279884815216, "learning_rate": 3.110330809243134e-05, "loss": 0.0483, "step": 28 }, { "epoch": 0.8285714285714286, "grad_norm": 0.05275353044271469, "learning_rate": 2.4124187730720917e-05, "loss": 0.0479, "step": 29 }, { "epoch": 0.8571428571428571, "grad_norm": 0.04073500633239746, "learning_rate": 1.7923655879272393e-05, "loss": 0.0525, "step": 30 }, { "epoch": 0.8857142857142857, "grad_norm": 0.04079524427652359, "learning_rate": 1.2565338385541792e-05, "loss": 0.0469, "step": 31 }, { "epoch": 0.9142857142857143, "grad_norm": 0.06132270023226738, "learning_rate": 8.10421883797694e-06, "loss": 0.0421, "step": 32 }, { "epoch": 0.9428571428571428, "grad_norm": 0.04404183104634285, "learning_rate": 4.586074359995119e-06, "loss": 0.0467, "step": 33 }, { "epoch": 0.9714285714285714, "grad_norm": 0.03794850781559944, "learning_rate": 2.0470058747505516e-06, "loss": 0.0467, "step": 34 }, { "epoch": 1.0, "grad_norm": 0.04732782021164894, "learning_rate": 5.130676608104845e-07, "loss": 0.0425, "step": 35 } ], "logging_steps": 1.0, "max_steps": 35, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.2046170486276096e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }