{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.075, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025, "grad_norm": 28.533191680908203, "learning_rate": 0.00019950000000000002, "loss": 10.2028, "step": 10 }, { "epoch": 0.005, "grad_norm": 1.5962224006652832, "learning_rate": 0.000199, "loss": 1.8731, "step": 20 }, { "epoch": 0.0075, "grad_norm": 0.8784465193748474, "learning_rate": 0.00019850000000000003, "loss": 0.5834, "step": 30 }, { "epoch": 0.01, "grad_norm": 0.6551746726036072, "learning_rate": 0.00019800000000000002, "loss": 0.4274, "step": 40 }, { "epoch": 0.0125, "grad_norm": 0.7458402514457703, "learning_rate": 0.00019750000000000003, "loss": 0.2921, "step": 50 }, { "epoch": 0.015, "grad_norm": 0.6910417675971985, "learning_rate": 0.00019700000000000002, "loss": 0.1889, "step": 60 }, { "epoch": 0.0175, "grad_norm": 0.575136125087738, "learning_rate": 0.0001965, "loss": 0.1379, "step": 70 }, { "epoch": 0.02, "grad_norm": 1.2535680532455444, "learning_rate": 0.000196, "loss": 0.1053, "step": 80 }, { "epoch": 0.0225, "grad_norm": 0.9697864651679993, "learning_rate": 0.0001955, "loss": 0.0756, "step": 90 }, { "epoch": 0.025, "grad_norm": 0.508269727230072, "learning_rate": 0.000195, "loss": 0.055, "step": 100 }, { "epoch": 0.0275, "grad_norm": 0.6621774435043335, "learning_rate": 0.0001945, "loss": 0.0487, "step": 110 }, { "epoch": 0.03, "grad_norm": 0.5406679511070251, "learning_rate": 0.000194, "loss": 0.0406, "step": 120 }, { "epoch": 0.0325, "grad_norm": 0.35967350006103516, "learning_rate": 0.00019350000000000001, "loss": 0.0347, "step": 130 }, { "epoch": 0.035, "grad_norm": 0.6122244000434875, "learning_rate": 0.000193, "loss": 0.0334, "step": 140 }, { "epoch": 0.0375, "grad_norm": 0.4679579734802246, "learning_rate": 0.00019250000000000002, "loss": 0.0342, "step": 150 }, { "epoch": 0.04, "grad_norm": 0.6229879856109619, "learning_rate": 0.000192, "loss": 0.0328, "step": 160 }, { "epoch": 0.0425, "grad_norm": 0.4741787314414978, "learning_rate": 0.00019150000000000002, "loss": 0.0328, "step": 170 }, { "epoch": 0.045, "grad_norm": 0.3581089377403259, "learning_rate": 0.000191, "loss": 0.0329, "step": 180 }, { "epoch": 0.0475, "grad_norm": 0.2805705964565277, "learning_rate": 0.00019050000000000002, "loss": 0.0316, "step": 190 }, { "epoch": 0.05, "grad_norm": 0.36797094345092773, "learning_rate": 0.00019, "loss": 0.0314, "step": 200 }, { "epoch": 0.0525, "grad_norm": 0.22872206568717957, "learning_rate": 0.0001895, "loss": 0.0304, "step": 210 }, { "epoch": 0.055, "grad_norm": 0.3525296151638031, "learning_rate": 0.00018899999999999999, "loss": 0.0315, "step": 220 }, { "epoch": 0.0575, "grad_norm": 0.21026159822940826, "learning_rate": 0.0001885, "loss": 0.0302, "step": 230 }, { "epoch": 0.06, "grad_norm": 0.1741417497396469, "learning_rate": 0.000188, "loss": 0.0307, "step": 240 }, { "epoch": 0.0625, "grad_norm": 0.35116010904312134, "learning_rate": 0.0001875, "loss": 0.0305, "step": 250 }, { "epoch": 0.065, "grad_norm": 0.2572971284389496, "learning_rate": 0.00018700000000000002, "loss": 0.0313, "step": 260 }, { "epoch": 0.0675, "grad_norm": 0.2466694414615631, "learning_rate": 0.0001865, "loss": 0.0299, "step": 270 }, { "epoch": 0.07, "grad_norm": 0.19943873584270477, "learning_rate": 0.00018600000000000002, "loss": 0.0304, "step": 280 }, { "epoch": 0.0725, "grad_norm": 0.3378709852695465, "learning_rate": 0.0001855, "loss": 0.0299, "step": 290 }, { "epoch": 0.075, "grad_norm": 0.23438668251037598, "learning_rate": 0.00018500000000000002, "loss": 0.0301, "step": 300 } ], "logging_steps": 10, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 330337183334400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }