{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.989010989010989, "eval_steps": 500, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03296703296703297, "grad_norm": 31.073850631713867, "learning_rate": 5.0000000000000004e-08, "loss": 2.5967, "step": 1 }, { "epoch": 0.06593406593406594, "grad_norm": 32.91181182861328, "learning_rate": 1.0000000000000001e-07, "loss": 2.723, "step": 2 }, { "epoch": 0.0989010989010989, "grad_norm": 31.494897842407227, "learning_rate": 1.5000000000000002e-07, "loss": 2.6125, "step": 3 }, { "epoch": 0.13186813186813187, "grad_norm": 30.80953598022461, "learning_rate": 2.0000000000000002e-07, "loss": 2.58, "step": 4 }, { "epoch": 0.16483516483516483, "grad_norm": 31.269071578979492, "learning_rate": 2.5000000000000004e-07, "loss": 2.5977, "step": 5 }, { "epoch": 0.1978021978021978, "grad_norm": 30.687875747680664, "learning_rate": 3.0000000000000004e-07, "loss": 2.5588, "step": 6 }, { "epoch": 0.23076923076923078, "grad_norm": 31.30279541015625, "learning_rate": 3.5000000000000004e-07, "loss": 2.5731, "step": 7 }, { "epoch": 0.26373626373626374, "grad_norm": 31.384830474853516, "learning_rate": 4.0000000000000003e-07, "loss": 2.561, "step": 8 }, { "epoch": 0.2967032967032967, "grad_norm": 30.58422088623047, "learning_rate": 4.5000000000000003e-07, "loss": 2.4872, "step": 9 }, { "epoch": 0.32967032967032966, "grad_norm": 30.883068084716797, "learning_rate": 5.000000000000001e-07, "loss": 2.5257, "step": 10 }, { "epoch": 0.3626373626373626, "grad_norm": 32.198814392089844, "learning_rate": 5.5e-07, "loss": 2.6286, "step": 11 }, { "epoch": 0.3956043956043956, "grad_norm": 31.001300811767578, "learning_rate": 6.000000000000001e-07, "loss": 2.4632, "step": 12 }, { "epoch": 0.42857142857142855, "grad_norm": 31.106016159057617, "learning_rate": 6.5e-07, "loss": 2.4274, "step": 13 }, { "epoch": 0.46153846153846156, "grad_norm": 31.180011749267578, "learning_rate": 7.000000000000001e-07, "loss": 2.3864, "step": 14 }, { "epoch": 0.4945054945054945, "grad_norm": 30.95736312866211, "learning_rate": 7.5e-07, "loss": 2.2977, "step": 15 }, { "epoch": 0.5274725274725275, "grad_norm": 31.67963218688965, "learning_rate": 8.000000000000001e-07, "loss": 2.2632, "step": 16 }, { "epoch": 0.5604395604395604, "grad_norm": 32.420562744140625, "learning_rate": 8.500000000000001e-07, "loss": 2.1983, "step": 17 }, { "epoch": 0.5934065934065934, "grad_norm": 32.46091079711914, "learning_rate": 9.000000000000001e-07, "loss": 2.0623, "step": 18 }, { "epoch": 0.6263736263736264, "grad_norm": 31.34447479248047, "learning_rate": 9.500000000000001e-07, "loss": 1.8633, "step": 19 }, { "epoch": 0.6593406593406593, "grad_norm": 31.65386962890625, "learning_rate": 1.0000000000000002e-06, "loss": 1.7861, "step": 20 }, { "epoch": 0.6923076923076923, "grad_norm": 30.12110137939453, "learning_rate": 1.0500000000000001e-06, "loss": 1.6408, "step": 21 }, { "epoch": 0.7252747252747253, "grad_norm": 28.824857711791992, "learning_rate": 1.1e-06, "loss": 1.5022, "step": 22 }, { "epoch": 0.7582417582417582, "grad_norm": 27.37271499633789, "learning_rate": 1.1500000000000002e-06, "loss": 1.3799, "step": 23 }, { "epoch": 0.7912087912087912, "grad_norm": 26.869949340820312, "learning_rate": 1.2000000000000002e-06, "loss": 1.2699, "step": 24 }, { "epoch": 0.8241758241758241, "grad_norm": 26.696306228637695, "learning_rate": 1.25e-06, "loss": 1.107, "step": 25 }, { "epoch": 0.8571428571428571, "grad_norm": 29.182300567626953, "learning_rate": 1.3e-06, "loss": 0.9981, "step": 26 }, { "epoch": 0.8901098901098901, "grad_norm": 28.117998123168945, "learning_rate": 1.3500000000000002e-06, "loss": 0.7886, "step": 27 }, { "epoch": 0.9230769230769231, "grad_norm": 27.153093338012695, "learning_rate": 1.4000000000000001e-06, "loss": 0.6565, "step": 28 }, { "epoch": 0.9560439560439561, "grad_norm": 24.350711822509766, "learning_rate": 1.45e-06, "loss": 0.5143, "step": 29 }, { "epoch": 0.989010989010989, "grad_norm": 21.189594268798828, "learning_rate": 1.5e-06, "loss": 0.3953, "step": 30 } ], "logging_steps": 1, "max_steps": 180, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 30, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.57811009028096e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }