{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8518518518518519, "eval_steps": 200, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009259259259259259, "eval_loss": 2.1662161350250244, "eval_runtime": 26.9869, "eval_samples_per_second": 55.62, "eval_steps_per_second": 6.966, "step": 1 }, { "epoch": 0.09259259259259259, "grad_norm": 3.96875, "learning_rate": 6.666666666666667e-05, "loss": 1.8577, "step": 10 }, { "epoch": 0.18518518518518517, "grad_norm": 1.4453125, "learning_rate": 0.00013333333333333334, "loss": 1.8326, "step": 20 }, { "epoch": 0.2777777777777778, "grad_norm": 1.609375, "learning_rate": 0.0002, "loss": 1.8689, "step": 30 }, { "epoch": 0.37037037037037035, "grad_norm": 1.40625, "learning_rate": 0.00019984815164333163, "loss": 1.983, "step": 40 }, { "epoch": 0.46296296296296297, "grad_norm": 1.25, "learning_rate": 0.00019939306773179497, "loss": 2.0149, "step": 50 }, { "epoch": 0.5555555555555556, "grad_norm": 1.234375, "learning_rate": 0.00019863613034027224, "loss": 2.0506, "step": 60 }, { "epoch": 0.6481481481481481, "grad_norm": 1.3125, "learning_rate": 0.00019757963826274357, "loss": 2.0991, "step": 70 }, { "epoch": 0.7407407407407407, "grad_norm": 1.328125, "learning_rate": 0.00019622680003092503, "loss": 2.1253, "step": 80 }, { "epoch": 0.8333333333333334, "grad_norm": 2.1875, "learning_rate": 0.00019458172417006347, "loss": 2.1094, "step": 90 }, { "epoch": 0.9259259259259259, "grad_norm": 1.2265625, "learning_rate": 0.00019264940672148018, "loss": 2.1243, "step": 100 }, { "epoch": 1.0185185185185186, "grad_norm": 1.40625, "learning_rate": 0.00019043571606975777, "loss": 1.8817, "step": 110 }, { "epoch": 1.1111111111111112, "grad_norm": 1.203125, "learning_rate": 0.0001879473751206489, "loss": 0.9706, "step": 120 }, { "epoch": 1.2037037037037037, "grad_norm": 1.1171875, "learning_rate": 0.00018519194088383273, "loss": 0.9351, "step": 130 }, { "epoch": 1.2962962962962963, "grad_norm": 1.234375, "learning_rate": 0.0001821777815225245, "loss": 0.9448, "step": 140 }, { "epoch": 1.3888888888888888, "grad_norm": 1.1796875, "learning_rate": 0.00017891405093963938, "loss": 0.9423, "step": 150 }, { "epoch": 1.4814814814814814, "grad_norm": 1.109375, "learning_rate": 0.00017541066097768963, "loss": 0.9714, "step": 160 }, { "epoch": 1.574074074074074, "grad_norm": 1.0546875, "learning_rate": 0.00017167825131684513, "loss": 0.9688, "step": 170 }, { "epoch": 1.6666666666666665, "grad_norm": 1.1328125, "learning_rate": 0.00016772815716257412, "loss": 1.0031, "step": 180 }, { "epoch": 1.7592592592592593, "grad_norm": 1.1953125, "learning_rate": 0.00016357237482099684, "loss": 0.987, "step": 190 }, { "epoch": 1.8518518518518519, "grad_norm": 1.1796875, "learning_rate": 0.00015922352526649803, "loss": 1.0077, "step": 200 }, { "epoch": 1.8518518518518519, "eval_loss": 2.0790088176727295, "eval_runtime": 27.2846, "eval_samples_per_second": 55.013, "eval_steps_per_second": 6.89, "step": 200 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2990695522435072e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }