{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9950617283950618, "eval_steps": 100, "global_step": 202, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09876543209876543, "grad_norm": 0.3113946318626404, "learning_rate": 0.00019988322268323268, "loss": 1.1897, "step": 10 }, { "epoch": 0.19753086419753085, "grad_norm": 0.25871261954307556, "learning_rate": 0.00019781476007338058, "loss": 0.6655, "step": 20 }, { "epoch": 0.2962962962962963, "grad_norm": 0.13192948698997498, "learning_rate": 0.0001932129465573568, "loss": 0.363, "step": 30 }, { "epoch": 0.3950617283950617, "grad_norm": 0.1178111806511879, "learning_rate": 0.00018619696668800492, "loss": 0.2902, "step": 40 }, { "epoch": 0.49382716049382713, "grad_norm": 0.06365109980106354, "learning_rate": 0.00017694853065861662, "loss": 0.2627, "step": 50 }, { "epoch": 0.5925925925925926, "grad_norm": 0.06805580109357834, "learning_rate": 0.0001657071681043731, "loss": 0.2479, "step": 60 }, { "epoch": 0.691358024691358, "grad_norm": 0.05795082449913025, "learning_rate": 0.0001527640244106133, "loss": 0.248, "step": 70 }, { "epoch": 0.7901234567901234, "grad_norm": 0.054490040987730026, "learning_rate": 0.0001384543202002851, "loss": 0.2374, "step": 80 }, { "epoch": 0.8888888888888888, "grad_norm": 0.050945691764354706, "learning_rate": 0.00012314866929589432, "loss": 0.2364, "step": 90 }, { "epoch": 0.9876543209876543, "grad_norm": 0.05336359888315201, "learning_rate": 0.00010724348001617625, "loss": 0.2314, "step": 100 }, { "epoch": 0.9876543209876543, "eval_loss": 0.23581312596797943, "eval_runtime": 270.7343, "eval_samples_per_second": 0.665, "eval_steps_per_second": 0.665, "step": 100 }, { "epoch": 1.0864197530864197, "grad_norm": 0.052394308149814606, "learning_rate": 9.115068840886417e-05, "loss": 0.2339, "step": 110 }, { "epoch": 1.1851851851851851, "grad_norm": 0.06545981019735336, "learning_rate": 7.528708932343304e-05, "loss": 0.2301, "step": 120 }, { "epoch": 1.2839506172839505, "grad_norm": 0.06937788426876068, "learning_rate": 6.006354164343046e-05, "loss": 0.2322, "step": 130 }, { "epoch": 1.382716049382716, "grad_norm": 0.07582119852304459, "learning_rate": 4.587432725720687e-05, "loss": 0.2234, "step": 140 }, { "epoch": 1.4814814814814814, "grad_norm": 0.11781789362430573, "learning_rate": 3.308693936411421e-05, "loss": 0.2164, "step": 150 }, { "epoch": 1.5802469135802468, "grad_norm": 0.08647400885820389, "learning_rate": 2.2032564593677774e-05, "loss": 0.2197, "step": 160 }, { "epoch": 1.6790123456790123, "grad_norm": 0.09350328147411346, "learning_rate": 1.2997505445856084e-05, "loss": 0.2194, "step": 170 }, { "epoch": 1.7777777777777777, "grad_norm": 0.14273136854171753, "learning_rate": 6.215765206679569e-06, "loss": 0.2219, "step": 180 }, { "epoch": 1.876543209876543, "grad_norm": 0.10584201663732529, "learning_rate": 1.8629873860586566e-06, "loss": 0.2298, "step": 190 }, { "epoch": 1.9753086419753085, "grad_norm": 0.08690895885229111, "learning_rate": 5.190664313851068e-08, "loss": 0.2228, "step": 200 }, { "epoch": 1.9753086419753085, "eval_loss": 0.2289411574602127, "eval_runtime": 273.2063, "eval_samples_per_second": 0.659, "eval_steps_per_second": 0.659, "step": 200 }, { "epoch": 1.9950617283950618, "step": 202, "total_flos": 1.2609051312227942e+17, "train_loss": 0.3101574260704588, "train_runtime": 14633.2305, "train_samples_per_second": 0.221, "train_steps_per_second": 0.014 }, { "epoch": 1.9950617283950618, "eval_loss": 0.22894252836704254, "eval_runtime": 273.8519, "eval_samples_per_second": 0.657, "eval_steps_per_second": 0.657, "step": 202 } ], "logging_steps": 10, "max_steps": 202, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2609051312227942e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }