{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.967741935483871, "eval_steps": 500, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.027649769585253458, "grad_norm": 0.12118180724620531, "learning_rate": 1.25e-06, "loss": 0.4052, "step": 1 }, { "epoch": 0.1382488479262673, "grad_norm": 0.09276164809571165, "learning_rate": 6.25e-06, "loss": 0.5046, "step": 5 }, { "epoch": 0.2764976958525346, "grad_norm": 0.12423129115518604, "learning_rate": 9.975923633360985e-06, "loss": 0.494, "step": 10 }, { "epoch": 0.4147465437788018, "grad_norm": 0.0915349647233371, "learning_rate": 9.707720325915105e-06, "loss": 0.4316, "step": 15 }, { "epoch": 0.5529953917050692, "grad_norm": 0.07446859498374453, "learning_rate": 9.157348061512728e-06, "loss": 0.432, "step": 20 }, { "epoch": 0.6912442396313364, "grad_norm": 0.06496916435703345, "learning_rate": 8.357794774235094e-06, "loss": 0.4402, "step": 25 }, { "epoch": 0.8294930875576036, "grad_norm": 0.06298369476648798, "learning_rate": 7.3569836841299905e-06, "loss": 0.4185, "step": 30 }, { "epoch": 0.967741935483871, "grad_norm": 0.06731429829180048, "learning_rate": 6.21490089951632e-06, "loss": 0.4048, "step": 35 }, { "epoch": 1.0, "eval_loss": 0.3824561536312103, "eval_runtime": 19.7357, "eval_samples_per_second": 19.356, "eval_steps_per_second": 4.864, "step": 37 }, { "epoch": 1.0829493087557605, "grad_norm": 0.06374957595243645, "learning_rate": 5e-06, "loss": 0.3963, "step": 40 }, { "epoch": 1.2211981566820276, "grad_norm": 0.0680946365437224, "learning_rate": 3.7850991004836813e-06, "loss": 0.3484, "step": 45 }, { "epoch": 1.359447004608295, "grad_norm": 0.05004119075815043, "learning_rate": 2.6430163158700116e-06, "loss": 0.3416, "step": 50 }, { "epoch": 1.4976958525345623, "grad_norm": 0.05461700270757283, "learning_rate": 1.642205225764908e-06, "loss": 0.37, "step": 55 }, { "epoch": 1.6359447004608296, "grad_norm": 0.05050588248109686, "learning_rate": 8.426519384872733e-07, "loss": 0.3603, "step": 60 }, { "epoch": 1.7741935483870968, "grad_norm": 0.04335256475558046, "learning_rate": 2.9227967408489653e-07, "loss": 0.3524, "step": 65 }, { "epoch": 1.912442396313364, "grad_norm": 0.04869022142601223, "learning_rate": 2.4076366639015914e-08, "loss": 0.3643, "step": 70 }, { "epoch": 1.967741935483871, "eval_loss": 0.37427836656570435, "eval_runtime": 19.6704, "eval_samples_per_second": 19.42, "eval_steps_per_second": 4.88, "step": 72 }, { "epoch": 1.967741935483871, "step": 72, "total_flos": 1.7760167664602317e+17, "train_loss": 0.4003302885426415, "train_runtime": 975.808, "train_samples_per_second": 3.554, "train_steps_per_second": 0.074 } ], "logging_steps": 5, "max_steps": 72, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7760167664602317e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }