{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2773604530220733, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01849069686813822, "grad_norm": 1.859375, "learning_rate": 4.908006656804734e-05, "loss": 1.5227, "step": 200 }, { "epoch": 0.03698139373627644, "grad_norm": 2.109375, "learning_rate": 4.815551035502959e-05, "loss": 1.2793, "step": 400 }, { "epoch": 0.05547209060441465, "grad_norm": 1.9453125, "learning_rate": 4.723095414201183e-05, "loss": 1.1667, "step": 600 }, { "epoch": 0.07396278747255287, "grad_norm": 1.4765625, "learning_rate": 4.6306397928994084e-05, "loss": 1.0706, "step": 800 }, { "epoch": 0.09245348434069109, "grad_norm": 2.21875, "learning_rate": 4.538184171597633e-05, "loss": 1.0422, "step": 1000 }, { "epoch": 0.1109441812088293, "grad_norm": 2.0625, "learning_rate": 4.445728550295859e-05, "loss": 0.9874, "step": 1200 }, { "epoch": 0.12943487807696752, "grad_norm": 2.8125, "learning_rate": 4.353272928994083e-05, "loss": 0.9652, "step": 1400 }, { "epoch": 0.14792557494510575, "grad_norm": 2.390625, "learning_rate": 4.260817307692308e-05, "loss": 0.9308, "step": 1600 }, { "epoch": 0.16641627181324395, "grad_norm": 2.5, "learning_rate": 4.168361686390533e-05, "loss": 0.9078, "step": 1800 }, { "epoch": 0.18490696868138218, "grad_norm": 2.203125, "learning_rate": 4.075906065088758e-05, "loss": 0.8999, "step": 2000 }, { "epoch": 0.2033976655495204, "grad_norm": 1.796875, "learning_rate": 3.9834504437869823e-05, "loss": 0.883, "step": 2200 }, { "epoch": 0.2218883624176586, "grad_norm": 2.765625, "learning_rate": 3.8909948224852075e-05, "loss": 0.8667, "step": 2400 }, { "epoch": 0.24037905928579684, "grad_norm": 2.125, "learning_rate": 3.798539201183432e-05, "loss": 0.8539, "step": 2600 }, { "epoch": 0.25886975615393504, "grad_norm": 3.03125, "learning_rate": 3.706083579881657e-05, "loss": 0.8267, "step": 2800 }, { "epoch": 0.2773604530220733, "grad_norm": 3.4375, "learning_rate": 3.6136279585798815e-05, "loss": 0.8302, "step": 3000 } ], "logging_steps": 200, "max_steps": 10816, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.762326800176251e+17, "train_batch_size": 3, "trial_name": null, "trial_params": null }