{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5547209060441466, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01849069686813822, "grad_norm": 1.859375, "learning_rate": 4.908006656804734e-05, "loss": 1.5227, "step": 200 }, { "epoch": 0.03698139373627644, "grad_norm": 2.109375, "learning_rate": 4.815551035502959e-05, "loss": 1.2793, "step": 400 }, { "epoch": 0.05547209060441465, "grad_norm": 1.9453125, "learning_rate": 4.723095414201183e-05, "loss": 1.1667, "step": 600 }, { "epoch": 0.07396278747255287, "grad_norm": 1.4765625, "learning_rate": 4.6306397928994084e-05, "loss": 1.0706, "step": 800 }, { "epoch": 0.09245348434069109, "grad_norm": 2.21875, "learning_rate": 4.538184171597633e-05, "loss": 1.0422, "step": 1000 }, { "epoch": 0.1109441812088293, "grad_norm": 2.0625, "learning_rate": 4.445728550295859e-05, "loss": 0.9874, "step": 1200 }, { "epoch": 0.12943487807696752, "grad_norm": 2.8125, "learning_rate": 4.353272928994083e-05, "loss": 0.9652, "step": 1400 }, { "epoch": 0.14792557494510575, "grad_norm": 2.390625, "learning_rate": 4.260817307692308e-05, "loss": 0.9308, "step": 1600 }, { "epoch": 0.16641627181324395, "grad_norm": 2.5, "learning_rate": 4.168361686390533e-05, "loss": 0.9078, "step": 1800 }, { "epoch": 0.18490696868138218, "grad_norm": 2.203125, "learning_rate": 4.075906065088758e-05, "loss": 0.8999, "step": 2000 }, { "epoch": 0.2033976655495204, "grad_norm": 1.796875, "learning_rate": 3.9834504437869823e-05, "loss": 0.883, "step": 2200 }, { "epoch": 0.2218883624176586, "grad_norm": 2.765625, "learning_rate": 3.8909948224852075e-05, "loss": 0.8667, "step": 2400 }, { "epoch": 0.24037905928579684, "grad_norm": 2.125, "learning_rate": 3.798539201183432e-05, "loss": 0.8539, "step": 2600 }, { "epoch": 0.25886975615393504, "grad_norm": 3.03125, "learning_rate": 3.706083579881657e-05, "loss": 0.8267, "step": 2800 }, { "epoch": 0.2773604530220733, "grad_norm": 3.4375, "learning_rate": 3.6136279585798815e-05, "loss": 0.8302, "step": 3000 }, { "epoch": 0.2958511498902115, "grad_norm": 2.1875, "learning_rate": 3.521172337278107e-05, "loss": 0.8287, "step": 3200 }, { "epoch": 0.3143418467583497, "grad_norm": 2.84375, "learning_rate": 3.428716715976332e-05, "loss": 0.8054, "step": 3400 }, { "epoch": 0.3328325436264879, "grad_norm": 2.375, "learning_rate": 3.336261094674556e-05, "loss": 0.813, "step": 3600 }, { "epoch": 0.35132324049462615, "grad_norm": 2.171875, "learning_rate": 3.2438054733727814e-05, "loss": 0.7772, "step": 3800 }, { "epoch": 0.36981393736276436, "grad_norm": 2.15625, "learning_rate": 3.151349852071006e-05, "loss": 0.7957, "step": 4000 }, { "epoch": 0.38830463423090256, "grad_norm": 2.21875, "learning_rate": 3.058894230769231e-05, "loss": 0.8008, "step": 4200 }, { "epoch": 0.4067953310990408, "grad_norm": 1.90625, "learning_rate": 2.9664386094674558e-05, "loss": 0.7764, "step": 4400 }, { "epoch": 0.425286027967179, "grad_norm": 2.640625, "learning_rate": 2.8739829881656806e-05, "loss": 0.7573, "step": 4600 }, { "epoch": 0.4437767248353172, "grad_norm": 2.0, "learning_rate": 2.781527366863905e-05, "loss": 0.7785, "step": 4800 }, { "epoch": 0.46226742170345547, "grad_norm": 2.0625, "learning_rate": 2.68907174556213e-05, "loss": 0.7704, "step": 5000 }, { "epoch": 0.4807581185715937, "grad_norm": 2.015625, "learning_rate": 2.5966161242603554e-05, "loss": 0.7417, "step": 5200 }, { "epoch": 0.4992488154397319, "grad_norm": 1.8671875, "learning_rate": 2.50416050295858e-05, "loss": 0.7409, "step": 5400 }, { "epoch": 0.5177395123078701, "grad_norm": 2.671875, "learning_rate": 2.411704881656805e-05, "loss": 0.7605, "step": 5600 }, { "epoch": 0.5362302091760083, "grad_norm": 2.109375, "learning_rate": 2.3192492603550298e-05, "loss": 0.7578, "step": 5800 }, { "epoch": 0.5547209060441466, "grad_norm": 1.90625, "learning_rate": 2.2267936390532546e-05, "loss": 0.7425, "step": 6000 } ], "logging_steps": 200, "max_steps": 10816, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.52507816693334e+17, "train_batch_size": 3, "trial_name": null, "trial_params": null }