{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5547209060441466,
  "eval_steps": 500,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01849069686813822,
      "grad_norm": 1.859375,
      "learning_rate": 4.908006656804734e-05,
      "loss": 1.5227,
      "step": 200
    },
    {
      "epoch": 0.03698139373627644,
      "grad_norm": 2.109375,
      "learning_rate": 4.815551035502959e-05,
      "loss": 1.2793,
      "step": 400
    },
    {
      "epoch": 0.05547209060441465,
      "grad_norm": 1.9453125,
      "learning_rate": 4.723095414201183e-05,
      "loss": 1.1667,
      "step": 600
    },
    {
      "epoch": 0.07396278747255287,
      "grad_norm": 1.4765625,
      "learning_rate": 4.6306397928994084e-05,
      "loss": 1.0706,
      "step": 800
    },
    {
      "epoch": 0.09245348434069109,
      "grad_norm": 2.21875,
      "learning_rate": 4.538184171597633e-05,
      "loss": 1.0422,
      "step": 1000
    },
    {
      "epoch": 0.1109441812088293,
      "grad_norm": 2.0625,
      "learning_rate": 4.445728550295859e-05,
      "loss": 0.9874,
      "step": 1200
    },
    {
      "epoch": 0.12943487807696752,
      "grad_norm": 2.8125,
      "learning_rate": 4.353272928994083e-05,
      "loss": 0.9652,
      "step": 1400
    },
    {
      "epoch": 0.14792557494510575,
      "grad_norm": 2.390625,
      "learning_rate": 4.260817307692308e-05,
      "loss": 0.9308,
      "step": 1600
    },
    {
      "epoch": 0.16641627181324395,
      "grad_norm": 2.5,
      "learning_rate": 4.168361686390533e-05,
      "loss": 0.9078,
      "step": 1800
    },
    {
      "epoch": 0.18490696868138218,
      "grad_norm": 2.203125,
      "learning_rate": 4.075906065088758e-05,
      "loss": 0.8999,
      "step": 2000
    },
    {
      "epoch": 0.2033976655495204,
      "grad_norm": 1.796875,
      "learning_rate": 3.9834504437869823e-05,
      "loss": 0.883,
      "step": 2200
    },
    {
      "epoch": 0.2218883624176586,
      "grad_norm": 2.765625,
      "learning_rate": 3.8909948224852075e-05,
      "loss": 0.8667,
      "step": 2400
    },
    {
      "epoch": 0.24037905928579684,
      "grad_norm": 2.125,
      "learning_rate": 3.798539201183432e-05,
      "loss": 0.8539,
      "step": 2600
    },
    {
      "epoch": 0.25886975615393504,
      "grad_norm": 3.03125,
      "learning_rate": 3.706083579881657e-05,
      "loss": 0.8267,
      "step": 2800
    },
    {
      "epoch": 0.2773604530220733,
      "grad_norm": 3.4375,
      "learning_rate": 3.6136279585798815e-05,
      "loss": 0.8302,
      "step": 3000
    },
    {
      "epoch": 0.2958511498902115,
      "grad_norm": 2.1875,
      "learning_rate": 3.521172337278107e-05,
      "loss": 0.8287,
      "step": 3200
    },
    {
      "epoch": 0.3143418467583497,
      "grad_norm": 2.84375,
      "learning_rate": 3.428716715976332e-05,
      "loss": 0.8054,
      "step": 3400
    },
    {
      "epoch": 0.3328325436264879,
      "grad_norm": 2.375,
      "learning_rate": 3.336261094674556e-05,
      "loss": 0.813,
      "step": 3600
    },
    {
      "epoch": 0.35132324049462615,
      "grad_norm": 2.171875,
      "learning_rate": 3.2438054733727814e-05,
      "loss": 0.7772,
      "step": 3800
    },
    {
      "epoch": 0.36981393736276436,
      "grad_norm": 2.15625,
      "learning_rate": 3.151349852071006e-05,
      "loss": 0.7957,
      "step": 4000
    },
    {
      "epoch": 0.38830463423090256,
      "grad_norm": 2.21875,
      "learning_rate": 3.058894230769231e-05,
      "loss": 0.8008,
      "step": 4200
    },
    {
      "epoch": 0.4067953310990408,
      "grad_norm": 1.90625,
      "learning_rate": 2.9664386094674558e-05,
      "loss": 0.7764,
      "step": 4400
    },
    {
      "epoch": 0.425286027967179,
      "grad_norm": 2.640625,
      "learning_rate": 2.8739829881656806e-05,
      "loss": 0.7573,
      "step": 4600
    },
    {
      "epoch": 0.4437767248353172,
      "grad_norm": 2.0,
      "learning_rate": 2.781527366863905e-05,
      "loss": 0.7785,
      "step": 4800
    },
    {
      "epoch": 0.46226742170345547,
      "grad_norm": 2.0625,
      "learning_rate": 2.68907174556213e-05,
      "loss": 0.7704,
      "step": 5000
    },
    {
      "epoch": 0.4807581185715937,
      "grad_norm": 2.015625,
      "learning_rate": 2.5966161242603554e-05,
      "loss": 0.7417,
      "step": 5200
    },
    {
      "epoch": 0.4992488154397319,
      "grad_norm": 1.8671875,
      "learning_rate": 2.50416050295858e-05,
      "loss": 0.7409,
      "step": 5400
    },
    {
      "epoch": 0.5177395123078701,
      "grad_norm": 2.671875,
      "learning_rate": 2.411704881656805e-05,
      "loss": 0.7605,
      "step": 5600
    },
    {
      "epoch": 0.5362302091760083,
      "grad_norm": 2.109375,
      "learning_rate": 2.3192492603550298e-05,
      "loss": 0.7578,
      "step": 5800
    },
    {
      "epoch": 0.5547209060441466,
      "grad_norm": 1.90625,
      "learning_rate": 2.2267936390532546e-05,
      "loss": 0.7425,
      "step": 6000
    }
  ],
  "logging_steps": 200,
  "max_steps": 10816,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.52507816693334e+17,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}