{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2773604530220733,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01849069686813822,
      "grad_norm": 1.859375,
      "learning_rate": 4.908006656804734e-05,
      "loss": 1.5227,
      "step": 200
    },
    {
      "epoch": 0.03698139373627644,
      "grad_norm": 2.109375,
      "learning_rate": 4.815551035502959e-05,
      "loss": 1.2793,
      "step": 400
    },
    {
      "epoch": 0.05547209060441465,
      "grad_norm": 1.9453125,
      "learning_rate": 4.723095414201183e-05,
      "loss": 1.1667,
      "step": 600
    },
    {
      "epoch": 0.07396278747255287,
      "grad_norm": 1.4765625,
      "learning_rate": 4.6306397928994084e-05,
      "loss": 1.0706,
      "step": 800
    },
    {
      "epoch": 0.09245348434069109,
      "grad_norm": 2.21875,
      "learning_rate": 4.538184171597633e-05,
      "loss": 1.0422,
      "step": 1000
    },
    {
      "epoch": 0.1109441812088293,
      "grad_norm": 2.0625,
      "learning_rate": 4.445728550295859e-05,
      "loss": 0.9874,
      "step": 1200
    },
    {
      "epoch": 0.12943487807696752,
      "grad_norm": 2.8125,
      "learning_rate": 4.353272928994083e-05,
      "loss": 0.9652,
      "step": 1400
    },
    {
      "epoch": 0.14792557494510575,
      "grad_norm": 2.390625,
      "learning_rate": 4.260817307692308e-05,
      "loss": 0.9308,
      "step": 1600
    },
    {
      "epoch": 0.16641627181324395,
      "grad_norm": 2.5,
      "learning_rate": 4.168361686390533e-05,
      "loss": 0.9078,
      "step": 1800
    },
    {
      "epoch": 0.18490696868138218,
      "grad_norm": 2.203125,
      "learning_rate": 4.075906065088758e-05,
      "loss": 0.8999,
      "step": 2000
    },
    {
      "epoch": 0.2033976655495204,
      "grad_norm": 1.796875,
      "learning_rate": 3.9834504437869823e-05,
      "loss": 0.883,
      "step": 2200
    },
    {
      "epoch": 0.2218883624176586,
      "grad_norm": 2.765625,
      "learning_rate": 3.8909948224852075e-05,
      "loss": 0.8667,
      "step": 2400
    },
    {
      "epoch": 0.24037905928579684,
      "grad_norm": 2.125,
      "learning_rate": 3.798539201183432e-05,
      "loss": 0.8539,
      "step": 2600
    },
    {
      "epoch": 0.25886975615393504,
      "grad_norm": 3.03125,
      "learning_rate": 3.706083579881657e-05,
      "loss": 0.8267,
      "step": 2800
    },
    {
      "epoch": 0.2773604530220733,
      "grad_norm": 3.4375,
      "learning_rate": 3.6136279585798815e-05,
      "loss": 0.8302,
      "step": 3000
    }
  ],
  "logging_steps": 200,
  "max_steps": 10816,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.762326800176251e+17,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}