| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.967741935483871, | |
| "eval_steps": 500, | |
| "global_step": 72, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.027649769585253458, | |
| "grad_norm": 0.12118180724620531, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.4052, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.1382488479262673, | |
| "grad_norm": 0.09276164809571165, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.5046, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.2764976958525346, | |
| "grad_norm": 0.12423129115518604, | |
| "learning_rate": 9.975923633360985e-06, | |
| "loss": 0.494, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.4147465437788018, | |
| "grad_norm": 0.0915349647233371, | |
| "learning_rate": 9.707720325915105e-06, | |
| "loss": 0.4316, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.5529953917050692, | |
| "grad_norm": 0.07446859498374453, | |
| "learning_rate": 9.157348061512728e-06, | |
| "loss": 0.432, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.6912442396313364, | |
| "grad_norm": 0.06496916435703345, | |
| "learning_rate": 8.357794774235094e-06, | |
| "loss": 0.4402, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.8294930875576036, | |
| "grad_norm": 0.06298369476648798, | |
| "learning_rate": 7.3569836841299905e-06, | |
| "loss": 0.4185, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 0.06731429829180048, | |
| "learning_rate": 6.21490089951632e-06, | |
| "loss": 0.4048, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.3824561536312103, | |
| "eval_runtime": 19.7357, | |
| "eval_samples_per_second": 19.356, | |
| "eval_steps_per_second": 4.864, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 1.0829493087557605, | |
| "grad_norm": 0.06374957595243645, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3963, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.2211981566820276, | |
| "grad_norm": 0.0680946365437224, | |
| "learning_rate": 3.7850991004836813e-06, | |
| "loss": 0.3484, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.359447004608295, | |
| "grad_norm": 0.05004119075815043, | |
| "learning_rate": 2.6430163158700116e-06, | |
| "loss": 0.3416, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.4976958525345623, | |
| "grad_norm": 0.05461700270757283, | |
| "learning_rate": 1.642205225764908e-06, | |
| "loss": 0.37, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.6359447004608296, | |
| "grad_norm": 0.05050588248109686, | |
| "learning_rate": 8.426519384872733e-07, | |
| "loss": 0.3603, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.7741935483870968, | |
| "grad_norm": 0.04335256475558046, | |
| "learning_rate": 2.9227967408489653e-07, | |
| "loss": 0.3524, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.912442396313364, | |
| "grad_norm": 0.04869022142601223, | |
| "learning_rate": 2.4076366639015914e-08, | |
| "loss": 0.3643, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.967741935483871, | |
| "eval_loss": 0.37427836656570435, | |
| "eval_runtime": 19.6704, | |
| "eval_samples_per_second": 19.42, | |
| "eval_steps_per_second": 4.88, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.967741935483871, | |
| "step": 72, | |
| "total_flos": 1.7760167664602317e+17, | |
| "train_loss": 0.4003302885426415, | |
| "train_runtime": 975.808, | |
| "train_samples_per_second": 3.554, | |
| "train_steps_per_second": 0.074 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 72, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.7760167664602317e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |