| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 99.96506051609957, |
| "eval_steps": 21890, |
| "global_step": 218900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 9.999771637360128, |
| "grad_norm": 2.7868080139160156, |
| "learning_rate": 9.000593878483327e-06, |
| "loss": 2.1154, |
| "step": 21890 |
| }, |
| { |
| "epoch": 9.999771637360128, |
| "eval_accuracy": 0.12790865384615385, |
| "eval_loss": 2.7239530086517334, |
| "eval_runtime": 86.8701, |
| "eval_samples_per_second": 478.876, |
| "eval_steps_per_second": 23.944, |
| "step": 21890 |
| }, |
| { |
| "epoch": 19.99566110984243, |
| "grad_norm": 3.3717546463012695, |
| "learning_rate": 8.000959342165372e-06, |
| "loss": 2.0659, |
| "step": 43780 |
| }, |
| { |
| "epoch": 19.99566110984243, |
| "eval_accuracy": 0.1278125, |
| "eval_loss": 2.7772293090820312, |
| "eval_runtime": 86.8569, |
| "eval_samples_per_second": 478.948, |
| "eval_steps_per_second": 23.947, |
| "step": 43780 |
| }, |
| { |
| "epoch": 29.99703128568166, |
| "grad_norm": 3.0808234214782715, |
| "learning_rate": 7.0015532206486995e-06, |
| "loss": 1.9905, |
| "step": 65670 |
| }, |
| { |
| "epoch": 29.99703128568166, |
| "eval_accuracy": 0.13122596153846153, |
| "eval_loss": 2.780440092086792, |
| "eval_runtime": 86.9347, |
| "eval_samples_per_second": 478.52, |
| "eval_steps_per_second": 23.926, |
| "step": 65670 |
| }, |
| { |
| "epoch": 39.99246403288422, |
| "grad_norm": 2.7015247344970703, |
| "learning_rate": 6.002010050251257e-06, |
| "loss": 1.9583, |
| "step": 87560 |
| }, |
| { |
| "epoch": 39.99246403288422, |
| "eval_accuracy": 0.13014423076923076, |
| "eval_loss": 2.829221487045288, |
| "eval_runtime": 86.9347, |
| "eval_samples_per_second": 478.52, |
| "eval_steps_per_second": 23.926, |
| "step": 87560 |
| }, |
| { |
| "epoch": 49.98789678008678, |
| "grad_norm": 3.6309220790863037, |
| "learning_rate": 5.00251256281407e-06, |
| "loss": 1.9332, |
| "step": 109450 |
| }, |
| { |
| "epoch": 49.98789678008678, |
| "eval_accuracy": 0.12955929487179488, |
| "eval_loss": 2.8339743614196777, |
| "eval_runtime": 86.9494, |
| "eval_samples_per_second": 478.439, |
| "eval_steps_per_second": 23.922, |
| "step": 109450 |
| }, |
| { |
| "epoch": 59.983329527289335, |
| "grad_norm": 1.8577121496200562, |
| "learning_rate": 4.003015075376885e-06, |
| "loss": 1.9132, |
| "step": 131340 |
| }, |
| { |
| "epoch": 59.983329527289335, |
| "eval_accuracy": 0.12921875, |
| "eval_loss": 2.8476154804229736, |
| "eval_runtime": 86.9388, |
| "eval_samples_per_second": 478.497, |
| "eval_steps_per_second": 23.925, |
| "step": 131340 |
| }, |
| { |
| "epoch": 69.9787622744919, |
| "grad_norm": 2.821331262588501, |
| "learning_rate": 3.003517587939699e-06, |
| "loss": 1.8997, |
| "step": 153230 |
| }, |
| { |
| "epoch": 69.9787622744919, |
| "eval_accuracy": 0.1286826923076923, |
| "eval_loss": 2.8616456985473633, |
| "eval_runtime": 86.8596, |
| "eval_samples_per_second": 478.934, |
| "eval_steps_per_second": 23.947, |
| "step": 153230 |
| }, |
| { |
| "epoch": 79.97419502169446, |
| "grad_norm": 3.619612216949463, |
| "learning_rate": 2.0040201005025127e-06, |
| "loss": 1.8921, |
| "step": 175120 |
| }, |
| { |
| "epoch": 79.97419502169446, |
| "eval_accuracy": 0.12806089743589744, |
| "eval_loss": 2.888369083404541, |
| "eval_runtime": 86.948, |
| "eval_samples_per_second": 478.447, |
| "eval_steps_per_second": 23.922, |
| "step": 175120 |
| }, |
| { |
| "epoch": 89.96962776889701, |
| "grad_norm": 2.831256866455078, |
| "learning_rate": 1.0044769301050709e-06, |
| "loss": 1.8875, |
| "step": 197010 |
| }, |
| { |
| "epoch": 89.96962776889701, |
| "eval_accuracy": 0.12728365384615384, |
| "eval_loss": 2.9168872833251953, |
| "eval_runtime": 87.0546, |
| "eval_samples_per_second": 477.861, |
| "eval_steps_per_second": 23.893, |
| "step": 197010 |
| }, |
| { |
| "epoch": 99.96506051609957, |
| "grad_norm": 2.589529514312744, |
| "learning_rate": 4.979442667884879e-09, |
| "loss": 1.8833, |
| "step": 218900 |
| }, |
| { |
| "epoch": 99.96506051609957, |
| "eval_accuracy": 0.1268359375, |
| "eval_loss": 2.9121108055114746, |
| "eval_runtime": 86.8592, |
| "eval_samples_per_second": 478.936, |
| "eval_steps_per_second": 23.947, |
| "step": 218900 |
| }, |
| { |
| "epoch": 99.96506051609957, |
| "step": 218900, |
| "total_flos": 1.1488768422484287e+19, |
| "train_loss": 1.4928506880995889, |
| "train_runtime": 186579.6133, |
| "train_samples_per_second": 234.678, |
| "train_steps_per_second": 1.173 |
| } |
| ], |
| "logging_steps": 21890, |
| "max_steps": 218900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1488768422484287e+19, |
| "train_batch_size": 50, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|