| { |
| "best_global_step": 540, |
| "best_metric": 0.43301182985305786, |
| "best_model_checkpoint": "Mistral-7B-v0.1/r4/checkpoint-540", |
| "epoch": 2.2506963788300833, |
| "eval_steps": 60, |
| "global_step": 540, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 2.3212984272175365, |
| "epoch": 0.25069637883008355, |
| "grad_norm": 4.5548248291015625, |
| "learning_rate": 7.638888888888889e-05, |
| "loss": 2.8492, |
| "mean_token_accuracy": 0.6425296268943284, |
| "num_tokens": 1474560.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.25069637883008355, |
| "eval_entropy": 0.6633218830669081, |
| "eval_loss": 0.5672379732131958, |
| "eval_mean_token_accuracy": 0.8630788239178719, |
| "eval_num_tokens": 1474560.0, |
| "eval_runtime": 1043.9462, |
| "eval_samples_per_second": 2.357, |
| "eval_steps_per_second": 0.295, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.5157420211368137, |
| "epoch": 0.5013927576601671, |
| "grad_norm": 1.0181090831756592, |
| "learning_rate": 9.891743688752738e-05, |
| "loss": 0.5017, |
| "mean_token_accuracy": 0.8742851712637477, |
| "num_tokens": 2949120.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5013927576601671, |
| "eval_entropy": 0.4764551486965124, |
| "eval_loss": 0.46269991993904114, |
| "eval_mean_token_accuracy": 0.882140948400869, |
| "eval_num_tokens": 2949120.0, |
| "eval_runtime": 1038.9576, |
| "eval_samples_per_second": 2.369, |
| "eval_steps_per_second": 0.296, |
| "step": 120 |
| }, |
| { |
| "entropy": 0.46944635676013097, |
| "epoch": 0.7520891364902507, |
| "grad_norm": 1.4101026058197021, |
| "learning_rate": 9.389450641873323e-05, |
| "loss": 0.4597, |
| "mean_token_accuracy": 0.8829208799534374, |
| "num_tokens": 4423680.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7520891364902507, |
| "eval_entropy": 0.4749697573579751, |
| "eval_loss": 0.45166516304016113, |
| "eval_mean_token_accuracy": 0.8840483388343414, |
| "eval_num_tokens": 4423680.0, |
| "eval_runtime": 1040.6612, |
| "eval_samples_per_second": 2.365, |
| "eval_steps_per_second": 0.296, |
| "step": 180 |
| }, |
| { |
| "entropy": 0.4655254686648926, |
| "epoch": 1.0, |
| "grad_norm": 1.558998465538025, |
| "learning_rate": 8.518351670729529e-05, |
| "loss": 0.4543, |
| "mean_token_accuracy": 0.8834420842735955, |
| "num_tokens": 5879808.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_entropy": 0.4612268569407525, |
| "eval_loss": 0.4448107182979584, |
| "eval_mean_token_accuracy": 0.8855506770022503, |
| "eval_num_tokens": 5879808.0, |
| "eval_runtime": 1040.4289, |
| "eval_samples_per_second": 2.365, |
| "eval_steps_per_second": 0.296, |
| "step": 240 |
| }, |
| { |
| "entropy": 0.45157561596069073, |
| "epoch": 1.2506963788300836, |
| "grad_norm": 0.6592407822608948, |
| "learning_rate": 7.351637360519813e-05, |
| "loss": 0.4387, |
| "mean_token_accuracy": 0.8871405469046698, |
| "num_tokens": 7354368.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2506963788300836, |
| "eval_entropy": 0.4438396056557631, |
| "eval_loss": 0.44106850028038025, |
| "eval_mean_token_accuracy": 0.8862724611898521, |
| "eval_num_tokens": 7354368.0, |
| "eval_runtime": 1037.3961, |
| "eval_samples_per_second": 2.372, |
| "eval_steps_per_second": 0.297, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.4534259519229333, |
| "epoch": 1.501392757660167, |
| "grad_norm": 0.6076720356941223, |
| "learning_rate": 5.9873361855649876e-05, |
| "loss": 0.4402, |
| "mean_token_accuracy": 0.8862164333462715, |
| "num_tokens": 8828928.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.501392757660167, |
| "eval_entropy": 0.44579531716835963, |
| "eval_loss": 0.43730634450912476, |
| "eval_mean_token_accuracy": 0.8868549373242762, |
| "eval_num_tokens": 8828928.0, |
| "eval_runtime": 1044.4842, |
| "eval_samples_per_second": 2.356, |
| "eval_steps_per_second": 0.295, |
| "step": 360 |
| }, |
| { |
| "entropy": 0.44368693138621434, |
| "epoch": 1.7520891364902507, |
| "grad_norm": 0.6398904919624329, |
| "learning_rate": 4.5400780612818626e-05, |
| "loss": 0.4223, |
| "mean_token_accuracy": 0.888861709115218, |
| "num_tokens": 11753472.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.7520891364902507, |
| "eval_entropy": 0.4433383915524978, |
| "eval_loss": 0.4355394244194031, |
| "eval_mean_token_accuracy": 0.8874243900373384, |
| "eval_num_tokens": 11753472.0, |
| "eval_runtime": 1037.2958, |
| "eval_samples_per_second": 2.373, |
| "eval_steps_per_second": 0.297, |
| "step": 420 |
| }, |
| { |
| "entropy": 0.4386935969919301, |
| "epoch": 2.0, |
| "grad_norm": 1.2995598316192627, |
| "learning_rate": 3.131463026883449e-05, |
| "loss": 0.4264, |
| "mean_token_accuracy": 0.8898014740997486, |
| "num_tokens": 13209600.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_entropy": 0.4436397281076227, |
| "eval_loss": 0.4339684844017029, |
| "eval_mean_token_accuracy": 0.8877705109196824, |
| "eval_num_tokens": 13209600.0, |
| "eval_runtime": 1041.4468, |
| "eval_samples_per_second": 2.363, |
| "eval_steps_per_second": 0.296, |
| "step": 480 |
| }, |
| { |
| "entropy": 0.43632681195934614, |
| "epoch": 2.2506963788300833, |
| "grad_norm": 0.5876320004463196, |
| "learning_rate": 1.8798442914793663e-05, |
| "loss": 0.4221, |
| "mean_token_accuracy": 0.8905482163031896, |
| "num_tokens": 14684160.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.2506963788300833, |
| "eval_entropy": 0.4378794138985021, |
| "eval_loss": 0.43301182985305786, |
| "eval_mean_token_accuracy": 0.8880488022968367, |
| "eval_num_tokens": 14684160.0, |
| "eval_runtime": 1046.5384, |
| "eval_samples_per_second": 2.352, |
| "eval_steps_per_second": 0.294, |
| "step": 540 |
| } |
| ], |
| "logging_steps": 60, |
| "max_steps": 720, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 60, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 7 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.266922890113843e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|