| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 3447, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 0.5079199960827827, |
| "epoch": 0.1450536698578474, |
| "grad_norm": 0.67578125, |
| "learning_rate": 4.363528715216104e-05, |
| "loss": 0.5061, |
| "mean_token_accuracy": 0.847917699560523, |
| "num_tokens": 34675536.0, |
| "step": 500 |
| }, |
| { |
| "entropy": 0.4645708839818835, |
| "epoch": 0.2901073397156948, |
| "grad_norm": 0.609375, |
| "learning_rate": 3.6234458259325044e-05, |
| "loss": 0.4625, |
| "mean_token_accuracy": 0.8582921151965857, |
| "num_tokens": 69136517.0, |
| "step": 1000 |
| }, |
| { |
| "entropy": 0.4413825359642506, |
| "epoch": 0.4351610095735422, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.8833629366489046e-05, |
| "loss": 0.4391, |
| "mean_token_accuracy": 0.8642738572955132, |
| "num_tokens": 103657138.0, |
| "step": 1500 |
| }, |
| { |
| "entropy": 0.426292674459517, |
| "epoch": 0.5802146794313896, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.143280047365305e-05, |
| "loss": 0.4239, |
| "mean_token_accuracy": 0.8679788280278444, |
| "num_tokens": 138126943.0, |
| "step": 2000 |
| }, |
| { |
| "entropy": 0.41619670213758947, |
| "epoch": 0.725268349289237, |
| "grad_norm": 0.6015625, |
| "learning_rate": 1.4031971580817053e-05, |
| "loss": 0.4135, |
| "mean_token_accuracy": 0.8707230059802532, |
| "num_tokens": 172549297.0, |
| "step": 2500 |
| }, |
| { |
| "entropy": 0.41040751719474794, |
| "epoch": 0.8703220191470844, |
| "grad_norm": 0.61328125, |
| "learning_rate": 6.631142687981054e-06, |
| "loss": 0.4075, |
| "mean_token_accuracy": 0.8723464601933956, |
| "num_tokens": 207014931.0, |
| "step": 3000 |
| }, |
| { |
| "entropy": 0.40811442769647177, |
| "epoch": 1.0, |
| "mean_token_accuracy": 0.8727223603117386, |
| "num_tokens": 237886773.0, |
| "step": 3447, |
| "total_flos": 1.4839042051252683e+19, |
| "train_loss": 0.43732520784887674, |
| "train_runtime": 16918.1607, |
| "train_samples_per_second": 19.559, |
| "train_steps_per_second": 0.204 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 3447, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4839042051252683e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|