| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.23998080153587714, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.0767303490638733, |
| "epoch": 0.023998080153587713, |
| "grad_norm": 1.40625, |
| "learning_rate": 7.857142857142858e-06, |
| "loss": 0.7652, |
| "mean_token_accuracy": 0.807009916305542, |
| "num_tokens": 46104284.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 1.011719313263893, |
| "epoch": 0.04799616030717543, |
| "grad_norm": 2.5, |
| "learning_rate": 9.991950086669187e-06, |
| "loss": 0.4576, |
| "mean_token_accuracy": 0.853641785979271, |
| "num_tokens": 92210808.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.9967595022916794, |
| "epoch": 0.07199424046076314, |
| "grad_norm": 1.6875, |
| "learning_rate": 9.954845660034937e-06, |
| "loss": 0.4416, |
| "mean_token_accuracy": 0.8567710411548615, |
| "num_tokens": 138315654.0, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.982376498579979, |
| "epoch": 0.09599232061435085, |
| "grad_norm": 1.640625, |
| "learning_rate": 9.887809392638194e-06, |
| "loss": 0.4328, |
| "mean_token_accuracy": 0.8593817538022995, |
| "num_tokens": 184416108.0, |
| "step": 400 |
| }, |
| { |
| "entropy": 0.9777479559183121, |
| "epoch": 0.11999040076793857, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.791246245403818e-06, |
| "loss": 0.4293, |
| "mean_token_accuracy": 0.8590555649995804, |
| "num_tokens": 230522098.0, |
| "step": 500 |
| }, |
| { |
| "entropy": 0.9740712708234787, |
| "epoch": 0.14398848092152627, |
| "grad_norm": 5.875, |
| "learning_rate": 9.665739548862132e-06, |
| "loss": 0.4405, |
| "mean_token_accuracy": 0.8574600327014923, |
| "num_tokens": 276628106.0, |
| "step": 600 |
| }, |
| { |
| "entropy": 0.9753435063362121, |
| "epoch": 0.16798656107511398, |
| "grad_norm": 5.34375, |
| "learning_rate": 9.512047479294147e-06, |
| "loss": 0.4346, |
| "mean_token_accuracy": 0.8599888670444489, |
| "num_tokens": 322733236.0, |
| "step": 700 |
| }, |
| { |
| "entropy": 0.9697796589136124, |
| "epoch": 0.1919846412287017, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.331098478647084e-06, |
| "loss": 0.4324, |
| "mean_token_accuracy": 0.8604245519638062, |
| "num_tokens": 368840126.0, |
| "step": 800 |
| }, |
| { |
| "entropy": 0.9738489526510239, |
| "epoch": 0.2159827213822894, |
| "grad_norm": 4.5625, |
| "learning_rate": 9.123985645888116e-06, |
| "loss": 0.423, |
| "mean_token_accuracy": 0.8620242869853973, |
| "num_tokens": 414942536.0, |
| "step": 900 |
| }, |
| { |
| "entropy": 0.9679786705970764, |
| "epoch": 0.23998080153587714, |
| "grad_norm": 4.34375, |
| "learning_rate": 8.891960133677763e-06, |
| "loss": 0.4212, |
| "mean_token_accuracy": 0.8623434334993363, |
| "num_tokens": 461046116.0, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 4167, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.620733471602246e+18, |
| "train_batch_size": 30, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|