{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 113, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.3933267489075661, "epoch": 0.08888888888888889, "grad_norm": 19.091772079467773, "learning_rate": 2e-05, "loss": 4.2829, "mean_token_accuracy": 0.5681655570864678, "num_tokens": 29409.0, "step": 10 }, { "entropy": 0.7650148034095764, "epoch": 0.17777777777777778, "grad_norm": 3.8214948177337646, "learning_rate": 2e-05, "loss": 1.9461, "mean_token_accuracy": 0.6879387736320496, "num_tokens": 58827.0, "step": 20 }, { "entropy": 1.1828202903270721, "epoch": 0.26666666666666666, "grad_norm": 2.6364810466766357, "learning_rate": 2e-05, "loss": 1.4227, "mean_token_accuracy": 0.7338245347142219, "num_tokens": 88299.0, "step": 30 }, { "entropy": 1.134617891907692, "epoch": 0.35555555555555557, "grad_norm": 1.9795665740966797, "learning_rate": 2e-05, "loss": 1.1307, "mean_token_accuracy": 0.7887017637491226, "num_tokens": 117759.0, "step": 40 }, { "entropy": 0.877768449485302, "epoch": 0.4444444444444444, "grad_norm": 1.8397494554519653, "learning_rate": 2e-05, "loss": 0.8535, "mean_token_accuracy": 0.8351033940911293, "num_tokens": 147140.0, "step": 50 }, { "entropy": 0.587233804166317, "epoch": 0.5333333333333333, "grad_norm": 1.7626832723617554, "learning_rate": 2e-05, "loss": 0.5781, "mean_token_accuracy": 0.8860435307025909, "num_tokens": 176659.0, "step": 60 }, { "entropy": 0.3405880033969879, "epoch": 0.6222222222222222, "grad_norm": 1.520534634590149, "learning_rate": 2e-05, "loss": 0.3419, "mean_token_accuracy": 0.9315642505884171, "num_tokens": 206147.0, "step": 70 }, { "entropy": 0.19235755391418935, "epoch": 0.7111111111111111, "grad_norm": 1.268977403640747, "learning_rate": 2e-05, "loss": 0.1858, "mean_token_accuracy": 0.9728681713342666, "num_tokens": 235603.0, "step": 80 }, { "entropy": 0.11804858762770891, "epoch": 0.8, "grad_norm": 0.781975269317627, "learning_rate": 2e-05, "loss": 0.1064, "mean_token_accuracy": 0.9887924045324326, "num_tokens": 265047.0, "step": 90 }, { "entropy": 0.09983876422047615, "epoch": 0.8888888888888888, "grad_norm": 0.4874080419540405, "learning_rate": 2e-05, "loss": 0.0815, "mean_token_accuracy": 0.9895498856902123, "num_tokens": 294524.0, "step": 100 }, { "entropy": 0.08479245882481337, "epoch": 0.9777777777777777, "grad_norm": 0.3734425902366638, "learning_rate": 2e-05, "loss": 0.0768, "mean_token_accuracy": 0.9904760375618935, "num_tokens": 324032.0, "step": 110 } ], "logging_steps": 10, "max_steps": 904, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 207041312797440.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }