{ "best_global_step": 8000, "best_metric": 0.13777850568294525, "best_model_checkpoint": "check-point-qwen2.5-bags/checkpoint-8000", "epoch": 1.9176005273566248, "eval_steps": 2000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.479415113561455, "eval_entropy": 0.15452208999515504, "eval_loss": 0.15754735469818115, "eval_mean_token_accuracy": 0.9525203127269597, "eval_num_tokens": 32768000.0, "eval_runtime": 11.8915, "eval_samples_per_second": 86.784, "eval_steps_per_second": 21.696, "step": 2000 }, { "epoch": 0.95883022712291, "eval_entropy": 0.1454699612403101, "eval_loss": 0.14876286685466766, "eval_mean_token_accuracy": 0.954681836819464, "eval_num_tokens": 65536000.0, "eval_runtime": 11.8969, "eval_samples_per_second": 86.745, "eval_steps_per_second": 21.686, "step": 4000 }, { "epoch": 1.43818541379517, "eval_entropy": 0.1354526253633721, "eval_loss": 0.14411340653896332, "eval_mean_token_accuracy": 0.9560297101505043, "eval_num_tokens": 98297856.0, "eval_runtime": 11.9057, "eval_samples_per_second": 86.682, "eval_steps_per_second": 21.67, "step": 6000 }, { "epoch": 1.9176005273566248, "eval_entropy": 0.13540720385174418, "eval_loss": 0.13777850568294525, "eval_mean_token_accuracy": 0.9574325225611989, "eval_num_tokens": 131065856.0, "eval_runtime": 11.8969, "eval_samples_per_second": 86.745, "eval_steps_per_second": 21.686, "step": 8000 } ], "logging_steps": 8344, "max_steps": 41720, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.8144933446038323e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }