{ "best_global_step": 100, "best_metric": 0.11088702827692032, "best_model_checkpoint": "/content/models/gemma_qlora_lmh_inst/checkpoint-100", "epoch": 1.7008547008547008, "eval_steps": 20, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 2.139886352419853, "epoch": 0.3418803418803419, "grad_norm": 6.644908428192139, "learning_rate": 8.389830508474577e-06, "loss": 0.449, "mean_token_accuracy": 0.9069619461894035, "num_tokens": 118924.0, "step": 20 }, { "epoch": 0.3418803418803419, "eval_entropy": 2.112155056407309, "eval_loss": 0.16615918278694153, "eval_mean_token_accuracy": 0.9459472375038342, "eval_num_tokens": 118924.0, "eval_runtime": 47.2708, "eval_samples_per_second": 39.369, "eval_steps_per_second": 2.475, "step": 20 }, { "entropy": 2.0837366968393325, "epoch": 0.6837606837606838, "grad_norm": 2.919997215270996, "learning_rate": 6.694915254237288e-06, "loss": 0.1523, "mean_token_accuracy": 0.9496713325381279, "num_tokens": 236856.0, "step": 40 }, { "epoch": 0.6837606837606838, "eval_entropy": 2.0886894973934207, "eval_loss": 0.1265888810157776, "eval_mean_token_accuracy": 0.959908177709987, "eval_num_tokens": 236856.0, "eval_runtime": 46.4967, "eval_samples_per_second": 40.024, "eval_steps_per_second": 2.516, "step": 40 }, { "entropy": 2.087970663339664, "epoch": 1.017094017094017, "grad_norm": 2.6256933212280273, "learning_rate": 5e-06, "loss": 0.136, "mean_token_accuracy": 0.9595257181387681, "num_tokens": 347411.0, "step": 60 }, { "epoch": 1.017094017094017, "eval_entropy": 2.1006248659557767, "eval_loss": 0.12076133489608765, "eval_mean_token_accuracy": 0.960268861717648, "eval_num_tokens": 347411.0, "eval_runtime": 46.4488, "eval_samples_per_second": 40.066, "eval_steps_per_second": 2.519, "step": 60 }, { "entropy": 2.077034068107605, "epoch": 1.358974358974359, "grad_norm": 2.3626747131347656, "learning_rate": 3.305084745762712e-06, "loss": 0.096, "mean_token_accuracy": 0.9696433156728744, "num_tokens": 463138.0, "step": 80 }, { "epoch": 1.358974358974359, "eval_entropy": 2.0711163577870426, "eval_loss": 0.12328439950942993, "eval_mean_token_accuracy": 0.9593201702476567, "eval_num_tokens": 463138.0, "eval_runtime": 46.4721, "eval_samples_per_second": 40.046, "eval_steps_per_second": 2.518, "step": 80 }, { "entropy": 2.0657004177570344, "epoch": 1.7008547008547008, "grad_norm": 1.6588151454925537, "learning_rate": 1.6101694915254237e-06, "loss": 0.0803, "mean_token_accuracy": 0.9724445611238479, "num_tokens": 581349.0, "step": 100 }, { "epoch": 1.7008547008547008, "eval_entropy": 2.0421143230210004, "eval_loss": 0.11088702827692032, "eval_mean_token_accuracy": 0.9651837888945881, "eval_num_tokens": 581349.0, "eval_runtime": 46.4848, "eval_samples_per_second": 40.035, "eval_steps_per_second": 2.517, "step": 100 } ], "logging_steps": 20, "max_steps": 118, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6519515681526272e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }