{ "best_global_step": 121, "best_metric": 0.6695265769958496, "best_model_checkpoint": "/home/plucky/ml-workspace/models/gemma4-26b-securecode/checkpoint-121", "epoch": 1.0, "eval_steps": 500, "global_step": 121, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.1113492242991925, "epoch": 0.0827300930713547, "grad_norm": 10.3125, "learning_rate": 1.8e-05, "loss": 93.48836059570313, "mean_token_accuracy": 0.4107020549476147, "num_tokens": 81920.0, "step": 10 }, { "entropy": 0.8875315530225635, "epoch": 0.1654601861427094, "grad_norm": 6.15625, "learning_rate": 3.8e-05, "loss": 67.76697998046875, "mean_token_accuracy": 0.5182974558323622, "num_tokens": 163840.0, "step": 20 }, { "entropy": 0.673606987670064, "epoch": 0.2481902792140641, "grad_norm": 2.421875, "learning_rate": 5.8e-05, "loss": 37.221334838867186, "mean_token_accuracy": 0.6476027386263012, "num_tokens": 245760.0, "step": 30 }, { "entropy": 1.0845661748200655, "epoch": 0.3309203722854188, "grad_norm": 1.3671875, "learning_rate": 7.800000000000001e-05, "loss": 22.017848205566406, "mean_token_accuracy": 0.7083170266821981, "num_tokens": 327680.0, "step": 40 }, { "entropy": 1.1636322166770696, "epoch": 0.4136504653567735, "grad_norm": 0.703125, "learning_rate": 9.8e-05, "loss": 17.47879638671875, "mean_token_accuracy": 0.7332558700814843, "num_tokens": 409600.0, "step": 50 }, { "entropy": 0.9551631901413202, "epoch": 0.4963805584281282, "grad_norm": 0.40625, "learning_rate": 0.000118, "loss": 15.09481201171875, "mean_token_accuracy": 0.7555772982537746, "num_tokens": 491520.0, "step": 60 }, { "entropy": 0.8048430571332574, "epoch": 0.5791106514994829, "grad_norm": 0.375, "learning_rate": 0.000138, "loss": 13.297686767578124, "mean_token_accuracy": 0.7774828754365444, "num_tokens": 573440.0, "step": 70 }, { "entropy": 0.8100443260744215, "epoch": 0.6618407445708376, "grad_norm": 0.4609375, "learning_rate": 0.00015800000000000002, "loss": 12.752572631835937, "mean_token_accuracy": 0.7837084107100963, "num_tokens": 655360.0, "step": 80 }, { "entropy": 0.7172152267768979, "epoch": 0.7445708376421923, "grad_norm": 2.1875, "learning_rate": 0.00017800000000000002, "loss": 11.629959106445312, "mean_token_accuracy": 0.799449609220028, "num_tokens": 737280.0, "step": 90 }, { "entropy": 0.7284062243998051, "epoch": 0.827300930713547, "grad_norm": 0.40625, "learning_rate": 0.00019800000000000002, "loss": 11.506278991699219, "mean_token_accuracy": 0.8022871781140566, "num_tokens": 819200.0, "step": 100 }, { "entropy": 0.6922262106090784, "epoch": 0.9100310237849017, "grad_norm": 0.341796875, "learning_rate": 0.00019942266891397815, "loss": 11.149666595458985, "mean_token_accuracy": 0.8068982377648354, "num_tokens": 901120.0, "step": 110 }, { "entropy": 0.6608987387269736, "epoch": 0.9927611168562565, "grad_norm": 0.373046875, "learning_rate": 0.00019743551343638324, "loss": 10.666960906982421, "mean_token_accuracy": 0.8124388422816992, "num_tokens": 983040.0, "step": 120 }, { "epoch": 1.0, "eval_entropy": 0.6862195637336997, "eval_loss": 0.6695265769958496, "eval_mean_token_accuracy": 0.8135074851124786, "eval_num_tokens": 990208.0, "eval_runtime": 255.0413, "eval_samples_per_second": 0.843, "eval_steps_per_second": 0.843, "step": 121 } ], "logging_steps": 10, "max_steps": 363, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4904406021973606e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }