| { |
| "best_global_step": 121, |
| "best_metric": 0.6695265769958496, |
| "best_model_checkpoint": "/home/plucky/ml-workspace/models/gemma4-26b-securecode/checkpoint-121", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 121, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.1113492242991925, |
| "epoch": 0.0827300930713547, |
| "grad_norm": 10.3125, |
| "learning_rate": 1.8e-05, |
| "loss": 93.48836059570313, |
| "mean_token_accuracy": 0.4107020549476147, |
| "num_tokens": 81920.0, |
| "step": 10 |
| }, |
| { |
| "entropy": 0.8875315530225635, |
| "epoch": 0.1654601861427094, |
| "grad_norm": 6.15625, |
| "learning_rate": 3.8e-05, |
| "loss": 67.76697998046875, |
| "mean_token_accuracy": 0.5182974558323622, |
| "num_tokens": 163840.0, |
| "step": 20 |
| }, |
| { |
| "entropy": 0.673606987670064, |
| "epoch": 0.2481902792140641, |
| "grad_norm": 2.421875, |
| "learning_rate": 5.8e-05, |
| "loss": 37.221334838867186, |
| "mean_token_accuracy": 0.6476027386263012, |
| "num_tokens": 245760.0, |
| "step": 30 |
| }, |
| { |
| "entropy": 1.0845661748200655, |
| "epoch": 0.3309203722854188, |
| "grad_norm": 1.3671875, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 22.017848205566406, |
| "mean_token_accuracy": 0.7083170266821981, |
| "num_tokens": 327680.0, |
| "step": 40 |
| }, |
| { |
| "entropy": 1.1636322166770696, |
| "epoch": 0.4136504653567735, |
| "grad_norm": 0.703125, |
| "learning_rate": 9.8e-05, |
| "loss": 17.47879638671875, |
| "mean_token_accuracy": 0.7332558700814843, |
| "num_tokens": 409600.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.9551631901413202, |
| "epoch": 0.4963805584281282, |
| "grad_norm": 0.40625, |
| "learning_rate": 0.000118, |
| "loss": 15.09481201171875, |
| "mean_token_accuracy": 0.7555772982537746, |
| "num_tokens": 491520.0, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.8048430571332574, |
| "epoch": 0.5791106514994829, |
| "grad_norm": 0.375, |
| "learning_rate": 0.000138, |
| "loss": 13.297686767578124, |
| "mean_token_accuracy": 0.7774828754365444, |
| "num_tokens": 573440.0, |
| "step": 70 |
| }, |
| { |
| "entropy": 0.8100443260744215, |
| "epoch": 0.6618407445708376, |
| "grad_norm": 0.4609375, |
| "learning_rate": 0.00015800000000000002, |
| "loss": 12.752572631835937, |
| "mean_token_accuracy": 0.7837084107100963, |
| "num_tokens": 655360.0, |
| "step": 80 |
| }, |
| { |
| "entropy": 0.7172152267768979, |
| "epoch": 0.7445708376421923, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.00017800000000000002, |
| "loss": 11.629959106445312, |
| "mean_token_accuracy": 0.799449609220028, |
| "num_tokens": 737280.0, |
| "step": 90 |
| }, |
| { |
| "entropy": 0.7284062243998051, |
| "epoch": 0.827300930713547, |
| "grad_norm": 0.40625, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 11.506278991699219, |
| "mean_token_accuracy": 0.8022871781140566, |
| "num_tokens": 819200.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.6922262106090784, |
| "epoch": 0.9100310237849017, |
| "grad_norm": 0.341796875, |
| "learning_rate": 0.00019942266891397815, |
| "loss": 11.149666595458985, |
| "mean_token_accuracy": 0.8068982377648354, |
| "num_tokens": 901120.0, |
| "step": 110 |
| }, |
| { |
| "entropy": 0.6608987387269736, |
| "epoch": 0.9927611168562565, |
| "grad_norm": 0.373046875, |
| "learning_rate": 0.00019743551343638324, |
| "loss": 10.666960906982421, |
| "mean_token_accuracy": 0.8124388422816992, |
| "num_tokens": 983040.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_entropy": 0.6862195637336997, |
| "eval_loss": 0.6695265769958496, |
| "eval_mean_token_accuracy": 0.8135074851124786, |
| "eval_num_tokens": 990208.0, |
| "eval_runtime": 255.0413, |
| "eval_samples_per_second": 0.843, |
| "eval_steps_per_second": 0.843, |
| "step": 121 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 363, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4904406021973606e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|