| { | |
| "best_global_step": 140, | |
| "best_metric": 0.22836367785930634, | |
| "best_model_checkpoint": "/content/models/gemma_jigsaw_lmh/checkpoint-140", | |
| "epoch": 2.7450980392156863, | |
| "eval_steps": 20, | |
| "global_step": 140, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 2.560735213756561, | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 6.186038494110107, | |
| "learning_rate": 8.758169934640524e-06, | |
| "loss": 0.5541, | |
| "mean_token_accuracy": 0.7453125, | |
| "num_tokens": 67412.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "eval_entropy": 2.491304580981915, | |
| "eval_loss": 0.3785918951034546, | |
| "eval_mean_token_accuracy": 0.7578671345343957, | |
| "eval_num_tokens": 67412.0, | |
| "eval_runtime": 2.6932, | |
| "eval_samples_per_second": 75.376, | |
| "eval_steps_per_second": 4.827, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 2.419952464103699, | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 2.4226012229919434, | |
| "learning_rate": 7.450980392156863e-06, | |
| "loss": 0.3293, | |
| "mean_token_accuracy": 0.81640625, | |
| "num_tokens": 134808.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "eval_entropy": 2.4435020043299747, | |
| "eval_loss": 0.2946617007255554, | |
| "eval_mean_token_accuracy": 0.8513986009817857, | |
| "eval_num_tokens": 134808.0, | |
| "eval_runtime": 2.6642, | |
| "eval_samples_per_second": 76.195, | |
| "eval_steps_per_second": 4.879, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 2.423317462205887, | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 10.875091552734375, | |
| "learning_rate": 6.143790849673204e-06, | |
| "loss": 0.2871, | |
| "mean_token_accuracy": 0.8487723216414451, | |
| "num_tokens": 201046.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "eval_entropy": 2.448130937723013, | |
| "eval_loss": 0.2594100534915924, | |
| "eval_mean_token_accuracy": 0.8621066441902747, | |
| "eval_num_tokens": 201046.0, | |
| "eval_runtime": 2.6633, | |
| "eval_samples_per_second": 76.221, | |
| "eval_steps_per_second": 4.881, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 2.3511528968811035, | |
| "epoch": 1.5686274509803921, | |
| "grad_norm": 5.017323970794678, | |
| "learning_rate": 4.836601307189543e-06, | |
| "loss": 0.2503, | |
| "mean_token_accuracy": 0.87109375, | |
| "num_tokens": 269334.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.5686274509803921, | |
| "eval_entropy": 2.3352334682758036, | |
| "eval_loss": 0.2594275176525116, | |
| "eval_mean_token_accuracy": 0.8824300720141485, | |
| "eval_num_tokens": 269334.0, | |
| "eval_runtime": 2.6613, | |
| "eval_samples_per_second": 76.278, | |
| "eval_steps_per_second": 4.885, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 2.3079891920089723, | |
| "epoch": 1.9607843137254903, | |
| "grad_norm": 7.2991743087768555, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 0.247, | |
| "mean_token_accuracy": 0.87890625, | |
| "num_tokens": 337424.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.9607843137254903, | |
| "eval_entropy": 2.3540270145122824, | |
| "eval_loss": 0.23124322295188904, | |
| "eval_mean_token_accuracy": 0.8861451057287363, | |
| "eval_num_tokens": 337424.0, | |
| "eval_runtime": 2.7035, | |
| "eval_samples_per_second": 75.087, | |
| "eval_steps_per_second": 4.809, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 2.3495707869529725, | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 5.988176345825195, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.2009, | |
| "mean_token_accuracy": 0.9171875, | |
| "num_tokens": 404955.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "eval_entropy": 2.4085900966937723, | |
| "eval_loss": 0.23444519937038422, | |
| "eval_mean_token_accuracy": 0.8957604903441209, | |
| "eval_num_tokens": 404955.0, | |
| "eval_runtime": 2.6365, | |
| "eval_samples_per_second": 76.995, | |
| "eval_steps_per_second": 4.931, | |
| "step": 120 | |
| }, | |
| { | |
| "entropy": 2.3811947822570803, | |
| "epoch": 2.7450980392156863, | |
| "grad_norm": 9.767471313476562, | |
| "learning_rate": 9.150326797385621e-07, | |
| "loss": 0.1973, | |
| "mean_token_accuracy": 0.91328125, | |
| "num_tokens": 472246.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.7450980392156863, | |
| "eval_entropy": 2.4008009983943057, | |
| "eval_loss": 0.22836367785930634, | |
| "eval_mean_token_accuracy": 0.9040646873987638, | |
| "eval_num_tokens": 472246.0, | |
| "eval_runtime": 2.645, | |
| "eval_samples_per_second": 76.748, | |
| "eval_steps_per_second": 4.915, | |
| "step": 140 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 153, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0222031863807488e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |