{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 8456, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23651844843897823, "grad_norm": 0.75390625, "learning_rate": 1.881740775780511e-05, "loss": 0.2864, "mean_token_accuracy": 0.9243696781396866, "step": 500 }, { "epoch": 0.47303689687795647, "grad_norm": 0.94140625, "learning_rate": 1.763481551561022e-05, "loss": 0.2402, "mean_token_accuracy": 0.9334622280597686, "step": 1000 }, { "epoch": 0.7095553453169348, "grad_norm": 1.4453125, "learning_rate": 1.6452223273415328e-05, "loss": 0.2289, "mean_token_accuracy": 0.9359804515838623, "step": 1500 }, { "epoch": 0.9460737937559129, "grad_norm": 0.9765625, "learning_rate": 1.5269631031220437e-05, "loss": 0.2271, "mean_token_accuracy": 0.9360219603776931, "step": 2000 }, { "epoch": 1.1825922421948911, "grad_norm": 1.2265625, "learning_rate": 1.4087038789025544e-05, "loss": 0.2127, "mean_token_accuracy": 0.9386496288776398, "step": 2500 }, { "epoch": 1.4191106906338695, "grad_norm": 1.6171875, "learning_rate": 1.2904446546830653e-05, "loss": 0.2061, "mean_token_accuracy": 0.9404804083108902, "step": 3000 }, { "epoch": 1.6556291390728477, "grad_norm": 0.7265625, "learning_rate": 1.1721854304635763e-05, "loss": 0.2084, "mean_token_accuracy": 0.9395650044679642, "step": 3500 }, { "epoch": 1.8921475875118259, "grad_norm": 1.25, "learning_rate": 1.0539262062440872e-05, "loss": 0.2, "mean_token_accuracy": 0.9420614712238312, "step": 4000 }, { "epoch": 2.128666035950804, "grad_norm": 1.1484375, "learning_rate": 9.35666982024598e-06, "loss": 0.2018, "mean_token_accuracy": 0.941098618388176, "step": 4500 }, { "epoch": 2.3651844843897822, "grad_norm": 0.98046875, "learning_rate": 8.174077578051088e-06, "loss": 0.1967, "mean_token_accuracy": 0.94244316136837, "step": 5000 }, { "epoch": 2.6017029328287604, "grad_norm": 0.72265625, "learning_rate": 6.991485335856198e-06, "loss": 0.1918, "mean_token_accuracy": 0.9436353342533111, "step": 5500 }, { "epoch": 2.838221381267739, "grad_norm": 1.3125, "learning_rate": 5.808893093661306e-06, "loss": 0.1947, "mean_token_accuracy": 0.9429612046480179, "step": 6000 }, { "epoch": 3.074739829706717, "grad_norm": 1.171875, "learning_rate": 4.626300851466415e-06, "loss": 0.1925, "mean_token_accuracy": 0.9436095499992371, "step": 6500 }, { "epoch": 3.3112582781456954, "grad_norm": 1.328125, "learning_rate": 3.443708609271523e-06, "loss": 0.1888, "mean_token_accuracy": 0.9442265207767486, "step": 7000 }, { "epoch": 3.5477767265846736, "grad_norm": 1.015625, "learning_rate": 2.2611163670766324e-06, "loss": 0.1948, "mean_token_accuracy": 0.9426910729408264, "step": 7500 }, { "epoch": 3.7842951750236518, "grad_norm": 1.5078125, "learning_rate": 1.078524124881741e-06, "loss": 0.1929, "mean_token_accuracy": 0.9433188273906707, "step": 8000 } ], "logging_steps": 500, "max_steps": 8456, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.722176240290611e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }