{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2553191489361702,
  "eval_steps": 500,
  "global_step": 21,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0121580547112462,
      "grad_norm": 0.0986328125,
      "learning_rate": 2e-05,
      "loss": 0.8436,
      "step": 1
    },
    {
      "epoch": 0.0243161094224924,
      "grad_norm": 0.09765625,
      "learning_rate": 4e-05,
      "loss": 0.9406,
      "step": 2
    },
    {
      "epoch": 0.0364741641337386,
      "grad_norm": 0.11083984375,
      "learning_rate": 6e-05,
      "loss": 1.1161,
      "step": 3
    },
    {
      "epoch": 0.0486322188449848,
      "grad_norm": 0.09521484375,
      "learning_rate": 8e-05,
      "loss": 0.9467,
      "step": 4
    },
    {
      "epoch": 0.060790273556231005,
      "grad_norm": 0.10009765625,
      "learning_rate": 0.0001,
      "loss": 0.8456,
      "step": 5
    },
    {
      "epoch": 0.0729483282674772,
      "grad_norm": 0.10009765625,
      "learning_rate": 0.00012,
      "loss": 0.9215,
      "step": 6
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.107421875,
      "learning_rate": 0.00014,
      "loss": 0.8381,
      "step": 7
    },
    {
      "epoch": 0.0972644376899696,
      "grad_norm": 0.1123046875,
      "learning_rate": 0.00016,
      "loss": 0.9411,
      "step": 8
    },
    {
      "epoch": 0.1094224924012158,
      "grad_norm": 0.11572265625,
      "learning_rate": 0.00018,
      "loss": 0.9348,
      "step": 9
    },
    {
      "epoch": 0.12158054711246201,
      "grad_norm": 0.1279296875,
      "learning_rate": 0.0002,
      "loss": 0.9752,
      "step": 10
    },
    {
      "epoch": 0.1337386018237082,
      "grad_norm": 0.1416015625,
      "learning_rate": 0.0001999048221581858,
      "loss": 1.1067,
      "step": 11
    },
    {
      "epoch": 0.1458966565349544,
      "grad_norm": 0.1328125,
      "learning_rate": 0.00019961946980917456,
      "loss": 0.904,
      "step": 12
    },
    {
      "epoch": 0.1580547112462006,
      "grad_norm": 0.146484375,
      "learning_rate": 0.00019914448613738106,
      "loss": 0.9718,
      "step": 13
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.1435546875,
      "learning_rate": 0.00019848077530122083,
      "loss": 0.9354,
      "step": 14
    },
    {
      "epoch": 0.182370820668693,
      "grad_norm": 0.12890625,
      "learning_rate": 0.00019762960071199333,
      "loss": 1.0536,
      "step": 15
    },
    {
      "epoch": 0.1945288753799392,
      "grad_norm": 0.1259765625,
      "learning_rate": 0.00019659258262890683,
      "loss": 1.0129,
      "step": 16
    },
    {
      "epoch": 0.2066869300911854,
      "grad_norm": 0.1328125,
      "learning_rate": 0.0001953716950748227,
      "loss": 1.0696,
      "step": 17
    },
    {
      "epoch": 0.2188449848024316,
      "grad_norm": 0.1025390625,
      "learning_rate": 0.00019396926207859084,
      "loss": 0.802,
      "step": 18
    },
    {
      "epoch": 0.23100303951367782,
      "grad_norm": 0.12255859375,
      "learning_rate": 0.0001923879532511287,
      "loss": 0.9757,
      "step": 19
    },
    {
      "epoch": 0.24316109422492402,
      "grad_norm": 0.123046875,
      "learning_rate": 0.000190630778703665,
      "loss": 1.1032,
      "step": 20
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 0.12158203125,
      "learning_rate": 0.00018870108331782217,
      "loss": 0.9319,
      "step": 21
    }
  ],
  "logging_steps": 1,
  "max_steps": 82,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 21,
  "total_flos": 1.5579632954769408e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}