| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.896, |
| "eval_steps": 500, |
| "global_step": 45, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.064, |
| "grad_norm": 5.85249391104972, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.8383, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 5.7880200477972314, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.8521, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 4.637830956527449, |
| "learning_rate": 1.2e-05, |
| "loss": 0.8444, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 2.174262328740971, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.7812, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.057305436302631, |
| "learning_rate": 2e-05, |
| "loss": 0.7787, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 8.176173201136002, |
| "learning_rate": 1.9969173337331283e-05, |
| "loss": 0.8424, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 6.401762925359562, |
| "learning_rate": 1.9876883405951378e-05, |
| "loss": 0.7736, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 3.2961629781668953, |
| "learning_rate": 1.9723699203976768e-05, |
| "loss": 0.7289, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 2.581895471280243, |
| "learning_rate": 1.9510565162951538e-05, |
| "loss": 0.7189, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.8876971192011507, |
| "learning_rate": 1.9238795325112867e-05, |
| "loss": 0.6747, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 1.458550787382178, |
| "learning_rate": 1.891006524188368e-05, |
| "loss": 0.6609, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.3017499709559937, |
| "learning_rate": 1.8526401643540924e-05, |
| "loss": 0.6662, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.384898941850018, |
| "learning_rate": 1.8090169943749477e-05, |
| "loss": 0.6792, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.363683022675478, |
| "learning_rate": 1.7604059656000313e-05, |
| "loss": 0.637, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.0440100554501006, |
| "learning_rate": 1.7071067811865477e-05, |
| "loss": 0.6611, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 1.4642435617268161, |
| "learning_rate": 1.6494480483301836e-05, |
| "loss": 0.9129, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 1.2688197311643739, |
| "learning_rate": 1.5877852522924733e-05, |
| "loss": 0.6914, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.8143916637114235, |
| "learning_rate": 1.5224985647159489e-05, |
| "loss": 0.4865, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 0.9165299977448033, |
| "learning_rate": 1.4539904997395468e-05, |
| "loss": 0.5731, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 0.7360018502732568, |
| "learning_rate": 1.3826834323650899e-05, |
| "loss": 0.4969, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 0.7602013706953981, |
| "learning_rate": 1.3090169943749475e-05, |
| "loss": 0.572, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 0.6823276613726659, |
| "learning_rate": 1.2334453638559057e-05, |
| "loss": 0.5332, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.6128440387016976, |
| "learning_rate": 1.156434465040231e-05, |
| "loss": 0.5485, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 0.6379847198707073, |
| "learning_rate": 1.0784590957278452e-05, |
| "loss": 0.5301, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 0.6063364000158917, |
| "learning_rate": 1e-05, |
| "loss": 0.56, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 0.598214112142404, |
| "learning_rate": 9.215409042721553e-06, |
| "loss": 0.5712, |
| "step": 26 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 0.598964734762257, |
| "learning_rate": 8.43565534959769e-06, |
| "loss": 0.506, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.564643443704361, |
| "learning_rate": 7.66554636144095e-06, |
| "loss": 0.5203, |
| "step": 28 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 0.547875294216537, |
| "learning_rate": 6.909830056250527e-06, |
| "loss": 0.4922, |
| "step": 29 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 0.5824606933429736, |
| "learning_rate": 6.173165676349103e-06, |
| "loss": 0.5583, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 0.8335341090192849, |
| "learning_rate": 5.460095002604533e-06, |
| "loss": 0.7596, |
| "step": 31 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 0.5850931076101747, |
| "learning_rate": 4.775014352840512e-06, |
| "loss": 0.4994, |
| "step": 32 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 0.5107787299774194, |
| "learning_rate": 4.12214747707527e-06, |
| "loss": 0.455, |
| "step": 33 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.465974220692486, |
| "learning_rate": 3.505519516698165e-06, |
| "loss": 0.473, |
| "step": 34 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.49096442552776787, |
| "learning_rate": 2.9289321881345257e-06, |
| "loss": 0.4711, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.4545396809474353, |
| "learning_rate": 2.395940343999691e-06, |
| "loss": 0.5029, |
| "step": 36 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 0.4158603803574914, |
| "learning_rate": 1.9098300562505266e-06, |
| "loss": 0.4538, |
| "step": 37 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.5599429573170525, |
| "learning_rate": 1.4735983564590784e-06, |
| "loss": 0.4522, |
| "step": 38 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.4773395982310906, |
| "learning_rate": 1.0899347581163222e-06, |
| "loss": 0.4694, |
| "step": 39 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.4555133299662401, |
| "learning_rate": 7.612046748871327e-07, |
| "loss": 0.4794, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.43925529257171464, |
| "learning_rate": 4.894348370484648e-07, |
| "loss": 0.4532, |
| "step": 41 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.40275165109816563, |
| "learning_rate": 2.7630079602323447e-07, |
| "loss": 0.483, |
| "step": 42 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.45666320321239945, |
| "learning_rate": 1.231165940486234e-07, |
| "loss": 0.4769, |
| "step": 43 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.39135454805689784, |
| "learning_rate": 3.082666266872036e-08, |
| "loss": 0.4274, |
| "step": 44 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.4269794519040765, |
| "learning_rate": 0.0, |
| "loss": 0.4811, |
| "step": 45 |
| }, |
| { |
| "epoch": 2.896, |
| "step": 45, |
| "total_flos": 5.682962857616998e+16, |
| "train_loss": 0.6006159649954902, |
| "train_runtime": 3170.2046, |
| "train_samples_per_second": 0.943, |
| "train_steps_per_second": 0.014 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 45, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.682962857616998e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|