| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6182868596356776, |
| "eval_steps": 500, |
| "global_step": 437, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0014148440723928551, |
| "grad_norm": 524.617919921875, |
| "learning_rate": 5e-05, |
| "loss": 1007.5778198242188, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0282968814478571, |
| "grad_norm": 181.72775268554688, |
| "learning_rate": 4.975383568761708e-05, |
| "loss": 764.3506373355264, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0565937628957142, |
| "grad_norm": 99.92882537841797, |
| "learning_rate": 4.896830532173602e-05, |
| "loss": 568.538037109375, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0848906443435713, |
| "grad_norm": 103.10233306884766, |
| "learning_rate": 4.765986363359608e-05, |
| "loss": 500.249609375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1131875257914284, |
| "grad_norm": 68.32410430908203, |
| "learning_rate": 4.585705661191276e-05, |
| "loss": 473.528125, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14148440723928551, |
| "grad_norm": 56.6121711730957, |
| "learning_rate": 4.975383568761708e-05, |
| "loss": 474.111279296875, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1697812886871426, |
| "grad_norm": 60.745384216308594, |
| "learning_rate": 4.896830532173602e-05, |
| "loss": 455.85908203125, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1980781701349997, |
| "grad_norm": 54.7177734375, |
| "learning_rate": 4.765986363359608e-05, |
| "loss": 452.77314453125, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2263750515828568, |
| "grad_norm": 59.06763458251953, |
| "learning_rate": 4.585705661191276e-05, |
| "loss": 435.56240234375, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2546719330307139, |
| "grad_norm": 61.36757278442383, |
| "learning_rate": 4.359921570673359e-05, |
| "loss": 421.743212890625, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.28296881447857103, |
| "grad_norm": 94.43900299072266, |
| "learning_rate": 4.093559974371725e-05, |
| "loss": 423.50859375, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3112656959264281, |
| "grad_norm": 117.91323852539062, |
| "learning_rate": 3.792432025501064e-05, |
| "loss": 414.050341796875, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3395625773742852, |
| "grad_norm": 76.31731414794922, |
| "learning_rate": 3.463107367254725e-05, |
| "loss": 403.8785400390625, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3678594588221423, |
| "grad_norm": 85.47946166992188, |
| "learning_rate": 3.112770804323089e-05, |
| "loss": 401.882421875, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3961563402699994, |
| "grad_norm": 142.3809051513672, |
| "learning_rate": 2.749065553566885e-05, |
| "loss": 387.7595458984375, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4244532217178565, |
| "grad_norm": 142.94207763671875, |
| "learning_rate": 2.3799264936115517e-05, |
| "loss": 389.3813720703125, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4527501031657136, |
| "grad_norm": 130.78367614746094, |
| "learning_rate": 2.0134070513200866e-05, |
| "loss": 382.645947265625, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4810469846135707, |
| "grad_norm": 113.1230239868164, |
| "learning_rate": 1.6575035019247017e-05, |
| "loss": 382.0867431640625, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5093438660614278, |
| "grad_norm": 105.29116821289062, |
| "learning_rate": 1.319980516023202e-05, |
| "loss": 377.62783203125, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5376407475092849, |
| "grad_norm": 76.38480377197266, |
| "learning_rate": 1.0082017594434984e-05, |
| "loss": 376.00263671875, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5659376289571421, |
| "grad_norm": 100.89452362060547, |
| "learning_rate": 7.289692417422445e-06, |
| "loss": 362.219677734375, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5942345104049991, |
| "grad_norm": 94.8159408569336, |
| "learning_rate": 4.883749182366356e-06, |
| "loss": 370.16728515625, |
| "step": 420 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 425, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 40, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3225692002516992.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|