{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 200, "global_step": 225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13468013468013468, "grad_norm": 0.31015893816947937, "learning_rate": 1.5000000000000002e-05, "loss": 0.38761081695556643, "step": 10 }, { "epoch": 0.26936026936026936, "grad_norm": 0.21577006578445435, "learning_rate": 1.9946749853292233e-05, "loss": 0.3502843379974365, "step": 20 }, { "epoch": 0.40404040404040403, "grad_norm": 0.22367797791957855, "learning_rate": 1.9687297122400952e-05, "loss": 0.3208463191986084, "step": 30 }, { "epoch": 0.5387205387205387, "grad_norm": 0.20365038514137268, "learning_rate": 1.9217488001088784e-05, "loss": 0.3184423685073853, "step": 40 }, { "epoch": 0.6734006734006734, "grad_norm": 0.2231130301952362, "learning_rate": 1.8547524236003675e-05, "loss": 0.3126647472381592, "step": 50 }, { "epoch": 0.8080808080808081, "grad_norm": 0.209013894200325, "learning_rate": 1.7691953864390208e-05, "loss": 0.3156299591064453, "step": 60 }, { "epoch": 0.9427609427609428, "grad_norm": 0.1777929961681366, "learning_rate": 1.666935530836651e-05, "loss": 0.2766378402709961, "step": 70 }, { "epoch": 1.0673400673400673, "grad_norm": 0.17185629904270172, "learning_rate": 1.5501933950752655e-05, "loss": 0.24300642013549806, "step": 80 }, { "epoch": 1.202020202020202, "grad_norm": 0.18560396134853363, "learning_rate": 1.4215039952670482e-05, "loss": 0.20093803405761718, "step": 90 }, { "epoch": 1.3367003367003367, "grad_norm": 0.17960919439792633, "learning_rate": 1.2836617783342968e-05, "loss": 0.19915937185287474, "step": 100 }, { "epoch": 1.4713804713804715, "grad_norm": 0.17622533440589905, "learning_rate": 1.1396599415368062e-05, "loss": 0.19454092979431153, "step": 110 }, { "epoch": 1.606060606060606, "grad_norm": 0.1789940446615219, "learning_rate": 9.926254362026875e-06, "loss": 0.19552748203277587, "step": 120 }, { "epoch": 1.7407407407407407, "grad_norm": 0.1863929182291031, "learning_rate": 8.457510670346976e-06, "loss": 0.19949347972869874, "step": 130 }, { "epoch": 1.8754208754208754, "grad_norm": 0.16495266556739807, "learning_rate": 7.022261614327448e-06, "loss": 0.20035164356231688, "step": 140 }, { "epoch": 2.0, "grad_norm": 0.3942849636077881, "learning_rate": 5.651673143248509e-06, "loss": 0.19270834922790528, "step": 150 }, { "epoch": 2.1346801346801345, "grad_norm": 0.17539165914058685, "learning_rate": 4.375507123592194e-06, "loss": 0.1418288230895996, "step": 160 }, { "epoch": 2.2693602693602695, "grad_norm": 0.18225708603858948, "learning_rate": 3.2214750701474875e-06, "loss": 0.13898892402648927, "step": 170 }, { "epoch": 2.404040404040404, "grad_norm": 0.172335684299469, "learning_rate": 2.2146363998111077e-06, "loss": 0.14039983749389648, "step": 180 }, { "epoch": 2.538720538720539, "grad_norm": 0.1543879359960556, "learning_rate": 1.3768542747997215e-06, "loss": 0.13042680025100709, "step": 190 }, { "epoch": 2.6734006734006734, "grad_norm": 0.16937494277954102, "learning_rate": 7.263208514547548e-07, "loss": 0.12810491323471068, "step": 200 }, { "epoch": 2.808080808080808, "grad_norm": 0.17165407538414001, "learning_rate": 2.7716224370188836e-07, "loss": 0.1331431746482849, "step": 210 }, { "epoch": 2.942760942760943, "grad_norm": 0.14236703515052795, "learning_rate": 3.913177925055189e-08, "loss": 0.12828856706619263, "step": 220 } ], "logging_steps": 10, "max_steps": 225, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2113763633135616e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }