| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 200, |
| "global_step": 225, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13468013468013468, |
| "grad_norm": 0.31015893816947937, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.38761081695556643, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.26936026936026936, |
| "grad_norm": 0.21577006578445435, |
| "learning_rate": 1.9946749853292233e-05, |
| "loss": 0.3502843379974365, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.22367797791957855, |
| "learning_rate": 1.9687297122400952e-05, |
| "loss": 0.3208463191986084, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5387205387205387, |
| "grad_norm": 0.20365038514137268, |
| "learning_rate": 1.9217488001088784e-05, |
| "loss": 0.3184423685073853, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6734006734006734, |
| "grad_norm": 0.2231130301952362, |
| "learning_rate": 1.8547524236003675e-05, |
| "loss": 0.3126647472381592, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 0.209013894200325, |
| "learning_rate": 1.7691953864390208e-05, |
| "loss": 0.3156299591064453, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.9427609427609428, |
| "grad_norm": 0.1777929961681366, |
| "learning_rate": 1.666935530836651e-05, |
| "loss": 0.2766378402709961, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0673400673400673, |
| "grad_norm": 0.17185629904270172, |
| "learning_rate": 1.5501933950752655e-05, |
| "loss": 0.24300642013549806, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.202020202020202, |
| "grad_norm": 0.18560396134853363, |
| "learning_rate": 1.4215039952670482e-05, |
| "loss": 0.20093803405761718, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.3367003367003367, |
| "grad_norm": 0.17960919439792633, |
| "learning_rate": 1.2836617783342968e-05, |
| "loss": 0.19915937185287474, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4713804713804715, |
| "grad_norm": 0.17622533440589905, |
| "learning_rate": 1.1396599415368062e-05, |
| "loss": 0.19454092979431153, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.606060606060606, |
| "grad_norm": 0.1789940446615219, |
| "learning_rate": 9.926254362026875e-06, |
| "loss": 0.19552748203277587, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.7407407407407407, |
| "grad_norm": 0.1863929182291031, |
| "learning_rate": 8.457510670346976e-06, |
| "loss": 0.19949347972869874, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.8754208754208754, |
| "grad_norm": 0.16495266556739807, |
| "learning_rate": 7.022261614327448e-06, |
| "loss": 0.20035164356231688, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.3942849636077881, |
| "learning_rate": 5.651673143248509e-06, |
| "loss": 0.19270834922790528, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.1346801346801345, |
| "grad_norm": 0.17539165914058685, |
| "learning_rate": 4.375507123592194e-06, |
| "loss": 0.1418288230895996, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.2693602693602695, |
| "grad_norm": 0.18225708603858948, |
| "learning_rate": 3.2214750701474875e-06, |
| "loss": 0.13898892402648927, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.404040404040404, |
| "grad_norm": 0.172335684299469, |
| "learning_rate": 2.2146363998111077e-06, |
| "loss": 0.14039983749389648, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.538720538720539, |
| "grad_norm": 0.1543879359960556, |
| "learning_rate": 1.3768542747997215e-06, |
| "loss": 0.13042680025100709, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.6734006734006734, |
| "grad_norm": 0.16937494277954102, |
| "learning_rate": 7.263208514547548e-07, |
| "loss": 0.12810491323471068, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.808080808080808, |
| "grad_norm": 0.17165407538414001, |
| "learning_rate": 2.7716224370188836e-07, |
| "loss": 0.1331431746482849, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.942760942760943, |
| "grad_norm": 0.14236703515052795, |
| "learning_rate": 3.913177925055189e-08, |
| "loss": 0.12828856706619263, |
| "step": 220 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 225, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.2113763633135616e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|