| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 16.0, |
| "eval_steps": 500, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.043191909790039, |
| "learning_rate": 6e-05, |
| "loss": 3.0122, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 5.835005283355713, |
| "learning_rate": 0.00012666666666666666, |
| "loss": 1.63, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 2.365471601486206, |
| "learning_rate": 0.00019333333333333333, |
| "loss": 0.4526, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.4100245237350464, |
| "learning_rate": 0.00019513513513513516, |
| "loss": 0.1436, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.047510027885437, |
| "learning_rate": 0.00018972972972972974, |
| "loss": 0.123, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.5822039246559143, |
| "learning_rate": 0.00018432432432432435, |
| "loss": 0.1186, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.6053642630577087, |
| "learning_rate": 0.00017891891891891893, |
| "loss": 0.1125, |
| "step": 70 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.4842885136604309, |
| "learning_rate": 0.00017351351351351353, |
| "loss": 0.1064, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.3560635447502136, |
| "learning_rate": 0.00016810810810810812, |
| "loss": 0.109, |
| "step": 90 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.33792516589164734, |
| "learning_rate": 0.00016270270270270272, |
| "loss": 0.108, |
| "step": 100 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.38220927119255066, |
| "learning_rate": 0.0001572972972972973, |
| "loss": 0.1023, |
| "step": 110 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.373237669467926, |
| "learning_rate": 0.0001518918918918919, |
| "loss": 0.1047, |
| "step": 120 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.4177246689796448, |
| "learning_rate": 0.0001464864864864865, |
| "loss": 0.1018, |
| "step": 130 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.4290566146373749, |
| "learning_rate": 0.0001410810810810811, |
| "loss": 0.1021, |
| "step": 140 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.46347615122795105, |
| "learning_rate": 0.00013567567567567568, |
| "loss": 0.1027, |
| "step": 150 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 0.49635258316993713, |
| "learning_rate": 0.0001302702702702703, |
| "loss": 0.1001, |
| "step": 160 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.2947765588760376, |
| "learning_rate": 0.00012486486486486487, |
| "loss": 0.1, |
| "step": 170 |
| }, |
| { |
| "epoch": 7.2, |
| "grad_norm": 0.3166660964488983, |
| "learning_rate": 0.00011945945945945946, |
| "loss": 0.1021, |
| "step": 180 |
| }, |
| { |
| "epoch": 7.6, |
| "grad_norm": 0.30084937810897827, |
| "learning_rate": 0.00011405405405405406, |
| "loss": 0.0974, |
| "step": 190 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.26676973700523376, |
| "learning_rate": 0.00010864864864864865, |
| "loss": 0.1001, |
| "step": 200 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 0.3572346270084381, |
| "learning_rate": 0.00010324324324324324, |
| "loss": 0.099, |
| "step": 210 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 0.26489362120628357, |
| "learning_rate": 9.783783783783784e-05, |
| "loss": 0.098, |
| "step": 220 |
| }, |
| { |
| "epoch": 9.2, |
| "grad_norm": 0.40212690830230713, |
| "learning_rate": 9.243243243243243e-05, |
| "loss": 0.0971, |
| "step": 230 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.30549320578575134, |
| "learning_rate": 8.702702702702702e-05, |
| "loss": 0.0982, |
| "step": 240 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.36995482444763184, |
| "learning_rate": 8.162162162162162e-05, |
| "loss": 0.0964, |
| "step": 250 |
| }, |
| { |
| "epoch": 10.4, |
| "grad_norm": 0.4164905548095703, |
| "learning_rate": 7.621621621621621e-05, |
| "loss": 0.0962, |
| "step": 260 |
| }, |
| { |
| "epoch": 10.8, |
| "grad_norm": 0.3316217362880707, |
| "learning_rate": 7.081081081081081e-05, |
| "loss": 0.095, |
| "step": 270 |
| }, |
| { |
| "epoch": 11.2, |
| "grad_norm": 0.45118996500968933, |
| "learning_rate": 6.54054054054054e-05, |
| "loss": 0.0941, |
| "step": 280 |
| }, |
| { |
| "epoch": 11.6, |
| "grad_norm": 0.4211569130420685, |
| "learning_rate": 6e-05, |
| "loss": 0.0942, |
| "step": 290 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.35818570852279663, |
| "learning_rate": 5.4594594594594595e-05, |
| "loss": 0.0964, |
| "step": 300 |
| }, |
| { |
| "epoch": 12.4, |
| "grad_norm": 0.3817833364009857, |
| "learning_rate": 4.9189189189189196e-05, |
| "loss": 0.0921, |
| "step": 310 |
| }, |
| { |
| "epoch": 12.8, |
| "grad_norm": 0.35149484872817993, |
| "learning_rate": 4.378378378378379e-05, |
| "loss": 0.0933, |
| "step": 320 |
| }, |
| { |
| "epoch": 13.2, |
| "grad_norm": 0.4558711349964142, |
| "learning_rate": 3.8378378378378384e-05, |
| "loss": 0.0924, |
| "step": 330 |
| }, |
| { |
| "epoch": 13.6, |
| "grad_norm": 0.41883108019828796, |
| "learning_rate": 3.297297297297298e-05, |
| "loss": 0.0901, |
| "step": 340 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.46422043442726135, |
| "learning_rate": 2.7567567567567572e-05, |
| "loss": 0.0933, |
| "step": 350 |
| }, |
| { |
| "epoch": 14.4, |
| "grad_norm": 0.453189492225647, |
| "learning_rate": 2.2162162162162166e-05, |
| "loss": 0.0879, |
| "step": 360 |
| }, |
| { |
| "epoch": 14.8, |
| "grad_norm": 0.38964295387268066, |
| "learning_rate": 1.675675675675676e-05, |
| "loss": 0.091, |
| "step": 370 |
| }, |
| { |
| "epoch": 15.2, |
| "grad_norm": 0.4412716031074524, |
| "learning_rate": 1.1351351351351352e-05, |
| "loss": 0.0885, |
| "step": 380 |
| }, |
| { |
| "epoch": 15.6, |
| "grad_norm": 0.4414565861225128, |
| "learning_rate": 5.945945945945946e-06, |
| "loss": 0.0891, |
| "step": 390 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.4880811870098114, |
| "learning_rate": 5.405405405405406e-07, |
| "loss": 0.087, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 16, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.130612988907725e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|