{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.96, "eval_steps": 500, "global_step": 310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 23.102539576958876, "learning_rate": 1.0752688172043011e-06, "loss": 2.3229, "step": 10 }, { "epoch": 0.32, "grad_norm": 7.585603806935928, "learning_rate": 2.1505376344086023e-06, "loss": 1.8243, "step": 20 }, { "epoch": 0.48, "grad_norm": 2.2963511032502937, "learning_rate": 3.225806451612903e-06, "loss": 1.0535, "step": 30 }, { "epoch": 0.64, "grad_norm": 1.0735883950229164, "learning_rate": 4.3010752688172045e-06, "loss": 0.6957, "step": 40 }, { "epoch": 0.8, "grad_norm": 1.0448299606211808, "learning_rate": 5.376344086021506e-06, "loss": 0.5694, "step": 50 }, { "epoch": 0.96, "grad_norm": 0.6110446099263456, "learning_rate": 6.451612903225806e-06, "loss": 0.5119, "step": 60 }, { "epoch": 1.12, "grad_norm": 0.5939516414788868, "learning_rate": 7.526881720430108e-06, "loss": 0.4705, "step": 70 }, { "epoch": 1.28, "grad_norm": 0.5407169140835424, "learning_rate": 8.602150537634409e-06, "loss": 0.3921, "step": 80 }, { "epoch": 1.44, "grad_norm": 0.4210892519038007, "learning_rate": 9.67741935483871e-06, "loss": 0.3977, "step": 90 }, { "epoch": 1.6, "grad_norm": 0.3772090447904717, "learning_rate": 9.998274321315453e-06, "loss": 0.3705, "step": 100 }, { "epoch": 1.76, "grad_norm": 0.6336085876305326, "learning_rate": 9.989824885009142e-06, "loss": 0.3523, "step": 110 }, { "epoch": 1.92, "grad_norm": 0.4337054073917208, "learning_rate": 9.974346616959476e-06, "loss": 0.3577, "step": 120 }, { "epoch": 2.08, "grad_norm": 0.3772473620821422, "learning_rate": 9.951861320364822e-06, "loss": 0.3559, "step": 130 }, { "epoch": 2.24, "grad_norm": 0.39779187628182006, "learning_rate": 9.922400668754833e-06, "loss": 0.2897, "step": 140 }, { "epoch": 2.4, "grad_norm": 0.38309322843856186, "learning_rate": 9.88600616137407e-06, "loss": 0.2848, "step": 150 }, { "epoch": 2.56, "grad_norm": 0.5417645489744292, "learning_rate": 9.8427290647248e-06, "loss": 0.2789, "step": 160 }, { "epoch": 2.7199999999999998, "grad_norm": 0.4069370179701877, "learning_rate": 9.792630340351301e-06, "loss": 0.2634, "step": 170 }, { "epoch": 2.88, "grad_norm": 0.38899969177115107, "learning_rate": 9.735780558967434e-06, "loss": 0.2697, "step": 180 }, { "epoch": 3.04, "grad_norm": 0.41142994330371174, "learning_rate": 9.67225980104841e-06, "loss": 0.2718, "step": 190 }, { "epoch": 3.2, "grad_norm": 0.4601501476242835, "learning_rate": 9.602157544026785e-06, "loss": 0.184, "step": 200 }, { "epoch": 3.36, "grad_norm": 0.4047321863260235, "learning_rate": 9.525572536251608e-06, "loss": 0.1853, "step": 210 }, { "epoch": 3.52, "grad_norm": 0.504432502658599, "learning_rate": 9.442612657888237e-06, "loss": 0.1781, "step": 220 }, { "epoch": 3.68, "grad_norm": 0.5750425579505938, "learning_rate": 9.353394768954791e-06, "loss": 0.191, "step": 230 }, { "epoch": 3.84, "grad_norm": 0.3988291661235405, "learning_rate": 9.258044544709276e-06, "loss": 0.1764, "step": 240 }, { "epoch": 4.0, "grad_norm": 0.7118375771625418, "learning_rate": 9.156696298619266e-06, "loss": 0.1956, "step": 250 }, { "epoch": 4.16, "grad_norm": 0.47011665809938036, "learning_rate": 9.049492793163539e-06, "loss": 0.1034, "step": 260 }, { "epoch": 4.32, "grad_norm": 0.42719370359955, "learning_rate": 8.936585038732143e-06, "loss": 0.0999, "step": 270 }, { "epoch": 4.48, "grad_norm": 0.4505633813264433, "learning_rate": 8.818132080908178e-06, "loss": 0.0989, "step": 280 }, { "epoch": 4.64, "grad_norm": 0.3870120698399163, "learning_rate": 8.694300776430958e-06, "loss": 0.0957, "step": 290 }, { "epoch": 4.8, "grad_norm": 0.43164855953731046, "learning_rate": 8.565265558156101e-06, "loss": 0.0985, "step": 300 }, { "epoch": 4.96, "grad_norm": 0.44351115954290926, "learning_rate": 8.43120818934367e-06, "loss": 0.09, "step": 310 } ], "logging_steps": 10, "max_steps": 930, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 310, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 55946563551232.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }