{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4467, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02238638907544213, "grad_norm": 8.243054389953613, "learning_rate": 4.993942787771599e-06, "loss": 1.1762, "step": 100 }, { "epoch": 0.04477277815088426, "grad_norm": 13.294663429260254, "learning_rate": 4.975555846433033e-06, "loss": 1.1404, "step": 200 }, { "epoch": 0.0671591672263264, "grad_norm": 8.65041446685791, "learning_rate": 4.9449294649220665e-06, "loss": 1.0161, "step": 300 }, { "epoch": 0.08954555630176853, "grad_norm": 10.313958168029785, "learning_rate": 4.902215063682208e-06, "loss": 0.9855, "step": 400 }, { "epoch": 0.11193194537721066, "grad_norm": 10.495174407958984, "learning_rate": 4.847623827752661e-06, "loss": 0.9682, "step": 500 }, { "epoch": 0.1343183344526528, "grad_norm": 12.217084884643555, "learning_rate": 4.781425662644569e-06, "loss": 0.988, "step": 600 }, { "epoch": 0.15670472352809492, "grad_norm": 10.601492881774902, "learning_rate": 4.703947859896326e-06, "loss": 1.0177, "step": 700 }, { "epoch": 0.17909111260353705, "grad_norm": 7.260547637939453, "learning_rate": 4.615573478905602e-06, "loss": 0.9601, "step": 800 }, { "epoch": 0.20147750167897918, "grad_norm": 10.645676612854004, "learning_rate": 4.5167394530384775e-06, "loss": 0.9422, "step": 900 }, { "epoch": 0.2238638907544213, "grad_norm": 10.681941986083984, "learning_rate": 4.407934429379341e-06, "loss": 1.0196, "step": 1000 }, { "epoch": 0.24625027982986344, "grad_norm": 9.875200271606445, "learning_rate": 4.28969635280205e-06, "loss": 1.0191, "step": 1100 }, { "epoch": 0.2686366689053056, "grad_norm": 10.363883018493652, "learning_rate": 4.162609806307003e-06, "loss": 0.9449, "step": 1200 }, { "epoch": 0.2910230579807477, "grad_norm": 8.477913856506348, "learning_rate": 4.027303120773824e-06, "loss": 0.9039, "step": 1300 }, { "epoch": 0.31340944705618984, "grad_norm": 9.964315414428711, "learning_rate": 3.884445268419355e-06, "loss": 1.0146, "step": 1400 }, { "epoch": 0.33579583613163194, "grad_norm": 10.20909309387207, "learning_rate": 3.734742555320098e-06, "loss": 1.0387, "step": 1500 }, { "epoch": 0.3581822252070741, "grad_norm": 12.675057411193848, "learning_rate": 3.578935129351634e-06, "loss": 1.0002, "step": 1600 }, { "epoch": 0.3805686142825162, "grad_norm": 11.391951560974121, "learning_rate": 3.4177933208102103e-06, "loss": 0.9976, "step": 1700 }, { "epoch": 0.40295500335795836, "grad_norm": 10.763954162597656, "learning_rate": 3.2521138338088676e-06, "loss": 0.9535, "step": 1800 }, { "epoch": 0.42534139243340047, "grad_norm": 9.04262638092041, "learning_rate": 3.0827158072783113e-06, "loss": 1.0119, "step": 1900 }, { "epoch": 0.4477277815088426, "grad_norm": 7.757719993591309, "learning_rate": 2.9104367650473923e-06, "loss": 0.9353, "step": 2000 }, { "epoch": 0.47011417058428473, "grad_norm": 8.288910865783691, "learning_rate": 2.7361284750264927e-06, "loss": 1.0068, "step": 2100 }, { "epoch": 0.4925005596597269, "grad_norm": 10.480790138244629, "learning_rate": 2.5606527379664746e-06, "loss": 0.9621, "step": 2200 }, { "epoch": 0.514886948735169, "grad_norm": 12.541658401489258, "learning_rate": 2.384877126614103e-06, "loss": 0.9756, "step": 2300 }, { "epoch": 0.5372733378106112, "grad_norm": 13.887962341308594, "learning_rate": 2.20967069633002e-06, "loss": 0.9665, "step": 2400 }, { "epoch": 0.5596597268860533, "grad_norm": 11.765827178955078, "learning_rate": 2.035899688376515e-06, "loss": 0.9511, "step": 2500 }, { "epoch": 0.5820461159614954, "grad_norm": 11.959718704223633, "learning_rate": 1.8644232471185239e-06, "loss": 0.9713, "step": 2600 }, { "epoch": 0.6044325050369376, "grad_norm": 8.351862907409668, "learning_rate": 1.6960891723125235e-06, "loss": 1.0633, "step": 2700 }, { "epoch": 0.6268188941123797, "grad_norm": 7.508358478546143, "learning_rate": 1.5317297274845156e-06, "loss": 0.895, "step": 2800 }, { "epoch": 0.6492052831878218, "grad_norm": 12.4747314453125, "learning_rate": 1.372157525120959e-06, "loss": 0.9394, "step": 2900 }, { "epoch": 0.6715916722632639, "grad_norm": 12.205965042114258, "learning_rate": 1.2181615090167711e-06, "loss": 0.9485, "step": 3000 }, { "epoch": 0.6939780613387061, "grad_norm": 6.783346652984619, "learning_rate": 1.0705030536441147e-06, "loss": 0.8932, "step": 3100 }, { "epoch": 0.7163644504141482, "grad_norm": 9.786669731140137, "learning_rate": 9.299121998271918e-07, "loss": 0.9629, "step": 3200 }, { "epoch": 0.7387508394895903, "grad_norm": 8.05592155456543, "learning_rate": 7.970840453342679e-07, "loss": 0.9799, "step": 3300 }, { "epoch": 0.7611372285650324, "grad_norm": 11.015290260314941, "learning_rate": 6.726753082323087e-07, "loss": 0.8752, "step": 3400 }, { "epoch": 0.7835236176404746, "grad_norm": 8.78658390045166, "learning_rate": 5.573010799953652e-07, "loss": 0.8905, "step": 3500 }, { "epoch": 0.8059100067159167, "grad_norm": 11.667632102966309, "learning_rate": 4.515317844197653e-07, "loss": 0.9166, "step": 3600 }, { "epoch": 0.8282963957913588, "grad_norm": 8.403421401977539, "learning_rate": 3.5589035738156305e-07, "loss": 0.9322, "step": 3700 }, { "epoch": 0.8506827848668009, "grad_norm": 11.51596450805664, "learning_rate": 2.708496613798717e-07, "loss": 0.909, "step": 3800 }, { "epoch": 0.8730691739422431, "grad_norm": 6.4879045486450195, "learning_rate": 1.9683014764887682e-07, "loss": 0.9552, "step": 3900 }, { "epoch": 0.8954555630176853, "grad_norm": 11.149494171142578, "learning_rate": 1.3419777739733408e-07, "loss": 0.9242, "step": 4000 }, { "epoch": 0.9178419520931274, "grad_norm": 8.041584014892578, "learning_rate": 8.326221245317373e-08, "loss": 0.9278, "step": 4100 }, { "epoch": 0.9402283411685695, "grad_norm": 9.19898796081543, "learning_rate": 4.427528425888977e-08, "loss": 0.8939, "step": 4200 }, { "epoch": 0.9626147302440117, "grad_norm": 9.239889144897461, "learning_rate": 1.7429748787176626e-08, "loss": 0.9563, "step": 4300 }, { "epoch": 0.9850011193194538, "grad_norm": 6.8510565757751465, "learning_rate": 2.8583335326598516e-09, "loss": 0.961, "step": 4400 } ], "logging_steps": 100, "max_steps": 4467, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.417341778362368e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }