{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4467,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02238638907544213,
      "grad_norm": 8.243054389953613,
      "learning_rate": 4.993942787771599e-06,
      "loss": 1.1762,
      "step": 100
    },
    {
      "epoch": 0.04477277815088426,
      "grad_norm": 13.294663429260254,
      "learning_rate": 4.975555846433033e-06,
      "loss": 1.1404,
      "step": 200
    },
    {
      "epoch": 0.0671591672263264,
      "grad_norm": 8.65041446685791,
      "learning_rate": 4.9449294649220665e-06,
      "loss": 1.0161,
      "step": 300
    },
    {
      "epoch": 0.08954555630176853,
      "grad_norm": 10.313958168029785,
      "learning_rate": 4.902215063682208e-06,
      "loss": 0.9855,
      "step": 400
    },
    {
      "epoch": 0.11193194537721066,
      "grad_norm": 10.495174407958984,
      "learning_rate": 4.847623827752661e-06,
      "loss": 0.9682,
      "step": 500
    },
    {
      "epoch": 0.1343183344526528,
      "grad_norm": 12.217084884643555,
      "learning_rate": 4.781425662644569e-06,
      "loss": 0.988,
      "step": 600
    },
    {
      "epoch": 0.15670472352809492,
      "grad_norm": 10.601492881774902,
      "learning_rate": 4.703947859896326e-06,
      "loss": 1.0177,
      "step": 700
    },
    {
      "epoch": 0.17909111260353705,
      "grad_norm": 7.260547637939453,
      "learning_rate": 4.615573478905602e-06,
      "loss": 0.9601,
      "step": 800
    },
    {
      "epoch": 0.20147750167897918,
      "grad_norm": 10.645676612854004,
      "learning_rate": 4.5167394530384775e-06,
      "loss": 0.9422,
      "step": 900
    },
    {
      "epoch": 0.2238638907544213,
      "grad_norm": 10.681941986083984,
      "learning_rate": 4.407934429379341e-06,
      "loss": 1.0196,
      "step": 1000
    },
    {
      "epoch": 0.24625027982986344,
      "grad_norm": 9.875200271606445,
      "learning_rate": 4.28969635280205e-06,
      "loss": 1.0191,
      "step": 1100
    },
    {
      "epoch": 0.2686366689053056,
      "grad_norm": 10.363883018493652,
      "learning_rate": 4.162609806307003e-06,
      "loss": 0.9449,
      "step": 1200
    },
    {
      "epoch": 0.2910230579807477,
      "grad_norm": 8.477913856506348,
      "learning_rate": 4.027303120773824e-06,
      "loss": 0.9039,
      "step": 1300
    },
    {
      "epoch": 0.31340944705618984,
      "grad_norm": 9.964315414428711,
      "learning_rate": 3.884445268419355e-06,
      "loss": 1.0146,
      "step": 1400
    },
    {
      "epoch": 0.33579583613163194,
      "grad_norm": 10.20909309387207,
      "learning_rate": 3.734742555320098e-06,
      "loss": 1.0387,
      "step": 1500
    },
    {
      "epoch": 0.3581822252070741,
      "grad_norm": 12.675057411193848,
      "learning_rate": 3.578935129351634e-06,
      "loss": 1.0002,
      "step": 1600
    },
    {
      "epoch": 0.3805686142825162,
      "grad_norm": 11.391951560974121,
      "learning_rate": 3.4177933208102103e-06,
      "loss": 0.9976,
      "step": 1700
    },
    {
      "epoch": 0.40295500335795836,
      "grad_norm": 10.763954162597656,
      "learning_rate": 3.2521138338088676e-06,
      "loss": 0.9535,
      "step": 1800
    },
    {
      "epoch": 0.42534139243340047,
      "grad_norm": 9.04262638092041,
      "learning_rate": 3.0827158072783113e-06,
      "loss": 1.0119,
      "step": 1900
    },
    {
      "epoch": 0.4477277815088426,
      "grad_norm": 7.757719993591309,
      "learning_rate": 2.9104367650473923e-06,
      "loss": 0.9353,
      "step": 2000
    },
    {
      "epoch": 0.47011417058428473,
      "grad_norm": 8.288910865783691,
      "learning_rate": 2.7361284750264927e-06,
      "loss": 1.0068,
      "step": 2100
    },
    {
      "epoch": 0.4925005596597269,
      "grad_norm": 10.480790138244629,
      "learning_rate": 2.5606527379664746e-06,
      "loss": 0.9621,
      "step": 2200
    },
    {
      "epoch": 0.514886948735169,
      "grad_norm": 12.541658401489258,
      "learning_rate": 2.384877126614103e-06,
      "loss": 0.9756,
      "step": 2300
    },
    {
      "epoch": 0.5372733378106112,
      "grad_norm": 13.887962341308594,
      "learning_rate": 2.20967069633002e-06,
      "loss": 0.9665,
      "step": 2400
    },
    {
      "epoch": 0.5596597268860533,
      "grad_norm": 11.765827178955078,
      "learning_rate": 2.035899688376515e-06,
      "loss": 0.9511,
      "step": 2500
    },
    {
      "epoch": 0.5820461159614954,
      "grad_norm": 11.959718704223633,
      "learning_rate": 1.8644232471185239e-06,
      "loss": 0.9713,
      "step": 2600
    },
    {
      "epoch": 0.6044325050369376,
      "grad_norm": 8.351862907409668,
      "learning_rate": 1.6960891723125235e-06,
      "loss": 1.0633,
      "step": 2700
    },
    {
      "epoch": 0.6268188941123797,
      "grad_norm": 7.508358478546143,
      "learning_rate": 1.5317297274845156e-06,
      "loss": 0.895,
      "step": 2800
    },
    {
      "epoch": 0.6492052831878218,
      "grad_norm": 12.4747314453125,
      "learning_rate": 1.372157525120959e-06,
      "loss": 0.9394,
      "step": 2900
    },
    {
      "epoch": 0.6715916722632639,
      "grad_norm": 12.205965042114258,
      "learning_rate": 1.2181615090167711e-06,
      "loss": 0.9485,
      "step": 3000
    },
    {
      "epoch": 0.6939780613387061,
      "grad_norm": 6.783346652984619,
      "learning_rate": 1.0705030536441147e-06,
      "loss": 0.8932,
      "step": 3100
    },
    {
      "epoch": 0.7163644504141482,
      "grad_norm": 9.786669731140137,
      "learning_rate": 9.299121998271918e-07,
      "loss": 0.9629,
      "step": 3200
    },
    {
      "epoch": 0.7387508394895903,
      "grad_norm": 8.05592155456543,
      "learning_rate": 7.970840453342679e-07,
      "loss": 0.9799,
      "step": 3300
    },
    {
      "epoch": 0.7611372285650324,
      "grad_norm": 11.015290260314941,
      "learning_rate": 6.726753082323087e-07,
      "loss": 0.8752,
      "step": 3400
    },
    {
      "epoch": 0.7835236176404746,
      "grad_norm": 8.78658390045166,
      "learning_rate": 5.573010799953652e-07,
      "loss": 0.8905,
      "step": 3500
    },
    {
      "epoch": 0.8059100067159167,
      "grad_norm": 11.667632102966309,
      "learning_rate": 4.515317844197653e-07,
      "loss": 0.9166,
      "step": 3600
    },
    {
      "epoch": 0.8282963957913588,
      "grad_norm": 8.403421401977539,
      "learning_rate": 3.5589035738156305e-07,
      "loss": 0.9322,
      "step": 3700
    },
    {
      "epoch": 0.8506827848668009,
      "grad_norm": 11.51596450805664,
      "learning_rate": 2.708496613798717e-07,
      "loss": 0.909,
      "step": 3800
    },
    {
      "epoch": 0.8730691739422431,
      "grad_norm": 6.4879045486450195,
      "learning_rate": 1.9683014764887682e-07,
      "loss": 0.9552,
      "step": 3900
    },
    {
      "epoch": 0.8954555630176853,
      "grad_norm": 11.149494171142578,
      "learning_rate": 1.3419777739733408e-07,
      "loss": 0.9242,
      "step": 4000
    },
    {
      "epoch": 0.9178419520931274,
      "grad_norm": 8.041584014892578,
      "learning_rate": 8.326221245317373e-08,
      "loss": 0.9278,
      "step": 4100
    },
    {
      "epoch": 0.9402283411685695,
      "grad_norm": 9.19898796081543,
      "learning_rate": 4.427528425888977e-08,
      "loss": 0.8939,
      "step": 4200
    },
    {
      "epoch": 0.9626147302440117,
      "grad_norm": 9.239889144897461,
      "learning_rate": 1.7429748787176626e-08,
      "loss": 0.9563,
      "step": 4300
    },
    {
      "epoch": 0.9850011193194538,
      "grad_norm": 6.8510565757751465,
      "learning_rate": 2.8583335326598516e-09,
      "loss": 0.961,
      "step": 4400
    }
  ],
  "logging_steps": 100,
  "max_steps": 4467,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.417341778362368e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|