| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 3177, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0944733112895607, |
| "grad_norm": 0.9754980802536011, |
| "learning_rate": 0.00019376770538243626, |
| "loss": 4.4691, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1889466225791214, |
| "grad_norm": 0.941046953201294, |
| "learning_rate": 0.00018747245829398804, |
| "loss": 4.2364, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2834199338686821, |
| "grad_norm": 1.0237231254577637, |
| "learning_rate": 0.00018117721120553982, |
| "loss": 4.15, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3778932451582428, |
| "grad_norm": 1.000512719154358, |
| "learning_rate": 0.0001748819641170916, |
| "loss": 3.9995, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4723665564478035, |
| "grad_norm": 0.8025191426277161, |
| "learning_rate": 0.00016858671702864338, |
| "loss": 4.0276, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5668398677373642, |
| "grad_norm": 1.0698238611221313, |
| "learning_rate": 0.00016229146994019514, |
| "loss": 3.9939, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6613131790269249, |
| "grad_norm": 8.519857406616211, |
| "learning_rate": 0.00015599622285174692, |
| "loss": 3.9232, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7557864903164856, |
| "grad_norm": 0.8686571717262268, |
| "learning_rate": 0.0001497009757632987, |
| "loss": 3.9484, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8502598016060463, |
| "grad_norm": 0.8734745383262634, |
| "learning_rate": 0.00014340572867485048, |
| "loss": 3.9375, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.944733112895607, |
| "grad_norm": 2.162109851837158, |
| "learning_rate": 0.00013711048158640226, |
| "loss": 3.9377, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0387340576287198, |
| "grad_norm": 1.0627549886703491, |
| "learning_rate": 0.00013081523449795404, |
| "loss": 3.8939, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1332073689182807, |
| "grad_norm": 1.9133814573287964, |
| "learning_rate": 0.00012451998740950582, |
| "loss": 3.9357, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2276806802078413, |
| "grad_norm": 1.2283552885055542, |
| "learning_rate": 0.00011822474032105762, |
| "loss": 3.8598, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.322153991497402, |
| "grad_norm": 0.7728371620178223, |
| "learning_rate": 0.0001119294932326094, |
| "loss": 3.9038, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4166273027869627, |
| "grad_norm": 1.8633266687393188, |
| "learning_rate": 0.00010563424614416115, |
| "loss": 3.8733, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5111006140765233, |
| "grad_norm": 1.6378132104873657, |
| "learning_rate": 9.933899905571294e-05, |
| "loss": 3.8717, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.6055739253660841, |
| "grad_norm": 1.4249569177627563, |
| "learning_rate": 9.304375196726472e-05, |
| "loss": 3.8242, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.700047236655645, |
| "grad_norm": 0.8903579711914062, |
| "learning_rate": 8.674850487881649e-05, |
| "loss": 3.8954, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.7945205479452055, |
| "grad_norm": 1.4902220964431763, |
| "learning_rate": 8.045325779036827e-05, |
| "loss": 3.9158, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.8889938592347661, |
| "grad_norm": 1.4620416164398193, |
| "learning_rate": 7.415801070192005e-05, |
| "loss": 3.9019, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9834671705243268, |
| "grad_norm": 2.231694221496582, |
| "learning_rate": 6.786276361347183e-05, |
| "loss": 3.864, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.0774681152574397, |
| "grad_norm": 1.02834153175354, |
| "learning_rate": 6.15675165250236e-05, |
| "loss": 3.8216, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.1719414265470003, |
| "grad_norm": 1.1353607177734375, |
| "learning_rate": 5.527226943657539e-05, |
| "loss": 3.7749, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.2664147378365613, |
| "grad_norm": 1.426005482673645, |
| "learning_rate": 4.897702234812717e-05, |
| "loss": 3.8876, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.360888049126122, |
| "grad_norm": 1.3677780628204346, |
| "learning_rate": 4.268177525967895e-05, |
| "loss": 3.7192, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.4553613604156825, |
| "grad_norm": 0.8309673070907593, |
| "learning_rate": 3.6386528171230724e-05, |
| "loss": 3.8808, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.549834671705243, |
| "grad_norm": 1.9310344457626343, |
| "learning_rate": 3.00912810827825e-05, |
| "loss": 3.8534, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.644307982994804, |
| "grad_norm": 1.9357428550720215, |
| "learning_rate": 2.3796033994334278e-05, |
| "loss": 3.8101, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.738781294284365, |
| "grad_norm": 0.9809176325798035, |
| "learning_rate": 1.750078690588606e-05, |
| "loss": 3.8252, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.8332546055739254, |
| "grad_norm": 1.5673308372497559, |
| "learning_rate": 1.1205539817437834e-05, |
| "loss": 3.851, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.927727916863486, |
| "grad_norm": 1.6177445650100708, |
| "learning_rate": 4.910292728989612e-06, |
| "loss": 3.7975, |
| "step": 3100 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3177, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6660091440267264.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|