{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8332546055739254, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0944733112895607, "grad_norm": 0.9754980802536011, "learning_rate": 0.00019376770538243626, "loss": 4.4691, "step": 100 }, { "epoch": 0.1889466225791214, "grad_norm": 0.941046953201294, "learning_rate": 0.00018747245829398804, "loss": 4.2364, "step": 200 }, { "epoch": 0.2834199338686821, "grad_norm": 1.0237231254577637, "learning_rate": 0.00018117721120553982, "loss": 4.15, "step": 300 }, { "epoch": 0.3778932451582428, "grad_norm": 1.000512719154358, "learning_rate": 0.0001748819641170916, "loss": 3.9995, "step": 400 }, { "epoch": 0.4723665564478035, "grad_norm": 0.8025191426277161, "learning_rate": 0.00016858671702864338, "loss": 4.0276, "step": 500 }, { "epoch": 0.5668398677373642, "grad_norm": 1.0698238611221313, "learning_rate": 0.00016229146994019514, "loss": 3.9939, "step": 600 }, { "epoch": 0.6613131790269249, "grad_norm": 8.519857406616211, "learning_rate": 0.00015599622285174692, "loss": 3.9232, "step": 700 }, { "epoch": 0.7557864903164856, "grad_norm": 0.8686571717262268, "learning_rate": 0.0001497009757632987, "loss": 3.9484, "step": 800 }, { "epoch": 0.8502598016060463, "grad_norm": 0.8734745383262634, "learning_rate": 0.00014340572867485048, "loss": 3.9375, "step": 900 }, { "epoch": 0.944733112895607, "grad_norm": 2.162109851837158, "learning_rate": 0.00013711048158640226, "loss": 3.9377, "step": 1000 }, { "epoch": 1.0387340576287198, "grad_norm": 1.0627549886703491, "learning_rate": 0.00013081523449795404, "loss": 3.8939, "step": 1100 }, { "epoch": 1.1332073689182807, "grad_norm": 1.9133814573287964, "learning_rate": 0.00012451998740950582, "loss": 3.9357, "step": 1200 }, { "epoch": 1.2276806802078413, "grad_norm": 1.2283552885055542, "learning_rate": 0.00011822474032105762, "loss": 3.8598, "step": 1300 }, { "epoch": 1.322153991497402, "grad_norm": 0.7728371620178223, "learning_rate": 0.0001119294932326094, "loss": 3.9038, "step": 1400 }, { "epoch": 1.4166273027869627, "grad_norm": 1.8633266687393188, "learning_rate": 0.00010563424614416115, "loss": 3.8733, "step": 1500 }, { "epoch": 1.5111006140765233, "grad_norm": 1.6378132104873657, "learning_rate": 9.933899905571294e-05, "loss": 3.8717, "step": 1600 }, { "epoch": 1.6055739253660841, "grad_norm": 1.4249569177627563, "learning_rate": 9.304375196726472e-05, "loss": 3.8242, "step": 1700 }, { "epoch": 1.700047236655645, "grad_norm": 0.8903579711914062, "learning_rate": 8.674850487881649e-05, "loss": 3.8954, "step": 1800 }, { "epoch": 1.7945205479452055, "grad_norm": 1.4902220964431763, "learning_rate": 8.045325779036827e-05, "loss": 3.9158, "step": 1900 }, { "epoch": 1.8889938592347661, "grad_norm": 1.4620416164398193, "learning_rate": 7.415801070192005e-05, "loss": 3.9019, "step": 2000 }, { "epoch": 1.9834671705243268, "grad_norm": 2.231694221496582, "learning_rate": 6.786276361347183e-05, "loss": 3.864, "step": 2100 }, { "epoch": 2.0774681152574397, "grad_norm": 1.02834153175354, "learning_rate": 6.15675165250236e-05, "loss": 3.8216, "step": 2200 }, { "epoch": 2.1719414265470003, "grad_norm": 1.1353607177734375, "learning_rate": 5.527226943657539e-05, "loss": 3.7749, "step": 2300 }, { "epoch": 2.2664147378365613, "grad_norm": 1.426005482673645, "learning_rate": 4.897702234812717e-05, "loss": 3.8876, "step": 2400 }, { "epoch": 2.360888049126122, "grad_norm": 1.3677780628204346, "learning_rate": 4.268177525967895e-05, "loss": 3.7192, "step": 2500 }, { "epoch": 2.4553613604156825, "grad_norm": 0.8309673070907593, "learning_rate": 3.6386528171230724e-05, "loss": 3.8808, "step": 2600 }, { "epoch": 2.549834671705243, "grad_norm": 1.9310344457626343, "learning_rate": 3.00912810827825e-05, "loss": 3.8534, "step": 2700 }, { "epoch": 2.644307982994804, "grad_norm": 1.9357428550720215, "learning_rate": 2.3796033994334278e-05, "loss": 3.8101, "step": 2800 }, { "epoch": 2.738781294284365, "grad_norm": 0.9809176325798035, "learning_rate": 1.750078690588606e-05, "loss": 3.8252, "step": 2900 }, { "epoch": 2.8332546055739254, "grad_norm": 1.5673308372497559, "learning_rate": 1.1205539817437834e-05, "loss": 3.851, "step": 3000 } ], "logging_steps": 100, "max_steps": 3177, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6290130059919360.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }