| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.8332546055739254, | |
| "eval_steps": 500, | |
| "global_step": 3000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0944733112895607, | |
| "grad_norm": 0.9754980802536011, | |
| "learning_rate": 0.00019376770538243626, | |
| "loss": 4.4691, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1889466225791214, | |
| "grad_norm": 0.941046953201294, | |
| "learning_rate": 0.00018747245829398804, | |
| "loss": 4.2364, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2834199338686821, | |
| "grad_norm": 1.0237231254577637, | |
| "learning_rate": 0.00018117721120553982, | |
| "loss": 4.15, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3778932451582428, | |
| "grad_norm": 1.000512719154358, | |
| "learning_rate": 0.0001748819641170916, | |
| "loss": 3.9995, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4723665564478035, | |
| "grad_norm": 0.8025191426277161, | |
| "learning_rate": 0.00016858671702864338, | |
| "loss": 4.0276, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5668398677373642, | |
| "grad_norm": 1.0698238611221313, | |
| "learning_rate": 0.00016229146994019514, | |
| "loss": 3.9939, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6613131790269249, | |
| "grad_norm": 8.519857406616211, | |
| "learning_rate": 0.00015599622285174692, | |
| "loss": 3.9232, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7557864903164856, | |
| "grad_norm": 0.8686571717262268, | |
| "learning_rate": 0.0001497009757632987, | |
| "loss": 3.9484, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8502598016060463, | |
| "grad_norm": 0.8734745383262634, | |
| "learning_rate": 0.00014340572867485048, | |
| "loss": 3.9375, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.944733112895607, | |
| "grad_norm": 2.162109851837158, | |
| "learning_rate": 0.00013711048158640226, | |
| "loss": 3.9377, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0387340576287198, | |
| "grad_norm": 1.0627549886703491, | |
| "learning_rate": 0.00013081523449795404, | |
| "loss": 3.8939, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1332073689182807, | |
| "grad_norm": 1.9133814573287964, | |
| "learning_rate": 0.00012451998740950582, | |
| "loss": 3.9357, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.2276806802078413, | |
| "grad_norm": 1.2283552885055542, | |
| "learning_rate": 0.00011822474032105762, | |
| "loss": 3.8598, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.322153991497402, | |
| "grad_norm": 0.7728371620178223, | |
| "learning_rate": 0.0001119294932326094, | |
| "loss": 3.9038, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.4166273027869627, | |
| "grad_norm": 1.8633266687393188, | |
| "learning_rate": 0.00010563424614416115, | |
| "loss": 3.8733, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.5111006140765233, | |
| "grad_norm": 1.6378132104873657, | |
| "learning_rate": 9.933899905571294e-05, | |
| "loss": 3.8717, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.6055739253660841, | |
| "grad_norm": 1.4249569177627563, | |
| "learning_rate": 9.304375196726472e-05, | |
| "loss": 3.8242, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.700047236655645, | |
| "grad_norm": 0.8903579711914062, | |
| "learning_rate": 8.674850487881649e-05, | |
| "loss": 3.8954, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.7945205479452055, | |
| "grad_norm": 1.4902220964431763, | |
| "learning_rate": 8.045325779036827e-05, | |
| "loss": 3.9158, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.8889938592347661, | |
| "grad_norm": 1.4620416164398193, | |
| "learning_rate": 7.415801070192005e-05, | |
| "loss": 3.9019, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.9834671705243268, | |
| "grad_norm": 2.231694221496582, | |
| "learning_rate": 6.786276361347183e-05, | |
| "loss": 3.864, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.0774681152574397, | |
| "grad_norm": 1.02834153175354, | |
| "learning_rate": 6.15675165250236e-05, | |
| "loss": 3.8216, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.1719414265470003, | |
| "grad_norm": 1.1353607177734375, | |
| "learning_rate": 5.527226943657539e-05, | |
| "loss": 3.7749, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.2664147378365613, | |
| "grad_norm": 1.426005482673645, | |
| "learning_rate": 4.897702234812717e-05, | |
| "loss": 3.8876, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.360888049126122, | |
| "grad_norm": 1.3677780628204346, | |
| "learning_rate": 4.268177525967895e-05, | |
| "loss": 3.7192, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.4553613604156825, | |
| "grad_norm": 0.8309673070907593, | |
| "learning_rate": 3.6386528171230724e-05, | |
| "loss": 3.8808, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.549834671705243, | |
| "grad_norm": 1.9310344457626343, | |
| "learning_rate": 3.00912810827825e-05, | |
| "loss": 3.8534, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.644307982994804, | |
| "grad_norm": 1.9357428550720215, | |
| "learning_rate": 2.3796033994334278e-05, | |
| "loss": 3.8101, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.738781294284365, | |
| "grad_norm": 0.9809176325798035, | |
| "learning_rate": 1.750078690588606e-05, | |
| "loss": 3.8252, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.8332546055739254, | |
| "grad_norm": 1.5673308372497559, | |
| "learning_rate": 1.1205539817437834e-05, | |
| "loss": 3.851, | |
| "step": 3000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 3177, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6290130059919360.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |