{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 21.81972312927246, "learning_rate": 4.8825e-05, "loss": 46.8162, "step": 100 }, { "epoch": 0.4, "grad_norm": 19.624361038208008, "learning_rate": 4.7575000000000004e-05, "loss": 35.0983, "step": 200 }, { "epoch": 0.6, "grad_norm": 17.971105575561523, "learning_rate": 4.63375e-05, "loss": 33.9533, "step": 300 }, { "epoch": 0.8, "grad_norm": 16.36736297607422, "learning_rate": 4.5087500000000005e-05, "loss": 33.1973, "step": 400 }, { "epoch": 1.0, "grad_norm": 20.30103874206543, "learning_rate": 4.38375e-05, "loss": 32.5962, "step": 500 }, { "epoch": 1.2, "grad_norm": 17.021833419799805, "learning_rate": 4.2587500000000005e-05, "loss": 31.0238, "step": 600 }, { "epoch": 1.4, "grad_norm": 18.680706024169922, "learning_rate": 4.13375e-05, "loss": 30.8848, "step": 700 }, { "epoch": 1.6, "grad_norm": 17.90163230895996, "learning_rate": 4.00875e-05, "loss": 30.7304, "step": 800 }, { "epoch": 1.8, "grad_norm": 16.27179527282715, "learning_rate": 3.88375e-05, "loss": 30.6612, "step": 900 }, { "epoch": 2.0, "grad_norm": 17.097187042236328, "learning_rate": 3.7587500000000006e-05, "loss": 30.476, "step": 1000 }, { "epoch": 2.2, "grad_norm": 17.85845184326172, "learning_rate": 3.63375e-05, "loss": 29.0566, "step": 1100 }, { "epoch": 2.4, "grad_norm": 20.759204864501953, "learning_rate": 3.50875e-05, "loss": 29.1165, "step": 1200 }, { "epoch": 2.6, "grad_norm": 16.447481155395508, "learning_rate": 3.38375e-05, "loss": 29.0477, "step": 1300 }, { "epoch": 2.8, "grad_norm": 16.511810302734375, "learning_rate": 3.2587500000000006e-05, "loss": 29.0484, "step": 1400 }, { "epoch": 3.0, "grad_norm": 15.86971378326416, "learning_rate": 3.13375e-05, "loss": 29.0645, "step": 1500 }, { "epoch": 3.2, "grad_norm": 17.456417083740234, "learning_rate": 3.0087500000000003e-05, "loss": 27.8042, "step": 1600 }, { "epoch": 3.4, "grad_norm": 18.249238967895508, "learning_rate": 2.88375e-05, "loss": 27.8263, "step": 1700 }, { "epoch": 3.6, "grad_norm": 16.873659133911133, "learning_rate": 2.75875e-05, "loss": 27.8207, "step": 1800 }, { "epoch": 3.8, "grad_norm": 18.681793212890625, "learning_rate": 2.6337500000000003e-05, "loss": 27.8755, "step": 1900 }, { "epoch": 4.0, "grad_norm": 16.73720359802246, "learning_rate": 2.5087500000000003e-05, "loss": 27.7821, "step": 2000 }, { "epoch": 4.2, "grad_norm": 17.58981704711914, "learning_rate": 2.38375e-05, "loss": 26.7495, "step": 2100 }, { "epoch": 4.4, "grad_norm": 17.446008682250977, "learning_rate": 2.25875e-05, "loss": 26.9017, "step": 2200 }, { "epoch": 4.6, "grad_norm": 17.84132194519043, "learning_rate": 2.13375e-05, "loss": 26.9374, "step": 2300 }, { "epoch": 4.8, "grad_norm": 16.804340362548828, "learning_rate": 2.00875e-05, "loss": 26.8123, "step": 2400 }, { "epoch": 5.0, "grad_norm": 17.43560218811035, "learning_rate": 1.88375e-05, "loss": 26.8506, "step": 2500 }, { "epoch": 5.2, "grad_norm": 17.689725875854492, "learning_rate": 1.75875e-05, "loss": 26.0296, "step": 2600 }, { "epoch": 5.4, "grad_norm": 17.706035614013672, "learning_rate": 1.63375e-05, "loss": 26.1274, "step": 2700 }, { "epoch": 5.6, "grad_norm": 17.015243530273438, "learning_rate": 1.50875e-05, "loss": 26.0055, "step": 2800 }, { "epoch": 5.8, "grad_norm": 17.540559768676758, "learning_rate": 1.3837500000000001e-05, "loss": 26.0804, "step": 2900 }, { "epoch": 6.0, "grad_norm": 17.685171127319336, "learning_rate": 1.25875e-05, "loss": 26.1928, "step": 3000 }, { "epoch": 6.2, "grad_norm": 18.444171905517578, "learning_rate": 1.1337500000000001e-05, "loss": 25.493, "step": 3100 }, { "epoch": 6.4, "grad_norm": 18.2116756439209, "learning_rate": 1.0087500000000001e-05, "loss": 25.5397, "step": 3200 }, { "epoch": 6.6, "grad_norm": 17.715866088867188, "learning_rate": 8.8375e-06, "loss": 25.4594, "step": 3300 }, { "epoch": 6.8, "grad_norm": 18.493494033813477, "learning_rate": 7.5875e-06, "loss": 25.5701, "step": 3400 }, { "epoch": 7.0, "grad_norm": 18.52659797668457, "learning_rate": 6.337500000000001e-06, "loss": 25.5077, "step": 3500 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }