{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.766671573678787, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014721036360959812, "grad_norm": 1.0893694162368774, "learning_rate": 4.808635917566242e-06, "loss": 1.1444, "step": 50 }, { "epoch": 0.029442072721919624, "grad_norm": 1.118213415145874, "learning_rate": 9.715407262021591e-06, "loss": 0.9497, "step": 100 }, { "epoch": 0.04416310908287943, "grad_norm": 1.3216179609298706, "learning_rate": 1.4622178606476939e-05, "loss": 0.8346, "step": 150 }, { "epoch": 0.05888414544383925, "grad_norm": 1.292870044708252, "learning_rate": 1.9528949950932288e-05, "loss": 0.7699, "step": 200 }, { "epoch": 0.07360518180479905, "grad_norm": 1.4046356678009033, "learning_rate": 2.4435721295387637e-05, "loss": 0.7252, "step": 250 }, { "epoch": 0.08832621816575886, "grad_norm": 1.4438663721084595, "learning_rate": 2.9342492639842983e-05, "loss": 0.6996, "step": 300 }, { "epoch": 0.10304725452671869, "grad_norm": 1.3257337808609009, "learning_rate": 3.424926398429833e-05, "loss": 0.686, "step": 350 }, { "epoch": 0.1177682908876785, "grad_norm": 1.3639789819717407, "learning_rate": 3.9156035328753685e-05, "loss": 0.6601, "step": 400 }, { "epoch": 0.1324893272486383, "grad_norm": 1.4148070812225342, "learning_rate": 4.406280667320903e-05, "loss": 0.6371, "step": 450 }, { "epoch": 0.1472103636095981, "grad_norm": 1.3940412998199463, "learning_rate": 4.8969578017664384e-05, "loss": 0.6417, "step": 500 }, { "epoch": 0.6477255998822317, "grad_norm": 0.6072946190834045, "learning_rate": 9.599483839268026e-05, "loss": 0.6122, "step": 550 }, { "epoch": 0.7066097453260709, "grad_norm": 0.6030572652816772, "learning_rate": 9.454410179022932e-05, "loss": 0.5809, "step": 600 }, { "epoch": 0.7654938907699103, "grad_norm": 0.5781008005142212, "learning_rate": 9.288422825194501e-05, "loss": 0.5446, "step": 650 }, { "epoch": 0.8243780362137495, "grad_norm": 0.5412103533744812, "learning_rate": 9.102301097269974e-05, "loss": 0.5339, "step": 700 }, { "epoch": 0.8832621816575887, "grad_norm": 0.5678456425666809, "learning_rate": 8.896918846697821e-05, "loss": 0.5296, "step": 750 }, { "epoch": 0.942146327101428, "grad_norm": 0.525556206703186, "learning_rate": 8.673240354108538e-05, "loss": 0.5176, "step": 800 }, { "epoch": 1.0011776829088768, "grad_norm": 1.9685856103897095, "learning_rate": 8.432315801965616e-05, "loss": 0.5104, "step": 850 }, { "epoch": 1.0600618283527161, "grad_norm": 0.6006094217300415, "learning_rate": 8.175276343902802e-05, "loss": 0.4685, "step": 900 }, { "epoch": 1.1189459737965552, "grad_norm": 0.5228903889656067, "learning_rate": 7.903328793897418e-05, "loss": 0.473, "step": 950 }, { "epoch": 1.1778301192403946, "grad_norm": 0.5006899237632751, "learning_rate": 7.6177499602143e-05, "loss": 0.4679, "step": 1000 }, { "epoch": 1.1778301192403946, "eval_loss": 0.4844963848590851, "eval_runtime": 2172.4438, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.695, "step": 1000 }, { "epoch": 1.2367142646842337, "grad_norm": 0.5041179060935974, "learning_rate": 7.319880650722838e-05, "loss": 0.4541, "step": 1050 }, { "epoch": 1.295598410128073, "grad_norm": 0.5369197726249695, "learning_rate": 7.01111937773246e-05, "loss": 0.4576, "step": 1100 }, { "epoch": 1.3544825555719122, "grad_norm": 0.5211925506591797, "learning_rate": 6.692915791902665e-05, "loss": 0.4472, "step": 1150 }, { "epoch": 1.4133667010157516, "grad_norm": 0.5664705038070679, "learning_rate": 6.366763876055806e-05, "loss": 0.4427, "step": 1200 }, { "epoch": 1.4722508464595907, "grad_norm": 0.5420666337013245, "learning_rate": 6.034194930847975e-05, "loss": 0.4395, "step": 1250 }, { "epoch": 1.53113499190343, "grad_norm": 0.558952271938324, "learning_rate": 5.6967703852306786e-05, "loss": 0.4305, "step": 1300 }, { "epoch": 1.5900191373472694, "grad_norm": 0.510136067867279, "learning_rate": 5.356074465458553e-05, "loss": 0.428, "step": 1350 }, { "epoch": 1.6489032827911085, "grad_norm": 0.506799578666687, "learning_rate": 5.013706757062534e-05, "loss": 0.4251, "step": 1400 }, { "epoch": 1.7077874282349477, "grad_norm": 0.5179591178894043, "learning_rate": 4.671274694710388e-05, "loss": 0.4188, "step": 1450 }, { "epoch": 1.766671573678787, "grad_norm": 0.531908392906189, "learning_rate": 4.3303860152151445e-05, "loss": 0.4177, "step": 1500 }, { "epoch": 1.766671573678787, "eval_loss": 0.4321376383304596, "eval_runtime": 2174.1694, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.695, "step": 1500 } ], "logging_steps": 50, "max_steps": 2547, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.011350882666414e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }