| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.766671573678787, | |
| "eval_steps": 500, | |
| "global_step": 1500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.014721036360959812, | |
| "grad_norm": 1.0893694162368774, | |
| "learning_rate": 4.808635917566242e-06, | |
| "loss": 1.1444, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029442072721919624, | |
| "grad_norm": 1.118213415145874, | |
| "learning_rate": 9.715407262021591e-06, | |
| "loss": 0.9497, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04416310908287943, | |
| "grad_norm": 1.3216179609298706, | |
| "learning_rate": 1.4622178606476939e-05, | |
| "loss": 0.8346, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05888414544383925, | |
| "grad_norm": 1.292870044708252, | |
| "learning_rate": 1.9528949950932288e-05, | |
| "loss": 0.7699, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07360518180479905, | |
| "grad_norm": 1.4046356678009033, | |
| "learning_rate": 2.4435721295387637e-05, | |
| "loss": 0.7252, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08832621816575886, | |
| "grad_norm": 1.4438663721084595, | |
| "learning_rate": 2.9342492639842983e-05, | |
| "loss": 0.6996, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.10304725452671869, | |
| "grad_norm": 1.3257337808609009, | |
| "learning_rate": 3.424926398429833e-05, | |
| "loss": 0.686, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1177682908876785, | |
| "grad_norm": 1.3639789819717407, | |
| "learning_rate": 3.9156035328753685e-05, | |
| "loss": 0.6601, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1324893272486383, | |
| "grad_norm": 1.4148070812225342, | |
| "learning_rate": 4.406280667320903e-05, | |
| "loss": 0.6371, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1472103636095981, | |
| "grad_norm": 1.3940412998199463, | |
| "learning_rate": 4.8969578017664384e-05, | |
| "loss": 0.6417, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6477255998822317, | |
| "grad_norm": 0.6072946190834045, | |
| "learning_rate": 9.599483839268026e-05, | |
| "loss": 0.6122, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7066097453260709, | |
| "grad_norm": 0.6030572652816772, | |
| "learning_rate": 9.454410179022932e-05, | |
| "loss": 0.5809, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7654938907699103, | |
| "grad_norm": 0.5781008005142212, | |
| "learning_rate": 9.288422825194501e-05, | |
| "loss": 0.5446, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8243780362137495, | |
| "grad_norm": 0.5412103533744812, | |
| "learning_rate": 9.102301097269974e-05, | |
| "loss": 0.5339, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8832621816575887, | |
| "grad_norm": 0.5678456425666809, | |
| "learning_rate": 8.896918846697821e-05, | |
| "loss": 0.5296, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.942146327101428, | |
| "grad_norm": 0.525556206703186, | |
| "learning_rate": 8.673240354108538e-05, | |
| "loss": 0.5176, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0011776829088768, | |
| "grad_norm": 1.9685856103897095, | |
| "learning_rate": 8.432315801965616e-05, | |
| "loss": 0.5104, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0600618283527161, | |
| "grad_norm": 0.6006094217300415, | |
| "learning_rate": 8.175276343902802e-05, | |
| "loss": 0.4685, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1189459737965552, | |
| "grad_norm": 0.5228903889656067, | |
| "learning_rate": 7.903328793897418e-05, | |
| "loss": 0.473, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1778301192403946, | |
| "grad_norm": 0.5006899237632751, | |
| "learning_rate": 7.6177499602143e-05, | |
| "loss": 0.4679, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.1778301192403946, | |
| "eval_loss": 0.4844963848590851, | |
| "eval_runtime": 2172.4438, | |
| "eval_samples_per_second": 1.39, | |
| "eval_steps_per_second": 0.695, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2367142646842337, | |
| "grad_norm": 0.5041179060935974, | |
| "learning_rate": 7.319880650722838e-05, | |
| "loss": 0.4541, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.295598410128073, | |
| "grad_norm": 0.5369197726249695, | |
| "learning_rate": 7.01111937773246e-05, | |
| "loss": 0.4576, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3544825555719122, | |
| "grad_norm": 0.5211925506591797, | |
| "learning_rate": 6.692915791902665e-05, | |
| "loss": 0.4472, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4133667010157516, | |
| "grad_norm": 0.5664705038070679, | |
| "learning_rate": 6.366763876055806e-05, | |
| "loss": 0.4427, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4722508464595907, | |
| "grad_norm": 0.5420666337013245, | |
| "learning_rate": 6.034194930847975e-05, | |
| "loss": 0.4395, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.53113499190343, | |
| "grad_norm": 0.558952271938324, | |
| "learning_rate": 5.6967703852306786e-05, | |
| "loss": 0.4305, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5900191373472694, | |
| "grad_norm": 0.510136067867279, | |
| "learning_rate": 5.356074465458553e-05, | |
| "loss": 0.428, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6489032827911085, | |
| "grad_norm": 0.506799578666687, | |
| "learning_rate": 5.013706757062534e-05, | |
| "loss": 0.4251, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.7077874282349477, | |
| "grad_norm": 0.5179591178894043, | |
| "learning_rate": 4.671274694710388e-05, | |
| "loss": 0.4188, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.766671573678787, | |
| "grad_norm": 0.531908392906189, | |
| "learning_rate": 4.3303860152151445e-05, | |
| "loss": 0.4177, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.766671573678787, | |
| "eval_loss": 0.4321376383304596, | |
| "eval_runtime": 2174.1694, | |
| "eval_samples_per_second": 1.389, | |
| "eval_steps_per_second": 0.695, | |
| "step": 1500 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2547, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.011350882666414e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |