{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1778301192403946,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014721036360959812,
      "grad_norm": 1.0893694162368774,
      "learning_rate": 4.808635917566242e-06,
      "loss": 1.1444,
      "step": 50
    },
    {
      "epoch": 0.029442072721919624,
      "grad_norm": 1.118213415145874,
      "learning_rate": 9.715407262021591e-06,
      "loss": 0.9497,
      "step": 100
    },
    {
      "epoch": 0.04416310908287943,
      "grad_norm": 1.3216179609298706,
      "learning_rate": 1.4622178606476939e-05,
      "loss": 0.8346,
      "step": 150
    },
    {
      "epoch": 0.05888414544383925,
      "grad_norm": 1.292870044708252,
      "learning_rate": 1.9528949950932288e-05,
      "loss": 0.7699,
      "step": 200
    },
    {
      "epoch": 0.07360518180479905,
      "grad_norm": 1.4046356678009033,
      "learning_rate": 2.4435721295387637e-05,
      "loss": 0.7252,
      "step": 250
    },
    {
      "epoch": 0.08832621816575886,
      "grad_norm": 1.4438663721084595,
      "learning_rate": 2.9342492639842983e-05,
      "loss": 0.6996,
      "step": 300
    },
    {
      "epoch": 0.10304725452671869,
      "grad_norm": 1.3257337808609009,
      "learning_rate": 3.424926398429833e-05,
      "loss": 0.686,
      "step": 350
    },
    {
      "epoch": 0.1177682908876785,
      "grad_norm": 1.3639789819717407,
      "learning_rate": 3.9156035328753685e-05,
      "loss": 0.6601,
      "step": 400
    },
    {
      "epoch": 0.1324893272486383,
      "grad_norm": 1.4148070812225342,
      "learning_rate": 4.406280667320903e-05,
      "loss": 0.6371,
      "step": 450
    },
    {
      "epoch": 0.1472103636095981,
      "grad_norm": 1.3940412998199463,
      "learning_rate": 4.8969578017664384e-05,
      "loss": 0.6417,
      "step": 500
    },
    {
      "epoch": 0.6477255998822317,
      "grad_norm": 0.6072946190834045,
      "learning_rate": 9.599483839268026e-05,
      "loss": 0.6122,
      "step": 550
    },
    {
      "epoch": 0.7066097453260709,
      "grad_norm": 0.6030572652816772,
      "learning_rate": 9.454410179022932e-05,
      "loss": 0.5809,
      "step": 600
    },
    {
      "epoch": 0.7654938907699103,
      "grad_norm": 0.5781008005142212,
      "learning_rate": 9.288422825194501e-05,
      "loss": 0.5446,
      "step": 650
    },
    {
      "epoch": 0.8243780362137495,
      "grad_norm": 0.5412103533744812,
      "learning_rate": 9.102301097269974e-05,
      "loss": 0.5339,
      "step": 700
    },
    {
      "epoch": 0.8832621816575887,
      "grad_norm": 0.5678456425666809,
      "learning_rate": 8.896918846697821e-05,
      "loss": 0.5296,
      "step": 750
    },
    {
      "epoch": 0.942146327101428,
      "grad_norm": 0.525556206703186,
      "learning_rate": 8.673240354108538e-05,
      "loss": 0.5176,
      "step": 800
    },
    {
      "epoch": 1.0011776829088768,
      "grad_norm": 1.9685856103897095,
      "learning_rate": 8.432315801965616e-05,
      "loss": 0.5104,
      "step": 850
    },
    {
      "epoch": 1.0600618283527161,
      "grad_norm": 0.6006094217300415,
      "learning_rate": 8.175276343902802e-05,
      "loss": 0.4685,
      "step": 900
    },
    {
      "epoch": 1.1189459737965552,
      "grad_norm": 0.5228903889656067,
      "learning_rate": 7.903328793897418e-05,
      "loss": 0.473,
      "step": 950
    },
    {
      "epoch": 1.1778301192403946,
      "grad_norm": 0.5006899237632751,
      "learning_rate": 7.6177499602143e-05,
      "loss": 0.4679,
      "step": 1000
    },
    {
      "epoch": 1.1778301192403946,
      "eval_loss": 0.4844963848590851,
      "eval_runtime": 2172.4438,
      "eval_samples_per_second": 1.39,
      "eval_steps_per_second": 0.695,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 2547,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1226707705777357e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}