| { | |
| "best_global_step": null, | |
| "best_metric": 0.5865987539291382, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 57100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.180218696594238, | |
| "learning_rate": 3.600420315236428e-05, | |
| "loss": 1.2756, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9367296695709229, | |
| "eval_runtime": 76.8888, | |
| "eval_samples_per_second": 132.971, | |
| "eval_steps_per_second": 16.621, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 5.356524467468262, | |
| "learning_rate": 3.200630472854641e-05, | |
| "loss": 0.9522, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.8216637969017029, | |
| "eval_runtime": 73.2993, | |
| "eval_samples_per_second": 139.483, | |
| "eval_steps_per_second": 17.435, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 4.990707874298096, | |
| "learning_rate": 2.800700525394046e-05, | |
| "loss": 0.8493, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.7453346848487854, | |
| "eval_runtime": 76.8549, | |
| "eval_samples_per_second": 133.03, | |
| "eval_steps_per_second": 16.629, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 4.565978527069092, | |
| "learning_rate": 2.4009106830122595e-05, | |
| "loss": 0.781, | |
| "step": 22840 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.6874940991401672, | |
| "eval_runtime": 38.7504, | |
| "eval_samples_per_second": 263.842, | |
| "eval_steps_per_second": 32.98, | |
| "step": 22840 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 4.659147262573242, | |
| "learning_rate": 2.0010507880910684e-05, | |
| "loss": 0.7316, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.6624111533164978, | |
| "eval_runtime": 36.5212, | |
| "eval_samples_per_second": 279.947, | |
| "eval_steps_per_second": 34.993, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 4.955957412719727, | |
| "learning_rate": 1.6011908931698776e-05, | |
| "loss": 0.6985, | |
| "step": 34260 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.6377598643302917, | |
| "eval_runtime": 35.9349, | |
| "eval_samples_per_second": 284.514, | |
| "eval_steps_per_second": 35.564, | |
| "step": 34260 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 4.549160003662109, | |
| "learning_rate": 1.2012609457092821e-05, | |
| "loss": 0.6736, | |
| "step": 39970 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.6155329346656799, | |
| "eval_runtime": 37.8645, | |
| "eval_samples_per_second": 270.015, | |
| "eval_steps_per_second": 33.752, | |
| "step": 39970 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 4.006523609161377, | |
| "learning_rate": 8.01260945709282e-06, | |
| "loss": 0.6502, | |
| "step": 45680 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.6062974333763123, | |
| "eval_runtime": 36.5102, | |
| "eval_samples_per_second": 280.031, | |
| "eval_steps_per_second": 35.004, | |
| "step": 45680 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 3.7265231609344482, | |
| "learning_rate": 4.013309982486865e-06, | |
| "loss": 0.6345, | |
| "step": 51390 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.5894550681114197, | |
| "eval_runtime": 43.2376, | |
| "eval_samples_per_second": 236.461, | |
| "eval_steps_per_second": 29.558, | |
| "step": 51390 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 3.897273302078247, | |
| "learning_rate": 1.611208406304729e-08, | |
| "loss": 0.6232, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.5865987539291382, | |
| "eval_runtime": 36.5729, | |
| "eval_samples_per_second": 279.551, | |
| "eval_steps_per_second": 34.944, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 57100, | |
| "total_flos": 6.01310491705344e+16, | |
| "train_loss": 0.7869737379939251, | |
| "train_runtime": 10124.3653, | |
| "train_samples_per_second": 90.247, | |
| "train_steps_per_second": 5.64 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.5878283381462097, | |
| "eval_runtime": 36.4992, | |
| "eval_samples_per_second": 280.116, | |
| "eval_steps_per_second": 35.014, | |
| "step": 57100 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 57100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 2, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.01310491705344e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |