{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 60,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "grad_norm": 1.4496120364361036,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.9155,
      "step": 1
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.218827428736731,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 1.8892,
      "step": 5
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.798085331916809,
      "eval_runtime": 2.1006,
      "eval_samples_per_second": 6.189,
      "eval_steps_per_second": 0.476,
      "step": 6
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.6165859667642012,
      "learning_rate": 1.973044870579824e-05,
      "loss": 1.7346,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.7420978546142578,
      "eval_runtime": 2.0904,
      "eval_samples_per_second": 6.219,
      "eval_steps_per_second": 0.478,
      "step": 12
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.5485370800727245,
      "learning_rate": 1.866025403784439e-05,
      "loss": 1.5782,
      "step": 15
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.683698058128357,
      "eval_runtime": 2.0865,
      "eval_samples_per_second": 6.231,
      "eval_steps_per_second": 0.479,
      "step": 18
    },
    {
      "epoch": 3.33,
      "grad_norm": 0.6025243263949436,
      "learning_rate": 1.686241637868734e-05,
      "loss": 1.3988,
      "step": 20
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.701674222946167,
      "eval_runtime": 2.1036,
      "eval_samples_per_second": 6.18,
      "eval_steps_per_second": 0.475,
      "step": 24
    },
    {
      "epoch": 4.17,
      "grad_norm": 0.9100140176610921,
      "learning_rate": 1.4487991802004625e-05,
      "loss": 1.2335,
      "step": 25
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.7356725391870051,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 1.0825,
      "step": 30
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.7303403615951538,
      "eval_runtime": 2.0757,
      "eval_samples_per_second": 6.263,
      "eval_steps_per_second": 0.482,
      "step": 30
    },
    {
      "epoch": 5.83,
      "grad_norm": 1.0173385546393663,
      "learning_rate": 8.839070858747697e-06,
      "loss": 0.9252,
      "step": 35
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.8101954460144043,
      "eval_runtime": 2.0878,
      "eval_samples_per_second": 6.227,
      "eval_steps_per_second": 0.479,
      "step": 36
    },
    {
      "epoch": 6.67,
      "grad_norm": 0.971085635461868,
      "learning_rate": 6.039202339608432e-06,
      "loss": 0.794,
      "step": 40
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.9195078611373901,
      "eval_runtime": 2.0852,
      "eval_samples_per_second": 6.234,
      "eval_steps_per_second": 0.48,
      "step": 42
    },
    {
      "epoch": 7.5,
      "grad_norm": 1.3012574014474572,
      "learning_rate": 3.5721239031346067e-06,
      "loss": 0.7094,
      "step": 45
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.9668892621994019,
      "eval_runtime": 2.0987,
      "eval_samples_per_second": 6.194,
      "eval_steps_per_second": 0.476,
      "step": 48
    },
    {
      "epoch": 8.33,
      "grad_norm": 0.8907063313216519,
      "learning_rate": 1.6451218858706374e-06,
      "loss": 0.6205,
      "step": 50
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.996960163116455,
      "eval_runtime": 2.0971,
      "eval_samples_per_second": 6.199,
      "eval_steps_per_second": 0.477,
      "step": 54
    },
    {
      "epoch": 9.17,
      "grad_norm": 0.6292600013299577,
      "learning_rate": 4.2010487684511105e-07,
      "loss": 0.5905,
      "step": 55
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.694109189410468,
      "learning_rate": 0.0,
      "loss": 0.5749,
      "step": 60
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.9975172281265259,
      "eval_runtime": 2.0907,
      "eval_samples_per_second": 6.218,
      "eval_steps_per_second": 0.478,
      "step": 60
    },
    {
      "epoch": 10.0,
      "step": 60,
      "total_flos": 12562779340800.0,
      "train_loss": 1.0947178224722545,
      "train_runtime": 366.9794,
      "train_samples_per_second": 20.192,
      "train_steps_per_second": 0.163
    }
  ],
  "logging_steps": 5,
  "max_steps": 60,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000000000,
  "total_flos": 12562779340800.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}