| { | |
| "best_metric": 0.5491589903831482, | |
| "best_model_checkpoint": "checkpoints/checkpoint-200", | |
| "epoch": 2.9723076923076923, | |
| "eval_steps": 100, | |
| "global_step": 243, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12307692307692308, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.4532247483730316, | |
| "learning_rate": 0.00023076923076923076, | |
| "loss": 2.0289, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.4620782732963562, | |
| "learning_rate": 0.00029931487386844626, | |
| "loss": 1.6411, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.36923076923076925, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3020133078098297, | |
| "learning_rate": 0.0002959742119362563, | |
| "loss": 1.3515, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.49230769230769234, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.31100866198539734, | |
| "learning_rate": 0.0002899143266295095, | |
| "loss": 1.1845, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3017350733280182, | |
| "learning_rate": 0.00028124810214572737, | |
| "loss": 1.1433, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.7384615384615385, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3388933837413788, | |
| "learning_rate": 0.0002701369738499162, | |
| "loss": 1.0192, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8615384615384616, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.36166566610336304, | |
| "learning_rate": 0.00025678792103916504, | |
| "loss": 0.9971, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9846153846153847, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.34558528661727905, | |
| "learning_rate": 0.00024144961130996017, | |
| "loss": 0.9646, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0984615384615384, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3525680601596832, | |
| "learning_rate": 0.0002244077683513602, | |
| "loss": 0.9099, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.2215384615384615, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.43674904108047485, | |
| "learning_rate": 0.0002059798494532787, | |
| "loss": 0.8937, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.2215384615384615, | |
| "eval_loss": 0.6234937310218811, | |
| "eval_runtime": 0.2002, | |
| "eval_samples_per_second": 4.994, | |
| "eval_steps_per_second": 4.994, | |
| "gpu_memory": 4887.19873046875, | |
| "learning_rate": 0.0002059798494532787, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.3446153846153845, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3604443073272705, | |
| "learning_rate": 0.00018650913187782535, | |
| "loss": 0.8791, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.4676923076923076, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.349542498588562, | |
| "learning_rate": 0.00016635831825341846, | |
| "loss": 0.8584, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5907692307692307, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3813496232032776, | |
| "learning_rate": 0.00014590278011107714, | |
| "loss": 0.8552, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.7138461538461538, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3789571225643158, | |
| "learning_rate": 0.00012552356542302868, | |
| "loss": 0.8731, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.8369230769230769, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3803671896457672, | |
| "learning_rate": 0.00010560030039995649, | |
| "loss": 0.8411, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.4134896397590637, | |
| "learning_rate": 8.650411777297534e-05, | |
| "loss": 0.8157, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.0738461538461537, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.39426007866859436, | |
| "learning_rate": 6.859074329306077e-05, | |
| "loss": 0.8023, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.1969230769230768, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.3811410367488861, | |
| "learning_rate": 5.2193869233367433e-05, | |
| "loss": 0.7673, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.37296849489212036, | |
| "learning_rate": 3.761893833355035e-05, | |
| "loss": 0.7864, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.443076923076923, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.45706120133399963, | |
| "learning_rate": 2.5137453979444762e-05, | |
| "loss": 0.7803, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.443076923076923, | |
| "eval_loss": 0.5491589903831482, | |
| "eval_runtime": 0.1999, | |
| "eval_samples_per_second": 5.002, | |
| "eval_steps_per_second": 5.002, | |
| "gpu_memory": 4887.19873046875, | |
| "learning_rate": 2.5137453979444762e-05, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.566153846153846, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.4115428924560547, | |
| "learning_rate": 1.4981922608692365e-05, | |
| "loss": 0.7901, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.689230769230769, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.404224157333374, | |
| "learning_rate": 7.34152255572697e-06, | |
| "loss": 0.7795, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.812307692307692, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.382841020822525, | |
| "learning_rate": 2.3585800173432813e-06, | |
| "loss": 0.7933, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.9353846153846153, | |
| "gpu_memory": 4887.19873046875, | |
| "grad_norm": 0.4079365134239197, | |
| "learning_rate": 1.259177849420312e-07, | |
| "loss": 0.828, | |
| "step": 240 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 243, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.675188391365837e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |