{ "best_metric": 0.5491589903831482, "best_model_checkpoint": "checkpoints/checkpoint-200", "epoch": 2.9723076923076923, "eval_steps": 100, "global_step": 243, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12307692307692308, "gpu_memory": 4887.19873046875, "grad_norm": 0.4532247483730316, "learning_rate": 0.00023076923076923076, "loss": 2.0289, "step": 10 }, { "epoch": 0.24615384615384617, "gpu_memory": 4887.19873046875, "grad_norm": 0.4620782732963562, "learning_rate": 0.00029931487386844626, "loss": 1.6411, "step": 20 }, { "epoch": 0.36923076923076925, "gpu_memory": 4887.19873046875, "grad_norm": 0.3020133078098297, "learning_rate": 0.0002959742119362563, "loss": 1.3515, "step": 30 }, { "epoch": 0.49230769230769234, "gpu_memory": 4887.19873046875, "grad_norm": 0.31100866198539734, "learning_rate": 0.0002899143266295095, "loss": 1.1845, "step": 40 }, { "epoch": 0.6153846153846154, "gpu_memory": 4887.19873046875, "grad_norm": 0.3017350733280182, "learning_rate": 0.00028124810214572737, "loss": 1.1433, "step": 50 }, { "epoch": 0.7384615384615385, "gpu_memory": 4887.19873046875, "grad_norm": 0.3388933837413788, "learning_rate": 0.0002701369738499162, "loss": 1.0192, "step": 60 }, { "epoch": 0.8615384615384616, "gpu_memory": 4887.19873046875, "grad_norm": 0.36166566610336304, "learning_rate": 0.00025678792103916504, "loss": 0.9971, "step": 70 }, { "epoch": 0.9846153846153847, "gpu_memory": 4887.19873046875, "grad_norm": 0.34558528661727905, "learning_rate": 0.00024144961130996017, "loss": 0.9646, "step": 80 }, { "epoch": 1.0984615384615384, "gpu_memory": 4887.19873046875, "grad_norm": 0.3525680601596832, "learning_rate": 0.0002244077683513602, "loss": 0.9099, "step": 90 }, { "epoch": 1.2215384615384615, "gpu_memory": 4887.19873046875, "grad_norm": 0.43674904108047485, "learning_rate": 0.0002059798494532787, "loss": 0.8937, "step": 100 }, { "epoch": 1.2215384615384615, "eval_loss": 0.6234937310218811, "eval_runtime": 0.2002, "eval_samples_per_second": 4.994, "eval_steps_per_second": 4.994, "gpu_memory": 4887.19873046875, "learning_rate": 0.0002059798494532787, "step": 100 }, { "epoch": 1.3446153846153845, "gpu_memory": 4887.19873046875, "grad_norm": 0.3604443073272705, "learning_rate": 0.00018650913187782535, "loss": 0.8791, "step": 110 }, { "epoch": 1.4676923076923076, "gpu_memory": 4887.19873046875, "grad_norm": 0.349542498588562, "learning_rate": 0.00016635831825341846, "loss": 0.8584, "step": 120 }, { "epoch": 1.5907692307692307, "gpu_memory": 4887.19873046875, "grad_norm": 0.3813496232032776, "learning_rate": 0.00014590278011107714, "loss": 0.8552, "step": 130 }, { "epoch": 1.7138461538461538, "gpu_memory": 4887.19873046875, "grad_norm": 0.3789571225643158, "learning_rate": 0.00012552356542302868, "loss": 0.8731, "step": 140 }, { "epoch": 1.8369230769230769, "gpu_memory": 4887.19873046875, "grad_norm": 0.3803671896457672, "learning_rate": 0.00010560030039995649, "loss": 0.8411, "step": 150 }, { "epoch": 1.96, "gpu_memory": 4887.19873046875, "grad_norm": 0.4134896397590637, "learning_rate": 8.650411777297534e-05, "loss": 0.8157, "step": 160 }, { "epoch": 2.0738461538461537, "gpu_memory": 4887.19873046875, "grad_norm": 0.39426007866859436, "learning_rate": 6.859074329306077e-05, "loss": 0.8023, "step": 170 }, { "epoch": 2.1969230769230768, "gpu_memory": 4887.19873046875, "grad_norm": 0.3811410367488861, "learning_rate": 5.2193869233367433e-05, "loss": 0.7673, "step": 180 }, { "epoch": 2.32, "gpu_memory": 4887.19873046875, "grad_norm": 0.37296849489212036, "learning_rate": 3.761893833355035e-05, "loss": 0.7864, "step": 190 }, { "epoch": 2.443076923076923, "gpu_memory": 4887.19873046875, "grad_norm": 0.45706120133399963, "learning_rate": 2.5137453979444762e-05, "loss": 0.7803, "step": 200 }, { "epoch": 2.443076923076923, "eval_loss": 0.5491589903831482, "eval_runtime": 0.1999, "eval_samples_per_second": 5.002, "eval_steps_per_second": 5.002, "gpu_memory": 4887.19873046875, "learning_rate": 2.5137453979444762e-05, "step": 200 }, { "epoch": 2.566153846153846, "gpu_memory": 4887.19873046875, "grad_norm": 0.4115428924560547, "learning_rate": 1.4981922608692365e-05, "loss": 0.7901, "step": 210 }, { "epoch": 2.689230769230769, "gpu_memory": 4887.19873046875, "grad_norm": 0.404224157333374, "learning_rate": 7.34152255572697e-06, "loss": 0.7795, "step": 220 }, { "epoch": 2.812307692307692, "gpu_memory": 4887.19873046875, "grad_norm": 0.382841020822525, "learning_rate": 2.3585800173432813e-06, "loss": 0.7933, "step": 230 }, { "epoch": 2.9353846153846153, "gpu_memory": 4887.19873046875, "grad_norm": 0.4079365134239197, "learning_rate": 1.259177849420312e-07, "loss": 0.828, "step": 240 } ], "logging_steps": 10, "max_steps": 243, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.675188391365837e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }