SRS
Training in progress, step 243, checkpoint
766011f verified
{
"best_metric": 0.5491589903831482,
"best_model_checkpoint": "checkpoints/checkpoint-200",
"epoch": 2.9723076923076923,
"eval_steps": 100,
"global_step": 243,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12307692307692308,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.4532247483730316,
"learning_rate": 0.00023076923076923076,
"loss": 2.0289,
"step": 10
},
{
"epoch": 0.24615384615384617,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.4620782732963562,
"learning_rate": 0.00029931487386844626,
"loss": 1.6411,
"step": 20
},
{
"epoch": 0.36923076923076925,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3020133078098297,
"learning_rate": 0.0002959742119362563,
"loss": 1.3515,
"step": 30
},
{
"epoch": 0.49230769230769234,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.31100866198539734,
"learning_rate": 0.0002899143266295095,
"loss": 1.1845,
"step": 40
},
{
"epoch": 0.6153846153846154,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3017350733280182,
"learning_rate": 0.00028124810214572737,
"loss": 1.1433,
"step": 50
},
{
"epoch": 0.7384615384615385,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3388933837413788,
"learning_rate": 0.0002701369738499162,
"loss": 1.0192,
"step": 60
},
{
"epoch": 0.8615384615384616,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.36166566610336304,
"learning_rate": 0.00025678792103916504,
"loss": 0.9971,
"step": 70
},
{
"epoch": 0.9846153846153847,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.34558528661727905,
"learning_rate": 0.00024144961130996017,
"loss": 0.9646,
"step": 80
},
{
"epoch": 1.0984615384615384,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3525680601596832,
"learning_rate": 0.0002244077683513602,
"loss": 0.9099,
"step": 90
},
{
"epoch": 1.2215384615384615,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.43674904108047485,
"learning_rate": 0.0002059798494532787,
"loss": 0.8937,
"step": 100
},
{
"epoch": 1.2215384615384615,
"eval_loss": 0.6234937310218811,
"eval_runtime": 0.2002,
"eval_samples_per_second": 4.994,
"eval_steps_per_second": 4.994,
"gpu_memory": 4887.19873046875,
"learning_rate": 0.0002059798494532787,
"step": 100
},
{
"epoch": 1.3446153846153845,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3604443073272705,
"learning_rate": 0.00018650913187782535,
"loss": 0.8791,
"step": 110
},
{
"epoch": 1.4676923076923076,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.349542498588562,
"learning_rate": 0.00016635831825341846,
"loss": 0.8584,
"step": 120
},
{
"epoch": 1.5907692307692307,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3813496232032776,
"learning_rate": 0.00014590278011107714,
"loss": 0.8552,
"step": 130
},
{
"epoch": 1.7138461538461538,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3789571225643158,
"learning_rate": 0.00012552356542302868,
"loss": 0.8731,
"step": 140
},
{
"epoch": 1.8369230769230769,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3803671896457672,
"learning_rate": 0.00010560030039995649,
"loss": 0.8411,
"step": 150
},
{
"epoch": 1.96,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.4134896397590637,
"learning_rate": 8.650411777297534e-05,
"loss": 0.8157,
"step": 160
},
{
"epoch": 2.0738461538461537,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.39426007866859436,
"learning_rate": 6.859074329306077e-05,
"loss": 0.8023,
"step": 170
},
{
"epoch": 2.1969230769230768,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.3811410367488861,
"learning_rate": 5.2193869233367433e-05,
"loss": 0.7673,
"step": 180
},
{
"epoch": 2.32,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.37296849489212036,
"learning_rate": 3.761893833355035e-05,
"loss": 0.7864,
"step": 190
},
{
"epoch": 2.443076923076923,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.45706120133399963,
"learning_rate": 2.5137453979444762e-05,
"loss": 0.7803,
"step": 200
},
{
"epoch": 2.443076923076923,
"eval_loss": 0.5491589903831482,
"eval_runtime": 0.1999,
"eval_samples_per_second": 5.002,
"eval_steps_per_second": 5.002,
"gpu_memory": 4887.19873046875,
"learning_rate": 2.5137453979444762e-05,
"step": 200
},
{
"epoch": 2.566153846153846,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.4115428924560547,
"learning_rate": 1.4981922608692365e-05,
"loss": 0.7901,
"step": 210
},
{
"epoch": 2.689230769230769,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.404224157333374,
"learning_rate": 7.34152255572697e-06,
"loss": 0.7795,
"step": 220
},
{
"epoch": 2.812307692307692,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.382841020822525,
"learning_rate": 2.3585800173432813e-06,
"loss": 0.7933,
"step": 230
},
{
"epoch": 2.9353846153846153,
"gpu_memory": 4887.19873046875,
"grad_norm": 0.4079365134239197,
"learning_rate": 1.259177849420312e-07,
"loss": 0.828,
"step": 240
}
],
"logging_steps": 10,
"max_steps": 243,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.675188391365837e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}