TCS_Pair_base / last-checkpoint /trainer_state.json
mgh6's picture
Training in progress, step 900, checkpoint
625977f verified
{
"best_metric": 0.10389433056116104,
"best_model_checkpoint": "mgh6/TCS_Pair_base/checkpoint-650",
"epoch": 0.21178962230850687,
"eval_steps": 50,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011766090128250382,
"grad_norm": 1.275758147239685,
"learning_rate": 9.988232525300071e-05,
"loss": 0.0357,
"step": 50
},
{
"epoch": 0.011766090128250382,
"eval_loss": 0.18010935187339783,
"eval_runtime": 13.7184,
"eval_samples_per_second": 2045.21,
"eval_steps_per_second": 8.018,
"step": 50
},
{
"epoch": 0.023532180256500765,
"grad_norm": 0.9043275713920593,
"learning_rate": 9.976465050600142e-05,
"loss": 0.0653,
"step": 100
},
{
"epoch": 0.023532180256500765,
"eval_loss": 0.14653433859348297,
"eval_runtime": 13.7483,
"eval_samples_per_second": 2040.76,
"eval_steps_per_second": 8.001,
"step": 100
},
{
"epoch": 0.03529827038475115,
"grad_norm": 0.7122117877006531,
"learning_rate": 9.964697575900212e-05,
"loss": 0.0808,
"step": 150
},
{
"epoch": 0.03529827038475115,
"eval_loss": 0.14078955352306366,
"eval_runtime": 13.804,
"eval_samples_per_second": 2032.521,
"eval_steps_per_second": 7.969,
"step": 150
},
{
"epoch": 0.04706436051300153,
"grad_norm": 0.6643016338348389,
"learning_rate": 9.952930101200283e-05,
"loss": 0.0795,
"step": 200
},
{
"epoch": 0.04706436051300153,
"eval_loss": 0.14704547822475433,
"eval_runtime": 13.7761,
"eval_samples_per_second": 2036.65,
"eval_steps_per_second": 7.985,
"step": 200
},
{
"epoch": 0.05883045064125191,
"grad_norm": 0.7624136209487915,
"learning_rate": 9.941162626500353e-05,
"loss": 0.0713,
"step": 250
},
{
"epoch": 0.05883045064125191,
"eval_loss": 0.1481221318244934,
"eval_runtime": 13.7735,
"eval_samples_per_second": 2037.023,
"eval_steps_per_second": 7.986,
"step": 250
},
{
"epoch": 0.0705965407695023,
"grad_norm": 0.7390868663787842,
"learning_rate": 9.929395151800424e-05,
"loss": 0.0781,
"step": 300
},
{
"epoch": 0.0705965407695023,
"eval_loss": 0.12939240038394928,
"eval_runtime": 13.7779,
"eval_samples_per_second": 2036.381,
"eval_steps_per_second": 7.984,
"step": 300
},
{
"epoch": 0.08236263089775267,
"grad_norm": 0.6493538618087769,
"learning_rate": 9.917627677100495e-05,
"loss": 0.0762,
"step": 350
},
{
"epoch": 0.08236263089775267,
"eval_loss": 0.11381174623966217,
"eval_runtime": 13.7913,
"eval_samples_per_second": 2034.394,
"eval_steps_per_second": 7.976,
"step": 350
},
{
"epoch": 0.09412872102600306,
"grad_norm": 0.483694463968277,
"learning_rate": 9.905860202400565e-05,
"loss": 0.1041,
"step": 400
},
{
"epoch": 0.09412872102600306,
"eval_loss": 0.1289786696434021,
"eval_runtime": 13.7941,
"eval_samples_per_second": 2033.979,
"eval_steps_per_second": 7.974,
"step": 400
},
{
"epoch": 0.10589481115425343,
"grad_norm": 0.744751513004303,
"learning_rate": 9.894092727700636e-05,
"loss": 0.0957,
"step": 450
},
{
"epoch": 0.10589481115425343,
"eval_loss": 0.1261110156774521,
"eval_runtime": 13.7703,
"eval_samples_per_second": 2037.504,
"eval_steps_per_second": 7.988,
"step": 450
},
{
"epoch": 0.11766090128250382,
"grad_norm": 0.8092584609985352,
"learning_rate": 9.882325253000706e-05,
"loss": 0.0954,
"step": 500
},
{
"epoch": 0.11766090128250382,
"eval_loss": 0.11313354969024658,
"eval_runtime": 13.7468,
"eval_samples_per_second": 2040.988,
"eval_steps_per_second": 8.002,
"step": 500
},
{
"epoch": 0.1294269914107542,
"grad_norm": 0.5611206889152527,
"learning_rate": 9.870557778300777e-05,
"loss": 0.0912,
"step": 550
},
{
"epoch": 0.1294269914107542,
"eval_loss": 0.11919673532247543,
"eval_runtime": 13.802,
"eval_samples_per_second": 2032.825,
"eval_steps_per_second": 7.97,
"step": 550
},
{
"epoch": 0.1411930815390046,
"grad_norm": 0.7006365656852722,
"learning_rate": 9.858790303600848e-05,
"loss": 0.0938,
"step": 600
},
{
"epoch": 0.1411930815390046,
"eval_loss": 0.10846291482448578,
"eval_runtime": 13.7875,
"eval_samples_per_second": 2034.962,
"eval_steps_per_second": 7.978,
"step": 600
},
{
"epoch": 0.15295917166725498,
"grad_norm": 0.5973498225212097,
"learning_rate": 9.847022828900918e-05,
"loss": 0.0946,
"step": 650
},
{
"epoch": 0.15295917166725498,
"eval_loss": 0.10389433056116104,
"eval_runtime": 13.7964,
"eval_samples_per_second": 2033.643,
"eval_steps_per_second": 7.973,
"step": 650
},
{
"epoch": 0.16472526179550534,
"grad_norm": 0.6352247595787048,
"learning_rate": 9.835255354200989e-05,
"loss": 0.0893,
"step": 700
},
{
"epoch": 0.16472526179550534,
"eval_loss": 0.10826652497053146,
"eval_runtime": 13.8022,
"eval_samples_per_second": 2032.788,
"eval_steps_per_second": 7.97,
"step": 700
},
{
"epoch": 0.17649135192375573,
"grad_norm": 0.6947146654129028,
"learning_rate": 9.823487879501059e-05,
"loss": 0.0867,
"step": 750
},
{
"epoch": 0.17649135192375573,
"eval_loss": 0.10910729318857193,
"eval_runtime": 13.7759,
"eval_samples_per_second": 2036.679,
"eval_steps_per_second": 7.985,
"step": 750
},
{
"epoch": 0.18825744205200612,
"grad_norm": 0.5221682190895081,
"learning_rate": 9.81172040480113e-05,
"loss": 0.0915,
"step": 800
},
{
"epoch": 0.18825744205200612,
"eval_loss": 0.11108585447072983,
"eval_runtime": 13.7759,
"eval_samples_per_second": 2036.68,
"eval_steps_per_second": 7.985,
"step": 800
},
{
"epoch": 0.2000235321802565,
"grad_norm": 0.5295658707618713,
"learning_rate": 9.7999529301012e-05,
"loss": 0.0861,
"step": 850
},
{
"epoch": 0.2000235321802565,
"eval_loss": 0.10687608271837234,
"eval_runtime": 13.7743,
"eval_samples_per_second": 2036.916,
"eval_steps_per_second": 7.986,
"step": 850
},
{
"epoch": 0.21178962230850687,
"grad_norm": 0.712462306022644,
"learning_rate": 9.788185455401271e-05,
"loss": 0.0832,
"step": 900
},
{
"epoch": 0.21178962230850687,
"eval_loss": 0.10970053821802139,
"eval_runtime": 13.7864,
"eval_samples_per_second": 2035.119,
"eval_steps_per_second": 7.979,
"step": 900
}
],
"logging_steps": 50,
"max_steps": 42490,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}