HYdsl's picture
Upload 7 files
62a4941 verified
{
"best_metric": 0.06389161199331284,
"best_model_checkpoint": "finqa_models/training_naver-trecdl22-crossencoder-debertav3_2024-11-17_17-26-51/checkpoint-14424",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 14424,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10399334442595674,
"grad_norm": 0.12019415944814682,
"learning_rate": 1.9310870770937327e-07,
"loss": 0.1965,
"step": 500
},
{
"epoch": 0.2079866888519135,
"grad_norm": 6.474766254425049,
"learning_rate": 1.8618968386023294e-07,
"loss": 0.1351,
"step": 1000
},
{
"epoch": 0.3119800332778702,
"grad_norm": 23.737518310546875,
"learning_rate": 1.7925679423183582e-07,
"loss": 0.1353,
"step": 1500
},
{
"epoch": 0.415973377703827,
"grad_norm": 0.015338932164013386,
"learning_rate": 1.723239046034387e-07,
"loss": 0.1414,
"step": 2000
},
{
"epoch": 0.5199667221297837,
"grad_norm": 0.05833850055932999,
"learning_rate": 1.653910149750416e-07,
"loss": 0.1432,
"step": 2500
},
{
"epoch": 0.6239600665557404,
"grad_norm": 37.305580139160156,
"learning_rate": 1.5845812534664445e-07,
"loss": 0.1155,
"step": 3000
},
{
"epoch": 0.7279534109816972,
"grad_norm": 0.431114137172699,
"learning_rate": 1.5153910149750415e-07,
"loss": 0.1299,
"step": 3500
},
{
"epoch": 0.831946755407654,
"grad_norm": 0.022228846326470375,
"learning_rate": 1.4460621186910704e-07,
"loss": 0.116,
"step": 4000
},
{
"epoch": 0.9359400998336106,
"grad_norm": 9.204265594482422,
"learning_rate": 1.3767332224070992e-07,
"loss": 0.0917,
"step": 4500
},
{
"epoch": 1.0,
"eval_loss": 0.06588456779718399,
"eval_runtime": 6590.3071,
"eval_samples_per_second": 84.805,
"eval_steps_per_second": 5.3,
"step": 4808
},
{
"epoch": 1.0399334442595674,
"grad_norm": 44.67750549316406,
"learning_rate": 1.307404326123128e-07,
"loss": 0.1156,
"step": 5000
},
{
"epoch": 1.1439267886855242,
"grad_norm": 0.012149451300501823,
"learning_rate": 1.2382140876317248e-07,
"loss": 0.0982,
"step": 5500
},
{
"epoch": 1.2479201331114809,
"grad_norm": 1.34881591796875,
"learning_rate": 1.1688851913477538e-07,
"loss": 0.1059,
"step": 6000
},
{
"epoch": 1.3519134775374377,
"grad_norm": 0.14894534647464752,
"learning_rate": 1.0996949528563504e-07,
"loss": 0.1085,
"step": 6500
},
{
"epoch": 1.4559068219633944,
"grad_norm": 0.004204815719276667,
"learning_rate": 1.0303660565723793e-07,
"loss": 0.0949,
"step": 7000
},
{
"epoch": 1.559900166389351,
"grad_norm": 0.024665243923664093,
"learning_rate": 9.610371602884082e-08,
"loss": 0.1246,
"step": 7500
},
{
"epoch": 1.6638935108153077,
"grad_norm": 71.5008316040039,
"learning_rate": 8.91708264004437e-08,
"loss": 0.101,
"step": 8000
},
{
"epoch": 1.7678868552412645,
"grad_norm": 88.50396728515625,
"learning_rate": 8.22379367720466e-08,
"loss": 0.1177,
"step": 8500
},
{
"epoch": 1.8718801996672214,
"grad_norm": 0.28365257382392883,
"learning_rate": 7.530504714364947e-08,
"loss": 0.0987,
"step": 9000
},
{
"epoch": 1.975873544093178,
"grad_norm": 25.998218536376953,
"learning_rate": 6.838602329450914e-08,
"loss": 0.1001,
"step": 9500
},
{
"epoch": 2.0,
"eval_loss": 0.06637805700302124,
"eval_runtime": 6587.2735,
"eval_samples_per_second": 84.844,
"eval_steps_per_second": 5.303,
"step": 9616
},
{
"epoch": 2.0798668885191347,
"grad_norm": 59.35243225097656,
"learning_rate": 6.145313366611204e-08,
"loss": 0.0962,
"step": 10000
},
{
"epoch": 2.1838602329450914,
"grad_norm": 0.29308849573135376,
"learning_rate": 5.4520244037714916e-08,
"loss": 0.082,
"step": 10500
},
{
"epoch": 2.2878535773710484,
"grad_norm": 1.2921684980392456,
"learning_rate": 4.75873544093178e-08,
"loss": 0.102,
"step": 11000
},
{
"epoch": 2.391846921797005,
"grad_norm": 0.1976485699415207,
"learning_rate": 4.0654464780920686e-08,
"loss": 0.0869,
"step": 11500
},
{
"epoch": 2.4958402662229617,
"grad_norm": 0.0572974793612957,
"learning_rate": 3.373544093178036e-08,
"loss": 0.0971,
"step": 12000
},
{
"epoch": 2.5998336106489184,
"grad_norm": 0.030522992834448814,
"learning_rate": 2.6802551303383248e-08,
"loss": 0.1078,
"step": 12500
},
{
"epoch": 2.7038269550748755,
"grad_norm": 9.981757164001465,
"learning_rate": 1.9869661674986133e-08,
"loss": 0.091,
"step": 13000
},
{
"epoch": 2.8078202995008317,
"grad_norm": 0.05898750573396683,
"learning_rate": 1.2936772046589018e-08,
"loss": 0.0928,
"step": 13500
},
{
"epoch": 2.9118136439267888,
"grad_norm": 0.04275263100862503,
"learning_rate": 6.003882418191902e-09,
"loss": 0.0764,
"step": 14000
},
{
"epoch": 3.0,
"eval_loss": 0.06389161199331284,
"eval_runtime": 6582.4724,
"eval_samples_per_second": 84.906,
"eval_steps_per_second": 5.307,
"step": 14424
}
],
"logging_steps": 500,
"max_steps": 14424,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0751821588988006e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}