wiki-retriver / trainer_state.json
Kapturz0ny's picture
Upload fine-tuned retriever model
55ec7e6 verified
{
"best_global_step": 450,
"best_metric": 0.8513853904282116,
"best_model_checkpoint": "../models/retriever_trainer_bge_base/checkpoint-450",
"epoch": 1.9148936170212765,
"eval_steps": 50,
"global_step": 450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0425531914893617,
"grad_norm": 0.8533166646957397,
"learning_rate": 1.5254237288135596e-06,
"loss": 0.2054,
"step": 10
},
{
"epoch": 0.0851063829787234,
"grad_norm": 0.7839147448539734,
"learning_rate": 3.2203389830508473e-06,
"loss": 0.2017,
"step": 20
},
{
"epoch": 0.1276595744680851,
"grad_norm": 0.9631144404411316,
"learning_rate": 4.915254237288136e-06,
"loss": 0.197,
"step": 30
},
{
"epoch": 0.1702127659574468,
"grad_norm": 1.077710747718811,
"learning_rate": 6.610169491525424e-06,
"loss": 0.1913,
"step": 40
},
{
"epoch": 0.2127659574468085,
"grad_norm": 0.8926519155502319,
"learning_rate": 8.305084745762712e-06,
"loss": 0.1827,
"step": 50
},
{
"epoch": 0.2127659574468085,
"eval_accuracy_at_0.5_thresh": 0.7808510638297872,
"eval_f1_at_0.5_thresh": 0.7444168734491315,
"eval_loss": 0.18353447318077087,
"eval_precision_at_0.5_thresh": 0.7389162561576355,
"eval_recall_at_0.5_thresh": 0.75,
"eval_runtime": 1.2282,
"eval_samples_per_second": 382.661,
"eval_steps_per_second": 12.213,
"step": 50
},
{
"epoch": 0.2553191489361702,
"grad_norm": 1.0001214742660522,
"learning_rate": 1e-05,
"loss": 0.1897,
"step": 60
},
{
"epoch": 0.2978723404255319,
"grad_norm": 0.8536041378974915,
"learning_rate": 1.169491525423729e-05,
"loss": 0.1811,
"step": 70
},
{
"epoch": 0.3404255319148936,
"grad_norm": 0.9402114748954773,
"learning_rate": 1.3389830508474577e-05,
"loss": 0.1777,
"step": 80
},
{
"epoch": 0.3829787234042553,
"grad_norm": 0.9630193710327148,
"learning_rate": 1.5084745762711865e-05,
"loss": 0.1697,
"step": 90
},
{
"epoch": 0.425531914893617,
"grad_norm": 1.079608678817749,
"learning_rate": 1.6779661016949154e-05,
"loss": 0.166,
"step": 100
},
{
"epoch": 0.425531914893617,
"eval_accuracy_at_0.5_thresh": 0.8063829787234043,
"eval_f1_at_0.5_thresh": 0.723404255319149,
"eval_loss": 0.15719293057918549,
"eval_precision_at_0.5_thresh": 0.9224806201550387,
"eval_recall_at_0.5_thresh": 0.595,
"eval_runtime": 1.1886,
"eval_samples_per_second": 395.432,
"eval_steps_per_second": 12.62,
"step": 100
},
{
"epoch": 0.46808510638297873,
"grad_norm": 1.0034838914871216,
"learning_rate": 1.8474576271186443e-05,
"loss": 0.1685,
"step": 110
},
{
"epoch": 0.5106382978723404,
"grad_norm": 0.9138811230659485,
"learning_rate": 1.9981078524124883e-05,
"loss": 0.1708,
"step": 120
},
{
"epoch": 0.5531914893617021,
"grad_norm": 0.9457395076751709,
"learning_rate": 1.97918637653737e-05,
"loss": 0.1524,
"step": 130
},
{
"epoch": 0.5957446808510638,
"grad_norm": 1.1439883708953857,
"learning_rate": 1.960264900662252e-05,
"loss": 0.1642,
"step": 140
},
{
"epoch": 0.6382978723404256,
"grad_norm": 0.900199830532074,
"learning_rate": 1.9413434247871333e-05,
"loss": 0.1612,
"step": 150
},
{
"epoch": 0.6382978723404256,
"eval_accuracy_at_0.5_thresh": 0.8553191489361702,
"eval_f1_at_0.5_thresh": 0.8308457711442786,
"eval_loss": 0.14183281362056732,
"eval_precision_at_0.5_thresh": 0.8267326732673267,
"eval_recall_at_0.5_thresh": 0.835,
"eval_runtime": 1.1664,
"eval_samples_per_second": 402.933,
"eval_steps_per_second": 12.86,
"step": 150
},
{
"epoch": 0.6808510638297872,
"grad_norm": 1.2067737579345703,
"learning_rate": 1.922421948912015e-05,
"loss": 0.1553,
"step": 160
},
{
"epoch": 0.723404255319149,
"grad_norm": 1.0066120624542236,
"learning_rate": 1.9035004730368973e-05,
"loss": 0.1495,
"step": 170
},
{
"epoch": 0.7659574468085106,
"grad_norm": 1.0830472707748413,
"learning_rate": 1.8845789971617787e-05,
"loss": 0.1364,
"step": 180
},
{
"epoch": 0.8085106382978723,
"grad_norm": 0.9800769686698914,
"learning_rate": 1.8656575212866605e-05,
"loss": 0.1295,
"step": 190
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.9936037659645081,
"learning_rate": 1.8467360454115423e-05,
"loss": 0.1545,
"step": 200
},
{
"epoch": 0.851063829787234,
"eval_accuracy_at_0.5_thresh": 0.8382978723404255,
"eval_f1_at_0.5_thresh": 0.8020833333333334,
"eval_loss": 0.13116958737373352,
"eval_precision_at_0.5_thresh": 0.8369565217391305,
"eval_recall_at_0.5_thresh": 0.77,
"eval_runtime": 1.1719,
"eval_samples_per_second": 401.047,
"eval_steps_per_second": 12.799,
"step": 200
},
{
"epoch": 0.8936170212765957,
"grad_norm": 0.9179720878601074,
"learning_rate": 1.827814569536424e-05,
"loss": 0.1328,
"step": 210
},
{
"epoch": 0.9361702127659575,
"grad_norm": 1.5345299243927002,
"learning_rate": 1.808893093661306e-05,
"loss": 0.1355,
"step": 220
},
{
"epoch": 0.9787234042553191,
"grad_norm": 1.037996530532837,
"learning_rate": 1.7899716177861873e-05,
"loss": 0.1289,
"step": 230
},
{
"epoch": 1.0212765957446808,
"grad_norm": 1.2900911569595337,
"learning_rate": 1.771050141911069e-05,
"loss": 0.1245,
"step": 240
},
{
"epoch": 1.0638297872340425,
"grad_norm": 0.7172983884811401,
"learning_rate": 1.752128666035951e-05,
"loss": 0.1067,
"step": 250
},
{
"epoch": 1.0638297872340425,
"eval_accuracy_at_0.5_thresh": 0.8425531914893617,
"eval_f1_at_0.5_thresh": 0.8131313131313131,
"eval_loss": 0.12659965455532074,
"eval_precision_at_0.5_thresh": 0.8214285714285714,
"eval_recall_at_0.5_thresh": 0.805,
"eval_runtime": 1.1658,
"eval_samples_per_second": 403.141,
"eval_steps_per_second": 12.866,
"step": 250
},
{
"epoch": 1.1063829787234043,
"grad_norm": 1.0125339031219482,
"learning_rate": 1.7332071901608327e-05,
"loss": 0.1177,
"step": 260
},
{
"epoch": 1.148936170212766,
"grad_norm": 1.0160564184188843,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.1034,
"step": 270
},
{
"epoch": 1.1914893617021276,
"grad_norm": 1.3702187538146973,
"learning_rate": 1.6953642384105963e-05,
"loss": 0.1117,
"step": 280
},
{
"epoch": 1.2340425531914894,
"grad_norm": 1.317497730255127,
"learning_rate": 1.676442762535478e-05,
"loss": 0.1235,
"step": 290
},
{
"epoch": 1.2765957446808511,
"grad_norm": 0.8722613453865051,
"learning_rate": 1.6575212866603596e-05,
"loss": 0.0933,
"step": 300
},
{
"epoch": 1.2765957446808511,
"eval_accuracy_at_0.5_thresh": 0.8489361702127659,
"eval_f1_at_0.5_thresh": 0.8202531645569621,
"eval_loss": 0.1204555556178093,
"eval_precision_at_0.5_thresh": 0.8307692307692308,
"eval_recall_at_0.5_thresh": 0.81,
"eval_runtime": 1.1714,
"eval_samples_per_second": 401.236,
"eval_steps_per_second": 12.805,
"step": 300
},
{
"epoch": 1.3191489361702127,
"grad_norm": 1.0289169549942017,
"learning_rate": 1.6385998107852414e-05,
"loss": 0.1039,
"step": 310
},
{
"epoch": 1.3617021276595744,
"grad_norm": 0.9575484991073608,
"learning_rate": 1.619678334910123e-05,
"loss": 0.1161,
"step": 320
},
{
"epoch": 1.4042553191489362,
"grad_norm": 1.3465416431427002,
"learning_rate": 1.600756859035005e-05,
"loss": 0.0912,
"step": 330
},
{
"epoch": 1.4468085106382977,
"grad_norm": 0.9446049332618713,
"learning_rate": 1.5818353831598864e-05,
"loss": 0.0868,
"step": 340
},
{
"epoch": 1.4893617021276595,
"grad_norm": 1.3670283555984497,
"learning_rate": 1.5629139072847682e-05,
"loss": 0.0984,
"step": 350
},
{
"epoch": 1.4893617021276595,
"eval_accuracy_at_0.5_thresh": 0.8617021276595744,
"eval_f1_at_0.5_thresh": 0.8387096774193549,
"eval_loss": 0.12064016610383987,
"eval_precision_at_0.5_thresh": 0.8325123152709359,
"eval_recall_at_0.5_thresh": 0.845,
"eval_runtime": 1.1704,
"eval_samples_per_second": 401.579,
"eval_steps_per_second": 12.816,
"step": 350
},
{
"epoch": 1.5319148936170213,
"grad_norm": 1.2213770151138306,
"learning_rate": 1.54399243140965e-05,
"loss": 0.109,
"step": 360
},
{
"epoch": 1.574468085106383,
"grad_norm": 0.8301506638526917,
"learning_rate": 1.5250709555345318e-05,
"loss": 0.1014,
"step": 370
},
{
"epoch": 1.6170212765957448,
"grad_norm": 1.2703402042388916,
"learning_rate": 1.5061494796594136e-05,
"loss": 0.1054,
"step": 380
},
{
"epoch": 1.6595744680851063,
"grad_norm": 1.1871310472488403,
"learning_rate": 1.4872280037842952e-05,
"loss": 0.1043,
"step": 390
},
{
"epoch": 1.702127659574468,
"grad_norm": 0.7756622433662415,
"learning_rate": 1.468306527909177e-05,
"loss": 0.1052,
"step": 400
},
{
"epoch": 1.702127659574468,
"eval_accuracy_at_0.5_thresh": 0.8659574468085106,
"eval_f1_at_0.5_thresh": 0.8372093023255814,
"eval_loss": 0.1149349957704544,
"eval_precision_at_0.5_thresh": 0.8663101604278075,
"eval_recall_at_0.5_thresh": 0.81,
"eval_runtime": 1.1721,
"eval_samples_per_second": 400.999,
"eval_steps_per_second": 12.798,
"step": 400
},
{
"epoch": 1.7446808510638299,
"grad_norm": 1.2976441383361816,
"learning_rate": 1.4493850520340588e-05,
"loss": 0.1295,
"step": 410
},
{
"epoch": 1.7872340425531914,
"grad_norm": 1.139825463294983,
"learning_rate": 1.4304635761589404e-05,
"loss": 0.1032,
"step": 420
},
{
"epoch": 1.8297872340425532,
"grad_norm": 1.3335371017456055,
"learning_rate": 1.4115421002838224e-05,
"loss": 0.1001,
"step": 430
},
{
"epoch": 1.872340425531915,
"grad_norm": 1.33684241771698,
"learning_rate": 1.3926206244087039e-05,
"loss": 0.1052,
"step": 440
},
{
"epoch": 1.9148936170212765,
"grad_norm": 1.1832659244537354,
"learning_rate": 1.3736991485335858e-05,
"loss": 0.1064,
"step": 450
},
{
"epoch": 1.9148936170212765,
"eval_accuracy_at_0.5_thresh": 0.874468085106383,
"eval_f1_at_0.5_thresh": 0.8513853904282116,
"eval_loss": 0.11494793742895126,
"eval_precision_at_0.5_thresh": 0.8578680203045685,
"eval_recall_at_0.5_thresh": 0.845,
"eval_runtime": 1.1713,
"eval_samples_per_second": 401.267,
"eval_steps_per_second": 12.806,
"step": 450
}
],
"logging_steps": 10,
"max_steps": 1175,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}