qwen-pgm-pairs / trainer_state.json
LAMDEC's picture
Upload folder using huggingface_hub
1958ac4 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 79,
"global_step": 782,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_cosine_accuracy@1": 0.2495,
"eval_cosine_accuracy@10": 0.4625,
"eval_cosine_accuracy@3": 0.344,
"eval_cosine_accuracy@5": 0.3965,
"eval_cosine_map@100": 0.1559092484894483,
"eval_cosine_mrr@10": 0.3108763888888889,
"eval_cosine_ndcg@10": 0.2096322787377784,
"eval_cosine_precision@1": 0.2495,
"eval_cosine_precision@10": 0.1072,
"eval_cosine_precision@3": 0.18666666666666665,
"eval_cosine_precision@5": 0.15159999999999998,
"eval_cosine_recall@1": 0.0643912531049299,
"eval_cosine_recall@10": 0.20289694590062918,
"eval_cosine_recall@3": 0.12590059739815648,
"eval_cosine_recall@5": 0.15806433844030476,
"eval_loss": 3.089846134185791,
"eval_runtime": 271.6862,
"eval_samples_per_second": 7.361,
"eval_steps_per_second": 0.118,
"step": 0
},
{
"epoch": 0.10112,
"grad_norm": 6.488596439361572,
"learning_rate": 4.936708860759494e-05,
"loss": 1.1095,
"step": 79
},
{
"epoch": 0.10112,
"eval_cosine_accuracy@1": 0.559,
"eval_cosine_accuracy@10": 0.8265,
"eval_cosine_accuracy@3": 0.698,
"eval_cosine_accuracy@5": 0.7555,
"eval_cosine_map@100": 0.4500740959017874,
"eval_cosine_mrr@10": 0.6431708333333328,
"eval_cosine_ndcg@10": 0.5303275584101319,
"eval_cosine_precision@1": 0.559,
"eval_cosine_precision@10": 0.24805000000000002,
"eval_cosine_precision@3": 0.44783333333333336,
"eval_cosine_precision@5": 0.36169999999999997,
"eval_cosine_recall@1": 0.17592097493397865,
"eval_cosine_recall@10": 0.5413604201504909,
"eval_cosine_recall@3": 0.3544624337101741,
"eval_cosine_recall@5": 0.43704739404031684,
"eval_loss": 0.4636768698692322,
"eval_runtime": 279.9611,
"eval_samples_per_second": 7.144,
"eval_steps_per_second": 0.114,
"step": 79
},
{
"epoch": 0.20224,
"grad_norm": 4.111133575439453,
"learning_rate": 4.4452347083926033e-05,
"loss": 0.369,
"step": 158
},
{
"epoch": 0.20224,
"eval_cosine_accuracy@1": 0.628,
"eval_cosine_accuracy@10": 0.879,
"eval_cosine_accuracy@3": 0.7655,
"eval_cosine_accuracy@5": 0.817,
"eval_cosine_map@100": 0.5197308098624411,
"eval_cosine_mrr@10": 0.7098900793650795,
"eval_cosine_ndcg@10": 0.6001941938931266,
"eval_cosine_precision@1": 0.628,
"eval_cosine_precision@10": 0.27375,
"eval_cosine_precision@3": 0.49133333333333334,
"eval_cosine_precision@5": 0.3966,
"eval_cosine_recall@1": 0.2136060048255348,
"eval_cosine_recall@10": 0.607901124413225,
"eval_cosine_recall@3": 0.4115900630337399,
"eval_cosine_recall@5": 0.4973507574392067,
"eval_loss": 0.29928770661354065,
"eval_runtime": 278.2977,
"eval_samples_per_second": 7.187,
"eval_steps_per_second": 0.115,
"step": 158
},
{
"epoch": 0.30336,
"grad_norm": 2.7525439262390137,
"learning_rate": 3.883357041251778e-05,
"loss": 0.2779,
"step": 237
},
{
"epoch": 0.30336,
"eval_cosine_accuracy@1": 0.637,
"eval_cosine_accuracy@10": 0.8895,
"eval_cosine_accuracy@3": 0.779,
"eval_cosine_accuracy@5": 0.829,
"eval_cosine_map@100": 0.5359616957068315,
"eval_cosine_mrr@10": 0.7192515873015861,
"eval_cosine_ndcg@10": 0.6154242844350049,
"eval_cosine_precision@1": 0.637,
"eval_cosine_precision@10": 0.28485000000000005,
"eval_cosine_precision@3": 0.5083333333333333,
"eval_cosine_precision@5": 0.4151,
"eval_cosine_recall@1": 0.21074726147683806,
"eval_cosine_recall@10": 0.6277595587315691,
"eval_cosine_recall@3": 0.4188129487357172,
"eval_cosine_recall@5": 0.5121328631227711,
"eval_loss": 0.25021883845329285,
"eval_runtime": 277.4835,
"eval_samples_per_second": 7.208,
"eval_steps_per_second": 0.115,
"step": 237
},
{
"epoch": 0.40448,
"grad_norm": 2.8394055366516113,
"learning_rate": 3.321479374110953e-05,
"loss": 0.224,
"step": 316
},
{
"epoch": 0.40448,
"eval_cosine_accuracy@1": 0.668,
"eval_cosine_accuracy@10": 0.914,
"eval_cosine_accuracy@3": 0.8115,
"eval_cosine_accuracy@5": 0.865,
"eval_cosine_map@100": 0.5721924042351211,
"eval_cosine_mrr@10": 0.7500142857142853,
"eval_cosine_ndcg@10": 0.6495973268729938,
"eval_cosine_precision@1": 0.668,
"eval_cosine_precision@10": 0.2962,
"eval_cosine_precision@3": 0.5318333333333333,
"eval_cosine_precision@5": 0.4289000000000001,
"eval_cosine_recall@1": 0.23326866628390464,
"eval_cosine_recall@10": 0.6563189809120494,
"eval_cosine_recall@3": 0.45311345100760037,
"eval_cosine_recall@5": 0.5465839152881188,
"eval_loss": 0.21039120852947235,
"eval_runtime": 277.7261,
"eval_samples_per_second": 7.201,
"eval_steps_per_second": 0.115,
"step": 316
},
{
"epoch": 0.5056,
"grad_norm": 3.2892227172851562,
"learning_rate": 2.759601706970128e-05,
"loss": 0.1912,
"step": 395
},
{
"epoch": 0.5056,
"eval_cosine_accuracy@1": 0.6665,
"eval_cosine_accuracy@10": 0.9255,
"eval_cosine_accuracy@3": 0.8095,
"eval_cosine_accuracy@5": 0.8635,
"eval_cosine_map@100": 0.5806000470832352,
"eval_cosine_mrr@10": 0.7500138888888884,
"eval_cosine_ndcg@10": 0.6572566525532144,
"eval_cosine_precision@1": 0.6665,
"eval_cosine_precision@10": 0.30145000000000005,
"eval_cosine_precision@3": 0.534,
"eval_cosine_precision@5": 0.43610000000000004,
"eval_cosine_recall@1": 0.23175847262295138,
"eval_cosine_recall@10": 0.6703179486558828,
"eval_cosine_recall@3": 0.44944904156805865,
"eval_cosine_recall@5": 0.554136889607487,
"eval_loss": 0.16795697808265686,
"eval_runtime": 277.2687,
"eval_samples_per_second": 7.213,
"eval_steps_per_second": 0.115,
"step": 395
},
{
"epoch": 0.60672,
"grad_norm": 2.89556622505188,
"learning_rate": 2.197724039829303e-05,
"loss": 0.1624,
"step": 474
},
{
"epoch": 0.60672,
"eval_cosine_accuracy@1": 0.6975,
"eval_cosine_accuracy@10": 0.9355,
"eval_cosine_accuracy@3": 0.8375,
"eval_cosine_accuracy@5": 0.887,
"eval_cosine_map@100": 0.6093090009951994,
"eval_cosine_mrr@10": 0.7765077380952375,
"eval_cosine_ndcg@10": 0.6851203967143871,
"eval_cosine_precision@1": 0.6975,
"eval_cosine_precision@10": 0.3129,
"eval_cosine_precision@3": 0.5613333333333332,
"eval_cosine_precision@5": 0.4528000000000001,
"eval_cosine_recall@1": 0.2445300283996011,
"eval_cosine_recall@10": 0.6928329252080592,
"eval_cosine_recall@3": 0.4812278629980086,
"eval_cosine_recall@5": 0.5788771761374806,
"eval_loss": 0.15388070046901703,
"eval_runtime": 276.9338,
"eval_samples_per_second": 7.222,
"eval_steps_per_second": 0.116,
"step": 474
},
{
"epoch": 0.70784,
"grad_norm": 1.8849042654037476,
"learning_rate": 1.6358463726884778e-05,
"loss": 0.144,
"step": 553
},
{
"epoch": 0.70784,
"eval_cosine_accuracy@1": 0.7125,
"eval_cosine_accuracy@10": 0.942,
"eval_cosine_accuracy@3": 0.8455,
"eval_cosine_accuracy@5": 0.8935,
"eval_cosine_map@100": 0.6286298958450689,
"eval_cosine_mrr@10": 0.7904275793650791,
"eval_cosine_ndcg@10": 0.7012467907285034,
"eval_cosine_precision@1": 0.7125,
"eval_cosine_precision@10": 0.31815,
"eval_cosine_precision@3": 0.5643333333333334,
"eval_cosine_precision@5": 0.46270000000000006,
"eval_cosine_recall@1": 0.2580733056498445,
"eval_cosine_recall@10": 0.7046707449914186,
"eval_cosine_recall@3": 0.4866446364412588,
"eval_cosine_recall@5": 0.589748134227783,
"eval_loss": 0.14380718767642975,
"eval_runtime": 276.988,
"eval_samples_per_second": 7.221,
"eval_steps_per_second": 0.116,
"step": 553
},
{
"epoch": 0.80896,
"grad_norm": 1.348805546760559,
"learning_rate": 1.073968705547653e-05,
"loss": 0.1189,
"step": 632
},
{
"epoch": 0.80896,
"eval_cosine_accuracy@1": 0.695,
"eval_cosine_accuracy@10": 0.947,
"eval_cosine_accuracy@3": 0.843,
"eval_cosine_accuracy@5": 0.896,
"eval_cosine_map@100": 0.6235293375316939,
"eval_cosine_mrr@10": 0.779907539682539,
"eval_cosine_ndcg@10": 0.69591191846525,
"eval_cosine_precision@1": 0.695,
"eval_cosine_precision@10": 0.31725,
"eval_cosine_precision@3": 0.5648333333333333,
"eval_cosine_precision@5": 0.463,
"eval_cosine_recall@1": 0.24691620939983008,
"eval_cosine_recall@10": 0.706279289199774,
"eval_cosine_recall@3": 0.48786519068806705,
"eval_cosine_recall@5": 0.5948458444043049,
"eval_loss": 0.12694863975048065,
"eval_runtime": 276.686,
"eval_samples_per_second": 7.228,
"eval_steps_per_second": 0.116,
"step": 632
},
{
"epoch": 0.91008,
"grad_norm": 1.5586862564086914,
"learning_rate": 5.120910384068279e-06,
"loss": 0.1048,
"step": 711
},
{
"epoch": 0.91008,
"eval_cosine_accuracy@1": 0.7235,
"eval_cosine_accuracy@10": 0.9485,
"eval_cosine_accuracy@3": 0.86,
"eval_cosine_accuracy@5": 0.901,
"eval_cosine_map@100": 0.6422912711695729,
"eval_cosine_mrr@10": 0.8008688492063484,
"eval_cosine_ndcg@10": 0.712959315512067,
"eval_cosine_precision@1": 0.7235,
"eval_cosine_precision@10": 0.32180000000000003,
"eval_cosine_precision@3": 0.5791666666666666,
"eval_cosine_precision@5": 0.4691,
"eval_cosine_recall@1": 0.2616417105499789,
"eval_cosine_recall@10": 0.715349774186521,
"eval_cosine_recall@3": 0.505012640408982,
"eval_cosine_recall@5": 0.6035736332324141,
"eval_loss": 0.11470390856266022,
"eval_runtime": 276.7545,
"eval_samples_per_second": 7.227,
"eval_steps_per_second": 0.116,
"step": 711
}
],
"logging_steps": 79,
"max_steps": 782,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 79,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}