{ "best_global_step": 38, "best_metric": 0.7260517487265687, "best_model_checkpoint": "MNLP_M3_document_encoder_sciqa/checkpoint-38", "epoch": 2.0, "eval_steps": 500, "global_step": 38, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5423728813559322, "grad_norm": 164.72093200683594, "learning_rate": 1.9987954562051724e-05, "loss": 22.4049, "step": 10 }, { "epoch": 1.0, "eval_dim_128_cosine_accuracy@1": 0.5452812202097236, "eval_dim_128_cosine_accuracy@10": 0.8760724499523356, "eval_dim_128_cosine_accuracy@3": 0.7416587225929456, "eval_dim_128_cosine_accuracy@5": 0.8074356530028599, "eval_dim_128_cosine_map@100": 0.6605612754102786, "eval_dim_128_cosine_mrr@10": 0.6559349796480402, "eval_dim_128_cosine_ndcg@10": 0.7092688022688834, "eval_dim_128_cosine_precision@1": 0.5452812202097236, "eval_dim_128_cosine_precision@10": 0.08760724499523356, "eval_dim_128_cosine_precision@3": 0.24721957419764853, "eval_dim_128_cosine_precision@5": 0.161487130600572, "eval_dim_128_cosine_recall@1": 0.5452812202097236, "eval_dim_128_cosine_recall@10": 0.8760724499523356, "eval_dim_128_cosine_recall@3": 0.7416587225929456, "eval_dim_128_cosine_recall@5": 0.8074356530028599, "eval_dim_192_cosine_accuracy@1": 0.5624404194470924, "eval_dim_192_cosine_accuracy@10": 0.8932316491897044, "eval_dim_192_cosine_accuracy@3": 0.7597712106768351, "eval_dim_192_cosine_accuracy@5": 0.8188751191611058, "eval_dim_192_cosine_map@100": 0.677245219852975, "eval_dim_192_cosine_mrr@10": 0.6730234388003697, "eval_dim_192_cosine_ndcg@10": 0.7262712999939527, "eval_dim_192_cosine_precision@1": 0.5624404194470924, "eval_dim_192_cosine_precision@10": 0.08932316491897044, "eval_dim_192_cosine_precision@3": 0.25325707022561167, "eval_dim_192_cosine_precision@5": 0.16377502383222117, "eval_dim_192_cosine_recall@1": 0.5624404194470924, "eval_dim_192_cosine_recall@10": 0.8932316491897044, "eval_dim_192_cosine_recall@3": 0.7597712106768351, "eval_dim_192_cosine_recall@5": 0.8188751191611058, "eval_dim_256_cosine_accuracy@1": 0.5653002859866539, "eval_dim_256_cosine_accuracy@10": 0.8960915157292659, "eval_dim_256_cosine_accuracy@3": 0.7683508102955195, "eval_dim_256_cosine_accuracy@5": 0.8236415633937083, "eval_dim_256_cosine_map@100": 0.6831583296339104, "eval_dim_256_cosine_mrr@10": 0.6786784844220503, "eval_dim_256_cosine_ndcg@10": 0.7314611486548883, "eval_dim_256_cosine_precision@1": 0.5653002859866539, "eval_dim_256_cosine_precision@10": 0.08960915157292659, "eval_dim_256_cosine_precision@3": 0.25611693676517316, "eval_dim_256_cosine_precision@5": 0.16472831267874166, "eval_dim_256_cosine_recall@1": 0.5653002859866539, "eval_dim_256_cosine_recall@10": 0.8960915157292659, "eval_dim_256_cosine_recall@3": 0.7683508102955195, "eval_dim_256_cosine_recall@5": 0.8236415633937083, "eval_dim_384_cosine_accuracy@1": 0.5786463298379408, "eval_dim_384_cosine_accuracy@10": 0.9075309818875119, "eval_dim_384_cosine_accuracy@3": 0.776930409914204, "eval_dim_384_cosine_accuracy@5": 0.8417540514775977, "eval_dim_384_cosine_map@100": 0.6932934943306605, "eval_dim_384_cosine_mrr@10": 0.6894563227261042, "eval_dim_384_cosine_ndcg@10": 0.7423737824827953, "eval_dim_384_cosine_precision@1": 0.5786463298379408, "eval_dim_384_cosine_precision@10": 0.0907530981887512, "eval_dim_384_cosine_precision@3": 0.2589768033047346, "eval_dim_384_cosine_precision@5": 0.16835081029551957, "eval_dim_384_cosine_recall@1": 0.5786463298379408, "eval_dim_384_cosine_recall@10": 0.9075309818875119, "eval_dim_384_cosine_recall@3": 0.776930409914204, "eval_dim_384_cosine_recall@5": 0.8417540514775977, "eval_dim_64_cosine_accuracy@1": 0.49285033365109626, "eval_dim_64_cosine_accuracy@10": 0.8274547187797903, "eval_dim_64_cosine_accuracy@3": 0.684461391801716, "eval_dim_64_cosine_accuracy@5": 0.7578646329837941, "eval_dim_64_cosine_map@100": 0.6088952628032813, "eval_dim_64_cosine_mrr@10": 0.6032237807738285, "eval_dim_64_cosine_ndcg@10": 0.6575406372744073, "eval_dim_64_cosine_precision@1": 0.49285033365109626, "eval_dim_64_cosine_precision@10": 0.08274547187797902, "eval_dim_64_cosine_precision@3": 0.2281537972672386, "eval_dim_64_cosine_precision@5": 0.1515729265967588, "eval_dim_64_cosine_recall@1": 0.49285033365109626, "eval_dim_64_cosine_recall@10": 0.8274547187797903, "eval_dim_64_cosine_recall@3": 0.684461391801716, "eval_dim_64_cosine_recall@5": 0.7578646329837941, "eval_dim_96_cosine_accuracy@1": 0.5214489990467112, "eval_dim_96_cosine_accuracy@10": 0.8636796949475691, "eval_dim_96_cosine_accuracy@3": 0.7264061010486177, "eval_dim_96_cosine_accuracy@5": 0.7893231649189705, "eval_dim_96_cosine_map@100": 0.6418431352074736, "eval_dim_96_cosine_mrr@10": 0.6369528046363133, "eval_dim_96_cosine_ndcg@10": 0.6919097155042885, "eval_dim_96_cosine_precision@1": 0.5214489990467112, "eval_dim_96_cosine_precision@10": 0.0863679694947569, "eval_dim_96_cosine_precision@3": 0.2421353670162059, "eval_dim_96_cosine_precision@5": 0.15786463298379408, "eval_dim_96_cosine_recall@1": 0.5214489990467112, "eval_dim_96_cosine_recall@10": 0.8636796949475691, "eval_dim_96_cosine_recall@3": 0.7264061010486177, "eval_dim_96_cosine_recall@5": 0.7893231649189705, "eval_runtime": 116.4269, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.6575406372744073, "eval_steps_per_second": 0.0, "step": 19 }, { "epoch": 1.0542372881355933, "grad_norm": 107.04779815673828, "learning_rate": 1.8577286100002723e-05, "loss": 16.6616, "step": 20 }, { "epoch": 1.5966101694915253, "grad_norm": 97.63832092285156, "learning_rate": 1.5141027441932217e-05, "loss": 16.8367, "step": 30 }, { "epoch": 2.0, "eval_dim_128_cosine_accuracy@1": 0.567206863679695, "eval_dim_128_cosine_accuracy@10": 0.886558627264061, "eval_dim_128_cosine_accuracy@3": 0.7607244995233555, "eval_dim_128_cosine_accuracy@5": 0.8236415633937083, "eval_dim_128_cosine_map@100": 0.6790430112153837, "eval_dim_128_cosine_mrr@10": 0.6746886679679823, "eval_dim_128_cosine_ndcg@10": 0.7260517487265687, "eval_dim_128_cosine_precision@1": 0.567206863679695, "eval_dim_128_cosine_precision@10": 0.0886558627264061, "eval_dim_128_cosine_precision@3": 0.25357483317445184, "eval_dim_128_cosine_precision@5": 0.16472831267874166, "eval_dim_128_cosine_recall@1": 0.567206863679695, "eval_dim_128_cosine_recall@10": 0.886558627264061, "eval_dim_128_cosine_recall@3": 0.7607244995233555, "eval_dim_128_cosine_recall@5": 0.8236415633937083, "eval_dim_192_cosine_accuracy@1": 0.5805529075309819, "eval_dim_192_cosine_accuracy@10": 0.9008579599618685, "eval_dim_192_cosine_accuracy@3": 0.782650142993327, "eval_dim_192_cosine_accuracy@5": 0.8322211630123928, "eval_dim_192_cosine_map@100": 0.6964841260809953, "eval_dim_192_cosine_mrr@10": 0.6923562879234952, "eval_dim_192_cosine_ndcg@10": 0.7430712975035773, "eval_dim_192_cosine_precision@1": 0.5805529075309819, "eval_dim_192_cosine_precision@10": 0.09008579599618685, "eval_dim_192_cosine_precision@3": 0.26088338099777564, "eval_dim_192_cosine_precision@5": 0.16644423260247856, "eval_dim_192_cosine_recall@1": 0.5805529075309819, "eval_dim_192_cosine_recall@10": 0.9008579599618685, "eval_dim_192_cosine_recall@3": 0.782650142993327, "eval_dim_192_cosine_recall@5": 0.8322211630123928, "eval_dim_256_cosine_accuracy@1": 0.5919923736892279, "eval_dim_256_cosine_accuracy@10": 0.9142040038131554, "eval_dim_256_cosine_accuracy@3": 0.7902764537654909, "eval_dim_256_cosine_accuracy@5": 0.8360343183984748, "eval_dim_256_cosine_map@100": 0.7038093293311698, "eval_dim_256_cosine_mrr@10": 0.700305279404422, "eval_dim_256_cosine_ndcg@10": 0.7520267351833514, "eval_dim_256_cosine_precision@1": 0.5919923736892279, "eval_dim_256_cosine_precision@10": 0.09142040038131555, "eval_dim_256_cosine_precision@3": 0.26342548458849696, "eval_dim_256_cosine_precision@5": 0.16720686367969492, "eval_dim_256_cosine_recall@1": 0.5919923736892279, "eval_dim_256_cosine_recall@10": 0.9142040038131554, "eval_dim_256_cosine_recall@3": 0.7902764537654909, "eval_dim_256_cosine_recall@5": 0.8360343183984748, "eval_dim_384_cosine_accuracy@1": 0.6015252621544328, "eval_dim_384_cosine_accuracy@10": 0.9199237368922784, "eval_dim_384_cosine_accuracy@3": 0.7959961868446139, "eval_dim_384_cosine_accuracy@5": 0.8531935176358436, "eval_dim_384_cosine_map@100": 0.713601684515785, "eval_dim_384_cosine_mrr@10": 0.7104082497314151, "eval_dim_384_cosine_ndcg@10": 0.761241503632434, "eval_dim_384_cosine_precision@1": 0.6015252621544328, "eval_dim_384_cosine_precision@10": 0.09199237368922783, "eval_dim_384_cosine_precision@3": 0.26533206228153794, "eval_dim_384_cosine_precision@5": 0.17063870352716873, "eval_dim_384_cosine_recall@1": 0.6015252621544328, "eval_dim_384_cosine_recall@10": 0.9199237368922784, "eval_dim_384_cosine_recall@3": 0.7959961868446139, "eval_dim_384_cosine_recall@5": 0.8531935176358436, "eval_dim_64_cosine_accuracy@1": 0.5138226882745471, "eval_dim_64_cosine_accuracy@10": 0.8341277407054337, "eval_dim_64_cosine_accuracy@3": 0.7016205910390848, "eval_dim_64_cosine_accuracy@5": 0.7645376549094376, "eval_dim_64_cosine_map@100": 0.6242158272303533, "eval_dim_64_cosine_mrr@10": 0.618670464690484, "eval_dim_64_cosine_ndcg@10": 0.6707950308444217, "eval_dim_64_cosine_precision@1": 0.5138226882745471, "eval_dim_64_cosine_precision@10": 0.08341277407054337, "eval_dim_64_cosine_precision@3": 0.2338735303463616, "eval_dim_64_cosine_precision@5": 0.1529075309818875, "eval_dim_64_cosine_recall@1": 0.5138226882745471, "eval_dim_64_cosine_recall@10": 0.8341277407054337, "eval_dim_64_cosine_recall@3": 0.7016205910390848, "eval_dim_64_cosine_recall@5": 0.7645376549094376, "eval_dim_96_cosine_accuracy@1": 0.5471877979027645, "eval_dim_96_cosine_accuracy@10": 0.8722592945662536, "eval_dim_96_cosine_accuracy@3": 0.7407054337464252, "eval_dim_96_cosine_accuracy@5": 0.8017159199237369, "eval_dim_96_cosine_map@100": 0.6622003643008398, "eval_dim_96_cosine_mrr@10": 0.6576811627097615, "eval_dim_96_cosine_ndcg@10": 0.7097194683573752, "eval_dim_96_cosine_precision@1": 0.5471877979027645, "eval_dim_96_cosine_precision@10": 0.08722592945662536, "eval_dim_96_cosine_precision@3": 0.2469018112488084, "eval_dim_96_cosine_precision@5": 0.16034318398474737, "eval_dim_96_cosine_recall@1": 0.5471877979027645, "eval_dim_96_cosine_recall@10": 0.8722592945662536, "eval_dim_96_cosine_recall@3": 0.7407054337464252, "eval_dim_96_cosine_recall@5": 0.8017159199237369, "eval_runtime": 119.8934, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.6707950308444217, "eval_steps_per_second": 0.0, "step": 38 } ], "logging_steps": 10, "max_steps": 72, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }