File size: 12,362 Bytes
4c1bea0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 |
{
"best_global_step": 38,
"best_metric": 0.7260517487265687,
"best_model_checkpoint": "MNLP_M3_document_encoder_sciqa/checkpoint-38",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 38,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5423728813559322,
"grad_norm": 164.72093200683594,
"learning_rate": 1.9987954562051724e-05,
"loss": 22.4049,
"step": 10
},
{
"epoch": 1.0,
"eval_dim_128_cosine_accuracy@1": 0.5452812202097236,
"eval_dim_128_cosine_accuracy@10": 0.8760724499523356,
"eval_dim_128_cosine_accuracy@3": 0.7416587225929456,
"eval_dim_128_cosine_accuracy@5": 0.8074356530028599,
"eval_dim_128_cosine_map@100": 0.6605612754102786,
"eval_dim_128_cosine_mrr@10": 0.6559349796480402,
"eval_dim_128_cosine_ndcg@10": 0.7092688022688834,
"eval_dim_128_cosine_precision@1": 0.5452812202097236,
"eval_dim_128_cosine_precision@10": 0.08760724499523356,
"eval_dim_128_cosine_precision@3": 0.24721957419764853,
"eval_dim_128_cosine_precision@5": 0.161487130600572,
"eval_dim_128_cosine_recall@1": 0.5452812202097236,
"eval_dim_128_cosine_recall@10": 0.8760724499523356,
"eval_dim_128_cosine_recall@3": 0.7416587225929456,
"eval_dim_128_cosine_recall@5": 0.8074356530028599,
"eval_dim_192_cosine_accuracy@1": 0.5624404194470924,
"eval_dim_192_cosine_accuracy@10": 0.8932316491897044,
"eval_dim_192_cosine_accuracy@3": 0.7597712106768351,
"eval_dim_192_cosine_accuracy@5": 0.8188751191611058,
"eval_dim_192_cosine_map@100": 0.677245219852975,
"eval_dim_192_cosine_mrr@10": 0.6730234388003697,
"eval_dim_192_cosine_ndcg@10": 0.7262712999939527,
"eval_dim_192_cosine_precision@1": 0.5624404194470924,
"eval_dim_192_cosine_precision@10": 0.08932316491897044,
"eval_dim_192_cosine_precision@3": 0.25325707022561167,
"eval_dim_192_cosine_precision@5": 0.16377502383222117,
"eval_dim_192_cosine_recall@1": 0.5624404194470924,
"eval_dim_192_cosine_recall@10": 0.8932316491897044,
"eval_dim_192_cosine_recall@3": 0.7597712106768351,
"eval_dim_192_cosine_recall@5": 0.8188751191611058,
"eval_dim_256_cosine_accuracy@1": 0.5653002859866539,
"eval_dim_256_cosine_accuracy@10": 0.8960915157292659,
"eval_dim_256_cosine_accuracy@3": 0.7683508102955195,
"eval_dim_256_cosine_accuracy@5": 0.8236415633937083,
"eval_dim_256_cosine_map@100": 0.6831583296339104,
"eval_dim_256_cosine_mrr@10": 0.6786784844220503,
"eval_dim_256_cosine_ndcg@10": 0.7314611486548883,
"eval_dim_256_cosine_precision@1": 0.5653002859866539,
"eval_dim_256_cosine_precision@10": 0.08960915157292659,
"eval_dim_256_cosine_precision@3": 0.25611693676517316,
"eval_dim_256_cosine_precision@5": 0.16472831267874166,
"eval_dim_256_cosine_recall@1": 0.5653002859866539,
"eval_dim_256_cosine_recall@10": 0.8960915157292659,
"eval_dim_256_cosine_recall@3": 0.7683508102955195,
"eval_dim_256_cosine_recall@5": 0.8236415633937083,
"eval_dim_384_cosine_accuracy@1": 0.5786463298379408,
"eval_dim_384_cosine_accuracy@10": 0.9075309818875119,
"eval_dim_384_cosine_accuracy@3": 0.776930409914204,
"eval_dim_384_cosine_accuracy@5": 0.8417540514775977,
"eval_dim_384_cosine_map@100": 0.6932934943306605,
"eval_dim_384_cosine_mrr@10": 0.6894563227261042,
"eval_dim_384_cosine_ndcg@10": 0.7423737824827953,
"eval_dim_384_cosine_precision@1": 0.5786463298379408,
"eval_dim_384_cosine_precision@10": 0.0907530981887512,
"eval_dim_384_cosine_precision@3": 0.2589768033047346,
"eval_dim_384_cosine_precision@5": 0.16835081029551957,
"eval_dim_384_cosine_recall@1": 0.5786463298379408,
"eval_dim_384_cosine_recall@10": 0.9075309818875119,
"eval_dim_384_cosine_recall@3": 0.776930409914204,
"eval_dim_384_cosine_recall@5": 0.8417540514775977,
"eval_dim_64_cosine_accuracy@1": 0.49285033365109626,
"eval_dim_64_cosine_accuracy@10": 0.8274547187797903,
"eval_dim_64_cosine_accuracy@3": 0.684461391801716,
"eval_dim_64_cosine_accuracy@5": 0.7578646329837941,
"eval_dim_64_cosine_map@100": 0.6088952628032813,
"eval_dim_64_cosine_mrr@10": 0.6032237807738285,
"eval_dim_64_cosine_ndcg@10": 0.6575406372744073,
"eval_dim_64_cosine_precision@1": 0.49285033365109626,
"eval_dim_64_cosine_precision@10": 0.08274547187797902,
"eval_dim_64_cosine_precision@3": 0.2281537972672386,
"eval_dim_64_cosine_precision@5": 0.1515729265967588,
"eval_dim_64_cosine_recall@1": 0.49285033365109626,
"eval_dim_64_cosine_recall@10": 0.8274547187797903,
"eval_dim_64_cosine_recall@3": 0.684461391801716,
"eval_dim_64_cosine_recall@5": 0.7578646329837941,
"eval_dim_96_cosine_accuracy@1": 0.5214489990467112,
"eval_dim_96_cosine_accuracy@10": 0.8636796949475691,
"eval_dim_96_cosine_accuracy@3": 0.7264061010486177,
"eval_dim_96_cosine_accuracy@5": 0.7893231649189705,
"eval_dim_96_cosine_map@100": 0.6418431352074736,
"eval_dim_96_cosine_mrr@10": 0.6369528046363133,
"eval_dim_96_cosine_ndcg@10": 0.6919097155042885,
"eval_dim_96_cosine_precision@1": 0.5214489990467112,
"eval_dim_96_cosine_precision@10": 0.0863679694947569,
"eval_dim_96_cosine_precision@3": 0.2421353670162059,
"eval_dim_96_cosine_precision@5": 0.15786463298379408,
"eval_dim_96_cosine_recall@1": 0.5214489990467112,
"eval_dim_96_cosine_recall@10": 0.8636796949475691,
"eval_dim_96_cosine_recall@3": 0.7264061010486177,
"eval_dim_96_cosine_recall@5": 0.7893231649189705,
"eval_runtime": 116.4269,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.6575406372744073,
"eval_steps_per_second": 0.0,
"step": 19
},
{
"epoch": 1.0542372881355933,
"grad_norm": 107.04779815673828,
"learning_rate": 1.8577286100002723e-05,
"loss": 16.6616,
"step": 20
},
{
"epoch": 1.5966101694915253,
"grad_norm": 97.63832092285156,
"learning_rate": 1.5141027441932217e-05,
"loss": 16.8367,
"step": 30
},
{
"epoch": 2.0,
"eval_dim_128_cosine_accuracy@1": 0.567206863679695,
"eval_dim_128_cosine_accuracy@10": 0.886558627264061,
"eval_dim_128_cosine_accuracy@3": 0.7607244995233555,
"eval_dim_128_cosine_accuracy@5": 0.8236415633937083,
"eval_dim_128_cosine_map@100": 0.6790430112153837,
"eval_dim_128_cosine_mrr@10": 0.6746886679679823,
"eval_dim_128_cosine_ndcg@10": 0.7260517487265687,
"eval_dim_128_cosine_precision@1": 0.567206863679695,
"eval_dim_128_cosine_precision@10": 0.0886558627264061,
"eval_dim_128_cosine_precision@3": 0.25357483317445184,
"eval_dim_128_cosine_precision@5": 0.16472831267874166,
"eval_dim_128_cosine_recall@1": 0.567206863679695,
"eval_dim_128_cosine_recall@10": 0.886558627264061,
"eval_dim_128_cosine_recall@3": 0.7607244995233555,
"eval_dim_128_cosine_recall@5": 0.8236415633937083,
"eval_dim_192_cosine_accuracy@1": 0.5805529075309819,
"eval_dim_192_cosine_accuracy@10": 0.9008579599618685,
"eval_dim_192_cosine_accuracy@3": 0.782650142993327,
"eval_dim_192_cosine_accuracy@5": 0.8322211630123928,
"eval_dim_192_cosine_map@100": 0.6964841260809953,
"eval_dim_192_cosine_mrr@10": 0.6923562879234952,
"eval_dim_192_cosine_ndcg@10": 0.7430712975035773,
"eval_dim_192_cosine_precision@1": 0.5805529075309819,
"eval_dim_192_cosine_precision@10": 0.09008579599618685,
"eval_dim_192_cosine_precision@3": 0.26088338099777564,
"eval_dim_192_cosine_precision@5": 0.16644423260247856,
"eval_dim_192_cosine_recall@1": 0.5805529075309819,
"eval_dim_192_cosine_recall@10": 0.9008579599618685,
"eval_dim_192_cosine_recall@3": 0.782650142993327,
"eval_dim_192_cosine_recall@5": 0.8322211630123928,
"eval_dim_256_cosine_accuracy@1": 0.5919923736892279,
"eval_dim_256_cosine_accuracy@10": 0.9142040038131554,
"eval_dim_256_cosine_accuracy@3": 0.7902764537654909,
"eval_dim_256_cosine_accuracy@5": 0.8360343183984748,
"eval_dim_256_cosine_map@100": 0.7038093293311698,
"eval_dim_256_cosine_mrr@10": 0.700305279404422,
"eval_dim_256_cosine_ndcg@10": 0.7520267351833514,
"eval_dim_256_cosine_precision@1": 0.5919923736892279,
"eval_dim_256_cosine_precision@10": 0.09142040038131555,
"eval_dim_256_cosine_precision@3": 0.26342548458849696,
"eval_dim_256_cosine_precision@5": 0.16720686367969492,
"eval_dim_256_cosine_recall@1": 0.5919923736892279,
"eval_dim_256_cosine_recall@10": 0.9142040038131554,
"eval_dim_256_cosine_recall@3": 0.7902764537654909,
"eval_dim_256_cosine_recall@5": 0.8360343183984748,
"eval_dim_384_cosine_accuracy@1": 0.6015252621544328,
"eval_dim_384_cosine_accuracy@10": 0.9199237368922784,
"eval_dim_384_cosine_accuracy@3": 0.7959961868446139,
"eval_dim_384_cosine_accuracy@5": 0.8531935176358436,
"eval_dim_384_cosine_map@100": 0.713601684515785,
"eval_dim_384_cosine_mrr@10": 0.7104082497314151,
"eval_dim_384_cosine_ndcg@10": 0.761241503632434,
"eval_dim_384_cosine_precision@1": 0.6015252621544328,
"eval_dim_384_cosine_precision@10": 0.09199237368922783,
"eval_dim_384_cosine_precision@3": 0.26533206228153794,
"eval_dim_384_cosine_precision@5": 0.17063870352716873,
"eval_dim_384_cosine_recall@1": 0.6015252621544328,
"eval_dim_384_cosine_recall@10": 0.9199237368922784,
"eval_dim_384_cosine_recall@3": 0.7959961868446139,
"eval_dim_384_cosine_recall@5": 0.8531935176358436,
"eval_dim_64_cosine_accuracy@1": 0.5138226882745471,
"eval_dim_64_cosine_accuracy@10": 0.8341277407054337,
"eval_dim_64_cosine_accuracy@3": 0.7016205910390848,
"eval_dim_64_cosine_accuracy@5": 0.7645376549094376,
"eval_dim_64_cosine_map@100": 0.6242158272303533,
"eval_dim_64_cosine_mrr@10": 0.618670464690484,
"eval_dim_64_cosine_ndcg@10": 0.6707950308444217,
"eval_dim_64_cosine_precision@1": 0.5138226882745471,
"eval_dim_64_cosine_precision@10": 0.08341277407054337,
"eval_dim_64_cosine_precision@3": 0.2338735303463616,
"eval_dim_64_cosine_precision@5": 0.1529075309818875,
"eval_dim_64_cosine_recall@1": 0.5138226882745471,
"eval_dim_64_cosine_recall@10": 0.8341277407054337,
"eval_dim_64_cosine_recall@3": 0.7016205910390848,
"eval_dim_64_cosine_recall@5": 0.7645376549094376,
"eval_dim_96_cosine_accuracy@1": 0.5471877979027645,
"eval_dim_96_cosine_accuracy@10": 0.8722592945662536,
"eval_dim_96_cosine_accuracy@3": 0.7407054337464252,
"eval_dim_96_cosine_accuracy@5": 0.8017159199237369,
"eval_dim_96_cosine_map@100": 0.6622003643008398,
"eval_dim_96_cosine_mrr@10": 0.6576811627097615,
"eval_dim_96_cosine_ndcg@10": 0.7097194683573752,
"eval_dim_96_cosine_precision@1": 0.5471877979027645,
"eval_dim_96_cosine_precision@10": 0.08722592945662536,
"eval_dim_96_cosine_precision@3": 0.2469018112488084,
"eval_dim_96_cosine_precision@5": 0.16034318398474737,
"eval_dim_96_cosine_recall@1": 0.5471877979027645,
"eval_dim_96_cosine_recall@10": 0.8722592945662536,
"eval_dim_96_cosine_recall@3": 0.7407054337464252,
"eval_dim_96_cosine_recall@5": 0.8017159199237369,
"eval_runtime": 119.8934,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.6707950308444217,
"eval_steps_per_second": 0.0,
"step": 38
}
],
"logging_steps": 10,
"max_steps": 72,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|