File size: 11,077 Bytes
a78bb43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 |
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 79,
"global_step": 782,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_cosine_accuracy@1": 0.3175,
"eval_cosine_accuracy@10": 0.5405,
"eval_cosine_accuracy@3": 0.4285,
"eval_cosine_accuracy@5": 0.472,
"eval_cosine_map@100": 0.20295798777707474,
"eval_cosine_mrr@10": 0.38407797619047646,
"eval_cosine_ndcg@10": 0.26664320865382884,
"eval_cosine_precision@1": 0.3175,
"eval_cosine_precision@10": 0.13325,
"eval_cosine_precision@3": 0.239,
"eval_cosine_precision@5": 0.192,
"eval_cosine_recall@1": 0.08617124715273779,
"eval_cosine_recall@10": 0.25484566308493556,
"eval_cosine_recall@3": 0.16493508136064844,
"eval_cosine_recall@5": 0.20128376057243394,
"eval_loss": 2.716991424560547,
"eval_runtime": 93.6318,
"eval_samples_per_second": 21.36,
"eval_steps_per_second": 0.342,
"step": 0
},
{
"epoch": 0.10112,
"grad_norm": 15.54442024230957,
"learning_rate": 4.936708860759494e-05,
"loss": 0.6977,
"step": 79
},
{
"epoch": 0.10112,
"eval_cosine_accuracy@1": 0.611,
"eval_cosine_accuracy@10": 0.867,
"eval_cosine_accuracy@3": 0.7485,
"eval_cosine_accuracy@5": 0.802,
"eval_cosine_map@100": 0.4842158123573022,
"eval_cosine_mrr@10": 0.6918936507936507,
"eval_cosine_ndcg@10": 0.5731052776794775,
"eval_cosine_precision@1": 0.611,
"eval_cosine_precision@10": 0.26645,
"eval_cosine_precision@3": 0.48033333333333333,
"eval_cosine_precision@5": 0.3886,
"eval_cosine_recall@1": 0.19547386901725833,
"eval_cosine_recall@10": 0.582591867678025,
"eval_cosine_recall@3": 0.38178869386096725,
"eval_cosine_recall@5": 0.47092166449135925,
"eval_loss": 0.3820127546787262,
"eval_runtime": 94.5547,
"eval_samples_per_second": 21.152,
"eval_steps_per_second": 0.338,
"step": 79
},
{
"epoch": 0.20224,
"grad_norm": 11.99033260345459,
"learning_rate": 4.4452347083926033e-05,
"loss": 0.3696,
"step": 158
},
{
"epoch": 0.20224,
"eval_cosine_accuracy@1": 0.6395,
"eval_cosine_accuracy@10": 0.8815,
"eval_cosine_accuracy@3": 0.771,
"eval_cosine_accuracy@5": 0.8215,
"eval_cosine_map@100": 0.518232118499808,
"eval_cosine_mrr@10": 0.7167617063492063,
"eval_cosine_ndcg@10": 0.6015016271902943,
"eval_cosine_precision@1": 0.6395,
"eval_cosine_precision@10": 0.27385000000000004,
"eval_cosine_precision@3": 0.4965,
"eval_cosine_precision@5": 0.39940000000000003,
"eval_cosine_recall@1": 0.21741346374345538,
"eval_cosine_recall@10": 0.6032130601430059,
"eval_cosine_recall@3": 0.411628698268123,
"eval_cosine_recall@5": 0.49754964310983035,
"eval_loss": 0.33004653453826904,
"eval_runtime": 94.5446,
"eval_samples_per_second": 21.154,
"eval_steps_per_second": 0.338,
"step": 158
},
{
"epoch": 0.30336,
"grad_norm": 7.6528425216674805,
"learning_rate": 3.883357041251778e-05,
"loss": 0.3079,
"step": 237
},
{
"epoch": 0.30336,
"eval_cosine_accuracy@1": 0.6435,
"eval_cosine_accuracy@10": 0.878,
"eval_cosine_accuracy@3": 0.775,
"eval_cosine_accuracy@5": 0.8215,
"eval_cosine_map@100": 0.5383738084900617,
"eval_cosine_mrr@10": 0.7194515873015865,
"eval_cosine_ndcg@10": 0.6155692521247632,
"eval_cosine_precision@1": 0.6435,
"eval_cosine_precision@10": 0.2824,
"eval_cosine_precision@3": 0.5153333333333333,
"eval_cosine_precision@5": 0.4173,
"eval_cosine_recall@1": 0.2183039885889674,
"eval_cosine_recall@10": 0.6148750852993538,
"eval_cosine_recall@3": 0.4253399672316602,
"eval_cosine_recall@5": 0.5135875884980249,
"eval_loss": 0.26221561431884766,
"eval_runtime": 94.3747,
"eval_samples_per_second": 21.192,
"eval_steps_per_second": 0.339,
"step": 237
},
{
"epoch": 0.40448,
"grad_norm": 7.216766834259033,
"learning_rate": 3.321479374110953e-05,
"loss": 0.2471,
"step": 316
},
{
"epoch": 0.40448,
"eval_cosine_accuracy@1": 0.6615,
"eval_cosine_accuracy@10": 0.9055,
"eval_cosine_accuracy@3": 0.8035,
"eval_cosine_accuracy@5": 0.8565,
"eval_cosine_map@100": 0.5627360990015163,
"eval_cosine_mrr@10": 0.7432835317460309,
"eval_cosine_ndcg@10": 0.6395267347742315,
"eval_cosine_precision@1": 0.6615,
"eval_cosine_precision@10": 0.2906,
"eval_cosine_precision@3": 0.524,
"eval_cosine_precision@5": 0.4232,
"eval_cosine_recall@1": 0.23236891554586947,
"eval_cosine_recall@10": 0.6438819108166526,
"eval_cosine_recall@3": 0.44429773626339913,
"eval_cosine_recall@5": 0.536914325811876,
"eval_loss": 0.23154108226299286,
"eval_runtime": 94.3188,
"eval_samples_per_second": 21.205,
"eval_steps_per_second": 0.339,
"step": 316
},
{
"epoch": 0.5056,
"grad_norm": 6.026930332183838,
"learning_rate": 2.759601706970128e-05,
"loss": 0.2129,
"step": 395
},
{
"epoch": 0.5056,
"eval_cosine_accuracy@1": 0.691,
"eval_cosine_accuracy@10": 0.923,
"eval_cosine_accuracy@3": 0.8285,
"eval_cosine_accuracy@5": 0.877,
"eval_cosine_map@100": 0.5988053929521469,
"eval_cosine_mrr@10": 0.7697992063492058,
"eval_cosine_ndcg@10": 0.6734974207335584,
"eval_cosine_precision@1": 0.691,
"eval_cosine_precision@10": 0.30469999999999997,
"eval_cosine_precision@3": 0.5516666666666665,
"eval_cosine_precision@5": 0.4453000000000001,
"eval_cosine_recall@1": 0.2451002751627125,
"eval_cosine_recall@10": 0.6720344352159927,
"eval_cosine_recall@3": 0.4752111086795837,
"eval_cosine_recall@5": 0.5689998210708447,
"eval_loss": 0.1887313276529312,
"eval_runtime": 94.6158,
"eval_samples_per_second": 21.138,
"eval_steps_per_second": 0.338,
"step": 395
},
{
"epoch": 0.60672,
"grad_norm": 6.972903728485107,
"learning_rate": 2.197724039829303e-05,
"loss": 0.1782,
"step": 474
},
{
"epoch": 0.60672,
"eval_cosine_accuracy@1": 0.7035,
"eval_cosine_accuracy@10": 0.9305,
"eval_cosine_accuracy@3": 0.842,
"eval_cosine_accuracy@5": 0.888,
"eval_cosine_map@100": 0.6071005790786185,
"eval_cosine_mrr@10": 0.781702777777777,
"eval_cosine_ndcg@10": 0.6808092483733388,
"eval_cosine_precision@1": 0.7035,
"eval_cosine_precision@10": 0.3067,
"eval_cosine_precision@3": 0.562,
"eval_cosine_precision@5": 0.4495,
"eval_cosine_recall@1": 0.25034756468684327,
"eval_cosine_recall@10": 0.6768599428870521,
"eval_cosine_recall@3": 0.4826217978486824,
"eval_cosine_recall@5": 0.5710397017343101,
"eval_loss": 0.17932943999767303,
"eval_runtime": 94.3886,
"eval_samples_per_second": 21.189,
"eval_steps_per_second": 0.339,
"step": 474
},
{
"epoch": 0.70784,
"grad_norm": 5.119401931762695,
"learning_rate": 1.6358463726884778e-05,
"loss": 0.1601,
"step": 553
},
{
"epoch": 0.70784,
"eval_cosine_accuracy@1": 0.7265,
"eval_cosine_accuracy@10": 0.942,
"eval_cosine_accuracy@3": 0.862,
"eval_cosine_accuracy@5": 0.903,
"eval_cosine_map@100": 0.6338351727535749,
"eval_cosine_mrr@10": 0.8012726190476185,
"eval_cosine_ndcg@10": 0.7052858216291891,
"eval_cosine_precision@1": 0.7265,
"eval_cosine_precision@10": 0.31645000000000006,
"eval_cosine_precision@3": 0.5746666666666667,
"eval_cosine_precision@5": 0.4643,
"eval_cosine_recall@1": 0.26731400185268916,
"eval_cosine_recall@10": 0.6964398448419685,
"eval_cosine_recall@3": 0.5006986582097148,
"eval_cosine_recall@5": 0.5936279161380148,
"eval_loss": 0.1466195434331894,
"eval_runtime": 94.3179,
"eval_samples_per_second": 21.205,
"eval_steps_per_second": 0.339,
"step": 553
},
{
"epoch": 0.80896,
"grad_norm": 5.0713982582092285,
"learning_rate": 1.073968705547653e-05,
"loss": 0.1269,
"step": 632
},
{
"epoch": 0.80896,
"eval_cosine_accuracy@1": 0.7235,
"eval_cosine_accuracy@10": 0.9405,
"eval_cosine_accuracy@3": 0.858,
"eval_cosine_accuracy@5": 0.901,
"eval_cosine_map@100": 0.6337029110249771,
"eval_cosine_mrr@10": 0.7987942460317458,
"eval_cosine_ndcg@10": 0.702721958317939,
"eval_cosine_precision@1": 0.7235,
"eval_cosine_precision@10": 0.31520000000000004,
"eval_cosine_precision@3": 0.573,
"eval_cosine_precision@5": 0.462,
"eval_cosine_recall@1": 0.2652290650574583,
"eval_cosine_recall@10": 0.6960502030808166,
"eval_cosine_recall@3": 0.4984801971314316,
"eval_cosine_recall@5": 0.5914106569114703,
"eval_loss": 0.14512212574481964,
"eval_runtime": 94.7293,
"eval_samples_per_second": 21.113,
"eval_steps_per_second": 0.338,
"step": 632
},
{
"epoch": 0.91008,
"grad_norm": 4.609349250793457,
"learning_rate": 5.120910384068279e-06,
"loss": 0.1047,
"step": 711
},
{
"epoch": 0.91008,
"eval_cosine_accuracy@1": 0.744,
"eval_cosine_accuracy@10": 0.9465,
"eval_cosine_accuracy@3": 0.873,
"eval_cosine_accuracy@5": 0.914,
"eval_cosine_map@100": 0.6546158733411345,
"eval_cosine_mrr@10": 0.8165956349206344,
"eval_cosine_ndcg@10": 0.7225101978855893,
"eval_cosine_precision@1": 0.744,
"eval_cosine_precision@10": 0.323,
"eval_cosine_precision@3": 0.5885,
"eval_cosine_precision@5": 0.4711,
"eval_cosine_recall@1": 0.27657015412151464,
"eval_cosine_recall@10": 0.7128635786880844,
"eval_cosine_recall@3": 0.5135658345260549,
"eval_cosine_recall@5": 0.6083090142946717,
"eval_loss": 0.11803647130727768,
"eval_runtime": 94.4018,
"eval_samples_per_second": 21.186,
"eval_steps_per_second": 0.339,
"step": 711
}
],
"logging_steps": 79,
"max_steps": 782,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 79,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}
|