File size: 30,299 Bytes
6981c42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
{
  "best_global_step": 276,
  "best_metric": 0.12710943818092346,
  "best_model_checkpoint": "bkai-fine-tuned-legal/checkpoint-276",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 3.53977108001709,
      "learning_rate": 2.4107142857142858e-05,
      "loss": 1.016345148501189,
      "step": 46
    },
    {
      "epoch": 1.0,
      "eval_dim_128_cosine_accuracy@1": 0.213777715799749,
      "eval_dim_128_cosine_accuracy@10": 0.670199414307628,
      "eval_dim_128_cosine_accuracy@3": 0.41361037512201926,
      "eval_dim_128_cosine_accuracy@5": 0.5034165388369823,
      "eval_dim_128_cosine_map@100": 0.35224211836983055,
      "eval_dim_128_cosine_mrr@10": 0.3412989488083612,
      "eval_dim_128_cosine_ndcg@10": 0.41868129352359357,
      "eval_dim_128_cosine_precision@1": 0.213777715799749,
      "eval_dim_128_cosine_precision@10": 0.06701994143076279,
      "eval_dim_128_cosine_precision@3": 0.13787012504067309,
      "eval_dim_128_cosine_precision@5": 0.10068330776739645,
      "eval_dim_128_cosine_recall@1": 0.213777715799749,
      "eval_dim_128_cosine_recall@10": 0.670199414307628,
      "eval_dim_128_cosine_recall@3": 0.41361037512201926,
      "eval_dim_128_cosine_recall@5": 0.5034165388369823,
      "eval_dim_256_cosine_accuracy@1": 0.2169850787895691,
      "eval_dim_256_cosine_accuracy@10": 0.6794031515827639,
      "eval_dim_256_cosine_accuracy@3": 0.41486543020499234,
      "eval_dim_256_cosine_accuracy@5": 0.5027192860131083,
      "eval_dim_256_cosine_map@100": 0.3555720655683085,
      "eval_dim_256_cosine_mrr@10": 0.344718033171525,
      "eval_dim_256_cosine_ndcg@10": 0.4233294657573625,
      "eval_dim_256_cosine_precision@1": 0.2169850787895691,
      "eval_dim_256_cosine_precision@10": 0.06794031515827638,
      "eval_dim_256_cosine_precision@3": 0.13828847673499742,
      "eval_dim_256_cosine_precision@5": 0.10054385720262167,
      "eval_dim_256_cosine_recall@1": 0.2169850787895691,
      "eval_dim_256_cosine_recall@10": 0.6794031515827639,
      "eval_dim_256_cosine_recall@3": 0.41486543020499234,
      "eval_dim_256_cosine_recall@5": 0.5027192860131083,
      "eval_dim_512_cosine_accuracy@1": 0.22102914516803793,
      "eval_dim_512_cosine_accuracy@10": 0.684562822479431,
      "eval_dim_512_cosine_accuracy@3": 0.4230930135267048,
      "eval_dim_512_cosine_accuracy@5": 0.5092734625575234,
      "eval_dim_512_cosine_map@100": 0.36044654114667407,
      "eval_dim_512_cosine_mrr@10": 0.3498800282442726,
      "eval_dim_512_cosine_ndcg@10": 0.4285631830606908,
      "eval_dim_512_cosine_precision@1": 0.22102914516803793,
      "eval_dim_512_cosine_precision@10": 0.06845628224794309,
      "eval_dim_512_cosine_precision@3": 0.14103100450890158,
      "eval_dim_512_cosine_precision@5": 0.10185469251150465,
      "eval_dim_512_cosine_recall@1": 0.22102914516803793,
      "eval_dim_512_cosine_recall@10": 0.684562822479431,
      "eval_dim_512_cosine_recall@3": 0.4230930135267048,
      "eval_dim_512_cosine_recall@5": 0.5092734625575234,
      "eval_dim_64_cosine_accuracy@1": 0.19746199972109887,
      "eval_dim_64_cosine_accuracy@10": 0.6455166643424906,
      "eval_dim_64_cosine_accuracy@3": 0.38613861386138615,
      "eval_dim_64_cosine_accuracy@5": 0.4791521405661693,
      "eval_dim_64_cosine_map@100": 0.332107485720445,
      "eval_dim_64_cosine_mrr@10": 0.32084381979445287,
      "eval_dim_64_cosine_ndcg@10": 0.3971543906124372,
      "eval_dim_64_cosine_precision@1": 0.19746199972109887,
      "eval_dim_64_cosine_precision@10": 0.06455166643424905,
      "eval_dim_64_cosine_precision@3": 0.12871287128712872,
      "eval_dim_64_cosine_precision@5": 0.09583042811323385,
      "eval_dim_64_cosine_recall@1": 0.19746199972109887,
      "eval_dim_64_cosine_recall@10": 0.6455166643424906,
      "eval_dim_64_cosine_recall@3": 0.38613861386138615,
      "eval_dim_64_cosine_recall@5": 0.4791521405661693,
      "eval_dim_768_cosine_accuracy@1": 0.2259099149351555,
      "eval_dim_768_cosine_accuracy@10": 0.6851206247385302,
      "eval_dim_768_cosine_accuracy@3": 0.42978664063589456,
      "eval_dim_768_cosine_accuracy@5": 0.5170826941849115,
      "eval_dim_768_cosine_map@100": 0.3656884435565745,
      "eval_dim_768_cosine_mrr@10": 0.3550936532285014,
      "eval_dim_768_cosine_ndcg@10": 0.43283429692560266,
      "eval_dim_768_cosine_precision@1": 0.2259099149351555,
      "eval_dim_768_cosine_precision@10": 0.06851206247385301,
      "eval_dim_768_cosine_precision@3": 0.1432622135452982,
      "eval_dim_768_cosine_precision@5": 0.10341653883698228,
      "eval_dim_768_cosine_recall@1": 0.2259099149351555,
      "eval_dim_768_cosine_recall@10": 0.6851206247385302,
      "eval_dim_768_cosine_recall@3": 0.42978664063589456,
      "eval_dim_768_cosine_recall@5": 0.5170826941849115,
      "eval_loss": 0.38568806648254395,
      "eval_runtime": 5135.457,
      "eval_samples_per_second": 1.397,
      "eval_sequential_score": 0.3971543906124372,
      "eval_steps_per_second": 0.013,
      "step": 46
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2190756797790527,
      "learning_rate": 2.9078090590344733e-05,
      "loss": 0.2932926675547724,
      "step": 92
    },
    {
      "epoch": 2.0,
      "eval_dim_128_cosine_accuracy@1": 0.3525310277506624,
      "eval_dim_128_cosine_accuracy@10": 0.7081299679263701,
      "eval_dim_128_cosine_accuracy@3": 0.47273741458652907,
      "eval_dim_128_cosine_accuracy@5": 0.5437177520568959,
      "eval_dim_128_cosine_map@100": 0.45170521683593867,
      "eval_dim_128_cosine_mrr@10": 0.4414466446644676,
      "eval_dim_128_cosine_ndcg@10": 0.5032435031696283,
      "eval_dim_128_cosine_precision@1": 0.3525310277506624,
      "eval_dim_128_cosine_precision@10": 0.070812996792637,
      "eval_dim_128_cosine_precision@3": 0.1575791381955097,
      "eval_dim_128_cosine_precision@5": 0.10874355041137916,
      "eval_dim_128_cosine_recall@1": 0.3525310277506624,
      "eval_dim_128_cosine_recall@10": 0.7081299679263701,
      "eval_dim_128_cosine_recall@3": 0.47273741458652907,
      "eval_dim_128_cosine_recall@5": 0.5437177520568959,
      "eval_dim_256_cosine_accuracy@1": 0.3576906986473295,
      "eval_dim_256_cosine_accuracy@10": 0.7170548040719565,
      "eval_dim_256_cosine_accuracy@3": 0.4801282945195928,
      "eval_dim_256_cosine_accuracy@5": 0.5580811602286989,
      "eval_dim_256_cosine_map@100": 0.45920859350066756,
      "eval_dim_256_cosine_mrr@10": 0.4493326294400066,
      "eval_dim_256_cosine_ndcg@10": 0.5115508657442693,
      "eval_dim_256_cosine_precision@1": 0.3576906986473295,
      "eval_dim_256_cosine_precision@10": 0.07170548040719565,
      "eval_dim_256_cosine_precision@3": 0.16004276483986424,
      "eval_dim_256_cosine_precision@5": 0.11161623204573978,
      "eval_dim_256_cosine_recall@1": 0.3576906986473295,
      "eval_dim_256_cosine_recall@10": 0.7170548040719565,
      "eval_dim_256_cosine_recall@3": 0.4801282945195928,
      "eval_dim_256_cosine_recall@5": 0.5580811602286989,
      "eval_dim_512_cosine_accuracy@1": 0.3614558638962488,
      "eval_dim_512_cosine_accuracy@10": 0.7205410681913261,
      "eval_dim_512_cosine_accuracy@3": 0.4808255473434667,
      "eval_dim_512_cosine_accuracy@5": 0.5544554455445545,
      "eval_dim_512_cosine_map@100": 0.46086139918174507,
      "eval_dim_512_cosine_mrr@10": 0.45110962806542504,
      "eval_dim_512_cosine_ndcg@10": 0.5136290484927198,
      "eval_dim_512_cosine_precision@1": 0.3614558638962488,
      "eval_dim_512_cosine_precision@10": 0.07205410681913263,
      "eval_dim_512_cosine_precision@3": 0.16027518244782224,
      "eval_dim_512_cosine_precision@5": 0.11089108910891088,
      "eval_dim_512_cosine_recall@1": 0.3614558638962488,
      "eval_dim_512_cosine_recall@10": 0.7205410681913261,
      "eval_dim_512_cosine_recall@3": 0.4808255473434667,
      "eval_dim_512_cosine_recall@5": 0.5544554455445545,
      "eval_dim_64_cosine_accuracy@1": 0.34527959838237343,
      "eval_dim_64_cosine_accuracy@10": 0.6934876586250174,
      "eval_dim_64_cosine_accuracy@3": 0.4613024682749965,
      "eval_dim_64_cosine_accuracy@5": 0.5328406080044624,
      "eval_dim_64_cosine_map@100": 0.4429389789139068,
      "eval_dim_64_cosine_mrr@10": 0.43229392858802956,
      "eval_dim_64_cosine_ndcg@10": 0.49282395499966114,
      "eval_dim_64_cosine_precision@1": 0.34527959838237343,
      "eval_dim_64_cosine_precision@10": 0.06934876586250174,
      "eval_dim_64_cosine_precision@3": 0.15376748942499882,
      "eval_dim_64_cosine_precision@5": 0.10656812160089248,
      "eval_dim_64_cosine_recall@1": 0.34527959838237343,
      "eval_dim_64_cosine_recall@10": 0.6934876586250174,
      "eval_dim_64_cosine_recall@3": 0.4613024682749965,
      "eval_dim_64_cosine_recall@5": 0.5328406080044624,
      "eval_dim_768_cosine_accuracy@1": 0.36480267745084366,
      "eval_dim_768_cosine_accuracy@10": 0.7151024961651095,
      "eval_dim_768_cosine_accuracy@3": 0.48333565750941293,
      "eval_dim_768_cosine_accuracy@5": 0.5564077534514015,
      "eval_dim_768_cosine_map@100": 0.4630869213805269,
      "eval_dim_768_cosine_mrr@10": 0.45277440218871223,
      "eval_dim_768_cosine_ndcg@10": 0.5136830862608868,
      "eval_dim_768_cosine_precision@1": 0.36480267745084366,
      "eval_dim_768_cosine_precision@10": 0.07151024961651094,
      "eval_dim_768_cosine_precision@3": 0.16111188583647096,
      "eval_dim_768_cosine_precision@5": 0.11128155069028028,
      "eval_dim_768_cosine_recall@1": 0.36480267745084366,
      "eval_dim_768_cosine_recall@10": 0.7151024961651095,
      "eval_dim_768_cosine_recall@3": 0.48333565750941293,
      "eval_dim_768_cosine_recall@5": 0.5564077534514015,
      "eval_loss": 0.19174915552139282,
      "eval_runtime": 5125.6155,
      "eval_samples_per_second": 1.399,
      "eval_sequential_score": 0.49282395499966114,
      "eval_steps_per_second": 0.013,
      "step": 92
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.435935139656067,
      "learning_rate": 2.5281411335025595e-05,
      "loss": 0.1528056186178456,
      "step": 138
    },
    {
      "epoch": 3.0,
      "eval_dim_128_cosine_accuracy@1": 0.3654999302747176,
      "eval_dim_128_cosine_accuracy@10": 0.7250034862641194,
      "eval_dim_128_cosine_accuracy@3": 0.4868219216287826,
      "eval_dim_128_cosine_accuracy@5": 0.5612885232185191,
      "eval_dim_128_cosine_map@100": 0.46579021075100474,
      "eval_dim_128_cosine_mrr@10": 0.4560526525489593,
      "eval_dim_128_cosine_ndcg@10": 0.5184617012059481,
      "eval_dim_128_cosine_precision@1": 0.3654999302747176,
      "eval_dim_128_cosine_precision@10": 0.07250034862641193,
      "eval_dim_128_cosine_precision@3": 0.16227397387626086,
      "eval_dim_128_cosine_precision@5": 0.11225770464370378,
      "eval_dim_128_cosine_recall@1": 0.3654999302747176,
      "eval_dim_128_cosine_recall@10": 0.7250034862641194,
      "eval_dim_128_cosine_recall@3": 0.4868219216287826,
      "eval_dim_128_cosine_recall@5": 0.5612885232185191,
      "eval_dim_256_cosine_accuracy@1": 0.36508157858039325,
      "eval_dim_256_cosine_accuracy@10": 0.7268163436061916,
      "eval_dim_256_cosine_accuracy@3": 0.48654302049923304,
      "eval_dim_256_cosine_accuracy@5": 0.5640775345140148,
      "eval_dim_256_cosine_map@100": 0.4656359359210204,
      "eval_dim_256_cosine_mrr@10": 0.4559058420932647,
      "eval_dim_256_cosine_ndcg@10": 0.5187585343172242,
      "eval_dim_256_cosine_precision@1": 0.36508157858039325,
      "eval_dim_256_cosine_precision@10": 0.07268163436061917,
      "eval_dim_256_cosine_precision@3": 0.16218100683307768,
      "eval_dim_256_cosine_precision@5": 0.11281550690280295,
      "eval_dim_256_cosine_recall@1": 0.36508157858039325,
      "eval_dim_256_cosine_recall@10": 0.7268163436061916,
      "eval_dim_256_cosine_recall@3": 0.48654302049923304,
      "eval_dim_256_cosine_recall@5": 0.5640775345140148,
      "eval_dim_512_cosine_accuracy@1": 0.37303026077255613,
      "eval_dim_512_cosine_accuracy@10": 0.7275135964300655,
      "eval_dim_512_cosine_accuracy@3": 0.49532840608004464,
      "eval_dim_512_cosine_accuracy@5": 0.5689583042811324,
      "eval_dim_512_cosine_map@100": 0.47218646353528615,
      "eval_dim_512_cosine_mrr@10": 0.46250495270855235,
      "eval_dim_512_cosine_ndcg@10": 0.5240494016636663,
      "eval_dim_512_cosine_precision@1": 0.37303026077255613,
      "eval_dim_512_cosine_precision@10": 0.07275135964300654,
      "eval_dim_512_cosine_precision@3": 0.16510946869334822,
      "eval_dim_512_cosine_precision@5": 0.11379166085622647,
      "eval_dim_512_cosine_recall@1": 0.37303026077255613,
      "eval_dim_512_cosine_recall@10": 0.7275135964300655,
      "eval_dim_512_cosine_recall@3": 0.49532840608004464,
      "eval_dim_512_cosine_recall@5": 0.5689583042811324,
      "eval_dim_64_cosine_accuracy@1": 0.354762236787059,
      "eval_dim_64_cosine_accuracy@10": 0.7084088690559197,
      "eval_dim_64_cosine_accuracy@3": 0.4685538976432855,
      "eval_dim_64_cosine_accuracy@5": 0.5463673127876167,
      "eval_dim_64_cosine_map@100": 0.4528341447756788,
      "eval_dim_64_cosine_mrr@10": 0.4426069619034351,
      "eval_dim_64_cosine_ndcg@10": 0.5042049654509246,
      "eval_dim_64_cosine_precision@1": 0.354762236787059,
      "eval_dim_64_cosine_precision@10": 0.07084088690559197,
      "eval_dim_64_cosine_precision@3": 0.15618463254776183,
      "eval_dim_64_cosine_precision@5": 0.10927346255752335,
      "eval_dim_64_cosine_recall@1": 0.354762236787059,
      "eval_dim_64_cosine_recall@10": 0.7084088690559197,
      "eval_dim_64_cosine_recall@3": 0.4685538976432855,
      "eval_dim_64_cosine_recall@5": 0.5463673127876167,
      "eval_dim_768_cosine_accuracy@1": 0.3689861943940873,
      "eval_dim_768_cosine_accuracy@10": 0.730442058290336,
      "eval_dim_768_cosine_accuracy@3": 0.4868219216287826,
      "eval_dim_768_cosine_accuracy@5": 0.5649142379026635,
      "eval_dim_768_cosine_map@100": 0.46810281327770226,
      "eval_dim_768_cosine_mrr@10": 0.45858146679859224,
      "eval_dim_768_cosine_ndcg@10": 0.5215840208749241,
      "eval_dim_768_cosine_precision@1": 0.3689861943940873,
      "eval_dim_768_cosine_precision@10": 0.0730442058290336,
      "eval_dim_768_cosine_precision@3": 0.16227397387626086,
      "eval_dim_768_cosine_precision@5": 0.11298284758053269,
      "eval_dim_768_cosine_recall@1": 0.3689861943940873,
      "eval_dim_768_cosine_recall@10": 0.730442058290336,
      "eval_dim_768_cosine_recall@3": 0.4868219216287826,
      "eval_dim_768_cosine_recall@5": 0.5649142379026635,
      "eval_loss": 0.15238769352436066,
      "eval_runtime": 5128.0318,
      "eval_samples_per_second": 1.399,
      "eval_sequential_score": 0.5042049654509246,
      "eval_steps_per_second": 0.013,
      "step": 138
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.2450511455535889,
      "learning_rate": 1.9318122786371193e-05,
      "loss": 0.11104167026022206,
      "step": 184
    },
    {
      "epoch": 4.0,
      "eval_dim_128_cosine_accuracy@1": 0.36619718309859156,
      "eval_dim_128_cosine_accuracy@10": 0.7255612885232186,
      "eval_dim_128_cosine_accuracy@3": 0.4857063171105843,
      "eval_dim_128_cosine_accuracy@5": 0.5624041277367173,
      "eval_dim_128_cosine_map@100": 0.46578169439240885,
      "eval_dim_128_cosine_mrr@10": 0.45578664506289746,
      "eval_dim_128_cosine_ndcg@10": 0.5183564003053771,
      "eval_dim_128_cosine_precision@1": 0.36619718309859156,
      "eval_dim_128_cosine_precision@10": 0.07255612885232185,
      "eval_dim_128_cosine_precision@3": 0.1619021057035281,
      "eval_dim_128_cosine_precision@5": 0.11248082554734345,
      "eval_dim_128_cosine_recall@1": 0.36619718309859156,
      "eval_dim_128_cosine_recall@10": 0.7255612885232186,
      "eval_dim_128_cosine_recall@3": 0.4857063171105843,
      "eval_dim_128_cosine_recall@5": 0.5624041277367173,
      "eval_dim_256_cosine_accuracy@1": 0.36619718309859156,
      "eval_dim_256_cosine_accuracy@10": 0.7269557941709663,
      "eval_dim_256_cosine_accuracy@3": 0.4847301631571608,
      "eval_dim_256_cosine_accuracy@5": 0.5621252266071678,
      "eval_dim_256_cosine_map@100": 0.4659867486137855,
      "eval_dim_256_cosine_mrr@10": 0.4562129653609242,
      "eval_dim_256_cosine_ndcg@10": 0.5189825183389356,
      "eval_dim_256_cosine_precision@1": 0.36619718309859156,
      "eval_dim_256_cosine_precision@10": 0.07269557941709663,
      "eval_dim_256_cosine_precision@3": 0.1615767210523869,
      "eval_dim_256_cosine_precision@5": 0.11242504532143355,
      "eval_dim_256_cosine_recall@1": 0.36619718309859156,
      "eval_dim_256_cosine_recall@10": 0.7269557941709663,
      "eval_dim_256_cosine_recall@3": 0.4847301631571608,
      "eval_dim_256_cosine_recall@5": 0.5621252266071678,
      "eval_dim_512_cosine_accuracy@1": 0.3657788314042672,
      "eval_dim_512_cosine_accuracy@10": 0.7297448054664621,
      "eval_dim_512_cosine_accuracy@3": 0.48710082275833216,
      "eval_dim_512_cosine_accuracy@5": 0.561985776042393,
      "eval_dim_512_cosine_map@100": 0.4664999147434714,
      "eval_dim_512_cosine_mrr@10": 0.45684652469271236,
      "eval_dim_512_cosine_ndcg@10": 0.5201161952598219,
      "eval_dim_512_cosine_precision@1": 0.3657788314042672,
      "eval_dim_512_cosine_precision@10": 0.07297448054664621,
      "eval_dim_512_cosine_precision@3": 0.1623669409194441,
      "eval_dim_512_cosine_precision@5": 0.11239715520847858,
      "eval_dim_512_cosine_recall@1": 0.3657788314042672,
      "eval_dim_512_cosine_recall@10": 0.7297448054664621,
      "eval_dim_512_cosine_recall@3": 0.48710082275833216,
      "eval_dim_512_cosine_recall@5": 0.561985776042393,
      "eval_dim_64_cosine_accuracy@1": 0.3514154232324641,
      "eval_dim_64_cosine_accuracy@10": 0.7138474410821364,
      "eval_dim_64_cosine_accuracy@3": 0.4725979640217543,
      "eval_dim_64_cosine_accuracy@5": 0.5434388509273462,
      "eval_dim_64_cosine_map@100": 0.4521933044833062,
      "eval_dim_64_cosine_mrr@10": 0.4420436258917645,
      "eval_dim_64_cosine_ndcg@10": 0.5050332106839454,
      "eval_dim_64_cosine_precision@1": 0.3514154232324641,
      "eval_dim_64_cosine_precision@10": 0.07138474410821363,
      "eval_dim_64_cosine_precision@3": 0.15753265467391808,
      "eval_dim_64_cosine_precision@5": 0.10868777018546923,
      "eval_dim_64_cosine_recall@1": 0.3514154232324641,
      "eval_dim_64_cosine_recall@10": 0.7138474410821364,
      "eval_dim_64_cosine_recall@3": 0.4725979640217543,
      "eval_dim_64_cosine_recall@5": 0.5434388509273462,
      "eval_dim_768_cosine_accuracy@1": 0.3723330079486822,
      "eval_dim_768_cosine_accuracy@10": 0.7312787616789848,
      "eval_dim_768_cosine_accuracy@3": 0.4951889555152698,
      "eval_dim_768_cosine_accuracy@5": 0.570492260493655,
      "eval_dim_768_cosine_map@100": 0.47234757553193524,
      "eval_dim_768_cosine_mrr@10": 0.46258391935773235,
      "eval_dim_768_cosine_ndcg@10": 0.5249716949025338,
      "eval_dim_768_cosine_precision@1": 0.3723330079486822,
      "eval_dim_768_cosine_precision@10": 0.07312787616789848,
      "eval_dim_768_cosine_precision@3": 0.1650629851717566,
      "eval_dim_768_cosine_precision@5": 0.11409845209873098,
      "eval_dim_768_cosine_recall@1": 0.3723330079486822,
      "eval_dim_768_cosine_recall@10": 0.7312787616789848,
      "eval_dim_768_cosine_recall@3": 0.4951889555152698,
      "eval_dim_768_cosine_recall@5": 0.570492260493655,
      "eval_loss": 0.13917988538742065,
      "eval_runtime": 5127.8405,
      "eval_samples_per_second": 1.399,
      "eval_sequential_score": 0.5050332106839454,
      "eval_steps_per_second": 0.013,
      "step": 184
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.0852317810058594,
      "learning_rate": 1.2444873080259475e-05,
      "loss": 0.08755316941634468,
      "step": 230
    },
    {
      "epoch": 5.0,
      "eval_dim_128_cosine_accuracy@1": 0.36926509552363684,
      "eval_dim_128_cosine_accuracy@10": 0.7303026077255613,
      "eval_dim_128_cosine_accuracy@3": 0.4847301631571608,
      "eval_dim_128_cosine_accuracy@5": 0.559336215311672,
      "eval_dim_128_cosine_map@100": 0.46751231421013056,
      "eval_dim_128_cosine_mrr@10": 0.4577717127849617,
      "eval_dim_128_cosine_ndcg@10": 0.5208050058986332,
      "eval_dim_128_cosine_precision@1": 0.36926509552363684,
      "eval_dim_128_cosine_precision@10": 0.07303026077255612,
      "eval_dim_128_cosine_precision@3": 0.1615767210523869,
      "eval_dim_128_cosine_precision@5": 0.11186724306233439,
      "eval_dim_128_cosine_recall@1": 0.36926509552363684,
      "eval_dim_128_cosine_recall@10": 0.7303026077255613,
      "eval_dim_128_cosine_recall@3": 0.4847301631571608,
      "eval_dim_128_cosine_recall@5": 0.559336215311672,
      "eval_dim_256_cosine_accuracy@1": 0.36745223818156464,
      "eval_dim_256_cosine_accuracy@10": 0.7289081020778134,
      "eval_dim_256_cosine_accuracy@3": 0.4869613721935574,
      "eval_dim_256_cosine_accuracy@5": 0.5610096220889694,
      "eval_dim_256_cosine_map@100": 0.4672783873145468,
      "eval_dim_256_cosine_mrr@10": 0.45734638856239845,
      "eval_dim_256_cosine_ndcg@10": 0.5202779962822549,
      "eval_dim_256_cosine_precision@1": 0.36745223818156464,
      "eval_dim_256_cosine_precision@10": 0.07289081020778135,
      "eval_dim_256_cosine_precision@3": 0.16232045739785247,
      "eval_dim_256_cosine_precision@5": 0.11220192441779388,
      "eval_dim_256_cosine_recall@1": 0.36745223818156464,
      "eval_dim_256_cosine_recall@10": 0.7289081020778134,
      "eval_dim_256_cosine_recall@3": 0.4869613721935574,
      "eval_dim_256_cosine_recall@5": 0.5610096220889694,
      "eval_dim_512_cosine_accuracy@1": 0.3733091619021057,
      "eval_dim_512_cosine_accuracy@10": 0.7358806303165528,
      "eval_dim_512_cosine_accuracy@3": 0.49714126342211684,
      "eval_dim_512_cosine_accuracy@5": 0.5745363268721239,
      "eval_dim_512_cosine_map@100": 0.47438800022814137,
      "eval_dim_512_cosine_mrr@10": 0.4649837971724749,
      "eval_dim_512_cosine_ndcg@10": 0.5278640631789735,
      "eval_dim_512_cosine_precision@1": 0.3733091619021057,
      "eval_dim_512_cosine_precision@10": 0.07358806303165527,
      "eval_dim_512_cosine_precision@3": 0.16571375447403894,
      "eval_dim_512_cosine_precision@5": 0.11490726537442476,
      "eval_dim_512_cosine_recall@1": 0.3733091619021057,
      "eval_dim_512_cosine_recall@10": 0.7358806303165528,
      "eval_dim_512_cosine_recall@3": 0.49714126342211684,
      "eval_dim_512_cosine_recall@5": 0.5745363268721239,
      "eval_dim_64_cosine_accuracy@1": 0.35601729187003206,
      "eval_dim_64_cosine_accuracy@10": 0.7197043648026774,
      "eval_dim_64_cosine_accuracy@3": 0.46980895272625856,
      "eval_dim_64_cosine_accuracy@5": 0.5460884116580672,
      "eval_dim_64_cosine_map@100": 0.45478631984265544,
      "eval_dim_64_cosine_mrr@10": 0.4447587062529192,
      "eval_dim_64_cosine_ndcg@10": 0.5083136533419047,
      "eval_dim_64_cosine_precision@1": 0.35601729187003206,
      "eval_dim_64_cosine_precision@10": 0.07197043648026773,
      "eval_dim_64_cosine_precision@3": 0.15660298424208619,
      "eval_dim_64_cosine_precision@5": 0.10921768233161343,
      "eval_dim_64_cosine_recall@1": 0.35601729187003206,
      "eval_dim_64_cosine_recall@10": 0.7197043648026774,
      "eval_dim_64_cosine_recall@3": 0.46980895272625856,
      "eval_dim_64_cosine_recall@5": 0.5460884116580672,
      "eval_dim_768_cosine_accuracy@1": 0.36954399665318644,
      "eval_dim_768_cosine_accuracy@10": 0.7333705201506067,
      "eval_dim_768_cosine_accuracy@3": 0.4882164272765305,
      "eval_dim_768_cosine_accuracy@5": 0.5631013805605912,
      "eval_dim_768_cosine_map@100": 0.4692423416522091,
      "eval_dim_768_cosine_mrr@10": 0.45940809875756333,
      "eval_dim_768_cosine_ndcg@10": 0.5228646825758372,
      "eval_dim_768_cosine_precision@1": 0.36954399665318644,
      "eval_dim_768_cosine_precision@10": 0.07333705201506065,
      "eval_dim_768_cosine_precision@3": 0.16273880909217683,
      "eval_dim_768_cosine_precision@5": 0.11262027611211824,
      "eval_dim_768_cosine_recall@1": 0.36954399665318644,
      "eval_dim_768_cosine_recall@10": 0.7333705201506067,
      "eval_dim_768_cosine_recall@3": 0.4882164272765305,
      "eval_dim_768_cosine_recall@5": 0.5631013805605912,
      "eval_loss": 0.1289709359407425,
      "eval_runtime": 5133.7392,
      "eval_samples_per_second": 1.397,
      "eval_sequential_score": 0.5083136533419047,
      "eval_steps_per_second": 0.013,
      "step": 230
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.0251929759979248,
      "learning_rate": 6.11006712953975e-06,
      "loss": 0.07780430109604546,
      "step": 276
    },
    {
      "epoch": 6.0,
      "eval_dim_128_cosine_accuracy@1": 0.36424487519174453,
      "eval_dim_128_cosine_accuracy@10": 0.729187003207363,
      "eval_dim_128_cosine_accuracy@3": 0.48333565750941293,
      "eval_dim_128_cosine_accuracy@5": 0.5603123692650955,
      "eval_dim_128_cosine_map@100": 0.4650707051757783,
      "eval_dim_128_cosine_mrr@10": 0.4552743855874532,
      "eval_dim_128_cosine_ndcg@10": 0.5187763857272285,
      "eval_dim_128_cosine_precision@1": 0.36424487519174453,
      "eval_dim_128_cosine_precision@10": 0.0729187003207363,
      "eval_dim_128_cosine_precision@3": 0.16111188583647096,
      "eval_dim_128_cosine_precision@5": 0.1120624738530191,
      "eval_dim_128_cosine_recall@1": 0.36424487519174453,
      "eval_dim_128_cosine_recall@10": 0.729187003207363,
      "eval_dim_128_cosine_recall@3": 0.48333565750941293,
      "eval_dim_128_cosine_recall@5": 0.5603123692650955,
      "eval_dim_256_cosine_accuracy@1": 0.37163575512480823,
      "eval_dim_256_cosine_accuracy@10": 0.7309998605494352,
      "eval_dim_256_cosine_accuracy@3": 0.49295774647887325,
      "eval_dim_256_cosine_accuracy@5": 0.5681216008924836,
      "eval_dim_256_cosine_map@100": 0.4718786149307726,
      "eval_dim_256_cosine_mrr@10": 0.46190055625280924,
      "eval_dim_256_cosine_ndcg@10": 0.524333456443,
      "eval_dim_256_cosine_precision@1": 0.37163575512480823,
      "eval_dim_256_cosine_precision@10": 0.07309998605494351,
      "eval_dim_256_cosine_precision@3": 0.16431924882629106,
      "eval_dim_256_cosine_precision@5": 0.11362432017849672,
      "eval_dim_256_cosine_recall@1": 0.37163575512480823,
      "eval_dim_256_cosine_recall@10": 0.7309998605494352,
      "eval_dim_256_cosine_recall@3": 0.49295774647887325,
      "eval_dim_256_cosine_recall@5": 0.5681216008924836,
      "eval_dim_512_cosine_accuracy@1": 0.3719146562543578,
      "eval_dim_512_cosine_accuracy@10": 0.7375540370938503,
      "eval_dim_512_cosine_accuracy@3": 0.4942128015618463,
      "eval_dim_512_cosine_accuracy@5": 0.5781620415562683,
      "eval_dim_512_cosine_map@100": 0.4735810945322351,
      "eval_dim_512_cosine_mrr@10": 0.4641111797296452,
      "eval_dim_512_cosine_ndcg@10": 0.5276527222739883,
      "eval_dim_512_cosine_precision@1": 0.3719146562543578,
      "eval_dim_512_cosine_precision@10": 0.07375540370938502,
      "eval_dim_512_cosine_precision@3": 0.16473760052061545,
      "eval_dim_512_cosine_precision@5": 0.11563240831125365,
      "eval_dim_512_cosine_recall@1": 0.3719146562543578,
      "eval_dim_512_cosine_recall@10": 0.7375540370938503,
      "eval_dim_512_cosine_recall@3": 0.4942128015618463,
      "eval_dim_512_cosine_recall@5": 0.5781620415562683,
      "eval_dim_64_cosine_accuracy@1": 0.35852740203597827,
      "eval_dim_64_cosine_accuracy@10": 0.7167759029424069,
      "eval_dim_64_cosine_accuracy@3": 0.47413192023427697,
      "eval_dim_64_cosine_accuracy@5": 0.5512480825547343,
      "eval_dim_64_cosine_map@100": 0.4579655459168757,
      "eval_dim_64_cosine_mrr@10": 0.44765800523714366,
      "eval_dim_64_cosine_ndcg@10": 0.5100114350662204,
      "eval_dim_64_cosine_precision@1": 0.35852740203597827,
      "eval_dim_64_cosine_precision@10": 0.07167759029424069,
      "eval_dim_64_cosine_precision@3": 0.15804397341142565,
      "eval_dim_64_cosine_precision@5": 0.11024961651094686,
      "eval_dim_64_cosine_recall@1": 0.35852740203597827,
      "eval_dim_64_cosine_recall@10": 0.7167759029424069,
      "eval_dim_64_cosine_recall@3": 0.47413192023427697,
      "eval_dim_64_cosine_recall@5": 0.5512480825547343,
      "eval_dim_768_cosine_accuracy@1": 0.37651652489192583,
      "eval_dim_768_cosine_accuracy@10": 0.7325338167619578,
      "eval_dim_768_cosine_accuracy@3": 0.4960256589039186,
      "eval_dim_768_cosine_accuracy@5": 0.5675637986333845,
      "eval_dim_768_cosine_map@100": 0.47495758740026794,
      "eval_dim_768_cosine_mrr@10": 0.46505562528083916,
      "eval_dim_768_cosine_ndcg@10": 0.5270382021795976,
      "eval_dim_768_cosine_precision@1": 0.37651652489192583,
      "eval_dim_768_cosine_precision@10": 0.07325338167619579,
      "eval_dim_768_cosine_precision@3": 0.16534188630130617,
      "eval_dim_768_cosine_precision@5": 0.11351275972667689,
      "eval_dim_768_cosine_recall@1": 0.37651652489192583,
      "eval_dim_768_cosine_recall@10": 0.7325338167619578,
      "eval_dim_768_cosine_recall@3": 0.4960256589039186,
      "eval_dim_768_cosine_recall@5": 0.5675637986333845,
      "eval_loss": 0.12710943818092346,
      "eval_runtime": 5133.9648,
      "eval_samples_per_second": 1.397,
      "eval_sequential_score": 0.5100114350662204,
      "eval_steps_per_second": 0.013,
      "step": 276
    }
  ],
  "logging_steps": 500,
  "max_steps": 368,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.001
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 104,
  "trial_name": null,
  "trial_params": null
}