File size: 36,458 Bytes
44e0275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.6253776435045317,
  "eval_steps": 2000,
  "global_step": 42000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04315925766076824,
      "grad_norm": 30.676715850830078,
      "learning_rate": 2.1579628830384117e-06,
      "loss": 0.358,
      "step": 500
    },
    {
      "epoch": 0.08631851532153648,
      "grad_norm": 19.524194717407227,
      "learning_rate": 4.3159257660768235e-06,
      "loss": 0.1048,
      "step": 1000
    },
    {
      "epoch": 0.1294777729823047,
      "grad_norm": 0.00297492160461843,
      "learning_rate": 6.473888649115235e-06,
      "loss": 0.0827,
      "step": 1500
    },
    {
      "epoch": 0.17263703064307295,
      "grad_norm": 8.900677680969238,
      "learning_rate": 8.631851532153647e-06,
      "loss": 0.067,
      "step": 2000
    },
    {
      "epoch": 0.17263703064307295,
      "eval_cosine_accuracy@1": 0.6191955808734679,
      "eval_cosine_accuracy@10": 0.9514931814258588,
      "eval_cosine_accuracy@3": 0.8606939409632315,
      "eval_cosine_accuracy@5": 0.909891248058001,
      "eval_cosine_map@100": 0.748128574680106,
      "eval_cosine_mrr@10": 0.7459635876906734,
      "eval_cosine_ndcg@10": 0.7968614059582585,
      "eval_cosine_precision@1": 0.6191955808734679,
      "eval_cosine_precision@10": 0.09514931814258587,
      "eval_cosine_precision@3": 0.28689798032107716,
      "eval_cosine_precision@5": 0.18197824961160017,
      "eval_cosine_recall@1": 0.6191955808734679,
      "eval_cosine_recall@10": 0.9514931814258588,
      "eval_cosine_recall@3": 0.8606939409632315,
      "eval_cosine_recall@5": 0.909891248058001,
      "eval_runtime": 468.0233,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 2000
    },
    {
      "epoch": 0.21579628830384118,
      "grad_norm": 0.12022869288921356,
      "learning_rate": 1.0789814415192059e-05,
      "loss": 0.0491,
      "step": 2500
    },
    {
      "epoch": 0.2589555459646094,
      "grad_norm": 0.07568053156137466,
      "learning_rate": 1.294777729823047e-05,
      "loss": 0.0831,
      "step": 3000
    },
    {
      "epoch": 0.3021148036253776,
      "grad_norm": 0.0246192067861557,
      "learning_rate": 1.5105740181268884e-05,
      "loss": 0.062,
      "step": 3500
    },
    {
      "epoch": 0.3452740612861459,
      "grad_norm": 0.009853623807430267,
      "learning_rate": 1.7263703064307294e-05,
      "loss": 0.0657,
      "step": 4000
    },
    {
      "epoch": 0.3452740612861459,
      "eval_cosine_accuracy@1": 0.6362851717590196,
      "eval_cosine_accuracy@10": 0.9523562920766442,
      "eval_cosine_accuracy@3": 0.8606939409632315,
      "eval_cosine_accuracy@5": 0.9110996029691006,
      "eval_cosine_map@100": 0.7589134849598074,
      "eval_cosine_mrr@10": 0.756632799848751,
      "eval_cosine_ndcg@10": 0.8050365772218437,
      "eval_cosine_precision@1": 0.6362851717590196,
      "eval_cosine_precision@10": 0.09523562920766442,
      "eval_cosine_precision@3": 0.28689798032107716,
      "eval_cosine_precision@5": 0.1822199205938201,
      "eval_cosine_recall@1": 0.6362851717590196,
      "eval_cosine_recall@10": 0.9523562920766442,
      "eval_cosine_recall@3": 0.8606939409632315,
      "eval_cosine_recall@5": 0.9110996029691006,
      "eval_runtime": 467.8258,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 4000
    },
    {
      "epoch": 0.38843331894691413,
      "grad_norm": 0.017763391137123108,
      "learning_rate": 1.9421665947345706e-05,
      "loss": 0.0522,
      "step": 4500
    },
    {
      "epoch": 0.43159257660768235,
      "grad_norm": 21.21623420715332,
      "learning_rate": 1.982446885041485e-05,
      "loss": 0.049,
      "step": 5000
    },
    {
      "epoch": 0.4747518342684506,
      "grad_norm": 0.13613158464431763,
      "learning_rate": 1.958467219797612e-05,
      "loss": 0.0426,
      "step": 5500
    },
    {
      "epoch": 0.5179110919292188,
      "grad_norm": 0.1645500212907791,
      "learning_rate": 1.9344875545537384e-05,
      "loss": 0.0708,
      "step": 6000
    },
    {
      "epoch": 0.5179110919292188,
      "eval_cosine_accuracy@1": 0.6526842741239427,
      "eval_cosine_accuracy@10": 0.9642672190574831,
      "eval_cosine_accuracy@3": 0.8865872604867944,
      "eval_cosine_accuracy@5": 0.9287070602451234,
      "eval_cosine_map@100": 0.7759321604397249,
      "eval_cosine_mrr@10": 0.7742270364616298,
      "eval_cosine_ndcg@10": 0.8214808830487713,
      "eval_cosine_precision@1": 0.6526842741239427,
      "eval_cosine_precision@10": 0.0964267219057483,
      "eval_cosine_precision@3": 0.2955290868289315,
      "eval_cosine_precision@5": 0.1857414120490247,
      "eval_cosine_recall@1": 0.6526842741239427,
      "eval_cosine_recall@10": 0.9642672190574831,
      "eval_cosine_recall@3": 0.8865872604867944,
      "eval_cosine_recall@5": 0.9287070602451234,
      "eval_runtime": 467.7458,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 6000
    },
    {
      "epoch": 0.561070349589987,
      "grad_norm": 0.3336288332939148,
      "learning_rate": 1.9105078893098655e-05,
      "loss": 0.0236,
      "step": 6500
    },
    {
      "epoch": 0.6042296072507553,
      "grad_norm": 0.011359921656548977,
      "learning_rate": 1.886528224065992e-05,
      "loss": 0.024,
      "step": 7000
    },
    {
      "epoch": 0.6473888649115235,
      "grad_norm": 0.0021573721896857023,
      "learning_rate": 1.8625485588221192e-05,
      "loss": 0.0256,
      "step": 7500
    },
    {
      "epoch": 0.6905481225722918,
      "grad_norm": 0.024769997224211693,
      "learning_rate": 1.8385688935782457e-05,
      "loss": 0.041,
      "step": 8000
    },
    {
      "epoch": 0.6905481225722918,
      "eval_cosine_accuracy@1": 0.6390471258415329,
      "eval_cosine_accuracy@10": 0.9573623338511997,
      "eval_cosine_accuracy@3": 0.8693250474710857,
      "eval_cosine_accuracy@5": 0.9195580873467979,
      "eval_cosine_map@100": 0.7640704294756044,
      "eval_cosine_mrr@10": 0.762041421091137,
      "eval_cosine_ndcg@10": 0.8104943817099518,
      "eval_cosine_precision@1": 0.6390471258415329,
      "eval_cosine_precision@10": 0.09573623338511995,
      "eval_cosine_precision@3": 0.2897750158236953,
      "eval_cosine_precision@5": 0.18391161746935958,
      "eval_cosine_recall@1": 0.6390471258415329,
      "eval_cosine_recall@10": 0.9573623338511997,
      "eval_cosine_recall@3": 0.8693250474710857,
      "eval_cosine_recall@5": 0.9195580873467979,
      "eval_runtime": 467.5761,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 8000
    },
    {
      "epoch": 0.73370738023306,
      "grad_norm": 0.001473304582759738,
      "learning_rate": 1.8145892283343725e-05,
      "loss": 0.0285,
      "step": 8500
    },
    {
      "epoch": 0.7768666378938283,
      "grad_norm": 0.002119662007316947,
      "learning_rate": 1.7906095630904994e-05,
      "loss": 0.0249,
      "step": 9000
    },
    {
      "epoch": 0.8200258955545965,
      "grad_norm": 0.035019177943468094,
      "learning_rate": 1.7666298978466262e-05,
      "loss": 0.0368,
      "step": 9500
    },
    {
      "epoch": 0.8631851532153647,
      "grad_norm": 0.2664908468723297,
      "learning_rate": 1.742650232602753e-05,
      "loss": 0.0588,
      "step": 10000
    },
    {
      "epoch": 0.8631851532153647,
      "eval_cosine_accuracy@1": 0.6407733471431037,
      "eval_cosine_accuracy@10": 0.9589159330226135,
      "eval_cosine_accuracy@3": 0.8734679785948558,
      "eval_cosine_accuracy@5": 0.9204211979975833,
      "eval_cosine_map@100": 0.7652575174635105,
      "eval_cosine_mrr@10": 0.7632412818974197,
      "eval_cosine_ndcg@10": 0.811775458664963,
      "eval_cosine_precision@1": 0.6407733471431037,
      "eval_cosine_precision@10": 0.09589159330226135,
      "eval_cosine_precision@3": 0.2911559928649519,
      "eval_cosine_precision@5": 0.18408423959951664,
      "eval_cosine_recall@1": 0.6407733471431037,
      "eval_cosine_recall@10": 0.9589159330226135,
      "eval_cosine_recall@3": 0.8734679785948558,
      "eval_cosine_recall@5": 0.9204211979975833,
      "eval_runtime": 467.8166,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 10000
    },
    {
      "epoch": 0.9063444108761329,
      "grad_norm": 0.032082412391901016,
      "learning_rate": 1.71867056735888e-05,
      "loss": 0.0386,
      "step": 10500
    },
    {
      "epoch": 0.9495036685369012,
      "grad_norm": 8.98410415649414,
      "learning_rate": 1.6946909021150067e-05,
      "loss": 0.0456,
      "step": 11000
    },
    {
      "epoch": 0.9926629261976694,
      "grad_norm": 0.002887778216972947,
      "learning_rate": 1.6707112368711332e-05,
      "loss": 0.0399,
      "step": 11500
    },
    {
      "epoch": 1.0358221838584376,
      "grad_norm": 0.039170317351818085,
      "learning_rate": 1.6467315716272604e-05,
      "loss": 0.0424,
      "step": 12000
    },
    {
      "epoch": 1.0358221838584376,
      "eval_cosine_accuracy@1": 0.6606248921111687,
      "eval_cosine_accuracy@10": 0.9654755739685827,
      "eval_cosine_accuracy@3": 0.8808907301916106,
      "eval_cosine_accuracy@5": 0.9300880372863801,
      "eval_cosine_map@100": 0.7789505370634054,
      "eval_cosine_mrr@10": 0.7772537463112309,
      "eval_cosine_ndcg@10": 0.8239196088222247,
      "eval_cosine_precision@1": 0.6606248921111687,
      "eval_cosine_precision@10": 0.09654755739685827,
      "eval_cosine_precision@3": 0.2936302433972035,
      "eval_cosine_precision@5": 0.186017607457276,
      "eval_cosine_recall@1": 0.6606248921111687,
      "eval_cosine_recall@10": 0.9654755739685827,
      "eval_cosine_recall@3": 0.8808907301916106,
      "eval_cosine_recall@5": 0.9300880372863801,
      "eval_runtime": 467.7683,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 12000
    },
    {
      "epoch": 1.0789814415192058,
      "grad_norm": 0.07316175103187561,
      "learning_rate": 1.622751906383387e-05,
      "loss": 0.0107,
      "step": 12500
    },
    {
      "epoch": 1.122140699179974,
      "grad_norm": 0.03618592023849487,
      "learning_rate": 1.598772241139514e-05,
      "loss": 0.0279,
      "step": 13000
    },
    {
      "epoch": 1.1652999568407423,
      "grad_norm": 0.023356635123491287,
      "learning_rate": 1.5747925758956405e-05,
      "loss": 0.0236,
      "step": 13500
    },
    {
      "epoch": 1.2084592145015105,
      "grad_norm": 0.002293772529810667,
      "learning_rate": 1.5508129106517674e-05,
      "loss": 0.024,
      "step": 14000
    },
    {
      "epoch": 1.2084592145015105,
      "eval_cosine_accuracy@1": 0.6506128085620576,
      "eval_cosine_accuracy@10": 0.9640945969273261,
      "eval_cosine_accuracy@3": 0.8803728638011393,
      "eval_cosine_accuracy@5": 0.9266355946832384,
      "eval_cosine_map@100": 0.7732572758885798,
      "eval_cosine_mrr@10": 0.7715017303313533,
      "eval_cosine_ndcg@10": 0.8192838549207232,
      "eval_cosine_precision@1": 0.6506128085620576,
      "eval_cosine_precision@10": 0.09640945969273261,
      "eval_cosine_precision@3": 0.29345762126704644,
      "eval_cosine_precision@5": 0.18532711893664763,
      "eval_cosine_recall@1": 0.6506128085620576,
      "eval_cosine_recall@10": 0.9640945969273261,
      "eval_cosine_recall@3": 0.8803728638011393,
      "eval_cosine_recall@5": 0.9266355946832384,
      "eval_runtime": 467.8783,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 14000
    },
    {
      "epoch": 1.2516184721622787,
      "grad_norm": 0.007560160476714373,
      "learning_rate": 1.5268332454078942e-05,
      "loss": 0.0143,
      "step": 14500
    },
    {
      "epoch": 1.2947777298230472,
      "grad_norm": 0.004202102776616812,
      "learning_rate": 1.5028535801640209e-05,
      "loss": 0.0118,
      "step": 15000
    },
    {
      "epoch": 1.3379369874838152,
      "grad_norm": 0.00022126469411887228,
      "learning_rate": 1.4788739149201479e-05,
      "loss": 0.0078,
      "step": 15500
    },
    {
      "epoch": 1.3810962451445836,
      "grad_norm": 0.011956814676523209,
      "learning_rate": 1.4548942496762745e-05,
      "loss": 0.023,
      "step": 16000
    },
    {
      "epoch": 1.3810962451445836,
      "eval_cosine_accuracy@1": 0.6533747626445711,
      "eval_cosine_accuracy@10": 0.9642672190574831,
      "eval_cosine_accuracy@3": 0.8826169514931814,
      "eval_cosine_accuracy@5": 0.9302606594165372,
      "eval_cosine_map@100": 0.7763076224553367,
      "eval_cosine_mrr@10": 0.7745393318153555,
      "eval_cosine_ndcg@10": 0.8216976031852626,
      "eval_cosine_precision@1": 0.6533747626445711,
      "eval_cosine_precision@10": 0.0964267219057483,
      "eval_cosine_precision@3": 0.2942056504977271,
      "eval_cosine_precision@5": 0.18605213188330738,
      "eval_cosine_recall@1": 0.6533747626445711,
      "eval_cosine_recall@10": 0.9642672190574831,
      "eval_cosine_recall@3": 0.8826169514931814,
      "eval_cosine_recall@5": 0.9302606594165372,
      "eval_runtime": 467.7532,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 16000
    },
    {
      "epoch": 1.4242555028053516,
      "grad_norm": 0.008947977796196938,
      "learning_rate": 1.4309145844324015e-05,
      "loss": 0.0239,
      "step": 16500
    },
    {
      "epoch": 1.46741476046612,
      "grad_norm": 0.20168237388134003,
      "learning_rate": 1.4069349191885282e-05,
      "loss": 0.0335,
      "step": 17000
    },
    {
      "epoch": 1.510574018126888,
      "grad_norm": 0.003233299357816577,
      "learning_rate": 1.3829552539446552e-05,
      "loss": 0.0119,
      "step": 17500
    },
    {
      "epoch": 1.5537332757876565,
      "grad_norm": 0.013063711114227772,
      "learning_rate": 1.3589755887007819e-05,
      "loss": 0.0411,
      "step": 18000
    },
    {
      "epoch": 1.5537332757876565,
      "eval_cosine_accuracy@1": 0.6644225789746245,
      "eval_cosine_accuracy@10": 0.9680649059209391,
      "eval_cosine_accuracy@3": 0.8898670809597791,
      "eval_cosine_accuracy@5": 0.9335404798895218,
      "eval_cosine_map@100": 0.7848911785594413,
      "eval_cosine_mrr@10": 0.7833323743214994,
      "eval_cosine_ndcg@10": 0.8292454833247894,
      "eval_cosine_precision@1": 0.6644225789746245,
      "eval_cosine_precision@10": 0.09680649059209388,
      "eval_cosine_precision@3": 0.2966223603199264,
      "eval_cosine_precision@5": 0.18670809597790436,
      "eval_cosine_recall@1": 0.6644225789746245,
      "eval_cosine_recall@10": 0.9680649059209391,
      "eval_cosine_recall@3": 0.8898670809597791,
      "eval_cosine_recall@5": 0.9335404798895218,
      "eval_runtime": 467.9161,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 18000
    },
    {
      "epoch": 1.5968925334484245,
      "grad_norm": 3.0231621265411377,
      "learning_rate": 1.3349959234569087e-05,
      "loss": 0.0168,
      "step": 18500
    },
    {
      "epoch": 1.640051791109193,
      "grad_norm": 0.08278048038482666,
      "learning_rate": 1.3110162582130355e-05,
      "loss": 0.0059,
      "step": 19000
    },
    {
      "epoch": 1.6832110487699612,
      "grad_norm": 0.10015950351953506,
      "learning_rate": 1.2870365929691622e-05,
      "loss": 0.0234,
      "step": 19500
    },
    {
      "epoch": 1.7263703064307294,
      "grad_norm": 2.1657984256744385,
      "learning_rate": 1.263056927725289e-05,
      "loss": 0.0184,
      "step": 20000
    },
    {
      "epoch": 1.7263703064307294,
      "eval_cosine_accuracy@1": 0.6768513723459347,
      "eval_cosine_accuracy@10": 0.969963749352667,
      "eval_cosine_accuracy@3": 0.897807698947005,
      "eval_cosine_accuracy@5": 0.9369929224926635,
      "eval_cosine_map@100": 0.7938770196077543,
      "eval_cosine_mrr@10": 0.7923516066188262,
      "eval_cosine_ndcg@10": 0.8365875778541227,
      "eval_cosine_precision@1": 0.6768513723459347,
      "eval_cosine_precision@10": 0.09699637493526668,
      "eval_cosine_precision@3": 0.29926923298233504,
      "eval_cosine_precision@5": 0.1873985844985327,
      "eval_cosine_recall@1": 0.6768513723459347,
      "eval_cosine_recall@10": 0.969963749352667,
      "eval_cosine_recall@3": 0.897807698947005,
      "eval_cosine_recall@5": 0.9369929224926635,
      "eval_runtime": 467.8044,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 20000
    },
    {
      "epoch": 1.7695295640914976,
      "grad_norm": 1.5666255950927734,
      "learning_rate": 1.2390772624814159e-05,
      "loss": 0.0128,
      "step": 20500
    },
    {
      "epoch": 1.8126888217522659,
      "grad_norm": 0.00032274972181767225,
      "learning_rate": 1.2150975972375427e-05,
      "loss": 0.0166,
      "step": 21000
    },
    {
      "epoch": 1.855848079413034,
      "grad_norm": 0.051935628056526184,
      "learning_rate": 1.1911179319936694e-05,
      "loss": 0.0181,
      "step": 21500
    },
    {
      "epoch": 1.8990073370738023,
      "grad_norm": 0.02546406351029873,
      "learning_rate": 1.1671382667497964e-05,
      "loss": 0.0148,
      "step": 22000
    },
    {
      "epoch": 1.8990073370738023,
      "eval_cosine_accuracy@1": 0.6744346625237355,
      "eval_cosine_accuracy@10": 0.9697911272225099,
      "eval_cosine_accuracy@3": 0.8971172104263767,
      "eval_cosine_accuracy@5": 0.9388917659243915,
      "eval_cosine_map@100": 0.792274316391964,
      "eval_cosine_mrr@10": 0.7907476593261165,
      "eval_cosine_ndcg@10": 0.8353359235071491,
      "eval_cosine_precision@1": 0.6744346625237355,
      "eval_cosine_precision@10": 0.09697911272225099,
      "eval_cosine_precision@3": 0.2990390701421256,
      "eval_cosine_precision@5": 0.1877783531848783,
      "eval_cosine_recall@1": 0.6744346625237355,
      "eval_cosine_recall@10": 0.9697911272225099,
      "eval_cosine_recall@3": 0.8971172104263767,
      "eval_cosine_recall@5": 0.9388917659243915,
      "eval_runtime": 467.8952,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 22000
    },
    {
      "epoch": 1.9421665947345705,
      "grad_norm": 0.009108115918934345,
      "learning_rate": 1.143158601505923e-05,
      "loss": 0.0225,
      "step": 22500
    },
    {
      "epoch": 1.9853258523953388,
      "grad_norm": 0.06883949786424637,
      "learning_rate": 1.1191789362620497e-05,
      "loss": 0.0158,
      "step": 23000
    },
    {
      "epoch": 2.028485110056107,
      "grad_norm": 0.00019052527204621583,
      "learning_rate": 1.0951992710181767e-05,
      "loss": 0.0123,
      "step": 23500
    },
    {
      "epoch": 2.071644367716875,
      "grad_norm": 0.005655207671225071,
      "learning_rate": 1.0712196057743034e-05,
      "loss": 0.0173,
      "step": 24000
    },
    {
      "epoch": 2.071644367716875,
      "eval_cosine_accuracy@1": 0.6718453305713793,
      "eval_cosine_accuracy@10": 0.9685827723114103,
      "eval_cosine_accuracy@3": 0.8934921456930779,
      "eval_cosine_accuracy@5": 0.9383738995339203,
      "eval_cosine_map@100": 0.7895192117982024,
      "eval_cosine_mrr@10": 0.7879250134946668,
      "eval_cosine_ndcg@10": 0.832874525127316,
      "eval_cosine_precision@1": 0.6718453305713793,
      "eval_cosine_precision@10": 0.09685827723114103,
      "eval_cosine_precision@3": 0.297830715231026,
      "eval_cosine_precision@5": 0.18767477990678402,
      "eval_cosine_recall@1": 0.6718453305713793,
      "eval_cosine_recall@10": 0.9685827723114103,
      "eval_cosine_recall@3": 0.8934921456930779,
      "eval_cosine_recall@5": 0.9383738995339203,
      "eval_runtime": 468.4558,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 24000
    },
    {
      "epoch": 2.1148036253776437,
      "grad_norm": 0.1119648739695549,
      "learning_rate": 1.0472399405304304e-05,
      "loss": 0.0167,
      "step": 24500
    },
    {
      "epoch": 2.1579628830384117,
      "grad_norm": 0.03796195238828659,
      "learning_rate": 1.023260275286557e-05,
      "loss": 0.0125,
      "step": 25000
    },
    {
      "epoch": 2.20112214069918,
      "grad_norm": 0.012651159428060055,
      "learning_rate": 9.992806100426838e-06,
      "loss": 0.013,
      "step": 25500
    },
    {
      "epoch": 2.244281398359948,
      "grad_norm": 0.0021349990274757147,
      "learning_rate": 9.753009447988107e-06,
      "loss": 0.0079,
      "step": 26000
    },
    {
      "epoch": 2.244281398359948,
      "eval_cosine_accuracy@1": 0.669255998619023,
      "eval_cosine_accuracy@10": 0.9709994821336095,
      "eval_cosine_accuracy@3": 0.8950457448644916,
      "eval_cosine_accuracy@5": 0.9390643880545486,
      "eval_cosine_map@100": 0.7897457483356454,
      "eval_cosine_mrr@10": 0.7882845059308039,
      "eval_cosine_ndcg@10": 0.8337888145070348,
      "eval_cosine_precision@1": 0.669255998619023,
      "eval_cosine_precision@10": 0.09709994821336093,
      "eval_cosine_precision@3": 0.29834858162149724,
      "eval_cosine_precision@5": 0.18781287761090973,
      "eval_cosine_recall@1": 0.669255998619023,
      "eval_cosine_recall@10": 0.9709994821336095,
      "eval_cosine_recall@3": 0.8950457448644916,
      "eval_cosine_recall@5": 0.9390643880545486,
      "eval_runtime": 467.762,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 26000
    },
    {
      "epoch": 2.2874406560207166,
      "grad_norm": 0.4521012306213379,
      "learning_rate": 9.513212795549375e-06,
      "loss": 0.007,
      "step": 26500
    },
    {
      "epoch": 2.3305999136814846,
      "grad_norm": 0.0015283157117664814,
      "learning_rate": 9.273416143110643e-06,
      "loss": 0.0171,
      "step": 27000
    },
    {
      "epoch": 2.373759171342253,
      "grad_norm": 0.0033215314615517855,
      "learning_rate": 9.033619490671912e-06,
      "loss": 0.0058,
      "step": 27500
    },
    {
      "epoch": 2.416918429003021,
      "grad_norm": 4.302379131317139,
      "learning_rate": 8.793822838233178e-06,
      "loss": 0.0048,
      "step": 28000
    },
    {
      "epoch": 2.416918429003021,
      "eval_cosine_accuracy@1": 0.6825479026411186,
      "eval_cosine_accuracy@10": 0.9718625927843949,
      "eval_cosine_accuracy@3": 0.8993612981184188,
      "eval_cosine_accuracy@5": 0.9390643880545486,
      "eval_cosine_map@100": 0.7983751737002095,
      "eval_cosine_mrr@10": 0.7969948679166703,
      "eval_cosine_ndcg@10": 0.8405363983140419,
      "eval_cosine_precision@1": 0.6825479026411186,
      "eval_cosine_precision@10": 0.09718625927843948,
      "eval_cosine_precision@3": 0.2997870993728063,
      "eval_cosine_precision@5": 0.18781287761090973,
      "eval_cosine_recall@1": 0.6825479026411186,
      "eval_cosine_recall@10": 0.9718625927843949,
      "eval_cosine_recall@3": 0.8993612981184188,
      "eval_cosine_recall@5": 0.9390643880545486,
      "eval_runtime": 467.6926,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 28000
    },
    {
      "epoch": 2.4600776866637895,
      "grad_norm": 0.001049822778441012,
      "learning_rate": 8.554026185794447e-06,
      "loss": 0.005,
      "step": 28500
    },
    {
      "epoch": 2.5032369443245575,
      "grad_norm": 0.0011170560028403997,
      "learning_rate": 8.314229533355715e-06,
      "loss": 0.0141,
      "step": 29000
    },
    {
      "epoch": 2.546396201985326,
      "grad_norm": 0.0026090971659868956,
      "learning_rate": 8.074432880916982e-06,
      "loss": 0.0132,
      "step": 29500
    },
    {
      "epoch": 2.5895554596460943,
      "grad_norm": 7.936817564768717e-05,
      "learning_rate": 7.83463622847825e-06,
      "loss": 0.006,
      "step": 30000
    },
    {
      "epoch": 2.5895554596460943,
      "eval_cosine_accuracy@1": 0.6911790091489729,
      "eval_cosine_accuracy@10": 0.9735888140859659,
      "eval_cosine_accuracy@3": 0.9092007595373727,
      "eval_cosine_accuracy@5": 0.9442430519592612,
      "eval_cosine_map@100": 0.8050289389600185,
      "eval_cosine_mrr@10": 0.8036913735515502,
      "eval_cosine_ndcg@10": 0.8461133955612519,
      "eval_cosine_precision@1": 0.6911790091489729,
      "eval_cosine_precision@10": 0.09735888140859657,
      "eval_cosine_precision@3": 0.3030669198457909,
      "eval_cosine_precision@5": 0.18884861039185225,
      "eval_cosine_recall@1": 0.6911790091489729,
      "eval_cosine_recall@10": 0.9735888140859659,
      "eval_cosine_recall@3": 0.9092007595373727,
      "eval_cosine_recall@5": 0.9442430519592612,
      "eval_runtime": 467.8028,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 30000
    },
    {
      "epoch": 2.6327147173068624,
      "grad_norm": 0.014025676064193249,
      "learning_rate": 7.5948395760395184e-06,
      "loss": 0.0095,
      "step": 30500
    },
    {
      "epoch": 2.6758739749676304,
      "grad_norm": 0.0240753386169672,
      "learning_rate": 7.355042923600787e-06,
      "loss": 0.0061,
      "step": 31000
    },
    {
      "epoch": 2.719033232628399,
      "grad_norm": 0.051389552652835846,
      "learning_rate": 7.115246271162055e-06,
      "loss": 0.0107,
      "step": 31500
    },
    {
      "epoch": 2.7621924902891672,
      "grad_norm": 0.0053047193214297295,
      "learning_rate": 6.875449618723323e-06,
      "loss": 0.0157,
      "step": 32000
    },
    {
      "epoch": 2.7621924902891672,
      "eval_cosine_accuracy@1": 0.689452787847402,
      "eval_cosine_accuracy@10": 0.9723804591748663,
      "eval_cosine_accuracy@3": 0.9074745382358018,
      "eval_cosine_accuracy@5": 0.9442430519592612,
      "eval_cosine_map@100": 0.8041420474637542,
      "eval_cosine_mrr@10": 0.8027525694667068,
      "eval_cosine_ndcg@10": 0.8451171490975874,
      "eval_cosine_precision@1": 0.689452787847402,
      "eval_cosine_precision@10": 0.09723804591748661,
      "eval_cosine_precision@3": 0.3024915127452673,
      "eval_cosine_precision@5": 0.18884861039185225,
      "eval_cosine_recall@1": 0.689452787847402,
      "eval_cosine_recall@10": 0.9723804591748663,
      "eval_cosine_recall@3": 0.9074745382358018,
      "eval_cosine_recall@5": 0.9442430519592612,
      "eval_runtime": 467.7248,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 32000
    },
    {
      "epoch": 2.8053517479499352,
      "grad_norm": 0.005983938928693533,
      "learning_rate": 6.635652966284592e-06,
      "loss": 0.005,
      "step": 32500
    },
    {
      "epoch": 2.8485110056107033,
      "grad_norm": 0.006458807270973921,
      "learning_rate": 6.395856313845859e-06,
      "loss": 0.0087,
      "step": 33000
    },
    {
      "epoch": 2.8916702632714717,
      "grad_norm": 0.00440911203622818,
      "learning_rate": 6.1560596614071276e-06,
      "loss": 0.0064,
      "step": 33500
    },
    {
      "epoch": 2.93482952093224,
      "grad_norm": 0.0034452094696462154,
      "learning_rate": 5.916263008968395e-06,
      "loss": 0.005,
      "step": 34000
    },
    {
      "epoch": 2.93482952093224,
      "eval_cosine_accuracy@1": 0.6884170550664596,
      "eval_cosine_accuracy@10": 0.9725530813050233,
      "eval_cosine_accuracy@3": 0.9083376488865873,
      "eval_cosine_accuracy@5": 0.9463145175211463,
      "eval_cosine_map@100": 0.8037708008346327,
      "eval_cosine_mrr@10": 0.8023887614773162,
      "eval_cosine_ndcg@10": 0.8449160090668899,
      "eval_cosine_precision@1": 0.6884170550664596,
      "eval_cosine_precision@10": 0.0972553081305023,
      "eval_cosine_precision@3": 0.30277921629552906,
      "eval_cosine_precision@5": 0.18926290350422922,
      "eval_cosine_recall@1": 0.6884170550664596,
      "eval_cosine_recall@10": 0.9725530813050233,
      "eval_cosine_recall@3": 0.9083376488865873,
      "eval_cosine_recall@5": 0.9463145175211463,
      "eval_runtime": 467.6593,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 34000
    },
    {
      "epoch": 2.977988778593008,
      "grad_norm": 1.5224103927612305,
      "learning_rate": 5.6764663565296625e-06,
      "loss": 0.0115,
      "step": 34500
    },
    {
      "epoch": 3.0211480362537766,
      "grad_norm": 0.007577585522085428,
      "learning_rate": 5.436669704090931e-06,
      "loss": 0.0079,
      "step": 35000
    },
    {
      "epoch": 3.0643072939145446,
      "grad_norm": 0.01359875500202179,
      "learning_rate": 5.196873051652199e-06,
      "loss": 0.0045,
      "step": 35500
    },
    {
      "epoch": 3.107466551575313,
      "grad_norm": 0.005014342721551657,
      "learning_rate": 4.9570763992134675e-06,
      "loss": 0.0029,
      "step": 36000
    },
    {
      "epoch": 3.107466551575313,
      "eval_cosine_accuracy@1": 0.6875539444156741,
      "eval_cosine_accuracy@10": 0.972035214914552,
      "eval_cosine_accuracy@3": 0.9067840497151735,
      "eval_cosine_accuracy@5": 0.9442430519592612,
      "eval_cosine_map@100": 0.8031759037555115,
      "eval_cosine_mrr@10": 0.8017571836836468,
      "eval_cosine_ndcg@10": 0.8443043752760462,
      "eval_cosine_precision@1": 0.6875539444156741,
      "eval_cosine_precision@10": 0.09720352149145518,
      "eval_cosine_precision@3": 0.3022613499050578,
      "eval_cosine_precision@5": 0.18884861039185225,
      "eval_cosine_recall@1": 0.6875539444156741,
      "eval_cosine_recall@10": 0.972035214914552,
      "eval_cosine_recall@3": 0.9067840497151735,
      "eval_cosine_recall@5": 0.9442430519592612,
      "eval_runtime": 467.7266,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 36000
    },
    {
      "epoch": 3.150625809236081,
      "grad_norm": 0.015572451055049896,
      "learning_rate": 4.717279746774736e-06,
      "loss": 0.0161,
      "step": 36500
    },
    {
      "epoch": 3.1937850668968495,
      "grad_norm": 0.004311546217650175,
      "learning_rate": 4.477483094336003e-06,
      "loss": 0.0144,
      "step": 37000
    },
    {
      "epoch": 3.2369443245576175,
      "grad_norm": 0.0009289888548664749,
      "learning_rate": 4.237686441897272e-06,
      "loss": 0.0076,
      "step": 37500
    },
    {
      "epoch": 3.280103582218386,
      "grad_norm": 0.0010557913919910789,
      "learning_rate": 3.997889789458539e-06,
      "loss": 0.0157,
      "step": 38000
    },
    {
      "epoch": 3.280103582218386,
      "eval_cosine_accuracy@1": 0.6977386500949422,
      "eval_cosine_accuracy@10": 0.9747971689970655,
      "eval_cosine_accuracy@3": 0.909891248058001,
      "eval_cosine_accuracy@5": 0.9470050060417745,
      "eval_cosine_map@100": 0.809749193191093,
      "eval_cosine_mrr@10": 0.8084805416498834,
      "eval_cosine_ndcg@10": 0.8499994995327701,
      "eval_cosine_precision@1": 0.6977386500949422,
      "eval_cosine_precision@10": 0.09747971689970651,
      "eval_cosine_precision@3": 0.30329708268600036,
      "eval_cosine_precision@5": 0.18940100120835487,
      "eval_cosine_recall@1": 0.6977386500949422,
      "eval_cosine_recall@10": 0.9747971689970655,
      "eval_cosine_recall@3": 0.909891248058001,
      "eval_cosine_recall@5": 0.9470050060417745,
      "eval_runtime": 467.9009,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 38000
    },
    {
      "epoch": 3.323262839879154,
      "grad_norm": 0.002490697894245386,
      "learning_rate": 3.7580931370198075e-06,
      "loss": 0.0039,
      "step": 38500
    },
    {
      "epoch": 3.3664220975399224,
      "grad_norm": 0.0011037011863663793,
      "learning_rate": 3.518296484581076e-06,
      "loss": 0.0045,
      "step": 39000
    },
    {
      "epoch": 3.4095813552006904,
      "grad_norm": 0.008491401560604572,
      "learning_rate": 3.2784998321423433e-06,
      "loss": 0.0033,
      "step": 39500
    },
    {
      "epoch": 3.452740612861459,
      "grad_norm": 0.0002366910339333117,
      "learning_rate": 3.0387031797036116e-06,
      "loss": 0.0064,
      "step": 40000
    },
    {
      "epoch": 3.452740612861459,
      "eval_cosine_accuracy@1": 0.6832383911617469,
      "eval_cosine_accuracy@10": 0.97393405834628,
      "eval_cosine_accuracy@3": 0.9062661833247022,
      "eval_cosine_accuracy@5": 0.9464871396513033,
      "eval_cosine_map@100": 0.8011659555812971,
      "eval_cosine_mrr@10": 0.7998895081365299,
      "eval_cosine_ndcg@10": 0.8433601615941685,
      "eval_cosine_precision@1": 0.6832383911617469,
      "eval_cosine_precision@10": 0.097393405834628,
      "eval_cosine_precision@3": 0.30208872777490076,
      "eval_cosine_precision@5": 0.18929742793026064,
      "eval_cosine_recall@1": 0.6832383911617469,
      "eval_cosine_recall@10": 0.97393405834628,
      "eval_cosine_recall@3": 0.9062661833247022,
      "eval_cosine_recall@5": 0.9464871396513033,
      "eval_runtime": 467.6658,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 40000
    },
    {
      "epoch": 3.495899870522227,
      "grad_norm": 0.0015487176133319736,
      "learning_rate": 2.7989065272648796e-06,
      "loss": 0.0054,
      "step": 40500
    },
    {
      "epoch": 3.5390591281829953,
      "grad_norm": 1.1207655668258667,
      "learning_rate": 2.559109874826148e-06,
      "loss": 0.0061,
      "step": 41000
    },
    {
      "epoch": 3.5822183858437633,
      "grad_norm": 0.0002378961944486946,
      "learning_rate": 2.319313222387416e-06,
      "loss": 0.0051,
      "step": 41500
    },
    {
      "epoch": 3.6253776435045317,
      "grad_norm": 0.0002853251644410193,
      "learning_rate": 2.0795165699486837e-06,
      "loss": 0.0019,
      "step": 42000
    },
    {
      "epoch": 3.6253776435045317,
      "eval_cosine_accuracy@1": 0.6910063870188158,
      "eval_cosine_accuracy@10": 0.9742793026065941,
      "eval_cosine_accuracy@3": 0.9109269808389435,
      "eval_cosine_accuracy@5": 0.9461418953909891,
      "eval_cosine_map@100": 0.8061197699360279,
      "eval_cosine_mrr@10": 0.804833419644399,
      "eval_cosine_ndcg@10": 0.8471731447814336,
      "eval_cosine_precision@1": 0.6910063870188158,
      "eval_cosine_precision@10": 0.09742793026065939,
      "eval_cosine_precision@3": 0.30364232694631454,
      "eval_cosine_precision@5": 0.18922837907819778,
      "eval_cosine_recall@1": 0.6910063870188158,
      "eval_cosine_recall@10": 0.9742793026065941,
      "eval_cosine_recall@3": 0.9109269808389435,
      "eval_cosine_recall@5": 0.9461418953909891,
      "eval_runtime": 467.6854,
      "eval_samples_per_second": 0.0,
      "eval_steps_per_second": 0.0,
      "step": 42000
    }
  ],
  "logging_steps": 500,
  "max_steps": 46340,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}