{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 2000, "global_step": 6326, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "document_regularizer_loss": 0.7471, "epoch": 0.006323110970597534, "grad_norm": 10069.7373046875, "learning_rate": 2.0000000000000003e-06, "loss": 787.2433, "query_regularizer_loss": 0.746, "sparse_loss": 785.7502, "step": 20 }, { "document_regularizer_loss": 0.7124, "epoch": 0.012646221941195067, "grad_norm": 13459.8525390625, "learning_rate": 4.105263157894737e-06, "loss": 440.0618, "query_regularizer_loss": 0.712, "sparse_loss": 438.6375, "step": 40 }, { "document_regularizer_loss": 0.6455, "epoch": 0.0189693329117926, "grad_norm": 3622.762451171875, "learning_rate": 6.2105263157894745e-06, "loss": 271.8732, "query_regularizer_loss": 0.6464, "sparse_loss": 270.5813, "step": 60 }, { "document_regularizer_loss": 0.5798, "epoch": 0.025292443882390134, "grad_norm": 21794.759765625, "learning_rate": 8.315789473684212e-06, "loss": 159.8073, "query_regularizer_loss": 0.5803, "sparse_loss": 158.6473, "step": 80 }, { "document_regularizer_loss": 0.5264, "epoch": 0.03161555485298767, "grad_norm": 3427.578369140625, "learning_rate": 1.0421052631578948e-05, "loss": 115.2051, "query_regularizer_loss": 0.526, "sparse_loss": 114.1528, "step": 100 }, { "document_regularizer_loss": 0.4789, "epoch": 0.0379386658235852, "grad_norm": 1716.6964111328125, "learning_rate": 1.2526315789473686e-05, "loss": 68.2212, "query_regularizer_loss": 0.4789, "sparse_loss": 67.2635, "step": 120 }, { "document_regularizer_loss": 0.4385, "epoch": 0.044261776794182736, "grad_norm": 745.2850341796875, "learning_rate": 1.4631578947368422e-05, "loss": 37.2703, "query_regularizer_loss": 0.4386, "sparse_loss": 36.3932, "step": 140 }, { "document_regularizer_loss": 0.405, "epoch": 0.05058488776478027, "grad_norm": 1856.085693359375, "learning_rate": 1.673684210526316e-05, "loss": 21.9191, "query_regularizer_loss": 0.405, "sparse_loss": 21.109, "step": 160 }, { "document_regularizer_loss": 0.3851, "epoch": 0.05690799873537781, "grad_norm": 309.21807861328125, "learning_rate": 1.8842105263157894e-05, "loss": 13.6624, "query_regularizer_loss": 0.3852, "sparse_loss": 12.8921, "step": 180 }, { "document_regularizer_loss": 0.3724, "epoch": 0.06323110970597534, "grad_norm": 76.83500671386719, "learning_rate": 2.0947368421052632e-05, "loss": 6.6098, "query_regularizer_loss": 0.3724, "sparse_loss": 5.865, "step": 200 }, { "document_regularizer_loss": 0.3636, "epoch": 0.06955422067657287, "grad_norm": 31.61189842224121, "learning_rate": 2.305263157894737e-05, "loss": 4.905, "query_regularizer_loss": 0.3636, "sparse_loss": 4.1779, "step": 220 }, { "document_regularizer_loss": 0.3525, "epoch": 0.0758773316471704, "grad_norm": 54.798858642578125, "learning_rate": 2.5157894736842108e-05, "loss": 4.4195, "query_regularizer_loss": 0.3525, "sparse_loss": 3.7144, "step": 240 }, { "document_regularizer_loss": 0.3441, "epoch": 0.08220044261776795, "grad_norm": 35.759185791015625, "learning_rate": 2.7263157894736846e-05, "loss": 3.7145, "query_regularizer_loss": 0.3441, "sparse_loss": 3.0263, "step": 260 }, { "document_regularizer_loss": 0.333, "epoch": 0.08852355358836547, "grad_norm": 28.589937210083008, "learning_rate": 2.9368421052631577e-05, "loss": 3.3807, "query_regularizer_loss": 0.333, "sparse_loss": 2.7146, "step": 280 }, { "document_regularizer_loss": 0.3202, "epoch": 0.09484666455896301, "grad_norm": 21.61874008178711, "learning_rate": 3.147368421052632e-05, "loss": 3.3389, "query_regularizer_loss": 0.3202, "sparse_loss": 2.6984, "step": 300 }, { "document_regularizer_loss": 0.3065, "epoch": 0.10116977552956054, "grad_norm": 23.4547176361084, "learning_rate": 3.357894736842105e-05, "loss": 3.1854, "query_regularizer_loss": 0.3065, "sparse_loss": 2.5724, "step": 320 }, { "document_regularizer_loss": 0.2938, "epoch": 0.10749288650015808, "grad_norm": 30.065744400024414, "learning_rate": 3.5684210526315794e-05, "loss": 3.0942, "query_regularizer_loss": 0.2938, "sparse_loss": 2.5066, "step": 340 }, { "document_regularizer_loss": 0.2794, "epoch": 0.11381599747075562, "grad_norm": 24.388168334960938, "learning_rate": 3.778947368421053e-05, "loss": 2.8802, "query_regularizer_loss": 0.2794, "sparse_loss": 2.3214, "step": 360 }, { "document_regularizer_loss": 0.2675, "epoch": 0.12013910844135314, "grad_norm": 30.463876724243164, "learning_rate": 3.989473684210526e-05, "loss": 2.8744, "query_regularizer_loss": 0.2676, "sparse_loss": 2.3393, "step": 380 }, { "document_regularizer_loss": 0.2574, "epoch": 0.12646221941195068, "grad_norm": 8.956077575683594, "learning_rate": 4.2e-05, "loss": 2.9323, "query_regularizer_loss": 0.2574, "sparse_loss": 2.4174, "step": 400 }, { "document_regularizer_loss": 0.2454, "epoch": 0.13278533038254822, "grad_norm": 17.979278564453125, "learning_rate": 4.410526315789474e-05, "loss": 2.805, "query_regularizer_loss": 0.2454, "sparse_loss": 2.3142, "step": 420 }, { "document_regularizer_loss": 0.235, "epoch": 0.13910844135314573, "grad_norm": 16.181251525878906, "learning_rate": 4.6210526315789473e-05, "loss": 2.7811, "query_regularizer_loss": 0.235, "sparse_loss": 2.3111, "step": 440 }, { "document_regularizer_loss": 0.2264, "epoch": 0.14543155232374327, "grad_norm": 17.261810302734375, "learning_rate": 4.8315789473684215e-05, "loss": 2.7447, "query_regularizer_loss": 0.2264, "sparse_loss": 2.2919, "step": 460 }, { "document_regularizer_loss": 0.2184, "epoch": 0.1517546632943408, "grad_norm": 16.512298583984375, "learning_rate": 4.999994234069837e-05, "loss": 2.7353, "query_regularizer_loss": 0.2184, "sparse_loss": 2.2986, "step": 480 }, { "document_regularizer_loss": 0.21, "epoch": 0.15807777426493835, "grad_norm": 36.3756217956543, "learning_rate": 4.9997924293067854e-05, "loss": 2.6297, "query_regularizer_loss": 0.21, "sparse_loss": 2.2096, "step": 500 }, { "document_regularizer_loss": 0.202, "epoch": 0.1644008852355359, "grad_norm": 54.841121673583984, "learning_rate": 4.9993023546318026e-05, "loss": 2.6286, "query_regularizer_loss": 0.2021, "sparse_loss": 2.2245, "step": 520 }, { "document_regularizer_loss": 0.1967, "epoch": 0.1707239962061334, "grad_norm": 104.60382080078125, "learning_rate": 4.998524066559095e-05, "loss": 2.6424, "query_regularizer_loss": 0.1966, "sparse_loss": 2.2491, "step": 540 }, { "document_regularizer_loss": 0.193, "epoch": 0.17704710717673094, "grad_norm": 61.492584228515625, "learning_rate": 4.997457654838927e-05, "loss": 2.5846, "query_regularizer_loss": 0.193, "sparse_loss": 2.1985, "step": 560 }, { "document_regularizer_loss": 0.1896, "epoch": 0.18337021814732848, "grad_norm": 53.26578140258789, "learning_rate": 4.9961032424472766e-05, "loss": 2.6457, "query_regularizer_loss": 0.1896, "sparse_loss": 2.2664, "step": 580 }, { "document_regularizer_loss": 0.1867, "epoch": 0.18969332911792602, "grad_norm": 13.084442138671875, "learning_rate": 4.9944609855716445e-05, "loss": 2.4484, "query_regularizer_loss": 0.1867, "sparse_loss": 2.075, "step": 600 }, { "document_regularizer_loss": 0.1808, "epoch": 0.19601644008852356, "grad_norm": 15.175243377685547, "learning_rate": 4.992531073593055e-05, "loss": 2.5683, "query_regularizer_loss": 0.1808, "sparse_loss": 2.2068, "step": 620 }, { "document_regularizer_loss": 0.1747, "epoch": 0.20233955105912108, "grad_norm": 30.665386199951172, "learning_rate": 4.990313729064209e-05, "loss": 2.5078, "query_regularizer_loss": 0.1747, "sparse_loss": 2.1584, "step": 640 }, { "document_regularizer_loss": 0.1695, "epoch": 0.20866266202971862, "grad_norm": 18.14386749267578, "learning_rate": 4.98780920768382e-05, "loss": 2.4674, "query_regularizer_loss": 0.1695, "sparse_loss": 2.1284, "step": 660 }, { "document_regularizer_loss": 0.165, "epoch": 0.21498577300031615, "grad_norm": 25.72368812561035, "learning_rate": 4.985017798267132e-05, "loss": 2.4734, "query_regularizer_loss": 0.165, "sparse_loss": 2.1434, "step": 680 }, { "document_regularizer_loss": 0.1575, "epoch": 0.2213088839709137, "grad_norm": 13.785391807556152, "learning_rate": 4.981939822712609e-05, "loss": 2.3857, "query_regularizer_loss": 0.1575, "sparse_loss": 2.0707, "step": 700 }, { "document_regularizer_loss": 0.151, "epoch": 0.22763199494151123, "grad_norm": 59.33202362060547, "learning_rate": 4.9785756359648204e-05, "loss": 2.3447, "query_regularizer_loss": 0.151, "sparse_loss": 2.0427, "step": 720 }, { "document_regularizer_loss": 0.1457, "epoch": 0.23395510591210875, "grad_norm": 11.074970245361328, "learning_rate": 4.9749256259735024e-05, "loss": 2.3187, "query_regularizer_loss": 0.1457, "sparse_loss": 2.0272, "step": 740 }, { "document_regularizer_loss": 0.1402, "epoch": 0.24027821688270629, "grad_norm": 14.643407821655273, "learning_rate": 4.9709902136488276e-05, "loss": 2.4015, "query_regularizer_loss": 0.1402, "sparse_loss": 2.121, "step": 760 }, { "document_regularizer_loss": 0.1392, "epoch": 0.24660132785330383, "grad_norm": 27.94011688232422, "learning_rate": 4.9667698528128593e-05, "loss": 2.3671, "query_regularizer_loss": 0.1392, "sparse_loss": 2.0887, "step": 780 }, { "document_regularizer_loss": 0.1338, "epoch": 0.25292443882390137, "grad_norm": 17.952852249145508, "learning_rate": 4.9622650301472265e-05, "loss": 2.2929, "query_regularizer_loss": 0.1338, "sparse_loss": 2.0252, "step": 800 }, { "document_regularizer_loss": 0.128, "epoch": 0.2592475497944989, "grad_norm": 12.585227012634277, "learning_rate": 4.957476265136993e-05, "loss": 2.3264, "query_regularizer_loss": 0.128, "sparse_loss": 2.0704, "step": 820 }, { "document_regularizer_loss": 0.1269, "epoch": 0.26557066076509644, "grad_norm": 16.949398040771484, "learning_rate": 4.952404110010757e-05, "loss": 2.7711, "query_regularizer_loss": 0.1269, "sparse_loss": 2.5173, "step": 840 }, { "document_regularizer_loss": 0.1213, "epoch": 0.27189377173569396, "grad_norm": 11.61090087890625, "learning_rate": 4.947049149676968e-05, "loss": 2.2928, "query_regularizer_loss": 0.1213, "sparse_loss": 2.0502, "step": 860 }, { "document_regularizer_loss": 0.1157, "epoch": 0.27821688270629147, "grad_norm": 10.377798080444336, "learning_rate": 4.941412001656474e-05, "loss": 2.1857, "query_regularizer_loss": 0.1157, "sparse_loss": 1.9544, "step": 880 }, { "document_regularizer_loss": 0.1112, "epoch": 0.28453999367688904, "grad_norm": 12.839447021484375, "learning_rate": 4.9354933160113135e-05, "loss": 2.1, "query_regularizer_loss": 0.1111, "sparse_loss": 1.8777, "step": 900 }, { "document_regularizer_loss": 0.1094, "epoch": 0.29086310464748655, "grad_norm": 15.7372465133667, "learning_rate": 4.929293775269754e-05, "loss": 2.1598, "query_regularizer_loss": 0.1094, "sparse_loss": 1.941, "step": 920 }, { "document_regularizer_loss": 0.1069, "epoch": 0.2971862156180841, "grad_norm": 85.94547271728516, "learning_rate": 4.9228140943475766e-05, "loss": 1.9527, "query_regularizer_loss": 0.1069, "sparse_loss": 1.7389, "step": 940 }, { "document_regularizer_loss": 0.1045, "epoch": 0.3035093265886816, "grad_norm": 16.49176025390625, "learning_rate": 4.9160550204656416e-05, "loss": 2.0608, "query_regularizer_loss": 0.1045, "sparse_loss": 1.8519, "step": 960 }, { "document_regularizer_loss": 0.1019, "epoch": 0.30983243755927914, "grad_norm": 11.689447402954102, "learning_rate": 4.909017333063719e-05, "loss": 2.0235, "query_regularizer_loss": 0.1019, "sparse_loss": 1.8198, "step": 980 }, { "document_regularizer_loss": 0.0988, "epoch": 0.3161555485298767, "grad_norm": 15.240221977233887, "learning_rate": 4.901701843710602e-05, "loss": 1.9305, "query_regularizer_loss": 0.0988, "sparse_loss": 1.7328, "step": 1000 }, { "document_regularizer_loss": 0.0962, "epoch": 0.3224786595004742, "grad_norm": 8.189220428466797, "learning_rate": 4.894109396010522e-05, "loss": 1.9598, "query_regularizer_loss": 0.0962, "sparse_loss": 1.7673, "step": 1020 }, { "document_regularizer_loss": 0.0945, "epoch": 0.3288017704710718, "grad_norm": 17.443077087402344, "learning_rate": 4.886240865505865e-05, "loss": 1.9558, "query_regularizer_loss": 0.0945, "sparse_loss": 1.7667, "step": 1040 }, { "document_regularizer_loss": 0.094, "epoch": 0.3351248814416693, "grad_norm": 10.96996784210205, "learning_rate": 4.87809715957621e-05, "loss": 2.0087, "query_regularizer_loss": 0.094, "sparse_loss": 1.8206, "step": 1060 }, { "document_regularizer_loss": 0.0934, "epoch": 0.3414479924122668, "grad_norm": 9.809027671813965, "learning_rate": 4.8696792173336845e-05, "loss": 1.9493, "query_regularizer_loss": 0.0934, "sparse_loss": 1.7625, "step": 1080 }, { "document_regularizer_loss": 0.0914, "epoch": 0.3477711033828644, "grad_norm": 20.714750289916992, "learning_rate": 4.860988009514675e-05, "loss": 1.7575, "query_regularizer_loss": 0.0914, "sparse_loss": 1.5748, "step": 1100 }, { "document_regularizer_loss": 0.0898, "epoch": 0.3540942143534619, "grad_norm": 16.20149040222168, "learning_rate": 4.852024538367882e-05, "loss": 1.7915, "query_regularizer_loss": 0.0898, "sparse_loss": 1.6119, "step": 1120 }, { "document_regularizer_loss": 0.0883, "epoch": 0.36041732532405946, "grad_norm": 43.65206527709961, "learning_rate": 4.842789837538741e-05, "loss": 1.8282, "query_regularizer_loss": 0.0883, "sparse_loss": 1.6516, "step": 1140 }, { "document_regularizer_loss": 0.0866, "epoch": 0.36674043629465697, "grad_norm": 9.418038368225098, "learning_rate": 4.83328497195023e-05, "loss": 1.774, "query_regularizer_loss": 0.0867, "sparse_loss": 1.6007, "step": 1160 }, { "document_regularizer_loss": 0.0845, "epoch": 0.3730635472652545, "grad_norm": 13.31879997253418, "learning_rate": 4.82351103768006e-05, "loss": 1.7967, "query_regularizer_loss": 0.0845, "sparse_loss": 1.6277, "step": 1180 }, { "document_regularizer_loss": 0.0831, "epoch": 0.37938665823585205, "grad_norm": 14.40135383605957, "learning_rate": 4.813469161834282e-05, "loss": 1.7661, "query_regularizer_loss": 0.0831, "sparse_loss": 1.5999, "step": 1200 }, { "document_regularizer_loss": 0.0817, "epoch": 0.38570976920644956, "grad_norm": 9.40664005279541, "learning_rate": 4.803160502417309e-05, "loss": 1.7127, "query_regularizer_loss": 0.0817, "sparse_loss": 1.5493, "step": 1220 }, { "document_regularizer_loss": 0.0799, "epoch": 0.3920328801770471, "grad_norm": 7.551331520080566, "learning_rate": 4.7925862481983794e-05, "loss": 1.6856, "query_regularizer_loss": 0.0799, "sparse_loss": 1.5259, "step": 1240 }, { "document_regularizer_loss": 0.0792, "epoch": 0.39835599114764464, "grad_norm": 8.512903213500977, "learning_rate": 4.7817476185744705e-05, "loss": 1.737, "query_regularizer_loss": 0.0792, "sparse_loss": 1.5786, "step": 1260 }, { "document_regularizer_loss": 0.0777, "epoch": 0.40467910211824215, "grad_norm": 7.908294200897217, "learning_rate": 4.770645863429681e-05, "loss": 1.7078, "query_regularizer_loss": 0.0777, "sparse_loss": 1.5525, "step": 1280 }, { "document_regularizer_loss": 0.076, "epoch": 0.4110022130888397, "grad_norm": 7.902525901794434, "learning_rate": 4.759282262991097e-05, "loss": 1.7971, "query_regularizer_loss": 0.076, "sparse_loss": 1.6451, "step": 1300 }, { "document_regularizer_loss": 0.0745, "epoch": 0.41732532405943723, "grad_norm": 7.720804214477539, "learning_rate": 4.7476581276811594e-05, "loss": 1.6587, "query_regularizer_loss": 0.0745, "sparse_loss": 1.5096, "step": 1320 }, { "document_regularizer_loss": 0.0728, "epoch": 0.4236484350300348, "grad_norm": 6.726785659790039, "learning_rate": 4.7357747979665504e-05, "loss": 1.6127, "query_regularizer_loss": 0.0728, "sparse_loss": 1.4672, "step": 1340 }, { "document_regularizer_loss": 0.0714, "epoch": 0.4299715460006323, "grad_norm": 8.149948120117188, "learning_rate": 4.723633644203612e-05, "loss": 1.5483, "query_regularizer_loss": 0.0714, "sparse_loss": 1.4055, "step": 1360 }, { "document_regularizer_loss": 0.0706, "epoch": 0.4362946569712298, "grad_norm": 11.196396827697754, "learning_rate": 4.711236066480322e-05, "loss": 1.5743, "query_regularizer_loss": 0.0706, "sparse_loss": 1.4331, "step": 1380 }, { "document_regularizer_loss": 0.0696, "epoch": 0.4426177679418274, "grad_norm": 8.784821510314941, "learning_rate": 4.698583494454837e-05, "loss": 1.6291, "query_regularizer_loss": 0.0696, "sparse_loss": 1.4899, "step": 1400 }, { "document_regularizer_loss": 0.0681, "epoch": 0.4489408789124249, "grad_norm": 8.496731758117676, "learning_rate": 4.68567738719063e-05, "loss": 1.6277, "query_regularizer_loss": 0.068, "sparse_loss": 1.4916, "step": 1420 }, { "document_regularizer_loss": 0.0666, "epoch": 0.45526398988302247, "grad_norm": 9.2264986038208, "learning_rate": 4.672519232988234e-05, "loss": 1.5486, "query_regularizer_loss": 0.0666, "sparse_loss": 1.4155, "step": 1440 }, { "document_regularizer_loss": 0.0654, "epoch": 0.46158710085362, "grad_norm": 8.807757377624512, "learning_rate": 4.659110549213615e-05, "loss": 1.5393, "query_regularizer_loss": 0.0654, "sparse_loss": 1.4084, "step": 1460 }, { "document_regularizer_loss": 0.064, "epoch": 0.4679102118242175, "grad_norm": 6.820550441741943, "learning_rate": 4.645452882123192e-05, "loss": 1.5138, "query_regularizer_loss": 0.064, "sparse_loss": 1.3858, "step": 1480 }, { "document_regularizer_loss": 0.0622, "epoch": 0.47423332279481506, "grad_norm": 6.402284622192383, "learning_rate": 4.6315478066855274e-05, "loss": 1.5601, "query_regularizer_loss": 0.0621, "sparse_loss": 1.4358, "step": 1500 }, { "document_regularizer_loss": 0.0614, "epoch": 0.48055643376541257, "grad_norm": 24.8136043548584, "learning_rate": 4.617396926399706e-05, "loss": 1.5127, "query_regularizer_loss": 0.0614, "sparse_loss": 1.39, "step": 1520 }, { "document_regularizer_loss": 0.0615, "epoch": 0.48687954473601014, "grad_norm": 14.119754791259766, "learning_rate": 4.603001873110422e-05, "loss": 1.5186, "query_regularizer_loss": 0.0615, "sparse_loss": 1.3956, "step": 1540 }, { "document_regularizer_loss": 0.0606, "epoch": 0.49320265570660765, "grad_norm": 10.744440078735352, "learning_rate": 4.588364306819801e-05, "loss": 1.4835, "query_regularizer_loss": 0.0606, "sparse_loss": 1.3624, "step": 1560 }, { "document_regularizer_loss": 0.0598, "epoch": 0.49952576667720516, "grad_norm": 7.516956329345703, "learning_rate": 4.57348591549597e-05, "loss": 1.3831, "query_regularizer_loss": 0.0598, "sparse_loss": 1.2636, "step": 1580 }, { "document_regularizer_loss": 0.0586, "epoch": 0.5058488776478027, "grad_norm": 9.290154457092285, "learning_rate": 4.558368414878405e-05, "loss": 1.5297, "query_regularizer_loss": 0.0586, "sparse_loss": 1.4126, "step": 1600 }, { "document_regularizer_loss": 0.0575, "epoch": 0.5121719886184003, "grad_norm": 8.14932918548584, "learning_rate": 4.543013548280082e-05, "loss": 1.4104, "query_regularizer_loss": 0.0575, "sparse_loss": 1.2954, "step": 1620 }, { "document_regularizer_loss": 0.0563, "epoch": 0.5184950995889978, "grad_norm": 6.615036964416504, "learning_rate": 4.527423086386432e-05, "loss": 1.3922, "query_regularizer_loss": 0.0563, "sparse_loss": 1.2795, "step": 1640 }, { "document_regularizer_loss": 0.0554, "epoch": 0.5248182105595953, "grad_norm": 5.8693013191223145, "learning_rate": 4.51159882705116e-05, "loss": 1.4043, "query_regularizer_loss": 0.0554, "sparse_loss": 1.2935, "step": 1660 }, { "document_regularizer_loss": 0.0541, "epoch": 0.5311413215301929, "grad_norm": 6.5546650886535645, "learning_rate": 4.495542595088914e-05, "loss": 1.4286, "query_regularizer_loss": 0.0541, "sparse_loss": 1.3203, "step": 1680 }, { "document_regularizer_loss": 0.0535, "epoch": 0.5374644325007903, "grad_norm": 7.110738754272461, "learning_rate": 4.4792562420648574e-05, "loss": 1.3533, "query_regularizer_loss": 0.0535, "sparse_loss": 1.2462, "step": 1700 }, { "document_regularizer_loss": 0.053, "epoch": 0.5437875434713879, "grad_norm": 7.050394058227539, "learning_rate": 4.462741646081145e-05, "loss": 1.3941, "query_regularizer_loss": 0.053, "sparse_loss": 1.288, "step": 1720 }, { "document_regularizer_loss": 0.0516, "epoch": 0.5501106544419855, "grad_norm": 7.823602199554443, "learning_rate": 4.446000711560351e-05, "loss": 1.3218, "query_regularizer_loss": 0.0516, "sparse_loss": 1.2186, "step": 1740 }, { "document_regularizer_loss": 0.0511, "epoch": 0.5564337654125829, "grad_norm": 8.4823579788208, "learning_rate": 4.42903536902585e-05, "loss": 1.3049, "query_regularizer_loss": 0.0511, "sparse_loss": 1.2027, "step": 1760 }, { "document_regularizer_loss": 0.051, "epoch": 0.5627568763831805, "grad_norm": 6.614449977874756, "learning_rate": 4.4118475748791985e-05, "loss": 1.4483, "query_regularizer_loss": 0.051, "sparse_loss": 1.3464, "step": 1780 }, { "document_regularizer_loss": 0.0503, "epoch": 0.5690799873537781, "grad_norm": 6.088893890380859, "learning_rate": 4.3944393111745255e-05, "loss": 1.3819, "query_regularizer_loss": 0.0503, "sparse_loss": 1.2812, "step": 1800 }, { "document_regularizer_loss": 0.0497, "epoch": 0.5754030983243756, "grad_norm": 7.0833024978637695, "learning_rate": 4.376812585389967e-05, "loss": 1.3073, "query_regularizer_loss": 0.0497, "sparse_loss": 1.2078, "step": 1820 }, { "document_regularizer_loss": 0.0488, "epoch": 0.5817262092949731, "grad_norm": 6.979008197784424, "learning_rate": 4.358969430196166e-05, "loss": 1.3515, "query_regularizer_loss": 0.0488, "sparse_loss": 1.2538, "step": 1840 }, { "document_regularizer_loss": 0.0483, "epoch": 0.5880493202655707, "grad_norm": 6.331544399261475, "learning_rate": 4.340911903221875e-05, "loss": 1.3165, "query_regularizer_loss": 0.0482, "sparse_loss": 1.22, "step": 1860 }, { "document_regularizer_loss": 0.0476, "epoch": 0.5943724312361682, "grad_norm": 5.857104301452637, "learning_rate": 4.322642086816674e-05, "loss": 1.2582, "query_regularizer_loss": 0.0476, "sparse_loss": 1.163, "step": 1880 }, { "document_regularizer_loss": 0.0471, "epoch": 0.6006955422067657, "grad_norm": 11.978568077087402, "learning_rate": 4.3041620878108336e-05, "loss": 1.2801, "query_regularizer_loss": 0.0471, "sparse_loss": 1.1858, "step": 1900 }, { "document_regularizer_loss": 0.0463, "epoch": 0.6070186531773633, "grad_norm": 7.875554084777832, "learning_rate": 4.2854740372723686e-05, "loss": 1.2912, "query_regularizer_loss": 0.0463, "sparse_loss": 1.1985, "step": 1920 }, { "document_regularizer_loss": 0.0455, "epoch": 0.6133417641479608, "grad_norm": 7.675542831420898, "learning_rate": 4.266580090261282e-05, "loss": 1.2768, "query_regularizer_loss": 0.0455, "sparse_loss": 1.1858, "step": 1940 }, { "document_regularizer_loss": 0.045, "epoch": 0.6196648751185583, "grad_norm": 14.170219421386719, "learning_rate": 4.247482425581053e-05, "loss": 1.2681, "query_regularizer_loss": 0.045, "sparse_loss": 1.178, "step": 1960 }, { "document_regularizer_loss": 0.045, "epoch": 0.6259879860891558, "grad_norm": 15.395133972167969, "learning_rate": 4.2281832455273805e-05, "loss": 1.2818, "query_regularizer_loss": 0.045, "sparse_loss": 1.1918, "step": 1980 }, { "document_regularizer_loss": 0.0447, "epoch": 0.6323110970597534, "grad_norm": 10.426234245300293, "learning_rate": 4.208684775634221e-05, "loss": 1.2085, "query_regularizer_loss": 0.0447, "sparse_loss": 1.1191, "step": 2000 }, { "epoch": 0.6323110970597534, "eval_runtime": 274.6851, "eval_samples_per_second": 0.0, "eval_sparse-ir-eval_avg_flops": 893.3501586914062, "eval_sparse-ir-eval_corpus_active_dims": 1024.0, "eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712, "eval_sparse-ir-eval_dot_accuracy@1": 0.044191161767646474, "eval_sparse-ir-eval_dot_accuracy@100": 0.34593081383723256, "eval_sparse-ir-eval_dot_accuracy@50": 0.2571485702859428, "eval_sparse-ir-eval_dot_accuracy@8": 0.1227754449110178, "eval_sparse-ir-eval_dot_map@100": 0.0739891059526251, "eval_sparse-ir-eval_dot_mrr@10": 0.06742675274468914, "eval_sparse-ir-eval_dot_ndcg@10": 0.08339501666788006, "eval_sparse-ir-eval_dot_precision@1": 0.044191161767646474, "eval_sparse-ir-eval_dot_precision@100": 0.0034593081383723257, "eval_sparse-ir-eval_dot_precision@50": 0.005142971405718857, "eval_sparse-ir-eval_dot_precision@8": 0.015346930613877225, "eval_sparse-ir-eval_dot_recall@1": 0.044191161767646474, "eval_sparse-ir-eval_dot_recall@100": 0.34593081383723256, "eval_sparse-ir-eval_dot_recall@50": 0.2571485702859428, "eval_sparse-ir-eval_dot_recall@8": 0.1227754449110178, "eval_sparse-ir-eval_query_active_dims": 1024.0, "eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712, "eval_steps_per_second": 0.0, "step": 2000 }, { "document_regularizer_loss": 0.0442, "epoch": 0.638634208030351, "grad_norm": 6.597539901733398, "learning_rate": 4.1889892644171435e-05, "loss": 1.2319, "query_regularizer_loss": 0.0442, "sparse_loss": 1.1435, "step": 2020 }, { "document_regularizer_loss": 0.0436, "epoch": 0.6449573190009484, "grad_norm": 5.0735087394714355, "learning_rate": 4.1690989831140394e-05, "loss": 1.2843, "query_regularizer_loss": 0.0436, "sparse_loss": 1.1971, "step": 2040 }, { "document_regularizer_loss": 0.0429, "epoch": 0.651280429971546, "grad_norm": 6.517344951629639, "learning_rate": 4.1490162254232054e-05, "loss": 1.2895, "query_regularizer_loss": 0.0429, "sparse_loss": 1.2036, "step": 2060 }, { "document_regularizer_loss": 0.0426, "epoch": 0.6576035409421436, "grad_norm": 21.777257919311523, "learning_rate": 4.1287433072388436e-05, "loss": 1.2754, "query_regularizer_loss": 0.0426, "sparse_loss": 1.1902, "step": 2080 }, { "document_regularizer_loss": 0.0423, "epoch": 0.663926651912741, "grad_norm": 7.3678975105285645, "learning_rate": 4.108282566383994e-05, "loss": 1.3094, "query_regularizer_loss": 0.0423, "sparse_loss": 1.2248, "step": 2100 }, { "document_regularizer_loss": 0.0422, "epoch": 0.6702497628833386, "grad_norm": 6.312955379486084, "learning_rate": 4.087636362340948e-05, "loss": 1.1937, "query_regularizer_loss": 0.0422, "sparse_loss": 1.1092, "step": 2120 }, { "document_regularizer_loss": 0.0417, "epoch": 0.6765728738539362, "grad_norm": 5.551113128662109, "learning_rate": 4.0668070759791524e-05, "loss": 1.2294, "query_regularizer_loss": 0.0417, "sparse_loss": 1.1461, "step": 2140 }, { "document_regularizer_loss": 0.0408, "epoch": 0.6828959848245336, "grad_norm": 7.7479023933410645, "learning_rate": 4.0457971092806566e-05, "loss": 1.2211, "query_regularizer_loss": 0.0408, "sparse_loss": 1.1395, "step": 2160 }, { "document_regularizer_loss": 0.0402, "epoch": 0.6892190957951312, "grad_norm": 13.669305801391602, "learning_rate": 4.0246088850631246e-05, "loss": 1.3088, "query_regularizer_loss": 0.0402, "sparse_loss": 1.2285, "step": 2180 }, { "document_regularizer_loss": 0.0398, "epoch": 0.6955422067657288, "grad_norm": 28.209056854248047, "learning_rate": 4.003244846700437e-05, "loss": 1.1989, "query_regularizer_loss": 0.0398, "sparse_loss": 1.1193, "step": 2200 }, { "document_regularizer_loss": 0.0396, "epoch": 0.7018653177363263, "grad_norm": 7.1696319580078125, "learning_rate": 3.981707457840927e-05, "loss": 1.2486, "query_regularizer_loss": 0.0396, "sparse_loss": 1.1695, "step": 2220 }, { "document_regularizer_loss": 0.0394, "epoch": 0.7081884287069238, "grad_norm": 8.591996192932129, "learning_rate": 3.9599992021232865e-05, "loss": 1.1296, "query_regularizer_loss": 0.0394, "sparse_loss": 1.0508, "step": 2240 }, { "document_regularizer_loss": 0.0393, "epoch": 0.7145115396775213, "grad_norm": 9.680275917053223, "learning_rate": 3.938122582890147e-05, "loss": 1.1456, "query_regularizer_loss": 0.0393, "sparse_loss": 1.0669, "step": 2260 }, { "document_regularizer_loss": 0.0384, "epoch": 0.7208346506481189, "grad_norm": 6.841869354248047, "learning_rate": 3.916080122899408e-05, "loss": 1.2594, "query_regularizer_loss": 0.0384, "sparse_loss": 1.1827, "step": 2280 }, { "document_regularizer_loss": 0.0381, "epoch": 0.7271577616187164, "grad_norm": 7.08558988571167, "learning_rate": 3.893874364033319e-05, "loss": 1.1598, "query_regularizer_loss": 0.0381, "sparse_loss": 1.0835, "step": 2300 }, { "document_regularizer_loss": 0.0373, "epoch": 0.7334808725893139, "grad_norm": 4.713133811950684, "learning_rate": 3.871507867005353e-05, "loss": 1.1291, "query_regularizer_loss": 0.0373, "sparse_loss": 1.0544, "step": 2320 }, { "document_regularizer_loss": 0.0372, "epoch": 0.7398039835599115, "grad_norm": 6.019435405731201, "learning_rate": 3.8489832110649106e-05, "loss": 1.1203, "query_regularizer_loss": 0.0372, "sparse_loss": 1.0459, "step": 2340 }, { "document_regularizer_loss": 0.037, "epoch": 0.746127094530509, "grad_norm": 5.9214887619018555, "learning_rate": 3.8263029936998914e-05, "loss": 1.1708, "query_regularizer_loss": 0.037, "sparse_loss": 1.0969, "step": 2360 }, { "document_regularizer_loss": 0.0365, "epoch": 0.7524502055011065, "grad_norm": 13.986381530761719, "learning_rate": 3.803469830337154e-05, "loss": 1.175, "query_regularizer_loss": 0.0365, "sparse_loss": 1.102, "step": 2380 }, { "document_regularizer_loss": 0.0363, "epoch": 0.7587733164717041, "grad_norm": 5.55244255065918, "learning_rate": 3.7804863540409155e-05, "loss": 1.2057, "query_regularizer_loss": 0.0363, "sparse_loss": 1.1331, "step": 2400 }, { "document_regularizer_loss": 0.0359, "epoch": 0.7650964274423017, "grad_norm": 4.60949182510376, "learning_rate": 3.7573552152091065e-05, "loss": 1.2125, "query_regularizer_loss": 0.0359, "sparse_loss": 1.1407, "step": 2420 }, { "document_regularizer_loss": 0.0353, "epoch": 0.7714195384128991, "grad_norm": 6.324008941650391, "learning_rate": 3.7340790812677426e-05, "loss": 1.2678, "query_regularizer_loss": 0.0353, "sparse_loss": 1.1972, "step": 2440 }, { "document_regularizer_loss": 0.0349, "epoch": 0.7777426493834967, "grad_norm": 6.179075241088867, "learning_rate": 3.710660636363315e-05, "loss": 1.1447, "query_regularizer_loss": 0.0349, "sparse_loss": 1.0749, "step": 2460 }, { "document_regularizer_loss": 0.0346, "epoch": 0.7840657603540943, "grad_norm": 5.324189186096191, "learning_rate": 3.687102581053267e-05, "loss": 1.2268, "query_regularizer_loss": 0.0346, "sparse_loss": 1.1575, "step": 2480 }, { "document_regularizer_loss": 0.0342, "epoch": 0.7903888713246917, "grad_norm": 6.188036918640137, "learning_rate": 3.6634076319945706e-05, "loss": 1.1557, "query_regularizer_loss": 0.0342, "sparse_loss": 1.0872, "step": 2500 }, { "document_regularizer_loss": 0.0335, "epoch": 0.7967119822952893, "grad_norm": 5.936458587646484, "learning_rate": 3.639578521630445e-05, "loss": 1.1321, "query_regularizer_loss": 0.0335, "sparse_loss": 1.0651, "step": 2520 }, { "document_regularizer_loss": 0.0331, "epoch": 0.8030350932658868, "grad_norm": 5.506819248199463, "learning_rate": 3.615617997875265e-05, "loss": 1.1172, "query_regularizer_loss": 0.0331, "sparse_loss": 1.051, "step": 2540 }, { "document_regularizer_loss": 0.0332, "epoch": 0.8093582042364843, "grad_norm": 4.83391809463501, "learning_rate": 3.591528823797672e-05, "loss": 1.1761, "query_regularizer_loss": 0.0332, "sparse_loss": 1.1097, "step": 2560 }, { "document_regularizer_loss": 0.0327, "epoch": 0.8156813152070819, "grad_norm": 5.821810722351074, "learning_rate": 3.567313777301946e-05, "loss": 1.1746, "query_regularizer_loss": 0.0327, "sparse_loss": 1.1091, "step": 2580 }, { "document_regularizer_loss": 0.0323, "epoch": 0.8220044261776794, "grad_norm": 5.580266952514648, "learning_rate": 3.5429756508076664e-05, "loss": 1.1864, "query_regularizer_loss": 0.0323, "sparse_loss": 1.1218, "step": 2600 }, { "document_regularizer_loss": 0.032, "epoch": 0.828327537148277, "grad_norm": 6.912046909332275, "learning_rate": 3.5185172509276926e-05, "loss": 1.096, "query_regularizer_loss": 0.032, "sparse_loss": 1.0319, "step": 2620 }, { "document_regularizer_loss": 0.0316, "epoch": 0.8346506481188745, "grad_norm": 4.675662517547607, "learning_rate": 3.4939413981445165e-05, "loss": 1.0784, "query_regularizer_loss": 0.0315, "sparse_loss": 1.0153, "step": 2640 }, { "document_regularizer_loss": 0.0313, "epoch": 0.840973759089472, "grad_norm": 6.675909042358398, "learning_rate": 3.46925092648501e-05, "loss": 1.1665, "query_regularizer_loss": 0.0313, "sparse_loss": 1.104, "step": 2660 }, { "document_regularizer_loss": 0.0309, "epoch": 0.8472968700600696, "grad_norm": 6.421684741973877, "learning_rate": 3.444448683193611e-05, "loss": 1.0553, "query_regularizer_loss": 0.0309, "sparse_loss": 0.9936, "step": 2680 }, { "document_regularizer_loss": 0.0307, "epoch": 0.853619981030667, "grad_norm": 8.149559020996094, "learning_rate": 3.419537528403986e-05, "loss": 1.0657, "query_regularizer_loss": 0.0307, "sparse_loss": 1.0042, "step": 2700 }, { "document_regularizer_loss": 0.0305, "epoch": 0.8599430920012646, "grad_norm": 7.086170196533203, "learning_rate": 3.39452033480921e-05, "loss": 1.0973, "query_regularizer_loss": 0.0305, "sparse_loss": 1.0362, "step": 2720 }, { "document_regularizer_loss": 0.0302, "epoch": 0.8662662029718622, "grad_norm": 6.212243556976318, "learning_rate": 3.3693999873304904e-05, "loss": 1.0824, "query_regularizer_loss": 0.0302, "sparse_loss": 1.0219, "step": 2740 }, { "document_regularizer_loss": 0.0296, "epoch": 0.8725893139424596, "grad_norm": 6.539682865142822, "learning_rate": 3.344179382784488e-05, "loss": 1.0886, "query_regularizer_loss": 0.0296, "sparse_loss": 1.0295, "step": 2760 }, { "document_regularizer_loss": 0.029, "epoch": 0.8789124249130572, "grad_norm": 5.484647274017334, "learning_rate": 3.3188614295492595e-05, "loss": 1.1338, "query_regularizer_loss": 0.029, "sparse_loss": 1.0757, "step": 2780 }, { "document_regularizer_loss": 0.0292, "epoch": 0.8852355358836548, "grad_norm": 6.082838535308838, "learning_rate": 3.293449047228874e-05, "loss": 1.1033, "query_regularizer_loss": 0.0292, "sparse_loss": 1.0449, "step": 2800 }, { "document_regularizer_loss": 0.0288, "epoch": 0.8915586468542523, "grad_norm": 7.450719356536865, "learning_rate": 3.2679451663167326e-05, "loss": 1.0429, "query_regularizer_loss": 0.0288, "sparse_loss": 0.9852, "step": 2820 }, { "document_regularizer_loss": 0.0285, "epoch": 0.8978817578248498, "grad_norm": 6.752073287963867, "learning_rate": 3.242352727857625e-05, "loss": 1.0102, "query_regularizer_loss": 0.0285, "sparse_loss": 0.9532, "step": 2840 }, { "document_regularizer_loss": 0.0281, "epoch": 0.9042048687954474, "grad_norm": 5.853407859802246, "learning_rate": 3.216674683108583e-05, "loss": 1.1599, "query_regularizer_loss": 0.0281, "sparse_loss": 1.1036, "step": 2860 }, { "document_regularizer_loss": 0.0281, "epoch": 0.9105279797660449, "grad_norm": 7.191678524017334, "learning_rate": 3.1909139931985415e-05, "loss": 1.0423, "query_regularizer_loss": 0.0281, "sparse_loss": 0.9862, "step": 2880 }, { "document_regularizer_loss": 0.028, "epoch": 0.9168510907366424, "grad_norm": 10.293112754821777, "learning_rate": 3.165073628786876e-05, "loss": 1.0815, "query_regularizer_loss": 0.028, "sparse_loss": 1.0256, "step": 2900 }, { "document_regularizer_loss": 0.0278, "epoch": 0.92317420170724, "grad_norm": 5.882568836212158, "learning_rate": 3.139156569720826e-05, "loss": 1.0804, "query_regularizer_loss": 0.0278, "sparse_loss": 1.0248, "step": 2920 }, { "document_regularizer_loss": 0.0276, "epoch": 0.9294973126778375, "grad_norm": 5.085528373718262, "learning_rate": 3.113165804691871e-05, "loss": 1.1668, "query_regularizer_loss": 0.0276, "sparse_loss": 1.1115, "step": 2940 }, { "document_regularizer_loss": 0.0273, "epoch": 0.935820423648435, "grad_norm": 5.272675037384033, "learning_rate": 3.0871043308910816e-05, "loss": 1.0606, "query_regularizer_loss": 0.0273, "sparse_loss": 1.006, "step": 2960 }, { "document_regularizer_loss": 0.0267, "epoch": 0.9421435346190326, "grad_norm": 5.916753768920898, "learning_rate": 3.06097515366349e-05, "loss": 1.0705, "query_regularizer_loss": 0.0267, "sparse_loss": 1.0172, "step": 2980 }, { "document_regularizer_loss": 0.0265, "epoch": 0.9484666455896301, "grad_norm": 6.121260166168213, "learning_rate": 3.034781286161519e-05, "loss": 1.072, "query_regularizer_loss": 0.0265, "sparse_loss": 1.0189, "step": 3000 }, { "document_regularizer_loss": 0.0264, "epoch": 0.9547897565602277, "grad_norm": 5.811629295349121, "learning_rate": 3.0085257489975167e-05, "loss": 1.1239, "query_regularizer_loss": 0.0264, "sparse_loss": 1.0711, "step": 3020 }, { "document_regularizer_loss": 0.0263, "epoch": 0.9611128675308251, "grad_norm": 14.449254989624023, "learning_rate": 2.982211569895424e-05, "loss": 1.112, "query_regularizer_loss": 0.0263, "sparse_loss": 1.0594, "step": 3040 }, { "document_regularizer_loss": 0.0264, "epoch": 0.9674359785014227, "grad_norm": 7.664610862731934, "learning_rate": 2.9558417833416264e-05, "loss": 1.0759, "query_regularizer_loss": 0.0264, "sparse_loss": 1.0231, "step": 3060 }, { "document_regularizer_loss": 0.0257, "epoch": 0.9737590894720203, "grad_norm": 6.444000720977783, "learning_rate": 2.9294194302350225e-05, "loss": 0.956, "query_regularizer_loss": 0.0257, "sparse_loss": 0.9047, "step": 3080 }, { "document_regularizer_loss": 0.0255, "epoch": 0.9800822004426177, "grad_norm": 5.407084941864014, "learning_rate": 2.902947557536359e-05, "loss": 0.9945, "query_regularizer_loss": 0.0255, "sparse_loss": 0.9435, "step": 3100 }, { "document_regularizer_loss": 0.0253, "epoch": 0.9864053114132153, "grad_norm": 7.782375335693359, "learning_rate": 2.8764292179168566e-05, "loss": 1.0119, "query_regularizer_loss": 0.0253, "sparse_loss": 0.9613, "step": 3120 }, { "document_regularizer_loss": 0.025, "epoch": 0.9927284223838129, "grad_norm": 5.379085540771484, "learning_rate": 2.849867469406191e-05, "loss": 0.9965, "query_regularizer_loss": 0.025, "sparse_loss": 0.9465, "step": 3140 }, { "document_regularizer_loss": 0.0247, "epoch": 0.9990515333544103, "grad_norm": 13.918062210083008, "learning_rate": 2.8232653750398404e-05, "loss": 1.1177, "query_regularizer_loss": 0.0247, "sparse_loss": 1.0683, "step": 3160 }, { "document_regularizer_loss": 0.0243, "epoch": 1.005374644325008, "grad_norm": 5.923994541168213, "learning_rate": 2.796626002505871e-05, "loss": 0.8884, "query_regularizer_loss": 0.0243, "sparse_loss": 0.8398, "step": 3180 }, { "document_regularizer_loss": 0.0243, "epoch": 1.0116977552956055, "grad_norm": 5.905787467956543, "learning_rate": 2.7699524237911735e-05, "loss": 0.9041, "query_regularizer_loss": 0.0243, "sparse_loss": 0.8555, "step": 3200 }, { "document_regularizer_loss": 0.0242, "epoch": 1.018020866266203, "grad_norm": 7.144820213317871, "learning_rate": 2.7432477148272124e-05, "loss": 0.9367, "query_regularizer_loss": 0.0242, "sparse_loss": 0.8882, "step": 3220 }, { "document_regularizer_loss": 0.024, "epoch": 1.0243439772368006, "grad_norm": 5.734910011291504, "learning_rate": 2.7165149551353152e-05, "loss": 0.8253, "query_regularizer_loss": 0.024, "sparse_loss": 0.7774, "step": 3240 }, { "document_regularizer_loss": 0.0237, "epoch": 1.030667088207398, "grad_norm": 4.406752586364746, "learning_rate": 2.689757227471551e-05, "loss": 0.8637, "query_regularizer_loss": 0.0237, "sparse_loss": 0.8163, "step": 3260 }, { "document_regularizer_loss": 0.0235, "epoch": 1.0369901991779955, "grad_norm": 18.512943267822266, "learning_rate": 2.662977617471234e-05, "loss": 0.8665, "query_regularizer_loss": 0.0235, "sparse_loss": 0.8195, "step": 3280 }, { "document_regularizer_loss": 0.0236, "epoch": 1.0433133101485932, "grad_norm": 4.601492404937744, "learning_rate": 2.636179213293094e-05, "loss": 0.8306, "query_regularizer_loss": 0.0236, "sparse_loss": 0.7835, "step": 3300 }, { "document_regularizer_loss": 0.0234, "epoch": 1.0496364211191906, "grad_norm": 6.115499973297119, "learning_rate": 2.609365105263162e-05, "loss": 0.8374, "query_regularizer_loss": 0.0234, "sparse_loss": 0.7906, "step": 3320 }, { "document_regularizer_loss": 0.0235, "epoch": 1.055959532089788, "grad_norm": 16.041154861450195, "learning_rate": 2.5825383855183954e-05, "loss": 0.9326, "query_regularizer_loss": 0.0235, "sparse_loss": 0.8855, "step": 3340 }, { "document_regularizer_loss": 0.0233, "epoch": 1.0622826430603858, "grad_norm": 6.99527645111084, "learning_rate": 2.5557021476501058e-05, "loss": 0.8675, "query_regularizer_loss": 0.0233, "sparse_loss": 0.8209, "step": 3360 }, { "document_regularizer_loss": 0.0232, "epoch": 1.0686057540309832, "grad_norm": 9.15439224243164, "learning_rate": 2.528859486347211e-05, "loss": 0.8846, "query_regularizer_loss": 0.0232, "sparse_loss": 0.8383, "step": 3380 }, { "document_regularizer_loss": 0.0232, "epoch": 1.0749288650015807, "grad_norm": 6.056853771209717, "learning_rate": 2.502013497039362e-05, "loss": 0.8782, "query_regularizer_loss": 0.0232, "sparse_loss": 0.8318, "step": 3400 }, { "document_regularizer_loss": 0.0228, "epoch": 1.0812519759721784, "grad_norm": 11.15111255645752, "learning_rate": 2.4751672755399892e-05, "loss": 0.9058, "query_regularizer_loss": 0.0228, "sparse_loss": 0.8602, "step": 3420 }, { "document_regularizer_loss": 0.0226, "epoch": 1.0875750869427758, "grad_norm": 5.096249103546143, "learning_rate": 2.4483239176892978e-05, "loss": 0.8242, "query_regularizer_loss": 0.0226, "sparse_loss": 0.7789, "step": 3440 }, { "document_regularizer_loss": 0.0224, "epoch": 1.0938981979133733, "grad_norm": 4.854412078857422, "learning_rate": 2.4214865189972626e-05, "loss": 0.8406, "query_regularizer_loss": 0.0224, "sparse_loss": 0.7958, "step": 3460 }, { "document_regularizer_loss": 0.0222, "epoch": 1.100221308883971, "grad_norm": 5.57534122467041, "learning_rate": 2.3946581742866662e-05, "loss": 0.8854, "query_regularizer_loss": 0.0222, "sparse_loss": 0.841, "step": 3480 }, { "document_regularizer_loss": 0.022, "epoch": 1.1065444198545684, "grad_norm": 35.59614562988281, "learning_rate": 2.367841977336206e-05, "loss": 0.9114, "query_regularizer_loss": 0.022, "sparse_loss": 0.8674, "step": 3500 }, { "document_regularizer_loss": 0.0218, "epoch": 1.112867530825166, "grad_norm": 5.315453052520752, "learning_rate": 2.3410410205237292e-05, "loss": 0.7916, "query_regularizer_loss": 0.0218, "sparse_loss": 0.7481, "step": 3520 }, { "document_regularizer_loss": 0.0217, "epoch": 1.1191906417957636, "grad_norm": 6.13749361038208, "learning_rate": 2.31425839446963e-05, "loss": 0.8902, "query_regularizer_loss": 0.0217, "sparse_loss": 0.8469, "step": 3540 }, { "document_regularizer_loss": 0.0212, "epoch": 1.125513752766361, "grad_norm": 6.6804962158203125, "learning_rate": 2.2874971876804425e-05, "loss": 0.8235, "query_regularizer_loss": 0.0212, "sparse_loss": 0.7812, "step": 3560 }, { "document_regularizer_loss": 0.021, "epoch": 1.1318368637369587, "grad_norm": 7.544855117797852, "learning_rate": 2.2607604861926847e-05, "loss": 0.8662, "query_regularizer_loss": 0.021, "sparse_loss": 0.8241, "step": 3580 }, { "document_regularizer_loss": 0.0209, "epoch": 1.1381599747075561, "grad_norm": 5.754782199859619, "learning_rate": 2.2340513732169845e-05, "loss": 0.8252, "query_regularizer_loss": 0.0209, "sparse_loss": 0.7835, "step": 3600 }, { "document_regularizer_loss": 0.0208, "epoch": 1.1444830856781536, "grad_norm": 4.892688751220703, "learning_rate": 2.2073729287825283e-05, "loss": 0.8636, "query_regularizer_loss": 0.0208, "sparse_loss": 0.822, "step": 3620 }, { "document_regularizer_loss": 0.0207, "epoch": 1.1508061966487513, "grad_norm": 7.930765628814697, "learning_rate": 2.1807282293818827e-05, "loss": 0.8013, "query_regularizer_loss": 0.0207, "sparse_loss": 0.7599, "step": 3640 }, { "document_regularizer_loss": 0.0209, "epoch": 1.1571293076193487, "grad_norm": 5.00435733795166, "learning_rate": 2.1541203476162222e-05, "loss": 0.8126, "query_regularizer_loss": 0.0209, "sparse_loss": 0.7708, "step": 3660 }, { "document_regularizer_loss": 0.0208, "epoch": 1.1634524185899462, "grad_norm": 5.13680362701416, "learning_rate": 2.1275523518409994e-05, "loss": 0.8361, "query_regularizer_loss": 0.0208, "sparse_loss": 0.7945, "step": 3680 }, { "document_regularizer_loss": 0.0205, "epoch": 1.1697755295605439, "grad_norm": 11.393424034118652, "learning_rate": 2.101027305812113e-05, "loss": 0.8975, "query_regularizer_loss": 0.0205, "sparse_loss": 0.8566, "step": 3700 }, { "document_regularizer_loss": 0.0202, "epoch": 1.1760986405311413, "grad_norm": 5.252847671508789, "learning_rate": 2.0745482683326047e-05, "loss": 0.8723, "query_regularizer_loss": 0.0202, "sparse_loss": 0.832, "step": 3720 }, { "document_regularizer_loss": 0.0202, "epoch": 1.1824217515017388, "grad_norm": 6.195733547210693, "learning_rate": 2.0481182928999194e-05, "loss": 0.7598, "query_regularizer_loss": 0.0202, "sparse_loss": 0.7195, "step": 3740 }, { "document_regularizer_loss": 0.0202, "epoch": 1.1887448624723365, "grad_norm": 6.0683135986328125, "learning_rate": 2.0217404273537928e-05, "loss": 0.8172, "query_regularizer_loss": 0.0202, "sparse_loss": 0.7768, "step": 3760 }, { "document_regularizer_loss": 0.0201, "epoch": 1.195067973442934, "grad_norm": 6.833969593048096, "learning_rate": 1.9954177135247733e-05, "loss": 0.7955, "query_regularizer_loss": 0.0201, "sparse_loss": 0.7554, "step": 3780 }, { "document_regularizer_loss": 0.0198, "epoch": 1.2013910844135314, "grad_norm": 6.259845733642578, "learning_rate": 1.969153186883449e-05, "loss": 0.8491, "query_regularizer_loss": 0.0198, "sparse_loss": 0.8096, "step": 3800 }, { "document_regularizer_loss": 0.0196, "epoch": 1.207714195384129, "grad_norm": 6.139260768890381, "learning_rate": 1.942949876190405e-05, "loss": 0.8096, "query_regularizer_loss": 0.0196, "sparse_loss": 0.7705, "step": 3820 }, { "document_regularizer_loss": 0.0193, "epoch": 1.2140373063547265, "grad_norm": 7.0147175788879395, "learning_rate": 1.9168108031469556e-05, "loss": 0.8215, "query_regularizer_loss": 0.0193, "sparse_loss": 0.7829, "step": 3840 }, { "document_regularizer_loss": 0.0193, "epoch": 1.220360417325324, "grad_norm": 4.83867883682251, "learning_rate": 1.8907389820466858e-05, "loss": 0.8388, "query_regularizer_loss": 0.0193, "sparse_loss": 0.8003, "step": 3860 }, { "document_regularizer_loss": 0.0193, "epoch": 1.2266835282959216, "grad_norm": 5.086630344390869, "learning_rate": 1.8647374194278515e-05, "loss": 0.8766, "query_regularizer_loss": 0.0193, "sparse_loss": 0.8381, "step": 3880 }, { "document_regularizer_loss": 0.0193, "epoch": 1.233006639266519, "grad_norm": 7.497378826141357, "learning_rate": 1.8388091137266754e-05, "loss": 0.8822, "query_regularizer_loss": 0.0193, "sparse_loss": 0.8436, "step": 3900 }, { "document_regularizer_loss": 0.0191, "epoch": 1.2393297502371166, "grad_norm": 6.353434085845947, "learning_rate": 1.8129570549315694e-05, "loss": 0.7843, "query_regularizer_loss": 0.0191, "sparse_loss": 0.746, "step": 3920 }, { "document_regularizer_loss": 0.0189, "epoch": 1.2456528612077142, "grad_norm": 6.209091663360596, "learning_rate": 1.7871842242383447e-05, "loss": 0.7955, "query_regularizer_loss": 0.0189, "sparse_loss": 0.7578, "step": 3940 }, { "document_regularizer_loss": 0.0186, "epoch": 1.2519759721783117, "grad_norm": 5.476807594299316, "learning_rate": 1.761493593706418e-05, "loss": 0.7593, "query_regularizer_loss": 0.0186, "sparse_loss": 0.7221, "step": 3960 }, { "document_regularizer_loss": 0.0189, "epoch": 1.2582990831489091, "grad_norm": 5.646886825561523, "learning_rate": 1.7358881259160883e-05, "loss": 0.8728, "query_regularizer_loss": 0.0189, "sparse_loss": 0.8351, "step": 3980 }, { "document_regularizer_loss": 0.0187, "epoch": 1.2646221941195068, "grad_norm": 5.88949728012085, "learning_rate": 1.710370773626896e-05, "loss": 0.7812, "query_regularizer_loss": 0.0187, "sparse_loss": 0.7438, "step": 4000 }, { "epoch": 1.2646221941195068, "eval_runtime": 144.6586, "eval_samples_per_second": 0.0, "eval_sparse-ir-eval_avg_flops": 853.5161743164062, "eval_sparse-ir-eval_corpus_active_dims": 1024.0, "eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712, "eval_sparse-ir-eval_dot_accuracy@1": 0.04979004199160168, "eval_sparse-ir-eval_dot_accuracy@100": 0.3879224155168966, "eval_sparse-ir-eval_dot_accuracy@50": 0.29734053189362125, "eval_sparse-ir-eval_dot_accuracy@8": 0.14277144571085784, "eval_sparse-ir-eval_dot_map@100": 0.08555792971172127, "eval_sparse-ir-eval_dot_mrr@10": 0.07802764843856622, "eval_sparse-ir-eval_dot_ndcg@10": 0.09659653047217633, "eval_sparse-ir-eval_dot_precision@1": 0.04979004199160168, "eval_sparse-ir-eval_dot_precision@100": 0.0038792241551689668, "eval_sparse-ir-eval_dot_precision@50": 0.005946810637872426, "eval_sparse-ir-eval_dot_precision@8": 0.01784643071385723, "eval_sparse-ir-eval_dot_recall@1": 0.04979004199160168, "eval_sparse-ir-eval_dot_recall@100": 0.3879224155168966, "eval_sparse-ir-eval_dot_recall@50": 0.29734053189362125, "eval_sparse-ir-eval_dot_recall@8": 0.14277144571085784, "eval_sparse-ir-eval_query_active_dims": 1024.0, "eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712, "eval_steps_per_second": 0.0, "step": 4000 }, { "document_regularizer_loss": 0.0185, "epoch": 1.2709453050901043, "grad_norm": 6.946842193603516, "learning_rate": 1.6849444794371173e-05, "loss": 0.7947, "query_regularizer_loss": 0.0185, "sparse_loss": 0.7577, "step": 4020 }, { "document_regularizer_loss": 0.0187, "epoch": 1.2772684160607017, "grad_norm": 5.774596214294434, "learning_rate": 1.6596121754444365e-05, "loss": 0.861, "query_regularizer_loss": 0.0187, "sparse_loss": 0.8236, "step": 4040 }, { "document_regularizer_loss": 0.0185, "epoch": 1.2835915270312994, "grad_norm": 5.410093307495117, "learning_rate": 1.6343767829078157e-05, "loss": 0.7238, "query_regularizer_loss": 0.0185, "sparse_loss": 0.6868, "step": 4060 }, { "document_regularizer_loss": 0.0183, "epoch": 1.2899146380018969, "grad_norm": 6.174851894378662, "learning_rate": 1.609241211910628e-05, "loss": 0.8105, "query_regularizer_loss": 0.0183, "sparse_loss": 0.7738, "step": 4080 }, { "document_regularizer_loss": 0.0181, "epoch": 1.2962377489724943, "grad_norm": 6.427083969116211, "learning_rate": 1.5842083610250713e-05, "loss": 0.804, "query_regularizer_loss": 0.0182, "sparse_loss": 0.7677, "step": 4100 }, { "document_regularizer_loss": 0.0181, "epoch": 1.302560859943092, "grad_norm": 5.061923503875732, "learning_rate": 1.5592811169779146e-05, "loss": 0.8112, "query_regularizer_loss": 0.0181, "sparse_loss": 0.775, "step": 4120 }, { "document_regularizer_loss": 0.0181, "epoch": 1.3088839709136895, "grad_norm": 9.3052339553833, "learning_rate": 1.5344623543176047e-05, "loss": 0.8061, "query_regularizer_loss": 0.0181, "sparse_loss": 0.7699, "step": 4140 }, { "document_regularizer_loss": 0.0181, "epoch": 1.3152070818842871, "grad_norm": 6.645140171051025, "learning_rate": 1.5097549350827823e-05, "loss": 0.8149, "query_regularizer_loss": 0.0181, "sparse_loss": 0.7786, "step": 4160 }, { "document_regularizer_loss": 0.0179, "epoch": 1.3215301928548846, "grad_norm": 9.232198715209961, "learning_rate": 1.4851617084722384e-05, "loss": 0.7243, "query_regularizer_loss": 0.0179, "sparse_loss": 0.6885, "step": 4180 }, { "document_regularizer_loss": 0.0179, "epoch": 1.327853303825482, "grad_norm": 6.141971588134766, "learning_rate": 1.4606855105163509e-05, "loss": 0.7487, "query_regularizer_loss": 0.0179, "sparse_loss": 0.713, "step": 4200 }, { "document_regularizer_loss": 0.0178, "epoch": 1.3341764147960797, "grad_norm": 7.69699239730835, "learning_rate": 1.436329163750042e-05, "loss": 0.789, "query_regularizer_loss": 0.0178, "sparse_loss": 0.7533, "step": 4220 }, { "document_regularizer_loss": 0.0179, "epoch": 1.3404995257666772, "grad_norm": 6.825509071350098, "learning_rate": 1.412095476887289e-05, "loss": 0.7696, "query_regularizer_loss": 0.0179, "sparse_loss": 0.7339, "step": 4240 }, { "document_regularizer_loss": 0.0177, "epoch": 1.3468226367372749, "grad_norm": 6.219385623931885, "learning_rate": 1.3879872444972326e-05, "loss": 0.7236, "query_regularizer_loss": 0.0177, "sparse_loss": 0.6883, "step": 4260 }, { "document_regularizer_loss": 0.0176, "epoch": 1.3531457477078723, "grad_norm": 9.367851257324219, "learning_rate": 1.3640072466819087e-05, "loss": 0.7761, "query_regularizer_loss": 0.0176, "sparse_loss": 0.7409, "step": 4280 }, { "document_regularizer_loss": 0.0174, "epoch": 1.3594688586784698, "grad_norm": 6.194346904754639, "learning_rate": 1.3401582487556613e-05, "loss": 0.7864, "query_regularizer_loss": 0.0174, "sparse_loss": 0.7516, "step": 4300 }, { "document_regularizer_loss": 0.0175, "epoch": 1.3657919696490675, "grad_norm": 6.247494697570801, "learning_rate": 1.3164430009262479e-05, "loss": 0.8002, "query_regularizer_loss": 0.0175, "sparse_loss": 0.7652, "step": 4320 }, { "document_regularizer_loss": 0.0175, "epoch": 1.372115080619665, "grad_norm": 5.456516265869141, "learning_rate": 1.2928642379776946e-05, "loss": 0.7939, "query_regularizer_loss": 0.0175, "sparse_loss": 0.759, "step": 4340 }, { "document_regularizer_loss": 0.0174, "epoch": 1.3784381915902624, "grad_norm": 6.565524578094482, "learning_rate": 1.2694246789549268e-05, "loss": 0.7647, "query_regularizer_loss": 0.0174, "sparse_loss": 0.73, "step": 4360 }, { "document_regularizer_loss": 0.017, "epoch": 1.38476130256086, "grad_norm": 5.862294673919678, "learning_rate": 1.2461270268502138e-05, "loss": 0.7741, "query_regularizer_loss": 0.017, "sparse_loss": 0.74, "step": 4380 }, { "document_regularizer_loss": 0.0168, "epoch": 1.3910844135314575, "grad_norm": 4.879271030426025, "learning_rate": 1.2229739682914707e-05, "loss": 0.7361, "query_regularizer_loss": 0.0167, "sparse_loss": 0.7026, "step": 4400 }, { "document_regularizer_loss": 0.0169, "epoch": 1.397407524502055, "grad_norm": 6.44930362701416, "learning_rate": 1.1999681732324397e-05, "loss": 0.7732, "query_regularizer_loss": 0.0169, "sparse_loss": 0.7395, "step": 4420 }, { "document_regularizer_loss": 0.0168, "epoch": 1.4037306354726526, "grad_norm": 5.632932662963867, "learning_rate": 1.1771122946448002e-05, "loss": 0.79, "query_regularizer_loss": 0.0168, "sparse_loss": 0.7563, "step": 4440 }, { "document_regularizer_loss": 0.0166, "epoch": 1.41005374644325, "grad_norm": 11.790848731994629, "learning_rate": 1.1544089682122288e-05, "loss": 0.7661, "query_regularizer_loss": 0.0166, "sparse_loss": 0.7329, "step": 4460 }, { "document_regularizer_loss": 0.0168, "epoch": 1.4163768574138476, "grad_norm": 4.643637657165527, "learning_rate": 1.1318608120264676e-05, "loss": 0.7779, "query_regularizer_loss": 0.0168, "sparse_loss": 0.7442, "step": 4480 }, { "document_regularizer_loss": 0.0168, "epoch": 1.4226999683844452, "grad_norm": 6.348916530609131, "learning_rate": 1.1094704262854047e-05, "loss": 0.7711, "query_regularizer_loss": 0.0168, "sparse_loss": 0.7375, "step": 4500 }, { "document_regularizer_loss": 0.0166, "epoch": 1.4290230793550427, "grad_norm": 5.77072811126709, "learning_rate": 1.0872403929932312e-05, "loss": 0.7952, "query_regularizer_loss": 0.0166, "sparse_loss": 0.7619, "step": 4520 }, { "document_regularizer_loss": 0.0166, "epoch": 1.4353461903256401, "grad_norm": 6.92957067489624, "learning_rate": 1.0651732756626848e-05, "loss": 0.7743, "query_regularizer_loss": 0.0166, "sparse_loss": 0.741, "step": 4540 }, { "document_regularizer_loss": 0.0165, "epoch": 1.4416693012962378, "grad_norm": 5.602739334106445, "learning_rate": 1.0432716190194397e-05, "loss": 0.72, "query_regularizer_loss": 0.0165, "sparse_loss": 0.6869, "step": 4560 }, { "document_regularizer_loss": 0.0165, "epoch": 1.4479924122668353, "grad_norm": 5.612238883972168, "learning_rate": 1.0215379487086452e-05, "loss": 0.7801, "query_regularizer_loss": 0.0165, "sparse_loss": 0.7471, "step": 4580 }, { "document_regularizer_loss": 0.0163, "epoch": 1.4543155232374327, "grad_norm": 5.197407245635986, "learning_rate": 9.999747710036875e-06, "loss": 0.7453, "query_regularizer_loss": 0.0163, "sparse_loss": 0.7127, "step": 4600 }, { "document_regularizer_loss": 0.0163, "epoch": 1.4606386342080304, "grad_norm": 4.72848653793335, "learning_rate": 9.785845725171583e-06, "loss": 0.7509, "query_regularizer_loss": 0.0163, "sparse_loss": 0.7184, "step": 4620 }, { "document_regularizer_loss": 0.0163, "epoch": 1.4669617451786279, "grad_norm": 5.626763820648193, "learning_rate": 9.573698199141146e-06, "loss": 0.7558, "query_regularizer_loss": 0.0163, "sparse_loss": 0.7232, "step": 4640 }, { "document_regularizer_loss": 0.0162, "epoch": 1.4732848561492253, "grad_norm": 6.345512390136719, "learning_rate": 9.363329596276258e-06, "loss": 0.7718, "query_regularizer_loss": 0.0162, "sparse_loss": 0.7394, "step": 4660 }, { "document_regularizer_loss": 0.0161, "epoch": 1.479607967119823, "grad_norm": 20.017658233642578, "learning_rate": 9.15476417576656e-06, "loss": 0.6954, "query_regularizer_loss": 0.0161, "sparse_loss": 0.6632, "step": 4680 }, { "document_regularizer_loss": 0.0159, "epoch": 1.4859310780904205, "grad_norm": 5.100090026855469, "learning_rate": 8.948025988863163e-06, "loss": 0.705, "query_regularizer_loss": 0.0159, "sparse_loss": 0.6732, "step": 4700 }, { "document_regularizer_loss": 0.0159, "epoch": 1.492254189061018, "grad_norm": 7.6336750984191895, "learning_rate": 8.743138876105056e-06, "loss": 0.751, "query_regularizer_loss": 0.0159, "sparse_loss": 0.7192, "step": 4720 }, { "document_regularizer_loss": 0.0159, "epoch": 1.4985773000316156, "grad_norm": 5.1957783699035645, "learning_rate": 8.54012646456995e-06, "loss": 0.765, "query_regularizer_loss": 0.0159, "sparse_loss": 0.7333, "step": 4740 }, { "document_regularizer_loss": 0.0158, "epoch": 1.504900411002213, "grad_norm": 6.796875953674316, "learning_rate": 8.33901216514959e-06, "loss": 0.7983, "query_regularizer_loss": 0.0158, "sparse_loss": 0.7667, "step": 4760 }, { "document_regularizer_loss": 0.0158, "epoch": 1.5112235219728105, "grad_norm": 6.2827959060668945, "learning_rate": 8.139819169850152e-06, "loss": 0.7716, "query_regularizer_loss": 0.0158, "sparse_loss": 0.74, "step": 4780 }, { "document_regularizer_loss": 0.0157, "epoch": 1.5175466329434082, "grad_norm": 23.407991409301758, "learning_rate": 7.942570449117689e-06, "loss": 0.7747, "query_regularizer_loss": 0.0157, "sparse_loss": 0.7434, "step": 4800 }, { "document_regularizer_loss": 0.0155, "epoch": 1.5238697439140056, "grad_norm": 5.709648132324219, "learning_rate": 7.747288749189344e-06, "loss": 0.7613, "query_regularizer_loss": 0.0155, "sparse_loss": 0.7302, "step": 4820 }, { "document_regularizer_loss": 0.0156, "epoch": 1.530192854884603, "grad_norm": 7.462285041809082, "learning_rate": 7.553996589470214e-06, "loss": 0.7962, "query_regularizer_loss": 0.0156, "sparse_loss": 0.7651, "step": 4840 }, { "document_regularizer_loss": 0.0155, "epoch": 1.5365159658552008, "grad_norm": 6.551488399505615, "learning_rate": 7.362716259936572e-06, "loss": 0.7893, "query_regularizer_loss": 0.0155, "sparse_loss": 0.7583, "step": 4860 }, { "document_regularizer_loss": 0.0156, "epoch": 1.5428390768257982, "grad_norm": 4.285749912261963, "learning_rate": 7.173469818565334e-06, "loss": 0.7291, "query_regularizer_loss": 0.0156, "sparse_loss": 0.6979, "step": 4880 }, { "document_regularizer_loss": 0.0156, "epoch": 1.5491621877963957, "grad_norm": 5.514683246612549, "learning_rate": 6.986279088790468e-06, "loss": 0.6982, "query_regularizer_loss": 0.0155, "sparse_loss": 0.6671, "step": 4900 }, { "document_regularizer_loss": 0.0154, "epoch": 1.5554852987669934, "grad_norm": 4.710526466369629, "learning_rate": 6.801165656986317e-06, "loss": 0.7057, "query_regularizer_loss": 0.0154, "sparse_loss": 0.6748, "step": 4920 }, { "document_regularizer_loss": 0.0155, "epoch": 1.561808409737591, "grad_norm": 6.305178642272949, "learning_rate": 6.618150869978346e-06, "loss": 0.7883, "query_regularizer_loss": 0.0155, "sparse_loss": 0.7574, "step": 4940 }, { "document_regularizer_loss": 0.0155, "epoch": 1.5681315207081883, "grad_norm": 7.8116044998168945, "learning_rate": 6.43725583258147e-06, "loss": 0.782, "query_regularizer_loss": 0.0155, "sparse_loss": 0.751, "step": 4960 }, { "document_regularizer_loss": 0.0153, "epoch": 1.574454631678786, "grad_norm": 6.840033531188965, "learning_rate": 6.25850140516629e-06, "loss": 0.7625, "query_regularizer_loss": 0.0153, "sparse_loss": 0.7318, "step": 4980 }, { "document_regularizer_loss": 0.0153, "epoch": 1.5807777426493836, "grad_norm": 4.293910980224609, "learning_rate": 6.08190820125353e-06, "loss": 0.7101, "query_regularizer_loss": 0.0153, "sparse_loss": 0.6795, "step": 5000 }, { "document_regularizer_loss": 0.0152, "epoch": 1.5871008536199809, "grad_norm": 4.924117088317871, "learning_rate": 5.907496585136932e-06, "loss": 0.7394, "query_regularizer_loss": 0.0152, "sparse_loss": 0.709, "step": 5020 }, { "document_regularizer_loss": 0.0152, "epoch": 1.5934239645905786, "grad_norm": 5.151610851287842, "learning_rate": 5.735286669534912e-06, "loss": 0.6894, "query_regularizer_loss": 0.0152, "sparse_loss": 0.659, "step": 5040 }, { "document_regularizer_loss": 0.0152, "epoch": 1.5997470755611762, "grad_norm": 6.865243434906006, "learning_rate": 5.5652983132711946e-06, "loss": 0.6992, "query_regularizer_loss": 0.0152, "sparse_loss": 0.6689, "step": 5060 }, { "document_regularizer_loss": 0.0152, "epoch": 1.6060701865317735, "grad_norm": 5.148654460906982, "learning_rate": 5.397551118984756e-06, "loss": 0.7032, "query_regularizer_loss": 0.0152, "sparse_loss": 0.6729, "step": 5080 }, { "document_regularizer_loss": 0.0151, "epoch": 1.6123932975023711, "grad_norm": 7.1046929359436035, "learning_rate": 5.232064430869266e-06, "loss": 0.7659, "query_regularizer_loss": 0.0151, "sparse_loss": 0.7356, "step": 5100 }, { "document_regularizer_loss": 0.0152, "epoch": 1.6187164084729688, "grad_norm": 6.476480484008789, "learning_rate": 5.068857332442408e-06, "loss": 0.7268, "query_regularizer_loss": 0.0152, "sparse_loss": 0.6965, "step": 5120 }, { "document_regularizer_loss": 0.0151, "epoch": 1.6250395194435663, "grad_norm": 5.051494598388672, "learning_rate": 4.907948644345184e-06, "loss": 0.6928, "query_regularizer_loss": 0.0151, "sparse_loss": 0.6627, "step": 5140 }, { "document_regularizer_loss": 0.015, "epoch": 1.6313626304141637, "grad_norm": 9.967106819152832, "learning_rate": 4.7493569221715776e-06, "loss": 0.7134, "query_regularizer_loss": 0.015, "sparse_loss": 0.6833, "step": 5160 }, { "document_regularizer_loss": 0.0149, "epoch": 1.6376857413847614, "grad_norm": 7.947037220001221, "learning_rate": 4.593100454328744e-06, "loss": 0.8233, "query_regularizer_loss": 0.015, "sparse_loss": 0.7934, "step": 5180 }, { "document_regularizer_loss": 0.0148, "epoch": 1.6440088523553589, "grad_norm": 5.4551825523376465, "learning_rate": 4.439197259928082e-06, "loss": 0.7258, "query_regularizer_loss": 0.0148, "sparse_loss": 0.6962, "step": 5200 }, { "document_regularizer_loss": 0.0148, "epoch": 1.6503319633259563, "grad_norm": 5.254171848297119, "learning_rate": 4.2876650867072516e-06, "loss": 0.653, "query_regularizer_loss": 0.0148, "sparse_loss": 0.6234, "step": 5220 }, { "document_regularizer_loss": 0.0148, "epoch": 1.656655074296554, "grad_norm": 59.719482421875, "learning_rate": 4.1385214089836365e-06, "loss": 0.764, "query_regularizer_loss": 0.0148, "sparse_loss": 0.7344, "step": 5240 }, { "document_regularizer_loss": 0.0148, "epoch": 1.6629781852671515, "grad_norm": 6.208061218261719, "learning_rate": 3.991783425639148e-06, "loss": 0.8153, "query_regularizer_loss": 0.0148, "sparse_loss": 0.7858, "step": 5260 }, { "document_regularizer_loss": 0.0148, "epoch": 1.669301296237749, "grad_norm": 5.488613128662109, "learning_rate": 3.8474680581369635e-06, "loss": 0.6717, "query_regularizer_loss": 0.0148, "sparse_loss": 0.6422, "step": 5280 }, { "document_regularizer_loss": 0.0147, "epoch": 1.6756244072083466, "grad_norm": 13.9483642578125, "learning_rate": 3.7055919485701613e-06, "loss": 0.7592, "query_regularizer_loss": 0.0147, "sparse_loss": 0.7298, "step": 5300 }, { "document_regularizer_loss": 0.0147, "epoch": 1.681947518178944, "grad_norm": 7.517942905426025, "learning_rate": 3.5661714577425954e-06, "loss": 0.7114, "query_regularizer_loss": 0.0147, "sparse_loss": 0.6821, "step": 5320 }, { "document_regularizer_loss": 0.0147, "epoch": 1.6882706291495415, "grad_norm": 4.885865211486816, "learning_rate": 3.429222663282211e-06, "loss": 0.7035, "query_regularizer_loss": 0.0147, "sparse_loss": 0.6741, "step": 5340 }, { "document_regularizer_loss": 0.0146, "epoch": 1.6945937401201392, "grad_norm": 6.0214715003967285, "learning_rate": 3.2947613577870017e-06, "loss": 0.702, "query_regularizer_loss": 0.0146, "sparse_loss": 0.6728, "step": 5360 }, { "document_regularizer_loss": 0.0147, "epoch": 1.7009168510907366, "grad_norm": 6.653244495391846, "learning_rate": 3.162803047003865e-06, "loss": 0.735, "query_regularizer_loss": 0.0147, "sparse_loss": 0.7057, "step": 5380 }, { "document_regularizer_loss": 0.0147, "epoch": 1.707239962061334, "grad_norm": 7.448154449462891, "learning_rate": 3.0333629480404915e-06, "loss": 0.7298, "query_regularizer_loss": 0.0147, "sparse_loss": 0.7005, "step": 5400 }, { "document_regularizer_loss": 0.0146, "epoch": 1.7135630730319318, "grad_norm": 5.637879848480225, "learning_rate": 2.9064559876106097e-06, "loss": 0.7082, "query_regularizer_loss": 0.0145, "sparse_loss": 0.6791, "step": 5420 }, { "document_regularizer_loss": 0.0145, "epoch": 1.7198861840025292, "grad_norm": 6.466550827026367, "learning_rate": 2.7820968003126143e-06, "loss": 0.693, "query_regularizer_loss": 0.0146, "sparse_loss": 0.6639, "step": 5440 }, { "document_regularizer_loss": 0.0146, "epoch": 1.7262092949731267, "grad_norm": 6.480766773223877, "learning_rate": 2.660299726941995e-06, "loss": 0.7466, "query_regularizer_loss": 0.0146, "sparse_loss": 0.7175, "step": 5460 }, { "document_regularizer_loss": 0.0145, "epoch": 1.7325324059437244, "grad_norm": 5.0478315353393555, "learning_rate": 2.541078812837544e-06, "loss": 0.691, "query_regularizer_loss": 0.0145, "sparse_loss": 0.6619, "step": 5480 }, { "document_regularizer_loss": 0.0145, "epoch": 1.7388555169143218, "grad_norm": 18.3403263092041, "learning_rate": 2.4244478062617285e-06, "loss": 0.8491, "query_regularizer_loss": 0.0145, "sparse_loss": 0.8201, "step": 5500 }, { "document_regularizer_loss": 0.0145, "epoch": 1.7451786278849193, "grad_norm": 4.109490871429443, "learning_rate": 2.3104201568152406e-06, "loss": 0.7267, "query_regularizer_loss": 0.0145, "sparse_loss": 0.6977, "step": 5520 }, { "document_regularizer_loss": 0.0145, "epoch": 1.751501738855517, "grad_norm": 5.1971540451049805, "learning_rate": 2.1990090138860443e-06, "loss": 0.6938, "query_regularizer_loss": 0.0145, "sparse_loss": 0.6649, "step": 5540 }, { "document_regularizer_loss": 0.0144, "epoch": 1.7578248498261144, "grad_norm": 5.344772815704346, "learning_rate": 2.090227225132993e-06, "loss": 0.7251, "query_regularizer_loss": 0.0144, "sparse_loss": 0.6962, "step": 5560 }, { "document_regularizer_loss": 0.0144, "epoch": 1.7641479607967119, "grad_norm": 6.859626770019531, "learning_rate": 1.9840873350042975e-06, "loss": 0.6835, "query_regularizer_loss": 0.0144, "sparse_loss": 0.6546, "step": 5580 }, { "document_regularizer_loss": 0.0144, "epoch": 1.7704710717673096, "grad_norm": 7.862534046173096, "learning_rate": 1.8806015832909223e-06, "loss": 0.7431, "query_regularizer_loss": 0.0144, "sparse_loss": 0.7143, "step": 5600 }, { "document_regularizer_loss": 0.0144, "epoch": 1.776794182737907, "grad_norm": 7.577550411224365, "learning_rate": 1.7797819037151137e-06, "loss": 0.7031, "query_regularizer_loss": 0.0144, "sparse_loss": 0.6744, "step": 5620 }, { "document_regularizer_loss": 0.0143, "epoch": 1.7831172937085045, "grad_norm": 5.902777194976807, "learning_rate": 1.6816399225542512e-06, "loss": 0.6999, "query_regularizer_loss": 0.0143, "sparse_loss": 0.6713, "step": 5640 }, { "document_regularizer_loss": 0.0143, "epoch": 1.7894404046791021, "grad_norm": 7.284171104431152, "learning_rate": 1.5861869573000982e-06, "loss": 0.7097, "query_regularizer_loss": 0.0143, "sparse_loss": 0.6811, "step": 5660 }, { "document_regularizer_loss": 0.0143, "epoch": 1.7957635156496996, "grad_norm": 5.959704875946045, "learning_rate": 1.4934340153537424e-06, "loss": 0.7125, "query_regularizer_loss": 0.0143, "sparse_loss": 0.6839, "step": 5680 }, { "document_regularizer_loss": 0.0143, "epoch": 1.802086626620297, "grad_norm": 6.5475568771362305, "learning_rate": 1.4033917927562228e-06, "loss": 0.7399, "query_regularizer_loss": 0.0143, "sparse_loss": 0.7113, "step": 5700 }, { "document_regularizer_loss": 0.0143, "epoch": 1.8084097375908947, "grad_norm": 4.185171604156494, "learning_rate": 1.3160706729550886e-06, "loss": 0.677, "query_regularizer_loss": 0.0143, "sparse_loss": 0.6484, "step": 5720 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8147328485614924, "grad_norm": 25.05087661743164, "learning_rate": 1.2314807256070093e-06, "loss": 0.7428, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7143, "step": 5740 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8210559595320897, "grad_norm": 6.057918548583984, "learning_rate": 1.1496317054165734e-06, "loss": 0.7495, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7211, "step": 5760 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8273790705026873, "grad_norm": 5.152764797210693, "learning_rate": 1.070533051011388e-06, "loss": 0.7266, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6981, "step": 5780 }, { "document_regularizer_loss": 0.0142, "epoch": 1.833702181473285, "grad_norm": 6.547135829925537, "learning_rate": 9.94193883853653e-07, "loss": 0.6984, "query_regularizer_loss": 0.0142, "sparse_loss": 0.67, "step": 5800 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8400252924438822, "grad_norm": 4.288388729095459, "learning_rate": 9.20623007188276e-07, "loss": 0.7527, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7243, "step": 5820 }, { "document_regularizer_loss": 0.0142, "epoch": 1.84634840341448, "grad_norm": 10.613944053649902, "learning_rate": 8.498289050277331e-07, "loss": 0.6564, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6281, "step": 5840 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8526715143850776, "grad_norm": 5.5082902908325195, "learning_rate": 7.81819741173681e-07, "loss": 0.7028, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6744, "step": 5860 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8589946253556748, "grad_norm": 10.326964378356934, "learning_rate": 7.166033582755583e-07, "loss": 0.7015, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6732, "step": 5880 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8653177363262725, "grad_norm": 4.219756603240967, "learning_rate": 6.541872769261631e-07, "loss": 0.7219, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6936, "step": 5900 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8716408472968702, "grad_norm": 6.046718597412109, "learning_rate": 5.945786947944176e-07, "loss": 0.7569, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7285, "step": 5920 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8779639582674676, "grad_norm": 6.10089635848999, "learning_rate": 5.377844857953423e-07, "loss": 0.6832, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6548, "step": 5940 }, { "document_regularizer_loss": 0.0142, "epoch": 1.884287069238065, "grad_norm": 9.817451477050781, "learning_rate": 4.838111992973627e-07, "loss": 0.72, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6916, "step": 5960 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8906101802086628, "grad_norm": 6.2373433113098145, "learning_rate": 4.3266505936708226e-07, "loss": 0.6878, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6595, "step": 5980 }, { "document_regularizer_loss": 0.0142, "epoch": 1.8969332911792602, "grad_norm": 5.504240036010742, "learning_rate": 3.843519640514937e-07, "loss": 0.6468, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6185, "step": 6000 }, { "epoch": 1.8969332911792602, "eval_runtime": 119.8241, "eval_samples_per_second": 0.0, "eval_sparse-ir-eval_avg_flops": 828.1011962890625, "eval_sparse-ir-eval_corpus_active_dims": 1024.0, "eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712, "eval_sparse-ir-eval_dot_accuracy@1": 0.05598880223955209, "eval_sparse-ir-eval_dot_accuracy@100": 0.41011797640471903, "eval_sparse-ir-eval_dot_accuracy@50": 0.32113577284543093, "eval_sparse-ir-eval_dot_accuracy@8": 0.15736852629474105, "eval_sparse-ir-eval_dot_map@100": 0.09527335248492443, "eval_sparse-ir-eval_dot_mrr@10": 0.08729730244427296, "eval_sparse-ir-eval_dot_ndcg@10": 0.10700833927390947, "eval_sparse-ir-eval_dot_precision@1": 0.05598880223955209, "eval_sparse-ir-eval_dot_precision@100": 0.004101179764047191, "eval_sparse-ir-eval_dot_precision@50": 0.006422715456908619, "eval_sparse-ir-eval_dot_precision@8": 0.01967106578684263, "eval_sparse-ir-eval_dot_recall@1": 0.05598880223955209, "eval_sparse-ir-eval_dot_recall@100": 0.41011797640471903, "eval_sparse-ir-eval_dot_recall@50": 0.32113577284543093, "eval_sparse-ir-eval_dot_recall@8": 0.15736852629474105, "eval_sparse-ir-eval_query_active_dims": 1024.0, "eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712, "eval_steps_per_second": 0.0, "step": 6000 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9032564021498577, "grad_norm": 24.536941528320312, "learning_rate": 3.388774846978804e-07, "loss": 0.6901, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6618, "step": 6020 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9095795131204554, "grad_norm": 4.5274786949157715, "learning_rate": 2.9624686531129766e-07, "loss": 0.7066, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6782, "step": 6040 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9159026240910528, "grad_norm": 5.511288642883301, "learning_rate": 2.5646502194988097e-07, "loss": 0.6818, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6535, "step": 6060 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9222257350616503, "grad_norm": 5.5782904624938965, "learning_rate": 2.1953654215791653e-07, "loss": 0.735, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7067, "step": 6080 }, { "document_regularizer_loss": 0.0142, "epoch": 1.928548846032248, "grad_norm": 8.528367042541504, "learning_rate": 1.8546568443683077e-07, "loss": 0.7364, "query_regularizer_loss": 0.0142, "sparse_loss": 0.708, "step": 6100 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9348719570028454, "grad_norm": 6.339615821838379, "learning_rate": 1.5425637775409728e-07, "loss": 0.7485, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7202, "step": 6120 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9411950679734429, "grad_norm": 5.221982955932617, "learning_rate": 1.2591222109017143e-07, "loss": 0.7123, "query_regularizer_loss": 0.0142, "sparse_loss": 0.684, "step": 6140 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9475181789440406, "grad_norm": 6.821455478668213, "learning_rate": 1.0043648302345276e-07, "loss": 0.7488, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7205, "step": 6160 }, { "document_regularizer_loss": 0.0142, "epoch": 1.953841289914638, "grad_norm": 6.010687351226807, "learning_rate": 7.783210135337282e-08, "loss": 0.7161, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6878, "step": 6180 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9601644008852355, "grad_norm": 5.301513195037842, "learning_rate": 5.810168276160211e-08, "loss": 0.6795, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6512, "step": 6200 }, { "document_regularizer_loss": 0.0141, "epoch": 1.9664875118558331, "grad_norm": 5.3588480949401855, "learning_rate": 4.1247502511465585e-08, "loss": 0.6925, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6642, "step": 6220 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9728106228264306, "grad_norm": 5.97401237487793, "learning_rate": 2.7271504185558126e-08, "loss": 0.8108, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7825, "step": 6240 }, { "document_regularizer_loss": 0.0142, "epoch": 1.979133733797028, "grad_norm": 6.50218391418457, "learning_rate": 1.6175299461615447e-08, "loss": 0.7295, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7012, "step": 6260 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9854568447676257, "grad_norm": 6.084305286407471, "learning_rate": 7.96016792666554e-09, "loss": 0.7232, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6949, "step": 6280 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9917799557382232, "grad_norm": 5.208037376403809, "learning_rate": 2.627056929460636e-09, "loss": 0.7575, "query_regularizer_loss": 0.0142, "sparse_loss": 0.7292, "step": 6300 }, { "document_regularizer_loss": 0.0142, "epoch": 1.9981030667088207, "grad_norm": 5.697127819061279, "learning_rate": 1.7658147123955637e-10, "loss": 0.7006, "query_regularizer_loss": 0.0142, "sparse_loss": 0.6723, "step": 6320 } ], "logging_steps": 20, "max_steps": 6326, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }