DylanJHJ's picture
upload models
7613c5d
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.2039489525644114,
"eval_steps": 1000.0,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval/nano_beir.arguana": 0.5201586889046041,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.climate_fever": 0.23997409265084552,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.dbpedia_entity": 0.34286887314902237,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.fever": 0.6213496755517833,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.fiqa": 0.4542903465876373,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.hotpotqa": 0.6560051657630788,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.nfcorpus": 0.3328835143785305,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.nq": 0.4570725655218753,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.quora": 0.832098944676056,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.scidocs": 0.25908236715595256,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.scifact": 0.7391066781087818,
"step": 0
},
{
"epoch": 0,
"eval/nano_beir.webis_touche2020": 0.30431141846717047,
"step": 0
},
{
"epoch": 0,
"eval/avg": 0.4799335275762782,
"step": 0
},
{
"epoch": 0.0012039489525644113,
"grad_norm": 9.375,
"learning_rate": 4.5e-07,
"loss": 1.2124,
"step": 10
},
{
"epoch": 0.0024078979051288226,
"grad_norm": 11.625,
"learning_rate": 9.5e-07,
"loss": 1.2431,
"step": 20
},
{
"epoch": 0.003611846857693234,
"grad_norm": 12.9375,
"learning_rate": 1.45e-06,
"loss": 1.2449,
"step": 30
},
{
"epoch": 0.004815795810257645,
"grad_norm": 10.25,
"learning_rate": 1.95e-06,
"loss": 1.3433,
"step": 40
},
{
"epoch": 0.006019744762822056,
"grad_norm": 11.375,
"learning_rate": 2.4500000000000003e-06,
"loss": 1.3456,
"step": 50
},
{
"epoch": 0.007223693715386468,
"grad_norm": 12.125,
"learning_rate": 2.95e-06,
"loss": 1.3311,
"step": 60
},
{
"epoch": 0.00842764266795088,
"grad_norm": 10.75,
"learning_rate": 3.4500000000000004e-06,
"loss": 1.2169,
"step": 70
},
{
"epoch": 0.00963159162051529,
"grad_norm": 19.125,
"learning_rate": 3.95e-06,
"loss": 1.4157,
"step": 80
},
{
"epoch": 0.010835540573079701,
"grad_norm": 12.1875,
"learning_rate": 4.45e-06,
"loss": 1.1054,
"step": 90
},
{
"epoch": 0.012039489525644112,
"grad_norm": 9.0,
"learning_rate": 4.950000000000001e-06,
"loss": 1.1525,
"step": 100
},
{
"epoch": 0.013243438478208525,
"grad_norm": 9.625,
"learning_rate": 5.45e-06,
"loss": 1.2883,
"step": 110
},
{
"epoch": 0.014447387430772935,
"grad_norm": 11.25,
"learning_rate": 5.95e-06,
"loss": 1.2931,
"step": 120
},
{
"epoch": 0.015651336383337346,
"grad_norm": 11.4375,
"learning_rate": 6.45e-06,
"loss": 1.3545,
"step": 130
},
{
"epoch": 0.01685528533590176,
"grad_norm": 15.1875,
"learning_rate": 6.950000000000001e-06,
"loss": 1.3885,
"step": 140
},
{
"epoch": 0.018059234288466168,
"grad_norm": 10.9375,
"learning_rate": 7.45e-06,
"loss": 1.1764,
"step": 150
},
{
"epoch": 0.01926318324103058,
"grad_norm": 12.0625,
"learning_rate": 7.95e-06,
"loss": 1.3003,
"step": 160
},
{
"epoch": 0.020467132193594993,
"grad_norm": 12.3125,
"learning_rate": 8.45e-06,
"loss": 1.5038,
"step": 170
},
{
"epoch": 0.021671081146159402,
"grad_norm": 10.9375,
"learning_rate": 8.95e-06,
"loss": 1.1602,
"step": 180
},
{
"epoch": 0.022875030098723815,
"grad_norm": 10.5625,
"learning_rate": 9.450000000000001e-06,
"loss": 1.1491,
"step": 190
},
{
"epoch": 0.024078979051288224,
"grad_norm": 11.375,
"learning_rate": 9.950000000000001e-06,
"loss": 1.3445,
"step": 200
},
{
"epoch": 0.025282928003852637,
"grad_norm": 9.125,
"learning_rate": 1.045e-05,
"loss": 1.2298,
"step": 210
},
{
"epoch": 0.02648687695641705,
"grad_norm": 10.3125,
"learning_rate": 1.095e-05,
"loss": 1.2853,
"step": 220
},
{
"epoch": 0.02769082590898146,
"grad_norm": 9.8125,
"learning_rate": 1.145e-05,
"loss": 1.2399,
"step": 230
},
{
"epoch": 0.02889477486154587,
"grad_norm": 8.5625,
"learning_rate": 1.195e-05,
"loss": 1.281,
"step": 240
},
{
"epoch": 0.03009872381411028,
"grad_norm": 12.5,
"learning_rate": 1.2450000000000001e-05,
"loss": 1.1571,
"step": 250
},
{
"epoch": 0.03130267276667469,
"grad_norm": 11.0625,
"learning_rate": 1.2950000000000001e-05,
"loss": 1.2818,
"step": 260
},
{
"epoch": 0.032506621719239105,
"grad_norm": 10.0,
"learning_rate": 1.3450000000000002e-05,
"loss": 1.0802,
"step": 270
},
{
"epoch": 0.03371057067180352,
"grad_norm": 15.9375,
"learning_rate": 1.3950000000000002e-05,
"loss": 1.1303,
"step": 280
},
{
"epoch": 0.03491451962436792,
"grad_norm": 11.5625,
"learning_rate": 1.4449999999999999e-05,
"loss": 1.086,
"step": 290
},
{
"epoch": 0.036118468576932336,
"grad_norm": 11.75,
"learning_rate": 1.4950000000000001e-05,
"loss": 1.1996,
"step": 300
},
{
"epoch": 0.03732241752949675,
"grad_norm": 7.5625,
"learning_rate": 1.545e-05,
"loss": 1.0982,
"step": 310
},
{
"epoch": 0.03852636648206116,
"grad_norm": 9.5,
"learning_rate": 1.595e-05,
"loss": 1.0364,
"step": 320
},
{
"epoch": 0.039730315434625574,
"grad_norm": 10.75,
"learning_rate": 1.645e-05,
"loss": 1.0967,
"step": 330
},
{
"epoch": 0.040934264387189986,
"grad_norm": 9.3125,
"learning_rate": 1.6950000000000002e-05,
"loss": 1.1497,
"step": 340
},
{
"epoch": 0.04213821333975439,
"grad_norm": 9.0625,
"learning_rate": 1.745e-05,
"loss": 1.1596,
"step": 350
},
{
"epoch": 0.043342162292318805,
"grad_norm": 9.3125,
"learning_rate": 1.795e-05,
"loss": 1.0344,
"step": 360
},
{
"epoch": 0.04454611124488322,
"grad_norm": 10.625,
"learning_rate": 1.845e-05,
"loss": 1.186,
"step": 370
},
{
"epoch": 0.04575006019744763,
"grad_norm": 9.9375,
"learning_rate": 1.895e-05,
"loss": 0.9519,
"step": 380
},
{
"epoch": 0.04695400915001204,
"grad_norm": 10.125,
"learning_rate": 1.9450000000000002e-05,
"loss": 1.1593,
"step": 390
},
{
"epoch": 0.04815795810257645,
"grad_norm": 10.0,
"learning_rate": 1.995e-05,
"loss": 1.0437,
"step": 400
},
{
"epoch": 0.04936190705514086,
"grad_norm": 10.5,
"learning_rate": 2.045e-05,
"loss": 1.1235,
"step": 410
},
{
"epoch": 0.05056585600770527,
"grad_norm": 8.375,
"learning_rate": 2.095e-05,
"loss": 0.9802,
"step": 420
},
{
"epoch": 0.051769804960269686,
"grad_norm": 11.125,
"learning_rate": 2.145e-05,
"loss": 1.1923,
"step": 430
},
{
"epoch": 0.0529737539128341,
"grad_norm": 10.5625,
"learning_rate": 2.195e-05,
"loss": 1.0591,
"step": 440
},
{
"epoch": 0.054177702865398504,
"grad_norm": 12.625,
"learning_rate": 2.245e-05,
"loss": 1.0929,
"step": 450
},
{
"epoch": 0.05538165181796292,
"grad_norm": 7.4375,
"learning_rate": 2.2950000000000002e-05,
"loss": 0.8406,
"step": 460
},
{
"epoch": 0.05658560077052733,
"grad_norm": 11.125,
"learning_rate": 2.345e-05,
"loss": 0.9958,
"step": 470
},
{
"epoch": 0.05778954972309174,
"grad_norm": 9.9375,
"learning_rate": 2.395e-05,
"loss": 0.9885,
"step": 480
},
{
"epoch": 0.058993498675656154,
"grad_norm": 9.625,
"learning_rate": 2.445e-05,
"loss": 1.0203,
"step": 490
},
{
"epoch": 0.06019744762822056,
"grad_norm": 10.0,
"learning_rate": 2.495e-05,
"loss": 0.92,
"step": 500
},
{
"epoch": 0.06140139658078497,
"grad_norm": 10.5625,
"learning_rate": 2.5450000000000002e-05,
"loss": 1.1444,
"step": 510
},
{
"epoch": 0.06260534553334939,
"grad_norm": 11.75,
"learning_rate": 2.595e-05,
"loss": 0.9312,
"step": 520
},
{
"epoch": 0.0638092944859138,
"grad_norm": 9.5,
"learning_rate": 2.6450000000000003e-05,
"loss": 1.0683,
"step": 530
},
{
"epoch": 0.06501324343847821,
"grad_norm": 10.75,
"learning_rate": 2.6950000000000005e-05,
"loss": 0.9993,
"step": 540
},
{
"epoch": 0.06621719239104262,
"grad_norm": 5.9375,
"learning_rate": 2.7450000000000003e-05,
"loss": 0.9648,
"step": 550
},
{
"epoch": 0.06742114134360704,
"grad_norm": 10.25,
"learning_rate": 2.7950000000000005e-05,
"loss": 0.9591,
"step": 560
},
{
"epoch": 0.06862509029617145,
"grad_norm": 9.6875,
"learning_rate": 2.845e-05,
"loss": 1.0011,
"step": 570
},
{
"epoch": 0.06982903924873585,
"grad_norm": 8.9375,
"learning_rate": 2.895e-05,
"loss": 0.8669,
"step": 580
},
{
"epoch": 0.07103298820130026,
"grad_norm": 8.8125,
"learning_rate": 2.945e-05,
"loss": 0.8381,
"step": 590
},
{
"epoch": 0.07223693715386467,
"grad_norm": 7.9375,
"learning_rate": 2.995e-05,
"loss": 0.8767,
"step": 600
},
{
"epoch": 0.07344088610642908,
"grad_norm": 7.75,
"learning_rate": 3.045e-05,
"loss": 0.7875,
"step": 610
},
{
"epoch": 0.0746448350589935,
"grad_norm": 6.21875,
"learning_rate": 3.095e-05,
"loss": 0.8435,
"step": 620
},
{
"epoch": 0.07584878401155791,
"grad_norm": 8.1875,
"learning_rate": 3.145e-05,
"loss": 0.8618,
"step": 630
},
{
"epoch": 0.07705273296412232,
"grad_norm": 8.5,
"learning_rate": 3.1950000000000004e-05,
"loss": 0.8589,
"step": 640
},
{
"epoch": 0.07825668191668674,
"grad_norm": 8.5,
"learning_rate": 3.245e-05,
"loss": 0.9605,
"step": 650
},
{
"epoch": 0.07946063086925115,
"grad_norm": 7.4375,
"learning_rate": 3.295e-05,
"loss": 0.9085,
"step": 660
},
{
"epoch": 0.08066457982181556,
"grad_norm": 6.71875,
"learning_rate": 3.345000000000001e-05,
"loss": 0.8026,
"step": 670
},
{
"epoch": 0.08186852877437997,
"grad_norm": 10.0625,
"learning_rate": 3.3950000000000005e-05,
"loss": 0.8285,
"step": 680
},
{
"epoch": 0.08307247772694437,
"grad_norm": 8.4375,
"learning_rate": 3.445e-05,
"loss": 0.8712,
"step": 690
},
{
"epoch": 0.08427642667950878,
"grad_norm": 8.5,
"learning_rate": 3.495e-05,
"loss": 0.8366,
"step": 700
},
{
"epoch": 0.0854803756320732,
"grad_norm": 9.3125,
"learning_rate": 3.545e-05,
"loss": 0.9514,
"step": 710
},
{
"epoch": 0.08668432458463761,
"grad_norm": 7.34375,
"learning_rate": 3.595e-05,
"loss": 0.9519,
"step": 720
},
{
"epoch": 0.08788827353720202,
"grad_norm": 7.03125,
"learning_rate": 3.645e-05,
"loss": 0.8114,
"step": 730
},
{
"epoch": 0.08909222248976643,
"grad_norm": 8.3125,
"learning_rate": 3.6950000000000004e-05,
"loss": 0.8443,
"step": 740
},
{
"epoch": 0.09029617144233085,
"grad_norm": 5.8125,
"learning_rate": 3.745e-05,
"loss": 0.815,
"step": 750
},
{
"epoch": 0.09150012039489526,
"grad_norm": 7.125,
"learning_rate": 3.795e-05,
"loss": 0.8194,
"step": 760
},
{
"epoch": 0.09270406934745967,
"grad_norm": 7.375,
"learning_rate": 3.845e-05,
"loss": 0.8795,
"step": 770
},
{
"epoch": 0.09390801830002408,
"grad_norm": 6.125,
"learning_rate": 3.8950000000000005e-05,
"loss": 0.7941,
"step": 780
},
{
"epoch": 0.09511196725258848,
"grad_norm": 6.65625,
"learning_rate": 3.9450000000000003e-05,
"loss": 0.9511,
"step": 790
},
{
"epoch": 0.0963159162051529,
"grad_norm": 6.78125,
"learning_rate": 3.995e-05,
"loss": 0.9017,
"step": 800
},
{
"epoch": 0.09751986515771731,
"grad_norm": 5.9375,
"learning_rate": 4.045000000000001e-05,
"loss": 0.8358,
"step": 810
},
{
"epoch": 0.09872381411028172,
"grad_norm": 5.6875,
"learning_rate": 4.095e-05,
"loss": 0.8172,
"step": 820
},
{
"epoch": 0.09992776306284613,
"grad_norm": 5.4375,
"learning_rate": 4.145e-05,
"loss": 0.9306,
"step": 830
},
{
"epoch": 0.10113171201541055,
"grad_norm": 6.125,
"learning_rate": 4.195e-05,
"loss": 0.801,
"step": 840
},
{
"epoch": 0.10233566096797496,
"grad_norm": 6.4375,
"learning_rate": 4.245e-05,
"loss": 0.793,
"step": 850
},
{
"epoch": 0.10353960992053937,
"grad_norm": 5.25,
"learning_rate": 4.295e-05,
"loss": 0.7686,
"step": 860
},
{
"epoch": 0.10474355887310378,
"grad_norm": 6.65625,
"learning_rate": 4.345e-05,
"loss": 0.8891,
"step": 870
},
{
"epoch": 0.1059475078256682,
"grad_norm": 5.71875,
"learning_rate": 4.3950000000000004e-05,
"loss": 0.7847,
"step": 880
},
{
"epoch": 0.10715145677823261,
"grad_norm": 6.78125,
"learning_rate": 4.445e-05,
"loss": 0.7822,
"step": 890
},
{
"epoch": 0.10835540573079701,
"grad_norm": 6.15625,
"learning_rate": 4.495e-05,
"loss": 0.7343,
"step": 900
},
{
"epoch": 0.10955935468336142,
"grad_norm": 5.78125,
"learning_rate": 4.545000000000001e-05,
"loss": 0.7725,
"step": 910
},
{
"epoch": 0.11076330363592583,
"grad_norm": 6.625,
"learning_rate": 4.5950000000000006e-05,
"loss": 0.7744,
"step": 920
},
{
"epoch": 0.11196725258849025,
"grad_norm": 10.4375,
"learning_rate": 4.6450000000000004e-05,
"loss": 0.72,
"step": 930
},
{
"epoch": 0.11317120154105466,
"grad_norm": 6.5625,
"learning_rate": 4.695e-05,
"loss": 0.79,
"step": 940
},
{
"epoch": 0.11437515049361907,
"grad_norm": 5.75,
"learning_rate": 4.745e-05,
"loss": 0.8084,
"step": 950
},
{
"epoch": 0.11557909944618348,
"grad_norm": 7.71875,
"learning_rate": 4.795e-05,
"loss": 0.7332,
"step": 960
},
{
"epoch": 0.1167830483987479,
"grad_norm": 6.875,
"learning_rate": 4.845e-05,
"loss": 0.827,
"step": 970
},
{
"epoch": 0.11798699735131231,
"grad_norm": 6.375,
"learning_rate": 4.8950000000000004e-05,
"loss": 0.8598,
"step": 980
},
{
"epoch": 0.11919094630387672,
"grad_norm": 6.5625,
"learning_rate": 4.945e-05,
"loss": 0.8785,
"step": 990
},
{
"epoch": 0.12039489525644112,
"grad_norm": 6.5,
"learning_rate": 4.995e-05,
"loss": 0.7385,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.arguana": 0.4762374986283534,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.climate_fever": 0.233898612493137,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.dbpedia_entity": 0.3310615619837843,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.fever": 0.6534142202824448,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.fiqa": 0.44285036968778174,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.hotpotqa": 0.6857747415442759,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.nfcorpus": 0.32960913350288634,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.nq": 0.4234776162530602,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.quora": 0.815858322680335,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.scidocs": 0.2474366934846326,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.scifact": 0.7414771066794785,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/nano_beir.webis_touche2020": 0.29723028397312157,
"step": 1000
},
{
"epoch": 0.12039489525644112,
"eval/avg": 0.47319384676610765,
"step": 1000
},
{
"epoch": 0.12159884420900553,
"grad_norm": 6.28125,
"learning_rate": 5.045e-05,
"loss": 0.8105,
"step": 1010
},
{
"epoch": 0.12280279316156995,
"grad_norm": 6.15625,
"learning_rate": 5.095e-05,
"loss": 0.773,
"step": 1020
},
{
"epoch": 0.12400674211413436,
"grad_norm": 7.125,
"learning_rate": 5.145e-05,
"loss": 0.7829,
"step": 1030
},
{
"epoch": 0.12521069106669877,
"grad_norm": 7.0,
"learning_rate": 5.1949999999999996e-05,
"loss": 0.7485,
"step": 1040
},
{
"epoch": 0.1264146400192632,
"grad_norm": 6.1875,
"learning_rate": 5.245e-05,
"loss": 0.7062,
"step": 1050
},
{
"epoch": 0.1276185889718276,
"grad_norm": 6.1875,
"learning_rate": 5.295e-05,
"loss": 0.8668,
"step": 1060
},
{
"epoch": 0.128822537924392,
"grad_norm": 8.4375,
"learning_rate": 5.345e-05,
"loss": 0.7711,
"step": 1070
},
{
"epoch": 0.13002648687695642,
"grad_norm": 6.0,
"learning_rate": 5.3950000000000004e-05,
"loss": 0.788,
"step": 1080
},
{
"epoch": 0.13123043582952082,
"grad_norm": 5.59375,
"learning_rate": 5.445e-05,
"loss": 0.7042,
"step": 1090
},
{
"epoch": 0.13243438478208525,
"grad_norm": 5.25,
"learning_rate": 5.495e-05,
"loss": 0.7907,
"step": 1100
},
{
"epoch": 0.13363833373464964,
"grad_norm": 5.4375,
"learning_rate": 5.545e-05,
"loss": 0.7463,
"step": 1110
},
{
"epoch": 0.13484228268721407,
"grad_norm": 4.71875,
"learning_rate": 5.5950000000000005e-05,
"loss": 0.7524,
"step": 1120
},
{
"epoch": 0.13604623163977847,
"grad_norm": 3.671875,
"learning_rate": 5.645e-05,
"loss": 0.6886,
"step": 1130
},
{
"epoch": 0.1372501805923429,
"grad_norm": 5.34375,
"learning_rate": 5.695e-05,
"loss": 0.796,
"step": 1140
},
{
"epoch": 0.1384541295449073,
"grad_norm": 9.9375,
"learning_rate": 5.745e-05,
"loss": 0.7372,
"step": 1150
},
{
"epoch": 0.1396580784974717,
"grad_norm": 8.75,
"learning_rate": 5.7950000000000006e-05,
"loss": 0.7826,
"step": 1160
},
{
"epoch": 0.14086202745003612,
"grad_norm": 6.03125,
"learning_rate": 5.8450000000000005e-05,
"loss": 0.938,
"step": 1170
},
{
"epoch": 0.14206597640260052,
"grad_norm": 6.71875,
"learning_rate": 5.895e-05,
"loss": 0.7121,
"step": 1180
},
{
"epoch": 0.14326992535516495,
"grad_norm": 5.6875,
"learning_rate": 5.945000000000001e-05,
"loss": 0.7018,
"step": 1190
},
{
"epoch": 0.14447387430772934,
"grad_norm": 5.40625,
"learning_rate": 5.995000000000001e-05,
"loss": 0.7568,
"step": 1200
},
{
"epoch": 0.14567782326029377,
"grad_norm": 5.625,
"learning_rate": 6.0450000000000006e-05,
"loss": 0.7159,
"step": 1210
},
{
"epoch": 0.14688177221285817,
"grad_norm": 5.71875,
"learning_rate": 6.0950000000000004e-05,
"loss": 0.7932,
"step": 1220
},
{
"epoch": 0.1480857211654226,
"grad_norm": 6.53125,
"learning_rate": 6.145e-05,
"loss": 0.6287,
"step": 1230
},
{
"epoch": 0.149289670117987,
"grad_norm": 6.625,
"learning_rate": 6.195e-05,
"loss": 0.8986,
"step": 1240
},
{
"epoch": 0.15049361907055142,
"grad_norm": 5.96875,
"learning_rate": 6.245000000000001e-05,
"loss": 0.681,
"step": 1250
},
{
"epoch": 0.15169756802311582,
"grad_norm": 6.65625,
"learning_rate": 6.295e-05,
"loss": 0.7139,
"step": 1260
},
{
"epoch": 0.15290151697568022,
"grad_norm": 6.75,
"learning_rate": 6.345e-05,
"loss": 0.72,
"step": 1270
},
{
"epoch": 0.15410546592824464,
"grad_norm": 5.71875,
"learning_rate": 6.395e-05,
"loss": 0.7386,
"step": 1280
},
{
"epoch": 0.15530941488080904,
"grad_norm": 5.6875,
"learning_rate": 6.445e-05,
"loss": 0.769,
"step": 1290
},
{
"epoch": 0.15651336383337347,
"grad_norm": 6.0625,
"learning_rate": 6.494999999999999e-05,
"loss": 0.7968,
"step": 1300
},
{
"epoch": 0.15771731278593787,
"grad_norm": 6.625,
"learning_rate": 6.545e-05,
"loss": 0.6689,
"step": 1310
},
{
"epoch": 0.1589212617385023,
"grad_norm": 6.6875,
"learning_rate": 6.595e-05,
"loss": 0.6724,
"step": 1320
},
{
"epoch": 0.1601252106910667,
"grad_norm": 7.46875,
"learning_rate": 6.645e-05,
"loss": 0.7493,
"step": 1330
},
{
"epoch": 0.16132915964363112,
"grad_norm": 5.25,
"learning_rate": 6.695e-05,
"loss": 0.7919,
"step": 1340
},
{
"epoch": 0.16253310859619552,
"grad_norm": 8.3125,
"learning_rate": 6.745e-05,
"loss": 0.6997,
"step": 1350
},
{
"epoch": 0.16373705754875995,
"grad_norm": 5.375,
"learning_rate": 6.795e-05,
"loss": 0.8986,
"step": 1360
},
{
"epoch": 0.16494100650132434,
"grad_norm": 5.5,
"learning_rate": 6.845e-05,
"loss": 0.6123,
"step": 1370
},
{
"epoch": 0.16614495545388874,
"grad_norm": 5.5,
"learning_rate": 6.895000000000001e-05,
"loss": 0.7185,
"step": 1380
},
{
"epoch": 0.16734890440645317,
"grad_norm": 4.625,
"learning_rate": 6.945000000000001e-05,
"loss": 0.6719,
"step": 1390
},
{
"epoch": 0.16855285335901757,
"grad_norm": 6.59375,
"learning_rate": 6.995e-05,
"loss": 0.8054,
"step": 1400
},
{
"epoch": 0.169756802311582,
"grad_norm": 5.28125,
"learning_rate": 7.045e-05,
"loss": 0.7182,
"step": 1410
},
{
"epoch": 0.1709607512641464,
"grad_norm": 5.0625,
"learning_rate": 7.095e-05,
"loss": 0.6471,
"step": 1420
},
{
"epoch": 0.17216470021671082,
"grad_norm": 4.5625,
"learning_rate": 7.145e-05,
"loss": 0.6292,
"step": 1430
},
{
"epoch": 0.17336864916927522,
"grad_norm": 6.875,
"learning_rate": 7.195e-05,
"loss": 0.7815,
"step": 1440
},
{
"epoch": 0.17457259812183964,
"grad_norm": 5.25,
"learning_rate": 7.245000000000001e-05,
"loss": 0.6529,
"step": 1450
},
{
"epoch": 0.17577654707440404,
"grad_norm": 6.59375,
"learning_rate": 7.295000000000001e-05,
"loss": 0.7466,
"step": 1460
},
{
"epoch": 0.17698049602696847,
"grad_norm": 4.84375,
"learning_rate": 7.345000000000001e-05,
"loss": 0.7231,
"step": 1470
},
{
"epoch": 0.17818444497953287,
"grad_norm": 4.90625,
"learning_rate": 7.395000000000001e-05,
"loss": 0.6796,
"step": 1480
},
{
"epoch": 0.17938839393209727,
"grad_norm": 4.96875,
"learning_rate": 7.445000000000001e-05,
"loss": 0.7144,
"step": 1490
},
{
"epoch": 0.1805923428846617,
"grad_norm": 6.03125,
"learning_rate": 7.495e-05,
"loss": 0.6959,
"step": 1500
},
{
"epoch": 0.1817962918372261,
"grad_norm": 5.84375,
"learning_rate": 7.545e-05,
"loss": 0.7335,
"step": 1510
},
{
"epoch": 0.18300024078979052,
"grad_norm": 6.125,
"learning_rate": 7.595e-05,
"loss": 0.6612,
"step": 1520
},
{
"epoch": 0.18420418974235492,
"grad_norm": 4.71875,
"learning_rate": 7.645e-05,
"loss": 0.7597,
"step": 1530
},
{
"epoch": 0.18540813869491934,
"grad_norm": 5.71875,
"learning_rate": 7.695e-05,
"loss": 0.639,
"step": 1540
},
{
"epoch": 0.18661208764748374,
"grad_norm": 6.375,
"learning_rate": 7.745e-05,
"loss": 0.6398,
"step": 1550
},
{
"epoch": 0.18781603660004817,
"grad_norm": 6.5625,
"learning_rate": 7.795e-05,
"loss": 0.7311,
"step": 1560
},
{
"epoch": 0.18901998555261257,
"grad_norm": 6.28125,
"learning_rate": 7.845e-05,
"loss": 0.7353,
"step": 1570
},
{
"epoch": 0.19022393450517697,
"grad_norm": 4.5625,
"learning_rate": 7.895000000000001e-05,
"loss": 0.6926,
"step": 1580
},
{
"epoch": 0.1914278834577414,
"grad_norm": 5.875,
"learning_rate": 7.945e-05,
"loss": 0.7124,
"step": 1590
},
{
"epoch": 0.1926318324103058,
"grad_norm": 5.15625,
"learning_rate": 7.995e-05,
"loss": 0.758,
"step": 1600
},
{
"epoch": 0.19383578136287022,
"grad_norm": 4.625,
"learning_rate": 8.045e-05,
"loss": 0.6659,
"step": 1610
},
{
"epoch": 0.19503973031543462,
"grad_norm": 5.0625,
"learning_rate": 8.095e-05,
"loss": 0.6103,
"step": 1620
},
{
"epoch": 0.19624367926799904,
"grad_norm": 5.96875,
"learning_rate": 8.145e-05,
"loss": 0.7149,
"step": 1630
},
{
"epoch": 0.19744762822056344,
"grad_norm": 5.875,
"learning_rate": 8.195e-05,
"loss": 0.686,
"step": 1640
},
{
"epoch": 0.19865157717312787,
"grad_norm": 6.75,
"learning_rate": 8.245e-05,
"loss": 0.7359,
"step": 1650
},
{
"epoch": 0.19985552612569227,
"grad_norm": 4.90625,
"learning_rate": 8.295000000000001e-05,
"loss": 0.7223,
"step": 1660
},
{
"epoch": 0.2010594750782567,
"grad_norm": 5.84375,
"learning_rate": 8.345000000000001e-05,
"loss": 0.7182,
"step": 1670
},
{
"epoch": 0.2022634240308211,
"grad_norm": 4.875,
"learning_rate": 8.395000000000001e-05,
"loss": 0.7124,
"step": 1680
},
{
"epoch": 0.2034673729833855,
"grad_norm": 4.875,
"learning_rate": 8.445e-05,
"loss": 0.6543,
"step": 1690
},
{
"epoch": 0.20467132193594992,
"grad_norm": 4.3125,
"learning_rate": 8.495e-05,
"loss": 0.6369,
"step": 1700
},
{
"epoch": 0.20587527088851432,
"grad_norm": 6.6875,
"learning_rate": 8.545e-05,
"loss": 0.7942,
"step": 1710
},
{
"epoch": 0.20707921984107874,
"grad_norm": 6.9375,
"learning_rate": 8.595e-05,
"loss": 0.7056,
"step": 1720
},
{
"epoch": 0.20828316879364314,
"grad_norm": 7.90625,
"learning_rate": 8.645000000000001e-05,
"loss": 0.6661,
"step": 1730
},
{
"epoch": 0.20948711774620757,
"grad_norm": 5.40625,
"learning_rate": 8.695000000000001e-05,
"loss": 0.7553,
"step": 1740
},
{
"epoch": 0.21069106669877197,
"grad_norm": 5.40625,
"learning_rate": 8.745000000000001e-05,
"loss": 0.5773,
"step": 1750
},
{
"epoch": 0.2118950156513364,
"grad_norm": 6.875,
"learning_rate": 8.795e-05,
"loss": 0.6403,
"step": 1760
},
{
"epoch": 0.2130989646039008,
"grad_norm": 5.21875,
"learning_rate": 8.845e-05,
"loss": 0.6805,
"step": 1770
},
{
"epoch": 0.21430291355646522,
"grad_norm": 6.34375,
"learning_rate": 8.895e-05,
"loss": 0.6963,
"step": 1780
},
{
"epoch": 0.21550686250902962,
"grad_norm": 5.6875,
"learning_rate": 8.945e-05,
"loss": 0.6754,
"step": 1790
},
{
"epoch": 0.21671081146159402,
"grad_norm": 4.875,
"learning_rate": 8.995e-05,
"loss": 0.6748,
"step": 1800
},
{
"epoch": 0.21791476041415844,
"grad_norm": 5.1875,
"learning_rate": 9.045e-05,
"loss": 0.6637,
"step": 1810
},
{
"epoch": 0.21911870936672284,
"grad_norm": 5.46875,
"learning_rate": 9.095e-05,
"loss": 0.6794,
"step": 1820
},
{
"epoch": 0.22032265831928727,
"grad_norm": 5.90625,
"learning_rate": 9.145e-05,
"loss": 0.6713,
"step": 1830
},
{
"epoch": 0.22152660727185167,
"grad_norm": 5.90625,
"learning_rate": 9.195e-05,
"loss": 0.6591,
"step": 1840
},
{
"epoch": 0.2227305562244161,
"grad_norm": 5.84375,
"learning_rate": 9.245e-05,
"loss": 0.6735,
"step": 1850
},
{
"epoch": 0.2239345051769805,
"grad_norm": 5.21875,
"learning_rate": 9.295000000000001e-05,
"loss": 0.6405,
"step": 1860
},
{
"epoch": 0.22513845412954492,
"grad_norm": 6.75,
"learning_rate": 9.345000000000001e-05,
"loss": 0.855,
"step": 1870
},
{
"epoch": 0.22634240308210932,
"grad_norm": 4.65625,
"learning_rate": 9.395000000000001e-05,
"loss": 0.6795,
"step": 1880
},
{
"epoch": 0.22754635203467374,
"grad_norm": 5.53125,
"learning_rate": 9.445e-05,
"loss": 0.7027,
"step": 1890
},
{
"epoch": 0.22875030098723814,
"grad_norm": 5.71875,
"learning_rate": 9.495e-05,
"loss": 0.6272,
"step": 1900
},
{
"epoch": 0.22995424993980254,
"grad_norm": 5.28125,
"learning_rate": 9.545e-05,
"loss": 0.6486,
"step": 1910
},
{
"epoch": 0.23115819889236697,
"grad_norm": 7.75,
"learning_rate": 9.595e-05,
"loss": 0.6701,
"step": 1920
},
{
"epoch": 0.23236214784493137,
"grad_norm": 5.8125,
"learning_rate": 9.645000000000001e-05,
"loss": 0.6247,
"step": 1930
},
{
"epoch": 0.2335660967974958,
"grad_norm": 6.25,
"learning_rate": 9.695000000000001e-05,
"loss": 0.5737,
"step": 1940
},
{
"epoch": 0.2347700457500602,
"grad_norm": 5.6875,
"learning_rate": 9.745000000000001e-05,
"loss": 0.6458,
"step": 1950
},
{
"epoch": 0.23597399470262462,
"grad_norm": 5.75,
"learning_rate": 9.795000000000001e-05,
"loss": 0.6185,
"step": 1960
},
{
"epoch": 0.23717794365518902,
"grad_norm": 5.03125,
"learning_rate": 9.845000000000001e-05,
"loss": 0.6793,
"step": 1970
},
{
"epoch": 0.23838189260775344,
"grad_norm": 4.8125,
"learning_rate": 9.895e-05,
"loss": 0.6427,
"step": 1980
},
{
"epoch": 0.23958584156031784,
"grad_norm": 6.4375,
"learning_rate": 9.945e-05,
"loss": 0.5906,
"step": 1990
},
{
"epoch": 0.24078979051288224,
"grad_norm": 4.875,
"learning_rate": 9.995e-05,
"loss": 0.722,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.arguana": 0.4525611780495533,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.climate_fever": 0.2570101915834051,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.dbpedia_entity": 0.32066621016721925,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.fever": 0.6653091674586123,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.fiqa": 0.4363959457519276,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.hotpotqa": 0.6777722196406764,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.nfcorpus": 0.34079846618923787,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.nq": 0.4250588398489226,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.quora": 0.8267157890689902,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.scidocs": 0.2523924791841879,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.scifact": 0.7454362051377628,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/nano_beir.webis_touche2020": 0.2983187434961579,
"step": 2000
},
{
"epoch": 0.24078979051288224,
"eval/avg": 0.47486961963138774,
"step": 2000
},
{
"epoch": 0.24199373946544667,
"grad_norm": 4.90625,
"learning_rate": 9.999993831498517e-05,
"loss": 0.629,
"step": 2010
},
{
"epoch": 0.24319768841801107,
"grad_norm": 5.15625,
"learning_rate": 9.999972508303057e-05,
"loss": 0.6816,
"step": 2020
},
{
"epoch": 0.2444016373705755,
"grad_norm": 4.3125,
"learning_rate": 9.999935954324219e-05,
"loss": 0.6942,
"step": 2030
},
{
"epoch": 0.2456055863231399,
"grad_norm": 4.40625,
"learning_rate": 9.999884169673351e-05,
"loss": 0.6213,
"step": 2040
},
{
"epoch": 0.24680953527570432,
"grad_norm": 6.375,
"learning_rate": 9.9998171545082e-05,
"loss": 0.7049,
"step": 2050
},
{
"epoch": 0.24801348422826872,
"grad_norm": 6.40625,
"learning_rate": 9.999734909032906e-05,
"loss": 0.6177,
"step": 2060
},
{
"epoch": 0.24921743318083314,
"grad_norm": 5.71875,
"learning_rate": 9.999637433497999e-05,
"loss": 0.674,
"step": 2070
},
{
"epoch": 0.25042138213339754,
"grad_norm": 5.25,
"learning_rate": 9.999524728200411e-05,
"loss": 0.5632,
"step": 2080
},
{
"epoch": 0.25162533108596197,
"grad_norm": 5.96875,
"learning_rate": 9.999396793483462e-05,
"loss": 0.6868,
"step": 2090
},
{
"epoch": 0.2528292800385264,
"grad_norm": 7.125,
"learning_rate": 9.99925362973686e-05,
"loss": 0.6709,
"step": 2100
},
{
"epoch": 0.25403322899109076,
"grad_norm": 4.5,
"learning_rate": 9.999095237396707e-05,
"loss": 0.6105,
"step": 2110
},
{
"epoch": 0.2552371779436552,
"grad_norm": 6.09375,
"learning_rate": 9.998921616945498e-05,
"loss": 0.7128,
"step": 2120
},
{
"epoch": 0.2564411268962196,
"grad_norm": 5.5,
"learning_rate": 9.998732768912104e-05,
"loss": 0.6496,
"step": 2130
},
{
"epoch": 0.257645075848784,
"grad_norm": 4.90625,
"learning_rate": 9.998528693871796e-05,
"loss": 0.7465,
"step": 2140
},
{
"epoch": 0.2588490248013484,
"grad_norm": 6.09375,
"learning_rate": 9.998309392446217e-05,
"loss": 0.6041,
"step": 2150
},
{
"epoch": 0.26005297375391284,
"grad_norm": 5.15625,
"learning_rate": 9.998074865303399e-05,
"loss": 0.6091,
"step": 2160
},
{
"epoch": 0.26125692270647727,
"grad_norm": 4.4375,
"learning_rate": 9.997825113157753e-05,
"loss": 0.7457,
"step": 2170
},
{
"epoch": 0.26246087165904164,
"grad_norm": 4.8125,
"learning_rate": 9.997560136770065e-05,
"loss": 0.6663,
"step": 2180
},
{
"epoch": 0.26366482061160607,
"grad_norm": 4.21875,
"learning_rate": 9.997279936947502e-05,
"loss": 0.7311,
"step": 2190
},
{
"epoch": 0.2648687695641705,
"grad_norm": 5.5625,
"learning_rate": 9.996984514543598e-05,
"loss": 0.5879,
"step": 2200
},
{
"epoch": 0.26607271851673486,
"grad_norm": 4.96875,
"learning_rate": 9.996673870458264e-05,
"loss": 0.7008,
"step": 2210
},
{
"epoch": 0.2672766674692993,
"grad_norm": 5.09375,
"learning_rate": 9.996348005637775e-05,
"loss": 0.5099,
"step": 2220
},
{
"epoch": 0.2684806164218637,
"grad_norm": 6.6875,
"learning_rate": 9.99600692107477e-05,
"loss": 0.6186,
"step": 2230
},
{
"epoch": 0.26968456537442814,
"grad_norm": 5.15625,
"learning_rate": 9.995650617808252e-05,
"loss": 0.6839,
"step": 2240
},
{
"epoch": 0.2708885143269925,
"grad_norm": 3.90625,
"learning_rate": 9.995279096923585e-05,
"loss": 0.6287,
"step": 2250
},
{
"epoch": 0.27209246327955694,
"grad_norm": 3.921875,
"learning_rate": 9.994892359552483e-05,
"loss": 0.5221,
"step": 2260
},
{
"epoch": 0.27329641223212137,
"grad_norm": 6.4375,
"learning_rate": 9.994490406873019e-05,
"loss": 0.6728,
"step": 2270
},
{
"epoch": 0.2745003611846858,
"grad_norm": 4.375,
"learning_rate": 9.994073240109606e-05,
"loss": 0.5306,
"step": 2280
},
{
"epoch": 0.27570431013725016,
"grad_norm": 5.65625,
"learning_rate": 9.993640860533009e-05,
"loss": 0.6765,
"step": 2290
},
{
"epoch": 0.2769082590898146,
"grad_norm": 5.09375,
"learning_rate": 9.99319326946033e-05,
"loss": 0.5858,
"step": 2300
},
{
"epoch": 0.278112208042379,
"grad_norm": 6.0625,
"learning_rate": 9.992730468255011e-05,
"loss": 0.6345,
"step": 2310
},
{
"epoch": 0.2793161569949434,
"grad_norm": 4.9375,
"learning_rate": 9.992252458326823e-05,
"loss": 0.5993,
"step": 2320
},
{
"epoch": 0.2805201059475078,
"grad_norm": 4.65625,
"learning_rate": 9.991759241131869e-05,
"loss": 0.7848,
"step": 2330
},
{
"epoch": 0.28172405490007224,
"grad_norm": 4.5625,
"learning_rate": 9.99125081817257e-05,
"loss": 0.565,
"step": 2340
},
{
"epoch": 0.28292800385263667,
"grad_norm": 3.796875,
"learning_rate": 9.990727190997674e-05,
"loss": 0.5264,
"step": 2350
},
{
"epoch": 0.28413195280520104,
"grad_norm": 4.21875,
"learning_rate": 9.99018836120224e-05,
"loss": 0.6909,
"step": 2360
},
{
"epoch": 0.28533590175776546,
"grad_norm": 4.0,
"learning_rate": 9.989634330427636e-05,
"loss": 0.6056,
"step": 2370
},
{
"epoch": 0.2865398507103299,
"grad_norm": 5.9375,
"learning_rate": 9.989065100361536e-05,
"loss": 0.6339,
"step": 2380
},
{
"epoch": 0.2877437996628943,
"grad_norm": 5.84375,
"learning_rate": 9.988480672737915e-05,
"loss": 0.6631,
"step": 2390
},
{
"epoch": 0.2889477486154587,
"grad_norm": 4.03125,
"learning_rate": 9.987881049337037e-05,
"loss": 0.6711,
"step": 2400
},
{
"epoch": 0.2901516975680231,
"grad_norm": 4.65625,
"learning_rate": 9.987266231985462e-05,
"loss": 0.6853,
"step": 2410
},
{
"epoch": 0.29135564652058754,
"grad_norm": 5.5625,
"learning_rate": 9.986636222556032e-05,
"loss": 0.593,
"step": 2420
},
{
"epoch": 0.2925595954731519,
"grad_norm": 5.40625,
"learning_rate": 9.98599102296786e-05,
"loss": 0.5731,
"step": 2430
},
{
"epoch": 0.29376354442571634,
"grad_norm": 4.21875,
"learning_rate": 9.985330635186341e-05,
"loss": 0.5369,
"step": 2440
},
{
"epoch": 0.29496749337828077,
"grad_norm": 5.1875,
"learning_rate": 9.984655061223126e-05,
"loss": 0.6457,
"step": 2450
},
{
"epoch": 0.2961714423308452,
"grad_norm": 4.6875,
"learning_rate": 9.983964303136133e-05,
"loss": 0.482,
"step": 2460
},
{
"epoch": 0.29737539128340956,
"grad_norm": 6.5,
"learning_rate": 9.983258363029531e-05,
"loss": 0.6507,
"step": 2470
},
{
"epoch": 0.298579340235974,
"grad_norm": 3.875,
"learning_rate": 9.982537243053737e-05,
"loss": 0.5553,
"step": 2480
},
{
"epoch": 0.2997832891885384,
"grad_norm": 5.21875,
"learning_rate": 9.981800945405403e-05,
"loss": 0.5712,
"step": 2490
},
{
"epoch": 0.30098723814110284,
"grad_norm": 4.875,
"learning_rate": 9.981049472327426e-05,
"loss": 0.6483,
"step": 2500
},
{
"epoch": 0.3021911870936672,
"grad_norm": 4.15625,
"learning_rate": 9.980282826108918e-05,
"loss": 0.5455,
"step": 2510
},
{
"epoch": 0.30339513604623164,
"grad_norm": 5.09375,
"learning_rate": 9.979501009085219e-05,
"loss": 0.7318,
"step": 2520
},
{
"epoch": 0.30459908499879607,
"grad_norm": 5.78125,
"learning_rate": 9.978704023637878e-05,
"loss": 0.6642,
"step": 2530
},
{
"epoch": 0.30580303395136044,
"grad_norm": 6.71875,
"learning_rate": 9.97789187219465e-05,
"loss": 0.6208,
"step": 2540
},
{
"epoch": 0.30700698290392486,
"grad_norm": 4.90625,
"learning_rate": 9.977064557229492e-05,
"loss": 0.6076,
"step": 2550
},
{
"epoch": 0.3082109318564893,
"grad_norm": 6.09375,
"learning_rate": 9.976222081262545e-05,
"loss": 0.5148,
"step": 2560
},
{
"epoch": 0.3094148808090537,
"grad_norm": 4.78125,
"learning_rate": 9.975364446860142e-05,
"loss": 0.642,
"step": 2570
},
{
"epoch": 0.3106188297616181,
"grad_norm": 5.0625,
"learning_rate": 9.974491656634782e-05,
"loss": 0.6305,
"step": 2580
},
{
"epoch": 0.3118227787141825,
"grad_norm": 4.5625,
"learning_rate": 9.973603713245135e-05,
"loss": 0.6977,
"step": 2590
},
{
"epoch": 0.31302672766674694,
"grad_norm": 5.0,
"learning_rate": 9.972700619396033e-05,
"loss": 0.6351,
"step": 2600
},
{
"epoch": 0.31423067661931137,
"grad_norm": 4.8125,
"learning_rate": 9.971782377838457e-05,
"loss": 0.6461,
"step": 2610
},
{
"epoch": 0.31543462557187574,
"grad_norm": 4.53125,
"learning_rate": 9.970848991369529e-05,
"loss": 0.6827,
"step": 2620
},
{
"epoch": 0.31663857452444016,
"grad_norm": 3.890625,
"learning_rate": 9.969900462832505e-05,
"loss": 0.6179,
"step": 2630
},
{
"epoch": 0.3178425234770046,
"grad_norm": 4.90625,
"learning_rate": 9.968936795116768e-05,
"loss": 0.5521,
"step": 2640
},
{
"epoch": 0.31904647242956896,
"grad_norm": 4.59375,
"learning_rate": 9.967957991157817e-05,
"loss": 0.6362,
"step": 2650
},
{
"epoch": 0.3202504213821334,
"grad_norm": 5.46875,
"learning_rate": 9.96696405393726e-05,
"loss": 0.7081,
"step": 2660
},
{
"epoch": 0.3214543703346978,
"grad_norm": 4.125,
"learning_rate": 9.965954986482799e-05,
"loss": 0.6079,
"step": 2670
},
{
"epoch": 0.32265831928726224,
"grad_norm": 4.625,
"learning_rate": 9.964930791868233e-05,
"loss": 0.6601,
"step": 2680
},
{
"epoch": 0.3238622682398266,
"grad_norm": 4.4375,
"learning_rate": 9.963891473213431e-05,
"loss": 0.4889,
"step": 2690
},
{
"epoch": 0.32506621719239104,
"grad_norm": 5.84375,
"learning_rate": 9.962837033684343e-05,
"loss": 0.5751,
"step": 2700
},
{
"epoch": 0.32627016614495546,
"grad_norm": 5.875,
"learning_rate": 9.96176747649297e-05,
"loss": 0.5564,
"step": 2710
},
{
"epoch": 0.3274741150975199,
"grad_norm": 3.8125,
"learning_rate": 9.960682804897372e-05,
"loss": 0.6049,
"step": 2720
},
{
"epoch": 0.32867806405008426,
"grad_norm": 5.0625,
"learning_rate": 9.959583022201647e-05,
"loss": 0.6224,
"step": 2730
},
{
"epoch": 0.3298820130026487,
"grad_norm": 4.9375,
"learning_rate": 9.958468131755923e-05,
"loss": 0.6209,
"step": 2740
},
{
"epoch": 0.3310859619552131,
"grad_norm": 5.09375,
"learning_rate": 9.957338136956347e-05,
"loss": 0.6223,
"step": 2750
},
{
"epoch": 0.3322899109077775,
"grad_norm": 4.625,
"learning_rate": 9.956193041245084e-05,
"loss": 0.5007,
"step": 2760
},
{
"epoch": 0.3334938598603419,
"grad_norm": 5.125,
"learning_rate": 9.955032848110288e-05,
"loss": 0.7128,
"step": 2770
},
{
"epoch": 0.33469780881290634,
"grad_norm": 3.765625,
"learning_rate": 9.953857561086115e-05,
"loss": 0.6118,
"step": 2780
},
{
"epoch": 0.33590175776547077,
"grad_norm": 4.5,
"learning_rate": 9.952667183752689e-05,
"loss": 0.6389,
"step": 2790
},
{
"epoch": 0.33710570671803514,
"grad_norm": 5.40625,
"learning_rate": 9.951461719736109e-05,
"loss": 0.6104,
"step": 2800
},
{
"epoch": 0.33830965567059956,
"grad_norm": 5.65625,
"learning_rate": 9.950241172708423e-05,
"loss": 0.5951,
"step": 2810
},
{
"epoch": 0.339513604623164,
"grad_norm": 4.375,
"learning_rate": 9.949005546387631e-05,
"loss": 0.719,
"step": 2820
},
{
"epoch": 0.3407175535757284,
"grad_norm": 6.0,
"learning_rate": 9.947754844537666e-05,
"loss": 0.5546,
"step": 2830
},
{
"epoch": 0.3419215025282928,
"grad_norm": 5.0,
"learning_rate": 9.946489070968383e-05,
"loss": 0.6485,
"step": 2840
},
{
"epoch": 0.3431254514808572,
"grad_norm": 5.4375,
"learning_rate": 9.945208229535548e-05,
"loss": 0.4833,
"step": 2850
},
{
"epoch": 0.34432940043342164,
"grad_norm": 4.6875,
"learning_rate": 9.943912324140823e-05,
"loss": 0.6452,
"step": 2860
},
{
"epoch": 0.345533349385986,
"grad_norm": 7.78125,
"learning_rate": 9.942601358731762e-05,
"loss": 0.6203,
"step": 2870
},
{
"epoch": 0.34673729833855044,
"grad_norm": 5.78125,
"learning_rate": 9.941275337301796e-05,
"loss": 0.5906,
"step": 2880
},
{
"epoch": 0.34794124729111486,
"grad_norm": 3.578125,
"learning_rate": 9.939934263890213e-05,
"loss": 0.6033,
"step": 2890
},
{
"epoch": 0.3491451962436793,
"grad_norm": 4.21875,
"learning_rate": 9.938578142582155e-05,
"loss": 0.5577,
"step": 2900
},
{
"epoch": 0.35034914519624366,
"grad_norm": 5.1875,
"learning_rate": 9.937206977508604e-05,
"loss": 0.584,
"step": 2910
},
{
"epoch": 0.3515530941488081,
"grad_norm": 5.65625,
"learning_rate": 9.935820772846367e-05,
"loss": 0.6395,
"step": 2920
},
{
"epoch": 0.3527570431013725,
"grad_norm": 4.9375,
"learning_rate": 9.934419532818063e-05,
"loss": 0.6887,
"step": 2930
},
{
"epoch": 0.35396099205393694,
"grad_norm": 3.515625,
"learning_rate": 9.933003261692113e-05,
"loss": 0.6007,
"step": 2940
},
{
"epoch": 0.3551649410065013,
"grad_norm": 3.765625,
"learning_rate": 9.931571963782721e-05,
"loss": 0.5343,
"step": 2950
},
{
"epoch": 0.35636888995906574,
"grad_norm": 6.625,
"learning_rate": 9.930125643449875e-05,
"loss": 0.6603,
"step": 2960
},
{
"epoch": 0.35757283891163016,
"grad_norm": 5.78125,
"learning_rate": 9.928664305099314e-05,
"loss": 0.6181,
"step": 2970
},
{
"epoch": 0.35877678786419454,
"grad_norm": 5.46875,
"learning_rate": 9.927187953182525e-05,
"loss": 0.6866,
"step": 2980
},
{
"epoch": 0.35998073681675896,
"grad_norm": 5.3125,
"learning_rate": 9.925696592196736e-05,
"loss": 0.6638,
"step": 2990
},
{
"epoch": 0.3611846857693234,
"grad_norm": 4.65625,
"learning_rate": 9.92419022668489e-05,
"loss": 0.6748,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.arguana": 0.43364873258121056,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.climate_fever": 0.2460637609555945,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.dbpedia_entity": 0.3159084412902975,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.fever": 0.6561861919849862,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.fiqa": 0.4461917273661319,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.hotpotqa": 0.6787641908210825,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.nfcorpus": 0.3435535179091152,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.nq": 0.4301288631476355,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.quora": 0.840867288047918,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.scidocs": 0.2483700409338938,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.scifact": 0.7407091437249231,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/nano_beir.webis_touche2020": 0.2932674650458427,
"step": 3000
},
{
"epoch": 0.3611846857693234,
"eval/avg": 0.4728049469840527,
"step": 3000
},
{
"epoch": 0.3623886347218878,
"grad_norm": 4.21875,
"learning_rate": 9.92266886123564e-05,
"loss": 0.6107,
"step": 3010
},
{
"epoch": 0.3635925836744522,
"grad_norm": 5.875,
"learning_rate": 9.921132500483325e-05,
"loss": 0.6822,
"step": 3020
},
{
"epoch": 0.3647965326270166,
"grad_norm": 5.1875,
"learning_rate": 9.919581149107968e-05,
"loss": 0.6194,
"step": 3030
},
{
"epoch": 0.36600048157958104,
"grad_norm": 3.5,
"learning_rate": 9.918014811835254e-05,
"loss": 0.5776,
"step": 3040
},
{
"epoch": 0.3672044305321454,
"grad_norm": 6.84375,
"learning_rate": 9.91643349343652e-05,
"loss": 0.6947,
"step": 3050
},
{
"epoch": 0.36840837948470984,
"grad_norm": 5.25,
"learning_rate": 9.914837198728733e-05,
"loss": 0.5273,
"step": 3060
},
{
"epoch": 0.36961232843727426,
"grad_norm": 6.25,
"learning_rate": 9.913225932574486e-05,
"loss": 0.655,
"step": 3070
},
{
"epoch": 0.3708162773898387,
"grad_norm": 6.46875,
"learning_rate": 9.911599699881974e-05,
"loss": 0.659,
"step": 3080
},
{
"epoch": 0.37202022634240306,
"grad_norm": 5.09375,
"learning_rate": 9.909958505604984e-05,
"loss": 0.6164,
"step": 3090
},
{
"epoch": 0.3732241752949675,
"grad_norm": 5.90625,
"learning_rate": 9.908302354742878e-05,
"loss": 0.6318,
"step": 3100
},
{
"epoch": 0.3744281242475319,
"grad_norm": 3.859375,
"learning_rate": 9.906631252340578e-05,
"loss": 0.541,
"step": 3110
},
{
"epoch": 0.37563207320009634,
"grad_norm": 5.40625,
"learning_rate": 9.904945203488554e-05,
"loss": 0.5854,
"step": 3120
},
{
"epoch": 0.3768360221526607,
"grad_norm": 5.6875,
"learning_rate": 9.903244213322802e-05,
"loss": 0.6657,
"step": 3130
},
{
"epoch": 0.37803997110522514,
"grad_norm": 5.78125,
"learning_rate": 9.901528287024833e-05,
"loss": 0.5835,
"step": 3140
},
{
"epoch": 0.37924392005778956,
"grad_norm": 4.875,
"learning_rate": 9.899797429821656e-05,
"loss": 0.6369,
"step": 3150
},
{
"epoch": 0.38044786901035393,
"grad_norm": 5.84375,
"learning_rate": 9.898051646985762e-05,
"loss": 0.5991,
"step": 3160
},
{
"epoch": 0.38165181796291836,
"grad_norm": 5.3125,
"learning_rate": 9.896290943835109e-05,
"loss": 0.5792,
"step": 3170
},
{
"epoch": 0.3828557669154828,
"grad_norm": 4.34375,
"learning_rate": 9.894515325733103e-05,
"loss": 0.635,
"step": 3180
},
{
"epoch": 0.3840597158680472,
"grad_norm": 3.859375,
"learning_rate": 9.892724798088586e-05,
"loss": 0.7034,
"step": 3190
},
{
"epoch": 0.3852636648206116,
"grad_norm": 5.03125,
"learning_rate": 9.890919366355816e-05,
"loss": 0.658,
"step": 3200
},
{
"epoch": 0.386467613773176,
"grad_norm": 5.1875,
"learning_rate": 9.889099036034451e-05,
"loss": 0.5952,
"step": 3210
},
{
"epoch": 0.38767156272574044,
"grad_norm": 5.46875,
"learning_rate": 9.88726381266953e-05,
"loss": 0.6666,
"step": 3220
},
{
"epoch": 0.38887551167830486,
"grad_norm": 5.5,
"learning_rate": 9.885413701851464e-05,
"loss": 0.7236,
"step": 3230
},
{
"epoch": 0.39007946063086923,
"grad_norm": 5.4375,
"learning_rate": 9.883548709216013e-05,
"loss": 0.6152,
"step": 3240
},
{
"epoch": 0.39128340958343366,
"grad_norm": 5.71875,
"learning_rate": 9.881668840444265e-05,
"loss": 0.6167,
"step": 3250
},
{
"epoch": 0.3924873585359981,
"grad_norm": 4.46875,
"learning_rate": 9.879774101262627e-05,
"loss": 0.5904,
"step": 3260
},
{
"epoch": 0.39369130748856246,
"grad_norm": 4.0,
"learning_rate": 9.877864497442804e-05,
"loss": 0.5288,
"step": 3270
},
{
"epoch": 0.3948952564411269,
"grad_norm": 5.25,
"learning_rate": 9.87594003480178e-05,
"loss": 0.6141,
"step": 3280
},
{
"epoch": 0.3960992053936913,
"grad_norm": 6.0625,
"learning_rate": 9.874000719201804e-05,
"loss": 0.7486,
"step": 3290
},
{
"epoch": 0.39730315434625574,
"grad_norm": 5.1875,
"learning_rate": 9.872046556550363e-05,
"loss": 0.6557,
"step": 3300
},
{
"epoch": 0.3985071032988201,
"grad_norm": 5.25,
"learning_rate": 9.870077552800179e-05,
"loss": 0.6125,
"step": 3310
},
{
"epoch": 0.39971105225138454,
"grad_norm": 5.0625,
"learning_rate": 9.868093713949178e-05,
"loss": 0.5268,
"step": 3320
},
{
"epoch": 0.40091500120394896,
"grad_norm": 4.96875,
"learning_rate": 9.866095046040478e-05,
"loss": 0.6483,
"step": 3330
},
{
"epoch": 0.4021189501565134,
"grad_norm": 4.90625,
"learning_rate": 9.864081555162366e-05,
"loss": 0.7377,
"step": 3340
},
{
"epoch": 0.40332289910907776,
"grad_norm": 5.34375,
"learning_rate": 9.862053247448286e-05,
"loss": 0.5641,
"step": 3350
},
{
"epoch": 0.4045268480616422,
"grad_norm": 4.5625,
"learning_rate": 9.860010129076813e-05,
"loss": 0.6322,
"step": 3360
},
{
"epoch": 0.4057307970142066,
"grad_norm": 5.25,
"learning_rate": 9.857952206271643e-05,
"loss": 0.614,
"step": 3370
},
{
"epoch": 0.406934745966771,
"grad_norm": 5.3125,
"learning_rate": 9.855879485301564e-05,
"loss": 0.5311,
"step": 3380
},
{
"epoch": 0.4081386949193354,
"grad_norm": 4.46875,
"learning_rate": 9.853791972480445e-05,
"loss": 0.6266,
"step": 3390
},
{
"epoch": 0.40934264387189984,
"grad_norm": 5.40625,
"learning_rate": 9.85168967416721e-05,
"loss": 0.5445,
"step": 3400
},
{
"epoch": 0.41054659282446426,
"grad_norm": 5.1875,
"learning_rate": 9.849572596765826e-05,
"loss": 0.6219,
"step": 3410
},
{
"epoch": 0.41175054177702863,
"grad_norm": 6.0,
"learning_rate": 9.847440746725275e-05,
"loss": 0.6091,
"step": 3420
},
{
"epoch": 0.41295449072959306,
"grad_norm": 6.03125,
"learning_rate": 9.845294130539546e-05,
"loss": 0.5503,
"step": 3430
},
{
"epoch": 0.4141584396821575,
"grad_norm": 3.953125,
"learning_rate": 9.8431327547476e-05,
"loss": 0.5049,
"step": 3440
},
{
"epoch": 0.4153623886347219,
"grad_norm": 4.40625,
"learning_rate": 9.840956625933367e-05,
"loss": 0.6165,
"step": 3450
},
{
"epoch": 0.4165663375872863,
"grad_norm": 5.78125,
"learning_rate": 9.838765750725709e-05,
"loss": 0.6076,
"step": 3460
},
{
"epoch": 0.4177702865398507,
"grad_norm": 5.9375,
"learning_rate": 9.836560135798415e-05,
"loss": 0.6464,
"step": 3470
},
{
"epoch": 0.41897423549241514,
"grad_norm": 4.53125,
"learning_rate": 9.834339787870166e-05,
"loss": 0.5661,
"step": 3480
},
{
"epoch": 0.4201781844449795,
"grad_norm": 4.25,
"learning_rate": 9.832104713704531e-05,
"loss": 0.5922,
"step": 3490
},
{
"epoch": 0.42138213339754393,
"grad_norm": 4.40625,
"learning_rate": 9.829854920109934e-05,
"loss": 0.5543,
"step": 3500
},
{
"epoch": 0.42258608235010836,
"grad_norm": 5.78125,
"learning_rate": 9.827590413939632e-05,
"loss": 0.5931,
"step": 3510
},
{
"epoch": 0.4237900313026728,
"grad_norm": 5.375,
"learning_rate": 9.825311202091707e-05,
"loss": 0.7203,
"step": 3520
},
{
"epoch": 0.42499398025523716,
"grad_norm": 5.96875,
"learning_rate": 9.823017291509034e-05,
"loss": 0.6172,
"step": 3530
},
{
"epoch": 0.4261979292078016,
"grad_norm": 5.71875,
"learning_rate": 9.820708689179259e-05,
"loss": 0.5939,
"step": 3540
},
{
"epoch": 0.427401878160366,
"grad_norm": 5.71875,
"learning_rate": 9.818385402134788e-05,
"loss": 0.5387,
"step": 3550
},
{
"epoch": 0.42860582711293044,
"grad_norm": 4.75,
"learning_rate": 9.816047437452757e-05,
"loss": 0.6173,
"step": 3560
},
{
"epoch": 0.4298097760654948,
"grad_norm": 6.1875,
"learning_rate": 9.81369480225501e-05,
"loss": 0.5851,
"step": 3570
},
{
"epoch": 0.43101372501805923,
"grad_norm": 5.71875,
"learning_rate": 9.811327503708081e-05,
"loss": 0.5655,
"step": 3580
},
{
"epoch": 0.43221767397062366,
"grad_norm": 5.09375,
"learning_rate": 9.808945549023174e-05,
"loss": 0.5661,
"step": 3590
},
{
"epoch": 0.43342162292318803,
"grad_norm": 4.84375,
"learning_rate": 9.806548945456134e-05,
"loss": 0.7306,
"step": 3600
},
{
"epoch": 0.43462557187575246,
"grad_norm": 3.953125,
"learning_rate": 9.804137700307434e-05,
"loss": 0.5806,
"step": 3610
},
{
"epoch": 0.4358295208283169,
"grad_norm": 3.765625,
"learning_rate": 9.801711820922142e-05,
"loss": 0.6206,
"step": 3620
},
{
"epoch": 0.4370334697808813,
"grad_norm": 5.75,
"learning_rate": 9.799271314689908e-05,
"loss": 0.6145,
"step": 3630
},
{
"epoch": 0.4382374187334457,
"grad_norm": 4.21875,
"learning_rate": 9.796816189044939e-05,
"loss": 0.5976,
"step": 3640
},
{
"epoch": 0.4394413676860101,
"grad_norm": 4.46875,
"learning_rate": 9.794346451465972e-05,
"loss": 0.7035,
"step": 3650
},
{
"epoch": 0.44064531663857454,
"grad_norm": 5.96875,
"learning_rate": 9.791862109476257e-05,
"loss": 0.5378,
"step": 3660
},
{
"epoch": 0.44184926559113896,
"grad_norm": 4.90625,
"learning_rate": 9.789363170643528e-05,
"loss": 0.5624,
"step": 3670
},
{
"epoch": 0.44305321454370333,
"grad_norm": 4.75,
"learning_rate": 9.786849642579989e-05,
"loss": 0.613,
"step": 3680
},
{
"epoch": 0.44425716349626776,
"grad_norm": 5.5625,
"learning_rate": 9.784321532942282e-05,
"loss": 0.5219,
"step": 3690
},
{
"epoch": 0.4454611124488322,
"grad_norm": 4.34375,
"learning_rate": 9.781778849431464e-05,
"loss": 0.6854,
"step": 3700
},
{
"epoch": 0.44666506140139656,
"grad_norm": 6.40625,
"learning_rate": 9.779221599792995e-05,
"loss": 0.5974,
"step": 3710
},
{
"epoch": 0.447869010353961,
"grad_norm": 5.96875,
"learning_rate": 9.776649791816698e-05,
"loss": 0.6709,
"step": 3720
},
{
"epoch": 0.4490729593065254,
"grad_norm": 5.25,
"learning_rate": 9.774063433336746e-05,
"loss": 0.6575,
"step": 3730
},
{
"epoch": 0.45027690825908984,
"grad_norm": 4.3125,
"learning_rate": 9.771462532231635e-05,
"loss": 0.5585,
"step": 3740
},
{
"epoch": 0.4514808572116542,
"grad_norm": 4.34375,
"learning_rate": 9.768847096424164e-05,
"loss": 0.5921,
"step": 3750
},
{
"epoch": 0.45268480616421863,
"grad_norm": 4.96875,
"learning_rate": 9.7662171338814e-05,
"loss": 0.5416,
"step": 3760
},
{
"epoch": 0.45388875511678306,
"grad_norm": 6.46875,
"learning_rate": 9.763572652614668e-05,
"loss": 0.5491,
"step": 3770
},
{
"epoch": 0.4550927040693475,
"grad_norm": 4.15625,
"learning_rate": 9.760913660679515e-05,
"loss": 0.5819,
"step": 3780
},
{
"epoch": 0.45629665302191186,
"grad_norm": 5.75,
"learning_rate": 9.758240166175692e-05,
"loss": 0.6891,
"step": 3790
},
{
"epoch": 0.4575006019744763,
"grad_norm": 5.28125,
"learning_rate": 9.755552177247127e-05,
"loss": 0.631,
"step": 3800
},
{
"epoch": 0.4587045509270407,
"grad_norm": 5.25,
"learning_rate": 9.752849702081901e-05,
"loss": 0.5935,
"step": 3810
},
{
"epoch": 0.4599084998796051,
"grad_norm": 4.3125,
"learning_rate": 9.750132748912219e-05,
"loss": 0.5239,
"step": 3820
},
{
"epoch": 0.4611124488321695,
"grad_norm": 5.90625,
"learning_rate": 9.747401326014395e-05,
"loss": 0.6117,
"step": 3830
},
{
"epoch": 0.46231639778473393,
"grad_norm": 5.84375,
"learning_rate": 9.744655441708818e-05,
"loss": 0.5105,
"step": 3840
},
{
"epoch": 0.46352034673729836,
"grad_norm": 5.96875,
"learning_rate": 9.741895104359924e-05,
"loss": 0.5531,
"step": 3850
},
{
"epoch": 0.46472429568986273,
"grad_norm": 4.9375,
"learning_rate": 9.739120322376181e-05,
"loss": 0.5423,
"step": 3860
},
{
"epoch": 0.46592824464242716,
"grad_norm": 4.65625,
"learning_rate": 9.736331104210056e-05,
"loss": 0.6987,
"step": 3870
},
{
"epoch": 0.4671321935949916,
"grad_norm": 5.0625,
"learning_rate": 9.733527458357992e-05,
"loss": 0.6308,
"step": 3880
},
{
"epoch": 0.468336142547556,
"grad_norm": 4.1875,
"learning_rate": 9.730709393360378e-05,
"loss": 0.6365,
"step": 3890
},
{
"epoch": 0.4695400915001204,
"grad_norm": 5.125,
"learning_rate": 9.72787691780153e-05,
"loss": 0.6731,
"step": 3900
},
{
"epoch": 0.4707440404526848,
"grad_norm": 4.4375,
"learning_rate": 9.725030040309662e-05,
"loss": 0.589,
"step": 3910
},
{
"epoch": 0.47194798940524924,
"grad_norm": 4.0625,
"learning_rate": 9.722168769556854e-05,
"loss": 0.5825,
"step": 3920
},
{
"epoch": 0.4731519383578136,
"grad_norm": 6.0,
"learning_rate": 9.719293114259033e-05,
"loss": 0.5556,
"step": 3930
},
{
"epoch": 0.47435588731037803,
"grad_norm": 6.4375,
"learning_rate": 9.716403083175946e-05,
"loss": 0.6386,
"step": 3940
},
{
"epoch": 0.47555983626294246,
"grad_norm": 6.34375,
"learning_rate": 9.713498685111128e-05,
"loss": 0.6426,
"step": 3950
},
{
"epoch": 0.4767637852155069,
"grad_norm": 4.3125,
"learning_rate": 9.710579928911876e-05,
"loss": 0.5251,
"step": 3960
},
{
"epoch": 0.47796773416807126,
"grad_norm": 5.46875,
"learning_rate": 9.707646823469232e-05,
"loss": 0.5381,
"step": 3970
},
{
"epoch": 0.4791716831206357,
"grad_norm": 5.5625,
"learning_rate": 9.704699377717941e-05,
"loss": 0.6042,
"step": 3980
},
{
"epoch": 0.4803756320732001,
"grad_norm": 5.6875,
"learning_rate": 9.701737600636436e-05,
"loss": 0.5149,
"step": 3990
},
{
"epoch": 0.4815795810257645,
"grad_norm": 4.5625,
"learning_rate": 9.698761501246801e-05,
"loss": 0.4981,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.arguana": 0.4533164648115519,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.climate_fever": 0.2415330766300153,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.dbpedia_entity": 0.3267307232969415,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.fever": 0.6627993781769471,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.fiqa": 0.44257400247946427,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.hotpotqa": 0.6918069318835383,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.nfcorpus": 0.34324706059190796,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.nq": 0.43319148909889377,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.quora": 0.8358698300981718,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.scidocs": 0.25506900118184184,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.scifact": 0.7542598352338724,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/nano_beir.webis_touche2020": 0.2901138539973763,
"step": 4000
},
{
"epoch": 0.4815795810257645,
"eval/avg": 0.47754263729004354,
"step": 4000
},
{
"epoch": 0.4827835299783289,
"grad_norm": 5.0,
"learning_rate": 9.695771088614753e-05,
"loss": 0.6134,
"step": 4010
},
{
"epoch": 0.48398747893089333,
"grad_norm": 4.40625,
"learning_rate": 9.692766371849606e-05,
"loss": 0.5217,
"step": 4020
},
{
"epoch": 0.48519142788345776,
"grad_norm": 4.875,
"learning_rate": 9.689747360104252e-05,
"loss": 0.6753,
"step": 4030
},
{
"epoch": 0.48639537683602213,
"grad_norm": 4.53125,
"learning_rate": 9.686714062575118e-05,
"loss": 0.5712,
"step": 4040
},
{
"epoch": 0.48759932578858656,
"grad_norm": 4.75,
"learning_rate": 9.683666488502158e-05,
"loss": 0.5574,
"step": 4050
},
{
"epoch": 0.488803274741151,
"grad_norm": 5.125,
"learning_rate": 9.680604647168813e-05,
"loss": 0.6066,
"step": 4060
},
{
"epoch": 0.4900072236937154,
"grad_norm": 5.21875,
"learning_rate": 9.677528547901981e-05,
"loss": 0.5138,
"step": 4070
},
{
"epoch": 0.4912111726462798,
"grad_norm": 3.671875,
"learning_rate": 9.674438200071991e-05,
"loss": 0.5157,
"step": 4080
},
{
"epoch": 0.4924151215988442,
"grad_norm": 4.84375,
"learning_rate": 9.671333613092583e-05,
"loss": 0.5865,
"step": 4090
},
{
"epoch": 0.49361907055140863,
"grad_norm": 6.4375,
"learning_rate": 9.668214796420866e-05,
"loss": 0.5833,
"step": 4100
},
{
"epoch": 0.494823019503973,
"grad_norm": 6.09375,
"learning_rate": 9.665081759557295e-05,
"loss": 0.5759,
"step": 4110
},
{
"epoch": 0.49602696845653743,
"grad_norm": 5.8125,
"learning_rate": 9.661934512045644e-05,
"loss": 0.5217,
"step": 4120
},
{
"epoch": 0.49723091740910186,
"grad_norm": 6.0625,
"learning_rate": 9.658773063472975e-05,
"loss": 0.6464,
"step": 4130
},
{
"epoch": 0.4984348663616663,
"grad_norm": 5.09375,
"learning_rate": 9.655597423469609e-05,
"loss": 0.5918,
"step": 4140
},
{
"epoch": 0.49963881531423066,
"grad_norm": 5.5625,
"learning_rate": 9.652407601709096e-05,
"loss": 0.5892,
"step": 4150
},
{
"epoch": 0.5008427642667951,
"grad_norm": 6.0625,
"learning_rate": 9.649203607908187e-05,
"loss": 0.6439,
"step": 4160
},
{
"epoch": 0.5020467132193595,
"grad_norm": 5.09375,
"learning_rate": 9.645985451826803e-05,
"loss": 0.5668,
"step": 4170
},
{
"epoch": 0.5032506621719239,
"grad_norm": 5.28125,
"learning_rate": 9.642753143268002e-05,
"loss": 0.6326,
"step": 4180
},
{
"epoch": 0.5044546111244883,
"grad_norm": 5.28125,
"learning_rate": 9.63950669207796e-05,
"loss": 0.561,
"step": 4190
},
{
"epoch": 0.5056585600770528,
"grad_norm": 6.4375,
"learning_rate": 9.636246108145929e-05,
"loss": 0.6184,
"step": 4200
},
{
"epoch": 0.5068625090296172,
"grad_norm": 5.09375,
"learning_rate": 9.632971401404214e-05,
"loss": 0.5612,
"step": 4210
},
{
"epoch": 0.5080664579821815,
"grad_norm": 6.21875,
"learning_rate": 9.62968258182814e-05,
"loss": 0.6528,
"step": 4220
},
{
"epoch": 0.509270406934746,
"grad_norm": 5.53125,
"learning_rate": 9.626379659436017e-05,
"loss": 0.6768,
"step": 4230
},
{
"epoch": 0.5104743558873104,
"grad_norm": 3.75,
"learning_rate": 9.623062644289127e-05,
"loss": 0.5089,
"step": 4240
},
{
"epoch": 0.5116783048398748,
"grad_norm": 6.1875,
"learning_rate": 9.61973154649167e-05,
"loss": 0.5932,
"step": 4250
},
{
"epoch": 0.5128822537924392,
"grad_norm": 5.28125,
"learning_rate": 9.616386376190745e-05,
"loss": 0.6448,
"step": 4260
},
{
"epoch": 0.5140862027450036,
"grad_norm": 5.75,
"learning_rate": 9.613027143576325e-05,
"loss": 0.5775,
"step": 4270
},
{
"epoch": 0.515290151697568,
"grad_norm": 3.625,
"learning_rate": 9.609653858881211e-05,
"loss": 0.5966,
"step": 4280
},
{
"epoch": 0.5164941006501325,
"grad_norm": 5.28125,
"learning_rate": 9.606266532381018e-05,
"loss": 0.5776,
"step": 4290
},
{
"epoch": 0.5176980496026968,
"grad_norm": 5.875,
"learning_rate": 9.602865174394127e-05,
"loss": 0.5759,
"step": 4300
},
{
"epoch": 0.5189019985552613,
"grad_norm": 4.5625,
"learning_rate": 9.599449795281662e-05,
"loss": 0.6263,
"step": 4310
},
{
"epoch": 0.5201059475078257,
"grad_norm": 4.34375,
"learning_rate": 9.596020405447466e-05,
"loss": 0.578,
"step": 4320
},
{
"epoch": 0.52130989646039,
"grad_norm": 5.71875,
"learning_rate": 9.592577015338051e-05,
"loss": 0.5554,
"step": 4330
},
{
"epoch": 0.5225138454129545,
"grad_norm": 5.78125,
"learning_rate": 9.589119635442582e-05,
"loss": 0.6189,
"step": 4340
},
{
"epoch": 0.5237177943655189,
"grad_norm": 4.4375,
"learning_rate": 9.585648276292836e-05,
"loss": 0.6218,
"step": 4350
},
{
"epoch": 0.5249217433180833,
"grad_norm": 5.25,
"learning_rate": 9.582162948463177e-05,
"loss": 0.5972,
"step": 4360
},
{
"epoch": 0.5261256922706478,
"grad_norm": 5.96875,
"learning_rate": 9.578663662570518e-05,
"loss": 0.6237,
"step": 4370
},
{
"epoch": 0.5273296412232121,
"grad_norm": 5.03125,
"learning_rate": 9.575150429274287e-05,
"loss": 0.5754,
"step": 4380
},
{
"epoch": 0.5285335901757765,
"grad_norm": 3.46875,
"learning_rate": 9.571623259276408e-05,
"loss": 0.5525,
"step": 4390
},
{
"epoch": 0.529737539128341,
"grad_norm": 3.84375,
"learning_rate": 9.568082163321247e-05,
"loss": 0.5924,
"step": 4400
},
{
"epoch": 0.5309414880809054,
"grad_norm": 4.1875,
"learning_rate": 9.5645271521956e-05,
"loss": 0.5747,
"step": 4410
},
{
"epoch": 0.5321454370334697,
"grad_norm": 5.0625,
"learning_rate": 9.560958236728646e-05,
"loss": 0.681,
"step": 4420
},
{
"epoch": 0.5333493859860342,
"grad_norm": 5.03125,
"learning_rate": 9.557375427791919e-05,
"loss": 0.5728,
"step": 4430
},
{
"epoch": 0.5345533349385986,
"grad_norm": 4.75,
"learning_rate": 9.553778736299279e-05,
"loss": 0.5402,
"step": 4440
},
{
"epoch": 0.5357572838911631,
"grad_norm": 5.375,
"learning_rate": 9.550168173206873e-05,
"loss": 0.6011,
"step": 4450
},
{
"epoch": 0.5369612328437274,
"grad_norm": 4.84375,
"learning_rate": 9.546543749513096e-05,
"loss": 0.6079,
"step": 4460
},
{
"epoch": 0.5381651817962918,
"grad_norm": 4.375,
"learning_rate": 9.54290547625858e-05,
"loss": 0.6335,
"step": 4470
},
{
"epoch": 0.5393691307488563,
"grad_norm": 4.5625,
"learning_rate": 9.539253364526128e-05,
"loss": 0.6007,
"step": 4480
},
{
"epoch": 0.5405730797014207,
"grad_norm": 4.40625,
"learning_rate": 9.535587425440712e-05,
"loss": 0.5092,
"step": 4490
},
{
"epoch": 0.541777028653985,
"grad_norm": 3.671875,
"learning_rate": 9.531907670169415e-05,
"loss": 0.5609,
"step": 4500
},
{
"epoch": 0.5429809776065495,
"grad_norm": 5.3125,
"learning_rate": 9.52821410992141e-05,
"loss": 0.5731,
"step": 4510
},
{
"epoch": 0.5441849265591139,
"grad_norm": 5.6875,
"learning_rate": 9.524506755947921e-05,
"loss": 0.6179,
"step": 4520
},
{
"epoch": 0.5453888755116783,
"grad_norm": 3.640625,
"learning_rate": 9.520785619542196e-05,
"loss": 0.5156,
"step": 4530
},
{
"epoch": 0.5465928244642427,
"grad_norm": 5.46875,
"learning_rate": 9.517050712039456e-05,
"loss": 0.6161,
"step": 4540
},
{
"epoch": 0.5477967734168071,
"grad_norm": 4.46875,
"learning_rate": 9.51330204481688e-05,
"loss": 0.5739,
"step": 4550
},
{
"epoch": 0.5490007223693716,
"grad_norm": 5.3125,
"learning_rate": 9.509539629293558e-05,
"loss": 0.6028,
"step": 4560
},
{
"epoch": 0.550204671321936,
"grad_norm": 5.0625,
"learning_rate": 9.50576347693046e-05,
"loss": 0.6909,
"step": 4570
},
{
"epoch": 0.5514086202745003,
"grad_norm": 3.703125,
"learning_rate": 9.5019735992304e-05,
"loss": 0.6186,
"step": 4580
},
{
"epoch": 0.5526125692270648,
"grad_norm": 4.0625,
"learning_rate": 9.498170007738005e-05,
"loss": 0.5563,
"step": 4590
},
{
"epoch": 0.5538165181796292,
"grad_norm": 4.09375,
"learning_rate": 9.494352714039674e-05,
"loss": 0.535,
"step": 4600
},
{
"epoch": 0.5550204671321936,
"grad_norm": 4.46875,
"learning_rate": 9.490521729763542e-05,
"loss": 0.5516,
"step": 4610
},
{
"epoch": 0.556224416084758,
"grad_norm": 4.40625,
"learning_rate": 9.486677066579456e-05,
"loss": 0.6259,
"step": 4620
},
{
"epoch": 0.5574283650373224,
"grad_norm": 4.78125,
"learning_rate": 9.482818736198925e-05,
"loss": 0.5551,
"step": 4630
},
{
"epoch": 0.5586323139898868,
"grad_norm": 5.28125,
"learning_rate": 9.478946750375092e-05,
"loss": 0.6171,
"step": 4640
},
{
"epoch": 0.5598362629424513,
"grad_norm": 5.40625,
"learning_rate": 9.475061120902698e-05,
"loss": 0.5887,
"step": 4650
},
{
"epoch": 0.5610402118950156,
"grad_norm": 5.6875,
"learning_rate": 9.471161859618045e-05,
"loss": 0.5992,
"step": 4660
},
{
"epoch": 0.5622441608475801,
"grad_norm": 4.9375,
"learning_rate": 9.467248978398957e-05,
"loss": 0.61,
"step": 4670
},
{
"epoch": 0.5634481098001445,
"grad_norm": 5.53125,
"learning_rate": 9.46332248916475e-05,
"loss": 0.5746,
"step": 4680
},
{
"epoch": 0.5646520587527089,
"grad_norm": 5.75,
"learning_rate": 9.459382403876192e-05,
"loss": 0.5999,
"step": 4690
},
{
"epoch": 0.5658560077052733,
"grad_norm": 6.28125,
"learning_rate": 9.455428734535463e-05,
"loss": 0.5727,
"step": 4700
},
{
"epoch": 0.5670599566578377,
"grad_norm": 4.65625,
"learning_rate": 9.451461493186129e-05,
"loss": 0.6048,
"step": 4710
},
{
"epoch": 0.5682639056104021,
"grad_norm": 5.125,
"learning_rate": 9.447480691913094e-05,
"loss": 0.575,
"step": 4720
},
{
"epoch": 0.5694678545629666,
"grad_norm": 5.25,
"learning_rate": 9.443486342842569e-05,
"loss": 0.5623,
"step": 4730
},
{
"epoch": 0.5706718035155309,
"grad_norm": 5.65625,
"learning_rate": 9.439478458142033e-05,
"loss": 0.6507,
"step": 4740
},
{
"epoch": 0.5718757524680953,
"grad_norm": 5.53125,
"learning_rate": 9.435457050020198e-05,
"loss": 0.6093,
"step": 4750
},
{
"epoch": 0.5730797014206598,
"grad_norm": 5.09375,
"learning_rate": 9.431422130726973e-05,
"loss": 0.5577,
"step": 4760
},
{
"epoch": 0.5742836503732242,
"grad_norm": 4.21875,
"learning_rate": 9.42737371255342e-05,
"loss": 0.6243,
"step": 4770
},
{
"epoch": 0.5754875993257886,
"grad_norm": 5.625,
"learning_rate": 9.423311807831723e-05,
"loss": 0.5395,
"step": 4780
},
{
"epoch": 0.576691548278353,
"grad_norm": 6.03125,
"learning_rate": 9.419236428935147e-05,
"loss": 0.6082,
"step": 4790
},
{
"epoch": 0.5778954972309174,
"grad_norm": 7.4375,
"learning_rate": 9.415147588278005e-05,
"loss": 0.6082,
"step": 4800
},
{
"epoch": 0.5790994461834819,
"grad_norm": 4.3125,
"learning_rate": 9.411045298315614e-05,
"loss": 0.6097,
"step": 4810
},
{
"epoch": 0.5803033951360462,
"grad_norm": 4.78125,
"learning_rate": 9.406929571544261e-05,
"loss": 0.5135,
"step": 4820
},
{
"epoch": 0.5815073440886106,
"grad_norm": 4.40625,
"learning_rate": 9.402800420501164e-05,
"loss": 0.5316,
"step": 4830
},
{
"epoch": 0.5827112930411751,
"grad_norm": 4.21875,
"learning_rate": 9.398657857764431e-05,
"loss": 0.5902,
"step": 4840
},
{
"epoch": 0.5839152419937395,
"grad_norm": 5.25,
"learning_rate": 9.394501895953026e-05,
"loss": 0.6585,
"step": 4850
},
{
"epoch": 0.5851191909463038,
"grad_norm": 5.96875,
"learning_rate": 9.390332547726733e-05,
"loss": 0.6581,
"step": 4860
},
{
"epoch": 0.5863231398988683,
"grad_norm": 6.53125,
"learning_rate": 9.386149825786108e-05,
"loss": 0.6056,
"step": 4870
},
{
"epoch": 0.5875270888514327,
"grad_norm": 4.40625,
"learning_rate": 9.381953742872447e-05,
"loss": 0.6065,
"step": 4880
},
{
"epoch": 0.5887310378039972,
"grad_norm": 4.6875,
"learning_rate": 9.377744311767746e-05,
"loss": 0.5369,
"step": 4890
},
{
"epoch": 0.5899349867565615,
"grad_norm": 6.4375,
"learning_rate": 9.373521545294663e-05,
"loss": 0.5646,
"step": 4900
},
{
"epoch": 0.5911389357091259,
"grad_norm": 4.9375,
"learning_rate": 9.369285456316479e-05,
"loss": 0.5593,
"step": 4910
},
{
"epoch": 0.5923428846616904,
"grad_norm": 5.09375,
"learning_rate": 9.36503605773705e-05,
"loss": 0.6057,
"step": 4920
},
{
"epoch": 0.5935468336142548,
"grad_norm": 4.40625,
"learning_rate": 9.360773362500787e-05,
"loss": 0.532,
"step": 4930
},
{
"epoch": 0.5947507825668191,
"grad_norm": 5.71875,
"learning_rate": 9.356497383592596e-05,
"loss": 0.5265,
"step": 4940
},
{
"epoch": 0.5959547315193836,
"grad_norm": 4.75,
"learning_rate": 9.352208134037851e-05,
"loss": 0.5161,
"step": 4950
},
{
"epoch": 0.597158680471948,
"grad_norm": 5.90625,
"learning_rate": 9.347905626902349e-05,
"loss": 0.5466,
"step": 4960
},
{
"epoch": 0.5983626294245123,
"grad_norm": 4.75,
"learning_rate": 9.343589875292274e-05,
"loss": 0.5735,
"step": 4970
},
{
"epoch": 0.5995665783770768,
"grad_norm": 4.34375,
"learning_rate": 9.339260892354153e-05,
"loss": 0.5381,
"step": 4980
},
{
"epoch": 0.6007705273296412,
"grad_norm": 5.34375,
"learning_rate": 9.334918691274818e-05,
"loss": 0.5387,
"step": 4990
},
{
"epoch": 0.6019744762822057,
"grad_norm": 4.84375,
"learning_rate": 9.330563285281371e-05,
"loss": 0.5521,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.arguana": 0.4570177803142012,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.climate_fever": 0.22373365886432237,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.dbpedia_entity": 0.3103155680013099,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.fever": 0.644329402155076,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.fiqa": 0.44276237969184934,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.hotpotqa": 0.670071897693377,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.nfcorpus": 0.3433558636570113,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.nq": 0.42199069476386775,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.quora": 0.8387259309259241,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.scidocs": 0.2622439774312085,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.scifact": 0.7499705659426336,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/nano_beir.webis_touche2020": 0.29534440434985226,
"step": 5000
},
{
"epoch": 0.6019744762822057,
"eval/avg": 0.47165517698255277,
"step": 5000
},
{
"epoch": 0.60317842523477,
"grad_norm": 4.59375,
"learning_rate": 9.32619468764113e-05,
"loss": 0.5345,
"step": 5010
},
{
"epoch": 0.6043823741873344,
"grad_norm": 5.0,
"learning_rate": 9.3218129116616e-05,
"loss": 0.5474,
"step": 5020
},
{
"epoch": 0.6055863231398989,
"grad_norm": 5.1875,
"learning_rate": 9.317417970690435e-05,
"loss": 0.5487,
"step": 5030
},
{
"epoch": 0.6067902720924633,
"grad_norm": 5.34375,
"learning_rate": 9.313009878115381e-05,
"loss": 0.6008,
"step": 5040
},
{
"epoch": 0.6079942210450276,
"grad_norm": 4.625,
"learning_rate": 9.30858864736426e-05,
"loss": 0.5572,
"step": 5050
},
{
"epoch": 0.6091981699975921,
"grad_norm": 4.5,
"learning_rate": 9.304154291904903e-05,
"loss": 0.4904,
"step": 5060
},
{
"epoch": 0.6104021189501565,
"grad_norm": 5.96875,
"learning_rate": 9.299706825245126e-05,
"loss": 0.615,
"step": 5070
},
{
"epoch": 0.6116060679027209,
"grad_norm": 7.0625,
"learning_rate": 9.295246260932684e-05,
"loss": 0.5712,
"step": 5080
},
{
"epoch": 0.6128100168552854,
"grad_norm": 5.125,
"learning_rate": 9.29077261255523e-05,
"loss": 0.6262,
"step": 5090
},
{
"epoch": 0.6140139658078497,
"grad_norm": 6.0,
"learning_rate": 9.286285893740274e-05,
"loss": 0.4861,
"step": 5100
},
{
"epoch": 0.6152179147604142,
"grad_norm": 5.78125,
"learning_rate": 9.281786118155135e-05,
"loss": 0.6522,
"step": 5110
},
{
"epoch": 0.6164218637129786,
"grad_norm": 5.21875,
"learning_rate": 9.277273299506917e-05,
"loss": 0.528,
"step": 5120
},
{
"epoch": 0.617625812665543,
"grad_norm": 6.71875,
"learning_rate": 9.272747451542441e-05,
"loss": 0.6185,
"step": 5130
},
{
"epoch": 0.6188297616181074,
"grad_norm": 5.78125,
"learning_rate": 9.268208588048231e-05,
"loss": 0.5152,
"step": 5140
},
{
"epoch": 0.6200337105706718,
"grad_norm": 5.125,
"learning_rate": 9.263656722850447e-05,
"loss": 0.6066,
"step": 5150
},
{
"epoch": 0.6212376595232362,
"grad_norm": 6.40625,
"learning_rate": 9.259091869814864e-05,
"loss": 0.506,
"step": 5160
},
{
"epoch": 0.6224416084758007,
"grad_norm": 4.875,
"learning_rate": 9.254514042846812e-05,
"loss": 0.5415,
"step": 5170
},
{
"epoch": 0.623645557428365,
"grad_norm": 5.90625,
"learning_rate": 9.24992325589115e-05,
"loss": 0.6742,
"step": 5180
},
{
"epoch": 0.6248495063809294,
"grad_norm": 4.875,
"learning_rate": 9.245319522932209e-05,
"loss": 0.5703,
"step": 5190
},
{
"epoch": 0.6260534553334939,
"grad_norm": 5.625,
"learning_rate": 9.240702857993757e-05,
"loss": 0.5885,
"step": 5200
},
{
"epoch": 0.6272574042860583,
"grad_norm": 4.34375,
"learning_rate": 9.236073275138958e-05,
"loss": 0.607,
"step": 5210
},
{
"epoch": 0.6284613532386227,
"grad_norm": 10.25,
"learning_rate": 9.231430788470326e-05,
"loss": 0.5329,
"step": 5220
},
{
"epoch": 0.6296653021911871,
"grad_norm": 6.625,
"learning_rate": 9.226775412129676e-05,
"loss": 0.6086,
"step": 5230
},
{
"epoch": 0.6308692511437515,
"grad_norm": 4.375,
"learning_rate": 9.222107160298093e-05,
"loss": 0.5085,
"step": 5240
},
{
"epoch": 0.632073200096316,
"grad_norm": 6.46875,
"learning_rate": 9.217426047195882e-05,
"loss": 0.5333,
"step": 5250
},
{
"epoch": 0.6332771490488803,
"grad_norm": 6.0625,
"learning_rate": 9.212732087082528e-05,
"loss": 0.6255,
"step": 5260
},
{
"epoch": 0.6344810980014447,
"grad_norm": 4.59375,
"learning_rate": 9.208025294256645e-05,
"loss": 0.4732,
"step": 5270
},
{
"epoch": 0.6356850469540092,
"grad_norm": 5.3125,
"learning_rate": 9.20330568305594e-05,
"loss": 0.4817,
"step": 5280
},
{
"epoch": 0.6368889959065736,
"grad_norm": 6.0,
"learning_rate": 9.198573267857168e-05,
"loss": 0.5174,
"step": 5290
},
{
"epoch": 0.6380929448591379,
"grad_norm": 7.6875,
"learning_rate": 9.193828063076087e-05,
"loss": 0.5626,
"step": 5300
},
{
"epoch": 0.6392968938117024,
"grad_norm": 8.0625,
"learning_rate": 9.189070083167411e-05,
"loss": 0.488,
"step": 5310
},
{
"epoch": 0.6405008427642668,
"grad_norm": 7.875,
"learning_rate": 9.184299342624776e-05,
"loss": 0.5151,
"step": 5320
},
{
"epoch": 0.6417047917168313,
"grad_norm": 3.953125,
"learning_rate": 9.179515855980682e-05,
"loss": 0.4968,
"step": 5330
},
{
"epoch": 0.6429087406693956,
"grad_norm": 5.90625,
"learning_rate": 9.17471963780646e-05,
"loss": 0.6388,
"step": 5340
},
{
"epoch": 0.64411268962196,
"grad_norm": 4.5,
"learning_rate": 9.169910702712221e-05,
"loss": 0.5517,
"step": 5350
},
{
"epoch": 0.6453166385745245,
"grad_norm": 6.125,
"learning_rate": 9.165089065346816e-05,
"loss": 0.5511,
"step": 5360
},
{
"epoch": 0.6465205875270889,
"grad_norm": 4.40625,
"learning_rate": 9.160254740397791e-05,
"loss": 0.5346,
"step": 5370
},
{
"epoch": 0.6477245364796532,
"grad_norm": 6.3125,
"learning_rate": 9.155407742591335e-05,
"loss": 0.5436,
"step": 5380
},
{
"epoch": 0.6489284854322177,
"grad_norm": 5.4375,
"learning_rate": 9.150548086692247e-05,
"loss": 0.6382,
"step": 5390
},
{
"epoch": 0.6501324343847821,
"grad_norm": 4.40625,
"learning_rate": 9.145675787503878e-05,
"loss": 0.6069,
"step": 5400
},
{
"epoch": 0.6513363833373464,
"grad_norm": 4.6875,
"learning_rate": 9.140790859868098e-05,
"loss": 0.5635,
"step": 5410
},
{
"epoch": 0.6525403322899109,
"grad_norm": 4.84375,
"learning_rate": 9.135893318665245e-05,
"loss": 0.6027,
"step": 5420
},
{
"epoch": 0.6537442812424753,
"grad_norm": 5.125,
"learning_rate": 9.130983178814077e-05,
"loss": 0.5553,
"step": 5430
},
{
"epoch": 0.6549482301950398,
"grad_norm": 6.625,
"learning_rate": 9.126060455271733e-05,
"loss": 0.5377,
"step": 5440
},
{
"epoch": 0.6561521791476042,
"grad_norm": 5.0,
"learning_rate": 9.12112516303368e-05,
"loss": 0.591,
"step": 5450
},
{
"epoch": 0.6573561281001685,
"grad_norm": 4.125,
"learning_rate": 9.116177317133676e-05,
"loss": 0.5018,
"step": 5460
},
{
"epoch": 0.658560077052733,
"grad_norm": 5.40625,
"learning_rate": 9.11121693264372e-05,
"loss": 0.5977,
"step": 5470
},
{
"epoch": 0.6597640260052974,
"grad_norm": 4.75,
"learning_rate": 9.106244024673999e-05,
"loss": 0.6475,
"step": 5480
},
{
"epoch": 0.6609679749578617,
"grad_norm": 4.8125,
"learning_rate": 9.101258608372856e-05,
"loss": 0.5259,
"step": 5490
},
{
"epoch": 0.6621719239104262,
"grad_norm": 5.8125,
"learning_rate": 9.096260698926732e-05,
"loss": 0.547,
"step": 5500
},
{
"epoch": 0.6633758728629906,
"grad_norm": 5.15625,
"learning_rate": 9.091250311560126e-05,
"loss": 0.5728,
"step": 5510
},
{
"epoch": 0.664579821815555,
"grad_norm": 6.9375,
"learning_rate": 9.08622746153555e-05,
"loss": 0.537,
"step": 5520
},
{
"epoch": 0.6657837707681195,
"grad_norm": 5.5625,
"learning_rate": 9.081192164153472e-05,
"loss": 0.6255,
"step": 5530
},
{
"epoch": 0.6669877197206838,
"grad_norm": 5.0625,
"learning_rate": 9.076144434752283e-05,
"loss": 0.5898,
"step": 5540
},
{
"epoch": 0.6681916686732483,
"grad_norm": 5.40625,
"learning_rate": 9.071084288708243e-05,
"loss": 0.5625,
"step": 5550
},
{
"epoch": 0.6693956176258127,
"grad_norm": 5.03125,
"learning_rate": 9.066011741435431e-05,
"loss": 0.6161,
"step": 5560
},
{
"epoch": 0.670599566578377,
"grad_norm": 4.625,
"learning_rate": 9.060926808385711e-05,
"loss": 0.5116,
"step": 5570
},
{
"epoch": 0.6718035155309415,
"grad_norm": 5.75,
"learning_rate": 9.055829505048667e-05,
"loss": 0.5747,
"step": 5580
},
{
"epoch": 0.6730074644835059,
"grad_norm": 4.9375,
"learning_rate": 9.05071984695157e-05,
"loss": 0.551,
"step": 5590
},
{
"epoch": 0.6742114134360703,
"grad_norm": 5.40625,
"learning_rate": 9.045597849659324e-05,
"loss": 0.6106,
"step": 5600
},
{
"epoch": 0.6754153623886348,
"grad_norm": 5.65625,
"learning_rate": 9.040463528774423e-05,
"loss": 0.5439,
"step": 5610
},
{
"epoch": 0.6766193113411991,
"grad_norm": 5.8125,
"learning_rate": 9.035316899936896e-05,
"loss": 0.6368,
"step": 5620
},
{
"epoch": 0.6778232602937635,
"grad_norm": 4.96875,
"learning_rate": 9.03015797882427e-05,
"loss": 0.6602,
"step": 5630
},
{
"epoch": 0.679027209246328,
"grad_norm": 6.03125,
"learning_rate": 9.024986781151512e-05,
"loss": 0.6983,
"step": 5640
},
{
"epoch": 0.6802311581988923,
"grad_norm": 4.84375,
"learning_rate": 9.019803322670986e-05,
"loss": 0.5296,
"step": 5650
},
{
"epoch": 0.6814351071514568,
"grad_norm": 4.59375,
"learning_rate": 9.014607619172406e-05,
"loss": 0.6231,
"step": 5660
},
{
"epoch": 0.6826390561040212,
"grad_norm": 4.75,
"learning_rate": 9.009399686482787e-05,
"loss": 0.5251,
"step": 5670
},
{
"epoch": 0.6838430050565856,
"grad_norm": 4.71875,
"learning_rate": 9.004179540466396e-05,
"loss": 0.5826,
"step": 5680
},
{
"epoch": 0.68504695400915,
"grad_norm": 6.09375,
"learning_rate": 8.998947197024699e-05,
"loss": 0.6823,
"step": 5690
},
{
"epoch": 0.6862509029617144,
"grad_norm": 5.0,
"learning_rate": 8.993702672096324e-05,
"loss": 0.5507,
"step": 5700
},
{
"epoch": 0.6874548519142788,
"grad_norm": 5.03125,
"learning_rate": 8.988445981657006e-05,
"loss": 0.5415,
"step": 5710
},
{
"epoch": 0.6886588008668433,
"grad_norm": 4.65625,
"learning_rate": 8.983177141719531e-05,
"loss": 0.5462,
"step": 5720
},
{
"epoch": 0.6898627498194077,
"grad_norm": 4.96875,
"learning_rate": 8.977896168333702e-05,
"loss": 0.6314,
"step": 5730
},
{
"epoch": 0.691066698771972,
"grad_norm": 5.0625,
"learning_rate": 8.972603077586279e-05,
"loss": 0.5939,
"step": 5740
},
{
"epoch": 0.6922706477245365,
"grad_norm": 6.46875,
"learning_rate": 8.967297885600936e-05,
"loss": 0.5588,
"step": 5750
},
{
"epoch": 0.6934745966771009,
"grad_norm": 5.3125,
"learning_rate": 8.961980608538203e-05,
"loss": 0.5653,
"step": 5760
},
{
"epoch": 0.6946785456296654,
"grad_norm": 4.3125,
"learning_rate": 8.956651262595434e-05,
"loss": 0.5809,
"step": 5770
},
{
"epoch": 0.6958824945822297,
"grad_norm": 5.5625,
"learning_rate": 8.951309864006738e-05,
"loss": 0.5478,
"step": 5780
},
{
"epoch": 0.6970864435347941,
"grad_norm": 4.6875,
"learning_rate": 8.945956429042943e-05,
"loss": 0.5419,
"step": 5790
},
{
"epoch": 0.6982903924873586,
"grad_norm": 4.84375,
"learning_rate": 8.940590974011538e-05,
"loss": 0.61,
"step": 5800
},
{
"epoch": 0.699494341439923,
"grad_norm": 5.9375,
"learning_rate": 8.935213515256631e-05,
"loss": 0.6236,
"step": 5810
},
{
"epoch": 0.7006982903924873,
"grad_norm": 5.53125,
"learning_rate": 8.929824069158894e-05,
"loss": 0.5342,
"step": 5820
},
{
"epoch": 0.7019022393450518,
"grad_norm": 4.65625,
"learning_rate": 8.924422652135513e-05,
"loss": 0.596,
"step": 5830
},
{
"epoch": 0.7031061882976162,
"grad_norm": 5.90625,
"learning_rate": 8.919009280640143e-05,
"loss": 0.6242,
"step": 5840
},
{
"epoch": 0.7043101372501805,
"grad_norm": 5.46875,
"learning_rate": 8.913583971162852e-05,
"loss": 0.568,
"step": 5850
},
{
"epoch": 0.705514086202745,
"grad_norm": 5.5,
"learning_rate": 8.908146740230072e-05,
"loss": 0.701,
"step": 5860
},
{
"epoch": 0.7067180351553094,
"grad_norm": 4.53125,
"learning_rate": 8.902697604404552e-05,
"loss": 0.5754,
"step": 5870
},
{
"epoch": 0.7079219841078739,
"grad_norm": 4.375,
"learning_rate": 8.897236580285308e-05,
"loss": 0.528,
"step": 5880
},
{
"epoch": 0.7091259330604383,
"grad_norm": 4.84375,
"learning_rate": 8.891763684507562e-05,
"loss": 0.5544,
"step": 5890
},
{
"epoch": 0.7103298820130026,
"grad_norm": 5.4375,
"learning_rate": 8.886278933742705e-05,
"loss": 0.5169,
"step": 5900
},
{
"epoch": 0.7115338309655671,
"grad_norm": 5.03125,
"learning_rate": 8.88078234469824e-05,
"loss": 0.495,
"step": 5910
},
{
"epoch": 0.7127377799181315,
"grad_norm": 5.09375,
"learning_rate": 8.875273934117729e-05,
"loss": 0.5762,
"step": 5920
},
{
"epoch": 0.7139417288706958,
"grad_norm": 3.296875,
"learning_rate": 8.869753718780748e-05,
"loss": 0.4769,
"step": 5930
},
{
"epoch": 0.7151456778232603,
"grad_norm": 4.75,
"learning_rate": 8.864221715502829e-05,
"loss": 0.5226,
"step": 5940
},
{
"epoch": 0.7163496267758247,
"grad_norm": 5.8125,
"learning_rate": 8.858677941135414e-05,
"loss": 0.541,
"step": 5950
},
{
"epoch": 0.7175535757283891,
"grad_norm": 4.75,
"learning_rate": 8.8531224125658e-05,
"loss": 0.614,
"step": 5960
},
{
"epoch": 0.7187575246809536,
"grad_norm": 4.09375,
"learning_rate": 8.84755514671709e-05,
"loss": 0.5008,
"step": 5970
},
{
"epoch": 0.7199614736335179,
"grad_norm": 5.5625,
"learning_rate": 8.841976160548145e-05,
"loss": 0.4964,
"step": 5980
},
{
"epoch": 0.7211654225860824,
"grad_norm": 6.3125,
"learning_rate": 8.836385471053518e-05,
"loss": 0.5982,
"step": 5990
},
{
"epoch": 0.7223693715386468,
"grad_norm": 3.140625,
"learning_rate": 8.830783095263425e-05,
"loss": 0.5714,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.arguana": 0.4400179028802884,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.climate_fever": 0.22393590221302478,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.dbpedia_entity": 0.31118121695768797,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.fever": 0.6298696267901086,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.fiqa": 0.44466912850421125,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.hotpotqa": 0.6753796301863233,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.nfcorpus": 0.34319280956790604,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.nq": 0.4252519019711077,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.quora": 0.8434164830151667,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.scidocs": 0.25278134215874354,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.scifact": 0.7278062143873253,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/nano_beir.webis_touche2020": 0.291971653952184,
"step": 6000
},
{
"epoch": 0.7223693715386468,
"eval/avg": 0.46745615104867316,
"step": 6000
},
{
"epoch": 0.7235733204912111,
"grad_norm": 5.34375,
"learning_rate": 8.825169050243672e-05,
"loss": 0.5388,
"step": 6010
},
{
"epoch": 0.7247772694437756,
"grad_norm": 6.65625,
"learning_rate": 8.819543353095613e-05,
"loss": 0.5378,
"step": 6020
},
{
"epoch": 0.72598121839634,
"grad_norm": 6.0625,
"learning_rate": 8.813906020956097e-05,
"loss": 0.5838,
"step": 6030
},
{
"epoch": 0.7271851673489044,
"grad_norm": 5.40625,
"learning_rate": 8.808257070997417e-05,
"loss": 0.6621,
"step": 6040
},
{
"epoch": 0.7283891163014689,
"grad_norm": 4.53125,
"learning_rate": 8.802596520427253e-05,
"loss": 0.5482,
"step": 6050
},
{
"epoch": 0.7295930652540332,
"grad_norm": 4.5625,
"learning_rate": 8.796924386488624e-05,
"loss": 0.4768,
"step": 6060
},
{
"epoch": 0.7307970142065976,
"grad_norm": 4.46875,
"learning_rate": 8.791240686459834e-05,
"loss": 0.5397,
"step": 6070
},
{
"epoch": 0.7320009631591621,
"grad_norm": 4.65625,
"learning_rate": 8.785545437654418e-05,
"loss": 0.5296,
"step": 6080
},
{
"epoch": 0.7332049121117264,
"grad_norm": 5.25,
"learning_rate": 8.779838657421092e-05,
"loss": 0.5747,
"step": 6090
},
{
"epoch": 0.7344088610642908,
"grad_norm": 4.75,
"learning_rate": 8.774120363143699e-05,
"loss": 0.5907,
"step": 6100
},
{
"epoch": 0.7356128100168553,
"grad_norm": 5.8125,
"learning_rate": 8.768390572241155e-05,
"loss": 0.593,
"step": 6110
},
{
"epoch": 0.7368167589694197,
"grad_norm": 7.40625,
"learning_rate": 8.762649302167395e-05,
"loss": 0.5113,
"step": 6120
},
{
"epoch": 0.7380207079219842,
"grad_norm": 4.34375,
"learning_rate": 8.756896570411326e-05,
"loss": 0.505,
"step": 6130
},
{
"epoch": 0.7392246568745485,
"grad_norm": 4.78125,
"learning_rate": 8.751132394496763e-05,
"loss": 0.5092,
"step": 6140
},
{
"epoch": 0.7404286058271129,
"grad_norm": 5.25,
"learning_rate": 8.745356791982391e-05,
"loss": 0.5254,
"step": 6150
},
{
"epoch": 0.7416325547796774,
"grad_norm": 5.125,
"learning_rate": 8.739569780461692e-05,
"loss": 0.5022,
"step": 6160
},
{
"epoch": 0.7428365037322417,
"grad_norm": 5.03125,
"learning_rate": 8.733771377562908e-05,
"loss": 0.5269,
"step": 6170
},
{
"epoch": 0.7440404526848061,
"grad_norm": 4.9375,
"learning_rate": 8.72796160094898e-05,
"loss": 0.5009,
"step": 6180
},
{
"epoch": 0.7452444016373706,
"grad_norm": 5.0,
"learning_rate": 8.722140468317495e-05,
"loss": 0.4828,
"step": 6190
},
{
"epoch": 0.746448350589935,
"grad_norm": 4.15625,
"learning_rate": 8.716307997400633e-05,
"loss": 0.6166,
"step": 6200
},
{
"epoch": 0.7476522995424993,
"grad_norm": 4.875,
"learning_rate": 8.710464205965112e-05,
"loss": 0.5558,
"step": 6210
},
{
"epoch": 0.7488562484950638,
"grad_norm": 6.5625,
"learning_rate": 8.704609111812134e-05,
"loss": 0.5042,
"step": 6220
},
{
"epoch": 0.7500601974476282,
"grad_norm": 6.0,
"learning_rate": 8.698742732777332e-05,
"loss": 0.6088,
"step": 6230
},
{
"epoch": 0.7512641464001927,
"grad_norm": 5.5625,
"learning_rate": 8.692865086730713e-05,
"loss": 0.5861,
"step": 6240
},
{
"epoch": 0.752468095352757,
"grad_norm": 4.9375,
"learning_rate": 8.686976191576603e-05,
"loss": 0.6649,
"step": 6250
},
{
"epoch": 0.7536720443053214,
"grad_norm": 4.65625,
"learning_rate": 8.681076065253604e-05,
"loss": 0.5009,
"step": 6260
},
{
"epoch": 0.7548759932578859,
"grad_norm": 4.78125,
"learning_rate": 8.67516472573452e-05,
"loss": 0.5638,
"step": 6270
},
{
"epoch": 0.7560799422104503,
"grad_norm": 4.1875,
"learning_rate": 8.669242191026319e-05,
"loss": 0.6176,
"step": 6280
},
{
"epoch": 0.7572838911630146,
"grad_norm": 5.4375,
"learning_rate": 8.663308479170065e-05,
"loss": 0.6254,
"step": 6290
},
{
"epoch": 0.7584878401155791,
"grad_norm": 5.625,
"learning_rate": 8.657363608240876e-05,
"loss": 0.55,
"step": 6300
},
{
"epoch": 0.7596917890681435,
"grad_norm": 5.40625,
"learning_rate": 8.651407596347861e-05,
"loss": 0.5809,
"step": 6310
},
{
"epoch": 0.7608957380207079,
"grad_norm": 4.03125,
"learning_rate": 8.645440461634062e-05,
"loss": 0.5495,
"step": 6320
},
{
"epoch": 0.7620996869732723,
"grad_norm": 5.25,
"learning_rate": 8.639462222276409e-05,
"loss": 0.5944,
"step": 6330
},
{
"epoch": 0.7633036359258367,
"grad_norm": 5.09375,
"learning_rate": 8.633472896485655e-05,
"loss": 0.5652,
"step": 6340
},
{
"epoch": 0.7645075848784012,
"grad_norm": 5.1875,
"learning_rate": 8.627472502506323e-05,
"loss": 0.4355,
"step": 6350
},
{
"epoch": 0.7657115338309656,
"grad_norm": 5.0,
"learning_rate": 8.621461058616656e-05,
"loss": 0.52,
"step": 6360
},
{
"epoch": 0.76691548278353,
"grad_norm": 7.21875,
"learning_rate": 8.615438583128554e-05,
"loss": 0.5633,
"step": 6370
},
{
"epoch": 0.7681194317360944,
"grad_norm": 4.03125,
"learning_rate": 8.609405094387522e-05,
"loss": 0.6013,
"step": 6380
},
{
"epoch": 0.7693233806886588,
"grad_norm": 5.75,
"learning_rate": 8.603360610772612e-05,
"loss": 0.6285,
"step": 6390
},
{
"epoch": 0.7705273296412232,
"grad_norm": 6.1875,
"learning_rate": 8.597305150696373e-05,
"loss": 0.6052,
"step": 6400
},
{
"epoch": 0.7717312785937877,
"grad_norm": 7.09375,
"learning_rate": 8.591238732604782e-05,
"loss": 0.5929,
"step": 6410
},
{
"epoch": 0.772935227546352,
"grad_norm": 4.40625,
"learning_rate": 8.585161374977202e-05,
"loss": 0.5027,
"step": 6420
},
{
"epoch": 0.7741391764989164,
"grad_norm": 4.75,
"learning_rate": 8.579073096326317e-05,
"loss": 0.5662,
"step": 6430
},
{
"epoch": 0.7753431254514809,
"grad_norm": 6.59375,
"learning_rate": 8.572973915198085e-05,
"loss": 0.5426,
"step": 6440
},
{
"epoch": 0.7765470744040452,
"grad_norm": 5.625,
"learning_rate": 8.566863850171663e-05,
"loss": 0.5626,
"step": 6450
},
{
"epoch": 0.7777510233566097,
"grad_norm": 6.4375,
"learning_rate": 8.560742919859372e-05,
"loss": 0.5834,
"step": 6460
},
{
"epoch": 0.7789549723091741,
"grad_norm": 4.0625,
"learning_rate": 8.554611142906628e-05,
"loss": 0.4978,
"step": 6470
},
{
"epoch": 0.7801589212617385,
"grad_norm": 5.03125,
"learning_rate": 8.548468537991884e-05,
"loss": 0.6378,
"step": 6480
},
{
"epoch": 0.781362870214303,
"grad_norm": 6.625,
"learning_rate": 8.542315123826583e-05,
"loss": 0.5465,
"step": 6490
},
{
"epoch": 0.7825668191668673,
"grad_norm": 3.34375,
"learning_rate": 8.536150919155089e-05,
"loss": 0.5495,
"step": 6500
},
{
"epoch": 0.7837707681194317,
"grad_norm": 4.96875,
"learning_rate": 8.52997594275464e-05,
"loss": 0.6084,
"step": 6510
},
{
"epoch": 0.7849747170719962,
"grad_norm": 4.53125,
"learning_rate": 8.523790213435285e-05,
"loss": 0.591,
"step": 6520
},
{
"epoch": 0.7861786660245605,
"grad_norm": 5.03125,
"learning_rate": 8.517593750039827e-05,
"loss": 0.5792,
"step": 6530
},
{
"epoch": 0.7873826149771249,
"grad_norm": 5.375,
"learning_rate": 8.511386571443771e-05,
"loss": 0.5298,
"step": 6540
},
{
"epoch": 0.7885865639296894,
"grad_norm": 4.28125,
"learning_rate": 8.505168696555255e-05,
"loss": 0.4803,
"step": 6550
},
{
"epoch": 0.7897905128822538,
"grad_norm": 4.78125,
"learning_rate": 8.498940144315008e-05,
"loss": 0.5974,
"step": 6560
},
{
"epoch": 0.7909944618348183,
"grad_norm": 6.0625,
"learning_rate": 8.49270093369628e-05,
"loss": 0.6089,
"step": 6570
},
{
"epoch": 0.7921984107873826,
"grad_norm": 5.53125,
"learning_rate": 8.486451083704787e-05,
"loss": 0.644,
"step": 6580
},
{
"epoch": 0.793402359739947,
"grad_norm": 3.71875,
"learning_rate": 8.480190613378657e-05,
"loss": 0.4426,
"step": 6590
},
{
"epoch": 0.7946063086925115,
"grad_norm": 5.46875,
"learning_rate": 8.473919541788366e-05,
"loss": 0.517,
"step": 6600
},
{
"epoch": 0.7958102576450758,
"grad_norm": 4.1875,
"learning_rate": 8.46763788803669e-05,
"loss": 0.5433,
"step": 6610
},
{
"epoch": 0.7970142065976402,
"grad_norm": 4.90625,
"learning_rate": 8.461345671258634e-05,
"loss": 0.5834,
"step": 6620
},
{
"epoch": 0.7982181555502047,
"grad_norm": 4.71875,
"learning_rate": 8.455042910621379e-05,
"loss": 0.5459,
"step": 6630
},
{
"epoch": 0.7994221045027691,
"grad_norm": 5.15625,
"learning_rate": 8.448729625324228e-05,
"loss": 0.5661,
"step": 6640
},
{
"epoch": 0.8006260534553334,
"grad_norm": 6.3125,
"learning_rate": 8.442405834598546e-05,
"loss": 0.5928,
"step": 6650
},
{
"epoch": 0.8018300024078979,
"grad_norm": 5.1875,
"learning_rate": 8.436071557707692e-05,
"loss": 0.5282,
"step": 6660
},
{
"epoch": 0.8030339513604623,
"grad_norm": 4.1875,
"learning_rate": 8.429726813946974e-05,
"loss": 0.5746,
"step": 6670
},
{
"epoch": 0.8042379003130268,
"grad_norm": 4.90625,
"learning_rate": 8.423371622643583e-05,
"loss": 0.5927,
"step": 6680
},
{
"epoch": 0.8054418492655911,
"grad_norm": 5.3125,
"learning_rate": 8.417006003156532e-05,
"loss": 0.6273,
"step": 6690
},
{
"epoch": 0.8066457982181555,
"grad_norm": 5.4375,
"learning_rate": 8.410629974876602e-05,
"loss": 0.5254,
"step": 6700
},
{
"epoch": 0.80784974717072,
"grad_norm": 5.6875,
"learning_rate": 8.404243557226282e-05,
"loss": 0.5679,
"step": 6710
},
{
"epoch": 0.8090536961232844,
"grad_norm": 6.3125,
"learning_rate": 8.397846769659707e-05,
"loss": 0.5201,
"step": 6720
},
{
"epoch": 0.8102576450758487,
"grad_norm": 4.53125,
"learning_rate": 8.391439631662601e-05,
"loss": 0.5174,
"step": 6730
},
{
"epoch": 0.8114615940284132,
"grad_norm": 8.5625,
"learning_rate": 8.385022162752218e-05,
"loss": 0.6297,
"step": 6740
},
{
"epoch": 0.8126655429809776,
"grad_norm": 6.9375,
"learning_rate": 8.378594382477282e-05,
"loss": 0.5586,
"step": 6750
},
{
"epoch": 0.813869491933542,
"grad_norm": 5.21875,
"learning_rate": 8.372156310417925e-05,
"loss": 0.5683,
"step": 6760
},
{
"epoch": 0.8150734408861064,
"grad_norm": 5.75,
"learning_rate": 8.365707966185632e-05,
"loss": 0.5273,
"step": 6770
},
{
"epoch": 0.8162773898386708,
"grad_norm": 4.375,
"learning_rate": 8.359249369423177e-05,
"loss": 0.506,
"step": 6780
},
{
"epoch": 0.8174813387912353,
"grad_norm": 4.65625,
"learning_rate": 8.352780539804566e-05,
"loss": 0.5251,
"step": 6790
},
{
"epoch": 0.8186852877437997,
"grad_norm": 5.0625,
"learning_rate": 8.346301497034976e-05,
"loss": 0.5472,
"step": 6800
},
{
"epoch": 0.819889236696364,
"grad_norm": 4.84375,
"learning_rate": 8.339812260850696e-05,
"loss": 0.5018,
"step": 6810
},
{
"epoch": 0.8210931856489285,
"grad_norm": 7.34375,
"learning_rate": 8.333312851019064e-05,
"loss": 0.4934,
"step": 6820
},
{
"epoch": 0.8222971346014929,
"grad_norm": 5.0625,
"learning_rate": 8.326803287338407e-05,
"loss": 0.5823,
"step": 6830
},
{
"epoch": 0.8235010835540573,
"grad_norm": 5.90625,
"learning_rate": 8.32028358963799e-05,
"loss": 0.514,
"step": 6840
},
{
"epoch": 0.8247050325066217,
"grad_norm": 5.03125,
"learning_rate": 8.313753777777938e-05,
"loss": 0.5718,
"step": 6850
},
{
"epoch": 0.8259089814591861,
"grad_norm": 6.53125,
"learning_rate": 8.307213871649193e-05,
"loss": 0.5325,
"step": 6860
},
{
"epoch": 0.8271129304117505,
"grad_norm": 3.359375,
"learning_rate": 8.300663891173443e-05,
"loss": 0.5679,
"step": 6870
},
{
"epoch": 0.828316879364315,
"grad_norm": 6.09375,
"learning_rate": 8.294103856303065e-05,
"loss": 0.6696,
"step": 6880
},
{
"epoch": 0.8295208283168793,
"grad_norm": 4.65625,
"learning_rate": 8.28753378702106e-05,
"loss": 0.6318,
"step": 6890
},
{
"epoch": 0.8307247772694438,
"grad_norm": 3.4375,
"learning_rate": 8.280953703341004e-05,
"loss": 0.5005,
"step": 6900
},
{
"epoch": 0.8319287262220082,
"grad_norm": 4.96875,
"learning_rate": 8.274363625306967e-05,
"loss": 0.4801,
"step": 6910
},
{
"epoch": 0.8331326751745726,
"grad_norm": 4.875,
"learning_rate": 8.267763572993473e-05,
"loss": 0.6241,
"step": 6920
},
{
"epoch": 0.834336624127137,
"grad_norm": 5.5,
"learning_rate": 8.261153566505424e-05,
"loss": 0.5087,
"step": 6930
},
{
"epoch": 0.8355405730797014,
"grad_norm": 6.53125,
"learning_rate": 8.254533625978047e-05,
"loss": 0.5375,
"step": 6940
},
{
"epoch": 0.8367445220322658,
"grad_norm": 6.0625,
"learning_rate": 8.247903771576829e-05,
"loss": 0.5876,
"step": 6950
},
{
"epoch": 0.8379484709848303,
"grad_norm": 4.71875,
"learning_rate": 8.241264023497457e-05,
"loss": 0.5871,
"step": 6960
},
{
"epoch": 0.8391524199373946,
"grad_norm": 4.625,
"learning_rate": 8.234614401965754e-05,
"loss": 0.5091,
"step": 6970
},
{
"epoch": 0.840356368889959,
"grad_norm": 6.9375,
"learning_rate": 8.227954927237622e-05,
"loss": 0.5497,
"step": 6980
},
{
"epoch": 0.8415603178425235,
"grad_norm": 6.21875,
"learning_rate": 8.221285619598975e-05,
"loss": 0.4829,
"step": 6990
},
{
"epoch": 0.8427642667950879,
"grad_norm": 6.125,
"learning_rate": 8.21460649936568e-05,
"loss": 0.5203,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.arguana": 0.4461517725692969,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.climate_fever": 0.21845433664712227,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.dbpedia_entity": 0.31537338068593374,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.fever": 0.609276835489847,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.fiqa": 0.4361975400218617,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.hotpotqa": 0.6757493116862631,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.nfcorpus": 0.3420271686022287,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.nq": 0.4295335176324975,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.quora": 0.8443977496765365,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.scidocs": 0.24951636439149139,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.scifact": 0.7661963891041157,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/nano_beir.webis_touche2020": 0.292416330919565,
"step": 7000
},
{
"epoch": 0.8427642667950879,
"eval/avg": 0.46877422478556324,
"step": 7000
},
{
"epoch": 0.8439682157476524,
"grad_norm": 6.4375,
"learning_rate": 8.207917586883496e-05,
"loss": 0.5324,
"step": 7010
},
{
"epoch": 0.8451721647002167,
"grad_norm": 3.78125,
"learning_rate": 8.201218902528009e-05,
"loss": 0.5236,
"step": 7020
},
{
"epoch": 0.8463761136527811,
"grad_norm": 4.84375,
"learning_rate": 8.194510466704577e-05,
"loss": 0.5216,
"step": 7030
},
{
"epoch": 0.8475800626053456,
"grad_norm": 5.5625,
"learning_rate": 8.187792299848256e-05,
"loss": 0.6368,
"step": 7040
},
{
"epoch": 0.84878401155791,
"grad_norm": 4.75,
"learning_rate": 8.181064422423748e-05,
"loss": 0.6082,
"step": 7050
},
{
"epoch": 0.8499879605104743,
"grad_norm": 4.6875,
"learning_rate": 8.174326854925334e-05,
"loss": 0.5115,
"step": 7060
},
{
"epoch": 0.8511919094630388,
"grad_norm": 4.59375,
"learning_rate": 8.167579617876814e-05,
"loss": 0.5916,
"step": 7070
},
{
"epoch": 0.8523958584156032,
"grad_norm": 4.78125,
"learning_rate": 8.160822731831441e-05,
"loss": 0.5437,
"step": 7080
},
{
"epoch": 0.8535998073681675,
"grad_norm": 5.90625,
"learning_rate": 8.154056217371864e-05,
"loss": 0.4986,
"step": 7090
},
{
"epoch": 0.854803756320732,
"grad_norm": 5.96875,
"learning_rate": 8.147280095110056e-05,
"loss": 0.5419,
"step": 7100
},
{
"epoch": 0.8560077052732964,
"grad_norm": 4.90625,
"learning_rate": 8.140494385687265e-05,
"loss": 0.5619,
"step": 7110
},
{
"epoch": 0.8572116542258609,
"grad_norm": 5.625,
"learning_rate": 8.133699109773934e-05,
"loss": 0.6761,
"step": 7120
},
{
"epoch": 0.8584156031784252,
"grad_norm": 4.6875,
"learning_rate": 8.126894288069654e-05,
"loss": 0.4346,
"step": 7130
},
{
"epoch": 0.8596195521309896,
"grad_norm": 5.375,
"learning_rate": 8.120079941303094e-05,
"loss": 0.5643,
"step": 7140
},
{
"epoch": 0.8608235010835541,
"grad_norm": 5.09375,
"learning_rate": 8.113256090231932e-05,
"loss": 0.5733,
"step": 7150
},
{
"epoch": 0.8620274500361185,
"grad_norm": 4.53125,
"learning_rate": 8.106422755642807e-05,
"loss": 0.5154,
"step": 7160
},
{
"epoch": 0.8632313989886828,
"grad_norm": 4.3125,
"learning_rate": 8.099579958351235e-05,
"loss": 0.5001,
"step": 7170
},
{
"epoch": 0.8644353479412473,
"grad_norm": 5.8125,
"learning_rate": 8.092727719201567e-05,
"loss": 0.5154,
"step": 7180
},
{
"epoch": 0.8656392968938117,
"grad_norm": 5.6875,
"learning_rate": 8.085866059066912e-05,
"loss": 0.5119,
"step": 7190
},
{
"epoch": 0.8668432458463761,
"grad_norm": 5.28125,
"learning_rate": 8.078994998849076e-05,
"loss": 0.5805,
"step": 7200
},
{
"epoch": 0.8680471947989405,
"grad_norm": 5.3125,
"learning_rate": 8.072114559478501e-05,
"loss": 0.546,
"step": 7210
},
{
"epoch": 0.8692511437515049,
"grad_norm": 5.03125,
"learning_rate": 8.065224761914196e-05,
"loss": 0.64,
"step": 7220
},
{
"epoch": 0.8704550927040694,
"grad_norm": 5.28125,
"learning_rate": 8.058325627143681e-05,
"loss": 0.5975,
"step": 7230
},
{
"epoch": 0.8716590416566338,
"grad_norm": 4.4375,
"learning_rate": 8.051417176182916e-05,
"loss": 0.5317,
"step": 7240
},
{
"epoch": 0.8728629906091981,
"grad_norm": 4.6875,
"learning_rate": 8.044499430076244e-05,
"loss": 0.5345,
"step": 7250
},
{
"epoch": 0.8740669395617626,
"grad_norm": 5.34375,
"learning_rate": 8.037572409896315e-05,
"loss": 0.5364,
"step": 7260
},
{
"epoch": 0.875270888514327,
"grad_norm": 6.1875,
"learning_rate": 8.030636136744037e-05,
"loss": 0.5909,
"step": 7270
},
{
"epoch": 0.8764748374668914,
"grad_norm": 4.375,
"learning_rate": 8.023690631748498e-05,
"loss": 0.5158,
"step": 7280
},
{
"epoch": 0.8776787864194558,
"grad_norm": 5.59375,
"learning_rate": 8.016735916066913e-05,
"loss": 0.6194,
"step": 7290
},
{
"epoch": 0.8788827353720202,
"grad_norm": 5.53125,
"learning_rate": 8.009772010884553e-05,
"loss": 0.605,
"step": 7300
},
{
"epoch": 0.8800866843245846,
"grad_norm": 5.3125,
"learning_rate": 8.002798937414678e-05,
"loss": 0.3946,
"step": 7310
},
{
"epoch": 0.8812906332771491,
"grad_norm": 7.25,
"learning_rate": 7.99581671689848e-05,
"loss": 0.7105,
"step": 7320
},
{
"epoch": 0.8824945822297134,
"grad_norm": 5.46875,
"learning_rate": 7.988825370605014e-05,
"loss": 0.5163,
"step": 7330
},
{
"epoch": 0.8836985311822779,
"grad_norm": 5.0625,
"learning_rate": 7.981824919831133e-05,
"loss": 0.5501,
"step": 7340
},
{
"epoch": 0.8849024801348423,
"grad_norm": 6.25,
"learning_rate": 7.974815385901426e-05,
"loss": 0.5986,
"step": 7350
},
{
"epoch": 0.8861064290874067,
"grad_norm": 5.09375,
"learning_rate": 7.967796790168145e-05,
"loss": 0.4977,
"step": 7360
},
{
"epoch": 0.8873103780399711,
"grad_norm": 5.59375,
"learning_rate": 7.960769154011155e-05,
"loss": 0.4331,
"step": 7370
},
{
"epoch": 0.8885143269925355,
"grad_norm": 5.0625,
"learning_rate": 7.95373249883785e-05,
"loss": 0.6035,
"step": 7380
},
{
"epoch": 0.8897182759450999,
"grad_norm": 4.75,
"learning_rate": 7.946686846083104e-05,
"loss": 0.4763,
"step": 7390
},
{
"epoch": 0.8909222248976644,
"grad_norm": 6.375,
"learning_rate": 7.939632217209197e-05,
"loss": 0.609,
"step": 7400
},
{
"epoch": 0.8921261738502287,
"grad_norm": 4.53125,
"learning_rate": 7.932568633705752e-05,
"loss": 0.519,
"step": 7410
},
{
"epoch": 0.8933301228027931,
"grad_norm": 5.3125,
"learning_rate": 7.925496117089668e-05,
"loss": 0.5286,
"step": 7420
},
{
"epoch": 0.8945340717553576,
"grad_norm": 4.25,
"learning_rate": 7.91841468890506e-05,
"loss": 0.4768,
"step": 7430
},
{
"epoch": 0.895738020707922,
"grad_norm": 5.84375,
"learning_rate": 7.911324370723183e-05,
"loss": 0.4968,
"step": 7440
},
{
"epoch": 0.8969419696604864,
"grad_norm": 5.4375,
"learning_rate": 7.904225184142378e-05,
"loss": 0.5682,
"step": 7450
},
{
"epoch": 0.8981459186130508,
"grad_norm": 4.78125,
"learning_rate": 7.897117150787999e-05,
"loss": 0.5079,
"step": 7460
},
{
"epoch": 0.8993498675656152,
"grad_norm": 4.875,
"learning_rate": 7.890000292312346e-05,
"loss": 0.553,
"step": 7470
},
{
"epoch": 0.9005538165181797,
"grad_norm": 5.4375,
"learning_rate": 7.882874630394606e-05,
"loss": 0.476,
"step": 7480
},
{
"epoch": 0.901757765470744,
"grad_norm": 4.375,
"learning_rate": 7.87574018674078e-05,
"loss": 0.5101,
"step": 7490
},
{
"epoch": 0.9029617144233084,
"grad_norm": 5.59375,
"learning_rate": 7.868596983083623e-05,
"loss": 0.5443,
"step": 7500
},
{
"epoch": 0.9041656633758729,
"grad_norm": 5.3125,
"learning_rate": 7.861445041182569e-05,
"loss": 0.5673,
"step": 7510
},
{
"epoch": 0.9053696123284373,
"grad_norm": 6.0625,
"learning_rate": 7.854284382823673e-05,
"loss": 0.6815,
"step": 7520
},
{
"epoch": 0.9065735612810016,
"grad_norm": 5.125,
"learning_rate": 7.847115029819547e-05,
"loss": 0.6024,
"step": 7530
},
{
"epoch": 0.9077775102335661,
"grad_norm": 6.0,
"learning_rate": 7.839937004009277e-05,
"loss": 0.5362,
"step": 7540
},
{
"epoch": 0.9089814591861305,
"grad_norm": 5.15625,
"learning_rate": 7.83275032725838e-05,
"loss": 0.5177,
"step": 7550
},
{
"epoch": 0.910185408138695,
"grad_norm": 4.09375,
"learning_rate": 7.825555021458716e-05,
"loss": 0.5492,
"step": 7560
},
{
"epoch": 0.9113893570912593,
"grad_norm": 4.71875,
"learning_rate": 7.818351108528438e-05,
"loss": 0.6011,
"step": 7570
},
{
"epoch": 0.9125933060438237,
"grad_norm": 5.21875,
"learning_rate": 7.81113861041191e-05,
"loss": 0.5642,
"step": 7580
},
{
"epoch": 0.9137972549963882,
"grad_norm": 5.1875,
"learning_rate": 7.803917549079655e-05,
"loss": 0.5859,
"step": 7590
},
{
"epoch": 0.9150012039489526,
"grad_norm": 5.0,
"learning_rate": 7.796687946528278e-05,
"loss": 0.5527,
"step": 7600
},
{
"epoch": 0.9162051529015169,
"grad_norm": 3.546875,
"learning_rate": 7.7894498247804e-05,
"loss": 0.4797,
"step": 7610
},
{
"epoch": 0.9174091018540814,
"grad_norm": 4.3125,
"learning_rate": 7.782203205884598e-05,
"loss": 0.4499,
"step": 7620
},
{
"epoch": 0.9186130508066458,
"grad_norm": 4.5625,
"learning_rate": 7.774948111915328e-05,
"loss": 0.5914,
"step": 7630
},
{
"epoch": 0.9198169997592102,
"grad_norm": 5.1875,
"learning_rate": 7.767684564972863e-05,
"loss": 0.5344,
"step": 7640
},
{
"epoch": 0.9210209487117746,
"grad_norm": 5.25,
"learning_rate": 7.76041258718323e-05,
"loss": 0.556,
"step": 7650
},
{
"epoch": 0.922224897664339,
"grad_norm": 5.0625,
"learning_rate": 7.753132200698133e-05,
"loss": 0.6187,
"step": 7660
},
{
"epoch": 0.9234288466169035,
"grad_norm": 8.4375,
"learning_rate": 7.745843427694892e-05,
"loss": 0.606,
"step": 7670
},
{
"epoch": 0.9246327955694679,
"grad_norm": 2.96875,
"learning_rate": 7.738546290376373e-05,
"loss": 0.4342,
"step": 7680
},
{
"epoch": 0.9258367445220322,
"grad_norm": 5.40625,
"learning_rate": 7.731240810970922e-05,
"loss": 0.5671,
"step": 7690
},
{
"epoch": 0.9270406934745967,
"grad_norm": 4.5,
"learning_rate": 7.723927011732297e-05,
"loss": 0.5316,
"step": 7700
},
{
"epoch": 0.9282446424271611,
"grad_norm": 6.4375,
"learning_rate": 7.716604914939598e-05,
"loss": 0.5789,
"step": 7710
},
{
"epoch": 0.9294485913797255,
"grad_norm": 6.65625,
"learning_rate": 7.709274542897201e-05,
"loss": 0.5105,
"step": 7720
},
{
"epoch": 0.93065254033229,
"grad_norm": 5.15625,
"learning_rate": 7.701935917934693e-05,
"loss": 0.6031,
"step": 7730
},
{
"epoch": 0.9318564892848543,
"grad_norm": 4.28125,
"learning_rate": 7.694589062406796e-05,
"loss": 0.523,
"step": 7740
},
{
"epoch": 0.9330604382374187,
"grad_norm": 5.0,
"learning_rate": 7.687233998693309e-05,
"loss": 0.4716,
"step": 7750
},
{
"epoch": 0.9342643871899832,
"grad_norm": 4.3125,
"learning_rate": 7.679870749199028e-05,
"loss": 0.4669,
"step": 7760
},
{
"epoch": 0.9354683361425475,
"grad_norm": 8.4375,
"learning_rate": 7.672499336353687e-05,
"loss": 0.7322,
"step": 7770
},
{
"epoch": 0.936672285095112,
"grad_norm": 5.28125,
"learning_rate": 7.665119782611894e-05,
"loss": 0.5542,
"step": 7780
},
{
"epoch": 0.9378762340476764,
"grad_norm": 4.4375,
"learning_rate": 7.657732110453043e-05,
"loss": 0.6595,
"step": 7790
},
{
"epoch": 0.9390801830002408,
"grad_norm": 4.53125,
"learning_rate": 7.650336342381269e-05,
"loss": 0.5459,
"step": 7800
},
{
"epoch": 0.9402841319528052,
"grad_norm": 5.09375,
"learning_rate": 7.642932500925361e-05,
"loss": 0.5241,
"step": 7810
},
{
"epoch": 0.9414880809053696,
"grad_norm": 7.125,
"learning_rate": 7.635520608638707e-05,
"loss": 0.5525,
"step": 7820
},
{
"epoch": 0.942692029857934,
"grad_norm": 6.03125,
"learning_rate": 7.628100688099215e-05,
"loss": 0.6514,
"step": 7830
},
{
"epoch": 0.9438959788104985,
"grad_norm": 5.3125,
"learning_rate": 7.620672761909248e-05,
"loss": 0.5113,
"step": 7840
},
{
"epoch": 0.9450999277630628,
"grad_norm": 5.1875,
"learning_rate": 7.613236852695562e-05,
"loss": 0.5192,
"step": 7850
},
{
"epoch": 0.9463038767156272,
"grad_norm": 4.75,
"learning_rate": 7.605792983109222e-05,
"loss": 0.5733,
"step": 7860
},
{
"epoch": 0.9475078256681917,
"grad_norm": 5.78125,
"learning_rate": 7.598341175825547e-05,
"loss": 0.5582,
"step": 7870
},
{
"epoch": 0.9487117746207561,
"grad_norm": 4.1875,
"learning_rate": 7.590881453544034e-05,
"loss": 0.5084,
"step": 7880
},
{
"epoch": 0.9499157235733204,
"grad_norm": 4.40625,
"learning_rate": 7.58341383898829e-05,
"loss": 0.4794,
"step": 7890
},
{
"epoch": 0.9511196725258849,
"grad_norm": 5.875,
"learning_rate": 7.575938354905965e-05,
"loss": 0.5453,
"step": 7900
},
{
"epoch": 0.9523236214784493,
"grad_norm": 6.5625,
"learning_rate": 7.568455024068681e-05,
"loss": 0.6449,
"step": 7910
},
{
"epoch": 0.9535275704310138,
"grad_norm": 5.21875,
"learning_rate": 7.56096386927196e-05,
"loss": 0.5068,
"step": 7920
},
{
"epoch": 0.9547315193835781,
"grad_norm": 5.71875,
"learning_rate": 7.55346491333516e-05,
"loss": 0.5355,
"step": 7930
},
{
"epoch": 0.9559354683361425,
"grad_norm": 7.21875,
"learning_rate": 7.545958179101399e-05,
"loss": 0.6247,
"step": 7940
},
{
"epoch": 0.957139417288707,
"grad_norm": 6.1875,
"learning_rate": 7.538443689437492e-05,
"loss": 0.5096,
"step": 7950
},
{
"epoch": 0.9583433662412714,
"grad_norm": 5.65625,
"learning_rate": 7.53092146723388e-05,
"loss": 0.5115,
"step": 7960
},
{
"epoch": 0.9595473151938357,
"grad_norm": 5.875,
"learning_rate": 7.523391535404553e-05,
"loss": 0.526,
"step": 7970
},
{
"epoch": 0.9607512641464002,
"grad_norm": 6.34375,
"learning_rate": 7.515853916886993e-05,
"loss": 0.6069,
"step": 7980
},
{
"epoch": 0.9619552130989646,
"grad_norm": 5.1875,
"learning_rate": 7.508308634642091e-05,
"loss": 0.5304,
"step": 7990
},
{
"epoch": 0.963159162051529,
"grad_norm": 6.46875,
"learning_rate": 7.500755711654084e-05,
"loss": 0.6276,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.arguana": 0.4435129729684945,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.climate_fever": 0.2338541130261396,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.dbpedia_entity": 0.3026479712325987,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.fever": 0.597370319352728,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.fiqa": 0.4424709911666331,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.hotpotqa": 0.6627172049641262,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.nfcorpus": 0.3459841740142863,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.nq": 0.41742628142572313,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.quora": 0.8464612465256194,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.scidocs": 0.2538029143015277,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.scifact": 0.7493179432819632,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/nano_beir.webis_touche2020": 0.28785117199297444,
"step": 8000
},
{
"epoch": 0.963159162051529,
"eval/avg": 0.4652847753544012,
"step": 8000
},
{
"epoch": 0.9643631110040934,
"grad_norm": 6.84375,
"learning_rate": 7.493195170930487e-05,
"loss": 0.4977,
"step": 8010
},
{
"epoch": 0.9655670599566578,
"grad_norm": 5.6875,
"learning_rate": 7.485627035502018e-05,
"loss": 0.5237,
"step": 8020
},
{
"epoch": 0.9667710089092223,
"grad_norm": 5.625,
"learning_rate": 7.47805132842253e-05,
"loss": 0.5604,
"step": 8030
},
{
"epoch": 0.9679749578617867,
"grad_norm": 5.59375,
"learning_rate": 7.470468072768941e-05,
"loss": 0.5526,
"step": 8040
},
{
"epoch": 0.969178906814351,
"grad_norm": 5.25,
"learning_rate": 7.462877291641164e-05,
"loss": 0.5168,
"step": 8050
},
{
"epoch": 0.9703828557669155,
"grad_norm": 7.15625,
"learning_rate": 7.45527900816203e-05,
"loss": 0.6167,
"step": 8060
},
{
"epoch": 0.9715868047194799,
"grad_norm": 5.0625,
"learning_rate": 7.447673245477234e-05,
"loss": 0.5086,
"step": 8070
},
{
"epoch": 0.9727907536720443,
"grad_norm": 4.6875,
"learning_rate": 7.440060026755244e-05,
"loss": 0.5153,
"step": 8080
},
{
"epoch": 0.9739947026246087,
"grad_norm": 4.875,
"learning_rate": 7.432439375187247e-05,
"loss": 0.4041,
"step": 8090
},
{
"epoch": 0.9751986515771731,
"grad_norm": 5.375,
"learning_rate": 7.424811313987068e-05,
"loss": 0.4805,
"step": 8100
},
{
"epoch": 0.9764026005297375,
"grad_norm": 7.0625,
"learning_rate": 7.417175866391104e-05,
"loss": 0.5984,
"step": 8110
},
{
"epoch": 0.977606549482302,
"grad_norm": 5.1875,
"learning_rate": 7.409533055658253e-05,
"loss": 0.5615,
"step": 8120
},
{
"epoch": 0.9788104984348663,
"grad_norm": 6.375,
"learning_rate": 7.401882905069843e-05,
"loss": 0.5285,
"step": 8130
},
{
"epoch": 0.9800144473874308,
"grad_norm": 3.453125,
"learning_rate": 7.394225437929559e-05,
"loss": 0.4706,
"step": 8140
},
{
"epoch": 0.9812183963399952,
"grad_norm": 5.375,
"learning_rate": 7.386560677563374e-05,
"loss": 0.5216,
"step": 8150
},
{
"epoch": 0.9824223452925596,
"grad_norm": 3.734375,
"learning_rate": 7.378888647319474e-05,
"loss": 0.5955,
"step": 8160
},
{
"epoch": 0.983626294245124,
"grad_norm": 5.84375,
"learning_rate": 7.3712093705682e-05,
"loss": 0.5387,
"step": 8170
},
{
"epoch": 0.9848302431976884,
"grad_norm": 6.25,
"learning_rate": 7.363522870701953e-05,
"loss": 0.5374,
"step": 8180
},
{
"epoch": 0.9860341921502528,
"grad_norm": 6.09375,
"learning_rate": 7.355829171135153e-05,
"loss": 0.5325,
"step": 8190
},
{
"epoch": 0.9872381411028173,
"grad_norm": 4.84375,
"learning_rate": 7.348128295304136e-05,
"loss": 0.5604,
"step": 8200
},
{
"epoch": 0.9884420900553816,
"grad_norm": 5.25,
"learning_rate": 7.34042026666711e-05,
"loss": 0.5857,
"step": 8210
},
{
"epoch": 0.989646039007946,
"grad_norm": 5.40625,
"learning_rate": 7.332705108704064e-05,
"loss": 0.4678,
"step": 8220
},
{
"epoch": 0.9908499879605105,
"grad_norm": 6.28125,
"learning_rate": 7.324982844916709e-05,
"loss": 0.6176,
"step": 8230
},
{
"epoch": 0.9920539369130749,
"grad_norm": 4.90625,
"learning_rate": 7.317253498828399e-05,
"loss": 0.59,
"step": 8240
},
{
"epoch": 0.9932578858656393,
"grad_norm": 5.25,
"learning_rate": 7.309517093984063e-05,
"loss": 0.5609,
"step": 8250
},
{
"epoch": 0.9944618348182037,
"grad_norm": 4.84375,
"learning_rate": 7.301773653950133e-05,
"loss": 0.4433,
"step": 8260
},
{
"epoch": 0.9956657837707681,
"grad_norm": 5.96875,
"learning_rate": 7.294023202314466e-05,
"loss": 0.5165,
"step": 8270
},
{
"epoch": 0.9968697327233326,
"grad_norm": 7.03125,
"learning_rate": 7.286265762686287e-05,
"loss": 0.6314,
"step": 8280
},
{
"epoch": 0.9980736816758969,
"grad_norm": 4.53125,
"learning_rate": 7.2785013586961e-05,
"loss": 0.5256,
"step": 8290
},
{
"epoch": 0.9992776306284613,
"grad_norm": 5.4375,
"learning_rate": 7.270730013995626e-05,
"loss": 0.5578,
"step": 8300
},
{
"epoch": 1.0004815795810258,
"grad_norm": 4.625,
"learning_rate": 7.262951752257728e-05,
"loss": 0.5762,
"step": 8310
},
{
"epoch": 1.0016855285335902,
"grad_norm": 6.125,
"learning_rate": 7.255166597176342e-05,
"loss": 0.4986,
"step": 8320
},
{
"epoch": 1.0028894774861545,
"grad_norm": 5.75,
"learning_rate": 7.2473745724664e-05,
"loss": 0.5158,
"step": 8330
},
{
"epoch": 1.004093426438719,
"grad_norm": 5.53125,
"learning_rate": 7.239575701863758e-05,
"loss": 0.4551,
"step": 8340
},
{
"epoch": 1.0052973753912835,
"grad_norm": 5.46875,
"learning_rate": 7.231770009125133e-05,
"loss": 0.5597,
"step": 8350
},
{
"epoch": 1.0065013243438479,
"grad_norm": 4.5625,
"learning_rate": 7.223957518028015e-05,
"loss": 0.4733,
"step": 8360
},
{
"epoch": 1.0077052732964122,
"grad_norm": 5.5625,
"learning_rate": 7.216138252370609e-05,
"loss": 0.5646,
"step": 8370
},
{
"epoch": 1.0089092222489766,
"grad_norm": 5.96875,
"learning_rate": 7.208312235971753e-05,
"loss": 0.6426,
"step": 8380
},
{
"epoch": 1.010113171201541,
"grad_norm": 5.28125,
"learning_rate": 7.200479492670851e-05,
"loss": 0.5593,
"step": 8390
},
{
"epoch": 1.0113171201541056,
"grad_norm": 6.25,
"learning_rate": 7.192640046327795e-05,
"loss": 0.5158,
"step": 8400
},
{
"epoch": 1.01252106910667,
"grad_norm": 4.59375,
"learning_rate": 7.184793920822902e-05,
"loss": 0.5326,
"step": 8410
},
{
"epoch": 1.0137250180592343,
"grad_norm": 9.625,
"learning_rate": 7.17694114005683e-05,
"loss": 0.5874,
"step": 8420
},
{
"epoch": 1.0149289670117987,
"grad_norm": 6.9375,
"learning_rate": 7.169081727950509e-05,
"loss": 0.5065,
"step": 8430
},
{
"epoch": 1.016132915964363,
"grad_norm": 5.375,
"learning_rate": 7.161215708445073e-05,
"loss": 0.5399,
"step": 8440
},
{
"epoch": 1.0173368649169274,
"grad_norm": 4.90625,
"learning_rate": 7.15334310550178e-05,
"loss": 0.5065,
"step": 8450
},
{
"epoch": 1.018540813869492,
"grad_norm": 5.46875,
"learning_rate": 7.145463943101946e-05,
"loss": 0.5881,
"step": 8460
},
{
"epoch": 1.0197447628220564,
"grad_norm": 4.6875,
"learning_rate": 7.137578245246865e-05,
"loss": 0.5602,
"step": 8470
},
{
"epoch": 1.0209487117746208,
"grad_norm": 4.875,
"learning_rate": 7.12968603595774e-05,
"loss": 0.5295,
"step": 8480
},
{
"epoch": 1.0221526607271851,
"grad_norm": 4.25,
"learning_rate": 7.12178733927561e-05,
"loss": 0.4936,
"step": 8490
},
{
"epoch": 1.0233566096797495,
"grad_norm": 5.84375,
"learning_rate": 7.113882179261273e-05,
"loss": 0.5008,
"step": 8500
},
{
"epoch": 1.0245605586323139,
"grad_norm": 6.5,
"learning_rate": 7.105970579995221e-05,
"loss": 0.465,
"step": 8510
},
{
"epoch": 1.0257645075848785,
"grad_norm": 4.84375,
"learning_rate": 7.098052565577553e-05,
"loss": 0.5243,
"step": 8520
},
{
"epoch": 1.0269684565374428,
"grad_norm": 5.71875,
"learning_rate": 7.090128160127919e-05,
"loss": 0.5907,
"step": 8530
},
{
"epoch": 1.0281724054900072,
"grad_norm": 5.75,
"learning_rate": 7.082197387785429e-05,
"loss": 0.5077,
"step": 8540
},
{
"epoch": 1.0293763544425716,
"grad_norm": 3.25,
"learning_rate": 7.07426027270859e-05,
"loss": 0.4881,
"step": 8550
},
{
"epoch": 1.030580303395136,
"grad_norm": 4.4375,
"learning_rate": 7.066316839075234e-05,
"loss": 0.4338,
"step": 8560
},
{
"epoch": 1.0317842523477005,
"grad_norm": 4.875,
"learning_rate": 7.058367111082433e-05,
"loss": 0.4917,
"step": 8570
},
{
"epoch": 1.032988201300265,
"grad_norm": 4.65625,
"learning_rate": 7.050411112946442e-05,
"loss": 0.6375,
"step": 8580
},
{
"epoch": 1.0341921502528293,
"grad_norm": 5.21875,
"learning_rate": 7.042448868902611e-05,
"loss": 0.5467,
"step": 8590
},
{
"epoch": 1.0353960992053937,
"grad_norm": 4.8125,
"learning_rate": 7.034480403205312e-05,
"loss": 0.5345,
"step": 8600
},
{
"epoch": 1.036600048157958,
"grad_norm": 4.53125,
"learning_rate": 7.026505740127878e-05,
"loss": 0.509,
"step": 8610
},
{
"epoch": 1.0378039971105224,
"grad_norm": 6.15625,
"learning_rate": 7.018524903962511e-05,
"loss": 0.5413,
"step": 8620
},
{
"epoch": 1.039007946063087,
"grad_norm": 3.984375,
"learning_rate": 7.010537919020228e-05,
"loss": 0.4464,
"step": 8630
},
{
"epoch": 1.0402118950156514,
"grad_norm": 4.5,
"learning_rate": 7.002544809630764e-05,
"loss": 0.4836,
"step": 8640
},
{
"epoch": 1.0414158439682157,
"grad_norm": 4.40625,
"learning_rate": 6.994545600142521e-05,
"loss": 0.5262,
"step": 8650
},
{
"epoch": 1.04261979292078,
"grad_norm": 3.671875,
"learning_rate": 6.986540314922475e-05,
"loss": 0.4739,
"step": 8660
},
{
"epoch": 1.0438237418733445,
"grad_norm": 7.96875,
"learning_rate": 6.978528978356117e-05,
"loss": 0.5743,
"step": 8670
},
{
"epoch": 1.045027690825909,
"grad_norm": 6.3125,
"learning_rate": 6.970511614847364e-05,
"loss": 0.5369,
"step": 8680
},
{
"epoch": 1.0462316397784734,
"grad_norm": 5.5625,
"learning_rate": 6.962488248818497e-05,
"loss": 0.5339,
"step": 8690
},
{
"epoch": 1.0474355887310378,
"grad_norm": 6.0625,
"learning_rate": 6.954458904710082e-05,
"loss": 0.4756,
"step": 8700
},
{
"epoch": 1.0486395376836022,
"grad_norm": 5.9375,
"learning_rate": 6.94642360698089e-05,
"loss": 0.5706,
"step": 8710
},
{
"epoch": 1.0498434866361666,
"grad_norm": 6.03125,
"learning_rate": 6.938382380107833e-05,
"loss": 0.5521,
"step": 8720
},
{
"epoch": 1.051047435588731,
"grad_norm": 5.84375,
"learning_rate": 6.930335248585884e-05,
"loss": 0.5216,
"step": 8730
},
{
"epoch": 1.0522513845412955,
"grad_norm": 6.125,
"learning_rate": 6.922282236928001e-05,
"loss": 0.5819,
"step": 8740
},
{
"epoch": 1.05345533349386,
"grad_norm": 7.25,
"learning_rate": 6.914223369665051e-05,
"loss": 0.4959,
"step": 8750
},
{
"epoch": 1.0546592824464243,
"grad_norm": 5.40625,
"learning_rate": 6.906158671345746e-05,
"loss": 0.5474,
"step": 8760
},
{
"epoch": 1.0558632313989886,
"grad_norm": 5.09375,
"learning_rate": 6.898088166536552e-05,
"loss": 0.5346,
"step": 8770
},
{
"epoch": 1.057067180351553,
"grad_norm": 5.21875,
"learning_rate": 6.890011879821627e-05,
"loss": 0.4822,
"step": 8780
},
{
"epoch": 1.0582711293041176,
"grad_norm": 6.15625,
"learning_rate": 6.881929835802743e-05,
"loss": 0.4604,
"step": 8790
},
{
"epoch": 1.059475078256682,
"grad_norm": 5.4375,
"learning_rate": 6.873842059099205e-05,
"loss": 0.4587,
"step": 8800
},
{
"epoch": 1.0606790272092463,
"grad_norm": 5.875,
"learning_rate": 6.865748574347786e-05,
"loss": 0.4671,
"step": 8810
},
{
"epoch": 1.0618829761618107,
"grad_norm": 3.53125,
"learning_rate": 6.857649406202641e-05,
"loss": 0.4866,
"step": 8820
},
{
"epoch": 1.063086925114375,
"grad_norm": 6.5,
"learning_rate": 6.849544579335244e-05,
"loss": 0.4871,
"step": 8830
},
{
"epoch": 1.0642908740669395,
"grad_norm": 4.84375,
"learning_rate": 6.841434118434302e-05,
"loss": 0.5286,
"step": 8840
},
{
"epoch": 1.065494823019504,
"grad_norm": 5.0,
"learning_rate": 6.833318048205684e-05,
"loss": 0.5464,
"step": 8850
},
{
"epoch": 1.0666987719720684,
"grad_norm": 4.875,
"learning_rate": 6.825196393372351e-05,
"loss": 0.5389,
"step": 8860
},
{
"epoch": 1.0679027209246328,
"grad_norm": 6.65625,
"learning_rate": 6.817069178674267e-05,
"loss": 0.559,
"step": 8870
},
{
"epoch": 1.0691066698771972,
"grad_norm": 4.4375,
"learning_rate": 6.808936428868343e-05,
"loss": 0.5231,
"step": 8880
},
{
"epoch": 1.0703106188297615,
"grad_norm": 6.6875,
"learning_rate": 6.80079816872834e-05,
"loss": 0.5876,
"step": 8890
},
{
"epoch": 1.0715145677823261,
"grad_norm": 4.75,
"learning_rate": 6.792654423044813e-05,
"loss": 0.4508,
"step": 8900
},
{
"epoch": 1.0727185167348905,
"grad_norm": 4.5625,
"learning_rate": 6.784505216625023e-05,
"loss": 0.5662,
"step": 8910
},
{
"epoch": 1.0739224656874549,
"grad_norm": 3.875,
"learning_rate": 6.776350574292866e-05,
"loss": 0.4851,
"step": 8920
},
{
"epoch": 1.0751264146400192,
"grad_norm": 5.53125,
"learning_rate": 6.7681905208888e-05,
"loss": 0.4982,
"step": 8930
},
{
"epoch": 1.0763303635925836,
"grad_norm": 5.96875,
"learning_rate": 6.760025081269756e-05,
"loss": 0.4629,
"step": 8940
},
{
"epoch": 1.077534312545148,
"grad_norm": 6.8125,
"learning_rate": 6.751854280309082e-05,
"loss": 0.5809,
"step": 8950
},
{
"epoch": 1.0787382614977126,
"grad_norm": 5.5625,
"learning_rate": 6.74367814289646e-05,
"loss": 0.5069,
"step": 8960
},
{
"epoch": 1.079942210450277,
"grad_norm": 6.90625,
"learning_rate": 6.735496693937814e-05,
"loss": 0.501,
"step": 8970
},
{
"epoch": 1.0811461594028413,
"grad_norm": 5.21875,
"learning_rate": 6.727309958355262e-05,
"loss": 0.5077,
"step": 8980
},
{
"epoch": 1.0823501083554057,
"grad_norm": 6.34375,
"learning_rate": 6.719117961087018e-05,
"loss": 0.5802,
"step": 8990
},
{
"epoch": 1.08355405730797,
"grad_norm": 4.25,
"learning_rate": 6.710920727087329e-05,
"loss": 0.4715,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.arguana": 0.4478891276101664,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.climate_fever": 0.21768095572682353,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.dbpedia_entity": 0.30809903314530207,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.fever": 0.6082167867321827,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.fiqa": 0.4350751407965357,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.hotpotqa": 0.6646494721523275,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.nfcorpus": 0.3555960324290284,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.nq": 0.4091867037906162,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.quora": 0.8394569706244888,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.scidocs": 0.2586556690506404,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.scifact": 0.7491458028776218,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/nano_beir.webis_touche2020": 0.3009680779309195,
"step": 9000
},
{
"epoch": 1.08355405730797,
"eval/avg": 0.46621831440555445,
"step": 9000
},
{
"epoch": 1.0847580062605346,
"grad_norm": 4.71875,
"learning_rate": 6.702718281326387e-05,
"loss": 0.4288,
"step": 9010
},
{
"epoch": 1.085961955213099,
"grad_norm": 6.21875,
"learning_rate": 6.694510648790269e-05,
"loss": 0.7266,
"step": 9020
},
{
"epoch": 1.0871659041656634,
"grad_norm": 5.03125,
"learning_rate": 6.686297854480843e-05,
"loss": 0.503,
"step": 9030
},
{
"epoch": 1.0883698531182278,
"grad_norm": 5.0625,
"learning_rate": 6.678079923415708e-05,
"loss": 0.5301,
"step": 9040
},
{
"epoch": 1.0895738020707921,
"grad_norm": 5.6875,
"learning_rate": 6.669856880628107e-05,
"loss": 0.4934,
"step": 9050
},
{
"epoch": 1.0907777510233565,
"grad_norm": 3.546875,
"learning_rate": 6.661628751166851e-05,
"loss": 0.5875,
"step": 9060
},
{
"epoch": 1.091981699975921,
"grad_norm": 5.71875,
"learning_rate": 6.653395560096254e-05,
"loss": 0.5581,
"step": 9070
},
{
"epoch": 1.0931856489284855,
"grad_norm": 4.375,
"learning_rate": 6.645157332496039e-05,
"loss": 0.4523,
"step": 9080
},
{
"epoch": 1.0943895978810498,
"grad_norm": 5.53125,
"learning_rate": 6.63691409346128e-05,
"loss": 0.6269,
"step": 9090
},
{
"epoch": 1.0955935468336142,
"grad_norm": 5.78125,
"learning_rate": 6.62866586810231e-05,
"loss": 0.5518,
"step": 9100
},
{
"epoch": 1.0967974957861786,
"grad_norm": 6.5625,
"learning_rate": 6.620412681544656e-05,
"loss": 0.5039,
"step": 9110
},
{
"epoch": 1.0980014447387432,
"grad_norm": 5.21875,
"learning_rate": 6.612154558928955e-05,
"loss": 0.5028,
"step": 9120
},
{
"epoch": 1.0992053936913075,
"grad_norm": 6.125,
"learning_rate": 6.60389152541088e-05,
"loss": 0.4643,
"step": 9130
},
{
"epoch": 1.100409342643872,
"grad_norm": 4.375,
"learning_rate": 6.595623606161064e-05,
"loss": 0.4437,
"step": 9140
},
{
"epoch": 1.1016132915964363,
"grad_norm": 5.125,
"learning_rate": 6.587350826365023e-05,
"loss": 0.4973,
"step": 9150
},
{
"epoch": 1.1028172405490007,
"grad_norm": 4.875,
"learning_rate": 6.579073211223079e-05,
"loss": 0.4953,
"step": 9160
},
{
"epoch": 1.104021189501565,
"grad_norm": 4.96875,
"learning_rate": 6.570790785950284e-05,
"loss": 0.4535,
"step": 9170
},
{
"epoch": 1.1052251384541296,
"grad_norm": 4.0,
"learning_rate": 6.562503575776342e-05,
"loss": 0.4662,
"step": 9180
},
{
"epoch": 1.106429087406694,
"grad_norm": 7.40625,
"learning_rate": 6.55421160594553e-05,
"loss": 0.5784,
"step": 9190
},
{
"epoch": 1.1076330363592584,
"grad_norm": 4.875,
"learning_rate": 6.54591490171663e-05,
"loss": 0.5191,
"step": 9200
},
{
"epoch": 1.1088369853118227,
"grad_norm": 5.65625,
"learning_rate": 6.537613488362837e-05,
"loss": 0.5627,
"step": 9210
},
{
"epoch": 1.110040934264387,
"grad_norm": 6.21875,
"learning_rate": 6.5293073911717e-05,
"loss": 0.4988,
"step": 9220
},
{
"epoch": 1.1112448832169517,
"grad_norm": 4.6875,
"learning_rate": 6.52099663544503e-05,
"loss": 0.5353,
"step": 9230
},
{
"epoch": 1.112448832169516,
"grad_norm": 5.71875,
"learning_rate": 6.51268124649883e-05,
"loss": 0.6123,
"step": 9240
},
{
"epoch": 1.1136527811220804,
"grad_norm": 5.28125,
"learning_rate": 6.504361249663217e-05,
"loss": 0.4849,
"step": 9250
},
{
"epoch": 1.1148567300746448,
"grad_norm": 4.84375,
"learning_rate": 6.496036670282344e-05,
"loss": 0.5687,
"step": 9260
},
{
"epoch": 1.1160606790272092,
"grad_norm": 4.96875,
"learning_rate": 6.487707533714324e-05,
"loss": 0.5215,
"step": 9270
},
{
"epoch": 1.1172646279797736,
"grad_norm": 6.5,
"learning_rate": 6.47937386533115e-05,
"loss": 0.5049,
"step": 9280
},
{
"epoch": 1.1184685769323381,
"grad_norm": 4.59375,
"learning_rate": 6.471035690518624e-05,
"loss": 0.5697,
"step": 9290
},
{
"epoch": 1.1196725258849025,
"grad_norm": 6.3125,
"learning_rate": 6.462693034676271e-05,
"loss": 0.5415,
"step": 9300
},
{
"epoch": 1.1208764748374669,
"grad_norm": 5.1875,
"learning_rate": 6.454345923217267e-05,
"loss": 0.4293,
"step": 9310
},
{
"epoch": 1.1220804237900313,
"grad_norm": 7.28125,
"learning_rate": 6.445994381568361e-05,
"loss": 0.6065,
"step": 9320
},
{
"epoch": 1.1232843727425956,
"grad_norm": 7.34375,
"learning_rate": 6.437638435169798e-05,
"loss": 0.5667,
"step": 9330
},
{
"epoch": 1.1244883216951602,
"grad_norm": 4.625,
"learning_rate": 6.42927810947524e-05,
"loss": 0.496,
"step": 9340
},
{
"epoch": 1.1256922706477246,
"grad_norm": 5.4375,
"learning_rate": 6.420913429951687e-05,
"loss": 0.6485,
"step": 9350
},
{
"epoch": 1.126896219600289,
"grad_norm": 6.96875,
"learning_rate": 6.412544422079407e-05,
"loss": 0.5209,
"step": 9360
},
{
"epoch": 1.1281001685528533,
"grad_norm": 5.125,
"learning_rate": 6.404171111351846e-05,
"loss": 0.4859,
"step": 9370
},
{
"epoch": 1.1293041175054177,
"grad_norm": 5.90625,
"learning_rate": 6.395793523275563e-05,
"loss": 0.5221,
"step": 9380
},
{
"epoch": 1.130508066457982,
"grad_norm": 4.5625,
"learning_rate": 6.387411683370144e-05,
"loss": 0.4907,
"step": 9390
},
{
"epoch": 1.1317120154105467,
"grad_norm": 5.40625,
"learning_rate": 6.379025617168124e-05,
"loss": 0.4682,
"step": 9400
},
{
"epoch": 1.132915964363111,
"grad_norm": 6.59375,
"learning_rate": 6.370635350214919e-05,
"loss": 0.4697,
"step": 9410
},
{
"epoch": 1.1341199133156754,
"grad_norm": 4.40625,
"learning_rate": 6.362240908068733e-05,
"loss": 0.4391,
"step": 9420
},
{
"epoch": 1.1353238622682398,
"grad_norm": 5.28125,
"learning_rate": 6.353842316300498e-05,
"loss": 0.5172,
"step": 9430
},
{
"epoch": 1.1365278112208042,
"grad_norm": 6.03125,
"learning_rate": 6.345439600493775e-05,
"loss": 0.5424,
"step": 9440
},
{
"epoch": 1.1377317601733687,
"grad_norm": 5.84375,
"learning_rate": 6.337032786244699e-05,
"loss": 0.5685,
"step": 9450
},
{
"epoch": 1.1389357091259331,
"grad_norm": 5.3125,
"learning_rate": 6.32862189916188e-05,
"loss": 0.478,
"step": 9460
},
{
"epoch": 1.1401396580784975,
"grad_norm": 4.03125,
"learning_rate": 6.32020696486634e-05,
"loss": 0.5501,
"step": 9470
},
{
"epoch": 1.1413436070310619,
"grad_norm": 6.96875,
"learning_rate": 6.311788008991432e-05,
"loss": 0.6207,
"step": 9480
},
{
"epoch": 1.1425475559836262,
"grad_norm": 15.25,
"learning_rate": 6.303365057182748e-05,
"loss": 0.5221,
"step": 9490
},
{
"epoch": 1.1437515049361906,
"grad_norm": 5.15625,
"learning_rate": 6.294938135098067e-05,
"loss": 0.422,
"step": 9500
},
{
"epoch": 1.1449554538887552,
"grad_norm": 5.15625,
"learning_rate": 6.286507268407251e-05,
"loss": 0.5932,
"step": 9510
},
{
"epoch": 1.1461594028413196,
"grad_norm": 3.828125,
"learning_rate": 6.278072482792185e-05,
"loss": 0.4619,
"step": 9520
},
{
"epoch": 1.147363351793884,
"grad_norm": 5.125,
"learning_rate": 6.269633803946686e-05,
"loss": 0.5775,
"step": 9530
},
{
"epoch": 1.1485673007464483,
"grad_norm": 10.5,
"learning_rate": 6.261191257576435e-05,
"loss": 0.5186,
"step": 9540
},
{
"epoch": 1.1497712496990127,
"grad_norm": 4.75,
"learning_rate": 6.252744869398891e-05,
"loss": 0.4728,
"step": 9550
},
{
"epoch": 1.1509751986515773,
"grad_norm": 5.4375,
"learning_rate": 6.244294665143218e-05,
"loss": 0.5198,
"step": 9560
},
{
"epoch": 1.1521791476041416,
"grad_norm": 4.875,
"learning_rate": 6.235840670550204e-05,
"loss": 0.4947,
"step": 9570
},
{
"epoch": 1.153383096556706,
"grad_norm": 5.71875,
"learning_rate": 6.227382911372183e-05,
"loss": 0.5112,
"step": 9580
},
{
"epoch": 1.1545870455092704,
"grad_norm": 5.40625,
"learning_rate": 6.218921413372956e-05,
"loss": 0.5281,
"step": 9590
},
{
"epoch": 1.1557909944618348,
"grad_norm": 5.90625,
"learning_rate": 6.210456202327711e-05,
"loss": 0.5837,
"step": 9600
},
{
"epoch": 1.1569949434143991,
"grad_norm": 4.9375,
"learning_rate": 6.201987304022952e-05,
"loss": 0.4458,
"step": 9610
},
{
"epoch": 1.1581988923669637,
"grad_norm": 5.90625,
"learning_rate": 6.193514744256412e-05,
"loss": 0.5455,
"step": 9620
},
{
"epoch": 1.159402841319528,
"grad_norm": 5.875,
"learning_rate": 6.185038548836974e-05,
"loss": 0.5426,
"step": 9630
},
{
"epoch": 1.1606067902720925,
"grad_norm": 4.84375,
"learning_rate": 6.176558743584602e-05,
"loss": 0.4871,
"step": 9640
},
{
"epoch": 1.1618107392246568,
"grad_norm": 4.4375,
"learning_rate": 6.168075354330252e-05,
"loss": 0.4667,
"step": 9650
},
{
"epoch": 1.1630146881772212,
"grad_norm": 4.8125,
"learning_rate": 6.159588406915803e-05,
"loss": 0.5099,
"step": 9660
},
{
"epoch": 1.1642186371297858,
"grad_norm": 3.90625,
"learning_rate": 6.151097927193962e-05,
"loss": 0.4835,
"step": 9670
},
{
"epoch": 1.1654225860823502,
"grad_norm": 5.53125,
"learning_rate": 6.142603941028208e-05,
"loss": 0.5084,
"step": 9680
},
{
"epoch": 1.1666265350349145,
"grad_norm": 4.15625,
"learning_rate": 6.134106474292693e-05,
"loss": 0.4638,
"step": 9690
},
{
"epoch": 1.167830483987479,
"grad_norm": 4.03125,
"learning_rate": 6.125605552872176e-05,
"loss": 0.5356,
"step": 9700
},
{
"epoch": 1.1690344329400433,
"grad_norm": 5.46875,
"learning_rate": 6.117101202661935e-05,
"loss": 0.5472,
"step": 9710
},
{
"epoch": 1.1702383818926076,
"grad_norm": 5.28125,
"learning_rate": 6.1085934495677e-05,
"loss": 0.5299,
"step": 9720
},
{
"epoch": 1.1714423308451722,
"grad_norm": 5.15625,
"learning_rate": 6.10008231950556e-05,
"loss": 0.4516,
"step": 9730
},
{
"epoch": 1.1726462797977366,
"grad_norm": 4.6875,
"learning_rate": 6.0915678384018924e-05,
"loss": 0.4285,
"step": 9740
},
{
"epoch": 1.173850228750301,
"grad_norm": 4.1875,
"learning_rate": 6.083050032193286e-05,
"loss": 0.4627,
"step": 9750
},
{
"epoch": 1.1750541777028654,
"grad_norm": 4.0625,
"learning_rate": 6.074528926826453e-05,
"loss": 0.4609,
"step": 9760
},
{
"epoch": 1.1762581266554297,
"grad_norm": 5.21875,
"learning_rate": 6.0660045482581594e-05,
"loss": 0.5352,
"step": 9770
},
{
"epoch": 1.1774620756079943,
"grad_norm": 8.875,
"learning_rate": 6.0574769224551406e-05,
"loss": 0.5907,
"step": 9780
},
{
"epoch": 1.1786660245605587,
"grad_norm": 4.375,
"learning_rate": 6.048946075394023e-05,
"loss": 0.524,
"step": 9790
},
{
"epoch": 1.179869973513123,
"grad_norm": 4.9375,
"learning_rate": 6.040412033061248e-05,
"loss": 0.5806,
"step": 9800
},
{
"epoch": 1.1810739224656874,
"grad_norm": 3.890625,
"learning_rate": 6.031874821452985e-05,
"loss": 0.5066,
"step": 9810
},
{
"epoch": 1.1822778714182518,
"grad_norm": 4.71875,
"learning_rate": 6.0233344665750656e-05,
"loss": 0.5188,
"step": 9820
},
{
"epoch": 1.1834818203708162,
"grad_norm": 6.21875,
"learning_rate": 6.014790994442888e-05,
"loss": 0.5018,
"step": 9830
},
{
"epoch": 1.1846857693233808,
"grad_norm": 7.0625,
"learning_rate": 6.0062444310813525e-05,
"loss": 0.4706,
"step": 9840
},
{
"epoch": 1.1858897182759451,
"grad_norm": 5.375,
"learning_rate": 5.9976948025247724e-05,
"loss": 0.6273,
"step": 9850
},
{
"epoch": 1.1870936672285095,
"grad_norm": 6.25,
"learning_rate": 5.9891421348167985e-05,
"loss": 0.4983,
"step": 9860
},
{
"epoch": 1.1882976161810739,
"grad_norm": 4.5,
"learning_rate": 5.980586454010341e-05,
"loss": 0.4992,
"step": 9870
},
{
"epoch": 1.1895015651336383,
"grad_norm": 4.3125,
"learning_rate": 5.9720277861674856e-05,
"loss": 0.4477,
"step": 9880
},
{
"epoch": 1.1907055140862028,
"grad_norm": 5.59375,
"learning_rate": 5.9634661573594205e-05,
"loss": 0.427,
"step": 9890
},
{
"epoch": 1.1919094630387672,
"grad_norm": 6.03125,
"learning_rate": 5.9549015936663524e-05,
"loss": 0.4641,
"step": 9900
},
{
"epoch": 1.1931134119913316,
"grad_norm": 7.15625,
"learning_rate": 5.946334121177425e-05,
"loss": 0.53,
"step": 9910
},
{
"epoch": 1.194317360943896,
"grad_norm": 5.71875,
"learning_rate": 5.937763765990647e-05,
"loss": 0.5573,
"step": 9920
},
{
"epoch": 1.1955213098964603,
"grad_norm": 5.8125,
"learning_rate": 5.929190554212807e-05,
"loss": 0.4941,
"step": 9930
},
{
"epoch": 1.1967252588490247,
"grad_norm": 5.1875,
"learning_rate": 5.920614511959395e-05,
"loss": 0.5426,
"step": 9940
},
{
"epoch": 1.1979292078015893,
"grad_norm": 5.6875,
"learning_rate": 5.9120356653545215e-05,
"loss": 0.4803,
"step": 9950
},
{
"epoch": 1.1991331567541537,
"grad_norm": 4.4375,
"learning_rate": 5.9034540405308424e-05,
"loss": 0.5357,
"step": 9960
},
{
"epoch": 1.200337105706718,
"grad_norm": 4.0,
"learning_rate": 5.8948696636294744e-05,
"loss": 0.5537,
"step": 9970
},
{
"epoch": 1.2015410546592824,
"grad_norm": 6.375,
"learning_rate": 5.8862825607999196e-05,
"loss": 0.6009,
"step": 9980
},
{
"epoch": 1.2027450036118468,
"grad_norm": 5.75,
"learning_rate": 5.87769275819998e-05,
"loss": 0.5499,
"step": 9990
},
{
"epoch": 1.2039489525644114,
"grad_norm": 5.34375,
"learning_rate": 5.869100281995685e-05,
"loss": 0.6009,
"step": 10000
}
],
"logging_steps": 10,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}