{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8744142455482662,
"eval_steps": 250,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004686035613870665,
"grad_norm": 2.907787561416626,
"learning_rate": 9.997071227741332e-06,
"loss": 3.3815,
"step": 10
},
{
"epoch": 0.00937207122774133,
"grad_norm": 2.2910118103027344,
"learning_rate": 9.994142455482663e-06,
"loss": 3.3605,
"step": 20
},
{
"epoch": 0.014058106841611996,
"grad_norm": 2.791727066040039,
"learning_rate": 9.991213683223994e-06,
"loss": 3.3338,
"step": 30
},
{
"epoch": 0.01874414245548266,
"grad_norm": 2.881253242492676,
"learning_rate": 9.988284910965324e-06,
"loss": 3.3047,
"step": 40
},
{
"epoch": 0.023430178069353328,
"grad_norm": 3.5495920181274414,
"learning_rate": 9.985356138706655e-06,
"loss": 3.266,
"step": 50
},
{
"epoch": 0.028116213683223992,
"grad_norm": 3.8195812702178955,
"learning_rate": 9.982427366447985e-06,
"loss": 3.2116,
"step": 60
},
{
"epoch": 0.03280224929709466,
"grad_norm": 5.006792068481445,
"learning_rate": 9.979498594189316e-06,
"loss": 3.1271,
"step": 70
},
{
"epoch": 0.03748828491096532,
"grad_norm": 5.206729412078857,
"learning_rate": 9.976569821930647e-06,
"loss": 3.0472,
"step": 80
},
{
"epoch": 0.04217432052483599,
"grad_norm": 6.317724227905273,
"learning_rate": 9.973641049671978e-06,
"loss": 2.9458,
"step": 90
},
{
"epoch": 0.046860356138706656,
"grad_norm": 7.30826997756958,
"learning_rate": 9.97071227741331e-06,
"loss": 2.9002,
"step": 100
},
{
"epoch": 0.05154639175257732,
"grad_norm": 7.05161190032959,
"learning_rate": 9.96778350515464e-06,
"loss": 2.8379,
"step": 110
},
{
"epoch": 0.056232427366447985,
"grad_norm": 12.389013290405273,
"learning_rate": 9.964854732895972e-06,
"loss": 2.7637,
"step": 120
},
{
"epoch": 0.06091846298031865,
"grad_norm": 19.661762237548828,
"learning_rate": 9.961925960637301e-06,
"loss": 2.7413,
"step": 130
},
{
"epoch": 0.06560449859418932,
"grad_norm": 7.9712018966674805,
"learning_rate": 9.958997188378632e-06,
"loss": 2.6953,
"step": 140
},
{
"epoch": 0.07029053420805999,
"grad_norm": 44.79791259765625,
"learning_rate": 9.956068416119962e-06,
"loss": 2.6795,
"step": 150
},
{
"epoch": 0.07497656982193064,
"grad_norm": 7.748485565185547,
"learning_rate": 9.953139643861293e-06,
"loss": 2.6179,
"step": 160
},
{
"epoch": 0.07966260543580131,
"grad_norm": 7.135361194610596,
"learning_rate": 9.950210871602624e-06,
"loss": 2.5714,
"step": 170
},
{
"epoch": 0.08434864104967198,
"grad_norm": 5.464244365692139,
"learning_rate": 9.947282099343956e-06,
"loss": 2.4817,
"step": 180
},
{
"epoch": 0.08903467666354264,
"grad_norm": 10.304727554321289,
"learning_rate": 9.944353327085287e-06,
"loss": 2.3939,
"step": 190
},
{
"epoch": 0.09372071227741331,
"grad_norm": 8.390380859375,
"learning_rate": 9.941424554826618e-06,
"loss": 2.3162,
"step": 200
},
{
"epoch": 0.09840674789128398,
"grad_norm": 7.206277847290039,
"learning_rate": 9.938495782567949e-06,
"loss": 2.2413,
"step": 210
},
{
"epoch": 0.10309278350515463,
"grad_norm": 10.72529411315918,
"learning_rate": 9.935567010309279e-06,
"loss": 2.1816,
"step": 220
},
{
"epoch": 0.1077788191190253,
"grad_norm": 8.411327362060547,
"learning_rate": 9.93263823805061e-06,
"loss": 2.0204,
"step": 230
},
{
"epoch": 0.11246485473289597,
"grad_norm": 9.118602752685547,
"learning_rate": 9.929709465791941e-06,
"loss": 1.9329,
"step": 240
},
{
"epoch": 0.11715089034676664,
"grad_norm": 11.883502960205078,
"learning_rate": 9.92678069353327e-06,
"loss": 1.8041,
"step": 250
},
{
"epoch": 0.11715089034676664,
"eval_loss": 0.20095524191856384,
"eval_pearson_cosine": 0.5629603652959432,
"eval_pearson_dot": 0.32442021258601983,
"eval_pearson_euclidean": 0.5948642130310873,
"eval_pearson_manhattan": 0.5931866084570743,
"eval_runtime": 46.3498,
"eval_samples_per_second": 32.363,
"eval_spearman_cosine": 0.5645428688364399,
"eval_spearman_dot": 0.3123519595505677,
"eval_spearman_euclidean": 0.5966715855304487,
"eval_spearman_manhattan": 0.5951499296436052,
"eval_steps_per_second": 32.363,
"step": 250
},
{
"epoch": 0.1218369259606373,
"grad_norm": 9.455839157104492,
"learning_rate": 9.923851921274602e-06,
"loss": 1.7175,
"step": 260
},
{
"epoch": 0.12652296157450796,
"grad_norm": 9.907763481140137,
"learning_rate": 9.920923149015933e-06,
"loss": 1.5752,
"step": 270
},
{
"epoch": 0.13120899718837864,
"grad_norm": 10.268372535705566,
"learning_rate": 9.917994376757264e-06,
"loss": 1.5905,
"step": 280
},
{
"epoch": 0.1358950328022493,
"grad_norm": 12.264440536499023,
"learning_rate": 9.915065604498595e-06,
"loss": 1.4994,
"step": 290
},
{
"epoch": 0.14058106841611998,
"grad_norm": 10.21927547454834,
"learning_rate": 9.912136832239926e-06,
"loss": 1.4741,
"step": 300
},
{
"epoch": 0.14526710402999063,
"grad_norm": 12.204063415527344,
"learning_rate": 9.909208059981256e-06,
"loss": 1.3685,
"step": 310
},
{
"epoch": 0.14995313964386128,
"grad_norm": 8.701486587524414,
"learning_rate": 9.906279287722587e-06,
"loss": 1.3407,
"step": 320
},
{
"epoch": 0.15463917525773196,
"grad_norm": 11.478012084960938,
"learning_rate": 9.903350515463918e-06,
"loss": 1.3996,
"step": 330
},
{
"epoch": 0.15932521087160262,
"grad_norm": 8.862137794494629,
"learning_rate": 9.90042174320525e-06,
"loss": 1.2921,
"step": 340
},
{
"epoch": 0.1640112464854733,
"grad_norm": 8.181413650512695,
"learning_rate": 9.897492970946579e-06,
"loss": 1.2948,
"step": 350
},
{
"epoch": 0.16869728209934395,
"grad_norm": 12.891910552978516,
"learning_rate": 9.89456419868791e-06,
"loss": 1.2444,
"step": 360
},
{
"epoch": 0.1733833177132146,
"grad_norm": 9.783638000488281,
"learning_rate": 9.891635426429241e-06,
"loss": 1.1765,
"step": 370
},
{
"epoch": 0.1780693533270853,
"grad_norm": 10.521812438964844,
"learning_rate": 9.888706654170573e-06,
"loss": 1.2163,
"step": 380
},
{
"epoch": 0.18275538894095594,
"grad_norm": 9.507091522216797,
"learning_rate": 9.885777881911904e-06,
"loss": 1.1555,
"step": 390
},
{
"epoch": 0.18744142455482662,
"grad_norm": 10.072102546691895,
"learning_rate": 9.882849109653235e-06,
"loss": 1.1631,
"step": 400
},
{
"epoch": 0.19212746016869728,
"grad_norm": 12.557927131652832,
"learning_rate": 9.879920337394564e-06,
"loss": 1.1319,
"step": 410
},
{
"epoch": 0.19681349578256796,
"grad_norm": 7.743768692016602,
"learning_rate": 9.876991565135896e-06,
"loss": 1.2022,
"step": 420
},
{
"epoch": 0.2014995313964386,
"grad_norm": 9.258079528808594,
"learning_rate": 9.874062792877227e-06,
"loss": 1.1219,
"step": 430
},
{
"epoch": 0.20618556701030927,
"grad_norm": 8.362629890441895,
"learning_rate": 9.871134020618558e-06,
"loss": 1.1138,
"step": 440
},
{
"epoch": 0.21087160262417995,
"grad_norm": 8.71789264678955,
"learning_rate": 9.868205248359888e-06,
"loss": 1.0473,
"step": 450
},
{
"epoch": 0.2155576382380506,
"grad_norm": 8.710640907287598,
"learning_rate": 9.865276476101219e-06,
"loss": 1.0933,
"step": 460
},
{
"epoch": 0.22024367385192128,
"grad_norm": 7.57949686050415,
"learning_rate": 9.86234770384255e-06,
"loss": 1.0429,
"step": 470
},
{
"epoch": 0.22492970946579194,
"grad_norm": 8.775091171264648,
"learning_rate": 9.859418931583881e-06,
"loss": 1.0406,
"step": 480
},
{
"epoch": 0.2296157450796626,
"grad_norm": 9.942752838134766,
"learning_rate": 9.856490159325212e-06,
"loss": 1.0526,
"step": 490
},
{
"epoch": 0.23430178069353327,
"grad_norm": 10.166437149047852,
"learning_rate": 9.853561387066542e-06,
"loss": 1.0265,
"step": 500
},
{
"epoch": 0.23430178069353327,
"eval_loss": 0.09848710149526596,
"eval_pearson_cosine": 0.7114527090607083,
"eval_pearson_dot": 0.5814656567702485,
"eval_pearson_euclidean": 0.7022168021213133,
"eval_pearson_manhattan": 0.7010309676073874,
"eval_runtime": 48.356,
"eval_samples_per_second": 31.02,
"eval_spearman_cosine": 0.7098203386273151,
"eval_spearman_dot": 0.5861254786395066,
"eval_spearman_euclidean": 0.7102590115372712,
"eval_spearman_manhattan": 0.7094011853041999,
"eval_steps_per_second": 31.02,
"step": 500
},
{
"epoch": 0.23898781630740393,
"grad_norm": 6.910321235656738,
"learning_rate": 9.850632614807873e-06,
"loss": 1.0267,
"step": 510
},
{
"epoch": 0.2436738519212746,
"grad_norm": 8.010503768920898,
"learning_rate": 9.847703842549204e-06,
"loss": 0.97,
"step": 520
},
{
"epoch": 0.24835988753514526,
"grad_norm": 8.340336799621582,
"learning_rate": 9.844775070290535e-06,
"loss": 0.9773,
"step": 530
},
{
"epoch": 0.2530459231490159,
"grad_norm": 6.75998592376709,
"learning_rate": 9.841846298031867e-06,
"loss": 0.9694,
"step": 540
},
{
"epoch": 0.25773195876288657,
"grad_norm": 6.592973709106445,
"learning_rate": 9.838917525773196e-06,
"loss": 0.9101,
"step": 550
},
{
"epoch": 0.2624179943767573,
"grad_norm": 8.13701343536377,
"learning_rate": 9.835988753514527e-06,
"loss": 0.9693,
"step": 560
},
{
"epoch": 0.26710402999062793,
"grad_norm": 10.256951332092285,
"learning_rate": 9.833059981255859e-06,
"loss": 0.9405,
"step": 570
},
{
"epoch": 0.2717900656044986,
"grad_norm": 9.521321296691895,
"learning_rate": 9.83013120899719e-06,
"loss": 0.8731,
"step": 580
},
{
"epoch": 0.27647610121836924,
"grad_norm": 7.164852142333984,
"learning_rate": 9.82720243673852e-06,
"loss": 0.9387,
"step": 590
},
{
"epoch": 0.28116213683223995,
"grad_norm": 8.326433181762695,
"learning_rate": 9.82427366447985e-06,
"loss": 0.8388,
"step": 600
},
{
"epoch": 0.2858481724461106,
"grad_norm": 8.819974899291992,
"learning_rate": 9.821344892221182e-06,
"loss": 0.9034,
"step": 610
},
{
"epoch": 0.29053420805998126,
"grad_norm": 6.0674052238464355,
"learning_rate": 9.818416119962513e-06,
"loss": 0.8225,
"step": 620
},
{
"epoch": 0.2952202436738519,
"grad_norm": 7.898690223693848,
"learning_rate": 9.815487347703844e-06,
"loss": 0.8916,
"step": 630
},
{
"epoch": 0.29990627928772257,
"grad_norm": 9.459305763244629,
"learning_rate": 9.812558575445175e-06,
"loss": 0.8771,
"step": 640
},
{
"epoch": 0.3045923149015933,
"grad_norm": 7.231110095977783,
"learning_rate": 9.809629803186505e-06,
"loss": 0.8575,
"step": 650
},
{
"epoch": 0.30927835051546393,
"grad_norm": 5.850890159606934,
"learning_rate": 9.806701030927836e-06,
"loss": 0.8294,
"step": 660
},
{
"epoch": 0.3139643861293346,
"grad_norm": 12.532159805297852,
"learning_rate": 9.803772258669167e-06,
"loss": 0.8745,
"step": 670
},
{
"epoch": 0.31865042174320524,
"grad_norm": 6.576635837554932,
"learning_rate": 9.800843486410497e-06,
"loss": 0.8167,
"step": 680
},
{
"epoch": 0.3233364573570759,
"grad_norm": 7.243174076080322,
"learning_rate": 9.797914714151828e-06,
"loss": 0.8886,
"step": 690
},
{
"epoch": 0.3280224929709466,
"grad_norm": 6.775111675262451,
"learning_rate": 9.794985941893159e-06,
"loss": 0.8205,
"step": 700
},
{
"epoch": 0.33270852858481725,
"grad_norm": 7.494016647338867,
"learning_rate": 9.79205716963449e-06,
"loss": 0.7778,
"step": 710
},
{
"epoch": 0.3373945641986879,
"grad_norm": 5.593213081359863,
"learning_rate": 9.789128397375821e-06,
"loss": 0.7875,
"step": 720
},
{
"epoch": 0.34208059981255856,
"grad_norm": 7.325387001037598,
"learning_rate": 9.786199625117153e-06,
"loss": 0.7839,
"step": 730
},
{
"epoch": 0.3467666354264292,
"grad_norm": 5.411241054534912,
"learning_rate": 9.783270852858484e-06,
"loss": 0.8363,
"step": 740
},
{
"epoch": 0.3514526710402999,
"grad_norm": 5.667125225067139,
"learning_rate": 9.780342080599813e-06,
"loss": 0.7904,
"step": 750
},
{
"epoch": 0.3514526710402999,
"eval_loss": 0.07609602808952332,
"eval_pearson_cosine": 0.7390127527190131,
"eval_pearson_dot": 0.6193519334256266,
"eval_pearson_euclidean": 0.7286540107637123,
"eval_pearson_manhattan": 0.7280163166143723,
"eval_runtime": 48.6286,
"eval_samples_per_second": 30.846,
"eval_spearman_cosine": 0.7392385981828663,
"eval_spearman_dot": 0.6275059521836013,
"eval_spearman_euclidean": 0.7379755721813188,
"eval_spearman_manhattan": 0.7372480627669395,
"eval_steps_per_second": 30.846,
"step": 750
},
{
"epoch": 0.3561387066541706,
"grad_norm": 5.931227207183838,
"learning_rate": 9.777413308341144e-06,
"loss": 0.7801,
"step": 760
},
{
"epoch": 0.36082474226804123,
"grad_norm": 5.550874710083008,
"learning_rate": 9.774484536082474e-06,
"loss": 0.7466,
"step": 770
},
{
"epoch": 0.3655107778819119,
"grad_norm": 5.67214298248291,
"learning_rate": 9.771555763823805e-06,
"loss": 0.7561,
"step": 780
},
{
"epoch": 0.3701968134957826,
"grad_norm": 5.121714115142822,
"learning_rate": 9.768626991565136e-06,
"loss": 0.7395,
"step": 790
},
{
"epoch": 0.37488284910965325,
"grad_norm": 4.957924842834473,
"learning_rate": 9.765698219306467e-06,
"loss": 0.7368,
"step": 800
},
{
"epoch": 0.3795688847235239,
"grad_norm": 6.30219030380249,
"learning_rate": 9.762769447047799e-06,
"loss": 0.8091,
"step": 810
},
{
"epoch": 0.38425492033739456,
"grad_norm": 6.518470287322998,
"learning_rate": 9.75984067478913e-06,
"loss": 0.7525,
"step": 820
},
{
"epoch": 0.3889409559512652,
"grad_norm": 6.101437568664551,
"learning_rate": 9.756911902530461e-06,
"loss": 0.7263,
"step": 830
},
{
"epoch": 0.3936269915651359,
"grad_norm": 5.428840160369873,
"learning_rate": 9.75398313027179e-06,
"loss": 0.7881,
"step": 840
},
{
"epoch": 0.3983130271790066,
"grad_norm": 7.170475482940674,
"learning_rate": 9.751054358013122e-06,
"loss": 0.7218,
"step": 850
},
{
"epoch": 0.4029990627928772,
"grad_norm": 6.153990745544434,
"learning_rate": 9.748125585754453e-06,
"loss": 0.748,
"step": 860
},
{
"epoch": 0.4076850984067479,
"grad_norm": 5.364086151123047,
"learning_rate": 9.745196813495782e-06,
"loss": 0.786,
"step": 870
},
{
"epoch": 0.41237113402061853,
"grad_norm": 5.541423797607422,
"learning_rate": 9.742268041237114e-06,
"loss": 0.7427,
"step": 880
},
{
"epoch": 0.41705716963448924,
"grad_norm": 5.1667022705078125,
"learning_rate": 9.739339268978445e-06,
"loss": 0.6918,
"step": 890
},
{
"epoch": 0.4217432052483599,
"grad_norm": 4.839612007141113,
"learning_rate": 9.736410496719776e-06,
"loss": 0.7056,
"step": 900
},
{
"epoch": 0.42642924086223055,
"grad_norm": 4.407963275909424,
"learning_rate": 9.733481724461107e-06,
"loss": 0.6313,
"step": 910
},
{
"epoch": 0.4311152764761012,
"grad_norm": 7.052595138549805,
"learning_rate": 9.730552952202438e-06,
"loss": 0.7489,
"step": 920
},
{
"epoch": 0.43580131208997186,
"grad_norm": 5.71290397644043,
"learning_rate": 9.727624179943768e-06,
"loss": 0.6578,
"step": 930
},
{
"epoch": 0.44048734770384257,
"grad_norm": 6.3575825691223145,
"learning_rate": 9.724695407685099e-06,
"loss": 0.6914,
"step": 940
},
{
"epoch": 0.4451733833177132,
"grad_norm": 5.223476886749268,
"learning_rate": 9.72176663542643e-06,
"loss": 0.6494,
"step": 950
},
{
"epoch": 0.4498594189315839,
"grad_norm": 6.220378398895264,
"learning_rate": 9.71883786316776e-06,
"loss": 0.6996,
"step": 960
},
{
"epoch": 0.45454545454545453,
"grad_norm": 6.475409507751465,
"learning_rate": 9.715909090909091e-06,
"loss": 0.721,
"step": 970
},
{
"epoch": 0.4592314901593252,
"grad_norm": 5.10095739364624,
"learning_rate": 9.712980318650422e-06,
"loss": 0.6734,
"step": 980
},
{
"epoch": 0.4639175257731959,
"grad_norm": 7.8438801765441895,
"learning_rate": 9.710051546391753e-06,
"loss": 0.7409,
"step": 990
},
{
"epoch": 0.46860356138706655,
"grad_norm": 5.446135997772217,
"learning_rate": 9.707122774133085e-06,
"loss": 0.6772,
"step": 1000
},
{
"epoch": 0.46860356138706655,
"eval_loss": 0.06938865035772324,
"eval_pearson_cosine": 0.7523242546763527,
"eval_pearson_dot": 0.6339033623348058,
"eval_pearson_euclidean": 0.7449881727323344,
"eval_pearson_manhattan": 0.7443626147120028,
"eval_runtime": 47.885,
"eval_samples_per_second": 31.325,
"eval_spearman_cosine": 0.7542578168613095,
"eval_spearman_dot": 0.6408093688850417,
"eval_spearman_euclidean": 0.7532432307302356,
"eval_spearman_manhattan": 0.7526380381288565,
"eval_steps_per_second": 31.325,
"step": 1000
},
{
"epoch": 0.4732895970009372,
"grad_norm": 6.391997814178467,
"learning_rate": 9.704194001874416e-06,
"loss": 0.6965,
"step": 1010
},
{
"epoch": 0.47797563261480785,
"grad_norm": 5.345996379852295,
"learning_rate": 9.701265229615747e-06,
"loss": 0.6447,
"step": 1020
},
{
"epoch": 0.48266166822867856,
"grad_norm": 5.60822057723999,
"learning_rate": 9.698336457357076e-06,
"loss": 0.6854,
"step": 1030
},
{
"epoch": 0.4873477038425492,
"grad_norm": 6.488014221191406,
"learning_rate": 9.695407685098408e-06,
"loss": 0.7089,
"step": 1040
},
{
"epoch": 0.49203373945641987,
"grad_norm": 5.387355804443359,
"learning_rate": 9.692478912839737e-06,
"loss": 0.6949,
"step": 1050
},
{
"epoch": 0.4967197750702905,
"grad_norm": 5.179281234741211,
"learning_rate": 9.689550140581068e-06,
"loss": 0.6571,
"step": 1060
},
{
"epoch": 0.5014058106841612,
"grad_norm": 5.786458492279053,
"learning_rate": 9.6866213683224e-06,
"loss": 0.7154,
"step": 1070
},
{
"epoch": 0.5060918462980318,
"grad_norm": 6.279985427856445,
"learning_rate": 9.68369259606373e-06,
"loss": 0.6757,
"step": 1080
},
{
"epoch": 0.5107778819119025,
"grad_norm": 4.793182849884033,
"learning_rate": 9.680763823805062e-06,
"loss": 0.7136,
"step": 1090
},
{
"epoch": 0.5154639175257731,
"grad_norm": 7.646529674530029,
"learning_rate": 9.677835051546393e-06,
"loss": 0.6396,
"step": 1100
},
{
"epoch": 0.5201499531396439,
"grad_norm": 5.7034912109375,
"learning_rate": 9.674906279287724e-06,
"loss": 0.665,
"step": 1110
},
{
"epoch": 0.5248359887535146,
"grad_norm": 6.54317045211792,
"learning_rate": 9.671977507029054e-06,
"loss": 0.6713,
"step": 1120
},
{
"epoch": 0.5295220243673852,
"grad_norm": 5.6496806144714355,
"learning_rate": 9.669048734770385e-06,
"loss": 0.6876,
"step": 1130
},
{
"epoch": 0.5342080599812559,
"grad_norm": 5.326486110687256,
"learning_rate": 9.666119962511716e-06,
"loss": 0.6951,
"step": 1140
},
{
"epoch": 0.5388940955951266,
"grad_norm": 5.124545574188232,
"learning_rate": 9.663191190253046e-06,
"loss": 0.6388,
"step": 1150
},
{
"epoch": 0.5435801312089972,
"grad_norm": 4.34152364730835,
"learning_rate": 9.660262417994377e-06,
"loss": 0.6322,
"step": 1160
},
{
"epoch": 0.5482661668228679,
"grad_norm": 8.722075462341309,
"learning_rate": 9.657333645735708e-06,
"loss": 0.6776,
"step": 1170
},
{
"epoch": 0.5529522024367385,
"grad_norm": 5.417623996734619,
"learning_rate": 9.65440487347704e-06,
"loss": 0.6492,
"step": 1180
},
{
"epoch": 0.5576382380506092,
"grad_norm": 4.369041919708252,
"learning_rate": 9.65147610121837e-06,
"loss": 0.6039,
"step": 1190
},
{
"epoch": 0.5623242736644799,
"grad_norm": 6.5720062255859375,
"learning_rate": 9.648547328959702e-06,
"loss": 0.6911,
"step": 1200
},
{
"epoch": 0.5670103092783505,
"grad_norm": 7.112950325012207,
"learning_rate": 9.645618556701031e-06,
"loss": 0.6214,
"step": 1210
},
{
"epoch": 0.5716963448922212,
"grad_norm": 5.643182277679443,
"learning_rate": 9.642689784442362e-06,
"loss": 0.6959,
"step": 1220
},
{
"epoch": 0.5763823805060918,
"grad_norm": 5.078190803527832,
"learning_rate": 9.639761012183694e-06,
"loss": 0.6633,
"step": 1230
},
{
"epoch": 0.5810684161199625,
"grad_norm": 5.247280120849609,
"learning_rate": 9.636832239925025e-06,
"loss": 0.6415,
"step": 1240
},
{
"epoch": 0.5857544517338332,
"grad_norm": 5.110747814178467,
"learning_rate": 9.633903467666354e-06,
"loss": 0.6031,
"step": 1250
},
{
"epoch": 0.5857544517338332,
"eval_loss": 0.06345358490943909,
"eval_pearson_cosine": 0.7580338914962539,
"eval_pearson_dot": 0.6394158052533783,
"eval_pearson_euclidean": 0.7521759780114508,
"eval_pearson_manhattan": 0.7513571158009427,
"eval_runtime": 44.2242,
"eval_samples_per_second": 33.918,
"eval_spearman_cosine": 0.758882658229917,
"eval_spearman_dot": 0.6455380162932587,
"eval_spearman_euclidean": 0.7604619351541958,
"eval_spearman_manhattan": 0.7599139087493931,
"eval_steps_per_second": 33.918,
"step": 1250
},
{
"epoch": 0.5904404873477038,
"grad_norm": 6.717201232910156,
"learning_rate": 9.630974695407685e-06,
"loss": 0.6553,
"step": 1260
},
{
"epoch": 0.5951265229615745,
"grad_norm": 6.948915004730225,
"learning_rate": 9.628045923149017e-06,
"loss": 0.6528,
"step": 1270
},
{
"epoch": 0.5998125585754451,
"grad_norm": 5.585124969482422,
"learning_rate": 9.625117150890348e-06,
"loss": 0.6125,
"step": 1280
},
{
"epoch": 0.6044985941893158,
"grad_norm": 4.020166397094727,
"learning_rate": 9.622188378631679e-06,
"loss": 0.5857,
"step": 1290
},
{
"epoch": 0.6091846298031866,
"grad_norm": 4.905421257019043,
"learning_rate": 9.619259606373008e-06,
"loss": 0.6128,
"step": 1300
},
{
"epoch": 0.6138706654170571,
"grad_norm": 5.642446517944336,
"learning_rate": 9.61633083411434e-06,
"loss": 0.6177,
"step": 1310
},
{
"epoch": 0.6185567010309279,
"grad_norm": 5.623671531677246,
"learning_rate": 9.613402061855671e-06,
"loss": 0.6076,
"step": 1320
},
{
"epoch": 0.6232427366447985,
"grad_norm": 3.6249349117279053,
"learning_rate": 9.610473289597002e-06,
"loss": 0.5987,
"step": 1330
},
{
"epoch": 0.6279287722586692,
"grad_norm": 4.7242608070373535,
"learning_rate": 9.607544517338333e-06,
"loss": 0.6082,
"step": 1340
},
{
"epoch": 0.6326148078725399,
"grad_norm": 9.071741104125977,
"learning_rate": 9.604615745079663e-06,
"loss": 0.6369,
"step": 1350
},
{
"epoch": 0.6373008434864105,
"grad_norm": 5.471718788146973,
"learning_rate": 9.601686972820994e-06,
"loss": 0.6235,
"step": 1360
},
{
"epoch": 0.6419868791002812,
"grad_norm": 6.0755934715271,
"learning_rate": 9.598758200562325e-06,
"loss": 0.6197,
"step": 1370
},
{
"epoch": 0.6466729147141518,
"grad_norm": 5.650800704956055,
"learning_rate": 9.595829428303656e-06,
"loss": 0.5947,
"step": 1380
},
{
"epoch": 0.6513589503280225,
"grad_norm": 4.409568786621094,
"learning_rate": 9.592900656044986e-06,
"loss": 0.6632,
"step": 1390
},
{
"epoch": 0.6560449859418932,
"grad_norm": 6.575608730316162,
"learning_rate": 9.589971883786317e-06,
"loss": 0.5655,
"step": 1400
},
{
"epoch": 0.6607310215557638,
"grad_norm": 4.897518634796143,
"learning_rate": 9.587043111527648e-06,
"loss": 0.6064,
"step": 1410
},
{
"epoch": 0.6654170571696345,
"grad_norm": 4.505845546722412,
"learning_rate": 9.58411433926898e-06,
"loss": 0.6217,
"step": 1420
},
{
"epoch": 0.6701030927835051,
"grad_norm": 11.04179573059082,
"learning_rate": 9.58118556701031e-06,
"loss": 0.626,
"step": 1430
},
{
"epoch": 0.6747891283973758,
"grad_norm": 7.031481742858887,
"learning_rate": 9.578256794751642e-06,
"loss": 0.6644,
"step": 1440
},
{
"epoch": 0.6794751640112465,
"grad_norm": 5.177082061767578,
"learning_rate": 9.575328022492971e-06,
"loss": 0.5794,
"step": 1450
},
{
"epoch": 0.6841611996251171,
"grad_norm": 5.830789566040039,
"learning_rate": 9.572399250234303e-06,
"loss": 0.5962,
"step": 1460
},
{
"epoch": 0.6888472352389878,
"grad_norm": 5.322279453277588,
"learning_rate": 9.569470477975634e-06,
"loss": 0.5528,
"step": 1470
},
{
"epoch": 0.6935332708528584,
"grad_norm": 5.191045761108398,
"learning_rate": 9.566541705716965e-06,
"loss": 0.602,
"step": 1480
},
{
"epoch": 0.6982193064667291,
"grad_norm": 4.832320213317871,
"learning_rate": 9.563612933458294e-06,
"loss": 0.5732,
"step": 1490
},
{
"epoch": 0.7029053420805998,
"grad_norm": 5.9457926750183105,
"learning_rate": 9.560684161199626e-06,
"loss": 0.6017,
"step": 1500
},
{
"epoch": 0.7029053420805998,
"eval_loss": 0.059113115072250366,
"eval_pearson_cosine": 0.7675747794888963,
"eval_pearson_dot": 0.6475892776570333,
"eval_pearson_euclidean": 0.7594640382486553,
"eval_pearson_manhattan": 0.7585029707701096,
"eval_runtime": 45.7613,
"eval_samples_per_second": 32.779,
"eval_spearman_cosine": 0.768339335776319,
"eval_spearman_dot": 0.655445685087582,
"eval_spearman_euclidean": 0.7680811238488432,
"eval_spearman_manhattan": 0.7673055147561156,
"eval_steps_per_second": 32.779,
"step": 1500
},
{
"epoch": 0.7075913776944704,
"grad_norm": 4.822035789489746,
"learning_rate": 9.557755388940957e-06,
"loss": 0.5891,
"step": 1510
},
{
"epoch": 0.7122774133083412,
"grad_norm": 7.0355753898620605,
"learning_rate": 9.554826616682288e-06,
"loss": 0.6019,
"step": 1520
},
{
"epoch": 0.7169634489222118,
"grad_norm": 7.064100742340088,
"learning_rate": 9.55189784442362e-06,
"loss": 0.5656,
"step": 1530
},
{
"epoch": 0.7216494845360825,
"grad_norm": 4.629329204559326,
"learning_rate": 9.54896907216495e-06,
"loss": 0.5839,
"step": 1540
},
{
"epoch": 0.7263355201499532,
"grad_norm": 5.421347141265869,
"learning_rate": 9.54604029990628e-06,
"loss": 0.5684,
"step": 1550
},
{
"epoch": 0.7310215557638238,
"grad_norm": 4.520521640777588,
"learning_rate": 9.543111527647611e-06,
"loss": 0.5979,
"step": 1560
},
{
"epoch": 0.7357075913776945,
"grad_norm": 5.172377109527588,
"learning_rate": 9.540182755388942e-06,
"loss": 0.5678,
"step": 1570
},
{
"epoch": 0.7403936269915652,
"grad_norm": 5.090722560882568,
"learning_rate": 9.537253983130272e-06,
"loss": 0.556,
"step": 1580
},
{
"epoch": 0.7450796626054358,
"grad_norm": 4.6714887619018555,
"learning_rate": 9.534325210871603e-06,
"loss": 0.564,
"step": 1590
},
{
"epoch": 0.7497656982193065,
"grad_norm": 4.211735248565674,
"learning_rate": 9.531396438612934e-06,
"loss": 0.617,
"step": 1600
},
{
"epoch": 0.7544517338331771,
"grad_norm": 4.693171501159668,
"learning_rate": 9.528467666354265e-06,
"loss": 0.5657,
"step": 1610
},
{
"epoch": 0.7591377694470478,
"grad_norm": 6.890966892242432,
"learning_rate": 9.525538894095597e-06,
"loss": 0.5838,
"step": 1620
},
{
"epoch": 0.7638238050609185,
"grad_norm": 3.5127806663513184,
"learning_rate": 9.522610121836928e-06,
"loss": 0.5669,
"step": 1630
},
{
"epoch": 0.7685098406747891,
"grad_norm": 4.389316082000732,
"learning_rate": 9.519681349578259e-06,
"loss": 0.5669,
"step": 1640
},
{
"epoch": 0.7731958762886598,
"grad_norm": 4.59335470199585,
"learning_rate": 9.516752577319588e-06,
"loss": 0.604,
"step": 1650
},
{
"epoch": 0.7778819119025304,
"grad_norm": 5.345147132873535,
"learning_rate": 9.51382380506092e-06,
"loss": 0.6132,
"step": 1660
},
{
"epoch": 0.7825679475164011,
"grad_norm": 5.133398532867432,
"learning_rate": 9.510895032802249e-06,
"loss": 0.5539,
"step": 1670
},
{
"epoch": 0.7872539831302718,
"grad_norm": 7.907310962677002,
"learning_rate": 9.50796626054358e-06,
"loss": 0.61,
"step": 1680
},
{
"epoch": 0.7919400187441424,
"grad_norm": 4.504448890686035,
"learning_rate": 9.505037488284911e-06,
"loss": 0.5851,
"step": 1690
},
{
"epoch": 0.7966260543580131,
"grad_norm": 4.3662028312683105,
"learning_rate": 9.502108716026243e-06,
"loss": 0.5915,
"step": 1700
},
{
"epoch": 0.8013120899718837,
"grad_norm": 5.221836566925049,
"learning_rate": 9.499179943767574e-06,
"loss": 0.581,
"step": 1710
},
{
"epoch": 0.8059981255857545,
"grad_norm": 6.357667446136475,
"learning_rate": 9.496251171508905e-06,
"loss": 0.5937,
"step": 1720
},
{
"epoch": 0.8106841611996252,
"grad_norm": 6.262212753295898,
"learning_rate": 9.493322399250236e-06,
"loss": 0.606,
"step": 1730
},
{
"epoch": 0.8153701968134958,
"grad_norm": 4.363849639892578,
"learning_rate": 9.490393626991566e-06,
"loss": 0.5524,
"step": 1740
},
{
"epoch": 0.8200562324273665,
"grad_norm": 5.514476299285889,
"learning_rate": 9.487464854732897e-06,
"loss": 0.5611,
"step": 1750
},
{
"epoch": 0.8200562324273665,
"eval_loss": 0.05879165977239609,
"eval_pearson_cosine": 0.7714099892705395,
"eval_pearson_dot": 0.6462212772089089,
"eval_pearson_euclidean": 0.7641084348061273,
"eval_pearson_manhattan": 0.7629885828620147,
"eval_runtime": 43.6421,
"eval_samples_per_second": 34.37,
"eval_spearman_cosine": 0.7720168259371313,
"eval_spearman_dot": 0.6536245076677092,
"eval_spearman_euclidean": 0.7726348092699838,
"eval_spearman_manhattan": 0.7716062900578692,
"eval_steps_per_second": 34.37,
"step": 1750
},
{
"epoch": 0.8247422680412371,
"grad_norm": 6.260695457458496,
"learning_rate": 9.484536082474226e-06,
"loss": 0.5566,
"step": 1760
},
{
"epoch": 0.8294283036551078,
"grad_norm": 4.187561511993408,
"learning_rate": 9.481607310215558e-06,
"loss": 0.5077,
"step": 1770
},
{
"epoch": 0.8341143392689785,
"grad_norm": 4.611522197723389,
"learning_rate": 9.478678537956889e-06,
"loss": 0.5449,
"step": 1780
},
{
"epoch": 0.8388003748828491,
"grad_norm": 12.466484069824219,
"learning_rate": 9.47574976569822e-06,
"loss": 0.5744,
"step": 1790
},
{
"epoch": 0.8434864104967198,
"grad_norm": 4.683777332305908,
"learning_rate": 9.472820993439551e-06,
"loss": 0.5102,
"step": 1800
},
{
"epoch": 0.8481724461105904,
"grad_norm": 5.541889190673828,
"learning_rate": 9.469892221180882e-06,
"loss": 0.5589,
"step": 1810
},
{
"epoch": 0.8528584817244611,
"grad_norm": 8.524742126464844,
"learning_rate": 9.466963448922214e-06,
"loss": 0.5872,
"step": 1820
},
{
"epoch": 0.8575445173383318,
"grad_norm": 7.117620944976807,
"learning_rate": 9.464034676663543e-06,
"loss": 0.5484,
"step": 1830
},
{
"epoch": 0.8622305529522024,
"grad_norm": 5.3457841873168945,
"learning_rate": 9.461105904404874e-06,
"loss": 0.5624,
"step": 1840
},
{
"epoch": 0.8669165885660731,
"grad_norm": 4.375561714172363,
"learning_rate": 9.458177132146204e-06,
"loss": 0.525,
"step": 1850
},
{
"epoch": 0.8716026241799437,
"grad_norm": 4.6026082038879395,
"learning_rate": 9.455248359887535e-06,
"loss": 0.5855,
"step": 1860
},
{
"epoch": 0.8762886597938144,
"grad_norm": 5.399001121520996,
"learning_rate": 9.452319587628866e-06,
"loss": 0.5775,
"step": 1870
},
{
"epoch": 0.8809746954076851,
"grad_norm": 3.9378573894500732,
"learning_rate": 9.449390815370197e-06,
"loss": 0.5068,
"step": 1880
},
{
"epoch": 0.8856607310215557,
"grad_norm": 5.515146255493164,
"learning_rate": 9.446462043111529e-06,
"loss": 0.5718,
"step": 1890
},
{
"epoch": 0.8903467666354264,
"grad_norm": 4.8671345710754395,
"learning_rate": 9.44353327085286e-06,
"loss": 0.5552,
"step": 1900
},
{
"epoch": 0.895032802249297,
"grad_norm": 5.388006210327148,
"learning_rate": 9.440604498594191e-06,
"loss": 0.5854,
"step": 1910
},
{
"epoch": 0.8997188378631678,
"grad_norm": 6.608395099639893,
"learning_rate": 9.43767572633552e-06,
"loss": 0.5459,
"step": 1920
},
{
"epoch": 0.9044048734770385,
"grad_norm": 4.6435160636901855,
"learning_rate": 9.434746954076852e-06,
"loss": 0.529,
"step": 1930
},
{
"epoch": 0.9090909090909091,
"grad_norm": 4.642300605773926,
"learning_rate": 9.431818181818183e-06,
"loss": 0.5255,
"step": 1940
},
{
"epoch": 0.9137769447047798,
"grad_norm": 5.40919828414917,
"learning_rate": 9.428889409559512e-06,
"loss": 0.5605,
"step": 1950
},
{
"epoch": 0.9184629803186504,
"grad_norm": 4.9874467849731445,
"learning_rate": 9.425960637300844e-06,
"loss": 0.5798,
"step": 1960
},
{
"epoch": 0.9231490159325211,
"grad_norm": 4.9304094314575195,
"learning_rate": 9.423031865042175e-06,
"loss": 0.5576,
"step": 1970
},
{
"epoch": 0.9278350515463918,
"grad_norm": 5.080467224121094,
"learning_rate": 9.420103092783506e-06,
"loss": 0.5221,
"step": 1980
},
{
"epoch": 0.9325210871602624,
"grad_norm": 5.083141326904297,
"learning_rate": 9.417174320524837e-06,
"loss": 0.6041,
"step": 1990
},
{
"epoch": 0.9372071227741331,
"grad_norm": 3.8194010257720947,
"learning_rate": 9.414245548266168e-06,
"loss": 0.5439,
"step": 2000
},
{
"epoch": 0.9372071227741331,
"eval_loss": 0.058015577495098114,
"eval_pearson_cosine": 0.7772706274362164,
"eval_pearson_dot": 0.6518150260238968,
"eval_pearson_euclidean": 0.7681856098914253,
"eval_pearson_manhattan": 0.7668726914631314,
"eval_runtime": 45.6952,
"eval_samples_per_second": 32.826,
"eval_spearman_cosine": 0.7781983730395821,
"eval_spearman_dot": 0.6578238148510893,
"eval_spearman_euclidean": 0.7779674226973379,
"eval_spearman_manhattan": 0.7766391726420421,
"eval_steps_per_second": 32.826,
"step": 2000
},
{
"epoch": 0.9418931583880038,
"grad_norm": 5.383081912994385,
"learning_rate": 9.411316776007498e-06,
"loss": 0.5343,
"step": 2010
},
{
"epoch": 0.9465791940018744,
"grad_norm": 5.533719539642334,
"learning_rate": 9.408388003748829e-06,
"loss": 0.5313,
"step": 2020
},
{
"epoch": 0.9512652296157451,
"grad_norm": 4.267172336578369,
"learning_rate": 9.40545923149016e-06,
"loss": 0.5172,
"step": 2030
},
{
"epoch": 0.9559512652296157,
"grad_norm": 4.8553009033203125,
"learning_rate": 9.402530459231491e-06,
"loss": 0.5104,
"step": 2040
},
{
"epoch": 0.9606373008434864,
"grad_norm": 6.460834503173828,
"learning_rate": 9.399601686972821e-06,
"loss": 0.5225,
"step": 2050
},
{
"epoch": 0.9653233364573571,
"grad_norm": 27.46290397644043,
"learning_rate": 9.396672914714152e-06,
"loss": 0.544,
"step": 2060
},
{
"epoch": 0.9700093720712277,
"grad_norm": 4.89717435836792,
"learning_rate": 9.393744142455483e-06,
"loss": 0.5653,
"step": 2070
},
{
"epoch": 0.9746954076850984,
"grad_norm": 4.803583145141602,
"learning_rate": 9.390815370196814e-06,
"loss": 0.5739,
"step": 2080
},
{
"epoch": 0.979381443298969,
"grad_norm": 4.121029853820801,
"learning_rate": 9.387886597938146e-06,
"loss": 0.5192,
"step": 2090
},
{
"epoch": 0.9840674789128397,
"grad_norm": 4.464984893798828,
"learning_rate": 9.384957825679475e-06,
"loss": 0.5393,
"step": 2100
},
{
"epoch": 0.9887535145267105,
"grad_norm": 6.364498615264893,
"learning_rate": 9.382029053420806e-06,
"loss": 0.5764,
"step": 2110
},
{
"epoch": 0.993439550140581,
"grad_norm": 3.743790864944458,
"learning_rate": 9.379100281162138e-06,
"loss": 0.5276,
"step": 2120
},
{
"epoch": 0.9981255857544518,
"grad_norm": 4.737389087677002,
"learning_rate": 9.376171508903469e-06,
"loss": 0.5211,
"step": 2130
},
{
"epoch": 1.0028116213683225,
"grad_norm": 3.622758626937866,
"learning_rate": 9.3732427366448e-06,
"loss": 0.5329,
"step": 2140
},
{
"epoch": 1.007497656982193,
"grad_norm": 3.5359978675842285,
"learning_rate": 9.37031396438613e-06,
"loss": 0.4941,
"step": 2150
},
{
"epoch": 1.0121836925960637,
"grad_norm": 4.669582843780518,
"learning_rate": 9.36738519212746e-06,
"loss": 0.4821,
"step": 2160
},
{
"epoch": 1.0168697282099344,
"grad_norm": 3.767122507095337,
"learning_rate": 9.364456419868792e-06,
"loss": 0.4886,
"step": 2170
},
{
"epoch": 1.021555763823805,
"grad_norm": 3.9681687355041504,
"learning_rate": 9.361527647610123e-06,
"loss": 0.493,
"step": 2180
},
{
"epoch": 1.0262417994376758,
"grad_norm": 3.389897108078003,
"learning_rate": 9.358598875351454e-06,
"loss": 0.4688,
"step": 2190
},
{
"epoch": 1.0309278350515463,
"grad_norm": 3.5152347087860107,
"learning_rate": 9.355670103092784e-06,
"loss": 0.4625,
"step": 2200
},
{
"epoch": 1.035613870665417,
"grad_norm": 3.23901629447937,
"learning_rate": 9.352741330834115e-06,
"loss": 0.5143,
"step": 2210
},
{
"epoch": 1.0402999062792877,
"grad_norm": 4.617633819580078,
"learning_rate": 9.349812558575446e-06,
"loss": 0.4732,
"step": 2220
},
{
"epoch": 1.0449859418931584,
"grad_norm": 5.245469570159912,
"learning_rate": 9.346883786316777e-06,
"loss": 0.5213,
"step": 2230
},
{
"epoch": 1.0496719775070291,
"grad_norm": 4.20419454574585,
"learning_rate": 9.343955014058108e-06,
"loss": 0.5042,
"step": 2240
},
{
"epoch": 1.0543580131208996,
"grad_norm": 4.6322102546691895,
"learning_rate": 9.341026241799438e-06,
"loss": 0.4982,
"step": 2250
},
{
"epoch": 1.0543580131208996,
"eval_loss": 0.05779802054166794,
"eval_pearson_cosine": 0.7770314842083366,
"eval_pearson_dot": 0.6498110843024136,
"eval_pearson_euclidean": 0.7709013065859232,
"eval_pearson_manhattan": 0.7695278239114174,
"eval_runtime": 48.4856,
"eval_samples_per_second": 30.937,
"eval_spearman_cosine": 0.7783328375480574,
"eval_spearman_dot": 0.6551905692522538,
"eval_spearman_euclidean": 0.7802862933680744,
"eval_spearman_manhattan": 0.7790525675974715,
"eval_steps_per_second": 30.937,
"step": 2250
},
{
"epoch": 1.0590440487347703,
"grad_norm": 4.474431991577148,
"learning_rate": 9.33809746954077e-06,
"loss": 0.5227,
"step": 2260
},
{
"epoch": 1.063730084348641,
"grad_norm": 4.538947105407715,
"learning_rate": 9.3351686972821e-06,
"loss": 0.5158,
"step": 2270
},
{
"epoch": 1.0684161199625117,
"grad_norm": 6.6143693923950195,
"learning_rate": 9.332239925023432e-06,
"loss": 0.461,
"step": 2280
},
{
"epoch": 1.0731021555763824,
"grad_norm": 4.316189765930176,
"learning_rate": 9.329311152764761e-06,
"loss": 0.5079,
"step": 2290
},
{
"epoch": 1.077788191190253,
"grad_norm": 4.054687976837158,
"learning_rate": 9.326382380506092e-06,
"loss": 0.5022,
"step": 2300
},
{
"epoch": 1.0824742268041236,
"grad_norm": 4.232051849365234,
"learning_rate": 9.323453608247423e-06,
"loss": 0.5096,
"step": 2310
},
{
"epoch": 1.0871602624179943,
"grad_norm": 3.7785236835479736,
"learning_rate": 9.320524835988755e-06,
"loss": 0.4614,
"step": 2320
},
{
"epoch": 1.091846298031865,
"grad_norm": 4.865905284881592,
"learning_rate": 9.317596063730086e-06,
"loss": 0.5135,
"step": 2330
},
{
"epoch": 1.0965323336457358,
"grad_norm": 4.681485176086426,
"learning_rate": 9.314667291471417e-06,
"loss": 0.5061,
"step": 2340
},
{
"epoch": 1.1012183692596063,
"grad_norm": 4.256619453430176,
"learning_rate": 9.311738519212747e-06,
"loss": 0.4627,
"step": 2350
},
{
"epoch": 1.105904404873477,
"grad_norm": 4.459606170654297,
"learning_rate": 9.308809746954078e-06,
"loss": 0.5171,
"step": 2360
},
{
"epoch": 1.1105904404873477,
"grad_norm": 4.008665084838867,
"learning_rate": 9.305880974695409e-06,
"loss": 0.4422,
"step": 2370
},
{
"epoch": 1.1152764761012184,
"grad_norm": 3.674177885055542,
"learning_rate": 9.302952202436738e-06,
"loss": 0.5233,
"step": 2380
},
{
"epoch": 1.119962511715089,
"grad_norm": 4.463940620422363,
"learning_rate": 9.30002343017807e-06,
"loss": 0.4731,
"step": 2390
},
{
"epoch": 1.1246485473289598,
"grad_norm": 3.9289097785949707,
"learning_rate": 9.2970946579194e-06,
"loss": 0.4869,
"step": 2400
},
{
"epoch": 1.1293345829428303,
"grad_norm": 4.097565174102783,
"learning_rate": 9.294165885660732e-06,
"loss": 0.4594,
"step": 2410
},
{
"epoch": 1.134020618556701,
"grad_norm": 4.55318546295166,
"learning_rate": 9.291237113402063e-06,
"loss": 0.494,
"step": 2420
},
{
"epoch": 1.1387066541705717,
"grad_norm": 4.425617694854736,
"learning_rate": 9.288308341143394e-06,
"loss": 0.4829,
"step": 2430
},
{
"epoch": 1.1433926897844424,
"grad_norm": 3.908015489578247,
"learning_rate": 9.285379568884726e-06,
"loss": 0.4793,
"step": 2440
},
{
"epoch": 1.148078725398313,
"grad_norm": 3.7293996810913086,
"learning_rate": 9.282450796626055e-06,
"loss": 0.5399,
"step": 2450
},
{
"epoch": 1.1527647610121836,
"grad_norm": 4.584681034088135,
"learning_rate": 9.279522024367386e-06,
"loss": 0.4479,
"step": 2460
},
{
"epoch": 1.1574507966260543,
"grad_norm": 4.109914302825928,
"learning_rate": 9.276593252108716e-06,
"loss": 0.4599,
"step": 2470
},
{
"epoch": 1.162136832239925,
"grad_norm": 4.446422100067139,
"learning_rate": 9.273664479850047e-06,
"loss": 0.4727,
"step": 2480
},
{
"epoch": 1.1668228678537957,
"grad_norm": 5.975160598754883,
"learning_rate": 9.270735707591378e-06,
"loss": 0.4509,
"step": 2490
},
{
"epoch": 1.1715089034676662,
"grad_norm": 4.379275321960449,
"learning_rate": 9.26780693533271e-06,
"loss": 0.4828,
"step": 2500
},
{
"epoch": 1.1715089034676662,
"eval_loss": 0.05214480683207512,
"eval_pearson_cosine": 0.7792755247272061,
"eval_pearson_dot": 0.6569300577465214,
"eval_pearson_euclidean": 0.7718322585231894,
"eval_pearson_manhattan": 0.7703922250718165,
"eval_runtime": 47.8089,
"eval_samples_per_second": 31.375,
"eval_spearman_cosine": 0.7799819701975583,
"eval_spearman_dot": 0.662507389274304,
"eval_spearman_euclidean": 0.7818437831063969,
"eval_spearman_manhattan": 0.7805341558401507,
"eval_steps_per_second": 31.375,
"step": 2500
},
{
"epoch": 1.176194939081537,
"grad_norm": 3.5287399291992188,
"learning_rate": 9.26487816307404e-06,
"loss": 0.4591,
"step": 2510
},
{
"epoch": 1.1808809746954076,
"grad_norm": 3.277655601501465,
"learning_rate": 9.261949390815372e-06,
"loss": 0.4479,
"step": 2520
},
{
"epoch": 1.1855670103092784,
"grad_norm": 4.732039451599121,
"learning_rate": 9.259020618556703e-06,
"loss": 0.461,
"step": 2530
},
{
"epoch": 1.190253045923149,
"grad_norm": 4.4760966300964355,
"learning_rate": 9.256091846298032e-06,
"loss": 0.4652,
"step": 2540
},
{
"epoch": 1.1949390815370198,
"grad_norm": 7.485498428344727,
"learning_rate": 9.253163074039364e-06,
"loss": 0.4779,
"step": 2550
},
{
"epoch": 1.1996251171508903,
"grad_norm": 3.9956140518188477,
"learning_rate": 9.250234301780693e-06,
"loss": 0.4567,
"step": 2560
},
{
"epoch": 1.204311152764761,
"grad_norm": 3.547563314437866,
"learning_rate": 9.247305529522024e-06,
"loss": 0.4988,
"step": 2570
},
{
"epoch": 1.2089971883786317,
"grad_norm": 5.354389667510986,
"learning_rate": 9.244376757263355e-06,
"loss": 0.464,
"step": 2580
},
{
"epoch": 1.2136832239925024,
"grad_norm": 3.791760206222534,
"learning_rate": 9.241447985004687e-06,
"loss": 0.4441,
"step": 2590
},
{
"epoch": 1.218369259606373,
"grad_norm": 4.77889347076416,
"learning_rate": 9.238519212746018e-06,
"loss": 0.4655,
"step": 2600
},
{
"epoch": 1.2230552952202436,
"grad_norm": 5.804917335510254,
"learning_rate": 9.235590440487349e-06,
"loss": 0.4912,
"step": 2610
},
{
"epoch": 1.2277413308341143,
"grad_norm": 3.841860771179199,
"learning_rate": 9.23266166822868e-06,
"loss": 0.472,
"step": 2620
},
{
"epoch": 1.232427366447985,
"grad_norm": 4.4197540283203125,
"learning_rate": 9.22973289597001e-06,
"loss": 0.4821,
"step": 2630
},
{
"epoch": 1.2371134020618557,
"grad_norm": 5.844490051269531,
"learning_rate": 9.226804123711341e-06,
"loss": 0.5655,
"step": 2640
},
{
"epoch": 1.2417994376757264,
"grad_norm": 3.5442116260528564,
"learning_rate": 9.223875351452672e-06,
"loss": 0.4532,
"step": 2650
},
{
"epoch": 1.246485473289597,
"grad_norm": 5.259571075439453,
"learning_rate": 9.220946579194002e-06,
"loss": 0.4856,
"step": 2660
},
{
"epoch": 1.2511715089034676,
"grad_norm": 4.675846576690674,
"learning_rate": 9.218017806935333e-06,
"loss": 0.4576,
"step": 2670
},
{
"epoch": 1.2558575445173383,
"grad_norm": 5.236482620239258,
"learning_rate": 9.215089034676664e-06,
"loss": 0.513,
"step": 2680
},
{
"epoch": 1.260543580131209,
"grad_norm": 4.658278465270996,
"learning_rate": 9.212160262417995e-06,
"loss": 0.4734,
"step": 2690
},
{
"epoch": 1.2652296157450795,
"grad_norm": 3.7085494995117188,
"learning_rate": 9.209231490159326e-06,
"loss": 0.5279,
"step": 2700
},
{
"epoch": 1.2699156513589505,
"grad_norm": 3.4627673625946045,
"learning_rate": 9.206302717900658e-06,
"loss": 0.4773,
"step": 2710
},
{
"epoch": 1.274601686972821,
"grad_norm": 4.618409633636475,
"learning_rate": 9.203373945641987e-06,
"loss": 0.4354,
"step": 2720
},
{
"epoch": 1.2792877225866917,
"grad_norm": 3.1090590953826904,
"learning_rate": 9.200445173383318e-06,
"loss": 0.4409,
"step": 2730
},
{
"epoch": 1.2839737582005624,
"grad_norm": 4.328725337982178,
"learning_rate": 9.19751640112465e-06,
"loss": 0.4799,
"step": 2740
},
{
"epoch": 1.2886597938144329,
"grad_norm": 3.8362419605255127,
"learning_rate": 9.194587628865979e-06,
"loss": 0.5062,
"step": 2750
},
{
"epoch": 1.2886597938144329,
"eval_loss": 0.05263364687561989,
"eval_pearson_cosine": 0.7755555336434341,
"eval_pearson_dot": 0.6502184577290961,
"eval_pearson_euclidean": 0.7709853609297426,
"eval_pearson_manhattan": 0.769572635033791,
"eval_runtime": 44.8508,
"eval_samples_per_second": 33.444,
"eval_spearman_cosine": 0.7765036654281985,
"eval_spearman_dot": 0.6558936409143281,
"eval_spearman_euclidean": 0.7808945633743188,
"eval_spearman_manhattan": 0.7795729380744477,
"eval_steps_per_second": 33.444,
"step": 2750
},
{
"epoch": 1.2933458294283038,
"grad_norm": 3.6972432136535645,
"learning_rate": 9.19165885660731e-06,
"loss": 0.488,
"step": 2760
},
{
"epoch": 1.2980318650421743,
"grad_norm": 6.73103141784668,
"learning_rate": 9.188730084348641e-06,
"loss": 0.4553,
"step": 2770
},
{
"epoch": 1.302717900656045,
"grad_norm": 4.371028423309326,
"learning_rate": 9.185801312089973e-06,
"loss": 0.4555,
"step": 2780
},
{
"epoch": 1.3074039362699157,
"grad_norm": 3.4788401126861572,
"learning_rate": 9.182872539831304e-06,
"loss": 0.4561,
"step": 2790
},
{
"epoch": 1.3120899718837864,
"grad_norm": 3.832277774810791,
"learning_rate": 9.179943767572635e-06,
"loss": 0.4838,
"step": 2800
},
{
"epoch": 1.316776007497657,
"grad_norm": 3.5579423904418945,
"learning_rate": 9.177014995313966e-06,
"loss": 0.4404,
"step": 2810
},
{
"epoch": 1.3214620431115276,
"grad_norm": 3.7768073081970215,
"learning_rate": 9.174086223055296e-06,
"loss": 0.4724,
"step": 2820
},
{
"epoch": 1.3261480787253983,
"grad_norm": 3.957035779953003,
"learning_rate": 9.171157450796627e-06,
"loss": 0.471,
"step": 2830
},
{
"epoch": 1.330834114339269,
"grad_norm": 3.6035895347595215,
"learning_rate": 9.168228678537958e-06,
"loss": 0.4645,
"step": 2840
},
{
"epoch": 1.3355201499531397,
"grad_norm": 4.358327388763428,
"learning_rate": 9.165299906279288e-06,
"loss": 0.4301,
"step": 2850
},
{
"epoch": 1.3402061855670104,
"grad_norm": 3.4666709899902344,
"learning_rate": 9.162371134020619e-06,
"loss": 0.4508,
"step": 2860
},
{
"epoch": 1.344892221180881,
"grad_norm": 3.912290096282959,
"learning_rate": 9.15944236176195e-06,
"loss": 0.4379,
"step": 2870
},
{
"epoch": 1.3495782567947516,
"grad_norm": 4.305796146392822,
"learning_rate": 9.156513589503281e-06,
"loss": 0.4194,
"step": 2880
},
{
"epoch": 1.3542642924086223,
"grad_norm": 4.231681823730469,
"learning_rate": 9.153584817244612e-06,
"loss": 0.4017,
"step": 2890
},
{
"epoch": 1.358950328022493,
"grad_norm": 4.43821382522583,
"learning_rate": 9.150656044985944e-06,
"loss": 0.4185,
"step": 2900
},
{
"epoch": 1.3636363636363638,
"grad_norm": 4.922164440155029,
"learning_rate": 9.147727272727273e-06,
"loss": 0.5199,
"step": 2910
},
{
"epoch": 1.3683223992502342,
"grad_norm": 4.577489852905273,
"learning_rate": 9.144798500468604e-06,
"loss": 0.4237,
"step": 2920
},
{
"epoch": 1.373008434864105,
"grad_norm": 3.9537651538848877,
"learning_rate": 9.141869728209935e-06,
"loss": 0.4888,
"step": 2930
},
{
"epoch": 1.3776944704779757,
"grad_norm": 4.165870189666748,
"learning_rate": 9.138940955951267e-06,
"loss": 0.4476,
"step": 2940
},
{
"epoch": 1.3823805060918464,
"grad_norm": 4.492893218994141,
"learning_rate": 9.136012183692596e-06,
"loss": 0.5159,
"step": 2950
},
{
"epoch": 1.387066541705717,
"grad_norm": 3.847490072250366,
"learning_rate": 9.133083411433927e-06,
"loss": 0.4497,
"step": 2960
},
{
"epoch": 1.3917525773195876,
"grad_norm": 6.766137599945068,
"learning_rate": 9.130154639175258e-06,
"loss": 0.4379,
"step": 2970
},
{
"epoch": 1.3964386129334583,
"grad_norm": 3.9198007583618164,
"learning_rate": 9.12722586691659e-06,
"loss": 0.4519,
"step": 2980
},
{
"epoch": 1.401124648547329,
"grad_norm": 3.67480731010437,
"learning_rate": 9.124297094657921e-06,
"loss": 0.4108,
"step": 2990
},
{
"epoch": 1.4058106841611997,
"grad_norm": 3.3013832569122314,
"learning_rate": 9.12136832239925e-06,
"loss": 0.433,
"step": 3000
},
{
"epoch": 1.4058106841611997,
"eval_loss": 0.0497601218521595,
"eval_pearson_cosine": 0.7834985989633054,
"eval_pearson_dot": 0.6669524421664974,
"eval_pearson_euclidean": 0.7743874834934843,
"eval_pearson_manhattan": 0.7730376146204847,
"eval_runtime": 47.8141,
"eval_samples_per_second": 31.371,
"eval_spearman_cosine": 0.7845889452017747,
"eval_spearman_dot": 0.6729435548765089,
"eval_spearman_euclidean": 0.784591658726837,
"eval_spearman_manhattan": 0.7832975474858643,
"eval_steps_per_second": 31.371,
"step": 3000
},
{
"epoch": 1.4104967197750704,
"grad_norm": 4.2792487144470215,
"learning_rate": 9.118439550140582e-06,
"loss": 0.4878,
"step": 3010
},
{
"epoch": 1.415182755388941,
"grad_norm": 3.8892383575439453,
"learning_rate": 9.115510777881913e-06,
"loss": 0.4676,
"step": 3020
},
{
"epoch": 1.4198687910028116,
"grad_norm": 5.0008745193481445,
"learning_rate": 9.112582005623244e-06,
"loss": 0.4729,
"step": 3030
},
{
"epoch": 1.4245548266166823,
"grad_norm": 5.607409477233887,
"learning_rate": 9.109653233364575e-06,
"loss": 0.4762,
"step": 3040
},
{
"epoch": 1.429240862230553,
"grad_norm": 3.0340139865875244,
"learning_rate": 9.106724461105905e-06,
"loss": 0.4438,
"step": 3050
},
{
"epoch": 1.4339268978444237,
"grad_norm": 4.310724258422852,
"learning_rate": 9.103795688847236e-06,
"loss": 0.4499,
"step": 3060
},
{
"epoch": 1.4386129334582942,
"grad_norm": 4.481917381286621,
"learning_rate": 9.100866916588567e-06,
"loss": 0.4493,
"step": 3070
},
{
"epoch": 1.443298969072165,
"grad_norm": 4.330621719360352,
"learning_rate": 9.097938144329898e-06,
"loss": 0.4505,
"step": 3080
},
{
"epoch": 1.4479850046860356,
"grad_norm": 4.335081577301025,
"learning_rate": 9.095009372071228e-06,
"loss": 0.446,
"step": 3090
},
{
"epoch": 1.4526710402999063,
"grad_norm": 3.0894672870635986,
"learning_rate": 9.092080599812559e-06,
"loss": 0.4404,
"step": 3100
},
{
"epoch": 1.457357075913777,
"grad_norm": 4.6363983154296875,
"learning_rate": 9.08915182755389e-06,
"loss": 0.5358,
"step": 3110
},
{
"epoch": 1.4620431115276475,
"grad_norm": 3.80387806892395,
"learning_rate": 9.086223055295221e-06,
"loss": 0.4374,
"step": 3120
},
{
"epoch": 1.4667291471415183,
"grad_norm": 3.276442289352417,
"learning_rate": 9.083294283036552e-06,
"loss": 0.5013,
"step": 3130
},
{
"epoch": 1.471415182755389,
"grad_norm": 3.843419075012207,
"learning_rate": 9.080365510777884e-06,
"loss": 0.4694,
"step": 3140
},
{
"epoch": 1.4761012183692597,
"grad_norm": 4.7606730461120605,
"learning_rate": 9.077436738519213e-06,
"loss": 0.4215,
"step": 3150
},
{
"epoch": 1.4807872539831304,
"grad_norm": 3.739225149154663,
"learning_rate": 9.074507966260544e-06,
"loss": 0.4756,
"step": 3160
},
{
"epoch": 1.4854732895970009,
"grad_norm": 3.36938214302063,
"learning_rate": 9.071579194001876e-06,
"loss": 0.4243,
"step": 3170
},
{
"epoch": 1.4901593252108716,
"grad_norm": 6.589993476867676,
"learning_rate": 9.068650421743205e-06,
"loss": 0.4698,
"step": 3180
},
{
"epoch": 1.4948453608247423,
"grad_norm": 3.8416695594787598,
"learning_rate": 9.065721649484536e-06,
"loss": 0.4964,
"step": 3190
},
{
"epoch": 1.499531396438613,
"grad_norm": 4.367741584777832,
"learning_rate": 9.062792877225867e-06,
"loss": 0.4417,
"step": 3200
},
{
"epoch": 1.5042174320524837,
"grad_norm": 3.500617742538452,
"learning_rate": 9.059864104967199e-06,
"loss": 0.4522,
"step": 3210
},
{
"epoch": 1.5089034676663542,
"grad_norm": 3.5349769592285156,
"learning_rate": 9.05693533270853e-06,
"loss": 0.4393,
"step": 3220
},
{
"epoch": 1.513589503280225,
"grad_norm": 3.8469526767730713,
"learning_rate": 9.054006560449861e-06,
"loss": 0.4453,
"step": 3230
},
{
"epoch": 1.5182755388940956,
"grad_norm": 3.209933280944824,
"learning_rate": 9.051077788191192e-06,
"loss": 0.4599,
"step": 3240
},
{
"epoch": 1.522961574507966,
"grad_norm": 3.7976036071777344,
"learning_rate": 9.048149015932522e-06,
"loss": 0.4373,
"step": 3250
},
{
"epoch": 1.522961574507966,
"eval_loss": 0.049798864871263504,
"eval_pearson_cosine": 0.7866421286010308,
"eval_pearson_dot": 0.6641640853451243,
"eval_pearson_euclidean": 0.7777378719378305,
"eval_pearson_manhattan": 0.7764827785285746,
"eval_runtime": 43.7509,
"eval_samples_per_second": 34.285,
"eval_spearman_cosine": 0.7870351053050699,
"eval_spearman_dot": 0.6708598238937284,
"eval_spearman_euclidean": 0.7874683707378692,
"eval_spearman_manhattan": 0.7865203522698128,
"eval_steps_per_second": 34.285,
"step": 3250
},
{
"epoch": 1.527647610121837,
"grad_norm": 4.851262092590332,
"learning_rate": 9.045220243673853e-06,
"loss": 0.491,
"step": 3260
},
{
"epoch": 1.5323336457357075,
"grad_norm": 4.183891773223877,
"learning_rate": 9.042291471415184e-06,
"loss": 0.453,
"step": 3270
},
{
"epoch": 1.5370196813495782,
"grad_norm": 4.280774116516113,
"learning_rate": 9.039362699156514e-06,
"loss": 0.4413,
"step": 3280
},
{
"epoch": 1.541705716963449,
"grad_norm": 4.118307113647461,
"learning_rate": 9.036433926897845e-06,
"loss": 0.4661,
"step": 3290
},
{
"epoch": 1.5463917525773194,
"grad_norm": 5.99712610244751,
"learning_rate": 9.033505154639176e-06,
"loss": 0.5205,
"step": 3300
},
{
"epoch": 1.5510777881911904,
"grad_norm": 4.146691799163818,
"learning_rate": 9.030576382380507e-06,
"loss": 0.428,
"step": 3310
},
{
"epoch": 1.5557638238050608,
"grad_norm": 3.899887800216675,
"learning_rate": 9.027647610121838e-06,
"loss": 0.4564,
"step": 3320
},
{
"epoch": 1.5604498594189316,
"grad_norm": 3.9663302898406982,
"learning_rate": 9.02471883786317e-06,
"loss": 0.4539,
"step": 3330
},
{
"epoch": 1.5651358950328023,
"grad_norm": 3.526458263397217,
"learning_rate": 9.021790065604499e-06,
"loss": 0.4844,
"step": 3340
},
{
"epoch": 1.569821930646673,
"grad_norm": 4.192911624908447,
"learning_rate": 9.01886129334583e-06,
"loss": 0.4278,
"step": 3350
},
{
"epoch": 1.5745079662605437,
"grad_norm": 4.185749530792236,
"learning_rate": 9.015932521087161e-06,
"loss": 0.4632,
"step": 3360
},
{
"epoch": 1.5791940018744142,
"grad_norm": 3.411773204803467,
"learning_rate": 9.013003748828491e-06,
"loss": 0.436,
"step": 3370
},
{
"epoch": 1.5838800374882849,
"grad_norm": 4.467881679534912,
"learning_rate": 9.010074976569822e-06,
"loss": 0.4133,
"step": 3380
},
{
"epoch": 1.5885660731021556,
"grad_norm": 3.77736496925354,
"learning_rate": 9.007146204311153e-06,
"loss": 0.4452,
"step": 3390
},
{
"epoch": 1.5932521087160263,
"grad_norm": 4.084095478057861,
"learning_rate": 9.004217432052485e-06,
"loss": 0.4605,
"step": 3400
},
{
"epoch": 1.597938144329897,
"grad_norm": 3.3393008708953857,
"learning_rate": 9.001288659793816e-06,
"loss": 0.4157,
"step": 3410
},
{
"epoch": 1.6026241799437675,
"grad_norm": 3.096881151199341,
"learning_rate": 8.998359887535147e-06,
"loss": 0.4478,
"step": 3420
},
{
"epoch": 1.6073102155576382,
"grad_norm": 3.0557243824005127,
"learning_rate": 8.995431115276478e-06,
"loss": 0.4452,
"step": 3430
},
{
"epoch": 1.611996251171509,
"grad_norm": 3.7997219562530518,
"learning_rate": 8.992502343017808e-06,
"loss": 0.4287,
"step": 3440
},
{
"epoch": 1.6166822867853796,
"grad_norm": 3.6995465755462646,
"learning_rate": 8.989573570759139e-06,
"loss": 0.4423,
"step": 3450
},
{
"epoch": 1.6213683223992503,
"grad_norm": 4.1384053230285645,
"learning_rate": 8.986644798500468e-06,
"loss": 0.4563,
"step": 3460
},
{
"epoch": 1.6260543580131208,
"grad_norm": 4.637014865875244,
"learning_rate": 8.9837160262418e-06,
"loss": 0.4538,
"step": 3470
},
{
"epoch": 1.6307403936269915,
"grad_norm": 4.30952262878418,
"learning_rate": 8.98078725398313e-06,
"loss": 0.3993,
"step": 3480
},
{
"epoch": 1.6354264292408622,
"grad_norm": 4.746737003326416,
"learning_rate": 8.977858481724462e-06,
"loss": 0.4274,
"step": 3490
},
{
"epoch": 1.640112464854733,
"grad_norm": 3.8592286109924316,
"learning_rate": 8.974929709465793e-06,
"loss": 0.4066,
"step": 3500
},
{
"epoch": 1.640112464854733,
"eval_loss": 0.050406068563461304,
"eval_pearson_cosine": 0.7840015528942317,
"eval_pearson_dot": 0.659932129633507,
"eval_pearson_euclidean": 0.7769297052026758,
"eval_pearson_manhattan": 0.7754185185705609,
"eval_runtime": 44.0859,
"eval_samples_per_second": 34.024,
"eval_spearman_cosine": 0.7845451302239834,
"eval_spearman_dot": 0.6667296644451466,
"eval_spearman_euclidean": 0.7868327314956118,
"eval_spearman_manhattan": 0.7856021398727839,
"eval_steps_per_second": 34.024,
"step": 3500
},
{
"epoch": 1.6447985004686037,
"grad_norm": 5.983098030090332,
"learning_rate": 8.972000937207124e-06,
"loss": 0.4451,
"step": 3510
},
{
"epoch": 1.6494845360824741,
"grad_norm": 4.052550315856934,
"learning_rate": 8.969072164948455e-06,
"loss": 0.4331,
"step": 3520
},
{
"epoch": 1.6541705716963448,
"grad_norm": 3.7970380783081055,
"learning_rate": 8.966143392689785e-06,
"loss": 0.4427,
"step": 3530
},
{
"epoch": 1.6588566073102156,
"grad_norm": 4.695807456970215,
"learning_rate": 8.963214620431116e-06,
"loss": 0.4522,
"step": 3540
},
{
"epoch": 1.6635426429240863,
"grad_norm": 4.41202974319458,
"learning_rate": 8.960285848172446e-06,
"loss": 0.4275,
"step": 3550
},
{
"epoch": 1.668228678537957,
"grad_norm": 5.364877223968506,
"learning_rate": 8.957357075913777e-06,
"loss": 0.4321,
"step": 3560
},
{
"epoch": 1.6729147141518275,
"grad_norm": 3.801132917404175,
"learning_rate": 8.954428303655108e-06,
"loss": 0.4494,
"step": 3570
},
{
"epoch": 1.6776007497656982,
"grad_norm": 4.197866439819336,
"learning_rate": 8.95149953139644e-06,
"loss": 0.4126,
"step": 3580
},
{
"epoch": 1.6822867853795689,
"grad_norm": 5.34595251083374,
"learning_rate": 8.94857075913777e-06,
"loss": 0.4757,
"step": 3590
},
{
"epoch": 1.6869728209934396,
"grad_norm": 4.772789478302002,
"learning_rate": 8.945641986879102e-06,
"loss": 0.4037,
"step": 3600
},
{
"epoch": 1.6916588566073103,
"grad_norm": 4.81839656829834,
"learning_rate": 8.942713214620433e-06,
"loss": 0.4192,
"step": 3610
},
{
"epoch": 1.6963448922211808,
"grad_norm": 3.470919132232666,
"learning_rate": 8.939784442361762e-06,
"loss": 0.4106,
"step": 3620
},
{
"epoch": 1.7010309278350515,
"grad_norm": 3.2051522731781006,
"learning_rate": 8.936855670103094e-06,
"loss": 0.4162,
"step": 3630
},
{
"epoch": 1.7057169634489222,
"grad_norm": 3.8122334480285645,
"learning_rate": 8.933926897844423e-06,
"loss": 0.4054,
"step": 3640
},
{
"epoch": 1.710402999062793,
"grad_norm": 5.07956075668335,
"learning_rate": 8.930998125585754e-06,
"loss": 0.4164,
"step": 3650
},
{
"epoch": 1.7150890346766636,
"grad_norm": 3.754542112350464,
"learning_rate": 8.928069353327085e-06,
"loss": 0.3703,
"step": 3660
},
{
"epoch": 1.7197750702905341,
"grad_norm": 3.4620890617370605,
"learning_rate": 8.925140581068417e-06,
"loss": 0.4667,
"step": 3670
},
{
"epoch": 1.7244611059044048,
"grad_norm": 4.179393768310547,
"learning_rate": 8.922211808809748e-06,
"loss": 0.4384,
"step": 3680
},
{
"epoch": 1.7291471415182755,
"grad_norm": 3.0865719318389893,
"learning_rate": 8.919283036551079e-06,
"loss": 0.4248,
"step": 3690
},
{
"epoch": 1.7338331771321462,
"grad_norm": 3.9282147884368896,
"learning_rate": 8.91635426429241e-06,
"loss": 0.4231,
"step": 3700
},
{
"epoch": 1.738519212746017,
"grad_norm": 3.9746885299682617,
"learning_rate": 8.91342549203374e-06,
"loss": 0.4152,
"step": 3710
},
{
"epoch": 1.7432052483598874,
"grad_norm": 3.8340625762939453,
"learning_rate": 8.910496719775071e-06,
"loss": 0.4458,
"step": 3720
},
{
"epoch": 1.7478912839737581,
"grad_norm": 4.861859321594238,
"learning_rate": 8.907567947516402e-06,
"loss": 0.4274,
"step": 3730
},
{
"epoch": 1.7525773195876289,
"grad_norm": 3.3457283973693848,
"learning_rate": 8.904639175257732e-06,
"loss": 0.4534,
"step": 3740
},
{
"epoch": 1.7572633552014996,
"grad_norm": 4.057953834533691,
"learning_rate": 8.901710402999063e-06,
"loss": 0.484,
"step": 3750
},
{
"epoch": 1.7572633552014996,
"eval_loss": 0.05240313336253166,
"eval_pearson_cosine": 0.7879299521989642,
"eval_pearson_dot": 0.6605985065084816,
"eval_pearson_euclidean": 0.7797438530556207,
"eval_pearson_manhattan": 0.778216782480726,
"eval_runtime": 44.9916,
"eval_samples_per_second": 33.34,
"eval_spearman_cosine": 0.7888982276270184,
"eval_spearman_dot": 0.6669965792210436,
"eval_spearman_euclidean": 0.7899037728263932,
"eval_spearman_manhattan": 0.7886320032383264,
"eval_steps_per_second": 33.34,
"step": 3750
},
{
"epoch": 1.7619493908153703,
"grad_norm": 3.281102418899536,
"learning_rate": 8.898781630740394e-06,
"loss": 0.4074,
"step": 3760
},
{
"epoch": 1.7666354264292408,
"grad_norm": 4.710203170776367,
"learning_rate": 8.895852858481725e-06,
"loss": 0.4537,
"step": 3770
},
{
"epoch": 1.7713214620431117,
"grad_norm": 4.636346817016602,
"learning_rate": 8.892924086223056e-06,
"loss": 0.4348,
"step": 3780
},
{
"epoch": 1.7760074976569822,
"grad_norm": 4.518571376800537,
"learning_rate": 8.889995313964388e-06,
"loss": 0.4515,
"step": 3790
},
{
"epoch": 1.780693533270853,
"grad_norm": 4.0576066970825195,
"learning_rate": 8.887066541705717e-06,
"loss": 0.4276,
"step": 3800
},
{
"epoch": 1.7853795688847236,
"grad_norm": 5.657445430755615,
"learning_rate": 8.884137769447048e-06,
"loss": 0.4277,
"step": 3810
},
{
"epoch": 1.790065604498594,
"grad_norm": 5.393405437469482,
"learning_rate": 8.88120899718838e-06,
"loss": 0.428,
"step": 3820
},
{
"epoch": 1.794751640112465,
"grad_norm": 4.101112365722656,
"learning_rate": 8.87828022492971e-06,
"loss": 0.4489,
"step": 3830
},
{
"epoch": 1.7994376757263355,
"grad_norm": 3.531888246536255,
"learning_rate": 8.87535145267104e-06,
"loss": 0.3673,
"step": 3840
},
{
"epoch": 1.8041237113402062,
"grad_norm": 3.4490315914154053,
"learning_rate": 8.872422680412371e-06,
"loss": 0.4059,
"step": 3850
},
{
"epoch": 1.808809746954077,
"grad_norm": 3.034252643585205,
"learning_rate": 8.869493908153702e-06,
"loss": 0.3832,
"step": 3860
},
{
"epoch": 1.8134957825679474,
"grad_norm": 4.064283847808838,
"learning_rate": 8.866565135895034e-06,
"loss": 0.4704,
"step": 3870
},
{
"epoch": 1.8181818181818183,
"grad_norm": 3.2689194679260254,
"learning_rate": 8.863636363636365e-06,
"loss": 0.4428,
"step": 3880
},
{
"epoch": 1.8228678537956888,
"grad_norm": 3.173530101776123,
"learning_rate": 8.860707591377694e-06,
"loss": 0.4283,
"step": 3890
},
{
"epoch": 1.8275538894095595,
"grad_norm": 3.638122081756592,
"learning_rate": 8.857778819119026e-06,
"loss": 0.4225,
"step": 3900
},
{
"epoch": 1.8322399250234302,
"grad_norm": 3.636679172515869,
"learning_rate": 8.854850046860357e-06,
"loss": 0.4154,
"step": 3910
},
{
"epoch": 1.8369259606373007,
"grad_norm": 3.810847520828247,
"learning_rate": 8.851921274601688e-06,
"loss": 0.3931,
"step": 3920
},
{
"epoch": 1.8416119962511717,
"grad_norm": 3.7469394207000732,
"learning_rate": 8.848992502343019e-06,
"loss": 0.4472,
"step": 3930
},
{
"epoch": 1.8462980318650422,
"grad_norm": 4.962492942810059,
"learning_rate": 8.846063730084349e-06,
"loss": 0.4324,
"step": 3940
},
{
"epoch": 1.8509840674789129,
"grad_norm": 3.4641172885894775,
"learning_rate": 8.84313495782568e-06,
"loss": 0.4234,
"step": 3950
},
{
"epoch": 1.8556701030927836,
"grad_norm": 3.8601555824279785,
"learning_rate": 8.840206185567011e-06,
"loss": 0.4045,
"step": 3960
},
{
"epoch": 1.860356138706654,
"grad_norm": 6.290759086608887,
"learning_rate": 8.837277413308342e-06,
"loss": 0.4655,
"step": 3970
},
{
"epoch": 1.865042174320525,
"grad_norm": 3.5882256031036377,
"learning_rate": 8.834348641049673e-06,
"loss": 0.4298,
"step": 3980
},
{
"epoch": 1.8697282099343955,
"grad_norm": 3.133535623550415,
"learning_rate": 8.831419868791003e-06,
"loss": 0.4508,
"step": 3990
},
{
"epoch": 1.8744142455482662,
"grad_norm": 3.220383644104004,
"learning_rate": 8.828491096532334e-06,
"loss": 0.4348,
"step": 4000
},
{
"epoch": 1.8744142455482662,
"eval_loss": 0.04981923848390579,
"eval_pearson_cosine": 0.790612878761543,
"eval_pearson_dot": 0.6612786229229286,
"eval_pearson_euclidean": 0.7799249806775554,
"eval_pearson_manhattan": 0.7784476870813819,
"eval_runtime": 45.9371,
"eval_samples_per_second": 32.653,
"eval_spearman_cosine": 0.7908100570922554,
"eval_spearman_dot": 0.6689224987064551,
"eval_spearman_euclidean": 0.7902520878335856,
"eval_spearman_manhattan": 0.7892503488739743,
"eval_steps_per_second": 32.653,
"step": 4000
}
],
"logging_steps": 10,
"max_steps": 4268,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}