| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.8744142455482662, | |
| "eval_steps": 250, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004686035613870665, | |
| "grad_norm": 2.907787561416626, | |
| "learning_rate": 9.997071227741332e-06, | |
| "loss": 3.3815, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00937207122774133, | |
| "grad_norm": 2.2910118103027344, | |
| "learning_rate": 9.994142455482663e-06, | |
| "loss": 3.3605, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.014058106841611996, | |
| "grad_norm": 2.791727066040039, | |
| "learning_rate": 9.991213683223994e-06, | |
| "loss": 3.3338, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01874414245548266, | |
| "grad_norm": 2.881253242492676, | |
| "learning_rate": 9.988284910965324e-06, | |
| "loss": 3.3047, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.023430178069353328, | |
| "grad_norm": 3.5495920181274414, | |
| "learning_rate": 9.985356138706655e-06, | |
| "loss": 3.266, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.028116213683223992, | |
| "grad_norm": 3.8195812702178955, | |
| "learning_rate": 9.982427366447985e-06, | |
| "loss": 3.2116, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03280224929709466, | |
| "grad_norm": 5.006792068481445, | |
| "learning_rate": 9.979498594189316e-06, | |
| "loss": 3.1271, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03748828491096532, | |
| "grad_norm": 5.206729412078857, | |
| "learning_rate": 9.976569821930647e-06, | |
| "loss": 3.0472, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04217432052483599, | |
| "grad_norm": 6.317724227905273, | |
| "learning_rate": 9.973641049671978e-06, | |
| "loss": 2.9458, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.046860356138706656, | |
| "grad_norm": 7.30826997756958, | |
| "learning_rate": 9.97071227741331e-06, | |
| "loss": 2.9002, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05154639175257732, | |
| "grad_norm": 7.05161190032959, | |
| "learning_rate": 9.96778350515464e-06, | |
| "loss": 2.8379, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.056232427366447985, | |
| "grad_norm": 12.389013290405273, | |
| "learning_rate": 9.964854732895972e-06, | |
| "loss": 2.7637, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06091846298031865, | |
| "grad_norm": 19.661762237548828, | |
| "learning_rate": 9.961925960637301e-06, | |
| "loss": 2.7413, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06560449859418932, | |
| "grad_norm": 7.9712018966674805, | |
| "learning_rate": 9.958997188378632e-06, | |
| "loss": 2.6953, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07029053420805999, | |
| "grad_norm": 44.79791259765625, | |
| "learning_rate": 9.956068416119962e-06, | |
| "loss": 2.6795, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07497656982193064, | |
| "grad_norm": 7.748485565185547, | |
| "learning_rate": 9.953139643861293e-06, | |
| "loss": 2.6179, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07966260543580131, | |
| "grad_norm": 7.135361194610596, | |
| "learning_rate": 9.950210871602624e-06, | |
| "loss": 2.5714, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08434864104967198, | |
| "grad_norm": 5.464244365692139, | |
| "learning_rate": 9.947282099343956e-06, | |
| "loss": 2.4817, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08903467666354264, | |
| "grad_norm": 10.304727554321289, | |
| "learning_rate": 9.944353327085287e-06, | |
| "loss": 2.3939, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09372071227741331, | |
| "grad_norm": 8.390380859375, | |
| "learning_rate": 9.941424554826618e-06, | |
| "loss": 2.3162, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09840674789128398, | |
| "grad_norm": 7.206277847290039, | |
| "learning_rate": 9.938495782567949e-06, | |
| "loss": 2.2413, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.10309278350515463, | |
| "grad_norm": 10.72529411315918, | |
| "learning_rate": 9.935567010309279e-06, | |
| "loss": 2.1816, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1077788191190253, | |
| "grad_norm": 8.411327362060547, | |
| "learning_rate": 9.93263823805061e-06, | |
| "loss": 2.0204, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11246485473289597, | |
| "grad_norm": 9.118602752685547, | |
| "learning_rate": 9.929709465791941e-06, | |
| "loss": 1.9329, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11715089034676664, | |
| "grad_norm": 11.883502960205078, | |
| "learning_rate": 9.92678069353327e-06, | |
| "loss": 1.8041, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.11715089034676664, | |
| "eval_loss": 0.20095524191856384, | |
| "eval_pearson_cosine": 0.5629603652959432, | |
| "eval_pearson_dot": 0.32442021258601983, | |
| "eval_pearson_euclidean": 0.5948642130310873, | |
| "eval_pearson_manhattan": 0.5931866084570743, | |
| "eval_runtime": 46.3498, | |
| "eval_samples_per_second": 32.363, | |
| "eval_spearman_cosine": 0.5645428688364399, | |
| "eval_spearman_dot": 0.3123519595505677, | |
| "eval_spearman_euclidean": 0.5966715855304487, | |
| "eval_spearman_manhattan": 0.5951499296436052, | |
| "eval_steps_per_second": 32.363, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1218369259606373, | |
| "grad_norm": 9.455839157104492, | |
| "learning_rate": 9.923851921274602e-06, | |
| "loss": 1.7175, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12652296157450796, | |
| "grad_norm": 9.907763481140137, | |
| "learning_rate": 9.920923149015933e-06, | |
| "loss": 1.5752, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.13120899718837864, | |
| "grad_norm": 10.268372535705566, | |
| "learning_rate": 9.917994376757264e-06, | |
| "loss": 1.5905, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.1358950328022493, | |
| "grad_norm": 12.264440536499023, | |
| "learning_rate": 9.915065604498595e-06, | |
| "loss": 1.4994, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.14058106841611998, | |
| "grad_norm": 10.21927547454834, | |
| "learning_rate": 9.912136832239926e-06, | |
| "loss": 1.4741, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14526710402999063, | |
| "grad_norm": 12.204063415527344, | |
| "learning_rate": 9.909208059981256e-06, | |
| "loss": 1.3685, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.14995313964386128, | |
| "grad_norm": 8.701486587524414, | |
| "learning_rate": 9.906279287722587e-06, | |
| "loss": 1.3407, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.15463917525773196, | |
| "grad_norm": 11.478012084960938, | |
| "learning_rate": 9.903350515463918e-06, | |
| "loss": 1.3996, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15932521087160262, | |
| "grad_norm": 8.862137794494629, | |
| "learning_rate": 9.90042174320525e-06, | |
| "loss": 1.2921, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1640112464854733, | |
| "grad_norm": 8.181413650512695, | |
| "learning_rate": 9.897492970946579e-06, | |
| "loss": 1.2948, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.16869728209934395, | |
| "grad_norm": 12.891910552978516, | |
| "learning_rate": 9.89456419868791e-06, | |
| "loss": 1.2444, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1733833177132146, | |
| "grad_norm": 9.783638000488281, | |
| "learning_rate": 9.891635426429241e-06, | |
| "loss": 1.1765, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1780693533270853, | |
| "grad_norm": 10.521812438964844, | |
| "learning_rate": 9.888706654170573e-06, | |
| "loss": 1.2163, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.18275538894095594, | |
| "grad_norm": 9.507091522216797, | |
| "learning_rate": 9.885777881911904e-06, | |
| "loss": 1.1555, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18744142455482662, | |
| "grad_norm": 10.072102546691895, | |
| "learning_rate": 9.882849109653235e-06, | |
| "loss": 1.1631, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.19212746016869728, | |
| "grad_norm": 12.557927131652832, | |
| "learning_rate": 9.879920337394564e-06, | |
| "loss": 1.1319, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.19681349578256796, | |
| "grad_norm": 7.743768692016602, | |
| "learning_rate": 9.876991565135896e-06, | |
| "loss": 1.2022, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2014995313964386, | |
| "grad_norm": 9.258079528808594, | |
| "learning_rate": 9.874062792877227e-06, | |
| "loss": 1.1219, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 8.362629890441895, | |
| "learning_rate": 9.871134020618558e-06, | |
| "loss": 1.1138, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.21087160262417995, | |
| "grad_norm": 8.71789264678955, | |
| "learning_rate": 9.868205248359888e-06, | |
| "loss": 1.0473, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2155576382380506, | |
| "grad_norm": 8.710640907287598, | |
| "learning_rate": 9.865276476101219e-06, | |
| "loss": 1.0933, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.22024367385192128, | |
| "grad_norm": 7.57949686050415, | |
| "learning_rate": 9.86234770384255e-06, | |
| "loss": 1.0429, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.22492970946579194, | |
| "grad_norm": 8.775091171264648, | |
| "learning_rate": 9.859418931583881e-06, | |
| "loss": 1.0406, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2296157450796626, | |
| "grad_norm": 9.942752838134766, | |
| "learning_rate": 9.856490159325212e-06, | |
| "loss": 1.0526, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.23430178069353327, | |
| "grad_norm": 10.166437149047852, | |
| "learning_rate": 9.853561387066542e-06, | |
| "loss": 1.0265, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23430178069353327, | |
| "eval_loss": 0.09848710149526596, | |
| "eval_pearson_cosine": 0.7114527090607083, | |
| "eval_pearson_dot": 0.5814656567702485, | |
| "eval_pearson_euclidean": 0.7022168021213133, | |
| "eval_pearson_manhattan": 0.7010309676073874, | |
| "eval_runtime": 48.356, | |
| "eval_samples_per_second": 31.02, | |
| "eval_spearman_cosine": 0.7098203386273151, | |
| "eval_spearman_dot": 0.5861254786395066, | |
| "eval_spearman_euclidean": 0.7102590115372712, | |
| "eval_spearman_manhattan": 0.7094011853041999, | |
| "eval_steps_per_second": 31.02, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23898781630740393, | |
| "grad_norm": 6.910321235656738, | |
| "learning_rate": 9.850632614807873e-06, | |
| "loss": 1.0267, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2436738519212746, | |
| "grad_norm": 8.010503768920898, | |
| "learning_rate": 9.847703842549204e-06, | |
| "loss": 0.97, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.24835988753514526, | |
| "grad_norm": 8.340336799621582, | |
| "learning_rate": 9.844775070290535e-06, | |
| "loss": 0.9773, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2530459231490159, | |
| "grad_norm": 6.75998592376709, | |
| "learning_rate": 9.841846298031867e-06, | |
| "loss": 0.9694, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.25773195876288657, | |
| "grad_norm": 6.592973709106445, | |
| "learning_rate": 9.838917525773196e-06, | |
| "loss": 0.9101, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2624179943767573, | |
| "grad_norm": 8.13701343536377, | |
| "learning_rate": 9.835988753514527e-06, | |
| "loss": 0.9693, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.26710402999062793, | |
| "grad_norm": 10.256951332092285, | |
| "learning_rate": 9.833059981255859e-06, | |
| "loss": 0.9405, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2717900656044986, | |
| "grad_norm": 9.521321296691895, | |
| "learning_rate": 9.83013120899719e-06, | |
| "loss": 0.8731, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.27647610121836924, | |
| "grad_norm": 7.164852142333984, | |
| "learning_rate": 9.82720243673852e-06, | |
| "loss": 0.9387, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.28116213683223995, | |
| "grad_norm": 8.326433181762695, | |
| "learning_rate": 9.82427366447985e-06, | |
| "loss": 0.8388, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2858481724461106, | |
| "grad_norm": 8.819974899291992, | |
| "learning_rate": 9.821344892221182e-06, | |
| "loss": 0.9034, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.29053420805998126, | |
| "grad_norm": 6.0674052238464355, | |
| "learning_rate": 9.818416119962513e-06, | |
| "loss": 0.8225, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2952202436738519, | |
| "grad_norm": 7.898690223693848, | |
| "learning_rate": 9.815487347703844e-06, | |
| "loss": 0.8916, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.29990627928772257, | |
| "grad_norm": 9.459305763244629, | |
| "learning_rate": 9.812558575445175e-06, | |
| "loss": 0.8771, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3045923149015933, | |
| "grad_norm": 7.231110095977783, | |
| "learning_rate": 9.809629803186505e-06, | |
| "loss": 0.8575, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.30927835051546393, | |
| "grad_norm": 5.850890159606934, | |
| "learning_rate": 9.806701030927836e-06, | |
| "loss": 0.8294, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3139643861293346, | |
| "grad_norm": 12.532159805297852, | |
| "learning_rate": 9.803772258669167e-06, | |
| "loss": 0.8745, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.31865042174320524, | |
| "grad_norm": 6.576635837554932, | |
| "learning_rate": 9.800843486410497e-06, | |
| "loss": 0.8167, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3233364573570759, | |
| "grad_norm": 7.243174076080322, | |
| "learning_rate": 9.797914714151828e-06, | |
| "loss": 0.8886, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3280224929709466, | |
| "grad_norm": 6.775111675262451, | |
| "learning_rate": 9.794985941893159e-06, | |
| "loss": 0.8205, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.33270852858481725, | |
| "grad_norm": 7.494016647338867, | |
| "learning_rate": 9.79205716963449e-06, | |
| "loss": 0.7778, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3373945641986879, | |
| "grad_norm": 5.593213081359863, | |
| "learning_rate": 9.789128397375821e-06, | |
| "loss": 0.7875, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.34208059981255856, | |
| "grad_norm": 7.325387001037598, | |
| "learning_rate": 9.786199625117153e-06, | |
| "loss": 0.7839, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3467666354264292, | |
| "grad_norm": 5.411241054534912, | |
| "learning_rate": 9.783270852858484e-06, | |
| "loss": 0.8363, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3514526710402999, | |
| "grad_norm": 5.667125225067139, | |
| "learning_rate": 9.780342080599813e-06, | |
| "loss": 0.7904, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3514526710402999, | |
| "eval_loss": 0.07609602808952332, | |
| "eval_pearson_cosine": 0.7390127527190131, | |
| "eval_pearson_dot": 0.6193519334256266, | |
| "eval_pearson_euclidean": 0.7286540107637123, | |
| "eval_pearson_manhattan": 0.7280163166143723, | |
| "eval_runtime": 48.6286, | |
| "eval_samples_per_second": 30.846, | |
| "eval_spearman_cosine": 0.7392385981828663, | |
| "eval_spearman_dot": 0.6275059521836013, | |
| "eval_spearman_euclidean": 0.7379755721813188, | |
| "eval_spearman_manhattan": 0.7372480627669395, | |
| "eval_steps_per_second": 30.846, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3561387066541706, | |
| "grad_norm": 5.931227207183838, | |
| "learning_rate": 9.777413308341144e-06, | |
| "loss": 0.7801, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.36082474226804123, | |
| "grad_norm": 5.550874710083008, | |
| "learning_rate": 9.774484536082474e-06, | |
| "loss": 0.7466, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.3655107778819119, | |
| "grad_norm": 5.67214298248291, | |
| "learning_rate": 9.771555763823805e-06, | |
| "loss": 0.7561, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3701968134957826, | |
| "grad_norm": 5.121714115142822, | |
| "learning_rate": 9.768626991565136e-06, | |
| "loss": 0.7395, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.37488284910965325, | |
| "grad_norm": 4.957924842834473, | |
| "learning_rate": 9.765698219306467e-06, | |
| "loss": 0.7368, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3795688847235239, | |
| "grad_norm": 6.30219030380249, | |
| "learning_rate": 9.762769447047799e-06, | |
| "loss": 0.8091, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.38425492033739456, | |
| "grad_norm": 6.518470287322998, | |
| "learning_rate": 9.75984067478913e-06, | |
| "loss": 0.7525, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3889409559512652, | |
| "grad_norm": 6.101437568664551, | |
| "learning_rate": 9.756911902530461e-06, | |
| "loss": 0.7263, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3936269915651359, | |
| "grad_norm": 5.428840160369873, | |
| "learning_rate": 9.75398313027179e-06, | |
| "loss": 0.7881, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3983130271790066, | |
| "grad_norm": 7.170475482940674, | |
| "learning_rate": 9.751054358013122e-06, | |
| "loss": 0.7218, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4029990627928772, | |
| "grad_norm": 6.153990745544434, | |
| "learning_rate": 9.748125585754453e-06, | |
| "loss": 0.748, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.4076850984067479, | |
| "grad_norm": 5.364086151123047, | |
| "learning_rate": 9.745196813495782e-06, | |
| "loss": 0.786, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 5.541423797607422, | |
| "learning_rate": 9.742268041237114e-06, | |
| "loss": 0.7427, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.41705716963448924, | |
| "grad_norm": 5.1667022705078125, | |
| "learning_rate": 9.739339268978445e-06, | |
| "loss": 0.6918, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4217432052483599, | |
| "grad_norm": 4.839612007141113, | |
| "learning_rate": 9.736410496719776e-06, | |
| "loss": 0.7056, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.42642924086223055, | |
| "grad_norm": 4.407963275909424, | |
| "learning_rate": 9.733481724461107e-06, | |
| "loss": 0.6313, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.4311152764761012, | |
| "grad_norm": 7.052595138549805, | |
| "learning_rate": 9.730552952202438e-06, | |
| "loss": 0.7489, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.43580131208997186, | |
| "grad_norm": 5.71290397644043, | |
| "learning_rate": 9.727624179943768e-06, | |
| "loss": 0.6578, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.44048734770384257, | |
| "grad_norm": 6.3575825691223145, | |
| "learning_rate": 9.724695407685099e-06, | |
| "loss": 0.6914, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.4451733833177132, | |
| "grad_norm": 5.223476886749268, | |
| "learning_rate": 9.72176663542643e-06, | |
| "loss": 0.6494, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4498594189315839, | |
| "grad_norm": 6.220378398895264, | |
| "learning_rate": 9.71883786316776e-06, | |
| "loss": 0.6996, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 6.475409507751465, | |
| "learning_rate": 9.715909090909091e-06, | |
| "loss": 0.721, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.4592314901593252, | |
| "grad_norm": 5.10095739364624, | |
| "learning_rate": 9.712980318650422e-06, | |
| "loss": 0.6734, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4639175257731959, | |
| "grad_norm": 7.8438801765441895, | |
| "learning_rate": 9.710051546391753e-06, | |
| "loss": 0.7409, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "grad_norm": 5.446135997772217, | |
| "learning_rate": 9.707122774133085e-06, | |
| "loss": 0.6772, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "eval_loss": 0.06938865035772324, | |
| "eval_pearson_cosine": 0.7523242546763527, | |
| "eval_pearson_dot": 0.6339033623348058, | |
| "eval_pearson_euclidean": 0.7449881727323344, | |
| "eval_pearson_manhattan": 0.7443626147120028, | |
| "eval_runtime": 47.885, | |
| "eval_samples_per_second": 31.325, | |
| "eval_spearman_cosine": 0.7542578168613095, | |
| "eval_spearman_dot": 0.6408093688850417, | |
| "eval_spearman_euclidean": 0.7532432307302356, | |
| "eval_spearman_manhattan": 0.7526380381288565, | |
| "eval_steps_per_second": 31.325, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4732895970009372, | |
| "grad_norm": 6.391997814178467, | |
| "learning_rate": 9.704194001874416e-06, | |
| "loss": 0.6965, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.47797563261480785, | |
| "grad_norm": 5.345996379852295, | |
| "learning_rate": 9.701265229615747e-06, | |
| "loss": 0.6447, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.48266166822867856, | |
| "grad_norm": 5.60822057723999, | |
| "learning_rate": 9.698336457357076e-06, | |
| "loss": 0.6854, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4873477038425492, | |
| "grad_norm": 6.488014221191406, | |
| "learning_rate": 9.695407685098408e-06, | |
| "loss": 0.7089, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.49203373945641987, | |
| "grad_norm": 5.387355804443359, | |
| "learning_rate": 9.692478912839737e-06, | |
| "loss": 0.6949, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4967197750702905, | |
| "grad_norm": 5.179281234741211, | |
| "learning_rate": 9.689550140581068e-06, | |
| "loss": 0.6571, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5014058106841612, | |
| "grad_norm": 5.786458492279053, | |
| "learning_rate": 9.6866213683224e-06, | |
| "loss": 0.7154, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5060918462980318, | |
| "grad_norm": 6.279985427856445, | |
| "learning_rate": 9.68369259606373e-06, | |
| "loss": 0.6757, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5107778819119025, | |
| "grad_norm": 4.793182849884033, | |
| "learning_rate": 9.680763823805062e-06, | |
| "loss": 0.7136, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5154639175257731, | |
| "grad_norm": 7.646529674530029, | |
| "learning_rate": 9.677835051546393e-06, | |
| "loss": 0.6396, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5201499531396439, | |
| "grad_norm": 5.7034912109375, | |
| "learning_rate": 9.674906279287724e-06, | |
| "loss": 0.665, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5248359887535146, | |
| "grad_norm": 6.54317045211792, | |
| "learning_rate": 9.671977507029054e-06, | |
| "loss": 0.6713, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5295220243673852, | |
| "grad_norm": 5.6496806144714355, | |
| "learning_rate": 9.669048734770385e-06, | |
| "loss": 0.6876, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5342080599812559, | |
| "grad_norm": 5.326486110687256, | |
| "learning_rate": 9.666119962511716e-06, | |
| "loss": 0.6951, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5388940955951266, | |
| "grad_norm": 5.124545574188232, | |
| "learning_rate": 9.663191190253046e-06, | |
| "loss": 0.6388, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5435801312089972, | |
| "grad_norm": 4.34152364730835, | |
| "learning_rate": 9.660262417994377e-06, | |
| "loss": 0.6322, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5482661668228679, | |
| "grad_norm": 8.722075462341309, | |
| "learning_rate": 9.657333645735708e-06, | |
| "loss": 0.6776, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.5529522024367385, | |
| "grad_norm": 5.417623996734619, | |
| "learning_rate": 9.65440487347704e-06, | |
| "loss": 0.6492, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.5576382380506092, | |
| "grad_norm": 4.369041919708252, | |
| "learning_rate": 9.65147610121837e-06, | |
| "loss": 0.6039, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.5623242736644799, | |
| "grad_norm": 6.5720062255859375, | |
| "learning_rate": 9.648547328959702e-06, | |
| "loss": 0.6911, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5670103092783505, | |
| "grad_norm": 7.112950325012207, | |
| "learning_rate": 9.645618556701031e-06, | |
| "loss": 0.6214, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.5716963448922212, | |
| "grad_norm": 5.643182277679443, | |
| "learning_rate": 9.642689784442362e-06, | |
| "loss": 0.6959, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.5763823805060918, | |
| "grad_norm": 5.078190803527832, | |
| "learning_rate": 9.639761012183694e-06, | |
| "loss": 0.6633, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5810684161199625, | |
| "grad_norm": 5.247280120849609, | |
| "learning_rate": 9.636832239925025e-06, | |
| "loss": 0.6415, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5857544517338332, | |
| "grad_norm": 5.110747814178467, | |
| "learning_rate": 9.633903467666354e-06, | |
| "loss": 0.6031, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5857544517338332, | |
| "eval_loss": 0.06345358490943909, | |
| "eval_pearson_cosine": 0.7580338914962539, | |
| "eval_pearson_dot": 0.6394158052533783, | |
| "eval_pearson_euclidean": 0.7521759780114508, | |
| "eval_pearson_manhattan": 0.7513571158009427, | |
| "eval_runtime": 44.2242, | |
| "eval_samples_per_second": 33.918, | |
| "eval_spearman_cosine": 0.758882658229917, | |
| "eval_spearman_dot": 0.6455380162932587, | |
| "eval_spearman_euclidean": 0.7604619351541958, | |
| "eval_spearman_manhattan": 0.7599139087493931, | |
| "eval_steps_per_second": 33.918, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5904404873477038, | |
| "grad_norm": 6.717201232910156, | |
| "learning_rate": 9.630974695407685e-06, | |
| "loss": 0.6553, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5951265229615745, | |
| "grad_norm": 6.948915004730225, | |
| "learning_rate": 9.628045923149017e-06, | |
| "loss": 0.6528, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5998125585754451, | |
| "grad_norm": 5.585124969482422, | |
| "learning_rate": 9.625117150890348e-06, | |
| "loss": 0.6125, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6044985941893158, | |
| "grad_norm": 4.020166397094727, | |
| "learning_rate": 9.622188378631679e-06, | |
| "loss": 0.5857, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6091846298031866, | |
| "grad_norm": 4.905421257019043, | |
| "learning_rate": 9.619259606373008e-06, | |
| "loss": 0.6128, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6138706654170571, | |
| "grad_norm": 5.642446517944336, | |
| "learning_rate": 9.61633083411434e-06, | |
| "loss": 0.6177, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 5.623671531677246, | |
| "learning_rate": 9.613402061855671e-06, | |
| "loss": 0.6076, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6232427366447985, | |
| "grad_norm": 3.6249349117279053, | |
| "learning_rate": 9.610473289597002e-06, | |
| "loss": 0.5987, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6279287722586692, | |
| "grad_norm": 4.7242608070373535, | |
| "learning_rate": 9.607544517338333e-06, | |
| "loss": 0.6082, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6326148078725399, | |
| "grad_norm": 9.071741104125977, | |
| "learning_rate": 9.604615745079663e-06, | |
| "loss": 0.6369, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6373008434864105, | |
| "grad_norm": 5.471718788146973, | |
| "learning_rate": 9.601686972820994e-06, | |
| "loss": 0.6235, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.6419868791002812, | |
| "grad_norm": 6.0755934715271, | |
| "learning_rate": 9.598758200562325e-06, | |
| "loss": 0.6197, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.6466729147141518, | |
| "grad_norm": 5.650800704956055, | |
| "learning_rate": 9.595829428303656e-06, | |
| "loss": 0.5947, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.6513589503280225, | |
| "grad_norm": 4.409568786621094, | |
| "learning_rate": 9.592900656044986e-06, | |
| "loss": 0.6632, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.6560449859418932, | |
| "grad_norm": 6.575608730316162, | |
| "learning_rate": 9.589971883786317e-06, | |
| "loss": 0.5655, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6607310215557638, | |
| "grad_norm": 4.897518634796143, | |
| "learning_rate": 9.587043111527648e-06, | |
| "loss": 0.6064, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.6654170571696345, | |
| "grad_norm": 4.505845546722412, | |
| "learning_rate": 9.58411433926898e-06, | |
| "loss": 0.6217, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.6701030927835051, | |
| "grad_norm": 11.04179573059082, | |
| "learning_rate": 9.58118556701031e-06, | |
| "loss": 0.626, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.6747891283973758, | |
| "grad_norm": 7.031481742858887, | |
| "learning_rate": 9.578256794751642e-06, | |
| "loss": 0.6644, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.6794751640112465, | |
| "grad_norm": 5.177082061767578, | |
| "learning_rate": 9.575328022492971e-06, | |
| "loss": 0.5794, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.6841611996251171, | |
| "grad_norm": 5.830789566040039, | |
| "learning_rate": 9.572399250234303e-06, | |
| "loss": 0.5962, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.6888472352389878, | |
| "grad_norm": 5.322279453277588, | |
| "learning_rate": 9.569470477975634e-06, | |
| "loss": 0.5528, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.6935332708528584, | |
| "grad_norm": 5.191045761108398, | |
| "learning_rate": 9.566541705716965e-06, | |
| "loss": 0.602, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.6982193064667291, | |
| "grad_norm": 4.832320213317871, | |
| "learning_rate": 9.563612933458294e-06, | |
| "loss": 0.5732, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7029053420805998, | |
| "grad_norm": 5.9457926750183105, | |
| "learning_rate": 9.560684161199626e-06, | |
| "loss": 0.6017, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7029053420805998, | |
| "eval_loss": 0.059113115072250366, | |
| "eval_pearson_cosine": 0.7675747794888963, | |
| "eval_pearson_dot": 0.6475892776570333, | |
| "eval_pearson_euclidean": 0.7594640382486553, | |
| "eval_pearson_manhattan": 0.7585029707701096, | |
| "eval_runtime": 45.7613, | |
| "eval_samples_per_second": 32.779, | |
| "eval_spearman_cosine": 0.768339335776319, | |
| "eval_spearman_dot": 0.655445685087582, | |
| "eval_spearman_euclidean": 0.7680811238488432, | |
| "eval_spearman_manhattan": 0.7673055147561156, | |
| "eval_steps_per_second": 32.779, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7075913776944704, | |
| "grad_norm": 4.822035789489746, | |
| "learning_rate": 9.557755388940957e-06, | |
| "loss": 0.5891, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.7122774133083412, | |
| "grad_norm": 7.0355753898620605, | |
| "learning_rate": 9.554826616682288e-06, | |
| "loss": 0.6019, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7169634489222118, | |
| "grad_norm": 7.064100742340088, | |
| "learning_rate": 9.55189784442362e-06, | |
| "loss": 0.5656, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7216494845360825, | |
| "grad_norm": 4.629329204559326, | |
| "learning_rate": 9.54896907216495e-06, | |
| "loss": 0.5839, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7263355201499532, | |
| "grad_norm": 5.421347141265869, | |
| "learning_rate": 9.54604029990628e-06, | |
| "loss": 0.5684, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.7310215557638238, | |
| "grad_norm": 4.520521640777588, | |
| "learning_rate": 9.543111527647611e-06, | |
| "loss": 0.5979, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7357075913776945, | |
| "grad_norm": 5.172377109527588, | |
| "learning_rate": 9.540182755388942e-06, | |
| "loss": 0.5678, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7403936269915652, | |
| "grad_norm": 5.090722560882568, | |
| "learning_rate": 9.537253983130272e-06, | |
| "loss": 0.556, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.7450796626054358, | |
| "grad_norm": 4.6714887619018555, | |
| "learning_rate": 9.534325210871603e-06, | |
| "loss": 0.564, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.7497656982193065, | |
| "grad_norm": 4.211735248565674, | |
| "learning_rate": 9.531396438612934e-06, | |
| "loss": 0.617, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7544517338331771, | |
| "grad_norm": 4.693171501159668, | |
| "learning_rate": 9.528467666354265e-06, | |
| "loss": 0.5657, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.7591377694470478, | |
| "grad_norm": 6.890966892242432, | |
| "learning_rate": 9.525538894095597e-06, | |
| "loss": 0.5838, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.7638238050609185, | |
| "grad_norm": 3.5127806663513184, | |
| "learning_rate": 9.522610121836928e-06, | |
| "loss": 0.5669, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.7685098406747891, | |
| "grad_norm": 4.389316082000732, | |
| "learning_rate": 9.519681349578259e-06, | |
| "loss": 0.5669, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.7731958762886598, | |
| "grad_norm": 4.59335470199585, | |
| "learning_rate": 9.516752577319588e-06, | |
| "loss": 0.604, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.7778819119025304, | |
| "grad_norm": 5.345147132873535, | |
| "learning_rate": 9.51382380506092e-06, | |
| "loss": 0.6132, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.7825679475164011, | |
| "grad_norm": 5.133398532867432, | |
| "learning_rate": 9.510895032802249e-06, | |
| "loss": 0.5539, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.7872539831302718, | |
| "grad_norm": 7.907310962677002, | |
| "learning_rate": 9.50796626054358e-06, | |
| "loss": 0.61, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.7919400187441424, | |
| "grad_norm": 4.504448890686035, | |
| "learning_rate": 9.505037488284911e-06, | |
| "loss": 0.5851, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.7966260543580131, | |
| "grad_norm": 4.3662028312683105, | |
| "learning_rate": 9.502108716026243e-06, | |
| "loss": 0.5915, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8013120899718837, | |
| "grad_norm": 5.221836566925049, | |
| "learning_rate": 9.499179943767574e-06, | |
| "loss": 0.581, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8059981255857545, | |
| "grad_norm": 6.357667446136475, | |
| "learning_rate": 9.496251171508905e-06, | |
| "loss": 0.5937, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8106841611996252, | |
| "grad_norm": 6.262212753295898, | |
| "learning_rate": 9.493322399250236e-06, | |
| "loss": 0.606, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.8153701968134958, | |
| "grad_norm": 4.363849639892578, | |
| "learning_rate": 9.490393626991566e-06, | |
| "loss": 0.5524, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.8200562324273665, | |
| "grad_norm": 5.514476299285889, | |
| "learning_rate": 9.487464854732897e-06, | |
| "loss": 0.5611, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8200562324273665, | |
| "eval_loss": 0.05879165977239609, | |
| "eval_pearson_cosine": 0.7714099892705395, | |
| "eval_pearson_dot": 0.6462212772089089, | |
| "eval_pearson_euclidean": 0.7641084348061273, | |
| "eval_pearson_manhattan": 0.7629885828620147, | |
| "eval_runtime": 43.6421, | |
| "eval_samples_per_second": 34.37, | |
| "eval_spearman_cosine": 0.7720168259371313, | |
| "eval_spearman_dot": 0.6536245076677092, | |
| "eval_spearman_euclidean": 0.7726348092699838, | |
| "eval_spearman_manhattan": 0.7716062900578692, | |
| "eval_steps_per_second": 34.37, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 6.260695457458496, | |
| "learning_rate": 9.484536082474226e-06, | |
| "loss": 0.5566, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8294283036551078, | |
| "grad_norm": 4.187561511993408, | |
| "learning_rate": 9.481607310215558e-06, | |
| "loss": 0.5077, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.8341143392689785, | |
| "grad_norm": 4.611522197723389, | |
| "learning_rate": 9.478678537956889e-06, | |
| "loss": 0.5449, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8388003748828491, | |
| "grad_norm": 12.466484069824219, | |
| "learning_rate": 9.47574976569822e-06, | |
| "loss": 0.5744, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8434864104967198, | |
| "grad_norm": 4.683777332305908, | |
| "learning_rate": 9.472820993439551e-06, | |
| "loss": 0.5102, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8481724461105904, | |
| "grad_norm": 5.541889190673828, | |
| "learning_rate": 9.469892221180882e-06, | |
| "loss": 0.5589, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8528584817244611, | |
| "grad_norm": 8.524742126464844, | |
| "learning_rate": 9.466963448922214e-06, | |
| "loss": 0.5872, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.8575445173383318, | |
| "grad_norm": 7.117620944976807, | |
| "learning_rate": 9.464034676663543e-06, | |
| "loss": 0.5484, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.8622305529522024, | |
| "grad_norm": 5.3457841873168945, | |
| "learning_rate": 9.461105904404874e-06, | |
| "loss": 0.5624, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.8669165885660731, | |
| "grad_norm": 4.375561714172363, | |
| "learning_rate": 9.458177132146204e-06, | |
| "loss": 0.525, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.8716026241799437, | |
| "grad_norm": 4.6026082038879395, | |
| "learning_rate": 9.455248359887535e-06, | |
| "loss": 0.5855, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.8762886597938144, | |
| "grad_norm": 5.399001121520996, | |
| "learning_rate": 9.452319587628866e-06, | |
| "loss": 0.5775, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.8809746954076851, | |
| "grad_norm": 3.9378573894500732, | |
| "learning_rate": 9.449390815370197e-06, | |
| "loss": 0.5068, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.8856607310215557, | |
| "grad_norm": 5.515146255493164, | |
| "learning_rate": 9.446462043111529e-06, | |
| "loss": 0.5718, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.8903467666354264, | |
| "grad_norm": 4.8671345710754395, | |
| "learning_rate": 9.44353327085286e-06, | |
| "loss": 0.5552, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.895032802249297, | |
| "grad_norm": 5.388006210327148, | |
| "learning_rate": 9.440604498594191e-06, | |
| "loss": 0.5854, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.8997188378631678, | |
| "grad_norm": 6.608395099639893, | |
| "learning_rate": 9.43767572633552e-06, | |
| "loss": 0.5459, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.9044048734770385, | |
| "grad_norm": 4.6435160636901855, | |
| "learning_rate": 9.434746954076852e-06, | |
| "loss": 0.529, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 4.642300605773926, | |
| "learning_rate": 9.431818181818183e-06, | |
| "loss": 0.5255, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.9137769447047798, | |
| "grad_norm": 5.40919828414917, | |
| "learning_rate": 9.428889409559512e-06, | |
| "loss": 0.5605, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.9184629803186504, | |
| "grad_norm": 4.9874467849731445, | |
| "learning_rate": 9.425960637300844e-06, | |
| "loss": 0.5798, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.9231490159325211, | |
| "grad_norm": 4.9304094314575195, | |
| "learning_rate": 9.423031865042175e-06, | |
| "loss": 0.5576, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.9278350515463918, | |
| "grad_norm": 5.080467224121094, | |
| "learning_rate": 9.420103092783506e-06, | |
| "loss": 0.5221, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.9325210871602624, | |
| "grad_norm": 5.083141326904297, | |
| "learning_rate": 9.417174320524837e-06, | |
| "loss": 0.6041, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "grad_norm": 3.8194010257720947, | |
| "learning_rate": 9.414245548266168e-06, | |
| "loss": 0.5439, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "eval_loss": 0.058015577495098114, | |
| "eval_pearson_cosine": 0.7772706274362164, | |
| "eval_pearson_dot": 0.6518150260238968, | |
| "eval_pearson_euclidean": 0.7681856098914253, | |
| "eval_pearson_manhattan": 0.7668726914631314, | |
| "eval_runtime": 45.6952, | |
| "eval_samples_per_second": 32.826, | |
| "eval_spearman_cosine": 0.7781983730395821, | |
| "eval_spearman_dot": 0.6578238148510893, | |
| "eval_spearman_euclidean": 0.7779674226973379, | |
| "eval_spearman_manhattan": 0.7766391726420421, | |
| "eval_steps_per_second": 32.826, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9418931583880038, | |
| "grad_norm": 5.383081912994385, | |
| "learning_rate": 9.411316776007498e-06, | |
| "loss": 0.5343, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9465791940018744, | |
| "grad_norm": 5.533719539642334, | |
| "learning_rate": 9.408388003748829e-06, | |
| "loss": 0.5313, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.9512652296157451, | |
| "grad_norm": 4.267172336578369, | |
| "learning_rate": 9.40545923149016e-06, | |
| "loss": 0.5172, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.9559512652296157, | |
| "grad_norm": 4.8553009033203125, | |
| "learning_rate": 9.402530459231491e-06, | |
| "loss": 0.5104, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.9606373008434864, | |
| "grad_norm": 6.460834503173828, | |
| "learning_rate": 9.399601686972821e-06, | |
| "loss": 0.5225, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.9653233364573571, | |
| "grad_norm": 27.46290397644043, | |
| "learning_rate": 9.396672914714152e-06, | |
| "loss": 0.544, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.9700093720712277, | |
| "grad_norm": 4.89717435836792, | |
| "learning_rate": 9.393744142455483e-06, | |
| "loss": 0.5653, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.9746954076850984, | |
| "grad_norm": 4.803583145141602, | |
| "learning_rate": 9.390815370196814e-06, | |
| "loss": 0.5739, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.979381443298969, | |
| "grad_norm": 4.121029853820801, | |
| "learning_rate": 9.387886597938146e-06, | |
| "loss": 0.5192, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.9840674789128397, | |
| "grad_norm": 4.464984893798828, | |
| "learning_rate": 9.384957825679475e-06, | |
| "loss": 0.5393, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9887535145267105, | |
| "grad_norm": 6.364498615264893, | |
| "learning_rate": 9.382029053420806e-06, | |
| "loss": 0.5764, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.993439550140581, | |
| "grad_norm": 3.743790864944458, | |
| "learning_rate": 9.379100281162138e-06, | |
| "loss": 0.5276, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.9981255857544518, | |
| "grad_norm": 4.737389087677002, | |
| "learning_rate": 9.376171508903469e-06, | |
| "loss": 0.5211, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.0028116213683225, | |
| "grad_norm": 3.622758626937866, | |
| "learning_rate": 9.3732427366448e-06, | |
| "loss": 0.5329, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.007497656982193, | |
| "grad_norm": 3.5359978675842285, | |
| "learning_rate": 9.37031396438613e-06, | |
| "loss": 0.4941, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.0121836925960637, | |
| "grad_norm": 4.669582843780518, | |
| "learning_rate": 9.36738519212746e-06, | |
| "loss": 0.4821, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.0168697282099344, | |
| "grad_norm": 3.767122507095337, | |
| "learning_rate": 9.364456419868792e-06, | |
| "loss": 0.4886, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.021555763823805, | |
| "grad_norm": 3.9681687355041504, | |
| "learning_rate": 9.361527647610123e-06, | |
| "loss": 0.493, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.0262417994376758, | |
| "grad_norm": 3.389897108078003, | |
| "learning_rate": 9.358598875351454e-06, | |
| "loss": 0.4688, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.0309278350515463, | |
| "grad_norm": 3.5152347087860107, | |
| "learning_rate": 9.355670103092784e-06, | |
| "loss": 0.4625, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.035613870665417, | |
| "grad_norm": 3.23901629447937, | |
| "learning_rate": 9.352741330834115e-06, | |
| "loss": 0.5143, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.0402999062792877, | |
| "grad_norm": 4.617633819580078, | |
| "learning_rate": 9.349812558575446e-06, | |
| "loss": 0.4732, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.0449859418931584, | |
| "grad_norm": 5.245469570159912, | |
| "learning_rate": 9.346883786316777e-06, | |
| "loss": 0.5213, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.0496719775070291, | |
| "grad_norm": 4.20419454574585, | |
| "learning_rate": 9.343955014058108e-06, | |
| "loss": 0.5042, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.0543580131208996, | |
| "grad_norm": 4.6322102546691895, | |
| "learning_rate": 9.341026241799438e-06, | |
| "loss": 0.4982, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.0543580131208996, | |
| "eval_loss": 0.05779802054166794, | |
| "eval_pearson_cosine": 0.7770314842083366, | |
| "eval_pearson_dot": 0.6498110843024136, | |
| "eval_pearson_euclidean": 0.7709013065859232, | |
| "eval_pearson_manhattan": 0.7695278239114174, | |
| "eval_runtime": 48.4856, | |
| "eval_samples_per_second": 30.937, | |
| "eval_spearman_cosine": 0.7783328375480574, | |
| "eval_spearman_dot": 0.6551905692522538, | |
| "eval_spearman_euclidean": 0.7802862933680744, | |
| "eval_spearman_manhattan": 0.7790525675974715, | |
| "eval_steps_per_second": 30.937, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.0590440487347703, | |
| "grad_norm": 4.474431991577148, | |
| "learning_rate": 9.33809746954077e-06, | |
| "loss": 0.5227, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.063730084348641, | |
| "grad_norm": 4.538947105407715, | |
| "learning_rate": 9.3351686972821e-06, | |
| "loss": 0.5158, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.0684161199625117, | |
| "grad_norm": 6.6143693923950195, | |
| "learning_rate": 9.332239925023432e-06, | |
| "loss": 0.461, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.0731021555763824, | |
| "grad_norm": 4.316189765930176, | |
| "learning_rate": 9.329311152764761e-06, | |
| "loss": 0.5079, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.077788191190253, | |
| "grad_norm": 4.054687976837158, | |
| "learning_rate": 9.326382380506092e-06, | |
| "loss": 0.5022, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.0824742268041236, | |
| "grad_norm": 4.232051849365234, | |
| "learning_rate": 9.323453608247423e-06, | |
| "loss": 0.5096, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.0871602624179943, | |
| "grad_norm": 3.7785236835479736, | |
| "learning_rate": 9.320524835988755e-06, | |
| "loss": 0.4614, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.091846298031865, | |
| "grad_norm": 4.865905284881592, | |
| "learning_rate": 9.317596063730086e-06, | |
| "loss": 0.5135, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.0965323336457358, | |
| "grad_norm": 4.681485176086426, | |
| "learning_rate": 9.314667291471417e-06, | |
| "loss": 0.5061, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.1012183692596063, | |
| "grad_norm": 4.256619453430176, | |
| "learning_rate": 9.311738519212747e-06, | |
| "loss": 0.4627, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.105904404873477, | |
| "grad_norm": 4.459606170654297, | |
| "learning_rate": 9.308809746954078e-06, | |
| "loss": 0.5171, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.1105904404873477, | |
| "grad_norm": 4.008665084838867, | |
| "learning_rate": 9.305880974695409e-06, | |
| "loss": 0.4422, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.1152764761012184, | |
| "grad_norm": 3.674177885055542, | |
| "learning_rate": 9.302952202436738e-06, | |
| "loss": 0.5233, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.119962511715089, | |
| "grad_norm": 4.463940620422363, | |
| "learning_rate": 9.30002343017807e-06, | |
| "loss": 0.4731, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.1246485473289598, | |
| "grad_norm": 3.9289097785949707, | |
| "learning_rate": 9.2970946579194e-06, | |
| "loss": 0.4869, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.1293345829428303, | |
| "grad_norm": 4.097565174102783, | |
| "learning_rate": 9.294165885660732e-06, | |
| "loss": 0.4594, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.134020618556701, | |
| "grad_norm": 4.55318546295166, | |
| "learning_rate": 9.291237113402063e-06, | |
| "loss": 0.494, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.1387066541705717, | |
| "grad_norm": 4.425617694854736, | |
| "learning_rate": 9.288308341143394e-06, | |
| "loss": 0.4829, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.1433926897844424, | |
| "grad_norm": 3.908015489578247, | |
| "learning_rate": 9.285379568884726e-06, | |
| "loss": 0.4793, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.148078725398313, | |
| "grad_norm": 3.7293996810913086, | |
| "learning_rate": 9.282450796626055e-06, | |
| "loss": 0.5399, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.1527647610121836, | |
| "grad_norm": 4.584681034088135, | |
| "learning_rate": 9.279522024367386e-06, | |
| "loss": 0.4479, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.1574507966260543, | |
| "grad_norm": 4.109914302825928, | |
| "learning_rate": 9.276593252108716e-06, | |
| "loss": 0.4599, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.162136832239925, | |
| "grad_norm": 4.446422100067139, | |
| "learning_rate": 9.273664479850047e-06, | |
| "loss": 0.4727, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.1668228678537957, | |
| "grad_norm": 5.975160598754883, | |
| "learning_rate": 9.270735707591378e-06, | |
| "loss": 0.4509, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.1715089034676662, | |
| "grad_norm": 4.379275321960449, | |
| "learning_rate": 9.26780693533271e-06, | |
| "loss": 0.4828, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.1715089034676662, | |
| "eval_loss": 0.05214480683207512, | |
| "eval_pearson_cosine": 0.7792755247272061, | |
| "eval_pearson_dot": 0.6569300577465214, | |
| "eval_pearson_euclidean": 0.7718322585231894, | |
| "eval_pearson_manhattan": 0.7703922250718165, | |
| "eval_runtime": 47.8089, | |
| "eval_samples_per_second": 31.375, | |
| "eval_spearman_cosine": 0.7799819701975583, | |
| "eval_spearman_dot": 0.662507389274304, | |
| "eval_spearman_euclidean": 0.7818437831063969, | |
| "eval_spearman_manhattan": 0.7805341558401507, | |
| "eval_steps_per_second": 31.375, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.176194939081537, | |
| "grad_norm": 3.5287399291992188, | |
| "learning_rate": 9.26487816307404e-06, | |
| "loss": 0.4591, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.1808809746954076, | |
| "grad_norm": 3.277655601501465, | |
| "learning_rate": 9.261949390815372e-06, | |
| "loss": 0.4479, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.1855670103092784, | |
| "grad_norm": 4.732039451599121, | |
| "learning_rate": 9.259020618556703e-06, | |
| "loss": 0.461, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.190253045923149, | |
| "grad_norm": 4.4760966300964355, | |
| "learning_rate": 9.256091846298032e-06, | |
| "loss": 0.4652, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.1949390815370198, | |
| "grad_norm": 7.485498428344727, | |
| "learning_rate": 9.253163074039364e-06, | |
| "loss": 0.4779, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.1996251171508903, | |
| "grad_norm": 3.9956140518188477, | |
| "learning_rate": 9.250234301780693e-06, | |
| "loss": 0.4567, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.204311152764761, | |
| "grad_norm": 3.547563314437866, | |
| "learning_rate": 9.247305529522024e-06, | |
| "loss": 0.4988, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.2089971883786317, | |
| "grad_norm": 5.354389667510986, | |
| "learning_rate": 9.244376757263355e-06, | |
| "loss": 0.464, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.2136832239925024, | |
| "grad_norm": 3.791760206222534, | |
| "learning_rate": 9.241447985004687e-06, | |
| "loss": 0.4441, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.218369259606373, | |
| "grad_norm": 4.77889347076416, | |
| "learning_rate": 9.238519212746018e-06, | |
| "loss": 0.4655, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.2230552952202436, | |
| "grad_norm": 5.804917335510254, | |
| "learning_rate": 9.235590440487349e-06, | |
| "loss": 0.4912, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.2277413308341143, | |
| "grad_norm": 3.841860771179199, | |
| "learning_rate": 9.23266166822868e-06, | |
| "loss": 0.472, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.232427366447985, | |
| "grad_norm": 4.4197540283203125, | |
| "learning_rate": 9.22973289597001e-06, | |
| "loss": 0.4821, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.2371134020618557, | |
| "grad_norm": 5.844490051269531, | |
| "learning_rate": 9.226804123711341e-06, | |
| "loss": 0.5655, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.2417994376757264, | |
| "grad_norm": 3.5442116260528564, | |
| "learning_rate": 9.223875351452672e-06, | |
| "loss": 0.4532, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.246485473289597, | |
| "grad_norm": 5.259571075439453, | |
| "learning_rate": 9.220946579194002e-06, | |
| "loss": 0.4856, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.2511715089034676, | |
| "grad_norm": 4.675846576690674, | |
| "learning_rate": 9.218017806935333e-06, | |
| "loss": 0.4576, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.2558575445173383, | |
| "grad_norm": 5.236482620239258, | |
| "learning_rate": 9.215089034676664e-06, | |
| "loss": 0.513, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.260543580131209, | |
| "grad_norm": 4.658278465270996, | |
| "learning_rate": 9.212160262417995e-06, | |
| "loss": 0.4734, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.2652296157450795, | |
| "grad_norm": 3.7085494995117188, | |
| "learning_rate": 9.209231490159326e-06, | |
| "loss": 0.5279, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.2699156513589505, | |
| "grad_norm": 3.4627673625946045, | |
| "learning_rate": 9.206302717900658e-06, | |
| "loss": 0.4773, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.274601686972821, | |
| "grad_norm": 4.618409633636475, | |
| "learning_rate": 9.203373945641987e-06, | |
| "loss": 0.4354, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.2792877225866917, | |
| "grad_norm": 3.1090590953826904, | |
| "learning_rate": 9.200445173383318e-06, | |
| "loss": 0.4409, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.2839737582005624, | |
| "grad_norm": 4.328725337982178, | |
| "learning_rate": 9.19751640112465e-06, | |
| "loss": 0.4799, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.2886597938144329, | |
| "grad_norm": 3.8362419605255127, | |
| "learning_rate": 9.194587628865979e-06, | |
| "loss": 0.5062, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.2886597938144329, | |
| "eval_loss": 0.05263364687561989, | |
| "eval_pearson_cosine": 0.7755555336434341, | |
| "eval_pearson_dot": 0.6502184577290961, | |
| "eval_pearson_euclidean": 0.7709853609297426, | |
| "eval_pearson_manhattan": 0.769572635033791, | |
| "eval_runtime": 44.8508, | |
| "eval_samples_per_second": 33.444, | |
| "eval_spearman_cosine": 0.7765036654281985, | |
| "eval_spearman_dot": 0.6558936409143281, | |
| "eval_spearman_euclidean": 0.7808945633743188, | |
| "eval_spearman_manhattan": 0.7795729380744477, | |
| "eval_steps_per_second": 33.444, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.2933458294283038, | |
| "grad_norm": 3.6972432136535645, | |
| "learning_rate": 9.19165885660731e-06, | |
| "loss": 0.488, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.2980318650421743, | |
| "grad_norm": 6.73103141784668, | |
| "learning_rate": 9.188730084348641e-06, | |
| "loss": 0.4553, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.302717900656045, | |
| "grad_norm": 4.371028423309326, | |
| "learning_rate": 9.185801312089973e-06, | |
| "loss": 0.4555, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.3074039362699157, | |
| "grad_norm": 3.4788401126861572, | |
| "learning_rate": 9.182872539831304e-06, | |
| "loss": 0.4561, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.3120899718837864, | |
| "grad_norm": 3.832277774810791, | |
| "learning_rate": 9.179943767572635e-06, | |
| "loss": 0.4838, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.316776007497657, | |
| "grad_norm": 3.5579423904418945, | |
| "learning_rate": 9.177014995313966e-06, | |
| "loss": 0.4404, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.3214620431115276, | |
| "grad_norm": 3.7768073081970215, | |
| "learning_rate": 9.174086223055296e-06, | |
| "loss": 0.4724, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.3261480787253983, | |
| "grad_norm": 3.957035779953003, | |
| "learning_rate": 9.171157450796627e-06, | |
| "loss": 0.471, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.330834114339269, | |
| "grad_norm": 3.6035895347595215, | |
| "learning_rate": 9.168228678537958e-06, | |
| "loss": 0.4645, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.3355201499531397, | |
| "grad_norm": 4.358327388763428, | |
| "learning_rate": 9.165299906279288e-06, | |
| "loss": 0.4301, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.3402061855670104, | |
| "grad_norm": 3.4666709899902344, | |
| "learning_rate": 9.162371134020619e-06, | |
| "loss": 0.4508, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.344892221180881, | |
| "grad_norm": 3.912290096282959, | |
| "learning_rate": 9.15944236176195e-06, | |
| "loss": 0.4379, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.3495782567947516, | |
| "grad_norm": 4.305796146392822, | |
| "learning_rate": 9.156513589503281e-06, | |
| "loss": 0.4194, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.3542642924086223, | |
| "grad_norm": 4.231681823730469, | |
| "learning_rate": 9.153584817244612e-06, | |
| "loss": 0.4017, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.358950328022493, | |
| "grad_norm": 4.43821382522583, | |
| "learning_rate": 9.150656044985944e-06, | |
| "loss": 0.4185, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 4.922164440155029, | |
| "learning_rate": 9.147727272727273e-06, | |
| "loss": 0.5199, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.3683223992502342, | |
| "grad_norm": 4.577489852905273, | |
| "learning_rate": 9.144798500468604e-06, | |
| "loss": 0.4237, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.373008434864105, | |
| "grad_norm": 3.9537651538848877, | |
| "learning_rate": 9.141869728209935e-06, | |
| "loss": 0.4888, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.3776944704779757, | |
| "grad_norm": 4.165870189666748, | |
| "learning_rate": 9.138940955951267e-06, | |
| "loss": 0.4476, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.3823805060918464, | |
| "grad_norm": 4.492893218994141, | |
| "learning_rate": 9.136012183692596e-06, | |
| "loss": 0.5159, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.387066541705717, | |
| "grad_norm": 3.847490072250366, | |
| "learning_rate": 9.133083411433927e-06, | |
| "loss": 0.4497, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.3917525773195876, | |
| "grad_norm": 6.766137599945068, | |
| "learning_rate": 9.130154639175258e-06, | |
| "loss": 0.4379, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.3964386129334583, | |
| "grad_norm": 3.9198007583618164, | |
| "learning_rate": 9.12722586691659e-06, | |
| "loss": 0.4519, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.401124648547329, | |
| "grad_norm": 3.67480731010437, | |
| "learning_rate": 9.124297094657921e-06, | |
| "loss": 0.4108, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.4058106841611997, | |
| "grad_norm": 3.3013832569122314, | |
| "learning_rate": 9.12136832239925e-06, | |
| "loss": 0.433, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.4058106841611997, | |
| "eval_loss": 0.0497601218521595, | |
| "eval_pearson_cosine": 0.7834985989633054, | |
| "eval_pearson_dot": 0.6669524421664974, | |
| "eval_pearson_euclidean": 0.7743874834934843, | |
| "eval_pearson_manhattan": 0.7730376146204847, | |
| "eval_runtime": 47.8141, | |
| "eval_samples_per_second": 31.371, | |
| "eval_spearman_cosine": 0.7845889452017747, | |
| "eval_spearman_dot": 0.6729435548765089, | |
| "eval_spearman_euclidean": 0.784591658726837, | |
| "eval_spearman_manhattan": 0.7832975474858643, | |
| "eval_steps_per_second": 31.371, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.4104967197750704, | |
| "grad_norm": 4.2792487144470215, | |
| "learning_rate": 9.118439550140582e-06, | |
| "loss": 0.4878, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.415182755388941, | |
| "grad_norm": 3.8892383575439453, | |
| "learning_rate": 9.115510777881913e-06, | |
| "loss": 0.4676, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.4198687910028116, | |
| "grad_norm": 5.0008745193481445, | |
| "learning_rate": 9.112582005623244e-06, | |
| "loss": 0.4729, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.4245548266166823, | |
| "grad_norm": 5.607409477233887, | |
| "learning_rate": 9.109653233364575e-06, | |
| "loss": 0.4762, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.429240862230553, | |
| "grad_norm": 3.0340139865875244, | |
| "learning_rate": 9.106724461105905e-06, | |
| "loss": 0.4438, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.4339268978444237, | |
| "grad_norm": 4.310724258422852, | |
| "learning_rate": 9.103795688847236e-06, | |
| "loss": 0.4499, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.4386129334582942, | |
| "grad_norm": 4.481917381286621, | |
| "learning_rate": 9.100866916588567e-06, | |
| "loss": 0.4493, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.443298969072165, | |
| "grad_norm": 4.330621719360352, | |
| "learning_rate": 9.097938144329898e-06, | |
| "loss": 0.4505, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.4479850046860356, | |
| "grad_norm": 4.335081577301025, | |
| "learning_rate": 9.095009372071228e-06, | |
| "loss": 0.446, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.4526710402999063, | |
| "grad_norm": 3.0894672870635986, | |
| "learning_rate": 9.092080599812559e-06, | |
| "loss": 0.4404, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.457357075913777, | |
| "grad_norm": 4.6363983154296875, | |
| "learning_rate": 9.08915182755389e-06, | |
| "loss": 0.5358, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.4620431115276475, | |
| "grad_norm": 3.80387806892395, | |
| "learning_rate": 9.086223055295221e-06, | |
| "loss": 0.4374, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.4667291471415183, | |
| "grad_norm": 3.276442289352417, | |
| "learning_rate": 9.083294283036552e-06, | |
| "loss": 0.5013, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.471415182755389, | |
| "grad_norm": 3.843419075012207, | |
| "learning_rate": 9.080365510777884e-06, | |
| "loss": 0.4694, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.4761012183692597, | |
| "grad_norm": 4.7606730461120605, | |
| "learning_rate": 9.077436738519213e-06, | |
| "loss": 0.4215, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.4807872539831304, | |
| "grad_norm": 3.739225149154663, | |
| "learning_rate": 9.074507966260544e-06, | |
| "loss": 0.4756, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.4854732895970009, | |
| "grad_norm": 3.36938214302063, | |
| "learning_rate": 9.071579194001876e-06, | |
| "loss": 0.4243, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.4901593252108716, | |
| "grad_norm": 6.589993476867676, | |
| "learning_rate": 9.068650421743205e-06, | |
| "loss": 0.4698, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.4948453608247423, | |
| "grad_norm": 3.8416695594787598, | |
| "learning_rate": 9.065721649484536e-06, | |
| "loss": 0.4964, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.499531396438613, | |
| "grad_norm": 4.367741584777832, | |
| "learning_rate": 9.062792877225867e-06, | |
| "loss": 0.4417, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.5042174320524837, | |
| "grad_norm": 3.500617742538452, | |
| "learning_rate": 9.059864104967199e-06, | |
| "loss": 0.4522, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.5089034676663542, | |
| "grad_norm": 3.5349769592285156, | |
| "learning_rate": 9.05693533270853e-06, | |
| "loss": 0.4393, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.513589503280225, | |
| "grad_norm": 3.8469526767730713, | |
| "learning_rate": 9.054006560449861e-06, | |
| "loss": 0.4453, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.5182755388940956, | |
| "grad_norm": 3.209933280944824, | |
| "learning_rate": 9.051077788191192e-06, | |
| "loss": 0.4599, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.522961574507966, | |
| "grad_norm": 3.7976036071777344, | |
| "learning_rate": 9.048149015932522e-06, | |
| "loss": 0.4373, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.522961574507966, | |
| "eval_loss": 0.049798864871263504, | |
| "eval_pearson_cosine": 0.7866421286010308, | |
| "eval_pearson_dot": 0.6641640853451243, | |
| "eval_pearson_euclidean": 0.7777378719378305, | |
| "eval_pearson_manhattan": 0.7764827785285746, | |
| "eval_runtime": 43.7509, | |
| "eval_samples_per_second": 34.285, | |
| "eval_spearman_cosine": 0.7870351053050699, | |
| "eval_spearman_dot": 0.6708598238937284, | |
| "eval_spearman_euclidean": 0.7874683707378692, | |
| "eval_spearman_manhattan": 0.7865203522698128, | |
| "eval_steps_per_second": 34.285, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.527647610121837, | |
| "grad_norm": 4.851262092590332, | |
| "learning_rate": 9.045220243673853e-06, | |
| "loss": 0.491, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.5323336457357075, | |
| "grad_norm": 4.183891773223877, | |
| "learning_rate": 9.042291471415184e-06, | |
| "loss": 0.453, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.5370196813495782, | |
| "grad_norm": 4.280774116516113, | |
| "learning_rate": 9.039362699156514e-06, | |
| "loss": 0.4413, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.541705716963449, | |
| "grad_norm": 4.118307113647461, | |
| "learning_rate": 9.036433926897845e-06, | |
| "loss": 0.4661, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.5463917525773194, | |
| "grad_norm": 5.99712610244751, | |
| "learning_rate": 9.033505154639176e-06, | |
| "loss": 0.5205, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.5510777881911904, | |
| "grad_norm": 4.146691799163818, | |
| "learning_rate": 9.030576382380507e-06, | |
| "loss": 0.428, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.5557638238050608, | |
| "grad_norm": 3.899887800216675, | |
| "learning_rate": 9.027647610121838e-06, | |
| "loss": 0.4564, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.5604498594189316, | |
| "grad_norm": 3.9663302898406982, | |
| "learning_rate": 9.02471883786317e-06, | |
| "loss": 0.4539, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.5651358950328023, | |
| "grad_norm": 3.526458263397217, | |
| "learning_rate": 9.021790065604499e-06, | |
| "loss": 0.4844, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.569821930646673, | |
| "grad_norm": 4.192911624908447, | |
| "learning_rate": 9.01886129334583e-06, | |
| "loss": 0.4278, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.5745079662605437, | |
| "grad_norm": 4.185749530792236, | |
| "learning_rate": 9.015932521087161e-06, | |
| "loss": 0.4632, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.5791940018744142, | |
| "grad_norm": 3.411773204803467, | |
| "learning_rate": 9.013003748828491e-06, | |
| "loss": 0.436, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.5838800374882849, | |
| "grad_norm": 4.467881679534912, | |
| "learning_rate": 9.010074976569822e-06, | |
| "loss": 0.4133, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.5885660731021556, | |
| "grad_norm": 3.77736496925354, | |
| "learning_rate": 9.007146204311153e-06, | |
| "loss": 0.4452, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.5932521087160263, | |
| "grad_norm": 4.084095478057861, | |
| "learning_rate": 9.004217432052485e-06, | |
| "loss": 0.4605, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.597938144329897, | |
| "grad_norm": 3.3393008708953857, | |
| "learning_rate": 9.001288659793816e-06, | |
| "loss": 0.4157, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.6026241799437675, | |
| "grad_norm": 3.096881151199341, | |
| "learning_rate": 8.998359887535147e-06, | |
| "loss": 0.4478, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.6073102155576382, | |
| "grad_norm": 3.0557243824005127, | |
| "learning_rate": 8.995431115276478e-06, | |
| "loss": 0.4452, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.611996251171509, | |
| "grad_norm": 3.7997219562530518, | |
| "learning_rate": 8.992502343017808e-06, | |
| "loss": 0.4287, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.6166822867853796, | |
| "grad_norm": 3.6995465755462646, | |
| "learning_rate": 8.989573570759139e-06, | |
| "loss": 0.4423, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.6213683223992503, | |
| "grad_norm": 4.1384053230285645, | |
| "learning_rate": 8.986644798500468e-06, | |
| "loss": 0.4563, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.6260543580131208, | |
| "grad_norm": 4.637014865875244, | |
| "learning_rate": 8.9837160262418e-06, | |
| "loss": 0.4538, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.6307403936269915, | |
| "grad_norm": 4.30952262878418, | |
| "learning_rate": 8.98078725398313e-06, | |
| "loss": 0.3993, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.6354264292408622, | |
| "grad_norm": 4.746737003326416, | |
| "learning_rate": 8.977858481724462e-06, | |
| "loss": 0.4274, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.640112464854733, | |
| "grad_norm": 3.8592286109924316, | |
| "learning_rate": 8.974929709465793e-06, | |
| "loss": 0.4066, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.640112464854733, | |
| "eval_loss": 0.050406068563461304, | |
| "eval_pearson_cosine": 0.7840015528942317, | |
| "eval_pearson_dot": 0.659932129633507, | |
| "eval_pearson_euclidean": 0.7769297052026758, | |
| "eval_pearson_manhattan": 0.7754185185705609, | |
| "eval_runtime": 44.0859, | |
| "eval_samples_per_second": 34.024, | |
| "eval_spearman_cosine": 0.7845451302239834, | |
| "eval_spearman_dot": 0.6667296644451466, | |
| "eval_spearman_euclidean": 0.7868327314956118, | |
| "eval_spearman_manhattan": 0.7856021398727839, | |
| "eval_steps_per_second": 34.024, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.6447985004686037, | |
| "grad_norm": 5.983098030090332, | |
| "learning_rate": 8.972000937207124e-06, | |
| "loss": 0.4451, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.6494845360824741, | |
| "grad_norm": 4.052550315856934, | |
| "learning_rate": 8.969072164948455e-06, | |
| "loss": 0.4331, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.6541705716963448, | |
| "grad_norm": 3.7970380783081055, | |
| "learning_rate": 8.966143392689785e-06, | |
| "loss": 0.4427, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.6588566073102156, | |
| "grad_norm": 4.695807456970215, | |
| "learning_rate": 8.963214620431116e-06, | |
| "loss": 0.4522, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.6635426429240863, | |
| "grad_norm": 4.41202974319458, | |
| "learning_rate": 8.960285848172446e-06, | |
| "loss": 0.4275, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.668228678537957, | |
| "grad_norm": 5.364877223968506, | |
| "learning_rate": 8.957357075913777e-06, | |
| "loss": 0.4321, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.6729147141518275, | |
| "grad_norm": 3.801132917404175, | |
| "learning_rate": 8.954428303655108e-06, | |
| "loss": 0.4494, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.6776007497656982, | |
| "grad_norm": 4.197866439819336, | |
| "learning_rate": 8.95149953139644e-06, | |
| "loss": 0.4126, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.6822867853795689, | |
| "grad_norm": 5.34595251083374, | |
| "learning_rate": 8.94857075913777e-06, | |
| "loss": 0.4757, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.6869728209934396, | |
| "grad_norm": 4.772789478302002, | |
| "learning_rate": 8.945641986879102e-06, | |
| "loss": 0.4037, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.6916588566073103, | |
| "grad_norm": 4.81839656829834, | |
| "learning_rate": 8.942713214620433e-06, | |
| "loss": 0.4192, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.6963448922211808, | |
| "grad_norm": 3.470919132232666, | |
| "learning_rate": 8.939784442361762e-06, | |
| "loss": 0.4106, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.7010309278350515, | |
| "grad_norm": 3.2051522731781006, | |
| "learning_rate": 8.936855670103094e-06, | |
| "loss": 0.4162, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.7057169634489222, | |
| "grad_norm": 3.8122334480285645, | |
| "learning_rate": 8.933926897844423e-06, | |
| "loss": 0.4054, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.710402999062793, | |
| "grad_norm": 5.07956075668335, | |
| "learning_rate": 8.930998125585754e-06, | |
| "loss": 0.4164, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.7150890346766636, | |
| "grad_norm": 3.754542112350464, | |
| "learning_rate": 8.928069353327085e-06, | |
| "loss": 0.3703, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.7197750702905341, | |
| "grad_norm": 3.4620890617370605, | |
| "learning_rate": 8.925140581068417e-06, | |
| "loss": 0.4667, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.7244611059044048, | |
| "grad_norm": 4.179393768310547, | |
| "learning_rate": 8.922211808809748e-06, | |
| "loss": 0.4384, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.7291471415182755, | |
| "grad_norm": 3.0865719318389893, | |
| "learning_rate": 8.919283036551079e-06, | |
| "loss": 0.4248, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.7338331771321462, | |
| "grad_norm": 3.9282147884368896, | |
| "learning_rate": 8.91635426429241e-06, | |
| "loss": 0.4231, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.738519212746017, | |
| "grad_norm": 3.9746885299682617, | |
| "learning_rate": 8.91342549203374e-06, | |
| "loss": 0.4152, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.7432052483598874, | |
| "grad_norm": 3.8340625762939453, | |
| "learning_rate": 8.910496719775071e-06, | |
| "loss": 0.4458, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.7478912839737581, | |
| "grad_norm": 4.861859321594238, | |
| "learning_rate": 8.907567947516402e-06, | |
| "loss": 0.4274, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.7525773195876289, | |
| "grad_norm": 3.3457283973693848, | |
| "learning_rate": 8.904639175257732e-06, | |
| "loss": 0.4534, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.7572633552014996, | |
| "grad_norm": 4.057953834533691, | |
| "learning_rate": 8.901710402999063e-06, | |
| "loss": 0.484, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.7572633552014996, | |
| "eval_loss": 0.05240313336253166, | |
| "eval_pearson_cosine": 0.7879299521989642, | |
| "eval_pearson_dot": 0.6605985065084816, | |
| "eval_pearson_euclidean": 0.7797438530556207, | |
| "eval_pearson_manhattan": 0.778216782480726, | |
| "eval_runtime": 44.9916, | |
| "eval_samples_per_second": 33.34, | |
| "eval_spearman_cosine": 0.7888982276270184, | |
| "eval_spearman_dot": 0.6669965792210436, | |
| "eval_spearman_euclidean": 0.7899037728263932, | |
| "eval_spearman_manhattan": 0.7886320032383264, | |
| "eval_steps_per_second": 33.34, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.7619493908153703, | |
| "grad_norm": 3.281102418899536, | |
| "learning_rate": 8.898781630740394e-06, | |
| "loss": 0.4074, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.7666354264292408, | |
| "grad_norm": 4.710203170776367, | |
| "learning_rate": 8.895852858481725e-06, | |
| "loss": 0.4537, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.7713214620431117, | |
| "grad_norm": 4.636346817016602, | |
| "learning_rate": 8.892924086223056e-06, | |
| "loss": 0.4348, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.7760074976569822, | |
| "grad_norm": 4.518571376800537, | |
| "learning_rate": 8.889995313964388e-06, | |
| "loss": 0.4515, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.780693533270853, | |
| "grad_norm": 4.0576066970825195, | |
| "learning_rate": 8.887066541705717e-06, | |
| "loss": 0.4276, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.7853795688847236, | |
| "grad_norm": 5.657445430755615, | |
| "learning_rate": 8.884137769447048e-06, | |
| "loss": 0.4277, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.790065604498594, | |
| "grad_norm": 5.393405437469482, | |
| "learning_rate": 8.88120899718838e-06, | |
| "loss": 0.428, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.794751640112465, | |
| "grad_norm": 4.101112365722656, | |
| "learning_rate": 8.87828022492971e-06, | |
| "loss": 0.4489, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.7994376757263355, | |
| "grad_norm": 3.531888246536255, | |
| "learning_rate": 8.87535145267104e-06, | |
| "loss": 0.3673, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.8041237113402062, | |
| "grad_norm": 3.4490315914154053, | |
| "learning_rate": 8.872422680412371e-06, | |
| "loss": 0.4059, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.808809746954077, | |
| "grad_norm": 3.034252643585205, | |
| "learning_rate": 8.869493908153702e-06, | |
| "loss": 0.3832, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.8134957825679474, | |
| "grad_norm": 4.064283847808838, | |
| "learning_rate": 8.866565135895034e-06, | |
| "loss": 0.4704, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 3.2689194679260254, | |
| "learning_rate": 8.863636363636365e-06, | |
| "loss": 0.4428, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.8228678537956888, | |
| "grad_norm": 3.173530101776123, | |
| "learning_rate": 8.860707591377694e-06, | |
| "loss": 0.4283, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.8275538894095595, | |
| "grad_norm": 3.638122081756592, | |
| "learning_rate": 8.857778819119026e-06, | |
| "loss": 0.4225, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.8322399250234302, | |
| "grad_norm": 3.636679172515869, | |
| "learning_rate": 8.854850046860357e-06, | |
| "loss": 0.4154, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.8369259606373007, | |
| "grad_norm": 3.810847520828247, | |
| "learning_rate": 8.851921274601688e-06, | |
| "loss": 0.3931, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.8416119962511717, | |
| "grad_norm": 3.7469394207000732, | |
| "learning_rate": 8.848992502343019e-06, | |
| "loss": 0.4472, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.8462980318650422, | |
| "grad_norm": 4.962492942810059, | |
| "learning_rate": 8.846063730084349e-06, | |
| "loss": 0.4324, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.8509840674789129, | |
| "grad_norm": 3.4641172885894775, | |
| "learning_rate": 8.84313495782568e-06, | |
| "loss": 0.4234, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.8556701030927836, | |
| "grad_norm": 3.8601555824279785, | |
| "learning_rate": 8.840206185567011e-06, | |
| "loss": 0.4045, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.860356138706654, | |
| "grad_norm": 6.290759086608887, | |
| "learning_rate": 8.837277413308342e-06, | |
| "loss": 0.4655, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.865042174320525, | |
| "grad_norm": 3.5882256031036377, | |
| "learning_rate": 8.834348641049673e-06, | |
| "loss": 0.4298, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.8697282099343955, | |
| "grad_norm": 3.133535623550415, | |
| "learning_rate": 8.831419868791003e-06, | |
| "loss": 0.4508, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.8744142455482662, | |
| "grad_norm": 3.220383644104004, | |
| "learning_rate": 8.828491096532334e-06, | |
| "loss": 0.4348, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.8744142455482662, | |
| "eval_loss": 0.04981923848390579, | |
| "eval_pearson_cosine": 0.790612878761543, | |
| "eval_pearson_dot": 0.6612786229229286, | |
| "eval_pearson_euclidean": 0.7799249806775554, | |
| "eval_pearson_manhattan": 0.7784476870813819, | |
| "eval_runtime": 45.9371, | |
| "eval_samples_per_second": 32.653, | |
| "eval_spearman_cosine": 0.7908100570922554, | |
| "eval_spearman_dot": 0.6689224987064551, | |
| "eval_spearman_euclidean": 0.7902520878335856, | |
| "eval_spearman_manhattan": 0.7892503488739743, | |
| "eval_steps_per_second": 32.653, | |
| "step": 4000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4268, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |